24#include <seastar/core/aligned_buffer.hh>
25#include <seastar/core/cacheline.hh>
26#include <seastar/core/circular_buffer.hh>
28#include <seastar/core/condition-variable.hh>
29#include <seastar/core/enum.hh>
30#include <seastar/core/file.hh>
31#include <seastar/core/future.hh>
33#include <seastar/core/internal/io_desc.hh>
34#include <seastar/core/internal/io_request.hh>
35#include <seastar/core/internal/io_sink.hh>
36#include <seastar/core/iostream.hh>
37#include <seastar/core/lowres_clock.hh>
38#include <seastar/core/make_task.hh>
39#include <seastar/core/manual_clock.hh>
40#include <seastar/core/memory.hh>
42#include <seastar/core/internal/estimated_histogram.hh>
44#include <seastar/core/reactor_config.hh>
45#include <seastar/core/scattered_message.hh>
47#include <seastar/core/scheduling_specific.hh>
48#include <seastar/core/seastar.hh>
49#include <seastar/core/semaphore.hh>
50#include <seastar/core/shared_mutex.hh>
51#include <seastar/core/sstring.hh>
52#include <seastar/core/temporary_buffer.hh>
53#include <seastar/core/thread_cputime_clock.hh>
55#include <seastar/core/gate.hh>
56#include <seastar/net/api.hh>
57#include <seastar/util/eclipse.hh>
58#include <seastar/util/log.hh>
59#include <seastar/util/modules.hh>
60#include <seastar/util/noncopyable_function.hh>
61#include <seastar/util/std-compat.hh>
62#include "internal/pollable_fd.hh"
65#include <boost/container/static_vector.hpp>
72#include <unordered_map>
77#include <sys/socket.h>
78#include <netinet/ip.h>
83struct _Unwind_Exception;
87using shard_id = unsigned;
// Hash functor specialisation for IPv4 socket addresses (::sockaddr_in).
// NOTE(review): the enclosing namespace, the template<> header and the closing
// braces are not visible in this listing fragment.
100struct hash<::sockaddr_in> {
// Combines the port and the IPv4 address with XOR. Both fields are used
// exactly as stored in the struct; no byte-order conversion is applied.
101 size_t operator()(::sockaddr_in a)
const {
102 return a.sin_port ^ a.sin_addr.s_addr;
108bool operator==(const ::sockaddr_in a, const ::sockaddr_in b);
115class reactor_backend_selector;
117class reactor_backend;
122class reactor_stall_sampler;
123class cpu_stall_detector;
124class buffer_allocator;
128size_t scheduling_group_count();
130void increase_thrown_exceptions_counter() noexcept;
140 virtual void complete_with(ssize_t res)
final override;
142 virtual void complete(
size_t res)
noexcept = 0;
143 virtual void set_exception(std::exception_ptr eptr)
noexcept = 0;
154 class batch_flush_pollfn;
156 class drain_cross_cpu_freelist_pollfn;
157 class lowres_timer_pollfn;
158 class manual_timer_pollfn;
160 class reap_kernel_completions_pollfn;
161 class kernel_submit_work_pollfn;
162 class io_queue_submission_pollfn;
163 class syscall_pollfn;
164 class execution_stage_pollfn;
166 friend class file_data_source_impl;
167 friend class internal::reactor_stall_sampler;
168 friend class preempt_io_context;
169 friend struct hrtimer_aio_completion;
170 friend class reactor_backend_epoll;
171 friend class reactor_backend_aio;
172 friend class reactor_backend_uring;
173 friend class reactor_backend_selector;
176 friend class aio_storage_context;
178 using poller = internal::poller;
// 64-bit counter fields, each initialised to zero.
// NOTE(review): presumably the members of io_stats (the type returned by
// get_io_stats()); the enclosing struct header is not visible here -- confirm.
// aio_*: operation counts, byte totals, and error count.
184 uint64_t aio_reads = 0;
185 uint64_t aio_read_bytes = 0;
186 uint64_t aio_writes = 0;
187 uint64_t aio_write_bytes = 0;
188 uint64_t aio_outsizes = 0;
189 uint64_t aio_errors = 0;
// fstream_*: NOTE(review): presumably file input-stream statistics (reads,
// blocked reads, discarded read-ahead) -- verify against the code that
// increments them.
190 uint64_t fstream_reads = 0;
191 uint64_t fstream_read_bytes = 0;
192 uint64_t fstream_reads_blocked = 0;
193 uint64_t fstream_read_bytes_blocked = 0;
194 uint64_t fstream_read_aheads_discarded = 0;
195 uint64_t fstream_read_ahead_discarded_bytes = 0;
202 uint64_t tasks_processed = 0;
204 friend void io_completion::complete_with(ssize_t);
211 std::shared_ptr<smp> _smp;
216 std::unique_ptr<reactor_backend> _backend;
217 sigset_t _active_sigmask;
218 std::vector<pollfn*> _pollers;
// Compile-time AIO sizing limits.
220 static constexpr unsigned max_aio_per_queue = 128;
221 static constexpr unsigned max_queues = 8;
// Product of the two limits above: 128 * 8 = 1024.
222 static constexpr unsigned max_aio = max_aio_per_queue * max_queues;
225 std::unordered_map<dev_t, std::unique_ptr<io_queue>> _io_queues;
227 internal::io_sink _io_sink;
228 unsigned _num_io_groups = 0;
230 std::vector<noncopyable_function<future<> ()>> _exit_funcs;
232 bool _stopping =
false;
233 bool _stopped =
false;
234 bool _finished_running_tasks =
false;
236 std::optional<future<std::unique_ptr<network_stack>>> _network_stack_ready;
240 internal::preemption_monitor _preemption_monitor{};
241 uint64_t _global_tasks_processed = 0;
243 metrics::internal::time_estimated_histogram _stalls_histogram;
244 std::unique_ptr<internal::cpu_stall_detector> _cpu_stall_detector;
246 timer<>::set_t _timers;
247 timer<>::set_t::timer_list_t _expired_timers;
248 timer<lowres_clock>::set_t _lowres_timers;
249 timer<lowres_clock>::set_t::timer_list_t _expired_lowres_timers;
250 timer<manual_clock>::set_t _manual_timers;
251 timer<manual_clock>::set_t::timer_list_t _expired_manual_timers;
253 uint64_t _fsyncs = 0;
254 uint64_t _cxx_exceptions = 0;
255 uint64_t _abandoned_failed_futures = 0;
257 explicit task_queue(
unsigned id, sstring name, sstring shortname,
float shares);
258 int64_t _vruntime = 0;
260 int64_t _reciprocal_shares_times_2_power_32;
261 bool _active =
false;
263 sched_clock::time_point _ts;
264 sched_clock::duration _runtime = {};
265 sched_clock::duration _waittime = {};
266 sched_clock::duration _starvetime = {};
267 uint64_t _tasks_processed = 0;
268 circular_buffer<task*> _q;
272 static constexpr size_t shortname_size = 4;
274 int64_t to_vruntime(sched_clock::duration runtime)
const;
275 void set_shares(
float shares)
noexcept;
276 struct indirect_compare;
277 sched_clock::duration _time_spent_on_task_quota_violations = {};
279 void rename(sstring new_name, sstring new_shortname);
281 void register_stats();
284 boost::container::static_vector<std::unique_ptr<task_queue>, max_scheduling_groups()> _task_queues;
285 internal::scheduling_group_specific_thread_local_data _scheduling_group_specific_data;
286 shared_mutex _scheduling_group_keys_mutex;
287 int64_t _last_vruntime = 0;
288 task_queue_list _active_task_queues;
289 task_queue_list _activating_task_queues;
290 task_queue* _at_destroy_tasks;
291 task* _current_task =
nullptr;
301 std::unique_ptr<network_stack> _network_stack;
302 lowres_clock::time_point _lowres_next_timeout = lowres_clock::time_point::max();
303 std::optional<pollable_fd> _aio_eventfd;
304 const bool _reuseport;
305 circular_buffer<double> _loads;
312 sched_clock::duration _last_true_steal{0};
315 sched_clock::duration _last_mono_steal{0};
316 sched_clock::duration _total_idle{0};
317 sched_clock::duration _total_sleep{0};
318 sched_clock::time_point _start_time =
now();
319 output_stream<char>::batch_flush_list_t _flush_batching;
320 std::atomic<bool> _sleeping
alignas(seastar::cache_line_size){0};
321 pthread_t _thread_id
alignas(seastar::cache_line_size) = pthread_self();
322 std::atomic<bool> _dying{
false};
323 gate _background_gate;
326 static std::chrono::nanoseconds calculate_poll_time();
327 static void block_notifier(
int);
328 bool flush_pending_aio();
329 steady_clock_type::time_point next_pending_aio() const noexcept;
330 bool reap_kernel_completions();
331 bool flush_tcp_batches();
332 void update_lowres_clocks() noexcept;
333 bool do_expire_lowres_timers() noexcept;
334 bool do_check_lowres_timers() const noexcept;
335 void expire_manual_timers() noexcept;
336 void start_aio_eventfd_loop();
337 void stop_aio_eventfd_loop();
346 bool pure_poll_once();
// Returns the current value of the _stopped flag (initialised to false).
353 bool stopped() const noexcept {
return _stopped; }
// Returns the _polls counter.
// NOTE(review): _polls is declared outside the visible part of this listing.
355 uint64_t polls() const noexcept {
return _polls; }
364 bool pure_poll_signal()
const;
365 void handle_signal(
int signo, noncopyable_function<
void ()>&& handler);
366 void handle_signal_once(
int signo, noncopyable_function<
void ()>&& handler);
367 static void action(
int signo, siginfo_t* siginfo,
void* ignore);
368 static void failed_to_handle(
int signo);
370 struct signal_handler {
371 signal_handler(
int signo, noncopyable_function<
void ()>&& handler);
372 noncopyable_function<void ()> _handler;
374 std::atomic<uint64_t> _pending_signals;
375 std::unordered_map<int, signal_handler> _signal_handlers;
379 std::unique_ptr<thread_pool> _thread_pool;
380 friend class thread_pool;
381 friend class thread_context;
382 friend class internal::cpu_stall_detector;
386 uint64_t pending_task_count()
const;
387 void run_tasks(task_queue& tq);
388 bool have_more_tasks()
const;
389 bool posix_reuseport_detect();
390 void run_some_tasks();
391 void activate(task_queue& tq);
392 void insert_active_task_queue(task_queue* tq);
393 task_queue* pop_active_task_queue(sched_clock::time_point
now);
394 void insert_activating_task_queues();
395 void account_runtime(task_queue& tq, sched_clock::duration runtime);
396 void allocate_scheduling_group_specific_data(
scheduling_group sg,
unsigned long key_id);
401 uint64_t tasks_processed()
const;
402 uint64_t min_vruntime()
const;
403 void request_preemption();
404 void start_handling_signal();
405 void reset_preemption_monitor();
406 void service_highres_timer() noexcept;
433 explicit
reactor(
std::
shared_ptr<
smp>
smp, alien::instance& alien,
unsigned id, reactor_backend_selector rbs, reactor_config cfg);
436 void operator=(const
reactor&) = delete;
438 static sched_clock::time_point
now() noexcept {
// Returns the time elapsed since _start_time, measured with now().
// _start_time is itself default-initialised from now() in its member
// initialiser.
441 sched_clock::duration uptime() {
442 return now() - _start_time;
// Returns the I/O queue registered in _io_queues for device `devid`.
// If no queue is registered under `devid`, the queue stored under key 0 is
// returned instead.
445 io_queue& get_io_queue(dev_t devid = 0) {
446 auto queue = _io_queues.find(devid);
447 if (queue == _io_queues.end()) {
// Fallback path: std::unordered_map::at throws std::out_of_range when key 0
// is absent as well.
448 return *_io_queues.at(0);
// Found: dereference the owning unique_ptr held in the map entry.
450 return *(queue->second);
455 future<> update_bandwidth_for_queues(internal::priority_class pc, uint64_t bandwidth);
457 void rename_queues(internal::priority_class pc, sstring new_name);
459 void update_shares_for_queues(internal::priority_class pc, uint32_t shares);
461 server_socket
listen(socket_address sa, listen_options opts = {});
463 future<connected_socket>
connect(socket_address sa);
464 future<connected_socket>
connect(socket_address, socket_address, transport proto = transport::TCP);
466 pollable_fd posix_listen(socket_address sa, listen_options opts = {});
// Returns the _reuseport flag, a const bool member fixed at construction.
468 bool posix_reuseport_available()
const {
return _reuseport; }
470 pollable_fd make_pollable_fd(socket_address sa,
int proto);
472 future<> posix_connect(pollable_fd pfd, socket_address sa, socket_address local);
474 future<> send_all(pollable_fd_state& fd,
const void* buffer,
size_t size);
476 future<file> open_file_dma(std::string_view name,
open_flags flags, file_open_options options = {})
noexcept;
477 future<file> open_directory(std::string_view name)
noexcept;
478 future<>
make_directory(std::string_view name, file_permissions permissions = file_permissions::default_dir_permissions)
noexcept;
479 future<>
touch_directory(std::string_view name, file_permissions permissions = file_permissions::default_dir_permissions)
noexcept;
480 future<std::optional<directory_entry_type>>
file_type(std::string_view name, follow_symlink = follow_symlink::yes)
noexcept;
481 future<stat_data>
file_stat(std::string_view pathname, follow_symlink)
noexcept;
482 future<>
chown(std::string_view filepath, uid_t owner, gid_t group);
483 future<std::optional<struct group_details>>
getgrnam(std::string_view name);
484 future<uint64_t>
file_size(std::string_view pathname)
noexcept;
485 future<bool>
file_accessible(std::string_view pathname, access_flags flags)
noexcept;
486 future<bool>
file_exists(std::string_view pathname)
noexcept {
489 future<fs_type>
file_system_at(std::string_view pathname)
noexcept;
490 future<std::filesystem::space_info>
file_system_space(std::string_view pathname)
noexcept;
491 future<struct statvfs> statvfs(std::string_view pathname)
noexcept;
492 future<>
remove_file(std::string_view pathname)
noexcept;
493 future<>
rename_file(std::string_view old_pathname, std::string_view new_pathname)
noexcept;
494 future<>
link_file(std::string_view oldpath, std::string_view newpath)
noexcept;
495 future<>
chmod(std::string_view name, file_permissions permissions)
noexcept;
497 future<size_t> read_directory(
int fd,
char* buffer,
size_t buffer_size);
499 future<int> inotify_add_watch(
int fd, std::string_view path, uint32_t flags);
501 future<std::tuple<file_desc, file_desc>>
make_pipe();
502 future<std::tuple<pid_t, file_desc, file_desc, file_desc>>
503 spawn(std::string_view pathname,
504 std::vector<sstring> argv,
505 std::vector<sstring> env = {});
506 future<int> waitpid(pid_t pid);
507 void kill(pid_t pid,
int sig);
// Returns the future obtained from _start_promise.
// NOTE(review): presumably resolved once the reactor has started; the code
// that fulfils _start_promise is not visible here -- confirm.
511 future<> when_started() {
return _start_promise.
get_future(); }
// Waits on _stop_requested for at most `timeout`, using the _stopping flag as
// the wake-up predicate, and returns the future produced by that wait.
// NOTE(review): _stop_requested is presumably a condition_variable; how an
// expired timeout is reported depends on its wait() overload -- confirm.
514 template <
typename Rep,
typename Period>
515 future<> wait_for_stop(std::chrono::duration<Rep, Period> timeout) {
516 return _stop_requested.
wait(timeout, [
this] {
return _stopping; });
519 void at_exit(noncopyable_function<future<> ()> func);
// Wraps `func` in a task created under default_scheduling_group() and appends
// it to the queue of the _at_destroy_tasks task queue.
// NOTE(review): presumably these tasks run when the reactor is torn down; the
// code that drains this queue is not visible here -- confirm.
521 template <
typename Func>
522 void at_destroy(Func&& func) {
523 _at_destroy_tasks->_q.push_back(make_task(default_scheduling_group(), std::forward<Func>(func)));
// Returns the _current_task pointer; it is nullptr until set_current_task()
// or other code assigns it (the member is initialised to nullptr).
526 task* current_task()
const {
return _current_task; }
// Overwrites the _current_task pointer with `t`; no ownership handling is
// performed here.
531 void set_current_task(task* t) { _current_task = t; }
533 void add_task(task* t)
noexcept;
534 void add_urgent_task(task* t)
noexcept;
536 void run_in_background(future<> f);
// Convenience overload: invokes `func` through futurize_invoke() and passes
// the resulting future to the run_in_background(future<>) overload.
538 template <
typename Func>
539 void run_in_background(Func&& func) {
540 run_in_background(futurize_invoke(std::forward<Func>(func)));
552 _idle_cpu_handler = std::move(handler);
556 void add_high_priority_task(
task*)
noexcept;
560 [[deprecated(
"Use this_shard_id")]]
561 shard_id cpu_id()
const;
565 steady_clock_type::duration total_idle_time();
566 steady_clock_type::duration total_busy_time();
567 steady_clock_type::duration total_awake_time()
const;
568 std::chrono::nanoseconds total_cpu_time()
const;
569 std::chrono::nanoseconds total_steal_time();
// Returns a const reference to the _io_stats member (no copy is made).
571 const io_stats& get_io_stats()
const {
return _io_stats; }
// Returns the current value of the _abandoned_failed_futures counter.
578 uint64_t abandoned_failed_futures()
const {
return _abandoned_failed_futures; }
588 void register_poller(
pollfn* p);
589 void unregister_poller(
pollfn* p);
591 void register_metrics();
594 future<> fdatasync(
int fd)
noexcept;
610 friend class posix_file_impl;
611 friend class blockdev_file_impl;
612 friend class timer<>;
616 friend class internal::poller;
619 friend void seastar::internal::increase_thrown_exceptions_counter() noexcept;
620 friend
void report_failed_future(const
std::exception_ptr& eptr) noexcept;
621 metrics::metric_groups _metric_groups;
626 friend
seastar::internal::log_buf::inserter_iterator do_dump_task_queue(
seastar::internal::log_buf::inserter_iterator it, const task_queue& tq);
628 future<struct statfs> fstatfs(
int fd) noexcept;
635 void enable_timer(steady_clock_type::time_point when) noexcept;
645 void set_strict_dma(
bool value);
646 void set_bypass_fsync(
bool value);
647 void update_blocked_reactor_notify_ms(
std::chrono::milliseconds ms);
648 std::chrono::milliseconds get_blocked_reactor_notify_ms() const;
652 static void with_allow_abandoned_failed_futures(
unsigned count,
noncopyable_function<
void ()> func);
658 static std::function<void ()> get_stall_detector_report_function();
662extern __thread
reactor* local_engine;
663extern __thread
size_t task_quota;
667 return *local_engine;
// Returns true when the thread-local local_engine pointer is non-null for the
// calling thread.
671inline bool engine_is_ready() {
672 return local_engine !=
nullptr;
675inline int hrtimer_signal() {
682extern logger seastar_logger;
Definition: circular_buffer_fixed_capacity.hh:53
Conditional variable.
Definition: condition-variable.hh:73
future wait() noexcept
Definition: condition-variable.hh:214
A representation of a possibly not-yet-computed value.
Definition: future.hh:1219
Definition: reactor.hh:138
Definition: io_queue.hh:77
Definition: io_desc.hh:28
Low-resolution and efficient steady clock.
Definition: lowres_clock.hh:56
Definition: manual_clock.hh:35
holds the metric definition.
Definition: metrics_registration.hh:94
Definition: pollable_fd.hh:62
Definition: pollable_fd.hh:136
Definition: reactor.hh:650
static void set_stall_detector_report_function(std::function< void()> report)
Definition: reactor.hh:147
sched_stats get_sched_stats() const
friend void handle_signal(int signo, noncopyable_function< void()> &&handler, bool once)
Sets a signal handler for the specified signal.
void set_idle_cpu_handler(idle_cpu_handler &&handler)
Definition: reactor.hh:551
alien::instance & alien()
Definition: reactor.hh:208
Definition: reactor.hh:183
Scheduling statistics.
Definition: reactor.hh:198
Definition: scheduling.hh:183
Identifies function calls that are accounted as a group.
Definition: scheduling.hh:285
Definition: shared_ptr.hh:507
Definition: socket_defs.hh:47
Definition: temporary_buffer.hh:67
future<> touch_directory(std::string_view name, file_permissions permissions=file_permissions::default_dir_permissions) noexcept
future<> remove_file(std::string_view name) noexcept
future<> rename_file(std::string_view old_name, std::string_view new_name) noexcept
future<> chmod(std::string_view name, file_permissions permissions) noexcept
future< uint64_t > file_size(std::string_view name) noexcept
future< std::optional< struct group_details > > getgrnam(std::string_view name)
future< bool > file_exists(std::string_view name) noexcept
future< fs_type > file_system_at(std::string_view name) noexcept
future< bool > file_accessible(std::string_view name, access_flags flags) noexcept
future<> link_file(std::string_view oldpath, std::string_view newpath) noexcept
future<> chown(std::string_view filepath, uid_t owner, gid_t group)
future< std::optional< directory_entry_type > > file_type(std::string_view name, follow_symlink follow=follow_symlink::yes) noexcept
open_flags
Definition: file-types.hh:41
future< stat_data > file_stat(std::string_view name, follow_symlink fs=follow_symlink::yes) noexcept
future<> make_directory(std::string_view name, file_permissions permissions=file_permissions::default_dir_permissions) noexcept
future< T > get_future() noexcept
Gets the promise's associated future.
Definition: future.hh:1905
future now()
Returns a ready future.
Definition: later.hh:35
future< std::tuple< file_desc, file_desc > > make_pipe()
server_socket listen(socket_address sa)
future< connected_socket > connect(socket_address sa)
holds the metric_groups definition needed by class that reports metrics
future configure(const options &opts)
set the metrics configuration
Seastar API namespace.
Definition: abort_on_ebadf.hh:26
const noncopyable_function< bool()> & work_waiting_on_reactor
Definition: idle_cpu_handler.hh:46
void handle_signal(int signo, noncopyable_function< void()> &&handler, bool once=false)
Sets a signal handler for the specified signal.
future destroy_scheduling_group(scheduling_group sg) noexcept
future rename_scheduling_group(scheduling_group sg, sstring new_name) noexcept
idle_cpu_handler_result
Definition: idle_cpu_handler.hh:37
@ no_more_work
The user callback has no more work to perform.
future< std::filesystem::space_info > file_system_space(std::string_view name) noexcept
noncopyable_function< idle_cpu_handler_result(work_waiting_on_reactor poll)> idle_cpu_handler
Definition: idle_cpu_handler.hh:52
future< scheduling_group > create_scheduling_group(sstring name, float shares) noexcept
future< scheduling_group_key > scheduling_group_key_create(scheduling_group_key_config cfg) noexcept
Definition: noncopyable_function.hh:37
Configuration for the reactor.
Definition: reactor_config.hh:53
Definition: scheduling.hh:143