// Seastar — High performance C++ framework for concurrent servers
// stall_detector.hh

/*
 * This file is open source software, licensed to you under the terms
 * of the Apache License, Version 2.0 (the "License").  See the NOTICE file
 * distributed with this work for additional information regarding copyright
 * ownership.  You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*
 * Copyright (C) 2018 ScyllaDB
 */

#pragma once

#ifndef SEASTAR_MODULE
#include <signal.h>
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <type_traits>
#include <linux/perf_event.h>
#endif
#include <seastar/core/posix.hh>
// NOTE(review): the listing this header was recovered from skips two lines at
// this point (its own numbering jumps from 34 to 37). `sched_clock` is used
// below but no include visible here obviously declares it -- confirm the
// missing project includes against upstream.
#include <seastar/util/modules.hh>

39namespace seastar {
40
41class reactor;
42class thread_cputime_clock;
43
44namespace internal {
45
// Configuration for cpu_stall_detector. Every field has a default, so a
// default-constructed object is a complete configuration.
struct cpu_stall_detector_config {
    // Stall threshold; defaults to two seconds.
    std::chrono::duration<double> threshold = std::chrono::seconds(2);
    // Defaults to 1. Presumably a cap on reports emitted per minute --
    // confirm against the implementation.
    unsigned stall_detector_reports_per_minute = 1;
    // Fraction of threshold that we're allowed to overshoot.
    float slack = 0.3f;
    // Print a simplified backtrace on a single line.
    bool oneline = true;
    // Alternative reporting function for tests; empty by default.
    std::function<void ()> report;
};

54// Detects stalls in continuations that run for too long
55class cpu_stall_detector {
56protected:
57 std::atomic<uint64_t> _last_tasks_processed_seen{};
58 unsigned _stall_detector_reports_per_minute;
59 std::atomic<uint64_t> _stall_detector_missed_ticks = { 0 };
60 unsigned _reported = 0;
61 unsigned _total_reported = 0;
62 unsigned _max_reports_per_minute;
63 unsigned _shard_id;
64 unsigned _thread_id;
65 unsigned _report_at{};
66 sched_clock::time_point _minute_mark{};
67 sched_clock::time_point _rearm_timer_at{};
68 sched_clock::time_point _run_started_at{};
69 sched_clock::duration _threshold;
70 sched_clock::duration _slack;
71 cpu_stall_detector_config _config;
73 friend reactor;
74 virtual bool is_spurious_signal() {
75 return false;
76 }
77 virtual void maybe_report_kernel_trace() {}
78private:
79 void maybe_report();
80 virtual void arm_timer() = 0;
81 void report_suppressions(sched_clock::time_point now);
82 void reset_suppression_state(sched_clock::time_point now);
83public:
84 using clock_type = thread_cputime_clock;
85public:
86 explicit cpu_stall_detector(cpu_stall_detector_config cfg = {});
87 virtual ~cpu_stall_detector() = default;
88 static int signal_number() { return SIGRTMIN + 1; }
89 void start_task_run(sched_clock::time_point now);
90 void end_task_run(sched_clock::time_point now);
91 void generate_trace();
92 void update_config(cpu_stall_detector_config cfg);
93 cpu_stall_detector_config get_config() const;
94 void on_signal();
95 virtual void start_sleep() = 0;
96 void end_sleep();
97};
98
99class cpu_stall_detector_posix_timer : public cpu_stall_detector {
100 timer_t _timer;
101public:
102 explicit cpu_stall_detector_posix_timer(cpu_stall_detector_config cfg = {});
103 virtual ~cpu_stall_detector_posix_timer() override;
104private:
105 virtual void arm_timer() override;
106 virtual void start_sleep() override;
107};
108
109class cpu_stall_detector_linux_perf_event : public cpu_stall_detector {
110 file_desc _fd;
111 bool _enabled = false;
112 uint64_t _current_period = 0;
113 struct ::perf_event_mmap_page* _mmap;
114 char* _data_area;
115 size_t _data_area_mask;
116 // after the detector has been armed (i.e., _enabled is true), this
117 // is the moment at or after which the next signal is expected to occur
118 // and can be used for detecting spurious signals
119 sched_clock::time_point _next_signal_time{};
120private:
121 class data_area_reader {
122 cpu_stall_detector_linux_perf_event& _p;
123 const char* _data_area;
124 size_t _data_area_mask;
125 uint64_t _head;
126 uint64_t _tail;
127 public:
128 explicit data_area_reader(cpu_stall_detector_linux_perf_event& p)
129 : _p(p)
130 , _data_area(p._data_area)
131 , _data_area_mask(p._data_area_mask) {
132 _head = _p._mmap->data_head;
133 _tail = _p._mmap->data_tail;
134 std::atomic_thread_fence(std::memory_order_acquire); // required after reading data_head
135 }
136 ~data_area_reader() {
137 std::atomic_thread_fence(std::memory_order_release); // not documented, but probably required before writing data_tail
138 _p._mmap->data_tail = _tail;
139 }
140 uint64_t read_u64() {
141 uint64_t ret;
142 // We cannot wrap around if the 8-byte unit is aligned
143 std::copy_n(_data_area + (_tail & _data_area_mask), 8, reinterpret_cast<char*>(&ret));
144 _tail += 8;
145 return ret;
146 }
147 template <typename S>
148 S read_struct() {
149 static_assert(sizeof(S) % 8 == 0);
150 S ret;
151 char* p = reinterpret_cast<char*>(&ret);
152 for (size_t i = 0; i != sizeof(S); i += 8) {
153 uint64_t w = read_u64();
154 std::copy_n(reinterpret_cast<const char*>(&w), 8, p + i);
155 }
156 return ret;
157 }
158 void skip(uint64_t bytes_to_skip) {
159 _tail += bytes_to_skip;
160 }
161 // skip all the remaining data in the buffer, as-if calling read until
162 // have_data returns false (but much faster)
163 void skip_all() {
164 _tail = _head;
165 }
166 bool have_data() const {
167 return _head != _tail;
168 }
169 };
170public:
171 static std::unique_ptr<cpu_stall_detector_linux_perf_event> try_make(cpu_stall_detector_config cfg = {});
172 explicit cpu_stall_detector_linux_perf_event(file_desc fd, cpu_stall_detector_config cfg = {});
173 ~cpu_stall_detector_linux_perf_event();
174 virtual void arm_timer() override;
175 virtual void start_sleep() override;
176 virtual bool is_spurious_signal() override;
177 virtual void maybe_report_kernel_trace() override;
178};
179
180std::unique_ptr<cpu_stall_detector> make_cpu_stall_detector(cpu_stall_detector_config cfg = {});
181
182}
183}
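// Cross-reference notes carried over from the generated documentation page
// (not part of the header itself):
//   - "holds the metric definition." -- Definition: metrics_registration.hh:94
//   - future now(): "Returns a ready future." -- Definition: later.hh:35
//   - "holds the metric_groups definition needed by class that reports metrics"
//   - "Seastar API namespace." -- Definition: abort_on_ebadf.hh:26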