Seastar
High performance C++ framework for concurrent servers
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
perf_tests.hh
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2018 ScyllaDB Ltd.
20 */
21
22#pragma once
23
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <type_traits>
#include <utility>

#include <fmt/format.h>

#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <seastar/core/loop.hh>
#include <seastar/testing/linux_perf_event.hh>
33
34using namespace seastar;
35
36namespace perf_tests {
37namespace internal {
38
// Forward declaration; this header only ever passes config by const reference.
struct config;

// Clock used for every duration measured in this header. steady_clock is
// monotonic, so measured intervals cannot go negative on wall-clock changes.
using clock_type = std::chrono::steady_clock;
42
44public:
45 uint64_t allocations = 0;
46 uint64_t tasks_executed = 0;
47 uint64_t instructions_retired = 0;
48 uint64_t cpu_cycles_retired = 0;
49
50private:
51 static uint64_t perf_mallocs();
52 static uint64_t perf_tasks_processed();
53
54public:
55 perf_stats() = default;
56 perf_stats(uint64_t allocations_, uint64_t tasks_executed_, uint64_t instructions_retired_ = 0, uint64_t cpu_cycles_retired_ = 0)
57 : allocations(allocations_)
58 , tasks_executed(tasks_executed_)
59 , instructions_retired(instructions_retired_)
60 , cpu_cycles_retired(cpu_cycles_retired_)
61 {}
62 perf_stats(perf_stats&& o) noexcept
63 : allocations(std::exchange(o.allocations, 0))
64 , tasks_executed(std::exchange(o.tasks_executed, 0))
65 , instructions_retired(std::exchange(o.instructions_retired, 0))
66 , cpu_cycles_retired(std::exchange(o.cpu_cycles_retired, 0))
67 {}
68 perf_stats(const perf_stats& o) = default;
69
70 perf_stats& operator=(perf_stats&& o) = default;
71 perf_stats& operator=(const perf_stats& o) = default;
72
73 perf_stats& operator+=(perf_stats b);
74 perf_stats& operator-=(perf_stats b);
75
76 static perf_stats snapshot(linux_perf_event* instructions_retired_counter = nullptr, linux_perf_event* cpu_cycles_retired_counter = nullptr);
77};
78
79inline
81operator+(perf_stats a, perf_stats b) {
82 a.allocations += b.allocations;
83 a.tasks_executed += b.tasks_executed;
84 a.instructions_retired += b.instructions_retired;
85 a.cpu_cycles_retired += b.cpu_cycles_retired;
86 return a;
87}
88
89inline
90perf_stats
91operator-(perf_stats a, perf_stats b) {
92 a.allocations -= b.allocations;
93 a.tasks_executed -= b.tasks_executed;
94 a.instructions_retired -= b.instructions_retired;
95 a.cpu_cycles_retired -= b.cpu_cycles_retired;
96 return a;
97}
98
99inline perf_stats& perf_stats::operator+=(perf_stats b) {
100 allocations += b.allocations;
101 tasks_executed += b.tasks_executed;
102 instructions_retired += b.instructions_retired;
103 cpu_cycles_retired += b.cpu_cycles_retired;
104 return *this;
105}
106
107inline perf_stats& perf_stats::operator-=(perf_stats b) {
108 allocations -= b.allocations;
109 tasks_executed -= b.tasks_executed;
110 instructions_retired -= b.instructions_retired;
111 cpu_cycles_retired -= b.cpu_cycles_retired;
112 return *this;
113}
114
116 std::string _test_case;
117 std::string _test_group;
118
119 uint64_t _single_run_iterations = 0;
120 std::atomic<uint64_t> _max_single_run_iterations;
121protected:
122 linux_perf_event _instructions_retired_counter = linux_perf_event::user_instructions_retired();
123 linux_perf_event _cpu_cycles_retired_counter = linux_perf_event::user_cpu_cycles_retired();
124private:
125 void do_run(const config&);
126public:
127 struct run_result {
128 clock_type::duration duration;
129 perf_stats stats;
130 };
131protected:
132 [[gnu::always_inline]] [[gnu::hot]]
133 bool stop_iteration() const {
134 return _single_run_iterations >= _max_single_run_iterations.load(std::memory_order_relaxed);
135 }
136
137 [[gnu::always_inline]] [[gnu::hot]]
138 void next_iteration(size_t n) {
139 _single_run_iterations += n;
140 }
141
142 virtual void set_up() = 0;
143 virtual void tear_down() noexcept = 0;
144 virtual future<run_result> do_single_run() = 0;
145public:
146 performance_test(const std::string& test_case, const std::string& test_group)
147 : _test_case(test_case)
148 , _test_group(test_group)
149 { }
150
151 virtual ~performance_test() = default;
152
153 const std::string& test_case() const { return _test_case; }
154 const std::string& test_group() const { return _test_group; }
155 std::string name() const { return fmt::format("{}.{}", test_group(), test_case()); }
156
157 void run(const config&);
158public:
159 static void register_test(std::unique_ptr<performance_test>);
160};
161
162// Helper for measuring time.
163// Each microbenchmark can either use the default behaviour which measures
164// only the start and stop time of the whole run or manually invoke
165// start_measuring_time() and stop_measuring_time() in order to measure
166// only parts of each iteration.
168 clock_type::time_point _run_start_time;
169 clock_type::time_point _start_time;
170 clock_type::duration _total_time;
171
172 perf_stats _start_stats;
173 perf_stats _total_stats;
174
175 linux_perf_event* _instructions_retired_counter = nullptr;
176 linux_perf_event* _cpu_cycles_retired_counter = nullptr;
177
178public:
179 [[gnu::always_inline]] [[gnu::hot]]
180 void start_run(linux_perf_event* instructions_retired_counter = nullptr, linux_perf_event* cpu_cycles_retired_counter = nullptr) {
181 _instructions_retired_counter = instructions_retired_counter;
182 _cpu_cycles_retired_counter = cpu_cycles_retired_counter;
183 _total_time = { };
184 _total_stats = {};
185 auto t = clock_type::now();
186 _run_start_time = t;
187 _start_time = t;
188 _start_stats = perf_stats::snapshot(_instructions_retired_counter, _cpu_cycles_retired_counter);
189 }
190
191 [[gnu::always_inline]] [[gnu::hot]]
193 auto t = clock_type::now();
195 if (_start_time == _run_start_time) {
196 ret.duration = t - _start_time;
197 auto stats = perf_stats::snapshot(_instructions_retired_counter, _cpu_cycles_retired_counter);
198 ret.stats = stats - _start_stats;
199 } else {
200 ret.duration = _total_time;
201 ret.stats = _total_stats;
202 }
203 _instructions_retired_counter = nullptr;
204 _cpu_cycles_retired_counter = nullptr;
205 return ret;
206 }
207
208 [[gnu::always_inline]] [[gnu::hot]]
209 void start_iteration() {
210 _start_time = clock_type::now();
211 _start_stats = perf_stats::snapshot(_instructions_retired_counter, _cpu_cycles_retired_counter);
212 }
213
214 [[gnu::always_inline]] [[gnu::hot]]
215 void stop_iteration() {
216 auto t = clock_type::now();
217 _total_time += t - _start_time;
218 perf_stats stats;
219 stats = perf_stats::snapshot(_instructions_retired_counter, _cpu_cycles_retired_counter);
220 _total_stats += stats - _start_stats;
221 }
222};
223
224extern time_measurement measure_time;
225
226template<typename Test>
228 std::optional<Test> _test;
229
230 using test_ret_type = decltype(_test->run());
231 // true iff the test method returns future<...>
232 static constexpr bool is_async_test = is_future<test_ret_type>::value;
233 // true iff the test returns the number of iterations run, otherwise it returns
234 // void and we consider each invocation to be 1 iteration
235 static constexpr bool is_iteration_returning = !(std::is_same_v<test_ret_type, future<>> || std::is_void_v<test_ret_type>);
236private:
237
238protected:
239 virtual void set_up() override {
240 _test.emplace();
241 }
242
243 virtual void tear_down() noexcept override {
244 _test = std::nullopt;
245 }
246
247 [[gnu::hot]]
248 virtual future<run_result> do_single_run() override {
249 _instructions_retired_counter.enable();
250 _cpu_cycles_retired_counter.enable();
251 measure_time.start_run(&_instructions_retired_counter, &_cpu_cycles_retired_counter);
252 while (!stop_iteration()) {
253 if constexpr (is_async_test) {
254 if constexpr (is_iteration_returning) {
255 auto f = _test->run();
256 next_iteration(f.available() ? std::move(f).get() : co_await std::move(f));
257 } else {
258 auto f = _test->run();
259 // The available() check is functionally redundant, but is significantly faster
260 // than invoking the co_await machinery on a future-returning function.
261 if (!f.available()) {
262 co_await std::move(f);
263 }
264 next_iteration(1);
265 }
266 } else {
267 if constexpr (is_iteration_returning) {
268 next_iteration(_test->run());
269 } else {
270 _test->run();
271 next_iteration(1);
272 }
273 }
274 }
275 auto ret = measure_time.stop_run();
276 _instructions_retired_counter.disable();
277 _cpu_cycles_retired_counter.disable();
278 co_return ret;
279 }
280public:
281 using performance_test::performance_test;
282};
283
284void register_test(std::unique_ptr<performance_test>);
285
286template<typename Test>
288 test_registrar(const std::string& test_group, const std::string& test_case) {
289 auto test = std::make_unique<concrete_performance_test<Test>>(test_case, test_group);
290 performance_test::register_test(std::move(test));
291 }
292};
293
294}
295
296[[gnu::always_inline]]
297inline void start_measuring_time()
298{
299 internal::measure_time.start_iteration();
300}
301
302[[gnu::always_inline]]
303inline void stop_measuring_time()
304{
305 internal::measure_time.stop_iteration();
306}
307
308
// Makes `v` an input of an empty, volatile inline-asm statement. The asm
// emits no instructions, but the compiler must still materialise `v` in a
// register or in memory (constraint "r,m"), so the computation producing it
// cannot be discarded as dead code. `v` itself is not modified.
template<typename T>
void do_not_optimize(const T& v)
{
    asm volatile("" : : "r,m" (v));
}
314
315}
316
// PERF_TEST and PERF_TEST_F support both synchronous and asynchronous functions.
// The former should return `void`, the latter `future<>`.
// PERF_TEST_C executes a coroutine function, if enabled.
// PERF_TEST_CN executes a coroutine function, if enabled, returning the number of inner-loops.
//
// Test cases may perform multiple operations in a single run; this may be desirable
// if the cost of an individual operation is very small. This allows measuring either
// the latency or throughput depending on how the test is written. In such cases,
// the test function shall return either size_t or future<size_t> for synchronous and
// asynchronous cases respectively. The returned value shall be the number of iterations
// done in a single test run.
328
// Defines a benchmark that uses `test_group` as a fixture: the generated
// struct derives from it, a static test_registrar registers the test under
// the stringified group and case names, and the code following the macro
// becomes the body of run().
#define PERF_TEST_F(test_group, test_case) \
    struct test_##test_group##_##test_case : test_group { \
        [[gnu::always_inline]] inline auto run(); \
    }; \
    static ::perf_tests::internal::test_registrar<test_##test_group##_##test_case> \
    test_##test_group##_##test_case##_registrar(#test_group, #test_case); \
    [[gnu::always_inline]] auto test_##test_group##_##test_case::run()
336
// Defines a fixture-less benchmark: `test_group` is only used to build the
// test's name. A static test_registrar registers the test, and the code
// following the macro becomes the body of run().
#define PERF_TEST(test_group, test_case) \
    struct test_##test_group##_##test_case { \
        [[gnu::always_inline]] inline auto run(); \
    }; \
    static ::perf_tests::internal::test_registrar<test_##test_group##_##test_case> \
    test_##test_group##_##test_case##_registrar(#test_group, #test_case); \
    [[gnu::always_inline]] auto test_##test_group##_##test_case::run()
344
345
// Like PERF_TEST_F, but run() is declared to return future<> explicitly so
// that the body following the macro can be written as a coroutine.
// The return type is spelled ::seastar::future<> so the macro does not depend
// on unqualified lookup of `future` at the point of expansion.
#define PERF_TEST_C(test_group, test_case) \
    struct test_##test_group##_##test_case : test_group { \
        inline ::seastar::future<> run(); \
    }; \
    static ::perf_tests::internal::test_registrar<test_##test_group##_##test_case> \
    test_##test_group##_##test_case##_registrar(#test_group, #test_case); \
    ::seastar::future<> test_##test_group##_##test_case::run()
353
// Like PERF_TEST_C, but run() returns future<size_t>: the coroutine body must
// co_return the number of iterations it performed in this invocation.
// The return type is spelled ::seastar::future<size_t> so the macro does not
// depend on unqualified lookup of `future` at the point of expansion.
#define PERF_TEST_CN(test_group, test_case) \
    struct test_##test_group##_##test_case : test_group { \
        inline ::seastar::future<size_t> run(); \
    }; \
    static ::perf_tests::internal::test_registrar<test_##test_group##_##test_case> \
    test_##test_group##_##test_case##_registrar(#test_group, #test_case); \
    ::seastar::future<size_t> test_##test_group##_##test_case::run()
Definition: linux_perf_event.hh:36
Definition: perf_tests.hh:43
Definition: perf_tests.hh:115
Definition: perf_tests.hh:167
Type-safe boolean.
Definition: bool_class.hh:58
A representation of a possibly not-yet-computed value.
Definition: future.hh:1197
future now()
Returns a ready future.
Definition: later.hh:35
Seastar API namespace.
Definition: abort_on_ebadf.hh:26
STL namespace.
Definition: perf_tests.hh:287
Check whether a type is a future.
Definition: future.hh:1010