Seastar
High performance C++ framework for concurrent servers
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
perf_tests.hh
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2018 ScyllaDB Ltd.
20 */
21
22#pragma once
23
24#include <atomic>
25#include <memory>
26
27#include <fmt/format.h>
28
29#include <seastar/core/future.hh>
30#include <seastar/core/loop.hh>
31#include <seastar/testing/linux_perf_event.hh>
32
33using namespace seastar;
34
35namespace perf_tests {
36namespace internal {
37
38struct config;
39
40using clock_type = std::chrono::steady_clock;
41
43public:
44 uint64_t allocations = 0;
45 uint64_t tasks_executed = 0;
46 uint64_t instructions_retired = 0;
47 uint64_t cpu_cycles_retired = 0;
48
49private:
50 static uint64_t perf_mallocs();
51 static uint64_t perf_tasks_processed();
52
53public:
54 perf_stats() = default;
55 perf_stats(uint64_t allocations_, uint64_t tasks_executed_, uint64_t instructions_retired_ = 0, uint64_t cpu_cycles_retired_ = 0)
56 : allocations(allocations_)
57 , tasks_executed(tasks_executed_)
58 , instructions_retired(instructions_retired_)
59 , cpu_cycles_retired(cpu_cycles_retired_)
60 {}
61 perf_stats(perf_stats&& o) noexcept
62 : allocations(std::exchange(o.allocations, 0))
63 , tasks_executed(std::exchange(o.tasks_executed, 0))
64 , instructions_retired(std::exchange(o.instructions_retired, 0))
65 , cpu_cycles_retired(std::exchange(o.cpu_cycles_retired, 0))
66 {}
67 perf_stats(const perf_stats& o) = default;
68
69 perf_stats& operator=(perf_stats&& o) = default;
70 perf_stats& operator=(const perf_stats& o) = default;
71
72 perf_stats& operator+=(perf_stats b);
73 perf_stats& operator-=(perf_stats b);
74
75 static perf_stats snapshot(linux_perf_event* instructions_retired_counter = nullptr, linux_perf_event* cpu_cycles_retired_counter = nullptr);
76};
77
78inline
80operator+(perf_stats a, perf_stats b) {
81 a.allocations += b.allocations;
82 a.tasks_executed += b.tasks_executed;
83 a.instructions_retired += b.instructions_retired;
84 a.cpu_cycles_retired += b.cpu_cycles_retired;
85 return a;
86}
87
88inline
89perf_stats
90operator-(perf_stats a, perf_stats b) {
91 a.allocations -= b.allocations;
92 a.tasks_executed -= b.tasks_executed;
93 a.instructions_retired -= b.instructions_retired;
94 a.cpu_cycles_retired -= b.cpu_cycles_retired;
95 return a;
96}
97
98inline perf_stats& perf_stats::operator+=(perf_stats b) {
99 allocations += b.allocations;
100 tasks_executed += b.tasks_executed;
101 instructions_retired += b.instructions_retired;
102 cpu_cycles_retired += b.cpu_cycles_retired;
103 return *this;
104}
105
106inline perf_stats& perf_stats::operator-=(perf_stats b) {
107 allocations -= b.allocations;
108 tasks_executed -= b.tasks_executed;
109 instructions_retired -= b.instructions_retired;
110 cpu_cycles_retired -= b.cpu_cycles_retired;
111 return *this;
112}
113
115 std::string _test_case;
116 std::string _test_group;
117
118 uint64_t _single_run_iterations = 0;
119 std::atomic<uint64_t> _max_single_run_iterations;
120protected:
121 linux_perf_event _instructions_retired_counter = linux_perf_event::user_instructions_retired();
122 linux_perf_event _cpu_cycles_retired_counter = linux_perf_event::user_cpu_cycles_retired();
123private:
124 void do_run(const config&);
125public:
126 struct run_result {
127 clock_type::duration duration;
128 perf_stats stats;
129 };
130protected:
131 [[gnu::always_inline]] [[gnu::hot]]
132 bool stop_iteration() const {
133 return _single_run_iterations >= _max_single_run_iterations.load(std::memory_order_relaxed);
134 }
135
136 [[gnu::always_inline]] [[gnu::hot]]
137 void next_iteration(size_t n) {
138 _single_run_iterations += n;
139 }
140
141 virtual void set_up() = 0;
142 virtual void tear_down() noexcept = 0;
143 virtual future<run_result> do_single_run() = 0;
144public:
145 performance_test(const std::string& test_case, const std::string& test_group)
146 : _test_case(test_case)
147 , _test_group(test_group)
148 { }
149
150 virtual ~performance_test() = default;
151
152 const std::string& test_case() const { return _test_case; }
153 const std::string& test_group() const { return _test_group; }
154 std::string name() const { return fmt::format("{}.{}", test_group(), test_case()); }
155
156 void run(const config&);
157public:
158 static void register_test(std::unique_ptr<performance_test>);
159};
160
161// Helper for measuring time.
162// Each microbenchmark can either use the default behaviour which measures
163// only the start and stop time of the whole run or manually invoke
164// start_measuring_time() and stop_measuring_time() in order to measure
165// only parts of each iteration.
167 clock_type::time_point _run_start_time;
168 clock_type::time_point _start_time;
169 clock_type::duration _total_time;
170
171 perf_stats _start_stats;
172 perf_stats _total_stats;
173
174 linux_perf_event* _instructions_retired_counter = nullptr;
175 linux_perf_event* _cpu_cycles_retired_counter = nullptr;
176
177public:
178 [[gnu::always_inline]] [[gnu::hot]]
179 void start_run(linux_perf_event* instructions_retired_counter = nullptr, linux_perf_event* cpu_cycles_retired_counter = nullptr) {
180 _instructions_retired_counter = instructions_retired_counter;
181 _cpu_cycles_retired_counter = cpu_cycles_retired_counter;
182 _total_time = { };
183 _total_stats = {};
184 auto t = clock_type::now();
185 _run_start_time = t;
186 _start_time = t;
187 _start_stats = perf_stats::snapshot(_instructions_retired_counter, _cpu_cycles_retired_counter);
188 }
189
190 [[gnu::always_inline]] [[gnu::hot]]
192 auto t = clock_type::now();
194 if (_start_time == _run_start_time) {
195 ret.duration = t - _start_time;
196 auto stats = perf_stats::snapshot(_instructions_retired_counter, _cpu_cycles_retired_counter);
197 ret.stats = stats - _start_stats;
198 } else {
199 ret.duration = _total_time;
200 ret.stats = _total_stats;
201 }
202 _instructions_retired_counter = nullptr;
203 _cpu_cycles_retired_counter = nullptr;
204 return ret;
205 }
206
// Marks the beginning of a manually measured section: records the current
// time and a fresh statistics snapshot as the new starting point.
207 [[gnu::always_inline]] [[gnu::hot]]
208 void start_iteration() {
209 _start_time = clock_type::now();
210 _start_stats = perf_stats::snapshot(_instructions_retired_counter, _cpu_cycles_retired_counter);
211 }
212
213 [[gnu::always_inline]] [[gnu::hot]]
214 void stop_iteration() {
215 auto t = clock_type::now();
216 _total_time += t - _start_time;
217 perf_stats stats;
218 stats = perf_stats::snapshot(_instructions_retired_counter, _cpu_cycles_retired_counter);
219 _total_stats += stats - _start_stats;
220 }
221};
222
223extern time_measurement measure_time;
224
225namespace {
226
// Compile-time branch selector. The primary template handles the `false`
// case: it inherits from FalseFn and forwards the call to it. The unused
// TrueFn is accepted only so that both specialisations share a constructor
// signature.
template<bool Condition, typename TrueFn, typename FalseFn>
struct do_if_constexpr_ : FalseFn {
    do_if_constexpr_(TrueFn, FalseFn false_fn) : FalseFn(std::move(false_fn)) { }
    decltype(auto) operator()() const {
        // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64095
        return FalseFn::operator()(0);
    }
};

// `true` case: inherits from TrueFn and forwards the call to it.
template<typename TrueFn, typename FalseFn>
struct do_if_constexpr_<true, TrueFn, FalseFn> : TrueFn {
    do_if_constexpr_(TrueFn true_fn, FalseFn) : TrueFn(std::move(true_fn)) { }
    decltype(auto) operator()() const { return TrueFn::operator()(0); }
};

// Builds a callable that, when invoked with no arguments, calls `true_fn` if
// Condition is true and `false_fn` otherwise, passing a dummy argument. Only
// the selected callable's call operator is instantiated.
//
// The callable types are decayed before being used as base classes, so both
// temporaries and named (lvalue) callables are accepted; lvalues are copied.
template<bool Condition, typename TrueFn, typename FalseFn>
do_if_constexpr_<Condition, std::decay_t<TrueFn>, std::decay_t<FalseFn>>
if_constexpr_(TrueFn&& true_fn, FalseFn&& false_fn)
{
    using selector = do_if_constexpr_<Condition, std::decay_t<TrueFn>, std::decay_t<FalseFn>>;
    return selector(std::forward<TrueFn>(true_fn), std::forward<FalseFn>(false_fn));
}
247
248}
249
250template<typename Test>
252 std::optional<Test> _test;
253private:
254 template<typename... Args>
255 auto run_test(Args&&...) {
256 return _test->run();
257 }
258
259protected:
// Default-constructs a fresh Test instance in place, replacing any previous one.
260 virtual void set_up() override {
261 _test.emplace();
262 }
263
264 virtual void tear_down() noexcept override {
265 _test = std::nullopt;
266 }
267
268 [[gnu::hot]]
269 virtual future<run_result> do_single_run() override {
270 // Redundant 'this->'s courtesy of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61636
271 _instructions_retired_counter.enable();
272 _cpu_cycles_retired_counter.enable();
273 return if_constexpr_<is_future<decltype(_test->run())>::value>([&] (auto&&...) {
274 measure_time.start_run(&_instructions_retired_counter);
275 return do_until([this] { return this->stop_iteration(); }, [this] {
276 return if_constexpr_<std::is_same_v<decltype(_test->run()), future<>>>([&] (auto&&...) {
277 this->next_iteration(1);
278 return _test->run();
279 }, [&] (auto&&... dependency) {
280 // We need `dependency` to make sure the compiler won't be able to instantiate anything
281 // (and notice that the code does not compile) if this part of if_constexpr_ is not active.
282 return run_test(dependency...).then([&] (size_t n) {
283 this->next_iteration(n);
284 });
285 })();
286 }).then([] {
287 return measure_time.stop_run();
288 }).finally([this] {
289 _instructions_retired_counter.disable();
290 _cpu_cycles_retired_counter.disable();
291 });
292 }, [&] (auto&&...) {
293 measure_time.start_run(&_instructions_retired_counter, &_cpu_cycles_retired_counter);
294 while (!stop_iteration()) {
295 if_constexpr_<std::is_void_v<decltype(_test->run())>>([&] (auto&&...) {
296 (void)_test->run();
297 this->next_iteration(1);
298 }, [&] (auto&&... dependency) {
299 // We need `dependency` to make sure the compiler won't be able to instantiate anything
300 // (and notice that the code does not compile) if this part of if_constexpr_ is not active.
301 this->next_iteration(run_test(dependency...));
302 })();
303 }
304 auto ret = measure_time.stop_run();
305 _instructions_retired_counter.disable();
306 _cpu_cycles_retired_counter.disable();
307 return make_ready_future<run_result>(std::move(ret));
308 })();
309 }
310public:
311 using performance_test::performance_test;
312};
313
314void register_test(std::unique_ptr<performance_test>);
315
316template<typename Test>
318 test_registrar(const std::string& test_group, const std::string& test_case) {
319 auto test = std::make_unique<concrete_performance_test<Test>>(test_case, test_group);
320 performance_test::register_test(std::move(test));
321 }
322};
323
324}
325
// Starts a manually measured section by forwarding to
// internal::measure_time.start_iteration().
326[[gnu::always_inline]]
327inline void start_measuring_time()
328{
329 internal::measure_time.start_iteration();
330}
331
// Ends a manually measured section by forwarding to
// internal::measure_time.stop_iteration().
332[[gnu::always_inline]]
333inline void stop_measuring_time()
334{
335 internal::measure_time.stop_iteration();
336}
337
338
// Passes `v` as an input operand (register or memory) to an empty volatile
// asm statement. NOTE(review): presumably intended to keep the compiler from
// optimizing away the computation of `v` in a benchmark — confirm.
339template<typename T>
340void do_not_optimize(const T& v)
341{
342 asm volatile("" : : "r,m" (v));
343}
344
345}
346
347// PERF_TEST and PERF_TEST_F support both synchronous and asynchronous functions.
348// The former should return `void`, the latter `future<>`.
349// PERF_TEST_C executes a coroutine function, if enabled.
350// PERF_TEST_CN executes a coroutine function, if enabled, returning the number of inner-loops.
351//
352// Test cases may perform multiple operations in a single run, this may be desirable
353// if the cost of an individual operation is very small. This allows measuring either
354// the latency or throughput depending on how the test is written. In such cases,
355// the test function shall return either size_t or future<size_t> for synchronous and
356// asynchronous cases respectively. The returned value shall be the number of iterations
357// done in a single test run.
358
// PERF_TEST_F(test_group, test_case): declares struct test_<group>_<case>
// deriving from the fixture type `test_group`, registers it through a static
// test_registrar under the stringified group and case names, and opens the
// definition of its run() member; the test body follows the macro.
359#define PERF_TEST_F(test_group, test_case) \
360 struct test_##test_group##_##test_case : test_group { \
361 [[gnu::always_inline]] inline auto run(); \
362 }; \
363 static ::perf_tests::internal::test_registrar<test_##test_group##_##test_case> \
364 test_##test_group##_##test_case##_registrar(#test_group, #test_case); \
365 [[gnu::always_inline]] auto test_##test_group##_##test_case::run()
366
// PERF_TEST(test_group, test_case): like PERF_TEST_F but without a fixture;
// the generated struct test_<group>_<case> has no base class. The test body
// follows the macro as the definition of run().
367#define PERF_TEST(test_group, test_case) \
368 struct test_##test_group##_##test_case { \
369 [[gnu::always_inline]] inline auto run(); \
370 }; \
371 static ::perf_tests::internal::test_registrar<test_##test_group##_##test_case> \
372 test_##test_group##_##test_case##_registrar(#test_group, #test_case); \
373 [[gnu::always_inline]] auto test_##test_group##_##test_case::run()
374
375
// PERF_TEST_C(test_group, test_case): fixture-based test whose run() is
// declared with the explicit return type future<> (rather than `auto`) and
// without the always_inline attribute. The test body follows the macro.
376#define PERF_TEST_C(test_group, test_case) \
377 struct test_##test_group##_##test_case : test_group { \
378 inline future<> run(); \
379 }; \
380 static ::perf_tests::internal::test_registrar<test_##test_group##_##test_case> \
381 test_##test_group##_##test_case##_registrar(#test_group, #test_case); \
382 future<> test_##test_group##_##test_case::run()
383
// PERF_TEST_CN(test_group, test_case): same as PERF_TEST_C, except that
// run() returns future<size_t>; the delivered value is added to the
// iteration count of the run. The test body follows the macro.
384#define PERF_TEST_CN(test_group, test_case) \
385 struct test_##test_group##_##test_case : test_group { \
386 inline future<size_t> run(); \
387 }; \
388 static ::perf_tests::internal::test_registrar<test_##test_group##_##test_case> \
389 test_##test_group##_##test_case##_registrar(#test_group, #test_case); \
390 future<size_t> test_##test_group##_##test_case::run()
Definition: linux_perf_event.hh:36
Definition: perf_tests.hh:42
Definition: perf_tests.hh:114
Definition: perf_tests.hh:166
Type-safe boolean.
Definition: bool_class.hh:58
A representation of a possibly not-yet-computed value.
Definition: future.hh:1240
future do_until(StopCondition stop_cond, AsyncAction action) noexcept
Definition: loop.hh:339
future now()
Returns a ready future.
Definition: later.hh:35
Seastar API namespace.
Definition: abort_on_ebadf.hh:26
STL namespace.
Definition: perf_tests.hh:317
Check whether a type is a future.
Definition: future.hh:1032