Seastar
High performance C++ framework for concurrent servers
estimated_histogram.hh
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2023 ScyllaDB.
20 */
21
22#pragma once
23
24#include <cmath>
25#include <algorithm>
26#include <vector>
27#include <chrono>
28#include <seastar/core/metrics_types.hh>
29#include <seastar/core/print.hh>
30#include <seastar/core/bitops.hh>
31#include <limits>
32#include <array>
33
34namespace seastar::metrics::internal {
123template<uint64_t Min, uint64_t Max, size_t Precision>
125 static constexpr unsigned NUM_EXP_RANGES = log2floor(Max/Min);
126 static constexpr size_t NUM_BUCKETS = NUM_EXP_RANGES * Precision + 1;
127 static constexpr unsigned PRECISION_BITS = log2floor(Precision);
128 static constexpr unsigned BASESHIFT = log2floor(Min);
129 static constexpr uint64_t LOWER_BITS_MASK = Precision - 1;
130 static constexpr size_t MIN_ID = log2ceil(Min) * Precision + 1; // id0 (x,1]
131 std::array<uint64_t, NUM_BUCKETS> _buckets;
132public:
134 clear();
135 }
136
142 uint64_t get_bucket_lower_limit(uint16_t bucket_id) const {
143 if (bucket_id == NUM_BUCKETS - 1) {
144 return Max;
145 }
146 int16_t exp_rang = (bucket_id >> PRECISION_BITS);
147 return (Min << exp_rang) + ((bucket_id & LOWER_BITS_MASK) << (exp_rang + BASESHIFT - PRECISION_BITS));
148 }
149
155 uint64_t get_bucket_upper_limit(uint16_t bucket_id) const {
156 if (bucket_id == NUM_BUCKETS - 1) {
157 return std::numeric_limits<uint64_t>::max();
158 }
159 return get_bucket_lower_limit(bucket_id + 1);
160 }
161
167 uint16_t find_bucket_index(uint64_t val) const {
168 if (val >= Max) {
169 return NUM_BUCKETS - 1;
170 }
171 if (val <= Min) {
172 return 0;
173 }
174 uint16_t range = log2floor(val);
175 val >>= range - PRECISION_BITS; // leave the top most N+1 bits where N is the resolution.
176 return ((range - BASESHIFT) << PRECISION_BITS) + (val & LOWER_BITS_MASK);
177 }
178
182 void clear() {
183 std::fill(_buckets.begin(), _buckets.end(), 0);
184 }
185
190 void add(uint64_t n) {
191 _buckets.at(find_bucket_index(n))++;
192 }
193
201 uint64_t min() const {
202 for (size_t i = 0; i < NUM_BUCKETS; i ++) {
203 if (_buckets[i] > 0) {
204 return get_bucket_lower_limit(i);
205 }
206 }
207 return 0;
208 }
209
217 uint64_t max() const {
218 for (int i = NUM_BUCKETS - 1; i >= 0; i--) {
219 if (_buckets[i] > 0) {
220 return get_bucket_upper_limit(i);
221 }
222 }
223 return 0;
224 }
225
230 for (size_t i = 0; i < NUM_BUCKETS; i++) {
231 _buckets[i] += b.get(i);
232 }
233 return *this;
234 }
235
236 template<uint64_t A, uint64_t B, size_t C>
238
239 /*
240 * \brief returns the count in the given bucket
241 */
242 uint64_t get(size_t bucket) const {
243 return _buckets[bucket];
244 }
245
261 uint64_t quantile(float quantile) const {
262 if (quantile < 0 || quantile > 1.0) {
263 throw std::runtime_error("Invalid quantile value " + std::to_string(quantile) + ". Value should be between 0 and 1");
264 }
265 auto c = count();
266
267 if (!c) {
268 return 0; // no data
269 }
270
271 auto pcount = uint64_t(std::floor(c * quantile));
272 uint64_t elements = 0;
273 for (size_t i = 0; i < NUM_BUCKETS - 2; i++) {
274 if (_buckets[i]) {
275 elements += _buckets[i];
276 if (elements >= pcount) {
277 return get_bucket_lower_limit(i);
278 }
279 }
280 }
281 return Max; // overflowed value is in the requested quantile
282 }
283
288 uint64_t mean() const {
289 uint64_t elements = 0;
290 double sum = 0;
291 for (size_t i = 0; i < NUM_BUCKETS - 1; i++) {
292 elements += _buckets[i];
293 sum += _buckets[i] * get_bucket_lower_limit(i);
294 }
295 return (elements) ? sum / elements : 0;
296 }
297
301 size_t size() const {
302 return NUM_BUCKETS;
303 }
304
308 uint64_t count() const {
309 uint64_t sum = 0L;
310 for (size_t i = 0; i < NUM_BUCKETS; i++) {
311 sum += _buckets[i];
312 }
313 return sum;
314 }
315
320 for (size_t i = 0; i < NUM_BUCKETS; i++) {
321 _buckets[i] *= v;
322 }
323 return *this;
324 }
325
326 uint64_t& operator[](size_t b) noexcept {
327 return _buckets[b];
328 }
339 int32_t get_schema() const noexcept {
340 return PRECISION_BITS;
341 }
342
351 int32_t min_as_native_histogram_id() const noexcept {
352 return MIN_ID;
353 }
354
355 seastar::metrics::histogram to_metrics_histogram() const noexcept {
357 res.buckets.resize(size() - 1);
358 uint64_t cummulative_count = 0;
359 res.sample_sum = 0;
361
362 for (size_t i = 0; i < NUM_BUCKETS - 1; i++) {
363 auto& v = res.buckets[i];
364 v.upper_bound = get_bucket_lower_limit(i + 1);
365 cummulative_count += get(i);
366 v.count = cummulative_count;
367 res.sample_sum += get(i) * v.upper_bound;
368 }
369 // The count serves as the infinite bucket
370 res.sample_count = cummulative_count + get(NUM_BUCKETS - 1);
371 res.sample_sum += get(NUM_BUCKETS - 1) * get_bucket_lower_limit(NUM_BUCKETS - 1);
372 return res;
373 }
374};
375
376template<uint64_t Min, uint64_t Max, size_t NumBuckets>
377inline approximate_exponential_histogram<Min, Max, NumBuckets> base_estimated_histogram_merge(approximate_exponential_histogram<Min, Max, NumBuckets> a, const approximate_exponential_histogram<Min, Max, NumBuckets>& b) {
378 return a.merge(b);
379}
380
389public:
392 return *this;
393 }
394
395 void add_micro(uint64_t n) {
397 }
398
399 template<typename T>
400 void add(const T& latency) {
401 add_micro(std::chrono::duration_cast<std::chrono::microseconds>(latency).count());
402 }
403};
404
405inline time_estimated_histogram time_estimated_histogram_merge(time_estimated_histogram a, const time_estimated_histogram& b) {
406 return a.merge(b);
407}
408}
uint64_t mean() const
returns the mean histogram value (average of bucket offsets, weighted by count) It will return 0 if t...
Definition: estimated_histogram.hh:288
void clear()
clear the current values.
Definition: estimated_histogram.hh:182
uint64_t get_bucket_upper_limit(uint16_t bucket_id) const
Returns the bucket upper limit given the bucket id. The last bucket (Infinity bucket) will return UMA...
Definition: estimated_histogram.hh:155
int32_t get_schema() const noexcept
get_schema the schema of a native histogram
Definition: estimated_histogram.hh:339
uint64_t max() const
returns the largest value that could have been added to this histogram. This method looks for the fir...
Definition: estimated_histogram.hh:217
uint64_t get_bucket_lower_limit(uint16_t bucket_id) const
Returns the bucket lower limit given the bucket id. The first and last bucket will always return the ...
Definition: estimated_histogram.hh:142
int32_t min_as_native_histogram_id() const noexcept
return the histogram min as a native histogram ID
Definition: estimated_histogram.hh:351
uint64_t count() const
returns the total number of values inserted
Definition: estimated_histogram.hh:308
approximate_exponential_histogram & merge(const approximate_exponential_histogram &b)
merge a histogram to the current one.
Definition: estimated_histogram.hh:229
approximate_exponential_histogram & operator*=(double v)
multiple all the buckets content in the histogram by a constant
Definition: estimated_histogram.hh:319
uint64_t min() const
returns the smallest value that could have been added to this histogram This method looks for the fir...
Definition: estimated_histogram.hh:201
size_t size() const
returns the number of buckets;
Definition: estimated_histogram.hh:301
uint64_t quantile(float quantile) const
get a histogram quantile
Definition: estimated_histogram.hh:261
void add(uint64_t n)
Add an item to the histogram Increments the count of the bucket holding that value.
Definition: estimated_histogram.hh:190
uint16_t find_bucket_index(uint64_t val) const
Find the bucket index for a given value The position of a value that is lower or equal to Min will al...
Definition: estimated_histogram.hh:167
estimated histogram for duration values time_estimated_histogram is used for short task timing....
Definition: estimated_histogram.hh:388
native histogram specific information
Definition: metrics_types.hh:51
Histogram data type.
Definition: metrics_types.hh:69