slab.hh
/*
 * This file is open source software, licensed to you under the terms
 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
 * distributed with this work for additional information regarding copyright
 * ownership. You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*
 * Copyright 2015 Cloudius Systems
 */
#pragma once

#include <boost/intrusive/unordered_set.hpp>
#include <boost/intrusive/list.hpp>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <vector>
#include <algorithm>
#include <seastar/core/align.hh>
#include <seastar/core/memory.hh>
#include <seastar/core/metrics.hh>

namespace seastar {

static constexpr uint16_t SLAB_MAGIC_NUMBER = 0x51AB; // meant to be 'SLAB' :-)

/*
 * Item requirements:
 * - Must inherit from slab_item_base.
 * - The first constructor parameter must be uint32_t _slab_page_index.
 * - Must implement get_slab_page_index(), returning _slab_page_index.
 * - Must implement is_unlocked(), which tells whether the Item can be evicted.
 */
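
/*
 * For illustration only (not part of the original header): a minimal,
 * hypothetical Item type satisfying the requirements above could look like
 * the sketch below. The name my_item and its _locked flag are assumptions
 * made for the example.
 *
 *   class my_item : public slab_item_base {
 *       uint32_t _slab_page_index;
 *       bool _locked = false;
 *   public:
 *       my_item(uint32_t slab_page_index)
 *           : _slab_page_index(slab_page_index) {
 *       }
 *       uint32_t get_slab_page_index() const {
 *           return _slab_page_index;
 *       }
 *       bool is_unlocked() const {
 *           return !_locked;
 *       }
 *   };
 */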

/*
 * slab_page_desc is mapped 1:1 to a slab page.
 * Footprint: 80 bytes per slab page.
 */
class slab_page_desc {
private:
    boost::intrusive::list_member_hook<> _lru_link;
    boost::intrusive::list_member_hook<> _free_pages_link;
    void *_slab_page;
    std::vector<uintptr_t> _free_objects;
    uint32_t _refcnt;
    uint32_t _index; // index into slab page vector
    uint16_t _magic;
    uint8_t _slab_class_id;
public:
    slab_page_desc(void *slab_page, size_t objects, size_t object_size, uint8_t slab_class_id, uint32_t index)
        : _slab_page(slab_page)
        , _refcnt(0U)
        , _index(index)
        , _magic(SLAB_MAGIC_NUMBER)
        , _slab_class_id(slab_class_id)
    {
        auto object = reinterpret_cast<uintptr_t>(slab_page);
        _free_objects.reserve(objects - 1);
        for (auto i = 1u; i < objects; i++) {
            object += object_size;
            _free_objects.push_back(object);
        }
    }

    bool empty() const {
        return _free_objects.empty();
    }

    size_t size() const {
        return _free_objects.size();
    }

    uint32_t& refcnt() {
        return _refcnt;
    }

    uint32_t index() const {
        return _index;
    }

    uint16_t magic() const {
        return _magic;
    }

    uint8_t slab_class_id() const {
        return _slab_class_id;
    }

    void* slab_page() const {
        return _slab_page;
    }

    std::vector<uintptr_t>& free_objects() {
        return _free_objects;
    }

    void* allocate_object() {
        assert(!_free_objects.empty());
        auto object = reinterpret_cast<void*>(_free_objects.back());
        _free_objects.pop_back();
        return object;
    }

    void free_object(void *object) {
        _free_objects.push_back(reinterpret_cast<uintptr_t>(object));
    }

    template<typename Item>
    friend class slab_class;
    template<typename Item>
    friend class slab_allocator;
};

class slab_item_base {
    boost::intrusive::list_member_hook<> _lru_link;

    template<typename Item>
    friend class slab_class;
};

template<typename Item>
class slab_class {
private:
    boost::intrusive::list<slab_page_desc,
        boost::intrusive::member_hook<slab_page_desc, boost::intrusive::list_member_hook<>,
        &slab_page_desc::_free_pages_link>> _free_slab_pages;
    boost::intrusive::list<slab_item_base,
        boost::intrusive::member_hook<slab_item_base, boost::intrusive::list_member_hook<>,
        &slab_item_base::_lru_link>> _lru;
    size_t _size; // size of objects
    uint8_t _slab_class_id;
private:
    template<typename... Args>
    inline
    Item* create_item(void *object, uint32_t slab_page_index, Args&&... args) {
        Item *new_item = new(object) Item(slab_page_index, std::forward<Args>(args)...);
        _lru.push_front(reinterpret_cast<slab_item_base&>(*new_item));
        return new_item;
    }

    inline
    std::pair<void *, uint32_t> evict_lru_item(std::function<void (Item& item_ref)>& erase_func) {
        if (_lru.empty()) {
            return { nullptr, 0U };
        }

        Item& victim = reinterpret_cast<Item&>(_lru.back());
        uint32_t index = victim.get_slab_page_index();
        assert(victim.is_unlocked());
        _lru.erase(_lru.iterator_to(reinterpret_cast<slab_item_base&>(victim)));
        // WARNING: You need to make sure that erase_func will not release victim back to slab.
        erase_func(victim);

        return { reinterpret_cast<void*>(&victim), index };
    }
public:
    slab_class(size_t size, uint8_t slab_class_id)
        : _size(size)
        , _slab_class_id(slab_class_id)
    {
    }
    slab_class(slab_class&&) = default;
    ~slab_class() {
        _free_slab_pages.clear();
        _lru.clear();
    }

    size_t size() const {
        return _size;
    }

    bool empty() const {
        return _free_slab_pages.empty();
    }

    bool has_no_slab_pages() const {
        return _lru.empty();
    }

    template<typename... Args>
    Item *create(Args&&... args) {
        assert(!_free_slab_pages.empty());
        auto& desc = _free_slab_pages.back();
        auto object = desc.allocate_object();
        if (desc.empty()) {
            // if empty, remove desc from the list of slab pages with free objects.
            _free_slab_pages.erase(_free_slab_pages.iterator_to(desc));
        }

        return create_item(object, desc.index(), std::forward<Args>(args)...);
    }

    template<typename... Args>
    Item *create_from_new_page(uint64_t max_object_size, uint32_t slab_page_index,
                               std::function<void (slab_page_desc& desc)> insert_slab_page_desc,
                               Args&&... args) {
        // allocate slab page.
        constexpr size_t alignment = std::alignment_of_v<Item>;
        void *slab_page = aligned_alloc(alignment, max_object_size);
        if (!slab_page) {
            throw std::bad_alloc{};
        }
        // allocate descriptor to slab page.
        slab_page_desc *desc = nullptr;
        assert(_size % alignment == 0);
        try {
            auto objects = max_object_size / _size;
            desc = new slab_page_desc(slab_page, objects, _size, _slab_class_id, slab_page_index);
        } catch (const std::bad_alloc& e) {
            ::free(slab_page);
            throw std::bad_alloc{};
        }

        _free_slab_pages.push_front(*desc);
        insert_slab_page_desc(*desc);

        // first object from the allocated slab page is returned.
        return create_item(slab_page, slab_page_index, std::forward<Args>(args)...);
    }

    template<typename... Args>
    Item *create_from_lru(std::function<void (Item& item_ref)>& erase_func, Args&&... args) {
        auto ret = evict_lru_item(erase_func);
        if (!ret.first) {
            throw std::bad_alloc{};
        }
        return create_item(ret.first, ret.second, std::forward<Args>(args)...);
    }

    void free_item(Item *item, slab_page_desc& desc) {
        void *object = item;
        _lru.erase(_lru.iterator_to(reinterpret_cast<slab_item_base&>(*item)));
        desc.free_object(object);
        if (desc.size() == 1) {
            // push back desc into the list of slab pages with free objects.
            _free_slab_pages.push_back(desc);
        }
    }

    void touch_item(Item *item) {
        auto& item_ref = reinterpret_cast<slab_item_base&>(*item);
        _lru.erase(_lru.iterator_to(item_ref));
        _lru.push_front(item_ref);
    }

    void remove_item_from_lru(Item *item) {
        auto& item_ref = reinterpret_cast<slab_item_base&>(*item);
        _lru.erase(_lru.iterator_to(item_ref));
    }

    void insert_item_into_lru(Item *item) {
        auto& item_ref = reinterpret_cast<slab_item_base&>(*item);
        _lru.push_front(item_ref);
    }

    void remove_desc_from_free_list(slab_page_desc& desc) {
        assert(desc.slab_class_id() == _slab_class_id);
        _free_slab_pages.erase(_free_slab_pages.iterator_to(desc));
    }
};

template<typename Item>
class slab_allocator {
private:
    std::vector<size_t> _slab_class_sizes;
    std::vector<slab_class<Item>> _slab_classes;
    metrics::metric_groups _metrics;
    // erase_func() is used to remove the item from the cache that is using the slab.
    std::function<void (Item& item_ref)> _erase_func;
    std::vector<slab_page_desc*> _slab_pages_vector;
    boost::intrusive::list<slab_page_desc,
        boost::intrusive::member_hook<slab_page_desc, boost::intrusive::list_member_hook<>,
        &slab_page_desc::_lru_link>> _slab_page_desc_lru;
    uint64_t _max_object_size;
    uint64_t _available_slab_pages;
    struct collectd_stats {
        uint64_t allocs;
        uint64_t frees;
    } _stats;
    memory::reclaimer *_reclaimer = nullptr;
    bool _reclaimed = false;
private:
    memory::reclaiming_result evict_lru_slab_page() {
        if (_slab_page_desc_lru.empty()) {
            // NOTE: Nothing to evict. If this happens, it implies that all
            // slab pages in the slab are being used at the same time.
            // That being said, this event is very unlikely to happen.
            return memory::reclaiming_result::reclaimed_nothing;
        }
        // get descriptor of the least-recently-used slab page and related info.
        auto& desc = _slab_page_desc_lru.back();
        assert(desc.refcnt() == 0);
        uint8_t slab_class_id = desc.slab_class_id();
        auto slab_class = get_slab_class(slab_class_id);
        void *slab_page = desc.slab_page();

        auto& free_objects = desc.free_objects();
        if (!desc.empty()) {
            // if not empty, remove desc from the list of slab pages with free objects.
            slab_class->remove_desc_from_free_list(desc);
            // and sort the array of free objects for binary search later on.
            std::sort(free_objects.begin(), free_objects.end());
        }
        // remove desc from the list of slab page descriptors.
        _slab_page_desc_lru.erase(_slab_page_desc_lru.iterator_to(desc));
        // remove desc from the slab page vector.
        _slab_pages_vector[desc.index()] = nullptr;

        // Iterate through objects in the slab page; if an object is an allocated
        // item, remove it from the LRU of its slab class and then erase it.
        uintptr_t object = reinterpret_cast<uintptr_t>(slab_page);
        auto object_size = slab_class->size();
        auto objects = _max_object_size / object_size;
        for (auto i = 0u; i < objects; i++, object += object_size) {
            if (!desc.empty()) {
                // if binary_search returns true, the object at the current
                // offset is free, i.e. it is not an allocated item.
                if (std::binary_search(free_objects.begin(), free_objects.end(), object)) {
                    continue;
                }
            }
            Item* item = reinterpret_cast<Item*>(object);
            assert(item->is_unlocked());
            slab_class->remove_item_from_lru(item);
            _erase_func(*item);
            _stats.frees++;
        }
#ifdef SEASTAR_DEBUG
        printf("lru slab page eviction succeeded! desc_empty?=%d\n", desc.empty());
#endif
        ::free(slab_page); // free slab page object
        delete &desc; // free its descriptor
        return memory::reclaiming_result::reclaimed_something;
    }

    /*
     * Reclaim the least recently used slab page that is unused.
     */
    memory::reclaiming_result reclaim() {
        // Once the reclaimer has been called, slab pages should no longer be allocated,
        // as the memory used by the slab is supposed to be calibrated.
        _reclaimed = true;
        // FIXME: Should reclaim() only evict a single slab page at a time?
        return evict_lru_slab_page();
    }

    void initialize_slab_allocator(double growth_factor, uint64_t limit) {
        constexpr size_t alignment = std::alignment_of_v<Item>;
        constexpr size_t initial_size = 96;
        size_t size = initial_size; // initial object size
        uint8_t slab_class_id = 0U;

        while (_max_object_size / size > 1) {
            size = align_up(size, alignment);
            _slab_class_sizes.push_back(size);
            _slab_classes.emplace_back(size, slab_class_id);
            size *= growth_factor;
            assert(slab_class_id < std::numeric_limits<uint8_t>::max());
            slab_class_id++;
        }
        _slab_class_sizes.push_back(_max_object_size);
        _slab_classes.emplace_back(_max_object_size, slab_class_id);

        // If slab limit is zero, enable reclaimer.
        if (!limit) {
            _reclaimer = new memory::reclaimer([this] { return reclaim(); });
        } else {
            _slab_pages_vector.reserve(_available_slab_pages);
        }
    }
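
    /*
     * Illustrative note (not part of the original header): with the assumed
     * values growth_factor = 1.25, max_object_size = 1 MB, and an Item
     * alignment of 8 bytes, the loop above produces the size classes
     * 96, 120, 152, 192, 240, 304, 384, ... (each previous size multiplied
     * by 1.25 and aligned up to 8), followed by a final class equal to
     * max_object_size itself.
     */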

    slab_class<Item>* get_slab_class(const size_t size) {
        // given a size, find slab class with binary search.
        auto i = std::lower_bound(_slab_class_sizes.begin(), _slab_class_sizes.end(), size);
        if (i == _slab_class_sizes.end()) {
            return nullptr;
        }
        auto dist = std::distance(_slab_class_sizes.begin(), i);
        return &_slab_classes[dist];
    }

    slab_class<Item>* get_slab_class(const uint8_t slab_class_id) {
        assert(slab_class_id >= 0 && slab_class_id < _slab_classes.size());
        return &_slab_classes[slab_class_id];
    }

    void register_metrics() {
        namespace sm = seastar::metrics;
        _metrics.add_group("slab", {
            sm::make_counter("malloc_total_operations", sm::description("Total number of slab malloc operations"), _stats.allocs),
            sm::make_counter("free_total_operations", sm::description("Total number of slab free operations"), _stats.frees),
            sm::make_gauge("malloc_objects", sm::description("Number of slab created objects currently in memory"), [this] {
                return _stats.allocs - _stats.frees;
            })
        });
    }

    inline slab_page_desc& get_slab_page_desc(Item *item)
    {
        auto desc = _slab_pages_vector[item->get_slab_page_index()];
        assert(desc != nullptr);
        assert(desc->magic() == SLAB_MAGIC_NUMBER);
        return *desc;
    }

    inline bool can_allocate_page(slab_class<Item>& sc) {
        return (_reclaimer && !_reclaimed) ||
               (_available_slab_pages > 0 || sc.has_no_slab_pages());
    }
public:
    slab_allocator(double growth_factor, uint64_t limit, uint64_t max_object_size)
        : _max_object_size(max_object_size)
        , _available_slab_pages(limit / max_object_size)
    {
        initialize_slab_allocator(growth_factor, limit);
        register_metrics();
    }

    slab_allocator(double growth_factor, uint64_t limit, uint64_t max_object_size,
                   std::function<void (Item& item_ref)> erase_func)
        : _erase_func(std::move(erase_func))
        , _max_object_size(max_object_size)
        , _available_slab_pages(limit / max_object_size)
    {
        initialize_slab_allocator(growth_factor, limit);
        register_metrics();
    }

    ~slab_allocator()
    {
        _slab_classes.clear();
        _slab_page_desc_lru.clear();
        for (auto desc : _slab_pages_vector) {
            if (!desc) {
                continue;
            }
            ::free(desc->slab_page());
            delete desc;
        }
        delete _reclaimer;
    }

    /**
     * Create an item from the slab class that fits the requested size.
     */
    template<typename... Args>
    Item* create(const size_t size, Args&&... args) {
        auto slab_class = get_slab_class(size);
        if (!slab_class) {
            throw std::bad_alloc{};
        }

        Item *item = nullptr;
        if (!slab_class->empty()) {
            item = slab_class->create(std::forward<Args>(args)...);
            _stats.allocs++;
        } else {
            if (can_allocate_page(*slab_class)) {
                auto index_to_insert = _slab_pages_vector.size();
                item = slab_class->create_from_new_page(_max_object_size, index_to_insert,
                    [this](slab_page_desc& desc) {
                        if (_reclaimer) {
                            // insert desc into the LRU list of slab page descriptors.
                            _slab_page_desc_lru.push_front(desc);
                        }
                        // insert desc into the slab page vector.
                        _slab_pages_vector.push_back(&desc);
                    },
                    std::forward<Args>(args)...);
                if (_available_slab_pages > 0) {
                    _available_slab_pages--;
                }
                _stats.allocs++;
            } else if (_erase_func) {
                item = slab_class->create_from_lru(_erase_func, std::forward<Args>(args)...);
            }
        }
        return item;
    }

    void lock_item(Item *item) {
        auto& desc = get_slab_page_desc(item);
        if (_reclaimer) {
            auto& refcnt = desc.refcnt();

            if (++refcnt == 1) {
                // remove slab page descriptor from list of slab page descriptors.
                _slab_page_desc_lru.erase(_slab_page_desc_lru.iterator_to(desc));
            }
        }
        // remove item from the lru of its slab class.
        auto slab_class = get_slab_class(desc.slab_class_id());
        slab_class->remove_item_from_lru(item);
    }

    void unlock_item(Item *item) {
        auto& desc = get_slab_page_desc(item);
        if (_reclaimer) {
            auto& refcnt = desc.refcnt();

            if (--refcnt == 0) {
                // insert slab page descriptor back into list of slab page descriptors.
                _slab_page_desc_lru.push_front(desc);
            }
        }
        // insert item into the lru of its slab class.
        auto slab_class = get_slab_class(desc.slab_class_id());
        slab_class->insert_item_into_lru(item);
    }

    /**
     * Free an item back to its original slab class.
     */
    void free(Item *item) {
        if (item) {
            auto& desc = get_slab_page_desc(item);
            auto slab_class = get_slab_class(desc.slab_class_id());
            slab_class->free_item(item, desc);
            _stats.frees++;
        }
    }

    /**
     * Update the position of an item in the LRU of its slab class.
     */
    void touch(Item *item) {
        if (item) {
            auto& desc = get_slab_page_desc(item);
            auto slab_class = get_slab_class(desc.slab_class_id());
            slab_class->touch_item(item);
        }
    }

    /**
     * Helper function: print all slab classes and their respective properties.
     */
    void print_slab_classes() {
        auto class_id = 0;
        for (auto& slab_class : _slab_classes) {
            size_t size = slab_class.size();
            printf("slab[%3d]\tsize: %10lu\tper-slab-page: %5lu\n", class_id, size, _max_object_size / size);
            class_id++;
        }
    }

    /**
     * Return the size of the slab class that would be used to satisfy a
     * request of the given size, or 0 if no slab class is large enough.
     */
    size_t class_size(const size_t size) {
        auto slab_class = get_slab_class(size);
        return (slab_class) ? slab_class->size() : 0;
    }
};

}
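
/*
 * Illustrative usage sketch (not part of this header), assuming a hypothetical
 * Item type such as the my_item example shown near the top of this file:
 *
 *   // 4 MB slab limit, 1 MB slab pages, size classes growing by a factor of 1.25.
 *   seastar::slab_allocator<my_item> slab(1.25, 4 * 1024 * 1024, 1024 * 1024);
 *
 *   // Allocate an item from the smallest slab class that can hold 128 bytes,
 *   // then release it back to its slab page.
 *   my_item* item = slab.create(128);
 *   slab.free(item);
 */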