Seastar
High performance C++ framework for concurrent servers
slab.hh
/*
 * This file is open source software, licensed to you under the terms
 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
 * distributed with this work for additional information regarding copyright
 * ownership. You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*
 * Copyright 2015 Cloudius Systems
 */
#pragma once

#include <boost/intrusive/unordered_set.hpp>
#include <boost/intrusive/list.hpp>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <vector>
#include <algorithm>
#include <functional>  // std::function, used throughout
#include <limits>      // std::numeric_limits
#include <type_traits> // std::alignment_of_v
#include <seastar/core/align.hh>
#include <seastar/core/memory.hh>
#include <seastar/core/metrics.hh> // seastar::metrics::metric_groups
#include <seastar/util/assert.hh>

namespace seastar {

static constexpr uint16_t SLAB_MAGIC_NUMBER = 0x51AB; // meant to be 'SLAB' :-)

/*
 * Item requirements
 * - Inherit from slab_item_base.
 * - The first constructor parameter must be uint32_t _slab_page_index.
 * - Implement get_slab_page_index() to return _slab_page_index.
 * - Implement is_unlocked() to check whether the Item can be evicted.
 */
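
/*
 * A minimal sketch of a conforming Item (hypothetical, for illustration only;
 * a real item would also carry its payload and a real locking protocol):
 *
 *   class my_item : public slab_item_base {
 *       uint32_t _slab_page_index;
 *       bool _locked = false;
 *   public:
 *       my_item(uint32_t slab_page_index) : _slab_page_index(slab_page_index) {}
 *       uint32_t get_slab_page_index() const { return _slab_page_index; }
 *       bool is_unlocked() const { return !_locked; }
 *   };
 */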

/*
 * slab_page_desc is 1:1 mapped to a slab page.
 * footprint: 80 bytes for each slab page.
 */
class slab_page_desc {
private:
    boost::intrusive::list_member_hook<> _lru_link;
    boost::intrusive::list_member_hook<> _free_pages_link;
    void *_slab_page;
    std::vector<uintptr_t> _free_objects;
    uint32_t _refcnt;
    uint32_t _index; // index into slab page vector
    uint16_t _magic;
    uint8_t _slab_class_id;
public:
    slab_page_desc(void *slab_page, size_t objects, size_t object_size, uint8_t slab_class_id, uint32_t index)
        : _slab_page(slab_page)
        , _refcnt(0U)
        , _index(index)
        , _magic(SLAB_MAGIC_NUMBER)
        , _slab_class_id(slab_class_id)
    {
        auto object = reinterpret_cast<uintptr_t>(slab_page);
        _free_objects.reserve(objects - 1);
        for (auto i = 1u; i < objects; i++) {
            object += object_size;
            _free_objects.push_back(object);
        }
    }
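
    // Note: the loop above starts at 1 on purpose. The first object of a new
    // slab page is never pushed onto _free_objects, because
    // slab_class::create_from_new_page() hands it out to the caller directly.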

    bool empty() const {
        return _free_objects.empty();
    }

    size_t size() const {
        return _free_objects.size();
    }

    uint32_t& refcnt() {
        return _refcnt;
    }

    uint32_t index() const {
        return _index;
    }

    uint16_t magic() const {
        return _magic;
    }

    uint8_t slab_class_id() const {
        return _slab_class_id;
    }

    void* slab_page() const {
        return _slab_page;
    }

    std::vector<uintptr_t>& free_objects() {
        return _free_objects;
    }

    void* allocate_object() {
        SEASTAR_ASSERT(!_free_objects.empty());
        auto object = reinterpret_cast<void*>(_free_objects.back());
        _free_objects.pop_back();
        return object;
    }

    void free_object(void *object) {
        _free_objects.push_back(reinterpret_cast<uintptr_t>(object));
    }

    template<typename Item>
    friend class slab_class;
    template<typename Item>
    friend class slab_allocator;
};

class slab_item_base {
    boost::intrusive::list_member_hook<> _lru_link;

    template<typename Item>
    friend class slab_class;
};

template<typename Item>
class slab_class {
private:
    boost::intrusive::list<slab_page_desc,
        boost::intrusive::member_hook<slab_page_desc, boost::intrusive::list_member_hook<>,
        &slab_page_desc::_free_pages_link>> _free_slab_pages;
    boost::intrusive::list<slab_item_base,
        boost::intrusive::member_hook<slab_item_base, boost::intrusive::list_member_hook<>,
        &slab_item_base::_lru_link>> _lru;
    size_t _size; // size of objects
    uint8_t _slab_class_id;
private:
    template<typename... Args>
    inline
    Item* create_item(void *object, uint32_t slab_page_index, Args&&... args) {
        Item *new_item = new(object) Item(slab_page_index, std::forward<Args>(args)...);
        _lru.push_front(reinterpret_cast<slab_item_base&>(*new_item));
        return new_item;
    }

    inline
    std::pair<void *, uint32_t> evict_lru_item(std::function<void (Item& item_ref)>& erase_func) {
        if (_lru.empty()) {
            return { nullptr, 0U };
        }

        Item& victim = reinterpret_cast<Item&>(_lru.back());
        uint32_t index = victim.get_slab_page_index();
        SEASTAR_ASSERT(victim.is_unlocked());
        _lru.erase(_lru.iterator_to(reinterpret_cast<slab_item_base&>(victim)));
        // WARNING: You need to make sure that erase_func will not release victim back to slab.
        erase_func(victim);

        return { reinterpret_cast<void*>(&victim), index };
    }
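
    // Rationale: evict_lru_item() deliberately bypasses the page free lists.
    // It returns the victim's raw memory and slab page index so the caller
    // can placement-new a fresh Item into the same slot; that is also why
    // erase_func must not free the victim back to the slab.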
public:
    slab_class(size_t size, uint8_t slab_class_id)
        : _size(size)
        , _slab_class_id(slab_class_id)
    {
    }
    slab_class(slab_class&&) = default;
    ~slab_class() {
        _free_slab_pages.clear();
        _lru.clear();
    }

    size_t size() const {
        return _size;
    }

    bool empty() const {
        return _free_slab_pages.empty();
    }

    bool has_no_slab_pages() const {
        return _lru.empty();
    }

    template<typename... Args>
    Item *create(Args&&... args) {
        SEASTAR_ASSERT(!_free_slab_pages.empty());
        auto& desc = _free_slab_pages.back();
        auto object = desc.allocate_object();
        if (desc.empty()) {
            // the page is now fully allocated; remove its descriptor from the
            // list of slab pages with free objects.
            _free_slab_pages.erase(_free_slab_pages.iterator_to(desc));
        }

        return create_item(object, desc.index(), std::forward<Args>(args)...);
    }

    template<typename... Args>
    Item *create_from_new_page(uint64_t max_object_size, uint32_t slab_page_index,
                               std::function<void (slab_page_desc& desc)> insert_slab_page_desc,
                               Args&&... args) {
        // allocate slab page.
        constexpr size_t alignment = std::alignment_of_v<Item>;
        void *slab_page = aligned_alloc(alignment, max_object_size);
        if (!slab_page) {
            throw std::bad_alloc{};
        }
        // allocate a descriptor for the slab page.
        slab_page_desc *desc = nullptr;
        SEASTAR_ASSERT(_size % alignment == 0);
        try {
            auto objects = max_object_size / _size;
            desc = new slab_page_desc(slab_page, objects, _size, _slab_class_id, slab_page_index);
        } catch (const std::bad_alloc&) {
            ::free(slab_page);
            throw std::bad_alloc{};
        }

        _free_slab_pages.push_front(*desc);
        insert_slab_page_desc(*desc);

        // the first object of the allocated slab page is returned.
        return create_item(slab_page, slab_page_index, std::forward<Args>(args)...);
    }

    template<typename... Args>
    Item *create_from_lru(std::function<void (Item& item_ref)>& erase_func, Args&&... args) {
        auto ret = evict_lru_item(erase_func);
        if (!ret.first) {
            throw std::bad_alloc{};
        }
        return create_item(ret.first, ret.second, std::forward<Args>(args)...);
    }

    void free_item(Item *item, slab_page_desc& desc) {
        void *object = item;
        _lru.erase(_lru.iterator_to(reinterpret_cast<slab_item_base&>(*item)));
        desc.free_object(object);
        if (desc.size() == 1) {
            // the page was previously full; push its descriptor back into the
            // list of slab pages with free objects.
            _free_slab_pages.push_back(desc);
        }
    }

    void touch_item(Item *item) {
        auto& item_ref = reinterpret_cast<slab_item_base&>(*item);
        _lru.erase(_lru.iterator_to(item_ref));
        _lru.push_front(item_ref);
    }

    void remove_item_from_lru(Item *item) {
        auto& item_ref = reinterpret_cast<slab_item_base&>(*item);
        _lru.erase(_lru.iterator_to(item_ref));
    }

    void insert_item_into_lru(Item *item) {
        auto& item_ref = reinterpret_cast<slab_item_base&>(*item);
        _lru.push_front(item_ref);
    }

    void remove_desc_from_free_list(slab_page_desc& desc) {
        SEASTAR_ASSERT(desc.slab_class_id() == _slab_class_id);
        _free_slab_pages.erase(_free_slab_pages.iterator_to(desc));
    }
};

template<typename Item>
class slab_allocator {
private:
    std::vector<size_t> _slab_class_sizes;
    std::vector<slab_class<Item>> _slab_classes;
    seastar::metrics::metric_groups _metrics;
    // erase_func() is used to remove the item from the cache using slab.
    std::function<void (Item& item_ref)> _erase_func;
    std::vector<slab_page_desc*> _slab_pages_vector;
    boost::intrusive::list<slab_page_desc,
        boost::intrusive::member_hook<slab_page_desc, boost::intrusive::list_member_hook<>,
        &slab_page_desc::_lru_link>> _slab_page_desc_lru;
    uint64_t _max_object_size;
    uint64_t _available_slab_pages;
    struct collectd_stats {
        uint64_t allocs;
        uint64_t frees;
    } _stats;
    memory::reclaimer *_reclaimer = nullptr;
    bool _reclaimed = false;
private:
    memory::reclaiming_result evict_lru_slab_page() {
        if (_slab_page_desc_lru.empty()) {
            // NOTE: Nothing to evict. If this happens, it implies that all
            // slab pages in the slab are being used at the same time.
            // That said, this event is very unlikely to happen.
            return memory::reclaiming_result::reclaimed_nothing;
        }
        // get the descriptor of the least-recently-used slab page and related info.
        auto& desc = _slab_page_desc_lru.back();
        SEASTAR_ASSERT(desc.refcnt() == 0);
        uint8_t slab_class_id = desc.slab_class_id();
        auto slab_class = get_slab_class(slab_class_id);
        void *slab_page = desc.slab_page();

        auto& free_objects = desc.free_objects();
        if (!desc.empty()) {
            // if not empty, remove desc from the list of slab pages with free objects,
            slab_class->remove_desc_from_free_list(desc);
            // and sort the array of free objects for binary search later on.
            std::sort(free_objects.begin(), free_objects.end());
        }
        // remove desc from the LRU list of slab page descriptors.
        _slab_page_desc_lru.erase(_slab_page_desc_lru.iterator_to(desc));
        // remove desc from the slab page vector.
        _slab_pages_vector[desc.index()] = nullptr;

        // Iterate through the objects in the slab page; every object that is
        // a live item must be removed from its LRU and then erased.
        uintptr_t object = reinterpret_cast<uintptr_t>(slab_page);
        auto object_size = slab_class->size();
        auto objects = _max_object_size / object_size;
        for (auto i = 0u; i < objects; i++, object += object_size) {
            if (!desc.empty()) {
                // if binary_search succeeds, the object at the current offset
                // is on the free list, i.e. it is not a live item.
                if (std::binary_search(free_objects.begin(), free_objects.end(), object)) {
                    continue;
                }
            }
            Item* item = reinterpret_cast<Item*>(object);
            SEASTAR_ASSERT(item->is_unlocked());
            slab_class->remove_item_from_lru(item);
            _erase_func(*item);
            _stats.frees++;
        }
#ifdef SEASTAR_DEBUG
        printf("lru slab page eviction succeeded! desc_empty?=%d\n", desc.empty());
#endif
        ::free(slab_page); // free the slab page
        delete &desc; // and its descriptor
        return memory::reclaiming_result::reclaimed_something;
    }

    /*
     * Reclaim the least-recently-used slab page that is currently unused.
     */
    memory::reclaiming_result reclaim() {
        // once the reclaimer has been called, slab pages should no longer be
        // allocated, as the memory used by the slab is supposed to be calibrated.
        _reclaimed = true;
        // FIXME: Should reclaim() only evict a single slab page at a time?
        return evict_lru_slab_page();
    }

    void initialize_slab_allocator(double growth_factor, uint64_t limit) {
        constexpr size_t alignment = std::alignment_of_v<Item>;
        constexpr size_t initial_size = 96;
        size_t size = initial_size; // initial object size
        uint8_t slab_class_id = 0U;

        while (_max_object_size / size > 1) {
            size = align_up(size, alignment);
            _slab_class_sizes.push_back(size);
            _slab_classes.emplace_back(size, slab_class_id);
            size *= growth_factor;
            SEASTAR_ASSERT(slab_class_id < std::numeric_limits<uint8_t>::max());
            slab_class_id++;
        }
        _slab_class_sizes.push_back(_max_object_size);
        _slab_classes.emplace_back(_max_object_size, slab_class_id);

        // If slab limit is zero, enable reclaimer.
        if (!limit) {
            _reclaimer = new memory::reclaimer([this] { return reclaim(); });
        } else {
            _slab_pages_vector.reserve(_available_slab_pages);
        }
    }
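
    // A worked example (assuming growth_factor = 1.25 and alignof(Item) == 8):
    // the loop above generates the class sizes 96, 120, 152, 192, 240, 304,
    // 384, ... (each size is the previous one times 1.25, rounded up to the
    // alignment), and a final class serving exactly _max_object_size bytes.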

    slab_class<Item>* get_slab_class(const size_t size) {
        // given a size, find slab class with binary search.
        auto i = std::lower_bound(_slab_class_sizes.begin(), _slab_class_sizes.end(), size);
        if (i == _slab_class_sizes.end()) {
            return nullptr;
        }
        auto dist = std::distance(_slab_class_sizes.begin(), i);
        return &_slab_classes[dist];
    }
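
    // For example, with class sizes {96, 120, 152, 192, ...} (see the sketch
    // above), a request for 130 bytes lower_bound()s to the 152-byte class:
    // the smallest class whose objects hold at least 130 bytes.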

    slab_class<Item>* get_slab_class(const uint8_t slab_class_id) {
        // slab_class_id is unsigned, so only the upper bound needs checking.
        SEASTAR_ASSERT(slab_class_id < _slab_classes.size());
        return &_slab_classes[slab_class_id];
    }

    void register_metrics() {
        namespace sm = seastar::metrics;
        _metrics.add_group("slab", {
            sm::make_counter("malloc_total_operations", sm::description("Total number of slab malloc operations"), _stats.allocs),
            sm::make_counter("free_total_operations", sm::description("Total number of slab free operations"), _stats.frees),
            sm::make_gauge("malloc_objects", sm::description("Number of slab created objects currently in memory"), [this] {
                return _stats.allocs - _stats.frees;
            })
        });
    }

    inline slab_page_desc& get_slab_page_desc(Item *item)
    {
        auto desc = _slab_pages_vector[item->get_slab_page_index()];
        SEASTAR_ASSERT(desc != nullptr);
        SEASTAR_ASSERT(desc->magic() == SLAB_MAGIC_NUMBER);
        return *desc;
    }

    inline bool can_allocate_page(slab_class<Item>& sc) {
        return (_reclaimer && !_reclaimed) ||
               (_available_slab_pages > 0 || sc.has_no_slab_pages());
    }
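
    // One reading of the condition above: a new slab page may be allocated
    // when the allocator runs in reclaimer mode (limit == 0) and no reclaim
    // has happened yet, when the page budget is not yet exhausted, or when
    // the class owns no pages at all, so every class can bootstrap its first
    // page even once the budget runs out.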
public:
    slab_allocator(double growth_factor, uint64_t limit, uint64_t max_object_size)
        : _max_object_size(max_object_size)
        , _available_slab_pages(limit / max_object_size)
    {
        initialize_slab_allocator(growth_factor, limit);
        register_metrics();
    }

    slab_allocator(double growth_factor, uint64_t limit, uint64_t max_object_size,
                   std::function<void (Item& item_ref)> erase_func)
        : _erase_func(std::move(erase_func))
        , _max_object_size(max_object_size)
        , _available_slab_pages(limit / max_object_size)
    {
        initialize_slab_allocator(growth_factor, limit);
        register_metrics();
    }

    ~slab_allocator()
    {
        _slab_classes.clear();
        _slab_page_desc_lru.clear();
        for (auto desc : _slab_pages_vector) {
            if (!desc) {
                continue;
            }
            ::free(desc->slab_page());
            delete desc;
        }
        delete _reclaimer;
    }

    /**
     * Create an item from a given slab class based on requested size.
     */
    template<typename... Args>
    Item* create(const size_t size, Args&&... args) {
        auto slab_class = get_slab_class(size);
        if (!slab_class) {
            throw std::bad_alloc{};
        }

        Item *item = nullptr;
        if (!slab_class->empty()) {
            item = slab_class->create(std::forward<Args>(args)...);
            _stats.allocs++;
        } else {
            if (can_allocate_page(*slab_class)) {
                auto index_to_insert = _slab_pages_vector.size();
                item = slab_class->create_from_new_page(_max_object_size, index_to_insert,
                    [this](slab_page_desc& desc) {
                        if (_reclaimer) {
                            // insert desc into the LRU list of slab page descriptors.
                            _slab_page_desc_lru.push_front(desc);
                        }
                        // insert desc into the slab page vector.
                        _slab_pages_vector.push_back(&desc);
                    },
                    std::forward<Args>(args)...);
                if (_available_slab_pages > 0) {
                    _available_slab_pages--;
                }
                _stats.allocs++;
            } else if (_erase_func) {
                item = slab_class->create_from_lru(_erase_func, std::forward<Args>(args)...);
            }
        }
        return item;
    }

    void lock_item(Item *item) {
        auto& desc = get_slab_page_desc(item);
        if (_reclaimer) {
            auto& refcnt = desc.refcnt();

            if (++refcnt == 1) {
                // remove slab page descriptor from list of slab page descriptors.
                _slab_page_desc_lru.erase(_slab_page_desc_lru.iterator_to(desc));
            }
        }
        // remove item from the lru of its slab class.
        auto slab_class = get_slab_class(desc.slab_class_id());
        slab_class->remove_item_from_lru(item);
    }

    void unlock_item(Item *item) {
        auto& desc = get_slab_page_desc(item);
        if (_reclaimer) {
            auto& refcnt = desc.refcnt();

            if (--refcnt == 0) {
                // insert slab page descriptor back into list of slab page descriptors.
                _slab_page_desc_lru.push_front(desc);
            }
        }
        // insert item into the lru of its slab class.
        auto slab_class = get_slab_class(desc.slab_class_id());
        slab_class->insert_item_into_lru(item);
    }
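
    // Design note: locking pins an item in memory. A locked item leaves its
    // class LRU, so create_from_lru() can never evict it, and the page-level
    // refcnt takes the whole page out of _slab_page_desc_lru on the first
    // lock, so the memory reclaimer cannot free the page under the caller.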

    /**
     * Free an item back to its original slab class.
     */
    void free(Item *item) {
        if (item) {
            auto& desc = get_slab_page_desc(item);
            auto slab_class = get_slab_class(desc.slab_class_id());
            slab_class->free_item(item, desc);
            _stats.frees++;
        }
    }

    /**
     * Update the item's position in the LRU of its slab class.
     */
    void touch(Item *item) {
        if (item) {
            auto& desc = get_slab_page_desc(item);
            auto slab_class = get_slab_class(desc.slab_class_id());
            slab_class->touch_item(item);
        }
    }

    /**
     * Helper function: print all available slab classes and their respective properties.
     */
    void print_slab_classes() const {
        auto class_id = 0;
        for (auto& slab_class : _slab_classes) {
            size_t size = slab_class.size();
            printf("slab[%3d]\tsize: %10lu\tper-slab-page: %5lu\n", class_id, size, _max_object_size / size);
            class_id++;
        }
    }

    /**
     * Helper function: get the chunk size of the slab class that would serve
     * a given requested size (0 if the request is too large for any class).
     */
    size_t class_size(const size_t size) {
        auto slab_class = get_slab_class(size);
        return (slab_class) ? slab_class->size() : 0;
    }
};

}
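
/*
 * Usage sketch (hypothetical; assumes a conforming my_item type such as the
 * one outlined near the top of this file):
 *
 *   seastar::slab_allocator<my_item> slab(
 *       1.25,              // growth factor between slab class sizes
 *       64 * 1024 * 1024,  // slab limit in bytes; 0 would enable the reclaimer
 *       1024 * 1024);      // slab page size == maximum object size
 *
 *   my_item* item = slab.create(200); // served from the best-fit slab class
 *   slab.touch(item);                 // refresh its LRU position
 *   slab.free(item);                  // return it to its slab class
 */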