Seastar
High performance C++ framework for concurrent servers
slab.hh
/*
 * This file is open source software, licensed to you under the terms
 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
 * distributed with this work for additional information regarding copyright
 * ownership. You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*
 * Copyright 2015 Cloudius Systems
 */
#pragma once

#include <boost/intrusive/unordered_set.hpp>
#include <boost/intrusive/list.hpp>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <functional>
#include <limits>
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
#include <algorithm>
#include <seastar/core/metrics.hh>
#include <seastar/core/align.hh>
#include <seastar/core/memory.hh>
namespace seastar {

static constexpr uint16_t SLAB_MAGIC_NUMBER = 0x51AB; // meant to be 'SLAB' :-)

/*
 * Item requirements:
 * - Must inherit from slab_item_base.
 * - The first constructor parameter must be uint32_t _slab_page_index.
 * - Must implement get_slab_page_index(), returning _slab_page_index.
 * - Must implement is_unlocked(), which tells whether the Item can be evicted.
 */
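
// A minimal sketch of a conforming Item type, following the requirements
// above. Illustrative only: the class name and the _locked flag below are
// hypothetical, not part of this header.
//
//   class my_item : public slab_item_base {
//       uint32_t _slab_page_index;
//       bool _locked = false;
//   public:
//       my_item(uint32_t slab_page_index) : _slab_page_index(slab_page_index) {}
//       uint32_t get_slab_page_index() const { return _slab_page_index; }
//       bool is_unlocked() const { return !_locked; }
//   };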

/*
 * slab_page_desc is 1:1 mapped to a slab page.
 * Footprint: 80 bytes per slab page.
 */
class slab_page_desc {
private:
    boost::intrusive::list_member_hook<> _lru_link;
    boost::intrusive::list_member_hook<> _free_pages_link;
    void *_slab_page;
    std::vector<uintptr_t> _free_objects;
    uint32_t _refcnt;
    uint32_t _index; // index into slab page vector
    uint16_t _magic;
    uint8_t _slab_class_id;
public:
    slab_page_desc(void *slab_page, size_t objects, size_t object_size, uint8_t slab_class_id, uint32_t index)
        : _slab_page(slab_page)
        , _refcnt(0U)
        , _index(index)
        , _magic(SLAB_MAGIC_NUMBER)
        , _slab_class_id(slab_class_id)
    {
        auto object = reinterpret_cast<uintptr_t>(slab_page);
        _free_objects.reserve(objects - 1);
        for (auto i = 1u; i < objects; i++) {
            object += object_size;
            _free_objects.push_back(object);
        }
    }

    bool empty() const {
        return _free_objects.empty();
    }

    size_t size() const {
        return _free_objects.size();
    }

    uint32_t& refcnt() {
        return _refcnt;
    }

    uint32_t index() const {
        return _index;
    }

    uint16_t magic() const {
        return _magic;
    }

    uint8_t slab_class_id() const {
        return _slab_class_id;
    }

    void* slab_page() const {
        return _slab_page;
    }

    std::vector<uintptr_t>& free_objects() {
        return _free_objects;
    }

    void* allocate_object() {
        assert(!_free_objects.empty());
        auto object = reinterpret_cast<void*>(_free_objects.back());
        _free_objects.pop_back();
        return object;
    }

    void free_object(void *object) {
        _free_objects.push_back(reinterpret_cast<uintptr_t>(object));
    }

    template<typename Item>
    friend class slab_class;
    template<typename Item>
    friend class slab_allocator;
};

class slab_item_base {
    boost::intrusive::list_member_hook<> _lru_link;

    template<typename Item>
    friend class slab_class;
};

template<typename Item>
class slab_class {
private:
    boost::intrusive::list<slab_page_desc,
                           boost::intrusive::member_hook<slab_page_desc, boost::intrusive::list_member_hook<>,
                                                         &slab_page_desc::_free_pages_link>> _free_slab_pages;
    boost::intrusive::list<slab_item_base,
                           boost::intrusive::member_hook<slab_item_base, boost::intrusive::list_member_hook<>,
                                                         &slab_item_base::_lru_link>> _lru;
    size_t _size; // size of objects
    uint8_t _slab_class_id;
private:
    template<typename... Args>
    inline
    Item* create_item(void *object, uint32_t slab_page_index, Args&&... args) {
        Item *new_item = new(object) Item(slab_page_index, std::forward<Args>(args)...);
        _lru.push_front(reinterpret_cast<slab_item_base&>(*new_item));
        return new_item;
    }

    inline
    std::pair<void *, uint32_t> evict_lru_item(std::function<void (Item& item_ref)>& erase_func) {
        if (_lru.empty()) {
            return { nullptr, 0U };
        }

        Item& victim = reinterpret_cast<Item&>(_lru.back());
        uint32_t index = victim.get_slab_page_index();
        assert(victim.is_unlocked());
        _lru.erase(_lru.iterator_to(reinterpret_cast<slab_item_base&>(victim)));
        // WARNING: You need to make sure that erase_func will not release victim back to slab.
        erase_func(victim);

        return { reinterpret_cast<void*>(&victim), index };
    }
public:
    slab_class(size_t size, uint8_t slab_class_id)
        : _size(size)
        , _slab_class_id(slab_class_id)
    {
    }
    slab_class(slab_class&&) = default;
    ~slab_class() {
        _free_slab_pages.clear();
        _lru.clear();
    }

    size_t size() const {
        return _size;
    }

    bool empty() const {
        return _free_slab_pages.empty();
    }

    bool has_no_slab_pages() const {
        return _lru.empty();
    }

    template<typename... Args>
    Item *create(Args&&... args) {
        assert(!_free_slab_pages.empty());
        auto& desc = _free_slab_pages.back();
        auto object = desc.allocate_object();
        if (desc.empty()) {
            // if empty, remove desc from the list of slab pages with free objects.
            _free_slab_pages.erase(_free_slab_pages.iterator_to(desc));
        }

        return create_item(object, desc.index(), std::forward<Args>(args)...);
    }

    template<typename... Args>
    Item *create_from_new_page(uint64_t max_object_size, uint32_t slab_page_index,
                               std::function<void (slab_page_desc& desc)> insert_slab_page_desc,
                               Args&&... args) {
        // allocate slab page.
        constexpr size_t alignment = std::alignment_of_v<Item>;
        void *slab_page = aligned_alloc(alignment, max_object_size);
        if (!slab_page) {
            throw std::bad_alloc{};
        }
        // allocate descriptor of the slab page.
        slab_page_desc *desc = nullptr;
        assert(_size % alignment == 0);
        try {
            auto objects = max_object_size / _size;
            desc = new slab_page_desc(slab_page, objects, _size, _slab_class_id, slab_page_index);
        } catch (const std::bad_alloc&) {
            ::free(slab_page);
            throw std::bad_alloc{};
        }

        _free_slab_pages.push_front(*desc);
        insert_slab_page_desc(*desc);

        // the first object from the allocated slab page is returned.
        return create_item(slab_page, slab_page_index, std::forward<Args>(args)...);
    }

    template<typename... Args>
    Item *create_from_lru(std::function<void (Item& item_ref)>& erase_func, Args&&... args) {
        auto ret = evict_lru_item(erase_func);
        if (!ret.first) {
            throw std::bad_alloc{};
        }
        return create_item(ret.first, ret.second, std::forward<Args>(args)...);
    }

    void free_item(Item *item, slab_page_desc& desc) {
        void *object = item;
        _lru.erase(_lru.iterator_to(reinterpret_cast<slab_item_base&>(*item)));
        desc.free_object(object);
        if (desc.size() == 1) {
            // size() == 1 means the page just went from full to having a free
            // object, so push desc back into the list of slab pages with free objects.
            _free_slab_pages.push_back(desc);
        }
    }

    void touch_item(Item *item) {
        auto& item_ref = reinterpret_cast<slab_item_base&>(*item);
        _lru.erase(_lru.iterator_to(item_ref));
        _lru.push_front(item_ref);
    }

    void remove_item_from_lru(Item *item) {
        auto& item_ref = reinterpret_cast<slab_item_base&>(*item);
        _lru.erase(_lru.iterator_to(item_ref));
    }

    void insert_item_into_lru(Item *item) {
        auto& item_ref = reinterpret_cast<slab_item_base&>(*item);
        _lru.push_front(item_ref);
    }

    void remove_desc_from_free_list(slab_page_desc& desc) {
        assert(desc.slab_class_id() == _slab_class_id);
        _free_slab_pages.erase(_free_slab_pages.iterator_to(desc));
    }
};

template<typename Item>
class slab_allocator {
private:
    std::vector<size_t> _slab_class_sizes;
    std::vector<slab_class<Item>> _slab_classes;
    seastar::metrics::metric_groups _metrics;
    // erase_func() is used to remove the item from the cache using slab.
    std::function<void (Item& item_ref)> _erase_func;
    std::vector<slab_page_desc*> _slab_pages_vector;
    boost::intrusive::list<slab_page_desc,
                           boost::intrusive::member_hook<slab_page_desc, boost::intrusive::list_member_hook<>,
                                                         &slab_page_desc::_lru_link>> _slab_page_desc_lru;
    uint64_t _max_object_size;
    uint64_t _available_slab_pages;
    struct collectd_stats {
        uint64_t allocs = 0;
        uint64_t frees = 0;
    } _stats;
    memory::reclaimer *_reclaimer = nullptr;
    bool _reclaimed = false;
private:
    memory::reclaiming_result evict_lru_slab_page() {
        if (_slab_page_desc_lru.empty()) {
            // NOTE: Nothing to evict. If this happens, it implies that all
            // slab pages in the slab are being used at the same time.
            // That said, this event is very unlikely to happen.
            return memory::reclaiming_result::reclaimed_nothing;
        }
        // get the descriptor of the least recently used slab page and related info.
        auto& desc = _slab_page_desc_lru.back();
        assert(desc.refcnt() == 0);
        uint8_t slab_class_id = desc.slab_class_id();
        auto slab_class = get_slab_class(slab_class_id);
        void *slab_page = desc.slab_page();

        auto& free_objects = desc.free_objects();
        if (!desc.empty()) {
            // if not empty, remove desc from the list of slab pages with free objects,
            slab_class->remove_desc_from_free_list(desc);
            // and sort the array of free objects for binary search later on.
            std::sort(free_objects.begin(), free_objects.end());
        }
        // remove desc from the list of slab page descriptors.
        _slab_page_desc_lru.erase(_slab_page_desc_lru.iterator_to(desc));
        // remove desc from the slab page vector.
        _slab_pages_vector[desc.index()] = nullptr;

        // Iterate through objects in the slab page; if an object is an
        // allocated item, remove it from its LRU and erase it.
        uintptr_t object = reinterpret_cast<uintptr_t>(slab_page);
        auto object_size = slab_class->size();
        auto objects = _max_object_size / object_size;
        for (auto i = 0u; i < objects; i++, object += object_size) {
            if (!desc.empty()) {
                // if binary_search succeeds, the object at the current offset
                // is on the free list, i.e. it isn't a live item.
                if (std::binary_search(free_objects.begin(), free_objects.end(), object)) {
                    continue;
                }
            }
            Item* item = reinterpret_cast<Item*>(object);
            assert(item->is_unlocked());
            slab_class->remove_item_from_lru(item);
            _erase_func(*item);
            _stats.frees++;
        }
#ifdef SEASTAR_DEBUG
        printf("lru slab page eviction succeeded! desc_empty?=%d\n", desc.empty());
#endif
        ::free(slab_page); // free the slab page
        delete &desc; // free its descriptor
        return memory::reclaiming_result::reclaimed_something;
    }

    /*
     * Reclaim the least recently used slab page that is unused.
     */
    memory::reclaiming_result reclaim() {
        // Once the reclaimer has been invoked, slab pages should no longer be
        // allocated, as the memory used by the slab is supposed to be calibrated.
        _reclaimed = true;
        // FIXME: Should reclaim() only evict a single slab page at a time?
        return evict_lru_slab_page();
    }

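    // Size classes grow geometrically from a 96-byte minimum: each class size
    // is aligned up to alignof(Item) and then multiplied by growth_factor,
    // until a class can hold at most one object per slab page; a final class
    // of exactly _max_object_size is then appended. For example, assuming
    // growth_factor = 1.25 and an 8-byte-aligned Item, the progression starts
    // 96, 120, 152, 192, 240, ...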
    void initialize_slab_allocator(double growth_factor, uint64_t limit) {
        constexpr size_t alignment = std::alignment_of_v<Item>;
        constexpr size_t initial_size = 96;
        size_t size = initial_size; // initial object size
        uint8_t slab_class_id = 0U;

        while (_max_object_size / size > 1) {
            size = align_up(size, alignment);
            _slab_class_sizes.push_back(size);
            _slab_classes.emplace_back(size, slab_class_id);
            size *= growth_factor;
            assert(slab_class_id < std::numeric_limits<uint8_t>::max());
            slab_class_id++;
        }
        _slab_class_sizes.push_back(_max_object_size);
        _slab_classes.emplace_back(_max_object_size, slab_class_id);

        // If the slab limit is zero, enable the reclaimer.
        if (!limit) {
            _reclaimer = new memory::reclaimer([this] { return reclaim(); });
        } else {
            _slab_pages_vector.reserve(_available_slab_pages);
        }
    }

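    // Map a requested size to the smallest class that can hold it, using
    // lower_bound over the sorted class sizes. For example, with the classes
    // assumed above (96, 120, 152, ...), a request for 100 bytes resolves to
    // the 120-byte class, and a request larger than _max_object_size returns
    // nullptr.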
    slab_class<Item>* get_slab_class(const size_t size) {
        // given a size, find the slab class with binary search.
        auto i = std::lower_bound(_slab_class_sizes.begin(), _slab_class_sizes.end(), size);
        if (i == _slab_class_sizes.end()) {
            return nullptr;
        }
        auto dist = std::distance(_slab_class_sizes.begin(), i);
        return &_slab_classes[dist];
    }

    slab_class<Item>* get_slab_class(const uint8_t slab_class_id) {
        assert(slab_class_id < _slab_classes.size());
        return &_slab_classes[slab_class_id];
    }

    void register_metrics() {
        namespace sm = seastar::metrics;
        _metrics.add_group("slab", {
            sm::make_counter("malloc_total_operations", sm::description("Total number of slab malloc operations"), _stats.allocs),
            sm::make_counter("free_total_operations", sm::description("Total number of slab free operations"), _stats.frees),
            sm::make_gauge("malloc_objects", sm::description("Number of slab created objects currently in memory"), [this] {
                return _stats.allocs - _stats.frees;
            })
        });
    }

    inline slab_page_desc& get_slab_page_desc(Item *item)
    {
        auto desc = _slab_pages_vector[item->get_slab_page_index()];
        assert(desc != nullptr);
        assert(desc->magic() == SLAB_MAGIC_NUMBER);
        return *desc;
    }

    inline bool can_allocate_page(slab_class<Item>& sc) {
        return (_reclaimer && !_reclaimed) ||
               (_available_slab_pages > 0 || sc.has_no_slab_pages());
    }
public:
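    // Constructor notes: limit is the total amount of memory the slab may use,
    // so limit / max_object_size is the number of slab pages available; for
    // example, a 64 MB limit with 1 MB slab pages yields 64 pages. Passing
    // limit == 0 instead enables the memory reclaimer (see
    // initialize_slab_allocator() above). The figures here are illustrative,
    // not defaults.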
    slab_allocator(double growth_factor, uint64_t limit, uint64_t max_object_size)
        : _max_object_size(max_object_size)
        , _available_slab_pages(limit / max_object_size)
    {
        initialize_slab_allocator(growth_factor, limit);
        register_metrics();
    }

    slab_allocator(double growth_factor, uint64_t limit, uint64_t max_object_size,
                   std::function<void (Item& item_ref)> erase_func)
        : _erase_func(std::move(erase_func))
        , _max_object_size(max_object_size)
        , _available_slab_pages(limit / max_object_size)
    {
        initialize_slab_allocator(growth_factor, limit);
        register_metrics();
    }

    ~slab_allocator()
    {
        _slab_classes.clear();
        _slab_page_desc_lru.clear();
        for (auto desc : _slab_pages_vector) {
            if (!desc) {
                continue;
            }
            ::free(desc->slab_page());
            delete desc;
        }
        delete _reclaimer;
    }

    /**
     * Create an item from the slab class that fits the given size.
     */
    template<typename... Args>
    Item* create(const size_t size, Args&&... args) {
        auto slab_class = get_slab_class(size);
        if (!slab_class) {
            throw std::bad_alloc{};
        }

        Item *item = nullptr;
        if (!slab_class->empty()) {
            item = slab_class->create(std::forward<Args>(args)...);
            _stats.allocs++;
        } else {
            if (can_allocate_page(*slab_class)) {
                auto index_to_insert = _slab_pages_vector.size();
                item = slab_class->create_from_new_page(_max_object_size, index_to_insert,
                    [this](slab_page_desc& desc) {
                        if (_reclaimer) {
                            // insert desc into the LRU list of slab page descriptors.
                            _slab_page_desc_lru.push_front(desc);
                        }
                        // insert desc into the slab page vector.
                        _slab_pages_vector.push_back(&desc);
                    },
                    std::forward<Args>(args)...);
                if (_available_slab_pages > 0) {
                    _available_slab_pages--;
                }
                _stats.allocs++;
            } else if (_erase_func) {
                item = slab_class->create_from_lru(_erase_func, std::forward<Args>(args)...);
            }
        }
        return item;
    }

    void lock_item(Item *item) {
        auto& desc = get_slab_page_desc(item);
        if (_reclaimer) {
            auto& refcnt = desc.refcnt();

            if (++refcnt == 1) {
                // remove the slab page descriptor from the LRU list of slab page descriptors.
                _slab_page_desc_lru.erase(_slab_page_desc_lru.iterator_to(desc));
            }
        }
        // remove the item from the LRU of its slab class.
        auto slab_class = get_slab_class(desc.slab_class_id());
        slab_class->remove_item_from_lru(item);
    }

    void unlock_item(Item *item) {
        auto& desc = get_slab_page_desc(item);
        if (_reclaimer) {
            auto& refcnt = desc.refcnt();

            if (--refcnt == 0) {
                // insert the slab page descriptor back into the LRU list of slab page descriptors.
                _slab_page_desc_lru.push_front(desc);
            }
        }
        // insert the item into the LRU of its slab class.
        auto slab_class = get_slab_class(desc.slab_class_id());
        slab_class->insert_item_into_lru(item);
    }

    /**
     * Free an item back to its original slab class.
     */
    void free(Item *item) {
        if (item) {
            auto& desc = get_slab_page_desc(item);
            auto slab_class = get_slab_class(desc.slab_class_id());
            slab_class->free_item(item, desc);
            _stats.frees++;
        }
    }

    /**
     * Update the item's position in the LRU of its slab class.
     */
    void touch(Item *item) {
        if (item) {
            auto& desc = get_slab_page_desc(item);
            auto slab_class = get_slab_class(desc.slab_class_id());
            slab_class->touch_item(item);
        }
    }

    /**
     * Helper function: print all slab classes and their respective properties.
     */
    void print_slab_classes() {
        auto class_id = 0;
        for (auto& slab_class : _slab_classes) {
            size_t size = slab_class.size();
            printf("slab[%3d]\tsize: %10lu\tper-slab-page: %5lu\n", class_id, size, _max_object_size / size);
            class_id++;
        }
    }

    /**
     * Return the size of the slab class that would hold an object of the
     * given size, or 0 if no class fits it.
     */
    size_t class_size(const size_t size) {
        auto slab_class = get_slab_class(size);
        return (slab_class) ? slab_class->size() : 0;
    }
};
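
// A rough usage sketch (illustrative only; my_item is the hypothetical
// conforming type from the comment near the top of this header, and the
// numbers follow the example size classes assumed above):
//
//   slab_allocator<my_item> slab(1.25, 64 * 1024 * 1024, 1 * 1024 * 1024);
//   my_item* item = slab.create(100);   // e.g. served from the 120-byte class
//   slab.lock_item(item);               // pin: exclude from LRU eviction
//   slab.unlock_item(item);             // unpin: back to the LRU front
//   slab.touch(item);                   // mark as recently used
//   slab.free(item);                    // return the object to its slab page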

}