Seastar
High performance C++ framework for concurrent servers
io_queue.hh
1 /*
2  * This file is open source software, licensed to you under the terms
3  * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4  * distributed with this work for additional information regarding copyright
5  * ownership. You may not use this file except in compliance with the License.
6  *
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an
13  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14  * KIND, either express or implied. See the License for the
15  * specific language governing permissions and limitations
16  * under the License.
17  */
18 /*
19  * Copyright 2019 ScyllaDB
20  */
21 
22 #pragma once
23 
24 #ifndef SEASTAR_MODULE
25 #include <boost/container/small_vector.hpp>
26 #include <chrono>
27 #include <memory>
28 #include <vector>
29 #include <sys/uio.h>
30 #endif
31 #include <seastar/core/sstring.hh>
32 #include <seastar/core/fair_queue.hh>
33 #include <seastar/core/metrics_registration.hh>
34 #include <seastar/core/future.hh>
35 #include <seastar/core/internal/io_request.hh>
36 #include <seastar/util/spinlock.hh>
37 #include <seastar/util/modules.hh>
38 
// Forward declaration of a test-only helper that lives in the global namespace.
// Both io_queue and io_group declare it a friend further down in this header.
39 struct io_queue_for_tests;
40 
41 namespace seastar {
42 
// io_queue is forward-declared so that the accessor below can name it before
// the class definition appears.
43 class io_queue;
44 namespace internal {
// Returns a const reference to the fair_group associated with stream number
// `stream` of `ioq`. Defined out of line; both io_queue and io_group declare
// this function a friend, so it can reach their private members.
45 const fair_group& get_fair_group(const io_queue& ioq, unsigned stream);
46 }
47 
// Legacy API, compiled only for SEASTAR_API_LEVEL < 7.
48 #if SEASTAR_API_LEVEL < 7
49 SEASTAR_MODULE_EXPORT
50 class io_priority_class;
51 
// Deprecated free function for renaming an I/O priority class; the
// deprecation message points callers at io_priority_class's own rename.
// The return type line (listing line 53) had been lost from this listing,
// which left the declaration without a return type; it is restored here.
52 [[deprecated("Use io_priority_class.rename")]]
53 future<>
54 rename_priority_class(io_priority_class pc, sstring new_name);
55 #endif
56 
// Forward declaration (exported from the module). In this header io_intent is
// only used through pointers: the io_queue submit/queue methods take an
// `io_intent*` argument.
57 SEASTAR_MODULE_EXPORT
58 class io_intent;
59 
// Forward declarations of internal types.
60 namespace internal {
// io_queue stores a reference to an io_sink (see io_queue::_sink).
61 class io_sink;
62 namespace linux_abi {
63 
// Forward-declared only; not referenced anywhere else in the visible part of
// this header.
64 struct io_event;
65 struct iocb;
66 
67 }
68 }
69 
// Plain unsigned aliases used for shard and stream identifiers.
70 using shard_id = unsigned;
71 using stream_id = unsigned;
72 
// Forward declarations for types that appear in io_queue's method signatures
// and for io_group, which is defined after io_queue in this header.
73 class io_desc_read_write;
74 class queued_io_request;
75 class io_group;
76 
// io_group is held through a std::shared_ptr.
77 using io_group_ptr = std::shared_ptr<io_group>;
// Container of ::iovec entries passed along with a request.
// NOTE(review): presumably keeps the iovec array alive while the request is
// in flight -- confirm against the implementation.
78 using iovec_keeper = std::vector<::iovec>;
79 
80 namespace internal {
81 struct maybe_priority_class_ref;
// Small value type wrapping an unsigned class identifier.
//
// All constructors are explicit and noexcept. An instance can be built from a
// scheduling_group, from a maybe_priority_class_ref, and (only when
// SEASTAR_API_LEVEL < 7) from the legacy io_priority_class. How the id is
// derived from each source is defined out of line.
82 class priority_class {
83  unsigned _id;
84 public:
85 #if SEASTAR_API_LEVEL < 7
86  explicit priority_class(const io_priority_class& pc) noexcept;
87 #endif
88  explicit priority_class(const scheduling_group& sg) noexcept;
89  explicit priority_class(internal::maybe_priority_class_ref pc) noexcept;
    // Returns the stored identifier.
90  unsigned id() const noexcept { return _id; }
91 };
92 }
93 
// I/O queue.
//
// An io_queue shares an io_group (via io_group_ptr), owns a small vector of
// fair_queue "streams", and holds a reference to an internal::io_sink. Its
// configuration is not stored locally: get_config() reads it from the group.
// NOTE(review): presumably there is one io_queue per shard per device, with
// the io_group shared between shards -- confirm against the implementation.
94 class io_queue {
95 public:
    // Per-priority-class state; defined out of line.
96  class priority_class_data;
97 
98 private:
    // Owned per-class state objects.
    // NOTE(review): presumably indexed by internal::priority_class::id() -- confirm.
99  std::vector<std::unique_ptr<priority_class_data>> _priority_classes;
    // Shared group; also the source of this queue's config (see get_config()).
100  io_group_ptr _group;
    // Fair queues, with inline storage for two elements.
    // NOTE(review): presumably indexed by the stream_id returned from
    // request_stream() -- confirm.
101  boost::container::small_vector<fair_queue, 2> _streams;
    // Held by reference, so the sink must outlive this queue.
102  internal::io_sink& _sink;
103 
104  friend struct ::io_queue_for_tests;
105  friend const fair_group& internal::get_fair_group(const io_queue& ioq, unsigned stream);
106 
    // Not noexcept, unlike the queue_* methods below.
107  priority_class_data& find_or_create_class(internal::priority_class pc);
    // Internal queueing entry points. Both are noexcept and return
    // future<size_t>.
    // NOTE(review): presumably queue_request() may split a request and hand
    // the pieces to queue_one_request() -- confirm.
108  future<size_t> queue_request(internal::priority_class pc, internal::io_direction_and_length dnl, internal::io_request req, io_intent* intent, iovec_keeper iovs) noexcept;
109  future<size_t> queue_one_request(internal::priority_class pc, internal::io_direction_and_length dnl, internal::io_request req, io_intent* intent, iovec_keeper iovs) noexcept;
110 
111  // The fields below are going away, they are just here so we can implement deprecated
112  // functions that used to be provided by the fair_queue and are going away (from both
113  // the fair_queue and the io_queue). Double-accounting for now will allow for easier
114  // decoupling and is temporary
115  size_t _queued_requests = 0;
116  size_t _requests_executing = 0;
117 public:
118 
    // Clock used for time points exposed by this class (see next_pending_aio()).
119  using clock_type = std::chrono::steady_clock;
120 
121  // We want to represent the fact that write requests are (maybe) more expensive
122  // than read requests. To avoid dealing with floating point math we will scale one
123  // read request to be counted by this amount.
124  //
125  // A write request that is 30% more expensive than a read will be accounted as
126  // (read_request_base_count * 130) / 100.
127  // It is also technically possible for reads to be the expensive ones, in which case
128  // writes will have an integer value lower than read_request_base_count.
129  static constexpr unsigned read_request_base_count = 128;
    // NOTE(review): 1 << 9 == 512; presumably the accounting block size in
    // bytes -- confirm.
130  static constexpr unsigned block_size_shift = 9;
131 
    // Queue configuration. Every field except devid has a default value.
    // NOTE(review): devid has no default initializer, so a default-initialized
    // config leaves it indeterminate; callers are expected to set it.
    // NOTE(review): std::numeric_limits is used here but <limits> is not among
    // the includes visible in this header -- presumably pulled in transitively.
132  struct config {
133  dev_t devid;
    // The two rates default to the maximum value of int, although the fields
    // themselves are unsigned long.
134  unsigned long req_count_rate = std::numeric_limits<int>::max();
135  unsigned long blocks_count_rate = std::numeric_limits<int>::max();
    // Write-to-read cost multipliers, expressed relative to
    // read_request_base_count (the default means "writes cost the same as reads").
136  unsigned disk_req_write_to_read_multiplier = read_request_base_count;
137  unsigned disk_blocks_write_to_read_multiplier = read_request_base_count;
138  size_t disk_read_saturation_length = std::numeric_limits<size_t>::max();
139  size_t disk_write_saturation_length = std::numeric_limits<size_t>::max();
140  sstring mountpoint = "undefined";
    // NOTE(review): presumably selects separate streams for reads and writes
    // -- confirm.
141  bool duplex = false;
142  float rate_factor = 1.0;
    // Stored as a floating-point number of seconds; defaults to one millisecond.
143  std::chrono::duration<double> rate_limit_duration = std::chrono::milliseconds(1);
144  size_t block_count_limit_min = 1;
145  };
146 
    // Builds a queue attached to `group`; `sink` is stored by reference.
147  io_queue(io_group_ptr group, internal::io_sink& sink);
148  ~io_queue();
149 
    // Returns the stream id for a request with the given direction and length.
150  stream_id request_stream(internal::io_direction_and_length dnl) const noexcept;
151 
    // Submit a read / write of `len` bytes under `priority_class`. `intent`
    // is an optional pointer and `iovs` defaults to an empty vector.
    // NOTE(review): the future presumably resolves to the number of bytes
    // transferred -- confirm.
152  future<size_t> submit_io_read(internal::priority_class priority_class,
153  size_t len, internal::io_request req, io_intent* intent, iovec_keeper iovs = {}) noexcept;
154  future<size_t> submit_io_write(internal::priority_class priority_class,
155  size_t len, internal::io_request req, io_intent* intent, iovec_keeper iovs = {}) noexcept;
156 
    // Request life-cycle hooks; all noexcept.
157  void submit_request(io_desc_read_write* desc, internal::io_request req) noexcept;
158  void cancel_request(queued_io_request& req) noexcept;
159  void complete_cancelled_request(queued_io_request& req) noexcept;
160  void complete_request(io_desc_read_write& desc) noexcept;
161 
    // Returns the legacy _queued_requests counter.
162  [[deprecated("I/O queue users should not track individual requests, but resources (weight, size) passing through the queue")]]
163  size_t queued_requests() const {
164  return _queued_requests;
165  }
166 
167  // How many requests are sent to disk but not yet returned.
168  [[deprecated("I/O queue users should not track individual requests, but resources (weight, size) passing through the queue")]]
169  size_t requests_currently_executing() const {
170  return _requests_executing;
171  }
172 
173  // Dispatch requests that are pending in the I/O queue
174  void poll_io_queue();
175 
176  clock_type::time_point next_pending_aio() const noexcept;
177  fair_queue_entry::capacity_t request_capacity(internal::io_direction_and_length dnl) const noexcept;
178 
    // Both accessors read from the group's config; mountpoint() returns a copy.
179  sstring mountpoint() const;
180  dev_t dev_id() const noexcept;
181 
    // Per-class tuning. Note that the bandwidth update returns a future
    // while the shares update and the rename are synchronous.
182  void update_shares_for_class(internal::priority_class pc, size_t new_shares);
183  future<> update_bandwidth_for_class(internal::priority_class pc, uint64_t new_bandwidth);
184  void rename_priority_class(internal::priority_class pc, sstring new_name);
185  void throttle_priority_class(const priority_class_data& pc) noexcept;
186  void unthrottle_priority_class(const priority_class_data& pc) noexcept;
187 
    // Maximum request sizes for reads and writes, as reported by
    // get_request_limits().
    // NOTE(review): presumably in bytes -- confirm.
188  struct request_limits {
189  size_t max_read;
190  size_t max_write;
191  };
192 
193  request_limits get_request_limits() const noexcept;
    // Defined inline after io_group, because it needs the complete io_group type.
194  const config& get_config() const noexcept;
195 
196 private:
197  static fair_queue::config make_fair_queue_config(const config& cfg, sstring label);
198  void register_stats(sstring name, priority_class_data& pc);
199 };
200 
// State shared between io_queue instances through io_group_ptr.
//
// The group owns the (immutable) io_queue::config, a set of fair_group
// objects, and its own per-priority-class data. io_queue, the test helper,
// and internal::get_fair_group() are friends, so they read the private
// members directly.
201 class io_group {
202 public:
    // Takes the configuration by value; it is kept in the const member _config.
203  explicit io_group(io_queue::config io_cfg, unsigned nr_queues);
204  ~io_group();
    // Group-level per-class state; defined out of line. This is a different
    // type from io_queue::priority_class_data.
205  struct priority_class_data;
206 
207 private:
208  friend class io_queue;
209  friend struct ::io_queue_for_tests;
210  friend const fair_group& internal::get_fair_group(const io_queue& ioq, unsigned stream);
211 
    // Returned by reference from io_queue::get_config().
212  const io_queue::config _config;
    // NOTE(review): two entries, presumably one per I/O direction (read and
    // write) -- confirm the index order.
213  size_t _max_request_length[2];
214  std::vector<std::unique_ptr<fair_group>> _fgs;
215  std::vector<std::unique_ptr<priority_class_data>> _priority_classes;
    // NOTE(review): presumably guards _priority_classes, since the group is
    // shared between queues -- confirm.
216  util::spinlock _lock;
    // NOTE(review): presumably the shard the group was constructed on -- confirm.
217  const shard_id _allocated_on;
218 
219  static fair_group::config make_fair_group_config(const io_queue::config& qcfg) noexcept;
220  priority_class_data& find_or_create_class(internal::priority_class pc);
221 };
222 
// Returns the configuration held by the io_group this queue is attached to.
// The reference refers to the group's own const member.
223 inline const io_queue::config& io_queue::get_config() const noexcept {
224  return (*_group)._config;
225 }
226 
// Returns a copy of the mountpoint string from the group's configuration.
227 inline sstring io_queue::mountpoint() const {
228  return _group->_config.mountpoint;
229 }
230 
// Returns the device id recorded in the group's configuration.
231 inline dev_t io_queue::dev_id() const noexcept {
232  return _group->_config.devid;
233 }
234 
235 namespace internal {
// Declared here, defined out of line. Computes a double from the request's
// direction and length and the queue configuration.
// NOTE(review): presumably the token cost charged for the request -- confirm
// against the implementation.
236 double request_tokens(io_direction_and_length dnl, const io_queue::config& cfg) noexcept;
237 }
238 
239 }
Group of queues class.
Definition: fair_queue.hh:138
Definition: fair_queue.hh:236
Fair queuing class.
Definition: fair_queue.hh:291
Definition: io_queue.hh:201
Definition: io_intent.hh:44
Definition: io_queue.hh:94
Definition: io_queue.hh:132
Definition: io_queue.hh:188
Definition: stream.hh:61
Definition: spinlock.hh:88
holds the metric_groups definition needed by class that reports metrics
Seastar API namespace.
Definition: abort_on_ebadf.hh:26