Seastar
High performance C++ framework for concurrent servers
io_queue.hh
1 /*
2  * This file is open source software, licensed to you under the terms
3  * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4  * distributed with this work for additional information regarding copyright
5  * ownership. You may not use this file except in compliance with the License.
6  *
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an
13  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14  * KIND, either express or implied. See the License for the
15  * specific language governing permissions and limitations
16  * under the License.
17  */
18 /*
19  * Copyright 2019 ScyllaDB
20  */
21 
22 #pragma once
23 
24 #ifndef SEASTAR_MODULE
25 #include <boost/container/small_vector.hpp>
26 #include <chrono>
27 #include <memory>
28 #include <vector>
29 #include <sys/uio.h>
30 #endif
31 #include <seastar/core/sstring.hh>
32 #include <seastar/core/fair_queue.hh>
33 #include <seastar/core/metrics_registration.hh>
34 #include <seastar/core/future.hh>
35 #include <seastar/core/internal/io_request.hh>
36 #include <seastar/core/lowres_clock.hh>
37 #include <seastar/util/spinlock.hh>
38 #include <seastar/util/modules.hh>
39 
40 struct io_queue_for_tests;
41 
42 namespace seastar {
43 
44 class io_queue;
45 namespace internal {
// Returns a const reference to a fair_group for the given queue and stream
// index. Declared ahead of io_queue and io_group because both classes name
// this function as a friend. The definition is not part of this header.
// NOTE(review): presumably the group backing stream `stream` of `ioq` — confirm
// against the implementation.
46 const fair_group& get_fair_group(const io_queue& ioq, unsigned stream);
47 }
48 
49 #if SEASTAR_API_LEVEL < 7
50 SEASTAR_MODULE_EXPORT
51 class io_priority_class;
52 
// Deprecated free-function form of renaming an I/O priority class; the
// attribute message directs callers to io_priority_class's rename instead.
// Only declared when SEASTAR_API_LEVEL < 7 (see the enclosing #if).
// NOTE(review): listing line 54 (the return type) was absent from this copy,
// leaving the declaration ill-formed; restored as future<> — confirm against
// the upstream header.
53 [[deprecated("Use io_priority_class.rename")]]
54 future<>
55 rename_priority_class(io_priority_class pc, sstring new_name);
56 #endif
57 
58 SEASTAR_MODULE_EXPORT
59 class io_intent;
60 
61 namespace internal {
62 class io_sink;
63 namespace linux_abi {
64 
65 struct io_event;
66 struct iocb;
67 
68 }
69 }
70 
// Plain unsigned integers used as identifiers for shards and for streams.
71 using shard_id = unsigned;
72 using stream_id = unsigned;
73 
// Forward declarations. io_group is defined further down in this header;
// the other two are only referenced here by pointer or reference.
74 class io_desc_read_write;
75 class queued_io_request;
76 class io_group;
77 
// Shared-ownership handle to an io_group.
78 using io_group_ptr = std::shared_ptr<io_group>;
// Vector of ::iovec entries that accompanies a submitted request (see the
// `iovs` parameter of io_queue::submit_io_read / submit_io_write).
// NOTE(review): presumably keeps the iovec array alive while the request is
// in flight — confirm against the implementation.
79 using iovec_keeper = std::vector<::iovec>;
80 
81 namespace internal {
82 struct maybe_priority_class_ref;
// Small value type wrapping an unsigned class identifier.
//
// All constructors are explicit and noexcept. An instance can be built from
// a scheduling_group, from a maybe_priority_class_ref, and — only when
// SEASTAR_API_LEVEL < 7 — from an io_priority_class. How each source is
// mapped to the stored id is defined in the implementation, not here.
83 class priority_class {
84  unsigned _id;
85 public:
86 #if SEASTAR_API_LEVEL < 7
87  explicit priority_class(const io_priority_class& pc) noexcept;
88 #endif
89  explicit priority_class(const scheduling_group& sg) noexcept;
90  explicit priority_class(internal::maybe_priority_class_ref pc) noexcept;
    // Returns the stored identifier.
91  unsigned id() const noexcept { return _id; }
92 };
93 }
94 
// Queue through which I/O requests are submitted, cancelled and completed.
//
// An io_queue holds per-class bookkeeping objects (priority_class_data), a
// shared handle to an io_group, a small vector of fair_queue streams and a
// reference to an io_sink. Its configuration is not stored in the queue
// itself: get_config() reads it from the io_group.
95 class io_queue {
96 public:
    // Per-priority-class state; defined in the implementation.
97  class priority_class_data;
98 
99 private:
100  std::vector<std::unique_ptr<priority_class_data>> _priority_classes;
101  io_group_ptr _group;
    // small_vector with inline capacity for two fair_queue objects.
102  boost::container::small_vector<fair_queue, 2> _streams;
    // Held by reference, so the sink must outlive this queue.
103  internal::io_sink& _sink;
104 
    // Test helper and get_fair_group() are granted access to private members.
105  friend struct ::io_queue_for_tests;
106  friend const fair_group& internal::get_fair_group(const io_queue& ioq, unsigned stream);
107 
108  priority_class_data& find_or_create_class(internal::priority_class pc);
    // Internal queuing entry points. Both take the iovec_keeper by value and
    // return a future resolving to a size_t.
    // NOTE(review): how queue_request relates to queue_one_request (e.g.
    // request splitting) is not visible in this header — confirm.
109  future<size_t> queue_request(internal::priority_class pc, internal::io_direction_and_length dnl, internal::io_request req, io_intent* intent, iovec_keeper iovs) noexcept;
110  future<size_t> queue_one_request(internal::priority_class pc, internal::io_direction_and_length dnl, internal::io_request req, io_intent* intent, iovec_keeper iovs) noexcept;
111 
112  // The fields below are going away, they are just here so we can implement deprecated
113  // functions that used to be provided by the fair_queue and are going away (from both
114  // the fair_queue and the io_queue). Double-accounting for now will allow for easier
115  // decoupling and is temporary
116  size_t _queued_requests = 0;
117  size_t _requests_executing = 0;
118  uint64_t _requests_dispatched = 0;
119  uint64_t _requests_completed = 0;
120 
121  // Flow monitor
    // NOTE(review): the exact meaning of the flow ratio and how these fields
    // are updated live in update_flow_ratio(), which is not defined here.
122  uint64_t _prev_dispatched = 0;
123  uint64_t _prev_completed = 0;
124  double _flow_ratio = 1.0;
125  timer<lowres_clock> _flow_ratio_update;
126 
127  void update_flow_ratio() noexcept;
128 
129  metrics::metric_groups _metric_groups;
130 public:
131 
132  using clock_type = std::chrono::steady_clock;
133 
134  // We want to represent the fact that write requests are (maybe) more expensive
135  // than read requests. To avoid dealing with floating point math we will scale one
136  // read request to be counted by this amount.
137  //
138  // A write request that is 30% more expensive than a read will be accounted as
139  // (read_request_base_count * 130) / 100.
140  // It is also technically possible for reads to be the expensive ones, in which case
141  // writes will have an integer value lower than read_request_base_count.
142  static constexpr unsigned read_request_base_count = 128;
    // NOTE(review): a shift of 9 presumably means 512-byte blocks — confirm.
143  static constexpr unsigned block_size_shift = 9;
144 
    // Tunables for the queue. A const copy is stored in io_group and exposed
    // through io_queue::get_config().
145  struct config {
    // No default member initializer: callers must set devid explicitly.
146  dev_t devid;
    // Both rates default to numeric_limits<int>::max(), although the fields
    // themselves are unsigned long.
147  unsigned long req_count_rate = std::numeric_limits<int>::max();
148  unsigned long blocks_count_rate = std::numeric_limits<int>::max();
    // Both multipliers default to read_request_base_count (see the comment
    // on that constant above).
149  unsigned disk_req_write_to_read_multiplier = read_request_base_count;
150  unsigned disk_blocks_write_to_read_multiplier = read_request_base_count;
151  size_t disk_read_saturation_length = std::numeric_limits<size_t>::max();
152  size_t disk_write_saturation_length = std::numeric_limits<size_t>::max();
153  sstring mountpoint = "undefined";
154  bool duplex = false;
155  float rate_factor = 1.0;
156  std::chrono::duration<double> rate_limit_duration = std::chrono::milliseconds(1);
157  size_t block_count_limit_min = 1;
158  unsigned flow_ratio_ticks = 100;
159  double flow_ratio_ema_factor = 0.95;
160  double flow_ratio_backpressure_threshold = 1.1;
161  };
162 
163  io_queue(io_group_ptr group, internal::io_sink& sink);
164  ~io_queue();
165 
166  stream_id request_stream(internal::io_direction_and_length dnl) const noexcept;
167 
    // Submit a read / write of `len` bytes under the given priority class.
    // `intent` is a raw pointer; `iovs` defaults to an empty vector. The
    // returned future resolves to a size_t.
168  future<size_t> submit_io_read(internal::priority_class priority_class,
169  size_t len, internal::io_request req, io_intent* intent, iovec_keeper iovs = {}) noexcept;
170  future<size_t> submit_io_write(internal::priority_class priority_class,
171  size_t len, internal::io_request req, io_intent* intent, iovec_keeper iovs = {}) noexcept;
172 
173  void submit_request(io_desc_read_write* desc, internal::io_request req) noexcept;
174  void cancel_request(queued_io_request& req) noexcept;
175  void complete_cancelled_request(queued_io_request& req) noexcept;
176  void complete_request(io_desc_read_write& desc) noexcept;
177 
    // Deprecated: returns the _queued_requests counter.
178  [[deprecated("I/O queue users should not track individual requests, but resources (weight, size) passing through the queue")]]
179  size_t queued_requests() const {
180  return _queued_requests;
181  }
182 
183  // How many requests are sent to disk but not yet returned.
184  [[deprecated("I/O queue users should not track individual requests, but resources (weight, size) passing through the queue")]]
185  size_t requests_currently_executing() const {
186  return _requests_executing;
187  }
188 
189  // Dispatch requests that are pending in the I/O queue
190  void poll_io_queue();
191 
192  clock_type::time_point next_pending_aio() const noexcept;
193  fair_queue_entry::capacity_t request_capacity(internal::io_direction_and_length dnl) const noexcept;
194 
    // Both accessors read from get_config(); mountpoint() returns by value.
195  sstring mountpoint() const;
196  dev_t dev_id() const noexcept;
197 
198  void update_shares_for_class(internal::priority_class pc, size_t new_shares);
199  future<> update_bandwidth_for_class(internal::priority_class pc, uint64_t new_bandwidth);
200  void rename_priority_class(internal::priority_class pc, sstring new_name);
201  void throttle_priority_class(const priority_class_data& pc) noexcept;
202  void unthrottle_priority_class(const priority_class_data& pc) noexcept;
203 
    // Pair of size limits, one for reads and one for writes.
204  struct request_limits {
205  size_t max_read;
206  size_t max_write;
207  };
208 
209  request_limits get_request_limits() const noexcept;
    // Returns the configuration held by the io_group (defined inline below
    // the io_group class, which must be complete first).
210  const config& get_config() const noexcept;
211 
212 private:
213  static fair_queue::config make_fair_queue_config(const config& cfg, sstring label);
214  void register_stats(sstring name, priority_class_data& pc);
215 };
216 
// State referenced by io_queue objects through io_group_ptr (a shared_ptr).
//
// Holds a const copy of the io_queue::config, the fair_group objects, a set
// of per-class data and a spinlock. io_queue, the test helper and
// internal::get_fair_group() are friends and read the private members
// directly.
217 class io_group {
218 public:
    // Takes the configuration by value together with a queue count.
    // NOTE(review): how nr_queues is used is not visible in this header.
219  explicit io_group(io_queue::config io_cfg, unsigned nr_queues);
220  ~io_group();
    // Group-level per-class state; distinct from io_queue::priority_class_data.
221  struct priority_class_data;
222 
223  std::chrono::duration<double> io_latency_goal() const noexcept;
224 
225 private:
226  friend class io_queue;
227  friend struct ::io_queue_for_tests;
228  friend const fair_group& internal::get_fair_group(const io_queue& ioq, unsigned stream);
229 
    // Immutable after construction; returned by io_queue::get_config().
230  const io_queue::config _config;
    // NOTE(review): two entries, presumably one per I/O direction
    // (read/write) — confirm the index convention in the implementation.
231  size_t _max_request_length[2];
232  std::vector<std::unique_ptr<fair_group>> _fgs;
233  std::vector<std::unique_ptr<priority_class_data>> _priority_classes;
    // NOTE(review): presumably guards the state shared between the queues
    // of this group — which members it protects is not shown here.
234  util::spinlock _lock;
235  const shard_id _allocated_on;
236 
237  static fair_group::config make_fair_group_config(const io_queue::config& qcfg) noexcept;
238  priority_class_data& find_or_create_class(internal::priority_class pc);
239 };
240 
// Exposes the configuration stored in the io_group this queue refers to.
// Defined out of class because io_group has to be a complete type here.
241 inline const io_queue::config& io_queue::get_config() const noexcept {
242  return (*_group)._config;
243 }
244 
// Returns a copy of the mountpoint string taken from the queue's configuration.
245 inline sstring io_queue::mountpoint() const {
246  return sstring(get_config().mountpoint);
247 }
248 
// Returns the device id recorded in the configuration held by the io_group.
249 inline dev_t io_queue::dev_id() const noexcept {
250  return _group->_config.devid;
251 }
252 
253 namespace internal {
// Computes a floating-point token value from a request's direction and length
// and a queue configuration. Declared only; the formula is in the
// implementation file.
// NOTE(review): which config fields feed the result is not visible here.
254 double request_tokens(io_direction_and_length dnl, const io_queue::config& cfg) noexcept;
255 }
256 
257 }
Group of queues class.
Definition: fair_queue.hh:138
Definition: fair_queue.hh:236
Fair queuing class.
Definition: fair_queue.hh:290
Definition: io_queue.hh:217
Definition: io_intent.hh:44
Definition: io_queue.hh:95
Definition: io_queue.hh:145
Definition: io_queue.hh:204
holds the metric definition.
Definition: metrics_registration.hh:94
Definition: stream.hh:61
Definition: spinlock.hh:88
holds the metric_groups definition needed by class that reports metrics
Seastar API namespace.
Definition: abort_on_ebadf.hh:26