Seastar
High performance C++ framework for concurrent servers
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
net.hh
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2014 Cloudius Systems, Ltd.
20 */
21
22#pragma once
23
24#include <seastar/core/smp.hh>
25#include <seastar/core/deleter.hh>
26#include <seastar/core/queue.hh>
27#include <seastar/core/stream.hh>
29#include <seastar/net/toeplitz.hh>
30#include <seastar/net/ethernet.hh>
31#include <seastar/net/packet.hh>
32#include <seastar/net/const.hh>
33#include <seastar/util/assert.hh>
34#include <unordered_map>
35
36namespace seastar {
37
38namespace internal {
39
40class poller;
41
42}
43
44namespace net {
45
46class packet;
47class interface;
48class device;
49class qp;
50class l3_protocol;
51
53 uint8_t data[64];
54 size_t end_idx = 0;
55public:
// Returns the number of bytes pushed into the buffer so far.
56 size_t size() const {
57 return end_idx;
58 }
// Appends one byte to the buffer. Asserts that the fixed-size storage
// (sizeof(data)) is not already full before writing.
59 void push_back(uint8_t b) {
60 SEASTAR_ASSERT(end_idx < sizeof(data));
61 data[end_idx++] = b;
62 }
// Appends a 16-bit value as two bytes: the low byte first, then the high byte.
63 void push_back(uint16_t b) {
64 push_back(uint8_t(b));
65 push_back(uint8_t(b >> 8));
66 }
// Appends a 32-bit value as two 16-bit halves, low half first. Combined with
// the 16-bit overload, the four bytes are stored least-significant byte first.
67 void push_back(uint32_t b) {
68 push_back(uint16_t(b));
69 push_back(uint16_t(b >> 16));
70 }
// Read-only access to the byte at position idx. idx is not range-checked here.
71 const uint8_t& operator[](size_t idx) const {
72 return data[idx];
73 }
74};
75
77 // Enable tx ip header checksum offload
78 bool tx_csum_ip_offload = false;
79 // Enable tx l4 (TCP or UDP) checksum offload
80 bool tx_csum_l4_offload = false;
81 // Enable rx checksum offload
82 bool rx_csum_offload = false;
83 // LRO is enabled
84 bool rx_lro = false;
85 // Enable tx TCP segment offload
86 bool tx_tso = false;
87 // Enable tx UDP fragmentation offload
88 bool tx_ufo = false;
89 // Maximum Transmission Unit
90 uint16_t mtu = 1500;
91 // Maximum packet length when TCP/UDP offload is enabled
92 uint16_t max_packet_len = ip_packet_len_max - eth_hdr_len;
93};
94
96public:
97 struct l3packet {
98 eth_protocol_num proto_num;
100 packet p;
101 };
102 using packet_provider_type = std::function<std::optional<l3packet> ()>;
103private:
104 interface* _netif;
105 eth_protocol_num _proto_num;
106public:
107 explicit l3_protocol(interface* netif, eth_protocol_num proto_num, packet_provider_type func);
108 future<> receive(
109 std::function<future<> (packet, ethernet_address)> rx_fn,
110 std::function<bool (forward_hash&, packet&, size_t)> forward);
111private:
112 friend class interface;
113};
114
116 struct l3_rx_stream {
118 future<> ready;
119 std::function<bool (forward_hash&, packet&, size_t)> forward;
// Takes the forwarding predicate by rvalue reference and moves it into `forward`.
// A named rvalue-reference parameter is itself an lvalue, so without std::move
// the std::function (and the callable it holds) would be copied instead of moved.
120 l3_rx_stream(std::function<bool (forward_hash&, packet&, size_t)>&& fw) : ready(packet_stream.started()), forward(std::move(fw)) {}
121 };
122 std::unordered_map<uint16_t, l3_rx_stream> _proto_map;
123 std::shared_ptr<device> _dev;
124 ethernet_address _hw_address;
125 net::hw_features _hw_features;
126 std::vector<l3_protocol::packet_provider_type> _pkt_providers;
127private:
128 future<> dispatch_packet(packet p);
129public:
130 explicit interface(std::shared_ptr<device> dev);
131 ethernet_address hw_address() const noexcept { return _hw_address; }
132 const net::hw_features& hw_features() const { return _hw_features; }
133 future<> register_l3(eth_protocol_num proto_num,
134 std::function<future<> (packet p, ethernet_address from)> next,
135 std::function<bool (forward_hash&, packet&, size_t)> forward);
136 void forward(unsigned cpuid, packet p);
137 unsigned hash2cpu(uint32_t hash);
// Appends func to the list of packet providers (_pkt_providers); the callable
// is moved into the vector.
138 void register_packet_provider(l3_protocol::packet_provider_type func) {
139 _pkt_providers.push_back(std::move(func));
140 }
141 uint16_t hw_queues_count();
142 rss_key_type rss_key() const;
143 friend class l3_protocol;
144};
145
// Records a bunch of `count` packets: stores it as the size of the most recent
// bunch (last_bunch) and adds it to the running packet total (packets).
154 void update_pkts_bunch(uint64_t count) {
155 last_bunch = count;
156 packets += count;
157 }
158
// Adds nr_frags to the copied-fragments counter (copy_frags) and bytes to the
// copied-bytes counter (copy_bytes).
166 void update_copy_stats(uint64_t nr_frags, uint64_t bytes) {
167 copy_frags += nr_frags;
168 copy_bytes += bytes;
169 }
170
// Adds nfrags to the total fragment counter (nr_frags) and nbytes to the total
// byte counter (bytes).
177 void update_frags_stats(uint64_t nfrags, uint64_t nbytes) {
178 nr_frags += nfrags;
179 bytes += nbytes;
180 }
181
182 uint64_t bytes; // total number of bytes
183 uint64_t nr_frags; // total number of fragments
184 uint64_t copy_frags; // fragments that were copied on L2 level
185 uint64_t copy_bytes; // bytes that were copied on L2 level
186 uint64_t packets; // total number of packets
187 uint64_t last_bunch; // number of packets in the last sent/received bunch
188};
189
// Statistics for one qp, split into a receive (rx) and a transmit (tx) section.
190struct qp_stats {
    // Value-initializes rx and tx, so the integer counters declared directly in
    // this struct start at zero.
191 qp_stats() : rx{}, tx{} {}
192
193 struct {
    // Counters kept in a qp_stats_good for the rx section.
194 struct qp_stats_good good;
195
196 struct {
    // Counts one packet with a bad checksum and bumps the overall error total.
197 void inc_csum_err() {
198 ++csum;
199 ++total;
200 }
201
    // Counts one packet dropped due to an allocation failure and bumps the
    // overall error total.
202 void inc_no_mem() {
203 ++no_mem;
204 ++total;
205 }
206
207 uint64_t no_mem; // Packets dropped due to allocation failure
208 uint64_t total; // total number of erroneous packets
209 uint64_t csum; // packets with bad checksum
210 } bad;
211 } rx;
212
213 struct {
    // Counters kept in a qp_stats_good for the tx section.
214 struct qp_stats_good good;
215 uint64_t linearized; // number of packets that were linearized
216 } tx;
217};
218
// Base class for a qp (presumably "queue pair" - confirm). send(packet) is pure
// virtual, so a derived class must provide the single-packet transmit path.
219class qp {
    // Callable that yields an optional packet; instances are collected through
    // register_packet_provider().
220 using packet_provider_type = std::function<std::optional<packet> ()>;
221 std::vector<packet_provider_type> _pkt_providers;
    // Optional 128-entry software redirection table. device::forward_dst() reads
    // it: when set, a hash-derived index selects the destination cpu from it.
222 std::optional<std::array<uint8_t, 128>> _sw_reta;
    // Packets queued by proxy_send().
223 circular_buffer<packet> _proxy_packetq;
    // device::l2receive() produces received packets into this stream.
224 stream<packet> _rx_stream;
225 std::unique_ptr<internal::poller> _tx_poller;
226 circular_buffer<packet> _tx_packetq;
227
228protected:
229 const std::string _stats_plugin_name;
230 const std::string _queue_name;
231 metrics::metric_groups _metrics;
232 qp_stats _stats;
233
234public:
    // Defined out of line. Defaults: register_copy_stats = false,
    // stats_plugin_name = "network", qid = 0.
235 qp(bool register_copy_stats = false,
236 const std::string stats_plugin_name = std::string("network"),
237 uint8_t qid = 0);
238 virtual ~qp();
    // Sends a single packet; must be implemented by the derived class.
239 virtual future<> send(packet p) = 0;
    // Default batch send: drains p from front to back, calling send(packet) once
    // per element, and returns the number of packets handed over. The futures
    // returned by the per-packet send are discarded, so a failure of an
    // individual send is not reported to the caller.
240 virtual uint32_t send(circular_buffer<packet>& p) {
241 uint32_t sent = 0;
242 while (!p.empty()) {
243 // FIXME: future is discarded
244 (void)send(std::move(p.front()));
245 p.pop_front();
246 sent++;
247 }
248 return sent;
249 }
    // Hook with an empty default implementation; derived classes may override it.
250 virtual void rx_start() {};
251 void configure_proxies(const std::map<unsigned, float>& cpu_weights);
252 // build REdirection TAble for cpu_weights map: target cpu -> weight
253 void build_sw_reta(const std::map<unsigned, float>& cpu_weights);
    // Appends p to _proxy_packetq; nothing is transmitted by this call itself.
254 void proxy_send(packet p) {
255 _proxy_packetq.push_back(std::move(p));
256 }
    // Appends func to _pkt_providers; the callable is moved into the vector.
257 void register_packet_provider(packet_provider_type func) {
258 _pkt_providers.push_back(std::move(func));
259 }
260 bool poll_tx();
    // device reads private members of qp (_rx_stream in l2receive(), _sw_reta in
    // forward_dst()).
261 friend class device;
262};
263
// Abstract network device. Holds an array of smp::count raw qp pointers indexed
// by cpu id; hw_address(), hw_features() and init_local_queue() are pure virtual.
264class device {
265protected:
    // Array of qp pointers, one slot per cpu. The unique_ptr owns the array
    // itself; the elements are raw pointers.
266 std::unique_ptr<qp*[]> _queues;
    // Number of low hash bits that forward_dst() shifts away before indexing the
    // software redirection table.
267 size_t _rss_table_bits = 0;
268public:
    // Allocates smp::count slots. std::make_unique value-initializes array
    // elements, so every slot starts out as nullptr.
269 device() {
270 _queues = std::make_unique<qp*[]>(smp::count);
271 }
272 virtual ~device() {};
    // Returns the queue stored for `cpu`. The slot is dereferenced without a
    // null check, so it must have been populated beforehand.
273 qp& queue_for_cpu(unsigned cpu) { return *_queues[cpu]; }
    // Returns the queue of the shard this code is running on.
274 qp& local_queue() { return queue_for_cpu(this_shard_id()); }
    // Pushes p into the rx stream of the current shard's queue.
275 void l2receive(packet p) {
276 // FIXME: future is discarded
277 (void)_queues[this_shard_id()]->_rx_stream.produce(std::move(p));
278 }
279 future<> receive(std::function<future<> (packet)> next_packet);
280 virtual ethernet_address hw_address() = 0;
281 virtual net::hw_features hw_features() = 0;
    // Default implementation returns default_rsskey_40bytes.
282 virtual rss_key_type rss_key() const { return default_rsskey_40bytes; }
    // Default implementation reports a single hardware queue.
283 virtual uint16_t hw_queues_count() { return 1; }
    // Default implementation returns an already-resolved future.
284 virtual future<> link_ready() { return make_ready_future<>(); }
285 virtual std::unique_ptr<qp> init_local_queue(const program_options::option_group& opts, uint16_t qid) = 0;
    // Maps a hash to a queue id: the hash modulo hw_queues_count().
286 virtual unsigned hash2qid(uint32_t hash) {
287 return hash % hw_queues_count();
288 }
289 void set_local_queue(std::unique_ptr<qp> dev);
    // Picks the destination cpu for traffic that arrived on src_cpuid.
    // If that cpu's queue has no software redirection table, src_cpuid is
    // returned unchanged and hashfn is never invoked. Otherwise hashfn() is
    // called, its low _rss_table_bits bits are shifted away, and the result
    // modulo the table size indexes the table to obtain the destination.
290 template <typename Func>
291 unsigned forward_dst(unsigned src_cpuid, Func&& hashfn) {
292 auto& qp = queue_for_cpu(src_cpuid);
293 if (!qp._sw_reta) {
294 return src_cpuid;
295 }
296 auto hash = hashfn() >> _rss_table_bits;
297 auto& reta = *qp._sw_reta;
298 return reta[hash % reta.size()];
299 }
    // Maps a hash to a cpu: the queue id from hash2qid() is passed to
    // forward_dst() as the source cpu id, with the same hash as the hash value.
300 virtual unsigned hash2cpu(uint32_t hash) {
301 // there is an assumption here that qid == cpu_id, which will
302 // not necessarily be true in the future
303 return forward_dst(hash2qid(hash), [hash] { return hash; });
304 }
305};
306
307}
308
309}
Definition: circular_buffer.hh:63
A representation of a possibly not-yet-computed value.
Definition: future.hh:1197
holds the metric definition.
Definition: metrics_registration.hh:94
Definition: net.hh:264
Definition: net.hh:52
Definition: net.hh:115
Definition: net.hh:95
Definition: packet.hh:87
Definition: net.hh:219
Definition: program-options.hh:293
Definition: stream.hh:60
holds the metric_groups definition needed by class that reports metrics
Definition: net.hh:76
Seastar API namespace.
Definition: abort_on_ebadf.hh:26
shard_id this_shard_id() noexcept
Returns shard_id of the current shard.
Definition: shard_id.hh:52
Definition: ethernet.hh:37
Definition: net.hh:146
void update_frags_stats(uint64_t nfrags, uint64_t nbytes)
Definition: net.hh:177
void update_copy_stats(uint64_t nr_frags, uint64_t bytes)
Definition: net.hh:166
void update_pkts_bunch(uint64_t count)
Definition: net.hh:154
Definition: net.hh:190