Seastar
High performance C++ framework for concurrent servers
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
ip.hh
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2014 Cloudius Systems, Ltd.
20 *
21 */
22
23#pragma once
24
25#ifndef SEASTAR_MODULE
26#include <boost/asio/ip/address_v4.hpp>
27#include <arpa/inet.h>
28#include <unordered_map>
29#include <cstdint>
30#include <array>
31#include <map>
32#include <list>
33#include <chrono>
34#endif
35
36#include <seastar/core/array_map.hh>
37#include <seastar/net/byteorder.hh>
38#include <seastar/core/byteorder.hh>
39#include <seastar/net/arp.hh>
40#include <seastar/net/ip_checksum.hh>
41#include <seastar/net/const.hh>
42#include <seastar/net/packet-util.hh>
43#include <seastar/core/shared_ptr.hh>
44#include <seastar/net/toeplitz.hh>
45#include <seastar/net/udp.hh>
47#include <seastar/util/modules.hh>
48
49#include "ipv4_address.hh"
50#include "ipv6_address.hh"
51
52namespace seastar {
53
54namespace net {
55
56class ipv4;
57template <ip_protocol_num ProtoNum>
58class ipv4_l4;
59
60template <typename InetTraits>
61class tcp;
62
// A layer-4 packet together with the addressing needed to send it:
// destination IPv4 address, payload, destination link-layer address and
// IP protocol number.
66 struct l4packet {
// Destination IPv4 address.
67 ipv4_address to;
// Payload packet.
68 packet p;
// Destination ethernet address.
69 ethernet_address e_dst;
// IP protocol number; ipv4_l4<ProtoNum>::register_packet_provider overwrites
// it with ProtoNum on every packet the provider yields.
70 ip_protocol_num proto_num;
71 };
72 using packet_provider_type = std::function<std::optional<l4packet> ()>;
73 static void tcp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
74 csum.sum_many(src.ip.raw, dst.ip.raw, uint8_t(0), uint8_t(ip_protocol_num::tcp), len);
75 }
76 static void udp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
77 csum.sum_many(src.ip.raw, dst.ip.raw, uint8_t(0), uint8_t(ip_protocol_num::udp), len);
78 }
79 static constexpr uint8_t ip_hdr_len_min = ipv4_hdr_len_min;
80};
81
82template <ip_protocol_num ProtoNum>
83class ipv4_l4 {
84public:
85 ipv4& _inet;
86public:
87 ipv4_l4(ipv4& inet) : _inet(inet) {}
88 void register_packet_provider(ipv4_traits::packet_provider_type func);
89 future<ethernet_address> get_l2_dst_address(ipv4_address to);
90 const ipv4& inet() const {
91 return _inet;
92 }
93};
94
96public:
97 virtual ~ip_protocol() {}
98 virtual void received(packet p, ipv4_address from, ipv4_address to) = 0;
99 virtual bool forward(forward_hash& out_hash_data, packet& p, size_t off) {
100 std::ignore = out_hash_data;
101 std::ignore = p;
102 std::ignore = off;
103 return true;
104 }
105};
106
107template <typename InetTraits>
108struct l4connid {
109 using ipaddr = typename InetTraits::address_type;
110 using inet_type = typename InetTraits::inet_type;
111 struct connid_hash;
112
113 ipaddr local_ip;
114 ipaddr foreign_ip;
115 uint16_t local_port;
116 uint16_t foreign_port;
117
118 bool operator==(const l4connid& x) const {
119 return local_ip == x.local_ip
120 && foreign_ip == x.foreign_ip
121 && local_port == x.local_port
122 && foreign_port == x.foreign_port;
123 }
124
125 uint32_t hash(rss_key_type rss_key) {
126 forward_hash hash_data;
127 hash_data.push_back(hton(foreign_ip.ip));
128 hash_data.push_back(hton(local_ip.ip));
129 hash_data.push_back(hton(foreign_port));
130 hash_data.push_back(hton(local_port));
131 return toeplitz_hash(rss_key, hash_data);
132 }
133};
134
// ip_protocol implementation for TCP over IPv4. Holds the tcp<ipv4_traits>
// instance through a unique_ptr; ipv4 is a friend and dereferences _tcp
// directly in ipv4::get_tcp().
// NOTE(review): listing line 136 is not visible here, so this view of the
// class may be missing a member.
135class ipv4_tcp final : public ip_protocol {
137 std::unique_ptr<tcp<ipv4_traits>> _tcp;
138public:
139 ipv4_tcp(ipv4& inet);
140 ~ipv4_tcp();
// Handles a received packet together with its source and destination address.
141 virtual void received(packet p, ipv4_address from, ipv4_address to) override;
// Overrides ip_protocol::forward(), whose base version always returns true.
142 virtual bool forward(forward_hash& out_hash_data, packet& p, size_t off) override;
143 friend class ipv4;
144};
145
// ICMP header, declared with the packed attribute so the members are laid
// out without padding.
146struct icmp_hdr {
// Message types named by this header; only echo reply (0) and echo
// request (8) are listed.
147 enum class msg_type : uint8_t {
148 echo_reply = 0,
149 echo_request = 8,
150 };
151 msg_type type;
152 uint8_t code;
153 packed<uint16_t> csum;
154 packed<uint32_t> rest;
// Applies the adjuster to csum only; type, code and rest are not passed
// to it.
155 template <typename Adjuster>
156 auto adjust_endianness(Adjuster a) {
157 return a(csum);
158 }
159} __attribute__((packed));
160
161
// ICMP handler. Outgoing packets are taken from _packetq by a packet
// provider that the constructor registers with the inet layer.
// NOTE(review): the declarations of inet_type and _packetq are not visible
// in this listing (lines 165 and 180 are missing).
162class icmp {
163public:
164 using ipaddr = ipv4_address;
// Registers a packet provider on inet. Each call of the provider returns
// the front packet of _packetq, or an empty optional when the queue is
// empty. The lambda captures this.
// NOTE(review): presumably this object must outlive the registered provider
// - confirm against the owner's lifetime.
166 explicit icmp(inet_type& inet) : _inet(inet) {
167 _inet.register_packet_provider([this] {
168 std::optional<ipv4_traits::l4packet> l4p;
169 if (!_packetq.empty()) {
170 l4p = std::move(_packetq.front());
171 _packetq.pop_front();
// Return the dequeued packet's length to the queue-space semaphore.
172 _queue_space.signal(l4p.value().p.len());
173 }
174 return l4p;
175 });
176 }
177 void received(packet p, ipaddr from, ipaddr to);
178private:
179 inet_type& _inet;
// Semaphore initialised to 212992 units; the provider signals it by the
// length of each packet it dequeues.
181 semaphore _queue_space = {212992};
182};
183
184class ipv4_icmp final : public ip_protocol {
186 icmp _icmp;
187public:
188 ipv4_icmp(ipv4& inet) : _inet_l4(inet), _icmp(_inet_l4) {}
189 virtual void received(packet p, ipv4_address from, ipv4_address to) {
190 _icmp.received(std::move(p), from, to);
191 }
192 friend class ipv4;
193};
194
195class ipv4_udp : public ip_protocol {
197 using connid_hash = typename connid::connid_hash;
198
199public:
200 static const int default_queue_size;
201private:
202 static const uint16_t min_anonymous_port = 32768;
203 ipv4 &_inet;
204 std::unordered_map<uint16_t, lw_shared_ptr<udp_channel_state>> _channels;
205 int _queue_size = default_queue_size;
206 uint16_t _next_anonymous_port = min_anonymous_port;
208private:
209 uint16_t next_port(uint16_t port);
210public:
212 private:
213 ipv4_udp &_proto;
214 uint16_t _port;
215 public:
216 registration(ipv4_udp &proto, uint16_t port) : _proto(proto), _port(port) {};
217
218 void unregister() {
219 _proto._channels.erase(_proto._channels.find(_port));
220 }
221
222 uint16_t port() const {
223 return _port;
224 }
225 };
226
227 ipv4_udp(ipv4& inet);
228 udp_channel make_channel(ipv4_addr addr);
229 virtual void received(packet p, ipv4_address from, ipv4_address to) override;
230 void send(uint16_t src_port, ipv4_addr dst, packet &&p);
231 bool forward(forward_hash& out_hash_data, packet& p, size_t off) override;
232 void set_queue_size(int size) { _queue_size = size; }
233
234 const ipv4& inet() const {
235 return _inet;
236 }
237};
238
239struct ip_hdr;
240
242 virtual ~ip_packet_filter() {};
243 virtual future<> handle(packet& p, ip_hdr* iph, ethernet_address from, bool & handled) = 0;
244};
245
247 struct hash;
248 ipv4_address src_ip;
249 ipv4_address dst_ip;
250 uint16_t identification;
251 uint8_t protocol;
252 bool operator==(const ipv4_frag_id& x) const {
253 return src_ip == x.src_ip &&
254 dst_ip == x.dst_ip &&
255 identification == x.identification &&
256 protocol == x.protocol;
257 }
258};
259
260struct ipv4_frag_id::hash : private std::hash<ipv4_address>,
261 private std::hash<uint16_t>, private std::hash<uint8_t> {
262 size_t operator()(const ipv4_frag_id& id) const noexcept {
263 using h1 = std::hash<ipv4_address>;
264 using h2 = std::hash<uint16_t>;
265 using h3 = std::hash<uint8_t>;
266 return h1::operator()(id.src_ip) ^
267 h1::operator()(id.dst_ip) ^
268 h2::operator()(id.identification) ^
269 h3::operator()(id.protocol);
270 }
271};
272
273struct ipv4_tag {};
275
// IPv4 layer bound to a network interface. It holds the TCP, ICMP and UDP
// protocol handlers, ARP state, the registered packet providers, an optional
// packet filter, and the bookkeeping for fragment reassembly.
// NOTE(review): a few listing lines (279, 295, 299, 316) are not visible
// here; for example address_type is used below but its declaration is not
// shown.
276class ipv4 {
277public:
278 using clock_type = lowres_clock;
280 using proto_type = uint16_t;
// Returns the address 0xffffffff.
281 static address_type broadcast_address() { return ipv4_address(0xffffffff); }
// Returns eth_protocol_num::ipv4 converted to proto_type.
282 static proto_type arp_protocol_type() { return proto_type(eth_protocol_num::ipv4); }
283private:
284 interface* _netif;
// Packet sources added through register_packet_provider().
285 std::vector<ipv4_traits::packet_provider_type> _pkt_providers;
286 arp _global_arp;
287 arp_for<ipv4> _arp;
288 ipv4_address _host_address;
289 ipv4_address _gw_address;
290 ipv4_address _netmask;
291 l3_protocol _l3;
292 ipv4_tcp _tcp;
293 ipv4_icmp _icmp;
294 ipv4_udp _udp;
// Optional packet filter; null until set_packet_filter() is called.
296 ip_packet_filter * _packet_filter = nullptr;
// State kept per entry of _frags (one entry per ipv4_frag_id).
297 struct frag {
298 packet header;
300 clock_type::time_point rx_time;
301 uint32_t mem_size = 0;
302 // fragment with MF == 0 indicates it is the last fragment
303 bool last_frag_received = false;
304
305 packet get_assembled_packet(ethernet_address from, ethernet_address to);
306 int32_t merge(ip_hdr &h, uint16_t offset, packet p);
307 bool is_complete();
308 };
// Reassembly state keyed by fragment id.
309 std::unordered_map<ipv4_frag_id, frag, ipv4_frag_id::hash> _frags;
// NOTE(review): presumably fragment ids ordered by age, used for timeout
// and eviction - confirm in the implementation file.
310 std::list<ipv4_frag_id> _frags_age;
// Delay added to "now" when arming _frag_timer: 30 seconds.
311 static constexpr std::chrono::seconds _frag_timeout{30};
// Thresholds of 3 * 1024 * 1024 and 4 * 1024 * 1024.
// NOTE(review): presumably byte limits on _frag_mem - confirm.
312 static constexpr uint32_t _frag_low_thresh{3 * 1024 * 1024};
313 static constexpr uint32_t _frag_high_thresh{4 * 1024 * 1024};
314 uint32_t _frag_mem{0};
315 timer<lowres_clock> _frag_timer;
317 unsigned _pkt_provider_idx = 0;
318 metrics::metric_groups _metrics;
319private:
320 future<> handle_received_packet(packet p, ethernet_address from);
321 bool forward(forward_hash& out_hash_data, packet& p, size_t off);
322 std::optional<l3_protocol::l3packet> get_packet();
323 bool in_my_netmask(ipv4_address a) const;
324 void frag_limit_mem();
325 void frag_timeout();
326 void frag_drop(ipv4_frag_id frag_id, uint32_t dropped_size);
// Arms _frag_timer to fire at now + _frag_timeout.
327 void frag_arm(clock_type::time_point now) {
328 auto tp = now + _frag_timeout;
329 _frag_timer.arm(tp);
330 }
// Same as above, using the current clock_type time as "now".
331 void frag_arm() {
332 auto now = clock_type::now();
333 frag_arm(now);
334 }
335public:
336 explicit ipv4(interface* netif);
337 void set_host_address(ipv4_address ip);
338 ipv4_address host_address() const;
339 void set_gw_address(ipv4_address ip);
340 ipv4_address gw_address() const;
341 void set_netmask_address(ipv4_address ip);
342 ipv4_address netmask_address() const;
// Returns the interface pointer stored in _netif.
343 interface * netif() const {
344 return _netif;
345 }
346 // TODO or something. Should perhaps truly be a list
347 // of filters. With ordering. And blackjack. Etc.
348 // But for now, a simple single raw pointer suffices
349 void set_packet_filter(ip_packet_filter *);
350 ip_packet_filter * packet_filter() const;
351 void send(ipv4_address to, ip_protocol_num proto_num, packet p, ethernet_address e_dst);
// Returns the tcp instance held by the _tcp handler (dereferences its
// unique_ptr).
352 tcp<ipv4_traits>& get_tcp() { return *_tcp._tcp; }
353 ipv4_udp& get_udp() { return _udp; }
354 void register_l4(proto_type id, ip_protocol* handler);
// Forwards to the interface's hw_features().
355 const net::hw_features& hw_features() const { return _netif->hw_features(); }
356 static bool needs_frag(packet& p, ip_protocol_num proto_num, net::hw_features hw_features);
// Forwards the (link-layer address, IPv4 address) pair to _arp.learn().
357 void learn(ethernet_address l2, ipv4_address l3) {
358 _arp.learn(l2, l3);
359 }
// Appends func to _pkt_providers.
360 void register_packet_provider(ipv4_traits::packet_provider_type&& func) {
361 _pkt_providers.push_back(std::move(func));
362 }
363 future<ethernet_address> get_l2_dst_address(ipv4_address to);
364};
365
366template <ip_protocol_num ProtoNum>
367inline
368void ipv4_l4<ProtoNum>::register_packet_provider(ipv4_traits::packet_provider_type func) {
369 _inet.register_packet_provider([func = std::move(func)] {
370 auto l4p = func();
371 if (l4p) {
372 l4p.value().proto_num = ProtoNum;
373 }
374 return l4p;
375 });
376}
377
// Forwards to ipv4::get_l2_dst_address() on the wrapped ipv4 instance and
// returns its future unchanged.
378template <ip_protocol_num ProtoNum>
379inline
380future<ethernet_address> ipv4_l4<ProtoNum>::get_l2_dst_address(ipv4_address to) {
381 return _inet.get_l2_dst_address(to);
382}
383
// IPv4 header, declared with the packed attribute so the members are laid
// out without padding.
// NOTE(review): listing lines 389-390 are not visible here; adjust_endianness
// refers to `len` and `id`, which are presumably declared on those lines.
384struct ip_hdr {
385 uint8_t ihl : 4;
386 uint8_t ver : 4;
387 uint8_t dscp : 6;
388 uint8_t ecn : 2;
// Combined flags and fragment-offset field; see frag_bits for the bit
// positions used by mf(), df() and offset().
391 packed<uint16_t> frag;
392 enum class frag_bits : uint8_t { mf = 13, df = 14, reserved = 15, offset_shift = 3 };
393 uint8_t ttl;
394 uint8_t ip_proto;
395 packed<uint16_t> csum;
396 ipv4_address src_ip;
397 ipv4_address dst_ip;
// Zero-length array marking where the options start, right after the
// fixed fields.
398 uint8_t options[0];
// Applies the adjuster to the multi-byte fields: len, id, frag, csum,
// src_ip and dst_ip.
399 template <typename Adjuster>
400 auto adjust_endianness(Adjuster a) {
401 return a(len, id, frag, csum, src_ip, dst_ip);
402 }
// True when bit 13 of frag is set.
403 bool mf() { return frag & (1 << uint8_t(frag_bits::mf)); }
// True when bit 14 of frag is set.
404 bool df() { return frag & (1 << uint8_t(frag_bits::df)); }
// frag shifted left by 3 and narrowed to uint16_t: the narrowing discards
// the three top bits, leaving the low 13 bits multiplied by 8.
405 uint16_t offset() { return frag << uint8_t(frag_bits::offset_shift); }
406} __attribute__((packed));
407
408template <typename InetTraits>
409struct l4connid<InetTraits>::connid_hash : private std::hash<ipaddr>, private std::hash<uint16_t> {
410 size_t operator()(const l4connid<InetTraits>& id) const noexcept {
411 using h1 = std::hash<ipaddr>;
412 using h2 = std::hash<uint16_t>;
413 return h1::operator()(id.local_ip)
414 ^ h1::operator()(id.foreign_ip)
415 ^ h2::operator()(id.local_port)
416 ^ h2::operator()(id.foreign_port);
417 }
418};
419
420void arp_learn(ethernet_address l2, ipv4_address l3);
421
422}
423
424}
Definition: array_map.hh:39
Definition: circular_buffer.hh:63
A representation of a possibly not-yet-computed value.
Definition: future.hh:1240
Low-resolution and efficient steady clock.
Definition: lowres_clock.hh:56
static time_point now() noexcept
Definition: lowres_clock.hh:74
holds the metric definition.
Definition: metrics_registration.hh:94
Definition: arp.hh:90
Definition: arp.hh:58
Definition: api.hh:116
Definition: net.hh:51
Definition: ip.hh:162
Definition: net.hh:114
Definition: ip.hh:95
Definition: ip.hh:184
Definition: ip.hh:135
Definition: ip.hh:195
Definition: ip.hh:276
Definition: net.hh:94
Definition: packet.hh:87
Definition: tcp.hh:291
void arm(time_point until, std::optional< duration > period={}) noexcept
future now()
Returns a ready future.
Definition: later.hh:35
holds the metric_groups definition needed by class that reports metrics
Definition: net.hh:75
Definition: ip.hh:273
Seastar API namespace.
Definition: abort_on_ebadf.hh:26
Definition: socket_defs.hh:113
Definition: ip_checksum.hh:38
Definition: ethernet.hh:37
Definition: ip.hh:146
Definition: ip.hh:384
Definition: ip.hh:241
Definition: ipv4_address.hh:35
Definition: ip.hh:246
Definition: ip.hh:63
Definition: ip.hh:108
Definition: unaligned.hh:58