Seastar
High performance C++ framework for concurrent servers
ip.hh
1 /*
2  * This file is open source software, licensed to you under the terms
3  * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4  * distributed with this work for additional information regarding copyright
5  * ownership. You may not use this file except in compliance with the License.
6  *
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an
13  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14  * KIND, either express or implied. See the License for the
15  * specific language governing permissions and limitations
16  * under the License.
17  */
18 /*
19  * Copyright (C) 2014 Cloudius Systems, Ltd.
20  *
21  */
22 
23 #pragma once
24 
25 #ifndef SEASTAR_MODULE
26 #include <boost/asio/ip/address_v4.hpp>
27 #include <arpa/inet.h>
28 #include <unordered_map>
29 #include <cstdint>
30 #include <array>
31 #include <map>
32 #include <list>
33 #include <chrono>
34 #endif
35 
36 #include <seastar/core/array_map.hh>
37 #include <seastar/net/byteorder.hh>
38 #include <seastar/core/byteorder.hh>
39 #include <seastar/net/arp.hh>
40 #include <seastar/net/ip_checksum.hh>
41 #include <seastar/net/const.hh>
42 #include <seastar/net/packet-util.hh>
43 #include <seastar/core/shared_ptr.hh>
44 #include <seastar/net/toeplitz.hh>
45 #include <seastar/net/udp.hh>
47 #include <seastar/util/modules.hh>
48 
49 #include "ipv4_address.hh"
50 #include "ipv6_address.hh"
51 
52 namespace seastar {
53 
54 namespace net {
55 
56 class ipv4;
57 template <ip_protocol_num ProtoNum>
58 class ipv4_l4;
59 
60 template <typename InetTraits>
61 class tcp;
62 
63 struct ipv4_traits {
64  using address_type = ipv4_address;
66  struct l4packet {
67  ipv4_address to;
68  packet p;
69  ethernet_address e_dst;
70  ip_protocol_num proto_num;
71  };
72  using packet_provider_type = std::function<std::optional<l4packet> ()>;
73  static void tcp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
74  csum.sum_many(src.ip.raw, dst.ip.raw, uint8_t(0), uint8_t(ip_protocol_num::tcp), len);
75  }
76  static void udp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
77  csum.sum_many(src.ip.raw, dst.ip.raw, uint8_t(0), uint8_t(ip_protocol_num::udp), len);
78  }
79  static constexpr uint8_t ip_hdr_len_min = ipv4_hdr_len_min;
80 };
81 
82 template <ip_protocol_num ProtoNum>
83 class ipv4_l4 {
84 public:
85  ipv4& _inet;
86 public:
87  ipv4_l4(ipv4& inet) : _inet(inet) {}
88  void register_packet_provider(ipv4_traits::packet_provider_type func);
89  future<ethernet_address> get_l2_dst_address(ipv4_address to);
90  const ipv4& inet() const {
91  return _inet;
92  }
93 };
94 
95 class ip_protocol {
96 public:
97  virtual ~ip_protocol() {}
98  virtual void received(packet p, ipv4_address from, ipv4_address to) = 0;
99  virtual bool forward(forward_hash& out_hash_data, packet& p, size_t off) {
100  std::ignore = out_hash_data;
101  std::ignore = p;
102  std::ignore = off;
103  return true;
104  }
105 };
106 
107 template <typename InetTraits>
108 struct l4connid {
109  using ipaddr = typename InetTraits::address_type;
110  using inet_type = typename InetTraits::inet_type;
111  struct connid_hash;
112 
113  ipaddr local_ip;
114  ipaddr foreign_ip;
115  uint16_t local_port;
116  uint16_t foreign_port;
117 
118  bool operator==(const l4connid& x) const {
119  return local_ip == x.local_ip
120  && foreign_ip == x.foreign_ip
121  && local_port == x.local_port
122  && foreign_port == x.foreign_port;
123  }
124 
125  uint32_t hash(rss_key_type rss_key) {
126  forward_hash hash_data;
127  hash_data.push_back(hton(foreign_ip.ip));
128  hash_data.push_back(hton(local_ip.ip));
129  hash_data.push_back(hton(foreign_port));
130  hash_data.push_back(hton(local_port));
131  return toeplitz_hash(rss_key, hash_data);
132  }
133 };
134 
135 class ipv4_tcp final : public ip_protocol {
137  std::unique_ptr<tcp<ipv4_traits>> _tcp;
138 public:
139  ipv4_tcp(ipv4& inet);
140  ~ipv4_tcp();
141  virtual void received(packet p, ipv4_address from, ipv4_address to) override;
142  virtual bool forward(forward_hash& out_hash_data, packet& p, size_t off) override;
143  friend class ipv4;
144 };
145 
146 struct icmp_hdr {
147  enum class msg_type : uint8_t {
148  echo_reply = 0,
149  echo_request = 8,
150  };
151  msg_type type;
152  uint8_t code;
153  packed<uint16_t> csum;
154  packed<uint32_t> rest;
155  template <typename Adjuster>
156  auto adjust_endianness(Adjuster a) {
157  return a(csum);
158  }
159 } __attribute__((packed));
160 
161 
162 class icmp {
163 public:
164  using ipaddr = ipv4_address;
166  explicit icmp(inet_type& inet) : _inet(inet) {
167  _inet.register_packet_provider([this] {
168  std::optional<ipv4_traits::l4packet> l4p;
169  if (!_packetq.empty()) {
170  l4p = std::move(_packetq.front());
171  _packetq.pop_front();
172  _queue_space.signal(l4p.value().p.len());
173  }
174  return l4p;
175  });
176  }
177  void received(packet p, ipaddr from, ipaddr to);
178 private:
179  inet_type& _inet;
181  semaphore _queue_space = {212992};
182 };
183 
184 class ipv4_icmp final : public ip_protocol {
186  icmp _icmp;
187 public:
188  ipv4_icmp(ipv4& inet) : _inet_l4(inet), _icmp(_inet_l4) {}
189  virtual void received(packet p, ipv4_address from, ipv4_address to) {
190  _icmp.received(std::move(p), from, to);
191  }
192  friend class ipv4;
193 };
194 
195 class ipv4_udp : public ip_protocol {
197  using connid_hash = typename connid::connid_hash;
198 
199 public:
200  static const int default_queue_size;
201 private:
202  static const uint16_t min_anonymous_port = 32768;
203  ipv4 &_inet;
204  std::unordered_map<uint16_t, lw_shared_ptr<udp_channel_state>> _channels;
205  int _queue_size = default_queue_size;
206  uint16_t _next_anonymous_port = min_anonymous_port;
208 private:
209  uint16_t next_port(uint16_t port);
210 public:
211  class registration {
212  private:
213  ipv4_udp &_proto;
214  uint16_t _port;
215  public:
216  registration(ipv4_udp &proto, uint16_t port) : _proto(proto), _port(port) {};
217 
218  void unregister() {
219  _proto._channels.erase(_proto._channels.find(_port));
220  }
221 
222  uint16_t port() const {
223  return _port;
224  }
225  };
226 
227  ipv4_udp(ipv4& inet);
228  udp_channel make_channel(ipv4_addr addr);
229  virtual void received(packet p, ipv4_address from, ipv4_address to) override;
230  void send(uint16_t src_port, ipv4_addr dst, packet &&p);
231  bool forward(forward_hash& out_hash_data, packet& p, size_t off) override;
232  void set_queue_size(int size) { _queue_size = size; }
233 
234  const ipv4& inet() const {
235  return _inet;
236  }
237 };
238 
239 struct ip_hdr;
240 
242  virtual ~ip_packet_filter() {};
243  virtual future<> handle(packet& p, ip_hdr* iph, ethernet_address from, bool & handled) = 0;
244 };
245 
246 struct ipv4_frag_id {
247  struct hash;
248  ipv4_address src_ip;
249  ipv4_address dst_ip;
250  uint16_t identification;
251  uint8_t protocol;
252  bool operator==(const ipv4_frag_id& x) const {
253  return src_ip == x.src_ip &&
254  dst_ip == x.dst_ip &&
255  identification == x.identification &&
256  protocol == x.protocol;
257  }
258 };
259 
260 struct ipv4_frag_id::hash : private std::hash<ipv4_address>,
261  private std::hash<uint16_t>, private std::hash<uint8_t> {
262  size_t operator()(const ipv4_frag_id& id) const noexcept {
263  using h1 = std::hash<ipv4_address>;
264  using h2 = std::hash<uint16_t>;
265  using h3 = std::hash<uint8_t>;
266  return h1::operator()(id.src_ip) ^
267  h1::operator()(id.dst_ip) ^
268  h2::operator()(id.identification) ^
269  h3::operator()(id.protocol);
270  }
271 };
272 
273 struct ipv4_tag {};
275 
276 class ipv4 {
277 public:
278  using clock_type = lowres_clock;
279  using address_type = ipv4_address;
280  using proto_type = uint16_t;
281  static address_type broadcast_address() { return ipv4_address(0xffffffff); }
282  static proto_type arp_protocol_type() { return proto_type(eth_protocol_num::ipv4); }
283 private:
284  interface* _netif;
285  std::vector<ipv4_traits::packet_provider_type> _pkt_providers;
286  arp _global_arp;
287  arp_for<ipv4> _arp;
288  ipv4_address _host_address;
289  ipv4_address _gw_address;
290  ipv4_address _netmask;
291  l3_protocol _l3;
292  ipv4_tcp _tcp;
293  ipv4_icmp _icmp;
294  ipv4_udp _udp;
296  ip_packet_filter * _packet_filter = nullptr;
297  struct frag {
298  packet header;
299  ipv4_packet_merger data;
300  clock_type::time_point rx_time;
301  uint32_t mem_size = 0;
302  // fragment with MF == 0 inidates it is the last fragment
303  bool last_frag_received = false;
304 
305  packet get_assembled_packet(ethernet_address from, ethernet_address to);
306  int32_t merge(ip_hdr &h, uint16_t offset, packet p);
307  bool is_complete();
308  };
309  std::unordered_map<ipv4_frag_id, frag, ipv4_frag_id::hash> _frags;
310  std::list<ipv4_frag_id> _frags_age;
311  static constexpr std::chrono::seconds _frag_timeout{30};
312  static constexpr uint32_t _frag_low_thresh{3 * 1024 * 1024};
313  static constexpr uint32_t _frag_high_thresh{4 * 1024 * 1024};
314  uint32_t _frag_mem{0};
315  timer<lowres_clock> _frag_timer;
317  unsigned _pkt_provider_idx = 0;
318  metrics::metric_groups _metrics;
319 private:
320  future<> handle_received_packet(packet p, ethernet_address from);
321  bool forward(forward_hash& out_hash_data, packet& p, size_t off);
322  std::optional<l3_protocol::l3packet> get_packet();
323  bool in_my_netmask(ipv4_address a) const;
324  void frag_limit_mem();
325  void frag_timeout();
326  void frag_drop(ipv4_frag_id frag_id, uint32_t dropped_size);
327  void frag_arm(clock_type::time_point now) {
328  auto tp = now + _frag_timeout;
329  _frag_timer.arm(tp);
330  }
331  void frag_arm() {
332  auto now = clock_type::now();
333  frag_arm(now);
334  }
335 public:
336  explicit ipv4(interface* netif);
337  void set_host_address(ipv4_address ip);
338  ipv4_address host_address() const;
339  void set_gw_address(ipv4_address ip);
340  ipv4_address gw_address() const;
341  void set_netmask_address(ipv4_address ip);
342  ipv4_address netmask_address() const;
343  interface * netif() const {
344  return _netif;
345  }
346  // TODO or something. Should perhaps truly be a list
347  // of filters. With ordering. And blackjack. Etc.
348  // But for now, a simple single raw pointer suffices
349  void set_packet_filter(ip_packet_filter *);
350  ip_packet_filter * packet_filter() const;
351  void send(ipv4_address to, ip_protocol_num proto_num, packet p, ethernet_address e_dst);
352  tcp<ipv4_traits>& get_tcp() { return *_tcp._tcp; }
353  ipv4_udp& get_udp() { return _udp; }
354  void register_l4(proto_type id, ip_protocol* handler);
355  const net::hw_features& hw_features() const { return _netif->hw_features(); }
356  static bool needs_frag(packet& p, ip_protocol_num proto_num, net::hw_features hw_features);
357  void learn(ethernet_address l2, ipv4_address l3) {
358  _arp.learn(l2, l3);
359  }
360  void register_packet_provider(ipv4_traits::packet_provider_type&& func) {
361  _pkt_providers.push_back(std::move(func));
362  }
363  future<ethernet_address> get_l2_dst_address(ipv4_address to);
364 };
365 
366 template <ip_protocol_num ProtoNum>
367 inline
368 void ipv4_l4<ProtoNum>::register_packet_provider(ipv4_traits::packet_provider_type func) {
369  _inet.register_packet_provider([func = std::move(func)] {
370  auto l4p = func();
371  if (l4p) {
372  l4p.value().proto_num = ProtoNum;
373  }
374  return l4p;
375  });
376 }
377 
378 template <ip_protocol_num ProtoNum>
379 inline
380 future<ethernet_address> ipv4_l4<ProtoNum>::get_l2_dst_address(ipv4_address to) {
381  return _inet.get_l2_dst_address(to);
382 }
383 
384 struct ip_hdr {
385  uint8_t ihl : 4;
386  uint8_t ver : 4;
387  uint8_t dscp : 6;
388  uint8_t ecn : 2;
389  packed<uint16_t> len;
390  packed<uint16_t> id;
391  packed<uint16_t> frag;
392  enum class frag_bits : uint8_t { mf = 13, df = 14, reserved = 15, offset_shift = 3 };
393  uint8_t ttl;
394  uint8_t ip_proto;
395  packed<uint16_t> csum;
396  ipv4_address src_ip;
397  ipv4_address dst_ip;
398  uint8_t options[0];
399  template <typename Adjuster>
400  auto adjust_endianness(Adjuster a) {
401  return a(len, id, frag, csum, src_ip, dst_ip);
402  }
403  bool mf() { return frag & (1 << uint8_t(frag_bits::mf)); }
404  bool df() { return frag & (1 << uint8_t(frag_bits::df)); }
405  uint16_t offset() { return frag << uint8_t(frag_bits::offset_shift); }
406 } __attribute__((packed));
407 
408 template <typename InetTraits>
409 struct l4connid<InetTraits>::connid_hash : private std::hash<ipaddr>, private std::hash<uint16_t> {
410  size_t operator()(const l4connid<InetTraits>& id) const noexcept {
411  using h1 = std::hash<ipaddr>;
412  using h2 = std::hash<uint16_t>;
413  return h1::operator()(id.local_ip)
414  ^ h1::operator()(id.foreign_ip)
415  ^ h2::operator()(id.local_port)
416  ^ h2::operator()(id.foreign_port);
417  }
418 };
419 
420 void arp_learn(ethernet_address l2, ipv4_address l3);
421 
422 }
423 
424 }
Definition: array_map.hh:39
Definition: circular_buffer.hh:63
A representation of a possibly not-yet-computed value.
Definition: future.hh:1238
Low-resolution and efficient steady clock.
Definition: lowres_clock.hh:59
static time_point now() noexcept
Definition: lowres_clock.hh:77
holds the metric definition.
Definition: metrics_registration.hh:94
Definition: arp.hh:90
Definition: arp.hh:58
Definition: net.hh:51
Definition: ip.hh:162
Definition: net.hh:114
Definition: ip.hh:95
Definition: ip.hh:184
Definition: ip.hh:135
Definition: ip.hh:195
Definition: ip.hh:276
Definition: net.hh:94
Definition: packet.hh:87
Definition: tcp.hh:294
void arm(time_point until, std::optional< duration > period={}) noexcept
future now()
Returns a ready future.
Definition: later.hh:35
holds the metric_groups definition needed by class that reports metrics
Definition: net.hh:75
Definition: ip.hh:273
Seastar API namespace.
Definition: abort_on_ebadf.hh:26
Definition: socket_defs.hh:113
Definition: ip_checksum.hh:38
Definition: ethernet.hh:37
Definition: ip.hh:146
Definition: ip.hh:384
Definition: ip.hh:241
Definition: ipv4_address.hh:35
Definition: ip.hh:246
Definition: ip.hh:63
Definition: ip.hh:108
Definition: unaligned.hh:58