Seastar
High performance C++ framework for concurrent servers
linux-aio.hh
/*
 * This file is open source software, licensed to you under the terms
 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
 * distributed with this work for additional information regarding copyright
 * ownership. You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*
 * Copyright (C) 2017 ScyllaDB
 */
21
22#pragma once
23
24#include <endian.h>
25#include <sys/time.h>
26#include <sys/uio.h>
27#include <cstdint>
28
29namespace seastar {
30
31namespace internal {
32
/// Types and constants describing the binary interface used by the io_*
/// wrappers declared in this header. Field order and field widths are part
/// of that interface: do not reorder or resize members.
namespace linux_abi {

/// Handle identifying an AIO context (filled in by io_setup()).
using aio_context_t = unsigned long;

/// Operation selector stored in iocb::aio_lio_opcode.
/// The numeric values are fixed; note that 4 is not assigned here.
enum class iocb_cmd : uint16_t {
    PREAD = 0,
    PWRITE = 1,
    FSYNC = 2,
    FDSYNC = 3,
    POLL = 5,
    NOOP = 6,
    PREADV = 7,
    PWRITEV = 8,
};

/// Completion record filled in by io_getevents() / io_pgetevents().
struct io_event {
    uint64_t data;  ///< user cookie; decoded by get_user_data(const io_event&)
    uint64_t obj;   ///< address of the originating iocb; decoded by get_iocb()
    int64_t res;    ///< primary result of the operation
    int64_t res2;   ///< secondary result of the operation
};

// Four 64-bit members and no padding: catch any accidental layout change.
static_assert(sizeof(io_event) == 32, "io_event must be exactly 32 bytes");

/// Flag for iocb::aio_flags: aio_resfd holds a valid descriptor
/// (see set_eventfd_notification()).
constexpr int IOCB_FLAG_RESFD = 1;

/// Request control block handed to io_submit().
struct iocb {
    uint64_t aio_data;  ///< user cookie, see set_user_data()

    // aio_key and aio_rw_flags share one 64-bit slot; which half each one
    // occupies depends on the byte order of the target.
#if __BYTE_ORDER == __LITTLE_ENDIAN
    uint32_t aio_key;
    int32_t aio_rw_flags;
#elif __BYTE_ORDER == __BIG_ENDIAN
    int32_t aio_rw_flags;
    uint32_t aio_key;
#else
#error bad byteorder
#endif

    iocb_cmd aio_lio_opcode;  ///< requested operation
    int16_t aio_reqprio;
    uint32_t aio_fildes;      ///< target file descriptor

    uint64_t aio_buf;         ///< buffer address, iovec array address, or poll event bits
    uint64_t aio_nbytes;      ///< byte count, or number of iovec entries
    int64_t aio_offset;       ///< file offset of the operation

    uint64_t aio_reserved2;

    uint32_t aio_flags;       ///< IOCB_FLAG_* bits

    uint32_t aio_resfd;       ///< descriptor used when IOCB_FLAG_RESFD is set
};

// 8 + (4 + 4) + (2 + 2 + 4) + 3 * 8 + 8 + (4 + 4) = 64 bytes, no padding.
static_assert(sizeof(iocb) == 64, "iocb must be exactly 64 bytes");

/// Signal-mask descriptor: a pointer to the mask plus its size in bytes.
struct aio_sigset {
    const sigset_t* sigmask;
    size_t sigsetsize;
};

} // namespace linux_abi
91
92linux_abi::iocb make_read_iocb(int fd, uint64_t offset, void* buffer, size_t len);
93linux_abi::iocb make_write_iocb(int fd, uint64_t offset, const void* buffer, size_t len);
94linux_abi::iocb make_readv_iocb(int fd, uint64_t offset, const ::iovec* iov, size_t niov);
95linux_abi::iocb make_writev_iocb(int fd, uint64_t offset, const ::iovec* iov, size_t niov);
96linux_abi::iocb make_poll_iocb(int fd, uint32_t events);
97
98void set_user_data(linux_abi::iocb& iocb, void* data);
99void set_nowait(linux_abi::iocb& iocb, bool nowait);
100
101void set_eventfd_notification(linux_abi::iocb& iocb, int eventfd);
102
103linux_abi::iocb* get_iocb(const linux_abi::io_event& ioev);
104
105int io_setup(int nr_events, linux_abi::aio_context_t* io_context);
106int io_destroy(linux_abi::aio_context_t io_context) noexcept;
107int io_submit(linux_abi::aio_context_t io_context, long nr, linux_abi::iocb** iocbs);
108int io_cancel(linux_abi::aio_context_t io_context, linux_abi::iocb* iocb, linux_abi::io_event* result);
109int io_getevents(linux_abi::aio_context_t io_context, long min_nr, long nr, linux_abi::io_event* events, const ::timespec* timeout,
110 bool force_syscall = false);
111int io_pgetevents(linux_abi::aio_context_t io_context, long min_nr, long nr, linux_abi::io_event* events, const ::timespec* timeout, const sigset_t* sigmask,
112 bool force_syscall = false);
113
114void setup_aio_context(size_t nr, linux_abi::aio_context_t* io_context);
115
116}
117
/// Gate consulted by internal::set_nowait(): RWF_NOWAIT is only set or
/// cleared on an iocb while this is true. The variable is defined outside
/// this header.
extern bool aio_nowait_supported;
119
120namespace internal {
121
122inline
123linux_abi::iocb
124make_read_iocb(int fd, uint64_t offset, void* buffer, size_t len) {
125 linux_abi::iocb iocb{};
126 iocb.aio_lio_opcode = linux_abi::iocb_cmd::PREAD;
127 iocb.aio_fildes = fd;
128 iocb.aio_offset = offset;
129 iocb.aio_buf = reinterpret_cast<uintptr_t>(buffer);
130 iocb.aio_nbytes = len;
131 return iocb;
132}
133
134inline
135linux_abi::iocb
136make_write_iocb(int fd, uint64_t offset, const void* buffer, size_t len) {
137 linux_abi::iocb iocb{};
138 iocb.aio_lio_opcode = linux_abi::iocb_cmd::PWRITE;
139 iocb.aio_fildes = fd;
140 iocb.aio_offset = offset;
141 iocb.aio_buf = reinterpret_cast<uintptr_t>(buffer);
142 iocb.aio_nbytes = len;
143 return iocb;
144}
145
146inline
147linux_abi::iocb
148make_readv_iocb(int fd, uint64_t offset, const ::iovec* iov, size_t niov) {
149 linux_abi::iocb iocb{};
150 iocb.aio_lio_opcode = linux_abi::iocb_cmd::PREADV;
151 iocb.aio_fildes = fd;
152 iocb.aio_offset = offset;
153 iocb.aio_buf = reinterpret_cast<uintptr_t>(iov);
154 iocb.aio_nbytes = niov;
155 return iocb;
156}
157
158inline
159linux_abi::iocb
160make_writev_iocb(int fd, uint64_t offset, const ::iovec* iov, size_t niov) {
161 linux_abi::iocb iocb{};
162 iocb.aio_lio_opcode = linux_abi::iocb_cmd::PWRITEV;
163 iocb.aio_fildes = fd;
164 iocb.aio_offset = offset;
165 iocb.aio_buf = reinterpret_cast<uintptr_t>(iov);
166 iocb.aio_nbytes = niov;
167 return iocb;
168}
169
170inline
171linux_abi::iocb
172make_poll_iocb(int fd, uint32_t events) {
173 linux_abi::iocb iocb{};
174 iocb.aio_lio_opcode = linux_abi::iocb_cmd::POLL;
175 iocb.aio_fildes = fd;
176 iocb.aio_buf = events;
177 return iocb;
178}
179
180inline
181linux_abi::iocb
182make_fdsync_iocb(int fd) {
183 linux_abi::iocb iocb{};
184 iocb.aio_lio_opcode = linux_abi::iocb_cmd::FDSYNC;
185 iocb.aio_fildes = fd;
186 return iocb;
187}
188
189inline
190void
191set_user_data(linux_abi::iocb& iocb, void* data) {
192 iocb.aio_data = reinterpret_cast<uintptr_t>(data);
193}
194
195template <typename T>
196inline T* get_user_data(const linux_abi::iocb& iocb) noexcept {
197 return reinterpret_cast<T*>(uintptr_t(iocb.aio_data));
198}
199
200template <typename T>
201inline T* get_user_data(const linux_abi::io_event& ev) noexcept {
202 return reinterpret_cast<T*>(uintptr_t(ev.data));
203}
204
205inline
206void
207set_eventfd_notification(linux_abi::iocb& iocb, int eventfd) {
208 iocb.aio_flags |= linux_abi::IOCB_FLAG_RESFD;
209 iocb.aio_resfd = eventfd;
210}
211
212inline
213linux_abi::iocb*
214get_iocb(const linux_abi::io_event& ev) {
215 return reinterpret_cast<linux_abi::iocb*>(uintptr_t(ev.obj));
216}
217
218inline
219void
220set_nowait(linux_abi::iocb& iocb, bool nowait) {
221#ifdef RWF_NOWAIT
222 if (aio_nowait_supported) {
223 if (nowait) {
224 iocb.aio_rw_flags |= RWF_NOWAIT;
225 } else {
226 iocb.aio_rw_flags &= ~RWF_NOWAIT;
227 }
228 }
229#endif
230}
231
232}
233
234
235}
236
Seastar API namespace.
Definition: abort_on_ebadf.hh:26