Seastar
High performance C++ framework for concurrent servers
file.hh
1 /*
2  * This file is open source software, licensed to you under the terms
3  * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4  * distributed with this work for additional information regarding copyright
5  * ownership. You may not use this file except in compliance with the License.
6  *
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing,
12  * software distributed under the License is distributed on an
13  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14  * KIND, either express or implied. See the License for the
15  * specific language governing permissions and limitations
16  * under the License.
17  */
18 /*
19  * Copyright 2015 Cloudius Systems
20  */
21 
22 #pragma once
23 
24 #include <seastar/core/do_with.hh>
25 #include <seastar/core/stream.hh>
26 #include <seastar/core/sstring.hh>
27 #include <seastar/core/shared_ptr.hh>
28 #include <seastar/core/align.hh>
29 #include <seastar/core/io_priority_class.hh>
30 #include <seastar/core/file-types.hh>
31 #include <seastar/util/std-compat.hh>
32 #include <system_error>
33 #include <sys/statvfs.h>
34 #include <sys/ioctl.h>
35 #include <linux/fs.h>
36 #include <sys/uio.h>
37 #include <unistd.h>
38 
39 namespace seastar {
40 
43 
47  sstring name;
49  std::optional<directory_entry_type> type;
50 };
51 
53 struct stat_data {
54  uint64_t device_id; // ID of device containing file
55  uint64_t inode_number; // Inode number
56  uint64_t mode; // File type and mode
58  uint64_t number_of_links;// Number of hard links
59  uint64_t uid; // User ID of owner
60  uint64_t gid; // Group ID of owner
61  uint64_t rdev; // Device ID (if special file)
62  uint64_t size; // Total size, in bytes
63  uint64_t block_size; // Block size for filesystem I/O
64  uint64_t allocated_size; // Total size of allocated storage, in bytes
65 
66  std::chrono::system_clock::time_point time_accessed; // Time of last content access
67  std::chrono::system_clock::time_point time_modified; // Time of last content modification
68  std::chrono::system_clock::time_point time_changed; // Time of last status change (either content or attributes)
69 };
70 
77  uint64_t extent_allocation_size_hint = 1 << 20;
78  bool sloppy_size = false;
79  uint64_t sloppy_size_hint = 1 << 20;
80  file_permissions create_permissions = file_permissions::default_file_permissions;
81  bool append_is_unlikely = false;
82 
83  // The fsxattr.fsx_extsize is 32-bit
84  static constexpr uint64_t max_extent_allocation_size_hint = 1 << 31;
85 };
86 
87 class file;
88 class file_impl;
89 class io_intent;
90 class file_handle;
91 
92 // A handle that can be transported across shards and used to
93 // create a dup(2)-like `file` object referring to the same underlying file
95 public:
96  virtual ~file_handle_impl() = default;
97  virtual std::unique_ptr<file_handle_impl> clone() const = 0;
98  virtual shared_ptr<file_impl> to_file() && = 0;
99 };
100 
101 class file_impl {
102  friend class file;
103 protected:
104  static file_impl* get_file_impl(file& f);
105  unsigned _memory_dma_alignment = 4096;
106  unsigned _disk_read_dma_alignment = 4096;
107  unsigned _disk_write_dma_alignment = 4096;
108  unsigned _disk_overwrite_dma_alignment = 4096;
109  unsigned _read_max_length = 1u << 30;
110  unsigned _write_max_length = 1u << 30;
111 public:
112  virtual ~file_impl() {}
113 
114  virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) = 0;
115  virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) = 0;
116  virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc) = 0;
117  virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) = 0;
118 
119  virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc, io_intent*) {
120  return write_dma(pos, buffer, len, pc);
121  }
122  virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent*) {
123  return write_dma(pos, std::move(iov), pc);
124  }
125  virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc, io_intent*) {
126  return read_dma(pos, buffer, len, pc);
127  }
128  virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc, io_intent*) {
129  return read_dma(pos, std::move(iov), pc);
130  }
131 
132  virtual future<> flush(void) = 0;
133  virtual future<struct stat> stat(void) = 0;
134  virtual future<> truncate(uint64_t length) = 0;
135  virtual future<> discard(uint64_t offset, uint64_t length) = 0;
136  virtual future<int> ioctl(uint64_t cmd, void* argp) noexcept;
137  virtual future<int> ioctl_short(uint64_t cmd, void* argp) noexcept;
138  virtual future<int> fcntl(int op, uintptr_t arg) noexcept;
139  virtual future<int> fcntl_short(int op, uintptr_t arg) noexcept;
140  virtual future<> allocate(uint64_t position, uint64_t length) = 0;
141  virtual future<uint64_t> size(void) = 0;
142  virtual future<> close() = 0;
143  virtual std::unique_ptr<file_handle_impl> dup();
144  virtual subscription<directory_entry> list_directory(std::function<future<> (directory_entry de)> next) = 0;
145  virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc) = 0;
146  virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent*) {
147  return dma_read_bulk(offset, range_size, pc);
148  }
149 
150  friend class reactor;
151 };
152 
153 future<shared_ptr<file_impl>> make_file_impl(int fd, file_open_options options, int oflags) noexcept;
154 
156 
166 class file {
167  shared_ptr<file_impl> _file_impl;
168 public:
179  file() noexcept : _file_impl(nullptr) {}
180 
182  : _file_impl(std::move(impl)) {}
183 
185  explicit file(file_handle&& handle) noexcept;
186 
191  explicit operator bool() const noexcept { return bool(_file_impl); }
192 
197  file(const file& x) = default;
199  file(file&& x) noexcept : _file_impl(std::move(x._file_impl)) {}
204  file& operator=(const file& x) noexcept = default;
206  file& operator=(file&& x) noexcept = default;
207 
208  // O_DIRECT reading requires that buffer, offset, and read length, are
209  // all aligned. Alignment of 4096 was necessary in the past, but no longer
210  // is - 512 is usually enough; But we'll need to use BLKSSZGET ioctl to
211  // be sure it is really enough on this filesystem. 4096 is always safe.
212  // In addition, if we start reading in things outside page boundaries,
213  // we will end up with various pages around, some of them with
214  // overlapping ranges. Those would be very challenging to cache.
215 
217  uint64_t disk_read_dma_alignment() const noexcept {
218  return _file_impl->_disk_read_dma_alignment;
219  }
220 
222  uint64_t disk_write_dma_alignment() const noexcept {
223  return _file_impl->_disk_write_dma_alignment;
224  }
225 
232  uint64_t disk_overwrite_dma_alignment() const noexcept {
233  return _file_impl->_disk_overwrite_dma_alignment;
234  }
235 
237  uint64_t memory_dma_alignment() const noexcept {
238  return _file_impl->_memory_dma_alignment;
239  }
240 
245  size_t disk_read_max_length() const noexcept {
246  return _file_impl->_read_max_length;
247  }
248 
253  size_t disk_write_max_length() const noexcept {
254  return _file_impl->_write_max_length;
255  }
256 
272  template <typename CharType>
274  dma_read(uint64_t aligned_pos, CharType* aligned_buffer, size_t aligned_len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept {
275  return dma_read_impl(aligned_pos, reinterpret_cast<uint8_t*>(aligned_buffer), aligned_len, pc, intent);
276  }
277 
294  template <typename CharType>
295  future<temporary_buffer<CharType>> dma_read(uint64_t pos, size_t len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept {
296  return dma_read_impl(pos, len, pc, intent).then([] (temporary_buffer<uint8_t> t) {
297  return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release());
298  });
299  }
300 
303  class eof_error : public std::exception {};
304 
318  template <typename CharType>
320  dma_read_exactly(uint64_t pos, size_t len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept {
321  return dma_read_exactly_impl(pos, len, pc, intent).then([] (temporary_buffer<uint8_t> t) {
322  return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release());
323  });
324  }
325 
336  future<size_t> dma_read(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept;
337 
349  template <typename CharType>
350  future<size_t> dma_write(uint64_t pos, const CharType* buffer, size_t len, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept {
351  return dma_write_impl(pos, reinterpret_cast<const uint8_t*>(buffer), len, pc, intent);
352  }
353 
364  future<size_t> dma_write(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept;
365 
370  future<> flush() noexcept;
371 
373  future<struct stat> stat() noexcept;
374 
376  future<> truncate(uint64_t length) noexcept;
377 
390  future<> allocate(uint64_t position, uint64_t length) noexcept;
391 
396  future<> discard(uint64_t offset, uint64_t length) noexcept;
397 
410  future<int> ioctl(uint64_t cmd, void* argp) noexcept;
411 
426  future<int> ioctl_short(uint64_t cmd, void* argp) noexcept;
427 
439  future<int> fcntl(int op, uintptr_t arg = 0UL) noexcept;
440 
454  future<int> fcntl_short(int op, uintptr_t arg = 0UL) noexcept;
455 
467  [[deprecated("This API was removed from the kernel")]]
468  future<> set_file_lifetime_hint(uint64_t hint) noexcept;
469 
481  future<> set_inode_lifetime_hint(uint64_t hint) noexcept;
482 
494  [[deprecated("This API was removed from the kernel")]]
495  future<uint64_t> get_file_lifetime_hint() noexcept;
496 
508  future<uint64_t> get_inode_lifetime_hint() noexcept;
509 
511  future<uint64_t> size() const noexcept;
512 
522  future<> close() noexcept;
523 
526 
542  template <typename CharType>
543  future<temporary_buffer<CharType>>
544  dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc = default_priority_class(), io_intent* intent = nullptr) noexcept {
545  return dma_read_bulk_impl(offset, range_size, pc, intent).then([] (temporary_buffer<uint8_t> t) {
546  return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release());
547  });
548  }
549 
559 private:
561  dma_read_bulk_impl(uint64_t offset, size_t range_size, const io_priority_class& pc, io_intent* intent) noexcept;
562 
564  dma_write_impl(uint64_t pos, const uint8_t* buffer, size_t len, const io_priority_class& pc, io_intent* intent) noexcept;
565 
567  dma_read_impl(uint64_t pos, size_t len, const io_priority_class& pc, io_intent* intent) noexcept;
568 
570  dma_read_impl(uint64_t aligned_pos, uint8_t* aligned_buffer, size_t aligned_len, const io_priority_class& pc, io_intent* intent) noexcept;
571 
573  dma_read_exactly_impl(uint64_t pos, size_t len, const io_priority_class& pc, io_intent* intent) noexcept;
574 
575  future<uint64_t> get_lifetime_hint_impl(int op) noexcept;
576  future<> set_lifetime_hint_impl(int op, uint64_t hint) noexcept;
577 
578  friend class reactor;
579  friend class file_impl;
580 };
581 
589 template <typename Func>
590 SEASTAR_CONCEPT( requires std::invocable<Func, file&> && std::is_nothrow_move_constructible_v<Func> )
591 auto with_file(future<file> file_fut, Func func) noexcept {
592  static_assert(std::is_nothrow_move_constructible_v<Func>, "Func's move constructor must not throw");
593  return file_fut.then([func = std::move(func)] (file f) mutable {
594  return do_with(std::move(f), [func = std::move(func)] (file& f) mutable {
595  return futurize_invoke(func, f).finally([&f] {
596  return f.close();
597  });
598  });
599  });
600 }
601 
616 template <typename Func>
617 SEASTAR_CONCEPT( requires std::invocable<Func, file&> && std::is_nothrow_move_constructible_v<Func> )
618 auto with_file_close_on_failure(future<file> file_fut, Func func) noexcept {
619  static_assert(std::is_nothrow_move_constructible_v<Func>, "Func's move constructor must not throw");
620  return file_fut.then([func = std::move(func)] (file f) mutable {
621  return do_with(std::move(f), [func = std::move(func)] (file& f) mutable {
622  return futurize_invoke(std::move(func), f).then_wrapped([&f] (auto ret) mutable {
623  if (!ret.failed()) {
624  return ret;
625  }
626  return ret.finally([&f] {
627  // If f.close() fails, return that as nested exception.
628  return f.close();
629  });
630  });
631  });
632  });
633 }
634 
638 
646 class file_handle {
647  std::unique_ptr<file_handle_impl> _impl;
648 private:
649  explicit file_handle(std::unique_ptr<file_handle_impl> impl) : _impl(std::move(impl)) {}
650 public:
656  file_handle& operator=(const file_handle&);
658  file_handle& operator=(file_handle&&) noexcept;
660  file to_file() const &;
662  file to_file() &&;
663 
664  friend class file;
665 };
666 
668 
// Exception type whose what() text is "cancelled".
class cancelled_error : public std::exception {
public:
    // `override` makes the compiler verify that this really replaces
    // std::exception::what(); without it a signature drift would silently
    // introduce an unrelated virtual function.
    const char* what() const noexcept override {
        return "cancelled";
    }
};
676 
677 }
The exception that cancelled I/Os resolve their futures with (see io_intent)
Definition: file.hh:670
Definition: file.hh:303
Definition: file.hh:94
A shard-transportable handle to a file.
Definition: file.hh:646
file_handle(file_handle &&) noexcept
Moves a file handle object.
file_handle(const file_handle &)
Copies a file handle object.
file to_file() const &
Converts the file handle object to a file.
Definition: file.hh:101
Definition: file.hh:166
future close() noexcept
future< size_t > dma_read(uint64_t pos, std::vector< iovec > iov, const io_priority_class &pc=default_priority_class(), io_intent *intent=nullptr) noexcept
subscription< directory_entry > list_directory(std::function< future<>(directory_entry de)> next)
Returns a directory listing, given that this file object is a directory.
future< temporary_buffer< CharType > > dma_read_exactly(uint64_t pos, size_t len, const io_priority_class &pc=default_priority_class(), io_intent *intent=nullptr) noexcept
Definition: file.hh:320
file & operator=(const file &x) noexcept=default
future< int > fcntl_short(int op, uintptr_t arg=0UL) noexcept
future allocate(uint64_t position, uint64_t length) noexcept
future set_inode_lifetime_hint(uint64_t hint) noexcept
future< uint64_t > get_inode_lifetime_hint() noexcept
uint64_t memory_dma_alignment() const noexcept
Alignment requirement for data buffers.
Definition: file.hh:237
file(file_handle &&handle) noexcept
Constructs a file object from a file_handle obtained from another shard.
size_t disk_read_max_length() const noexcept
Definition: file.hh:245
future< int > fcntl(int op, uintptr_t arg=0UL) noexcept
future< temporary_buffer< CharType > > dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class &pc=default_priority_class(), io_intent *intent=nullptr) noexcept
Definition: file.hh:544
future< int > ioctl(uint64_t cmd, void *argp) noexcept
future set_file_lifetime_hint(uint64_t hint) noexcept
future flush() noexcept
file_handle dup()
Creates a handle that can be transported across shards.
future< size_t > dma_write(uint64_t pos, std::vector< iovec > iov, const io_priority_class &pc=default_priority_class(), io_intent *intent=nullptr) noexcept
file(file &&x) noexcept
Moves a file object.
Definition: file.hh:199
uint64_t disk_read_dma_alignment() const noexcept
Alignment requirement for file offsets (for reads)
Definition: file.hh:217
future< size_t > dma_write(uint64_t pos, const CharType *buffer, size_t len, const io_priority_class &pc=default_priority_class(), io_intent *intent=nullptr) noexcept
Definition: file.hh:350
future discard(uint64_t offset, uint64_t length) noexcept
future< temporary_buffer< CharType > > dma_read(uint64_t pos, size_t len, const io_priority_class &pc=default_priority_class(), io_intent *intent=nullptr) noexcept
Definition: file.hh:295
uint64_t disk_overwrite_dma_alignment() const noexcept
Definition: file.hh:232
future< size_t > dma_read(uint64_t aligned_pos, CharType *aligned_buffer, size_t aligned_len, const io_priority_class &pc=default_priority_class(), io_intent *intent=nullptr) noexcept
Definition: file.hh:274
future truncate(uint64_t length) noexcept
Truncates the file to a specified length.
size_t disk_write_max_length() const noexcept
Definition: file.hh:253
uint64_t disk_write_dma_alignment() const noexcept
Alignment requirement for file offsets (for writes)
Definition: file.hh:222
future< struct stat > stat() noexcept
Returns stat information about the file.
future< uint64_t > size() const noexcept
Gets the file size.
future< int > ioctl_short(uint64_t cmd, void *argp) noexcept
file & operator=(file &&x) noexcept=default
Move-assigns a file object.
future< uint64_t > get_file_lifetime_hint() noexcept
file() noexcept
Definition: file.hh:179
file(const file &x)=default
A representation of a possibly not-yet-computed value.
Definition: future.hh:1351
Definition: io_intent.hh:40
Definition: io_priority_class.hh:37
Definition: reactor.hh:180
Definition: shared_ptr.hh:486
Definition: stream.hh:123
Definition: temporary_buffer.hh:62
deleter release() noexcept
Definition: temporary_buffer.hh:198
CharType * get_write() noexcept
Definition: temporary_buffer.hh:123
size_t size() const noexcept
Gets the buffer size.
Definition: temporary_buffer.hh:125
std::optional< directory_entry_type > type
Type of the directory entry, if known.
Definition: file.hh:49
sstring name
Name of the file in a directory entry. Will never be "." or "..". Only the last component is included...
Definition: file.hh:47
directory_entry_type
Definition: file-types.hh:65
auto with_file(future< file > file_fut, Func func) noexcept
Helper for ensuring a file is closed after func is called.
Definition: file.hh:591
auto with_file_close_on_failure(future< file > file_fut, Func func) noexcept
Helper for ensuring a file is closed if func fails.
Definition: file.hh:618
A directory entry being listed.
Definition: file.hh:45
Filesystem object stat information.
Definition: file.hh:53
auto do_with(T1 &&rv1, T2 &&rv2, More &&... more) noexcept
Definition: do_with.hh:129
holds the implementation parts of the metrics layer, do not use directly.
Seastar API namespace.
Definition: abort_on_ebadf.hh:24
Definition: file.hh:76
bool sloppy_size
Allow the file size not to track the amount of data written until a flush.
Definition: file.hh:78
uint64_t sloppy_size_hint
Hint as to what the eventual file size will be.
Definition: file.hh:79
bool append_is_unlikely
Hint that the user promises (or at least tries hard) not to write beyond the current file size.
Definition: file.hh:81
file_permissions create_permissions
File permissions to use when creating a file.
Definition: file.hh:80
uint64_t extent_allocation_size_hint
Allocate this much disk space when extending the file.
Definition: file.hh:77