Seastar
High performance C++ framework for concurrent servers
file.hh
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright 2015 Cloudius Systems
20 */
21
22#pragma once
23
24#include <seastar/util/std-compat.hh>
25#include <seastar/core/coroutine.hh>
26#include <seastar/coroutine/generator.hh>
27#include <seastar/core/do_with.hh>
28#include <seastar/core/stream.hh>
29#include <seastar/core/sstring.hh>
30#include <seastar/core/shared_ptr.hh>
31#include <seastar/core/align.hh>
32#include <seastar/core/io_priority_class.hh>
33#include <seastar/core/file-types.hh>
34#include <seastar/core/circular_buffer.hh>
35#include <seastar/util/modules.hh>
36#ifndef SEASTAR_MODULE
37#include <sys/statvfs.h>
38#include <sys/ioctl.h>
39#include <linux/fs.h>
40#include <sys/uio.h>
41#include <unistd.h>
42#include <chrono>
43#include <concepts>
44#include <cstdint>
45#include <functional>
46#include <optional>
47#endif
48
49namespace seastar {
50
51SEASTAR_MODULE_EXPORT_BEGIN
52
55
/// Name of the file in a directory entry. Will never be "." or "..".
/// Only the last component is included.
59 sstring name;
/// Type of the directory entry, if known (empty optional otherwise).
61 std::optional<directory_entry_type> type;
62};
63
/// Group details from the system group database.
/// NOTE(review): the fields look like a copy of POSIX `struct group`
/// (gr_name, gr_passwd, gr_gid, gr_mem) — confirm against the code that fills them.
66 sstring group_name;
67 sstring group_passwd;
68 __gid_t group_id;
69 std::vector<sstring> group_members;
70};
71
/// Filesystem object stat information.
///
/// Plain aggregate: numeric attributes are stored as 64-bit unsigned values
/// and the three timestamps as std::chrono::system_clock time points.
73struct stat_data {
74 uint64_t device_id; // ID of device containing file
75 uint64_t inode_number; // Inode number
76 uint64_t mode; // File type and mode
78 uint64_t number_of_links;// Number of hard links
79 uint64_t uid; // User ID of owner
80 uint64_t gid; // Group ID of owner
81 uint64_t rdev; // Device ID (if special file)
82 uint64_t size; // Total size, in bytes
83 uint64_t block_size; // Block size for filesystem I/O
84 uint64_t allocated_size; // Total size of allocated storage, in bytes
85
86 std::chrono::system_clock::time_point time_accessed; // Time of last content access
87 std::chrono::system_clock::time_point time_modified; // Time of last content modification
88 std::chrono::system_clock::time_point time_changed; // Time of last status change (either content or attributes)
89};
90
/// Allocate this much disk space when extending the file (default 1 << 20 bytes, i.e. 1 MiB).
97 uint64_t extent_allocation_size_hint = 1 << 20;
/// Allow the file size not to track the amount of data written until a flush.
98 bool sloppy_size = false;
/// Hint as to what the eventual file size will be (default 1 << 20 bytes, i.e. 1 MiB).
99 uint64_t sloppy_size_hint = 1 << 20;
/// File permissions to use when creating a file.
100 file_permissions create_permissions = file_permissions::default_file_permissions;
/// Hint that user promises (or at least tries hard) not to write behind file size.
101 bool append_is_unlikely = false;
102
// The fsxattr.fsx_extsize is 32-bit, so the hint is capped at 2^31 bytes.
// The shift must be done on a 64-bit unsigned operand: `1 << 31` is evaluated
// in (32-bit) int, produces INT_MIN, and sign-extends to 0xFFFFFFFF80000000
// when converted to uint64_t.
static constexpr uint64_t max_extent_allocation_size_hint = uint64_t{1} << 31;
105
106 // XFS ignores hints that are not aligned to the logical block size.
107 // To fulfill the requirement, we ensure that hint is aligned to 128KB (best guess).
108 static constexpr uint32_t min_extent_size_hint_alignment{128u << 10}; // 128KB
109};
110
111class file;
112class file_impl;
113class io_intent;
114class file_handle;
115class file_data_sink_impl;
116class file_data_source_impl;
117
118// A handle that can be transported across shards and used to
119// create a dup(2)-like `file` object referring to the same underlying file
121public:
// Virtual destructor: implementations are owned and destroyed through
// std::unique_ptr<file_handle_impl>.
122 virtual ~file_handle_impl() = default;
// Returns a new, separately owned handle implementation.
// NOTE(review): presumably refers to the same underlying file — confirm in implementations.
123 virtual std::unique_ptr<file_handle_impl> clone() const = 0;
// Converts this handle implementation into a file implementation.
// Rvalue-qualified, so it can only be invoked on an expiring object.
124 virtual shared_ptr<file_impl> to_file() && = 0;
125};
126
// `file` is a friend so that its accessors can read the protected
// alignment and maximum-length members below.
128 friend class file;
129protected:
// NOTE(review): presumably returns the implementation object behind `f`;
// defined out of line — confirm.
130 static file_impl* get_file_impl(file& f);
// DMA alignment requirements, in bytes. All default to 4096; the `file`
// accessors of the same names return these values.
131 unsigned _memory_dma_alignment = 4096;
132 unsigned _disk_read_dma_alignment = 4096;
133 unsigned _disk_write_dma_alignment = 4096;
134 unsigned _disk_overwrite_dma_alignment = 4096;
// Maximum read/write lengths, defaulting to 1u << 30 (1 GiB).
135 unsigned _read_max_length = 1u << 30;
136 unsigned _write_max_length = 1u << 30;
137public:
138 virtual ~file_impl() {}
139
// Pure-virtual data path: single-buffer and iovec flavours of read and write,
// each taking an optional io_intent pointer.
140 virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, io_intent*) = 0;
141 virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, io_intent*) = 0;
142 virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, io_intent*) = 0;
143 virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, io_intent*) = 0;
144 virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, io_intent*) = 0;
145
146 virtual future<> flush() = 0;
147 virtual future<struct stat> stat() = 0;
148 virtual future<> truncate(uint64_t length) = 0;
149 virtual future<> discard(uint64_t offset, uint64_t length) = 0;
// ioctl/fcntl and their *_short variants are virtual but not pure: a base
// implementation is declared here and defined elsewhere.
150 virtual future<int> ioctl(uint64_t cmd, void* argp) noexcept;
151 virtual future<int> ioctl_short(uint64_t cmd, void* argp) noexcept;
152 virtual future<int> fcntl(int op, uintptr_t arg) noexcept;
153 virtual future<int> fcntl_short(int op, uintptr_t arg) noexcept;
154 virtual future<> allocate(uint64_t position, uint64_t length) = 0;
155 virtual future<uint64_t> size() = 0;
156 virtual future<> close() = 0;
// Not pure: a base implementation of dup() is declared here and defined elsewhere.
157 virtual std::unique_ptr<file_handle_impl> dup();
// Callback-based directory listing; `next` is invoked with each directory_entry.
158 virtual subscription<directory_entry> list_directory(std::function<future<> (directory_entry de)> next) = 0;
159 // due to https://github.com/scylladb/seastar/issues/1913, we cannot use
160 // buffered generator yet.
161 virtual coroutine::experimental::generator<directory_entry> experimental_list_directory();
162};
163
// Asynchronously produces a file_impl for the descriptor `fd`, given the open
// options, the open(2)-style flags and the stat result of the file.
// NOTE(review): presumably selects the concrete implementation from `st` and
// `options` — confirm in the definition.
164future<shared_ptr<file_impl>> make_file_impl(int fd, file_open_options options, int oflags, struct stat st) noexcept;
165
167
/// A value-type wrapper around a shared file implementation.
///
/// All operations are forwarded to the shared_ptr<file_impl> held in
/// \c _file_impl; copies of a \c file share the same implementation object.
177class file {
178 shared_ptr<file_impl> _file_impl;
179public:
/// Default constructor: creates a file with no implementation attached
/// (operator bool() returns false).
190 file() noexcept : _file_impl(nullptr) {}
191
193 : _file_impl(std::move(impl)) {}
194
/// Constructs a file object from a \ref file_handle obtained from another shard.
196 explicit file(file_handle&& handle) noexcept;
197
/// Checks whether the file object has an implementation attached.
202 explicit operator bool() const noexcept { return bool(_file_impl); }
203
/// Copies a file object; the copy shares the same implementation.
208 file(const file& x) = default;
/// Moves a file object.
210 file(file&& x) noexcept : _file_impl(std::move(x._file_impl)) {}
/// Copy-assigns a file object.
215 file& operator=(const file& x) noexcept = default;
/// Move-assigns a file object.
217 file& operator=(file&& x) noexcept = default;
218
219 // O_DIRECT reading requires that buffer, offset, and read length, are
220 // all aligned. Alignment of 4096 was necessary in the past, but no longer
221 // is - 512 is usually enough; But we'll need to use BLKSSZGET ioctl to
222 // be sure it is really enough on this filesystem. 4096 is always safe.
223 // In addition, if we start reading in things outside page boundaries,
224 // we will end up with various pages around, some of them with
225 // overlapping ranges. Those would be very challenging to cache.
226
/// Alignment requirement for file offsets (for reads).
/// Dereferences \c _file_impl without a null check.
228 uint64_t disk_read_dma_alignment() const noexcept {
229 return _file_impl->_disk_read_dma_alignment;
230 }
231
/// Alignment requirement for file offsets (for writes).
233 uint64_t disk_write_dma_alignment() const noexcept {
234 return _file_impl->_disk_write_dma_alignment;
235 }
236
/// Returns the implementation's overwrite DMA alignment value.
243 uint64_t disk_overwrite_dma_alignment() const noexcept {
244 return _file_impl->_disk_overwrite_dma_alignment;
245 }
246
/// Alignment requirement for data buffers.
248 uint64_t memory_dma_alignment() const noexcept {
249 return _file_impl->_memory_dma_alignment;
250 }
251
/// Returns the implementation's maximum read length.
256 size_t disk_read_max_length() const noexcept {
257 return _file_impl->_read_max_length;
258 }
259
/// Returns the implementation's maximum write length.
264 size_t disk_write_max_length() const noexcept {
265 return _file_impl->_write_max_length;
266 }
267
/// Reads into a caller-provided buffer. The buffer pointer is reinterpreted
/// as uint8_t* and forwarded to dma_read_impl() with a default priority class.
282 template <typename CharType>
284 dma_read(uint64_t aligned_pos, CharType* aligned_buffer, size_t aligned_len, io_intent* intent = nullptr) noexcept {
285 return dma_read_impl(aligned_pos, reinterpret_cast<uint8_t*>(aligned_buffer), aligned_len, internal::maybe_priority_class_ref(), intent);
286 }
287
/// Reads \c len bytes at \c pos into a newly produced buffer. The uint8_t
/// buffer from dma_read_impl() is re-wrapped as temporary_buffer<CharType>
/// using the same memory, size and deleter (no copy).
303 template <typename CharType>
304 future<temporary_buffer<CharType>> dma_read(uint64_t pos, size_t len, io_intent* intent = nullptr) noexcept {
305 return dma_read_impl(pos, len, internal::maybe_priority_class_ref(), intent).then([] (temporary_buffer<uint8_t> t) {
306 return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release());
307 });
308 }
309
/// Exception type with no additional state.
/// NOTE(review): presumably reported by dma_read_exactly() on a short read — confirm.
312 class eof_error : public std::exception {};
313
/// Same re-wrapping as dma_read(pos, len), but backed by dma_read_exactly_impl().
326 template <typename CharType>
328 dma_read_exactly(uint64_t pos, size_t len, io_intent* intent = nullptr) noexcept {
329 return dma_read_exactly_impl(pos, len, internal::maybe_priority_class_ref(), intent).then([] (temporary_buffer<uint8_t> t) {
330 return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release());
331 });
332 }
333
/// Scatter read: the iovec vector is moved into dma_read_impl().
343 future<size_t> dma_read(uint64_t pos, std::vector<iovec> iov, io_intent* intent = nullptr) noexcept {
344 return dma_read_impl(pos, std::move(iov), internal::maybe_priority_class_ref(), intent);
345 }
346
/// Writes from a caller-provided buffer. The buffer pointer is reinterpreted
/// as const uint8_t* and forwarded to dma_write_impl().
357 template <typename CharType>
358 future<size_t> dma_write(uint64_t pos, const CharType* buffer, size_t len, io_intent* intent = nullptr) noexcept {
359 return dma_write_impl(pos, reinterpret_cast<const uint8_t*>(buffer), len, internal::maybe_priority_class_ref(), intent);
360 }
361
/// Gather write: the iovec vector is moved into dma_write_impl().
371 future<size_t> dma_write(uint64_t pos, std::vector<iovec> iov, io_intent* intent = nullptr) noexcept {
372 return dma_write_impl(pos, std::move(iov), internal::maybe_priority_class_ref(), intent);
373 }
374
379 future<> flush() noexcept;
380
/// Returns stat information about the file.
382 future<struct stat> stat() noexcept;
383
/// Truncates the file to a specified length.
385 future<> truncate(uint64_t length) noexcept;
386
399 future<> allocate(uint64_t position, uint64_t length) noexcept;
400
405 future<> discard(uint64_t offset, uint64_t length) noexcept;
406
419 future<int> ioctl(uint64_t cmd, void* argp) noexcept;
420
435 future<int> ioctl_short(uint64_t cmd, void* argp) noexcept;
436
448 future<int> fcntl(int op, uintptr_t arg = 0UL) noexcept;
449
463 future<int> fcntl_short(int op, uintptr_t arg = 0UL) noexcept;
464
476 [[deprecated("This API was removed from the kernel")]]
477 future<> set_file_lifetime_hint(uint64_t hint) noexcept;
478
490 future<> set_inode_lifetime_hint(uint64_t hint) noexcept;
491
503 [[deprecated("This API was removed from the kernel")]]
504 future<uint64_t> get_file_lifetime_hint() noexcept;
505
517 future<uint64_t> get_inode_lifetime_hint() noexcept;
518
/// Gets the file size.
520 future<uint64_t> size() const noexcept;
521
532 future<> close() noexcept;
533
536
/// Returns a directory listing, given that this file object is a directory.
538 // due to https://github.com/scylladb/seastar/issues/1913, we cannot use
539 // buffered generator yet.
540 coroutine::experimental::generator<directory_entry> experimental_list_directory();
541
/// Bulk read of \c range_size bytes starting at \c offset. The uint8_t buffer
/// from dma_read_bulk_impl() is re-wrapped as temporary_buffer<CharType>
/// without copying.
556 template <typename CharType>
557 future<temporary_buffer<CharType>>
558 dma_read_bulk(uint64_t offset, size_t range_size, io_intent* intent = nullptr) noexcept {
559 return dma_read_bulk_impl(offset, range_size, internal::maybe_priority_class_ref(), intent).then([] (temporary_buffer<uint8_t> t) {
560 return temporary_buffer<CharType>(reinterpret_cast<CharType*>(t.get_write()), t.size(), t.release());
561 });
562 }
563
// Non-template back ends of the public templates above. They work on uint8_t
// and take an explicit priority-class reference; defined out of line.
573private:
575 dma_read_bulk_impl(uint64_t offset, size_t range_size, internal::maybe_priority_class_ref pc, io_intent* intent) noexcept;
576
578 dma_write_impl(uint64_t pos, const uint8_t* buffer, size_t len, internal::maybe_priority_class_ref pc, io_intent* intent) noexcept;
579
581 dma_write_impl(uint64_t pos, std::vector<iovec> iov, internal::maybe_priority_class_ref pc, io_intent* intent) noexcept;
582
584 dma_read_impl(uint64_t pos, size_t len, internal::maybe_priority_class_ref pc, io_intent* intent) noexcept;
585
587 dma_read_impl(uint64_t aligned_pos, uint8_t* aligned_buffer, size_t aligned_len, internal::maybe_priority_class_ref pc, io_intent* intent) noexcept;
588
590 dma_read_impl(uint64_t pos, std::vector<iovec> iov, internal::maybe_priority_class_ref pc, io_intent* intent) noexcept;
591
593 dma_read_exactly_impl(uint64_t pos, size_t len, internal::maybe_priority_class_ref pc, io_intent* intent) noexcept;
594
// Shared helpers taking the operation code as `op`.
// NOTE(review): presumably back the file/inode lifetime-hint getters and
// setters above — confirm in the definitions.
595 future<uint64_t> get_lifetime_hint_impl(int op) noexcept;
596 future<> set_lifetime_hint_impl(int op, uint64_t hint) noexcept;
597
598 friend class file_impl;
599 friend class file_data_sink_impl;
600 friend class file_data_source_impl;
601};
602
610template <std::invocable<file&> Func>
611requires std::is_nothrow_move_constructible_v<Func>
612auto with_file(future<file> file_fut, Func func) noexcept {
613 static_assert(std::is_nothrow_move_constructible_v<Func>, "Func's move constructor must not throw");
614 return file_fut.then([func = std::move(func)] (file f) mutable {
615 return do_with(std::move(f), [func = std::move(func)] (file& f) mutable {
616 return futurize_invoke(func, f).finally([&f] {
617 return f.close();
618 });
619 });
620 });
621}
622
637template <std::invocable<file&> Func>
638requires std::is_nothrow_move_constructible_v<Func>
639auto with_file_close_on_failure(future<file> file_fut, Func func) noexcept {
640 static_assert(std::is_nothrow_move_constructible_v<Func>, "Func's move constructor must not throw");
641 return file_fut.then([func = std::move(func)] (file f) mutable {
642 return do_with(std::move(f), [func = std::move(func)] (file& f) mutable {
643 return futurize_invoke(std::move(func), f).then_wrapped([&f] (auto ret) mutable {
644 if (!ret.failed()) {
645 return ret;
646 }
647 return ret.finally([&f] {
648 // If f.close() fails, return that as nested exception.
649 return f.close();
650 });
651 });
652 });
653 });
654}
655
659
// A shard-transportable handle to a file; owns its implementation exclusively.
668 std::unique_ptr<file_handle_impl> _impl;
669private:
// Private: only this class and its friend `file` can wrap an implementation
// into a handle.
670 explicit file_handle(std::unique_ptr<file_handle_impl> impl) : _impl(std::move(impl)) {}
671public:
// Copy-assigns a file handle object (defined out of line).
677 file_handle& operator=(const file_handle&);
// Move-assigns a file handle object (defined out of line).
679 file_handle& operator=(file_handle&&) noexcept;
// Converts the file handle object to a file. This overload is for lvalue handles.
681 file to_file() const &;
684
685 friend class file;
686};
687
689
/// The exception that cancelled IOs resolve their future into (see io_intent).
class cancelled_error : public std::exception {
public:
    /// Returns the fixed description "cancelled".
    ///
    /// Marked \c override so the compiler verifies that this really replaces
    /// std::exception::what().
    const char* what() const noexcept override {
        return "cancelled";
    }
};
697
698SEASTAR_MODULE_EXPORT_END
699
700}
The exception that cancelled IOs resolve their futures into (see io_intent)
Definition: file.hh:691
Definition: file.hh:312
Definition: file.hh:120
A shard-transportable handle to a file.
Definition: file.hh:667
file_handle(file_handle &&) noexcept
Moves a file handle object.
file_handle(const file_handle &)
Copies a file handle object.
file to_file() const &
Converts the file handle object to a file.
Definition: file.hh:127
Definition: file.hh:177
future close() noexcept
future< int > ioctl(uint64_t cmd, void *argp) noexcept
future< temporary_buffer< CharType > > dma_read(uint64_t pos, size_t len, io_intent *intent=nullptr) noexcept
Definition: file.hh:304
file & operator=(file &&x) noexcept=default
Move-assigns a file object.
future< temporary_buffer< CharType > > dma_read_bulk(uint64_t offset, size_t range_size, io_intent *intent=nullptr) noexcept
Definition: file.hh:558
subscription< directory_entry > list_directory(std::function< future<>(directory_entry de)> next)
Returns a directory listing, given that this file object is a directory.
file & operator=(const file &x) noexcept=default
future< size_t > dma_write(uint64_t pos, std::vector< iovec > iov, io_intent *intent=nullptr) noexcept
Definition: file.hh:371
future allocate(uint64_t position, uint64_t length) noexcept
future set_inode_lifetime_hint(uint64_t hint) noexcept
uint64_t memory_dma_alignment() const noexcept
Alignment requirement for data buffers.
Definition: file.hh:248
file(file_handle &&handle) noexcept
Constructs a file object from a file_handle obtained from another shard.
future< size_t > dma_write(uint64_t pos, const CharType *buffer, size_t len, io_intent *intent=nullptr) noexcept
Definition: file.hh:358
future< int > fcntl_short(int op, uintptr_t arg=0UL) noexcept
size_t disk_read_max_length() const noexcept
Definition: file.hh:256
future< size_t > dma_read(uint64_t pos, std::vector< iovec > iov, io_intent *intent=nullptr) noexcept
Definition: file.hh:343
future set_file_lifetime_hint(uint64_t hint) noexcept
future flush() noexcept
file_handle dup()
Creates a handle that can be transported across shards.
future< uint64_t > get_inode_lifetime_hint() noexcept
file(file &&x) noexcept
Moves a file object.
Definition: file.hh:210
uint64_t disk_read_dma_alignment() const noexcept
Alignment requirement for file offsets (for reads)
Definition: file.hh:228
future< uint64_t > size() const noexcept
Gets the file size.
future discard(uint64_t offset, uint64_t length) noexcept
future< struct stat > stat() noexcept
Returns stat information about the file.
uint64_t disk_overwrite_dma_alignment() const noexcept
Definition: file.hh:243
future< size_t > dma_read(uint64_t aligned_pos, CharType *aligned_buffer, size_t aligned_len, io_intent *intent=nullptr) noexcept
Definition: file.hh:284
future truncate(uint64_t length) noexcept
Truncates the file to a specified length.
future< int > ioctl_short(uint64_t cmd, void *argp) noexcept
coroutine::experimental::generator< directory_entry > experimental_list_directory()
Returns a directory listing, given that this file object is a directory.
size_t disk_write_max_length() const noexcept
Definition: file.hh:264
uint64_t disk_write_dma_alignment() const noexcept
Alignment requirement for file offsets (for writes)
Definition: file.hh:233
future< int > fcntl(int op, uintptr_t arg=0UL) noexcept
future< uint64_t > get_file_lifetime_hint() noexcept
file() noexcept
Definition: file.hh:190
file(const file &x)=default
future< temporary_buffer< CharType > > dma_read_exactly(uint64_t pos, size_t len, io_intent *intent=nullptr) noexcept
Definition: file.hh:328
A representation of a possibly not-yet-computed value.
Definition: future.hh:1240
Definition: io_intent.hh:44
Definition: shared_ptr.hh:507
Definition: stream.hh:127
Definition: temporary_buffer.hh:67
deleter release() noexcept
Definition: temporary_buffer.hh:203
size_t size() const noexcept
Gets the buffer size.
Definition: temporary_buffer.hh:130
CharType * get_write() noexcept
Definition: temporary_buffer.hh:128
std::optional< directory_entry_type > type
Type of the directory entry, if known.
Definition: file.hh:61
sstring name
Name of the file in a directory entry. Will never be "." or "..". Only the last component is included...
Definition: file.hh:59
directory_entry_type
Definition: file-types.hh:70
auto with_file_close_on_failure(future< file > file_fut, Func func) noexcept
Helper for ensuring a file is closed if func fails.
Definition: file.hh:639
auto with_file(future< file > file_fut, Func func) noexcept
Helper for ensuring a file is closed after func is called.
Definition: file.hh:612
A directory entry being listed.
Definition: file.hh:57
Group details from the system group database.
Definition: file.hh:65
Filesystem object stat information.
Definition: file.hh:73
auto do_with(T1 &&rv1, T2 &&rv2, More &&... more) noexcept
Definition: do_with.hh:135
holds the implementation parts of the metrics layer, do not use directly.
Seastar API namespace.
Definition: abort_on_ebadf.hh:26
STL namespace.
Definition: file.hh:96
bool sloppy_size
Allow the file size not to track the amount of data written until a flush.
Definition: file.hh:98
uint64_t sloppy_size_hint
Hint as to what the eventual file size will be.
Definition: file.hh:99
bool append_is_unlikely
Hint that user promises (or at least tries hard) not to write behind file size.
Definition: file.hh:101
file_permissions create_permissions
File permissions to use when creating a file.
Definition: file.hh:100
uint64_t extent_allocation_size_hint
Allocate this much disk space when extending the file.
Definition: file.hh:97