From ed67a877a3998a968ebdec27cf210d2dbb6ca0da Mon Sep 17 00:00:00 2001 From: Liav A Date: Sat, 28 Jan 2023 19:00:54 +0200 Subject: [PATCH] Kernel+SystemServer+Base: Introduce the RAMFS filesystem This filesystem is based on the code of the long-lived TmpFS. It differs from that filesystem in one key point - its root inode doesn't have a sticky bit on it. Therefore, we mount it on /dev, to ensure only root can modify files in that directory. In addition to that, /tmp is mounted directly in the SystemServer main (start) code, so it's no longer specified in the fstab file. We ensure that /tmp has a sticky bit and has the value 0777 for root directory permissions, which is certainly a special case when using RAM-backed (and, in general, other) filesystems. Because of these two changes, there is no longer a need to maintain the TmpFS filesystem, so it is removed (renamed to RAMFS). The name RAMFS represents the purpose of this filesystem in a much better way - it relies on being backed by RAM "storage", and therefore it's easy to conclude it's temporary and volatile, so its content is gone on either system shutdown or unmounting of the filesystem. 
--- Base/etc/fstab | 1 - Base/usr/share/man/man2/mount.md | 2 +- Documentation/Kernel/{TmpFS.md => RAMFS.md} | 24 +++--- Kernel/CMakeLists.txt | 4 +- Kernel/FileSystem/RAMFS/FileSystem.cpp | 40 ++++++++++ .../FileSystem/{TmpFS => RAMFS}/FileSystem.h | 14 ++-- Kernel/FileSystem/{TmpFS => RAMFS}/Inode.cpp | 76 +++++++++---------- Kernel/FileSystem/{TmpFS => RAMFS}/Inode.h | 27 +++---- Kernel/FileSystem/TmpFS/FileSystem.cpp | 40 ---------- Kernel/Forward.h | 3 +- Kernel/Syscalls/mount.cpp | 4 +- Userland/Services/SystemServer/main.cpp | 6 +- 12 files changed, 123 insertions(+), 118 deletions(-) rename Documentation/Kernel/{TmpFS.md => RAMFS.md} (86%) create mode 100644 Kernel/FileSystem/RAMFS/FileSystem.cpp rename Kernel/FileSystem/{TmpFS => RAMFS}/FileSystem.h (72%) rename Kernel/FileSystem/{TmpFS => RAMFS}/Inode.cpp (82%) rename Kernel/FileSystem/{TmpFS => RAMFS}/Inode.h (79%) delete mode 100644 Kernel/FileSystem/TmpFS/FileSystem.cpp diff --git a/Base/etc/fstab b/Base/etc/fstab index 2c8ea0da07..39e7256dfd 100644 --- a/Base/etc/fstab +++ b/Base/etc/fstab @@ -11,4 +11,3 @@ /usr/Tests /usr/Tests bind bind,nodev,ro /usr/local /usr/local bind bind,nodev,nosuid -none /tmp tmp nodev,nosuid diff --git a/Base/usr/share/man/man2/mount.md b/Base/usr/share/man/man2/mount.md index bb6e5a72c4..ccbb66c520 100644 --- a/Base/usr/share/man/man2/mount.md +++ b/Base/usr/share/man/man2/mount.md @@ -20,7 +20,7 @@ over `target`. * `Ext2FS` (or `ext2`): The ext2 filesystem. * `ProcFS` (or `proc`): The process pseudo-filesystem (normally mounted at `/proc`). * `DevPtsFS` (or `devpts`): The pseudoterminal pseudo-filesystem (normally mounted at `/dev/pts`). -* `TmpFS` (or `tmp`): A non-persistent filesystem that stores all its data in RAM. An instance of this filesystem is normally mounted at `/tmp`. +* `RAMFS` (or `ram`): A non-persistent filesystem that stores all its data in RAM. An instance of this filesystem is normally mounted at `/tmp`. 
* `Plan9FS` (or `9p`): A remote filesystem served over the 9P protocol. For Ext2FS, `source_fd` must refer to an open file descriptor to a file diff --git a/Documentation/Kernel/TmpFS.md b/Documentation/Kernel/RAMFS.md similarity index 86% rename from Documentation/Kernel/TmpFS.md rename to Documentation/Kernel/RAMFS.md index 7ba607eb60..2eaf6d932d 100644 --- a/Documentation/Kernel/TmpFS.md +++ b/Documentation/Kernel/RAMFS.md @@ -1,16 +1,16 @@ -# `TmpFS` filesystem and its purposes +# `RAMFS` filesystem and its purposes -`TmpFS` is a RAM-backed filesystem. It is used to hold files and directories in the `/tmp` directory and +`RAMFS` is a RAM-backed filesystem. It is used to hold files and directories in the `/tmp` directory and device nodes in the `/dev` directory. -## What are the `TmpFS` filesystem characteristics? +## What are the `RAMFS` filesystem characteristics? -`TmpFS` is a pure RAM-backed filesystem, which means all files and directories -actually live in memory, each in its own `TmpFS` instance in the kernel. +`RAMFS` is a pure RAM-backed filesystem, which means all files and directories +actually live in memory, each in its own `RAMFS` instance in the kernel. -The `TmpFS` in its current design is very conservative about allocating virtual memory ranges +The `RAMFS` in its current design is very conservative about allocating virtual memory ranges for itself, and instead it uses the `AnonymousVMObject` object to hold physical pages containing -data for its inodes. When doing actual IO, the `TmpFS` code temporarily allocates a small virtual memory +data for its inodes. When doing actual IO, the `RAMFS` code temporarily allocates a small virtual memory `Memory::Region` to perform the task, which works quite well although it puts a strain on the virtual memory mapping code. The current design also ensures that fabricated huge files can be easily created in the filesystem with very small overhead until actual IO is performed. 
@@ -24,10 +24,10 @@ Many test suites in the project leverage the `/tmp` for placing their test files when trying to check the correctness of many system-related functionality. Other programs rely on `/tmp` for placing their temporary files to properly function. -### Why does the `TmpFS` work well for the `/dev` directory? +### Why does the `RAMFS` work well for the `/dev` directory? -To understand why `TmpFS` works reliably when mounted on `/dev`, we must understand -first what we did in the past and how `TmpFS` solves many of the issues with the previous design. +To understand why `RAMFS` works reliably when mounted on `/dev`, we must understand +first what we did in the past and how `RAMFS` solves many of the issues with the previous design. At first, we didn't have any special filesystem mounted in `/dev` as the image build script generated all the required device nodes in `/dev`. This was quite sufficient in @@ -54,7 +54,7 @@ The `DevFS` solution was short-lived, and was quickly replaced by the `DevTmpFS` That new shiny filesystem was again specific to `/dev`, but it solved many of the issues `DevFS` suffered from - no more hardcoded permissions and now the design has flexible filesystem layout in its mindset. -This was achieved by implementing from scratch a filesystem that resembles the `TmpFS` +This was achieved by implementing from scratch a filesystem that resembles the `RAMFS` filesystem, but was different in one major aspect - only device nodes and directories are allowed to be in `/dev`. This strict requirement has been mandated to ensure the user doesn't accidentally put unrelated files in `/dev`. When the `DevTmpFS` was invented, it clearly @@ -65,6 +65,6 @@ in this document, but ultimately evolved to be flexible enough to work quite wel Everything worked quite well, but there was still a prominent problem with `DevTmpFS` - it was an entire filesystem solution just for `/dev` and nobody else used it. 
Testing the filesystem was quite clunky and truthfully lacking from the beginning until its removal. -To solve this problem, it was decided to stop using it, and instead just use `TmpFS`. +To solve this problem, it was decided to stop using it, and instead just use `RAMFS`. To ensure the current behavior of disallowing regular files in `/dev`, a new mount flag called `MS_NOREGULAR` was invented, so it could be mounted with it. diff --git a/Kernel/CMakeLists.txt b/Kernel/CMakeLists.txt index 4d28850b50..f443adfdfb 100644 --- a/Kernel/CMakeLists.txt +++ b/Kernel/CMakeLists.txt @@ -135,6 +135,8 @@ set(KERNEL_SOURCES FileSystem/ProcFS/ComponentRegistry.cpp FileSystem/ProcFS/FileSystem.cpp FileSystem/ProcFS/Inode.cpp + FileSystem/RAMFS/FileSystem.cpp + FileSystem/RAMFS/Inode.cpp FileSystem/SysFS/Component.cpp FileSystem/SysFS/DirectoryInode.cpp FileSystem/SysFS/FileSystem.cpp @@ -196,8 +198,6 @@ set(KERNEL_SOURCES FileSystem/SysFS/Subsystems/Kernel/Variables/DumpKmallocStack.cpp FileSystem/SysFS/Subsystems/Kernel/Variables/StringVariable.cpp FileSystem/SysFS/Subsystems/Kernel/Variables/UBSANDeadly.cpp - FileSystem/TmpFS/FileSystem.cpp - FileSystem/TmpFS/Inode.cpp FileSystem/VirtualFileSystem.cpp Firmware/BIOS.cpp Firmware/ACPI/Initialize.cpp diff --git a/Kernel/FileSystem/RAMFS/FileSystem.cpp b/Kernel/FileSystem/RAMFS/FileSystem.cpp new file mode 100644 index 0000000000..ec89fc6ee4 --- /dev/null +++ b/Kernel/FileSystem/RAMFS/FileSystem.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019-2020, Sergey Bugaev + * Copyright (c) 2022-2023, Liav A. 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +namespace Kernel { + +ErrorOr> RAMFS::try_create() +{ + return TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) RAMFS)); +} + +RAMFS::RAMFS() = default; +RAMFS::~RAMFS() = default; + +ErrorOr RAMFS::initialize() +{ + m_root_inode = TRY(RAMFSInode::try_create_root(*this)); + return {}; +} + +Inode& RAMFS::root_inode() +{ + VERIFY(!m_root_inode.is_null()); + return *m_root_inode; +} + +unsigned RAMFS::next_inode_index() +{ + MutexLocker locker(m_lock); + + return m_next_inode_index++; +} + +} diff --git a/Kernel/FileSystem/TmpFS/FileSystem.h b/Kernel/FileSystem/RAMFS/FileSystem.h similarity index 72% rename from Kernel/FileSystem/TmpFS/FileSystem.h rename to Kernel/FileSystem/RAMFS/FileSystem.h index 2990148095..bfe965e0dc 100644 --- a/Kernel/FileSystem/TmpFS/FileSystem.h +++ b/Kernel/FileSystem/RAMFS/FileSystem.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2019-2020, Sergey Bugaev - * Copyright (c) 2022, Liav A. + * Copyright (c) 2022-2023, Liav A. * * SPDX-License-Identifier: BSD-2-Clause */ @@ -13,24 +13,24 @@ namespace Kernel { -class TmpFS final : public FileSystem { - friend class TmpFSInode; +class RAMFS final : public FileSystem { + friend class RAMFSInode; public: - virtual ~TmpFS() override; + virtual ~RAMFS() override; static ErrorOr> try_create(); virtual ErrorOr initialize() override; - virtual StringView class_name() const override { return "TmpFS"sv; } + virtual StringView class_name() const override { return "RAMFS"sv; } virtual bool supports_watchers() const override { return true; } virtual Inode& root_inode() override; private: - TmpFS(); + RAMFS(); - LockRefPtr m_root_inode; + LockRefPtr m_root_inode; // NOTE: We start by assigning InodeIndex of 2, because 0 is invalid and 1 // is reserved for the root directory inode. 
diff --git a/Kernel/FileSystem/TmpFS/Inode.cpp b/Kernel/FileSystem/RAMFS/Inode.cpp similarity index 82% rename from Kernel/FileSystem/TmpFS/Inode.cpp rename to Kernel/FileSystem/RAMFS/Inode.cpp index 60c07e8bac..56f33b5b61 100644 --- a/Kernel/FileSystem/TmpFS/Inode.cpp +++ b/Kernel/FileSystem/RAMFS/Inode.cpp @@ -1,16 +1,16 @@ /* * Copyright (c) 2019-2020, Sergey Bugaev - * Copyright (c) 2022, Liav A. + * Copyright (c) 2022-2023, Liav A. * * SPDX-License-Identifier: BSD-2-Clause */ -#include +#include #include namespace Kernel { -TmpFSInode::TmpFSInode(TmpFS& fs, InodeMetadata const& metadata, LockWeakPtr parent) +RAMFSInode::RAMFSInode(RAMFS& fs, InodeMetadata const& metadata, LockWeakPtr parent) : Inode(fs, fs.next_inode_index()) , m_metadata(metadata) , m_parent(move(parent)) @@ -18,7 +18,7 @@ TmpFSInode::TmpFSInode(TmpFS& fs, InodeMetadata const& metadata, LockWeakPtr> TmpFSInode::try_create(TmpFS& fs, InodeMetadata const& metadata, LockWeakPtr parent) +ErrorOr> RAMFSInode::try_create(RAMFS& fs, InodeMetadata const& metadata, LockWeakPtr parent) { - return adopt_nonnull_lock_ref_or_enomem(new (nothrow) TmpFSInode(fs, metadata, move(parent))); + return adopt_nonnull_lock_ref_or_enomem(new (nothrow) RAMFSInode(fs, metadata, move(parent))); } -ErrorOr> TmpFSInode::try_create_root(TmpFS& fs) +ErrorOr> RAMFSInode::try_create_root(RAMFS& fs) { - return adopt_nonnull_lock_ref_or_enomem(new (nothrow) TmpFSInode(fs)); + return adopt_nonnull_lock_ref_or_enomem(new (nothrow) RAMFSInode(fs)); } -InodeMetadata TmpFSInode::metadata() const +InodeMetadata RAMFSInode::metadata() const { MutexLocker locker(m_inode_lock, Mutex::Mode::Shared); return m_metadata; } -ErrorOr TmpFSInode::traverse_as_directory(Function(FileSystem::DirectoryEntryView const&)> callback) const +ErrorOr RAMFSInode::traverse_as_directory(Function(FileSystem::DirectoryEntryView const&)> callback) const { MutexLocker locker(m_inode_lock, Mutex::Mode::Shared); @@ -69,7 +69,7 @@ ErrorOr 
TmpFSInode::traverse_as_directory(Function(FileSyste return {}; } -ErrorOr TmpFSInode::replace_child(StringView name, Inode& new_child) +ErrorOr RAMFSInode::replace_child(StringView name, Inode& new_child) { MutexLocker locker(m_inode_lock); VERIFY(is_directory()); @@ -80,7 +80,7 @@ ErrorOr TmpFSInode::replace_child(StringView name, Inode& new_child) return ENOENT; auto old_child = child->inode; - child->inode = static_cast(new_child); + child->inode = static_cast(new_child); old_child->did_delete_self(); @@ -89,13 +89,13 @@ ErrorOr TmpFSInode::replace_child(StringView name, Inode& new_child) return {}; } -ErrorOr> TmpFSInode::DataBlock::create() +ErrorOr> RAMFSInode::DataBlock::create() { auto data_block_buffer_vmobject = TRY(Memory::AnonymousVMObject::try_create_with_size(DataBlock::block_size, AllocationStrategy::AllocateNow)); return TRY(adopt_nonnull_own_or_enomem(new (nothrow) DataBlock(move(data_block_buffer_vmobject)))); } -ErrorOr TmpFSInode::ensure_allocated_blocks(size_t offset, size_t io_size) +ErrorOr RAMFSInode::ensure_allocated_blocks(size_t offset, size_t io_size) { VERIFY(m_inode_lock.is_locked()); size_t block_start_index = offset / DataBlock::block_size; @@ -123,31 +123,31 @@ ErrorOr TmpFSInode::ensure_allocated_blocks(size_t offset, size_t io_size) return {}; } -ErrorOr TmpFSInode::read_bytes_from_content_space(size_t offset, size_t io_size, UserOrKernelBuffer& buffer) const +ErrorOr RAMFSInode::read_bytes_from_content_space(size_t offset, size_t io_size, UserOrKernelBuffer& buffer) const { VERIFY(m_inode_lock.is_locked()); VERIFY(m_metadata.size >= 0); if (static_cast(m_metadata.size) < offset) return 0; - auto mapping_region = TRY(MM.allocate_kernel_region(DataBlock::block_size, "TmpFSInode Mapping Region"sv, Memory::Region::Access::Read, AllocationStrategy::Reserve)); - return const_cast(*this).do_io_on_content_space(*mapping_region, offset, io_size, buffer, false); + auto mapping_region = TRY(MM.allocate_kernel_region(DataBlock::block_size, 
"RAMFSInode Mapping Region"sv, Memory::Region::Access::Read, AllocationStrategy::Reserve)); + return const_cast(*this).do_io_on_content_space(*mapping_region, offset, io_size, buffer, false); } -ErrorOr TmpFSInode::read_bytes_locked(off_t offset, size_t size, UserOrKernelBuffer& buffer, OpenFileDescription*) const +ErrorOr RAMFSInode::read_bytes_locked(off_t offset, size_t size, UserOrKernelBuffer& buffer, OpenFileDescription*) const { VERIFY(m_inode_lock.is_locked()); VERIFY(!is_directory()); return read_bytes_from_content_space(offset, size, buffer); } -ErrorOr TmpFSInode::write_bytes_to_content_space(size_t offset, size_t io_size, UserOrKernelBuffer const& buffer) +ErrorOr RAMFSInode::write_bytes_to_content_space(size_t offset, size_t io_size, UserOrKernelBuffer const& buffer) { VERIFY(m_inode_lock.is_locked()); - auto mapping_region = TRY(MM.allocate_kernel_region(DataBlock::block_size, "TmpFSInode Mapping Region"sv, Memory::Region::Access::Write, AllocationStrategy::Reserve)); + auto mapping_region = TRY(MM.allocate_kernel_region(DataBlock::block_size, "RAMFSInode Mapping Region"sv, Memory::Region::Access::Write, AllocationStrategy::Reserve)); return do_io_on_content_space(*mapping_region, offset, io_size, const_cast(buffer), true); } -ErrorOr TmpFSInode::write_bytes_locked(off_t offset, size_t size, UserOrKernelBuffer const& buffer, OpenFileDescription*) +ErrorOr RAMFSInode::write_bytes_locked(off_t offset, size_t size, UserOrKernelBuffer const& buffer, OpenFileDescription*) { VERIFY(m_inode_lock.is_locked()); VERIFY(!is_directory()); @@ -169,7 +169,7 @@ ErrorOr TmpFSInode::write_bytes_locked(off_t offset, size_t size, UserOr return nwritten; } -ErrorOr TmpFSInode::do_io_on_content_space(Memory::Region& mapping_region, size_t offset, size_t io_size, UserOrKernelBuffer& buffer, bool write) +ErrorOr RAMFSInode::do_io_on_content_space(Memory::Region& mapping_region, size_t offset, size_t io_size, UserOrKernelBuffer& buffer, bool write) { 
VERIFY(m_inode_lock.is_locked()); size_t remaining_bytes = 0; @@ -227,14 +227,14 @@ ErrorOr TmpFSInode::do_io_on_content_space(Memory::Region& mapping_regio return nio; } -ErrorOr TmpFSInode::truncate_to_block_index(size_t block_index) +ErrorOr RAMFSInode::truncate_to_block_index(size_t block_index) { VERIFY(m_inode_lock.is_locked()); TRY(m_blocks.try_resize(block_index)); return {}; } -ErrorOr> TmpFSInode::lookup(StringView name) +ErrorOr> RAMFSInode::lookup(StringView name) { MutexLocker locker(m_inode_lock, Mutex::Mode::Shared); VERIFY(is_directory()); @@ -253,7 +253,7 @@ ErrorOr> TmpFSInode::lookup(StringView name) return child->inode; } -TmpFSInode::Child* TmpFSInode::find_child_by_name(StringView name) +RAMFSInode::Child* RAMFSInode::find_child_by_name(StringView name) { for (auto& child : m_children) { if (child.name->view() == name) @@ -262,7 +262,7 @@ TmpFSInode::Child* TmpFSInode::find_child_by_name(StringView name) return nullptr; } -ErrorOr TmpFSInode::flush_metadata() +ErrorOr RAMFSInode::flush_metadata() { // We don't really have any metadata that could become dirty. 
// The only reason we even call set_metadata_dirty() is @@ -273,7 +273,7 @@ ErrorOr TmpFSInode::flush_metadata() return {}; } -ErrorOr TmpFSInode::chmod(mode_t mode) +ErrorOr RAMFSInode::chmod(mode_t mode) { MutexLocker locker(m_inode_lock); @@ -282,7 +282,7 @@ ErrorOr TmpFSInode::chmod(mode_t mode) return {}; } -ErrorOr TmpFSInode::chown(UserID uid, GroupID gid) +ErrorOr RAMFSInode::chown(UserID uid, GroupID gid) { MutexLocker locker(m_inode_lock); @@ -292,7 +292,7 @@ ErrorOr TmpFSInode::chown(UserID uid, GroupID gid) return {}; } -ErrorOr> TmpFSInode::create_child(StringView name, mode_t mode, dev_t dev, UserID uid, GroupID gid) +ErrorOr> RAMFSInode::create_child(StringView name, mode_t mode, dev_t dev, UserID uid, GroupID gid) { MutexLocker locker(m_inode_lock); auto now = kgettimeofday(); @@ -307,12 +307,12 @@ ErrorOr> TmpFSInode::create_child(StringView name, mode metadata.major_device = major_from_encoded_device(dev); metadata.minor_device = minor_from_encoded_device(dev); - auto child = TRY(TmpFSInode::try_create(fs(), metadata, *this)); + auto child = TRY(RAMFSInode::try_create(fs(), metadata, *this)); TRY(add_child(*child, name, mode)); return child; } -ErrorOr TmpFSInode::add_child(Inode& child, StringView name, mode_t) +ErrorOr RAMFSInode::add_child(Inode& child, StringView name, mode_t) { VERIFY(is_directory()); VERIFY(child.fsid() == fsid()); @@ -329,7 +329,7 @@ ErrorOr TmpFSInode::add_child(Inode& child, StringView name, mode_t) auto name_kstring = TRY(KString::try_create(name)); // Balanced by `delete` in remove_child() - auto* child_entry = new (nothrow) Child { move(name_kstring), static_cast(child) }; + auto* child_entry = new (nothrow) Child { move(name_kstring), static_cast(child) }; if (!child_entry) return ENOMEM; @@ -338,7 +338,7 @@ ErrorOr TmpFSInode::add_child(Inode& child, StringView name, mode_t) return {}; } -ErrorOr TmpFSInode::remove_child(StringView name) +ErrorOr RAMFSInode::remove_child(StringView name) { MutexLocker 
locker(m_inode_lock); VERIFY(is_directory()); @@ -359,7 +359,7 @@ ErrorOr TmpFSInode::remove_child(StringView name) return {}; } -ErrorOr TmpFSInode::truncate(u64 size) +ErrorOr RAMFSInode::truncate(u64 size) { MutexLocker locker(m_inode_lock); VERIFY(!is_directory()); @@ -369,7 +369,7 @@ ErrorOr TmpFSInode::truncate(u64 size) u64 last_possible_block_index = size / DataBlock::block_size; if ((size % DataBlock::block_size != 0) && m_blocks[last_possible_block_index]) { - auto mapping_region = TRY(MM.allocate_kernel_region(DataBlock::block_size, "TmpFSInode Mapping Region"sv, Memory::Region::Access::Write, AllocationStrategy::Reserve)); + auto mapping_region = TRY(MM.allocate_kernel_region(DataBlock::block_size, "RAMFSInode Mapping Region"sv, Memory::Region::Access::Write, AllocationStrategy::Reserve)); VERIFY(m_blocks[last_possible_block_index]); NonnullLockRefPtr block_vmobject = m_blocks[last_possible_block_index]->vmobject(); mapping_region->set_vmobject(block_vmobject); @@ -381,7 +381,7 @@ ErrorOr TmpFSInode::truncate(u64 size) return {}; } -ErrorOr TmpFSInode::update_timestamps(Optional