From 6a4b93b3e0015bf2d0d617330220575794d7ef75 Mon Sep 17 00:00:00 2001 From: Idan Horowitz Date: Sat, 16 Dec 2023 12:37:28 +0200 Subject: [PATCH] Kernel: Protect processes' master TLS with a fine-grained spinlock This moves it out of the scope of the big process lock, and allows us to wean some syscalls off it, starting with sys$allocate_tls. --- Kernel/API/Syscall.h | 2 +- Kernel/Syscalls/execve.cpp | 27 +++++++++-------- Kernel/Syscalls/fork.cpp | 32 ++++++++++++-------- Kernel/Syscalls/mmap.cpp | 62 ++++++++++++++++++++------------------ Kernel/Tasks/Process.h | 10 +++--- Kernel/Tasks/Thread.cpp | 42 +++++++++++--------------- Kernel/Tasks/Thread.h | 2 -- 7 files changed, 91 insertions(+), 86 deletions(-) diff --git a/Kernel/API/Syscall.h b/Kernel/API/Syscall.h index b4f4c06a80..4a631cd4f9 100644 --- a/Kernel/API/Syscall.h +++ b/Kernel/API/Syscall.h @@ -48,7 +48,7 @@ enum class NeedsBigProcessLock { S(accept4, NeedsBigProcessLock::No) \ S(adjtime, NeedsBigProcessLock::No) \ S(alarm, NeedsBigProcessLock::No) \ - S(allocate_tls, NeedsBigProcessLock::Yes) \ + S(allocate_tls, NeedsBigProcessLock::No) \ S(anon_create, NeedsBigProcessLock::No) \ S(annotate_mapping, NeedsBigProcessLock::No) \ S(bind, NeedsBigProcessLock::No) \ diff --git a/Kernel/Syscalls/execve.cpp b/Kernel/Syscalls/execve.cpp index 297f9ed8f7..2602204406 100644 --- a/Kernel/Syscalls/execve.cpp +++ b/Kernel/Syscalls/execve.cpp @@ -431,9 +431,11 @@ Process::load(Memory::AddressSpace& new_space, NonnullRefPtr Process::do_exec(NonnullRefPtr main_program_d auto allocated_space = TRY(Memory::AddressSpace::try_create(*this, nullptr)); OwnPtr old_space; - auto old_master_tls_region = m_master_tls_region; - auto old_master_tls_size = m_master_tls_size; - auto old_master_tls_alignment = m_master_tls_alignment; + auto old_master_tls = m_master_tls.with([](auto& master_tls) { + auto old = master_tls; + master_tls.region = nullptr; + master_tls.size = 0; + master_tls.alignment = 0; + return old; + }); auto& 
new_space = m_space.with([&](auto& space) -> Memory::AddressSpace& { old_space = move(space); space = move(allocated_space); return *space; }); - m_master_tls_region = nullptr; - m_master_tls_size = 0; - m_master_tls_alignment = 0; ArmedScopeGuard space_guard([&]() { // If we failed at any point from now on we have to revert back to the old address space m_space.with([&](auto& space) { space = old_space.release_nonnull(); }); - m_master_tls_region = old_master_tls_region; - m_master_tls_size = old_master_tls_size; - m_master_tls_alignment = old_master_tls_alignment; + m_master_tls.with([&](auto& master_tls) { + master_tls = old_master_tls; + }); Memory::MemoryManager::enter_process_address_space(*this); }); diff --git a/Kernel/Syscalls/fork.cpp b/Kernel/Syscalls/fork.cpp index e6d933f601..5801c7b144 100644 --- a/Kernel/Syscalls/fork.cpp +++ b/Kernel/Syscalls/fork.cpp @@ -158,21 +158,27 @@ ErrorOr Process::sys$fork(RegisterState& regs) #endif TRY(address_space().with([&](auto& parent_space) { - return child->address_space().with([&](auto& child_space) -> ErrorOr { - child_space->set_enforces_syscall_regions(parent_space->enforces_syscall_regions()); - for (auto& region : parent_space->region_tree().regions()) { - dbgln_if(FORK_DEBUG, "fork: cloning Region '{}' @ {}", region.name(), region.vaddr()); - auto region_clone = TRY(region.try_clone()); - TRY(region_clone->map(child_space->page_directory(), Memory::ShouldFlushTLB::No)); - TRY(child_space->region_tree().place_specifically(*region_clone, region.range())); - auto* child_region = region_clone.leak_ptr(); + return m_master_tls.with([&](auto& parent_master_tls) -> ErrorOr { + return child->address_space().with([&](auto& child_space) -> ErrorOr { + child_space->set_enforces_syscall_regions(parent_space->enforces_syscall_regions()); + for (auto& region : parent_space->region_tree().regions()) { + dbgln_if(FORK_DEBUG, "fork: cloning Region '{}' @ {}", region.name(), region.vaddr()); + auto region_clone = 
TRY(region.try_clone()); + TRY(region_clone->map(child_space->page_directory(), Memory::ShouldFlushTLB::No)); + TRY(child_space->region_tree().place_specifically(*region_clone, region.range())); + auto* child_region = region_clone.leak_ptr(); - if (®ion == m_master_tls_region.unsafe_ptr()) { - child->m_master_tls_region = TRY(child_region->try_make_weak_ptr()); - child->m_master_tls_size = m_master_tls_size; - child->m_master_tls_alignment = m_master_tls_alignment; + if (®ion == parent_master_tls.region.unsafe_ptr()) { + TRY(child->m_master_tls.with([&](auto& child_master_tls) -> ErrorOr { + child_master_tls.region = TRY(child_region->try_make_weak_ptr()); + child_master_tls.size = parent_master_tls.size; + child_master_tls.alignment = parent_master_tls.alignment; + return {}; + })); + } } - } + return {}; + }); return {}; }); })); diff --git a/Kernel/Syscalls/mmap.cpp b/Kernel/Syscalls/mmap.cpp index 962aa55a3e..12baf992ec 100644 --- a/Kernel/Syscalls/mmap.cpp +++ b/Kernel/Syscalls/mmap.cpp @@ -524,50 +524,52 @@ ErrorOr Process::sys$mremap(Userspace ErrorOr Process::sys$allocate_tls(Userspace initial_data, size_t size) { - VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this); + VERIFY_NO_PROCESS_BIG_LOCK(this); TRY(require_promise(Pledge::stdio)); if (!size || size % PAGE_SIZE != 0) return EINVAL; - if (!m_master_tls_region.is_null()) - return EEXIST; + return m_master_tls.with([&](auto& master_tls) -> ErrorOr { + if (!master_tls.region.is_null()) + return EEXIST; - if (thread_count() != 1) - return EFAULT; + if (thread_count() != 1) + return EFAULT; - Thread* main_thread = nullptr; - bool multiple_threads = false; - for_each_thread([&main_thread, &multiple_threads](auto& thread) { - if (main_thread) - multiple_threads = true; - main_thread = &thread; - return IterationDecision::Break; - }); - VERIFY(main_thread); + Thread* main_thread = nullptr; + bool multiple_threads = false; + for_each_thread([&main_thread, &multiple_threads](auto& thread) { + if (main_thread) + 
multiple_threads = true; + main_thread = &thread; + return IterationDecision::Break; + }); + VERIFY(main_thread); - if (multiple_threads) - return EINVAL; + if (multiple_threads) + return EINVAL; - return address_space().with([&](auto& space) -> ErrorOr { - auto* region = TRY(space->allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, size, PAGE_SIZE, "Master TLS"sv, PROT_READ | PROT_WRITE)); + return address_space().with([&](auto& space) -> ErrorOr { + auto* region = TRY(space->allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, size, PAGE_SIZE, "Master TLS"sv, PROT_READ | PROT_WRITE)); - m_master_tls_region = TRY(region->try_make_weak_ptr()); - m_master_tls_size = size; - m_master_tls_alignment = PAGE_SIZE; + master_tls.region = TRY(region->try_make_weak_ptr()); + master_tls.size = size; + master_tls.alignment = PAGE_SIZE; - { - Kernel::SmapDisabler disabler; - void* fault_at; - if (!Kernel::safe_memcpy((char*)m_master_tls_region.unsafe_ptr()->vaddr().as_ptr(), (char*)initial_data.ptr(), size, fault_at)) - return EFAULT; - } + { + Kernel::SmapDisabler disabler; + void* fault_at; + if (!Kernel::safe_memcpy((char*)master_tls.region.unsafe_ptr()->vaddr().as_ptr(), (char*)initial_data.ptr(), size, fault_at)) + return EFAULT; + } - TRY(main_thread->make_thread_specific_region({})); + TRY(main_thread->make_thread_specific_region({})); - Processor::set_thread_specific_data(main_thread->thread_specific_data()); + Processor::set_thread_specific_data(main_thread->thread_specific_data()); - return m_master_tls_region.unsafe_ptr()->vaddr().get(); + return master_tls.region.unsafe_ptr()->vaddr().get(); + }); }); } diff --git a/Kernel/Tasks/Process.h b/Kernel/Tasks/Process.h index df63c85bf6..2064f0b570 100644 --- a/Kernel/Tasks/Process.h +++ b/Kernel/Tasks/Process.h @@ -924,8 +924,6 @@ private: Vector> m_arguments; Vector> m_environment; - LockWeakPtr m_master_tls_region; - IntrusiveListNode m_jail_process_list_node; IntrusiveListNode m_all_processes_list_node; @@ 
-937,8 +935,12 @@ private: SpinlockProtected, LockRank::None> m_jail_process_list; SpinlockProtected, LockRank::Process> m_attached_jail {}; - size_t m_master_tls_size { 0 }; - size_t m_master_tls_alignment { 0 }; + struct MasterThreadLocalStorage { + LockWeakPtr region; + size_t size { 0 }; + size_t alignment { 0 }; + }; + SpinlockProtected m_master_tls; Mutex m_big_lock { "Process"sv, Mutex::MutexBehavior::BigLock }; Mutex m_ptrace_lock { "ptrace"sv }; diff --git a/Kernel/Tasks/Thread.cpp b/Kernel/Tasks/Thread.cpp index ca5c726234..a5bf2938d1 100644 --- a/Kernel/Tasks/Thread.cpp +++ b/Kernel/Tasks/Thread.cpp @@ -1360,37 +1360,31 @@ ErrorOr> Thread::backtrace() return KString::try_create(builder.string_view()); } -size_t Thread::thread_specific_region_alignment() const -{ - return max(process().m_master_tls_alignment, alignof(ThreadSpecificData)); -} - -size_t Thread::thread_specific_region_size() const -{ - return align_up_to(process().m_master_tls_size, thread_specific_region_alignment()) + sizeof(ThreadSpecificData); -} - ErrorOr Thread::make_thread_specific_region(Badge) { - // The process may not require a TLS region, or allocate TLS later with sys$allocate_tls (which is what dynamically loaded programs do) - if (!process().m_master_tls_region) - return {}; + return process().m_master_tls.with([&](auto& master_tls) -> ErrorOr { + // The process may not require a TLS region, or allocate TLS later with sys$allocate_tls (which is what dynamically loaded programs do) + if (!master_tls.region) + return {}; - return process().address_space().with([&](auto& space) -> ErrorOr { - auto* region = TRY(space->allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, thread_specific_region_size(), PAGE_SIZE, "Thread-specific"sv, PROT_READ | PROT_WRITE)); + return process().address_space().with([&](auto& space) -> ErrorOr { + auto region_alignment = max(master_tls.alignment, alignof(ThreadSpecificData)); + auto region_size = align_up_to(master_tls.size, region_alignment) + 
sizeof(ThreadSpecificData); + auto* region = TRY(space->allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, region_size, PAGE_SIZE, "Thread-specific"sv, PROT_READ | PROT_WRITE)); - m_thread_specific_range = region->range(); + m_thread_specific_range = region->range(); - SmapDisabler disabler; - auto* thread_specific_data = (ThreadSpecificData*)region->vaddr().offset(align_up_to(process().m_master_tls_size, thread_specific_region_alignment())).as_ptr(); - auto* thread_local_storage = (u8*)((u8*)thread_specific_data) - align_up_to(process().m_master_tls_size, process().m_master_tls_alignment); - m_thread_specific_data = VirtualAddress(thread_specific_data); - thread_specific_data->self = thread_specific_data; + SmapDisabler disabler; + auto* thread_specific_data = (ThreadSpecificData*)region->vaddr().offset(align_up_to(master_tls.size, region_alignment)).as_ptr(); + auto* thread_local_storage = (u8*)((u8*)thread_specific_data) - align_up_to(master_tls.size, master_tls.alignment); + m_thread_specific_data = VirtualAddress(thread_specific_data); + thread_specific_data->self = thread_specific_data; - if (process().m_master_tls_size != 0) - memcpy(thread_local_storage, process().m_master_tls_region.unsafe_ptr()->vaddr().as_ptr(), process().m_master_tls_size); + if (master_tls.size != 0) + memcpy(thread_local_storage, master_tls.region.unsafe_ptr()->vaddr().as_ptr(), master_tls.size); - return {}; + return {}; + }); }); } diff --git a/Kernel/Tasks/Thread.h b/Kernel/Tasks/Thread.h index 4113a302cd..345db6a5a1 100644 --- a/Kernel/Tasks/Thread.h +++ b/Kernel/Tasks/Thread.h @@ -788,8 +788,6 @@ public: StringView state_string() const; VirtualAddress thread_specific_data() const { return m_thread_specific_data; } - size_t thread_specific_region_size() const; - size_t thread_specific_region_alignment() const; ALWAYS_INLINE void yield_if_stopped() {