Kernel: Use AddressSpace region tree for range allocation

This patch stops using VirtualRangeAllocator in AddressSpace and instead
looks for holes in the region tree when allocating VM space.

There are many benefits:

- VirtualRangeAllocator is non-intrusive and would call kmalloc/kfree
  when used, which was a source of unpleasant MM/kmalloc deadlocks.
  This new solution is allocation-free.

- We consolidate authority on what the address space looks like in a
  single place. Previously, both the range allocator *and* the region
  tree were used to determine if an address was valid. Now there is
  only the region tree.

- Deallocation of VM when splitting regions is no longer complicated,
  as we don't need to keep two separate trees in sync.
This commit is contained in:
Andreas Kling 2022-04-02 20:01:29 +02:00
parent 90a7b9e5b4
commit 02a95a196f
10 changed files with 158 additions and 30 deletions

View file

@ -1,10 +1,11 @@
/*
* Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2021-2022, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/API/MemoryLayout.h>
#include <Kernel/Arch/CPU.h>
#include <Kernel/Locking/Spinlock.h>
#include <Kernel/Memory/AddressSpace.h>
@ -13,20 +14,33 @@
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/Scheduler.h>
namespace Kernel::Memory {
// Creates a new AddressSpace backed by a freshly created userspace page directory.
//
// If `parent` is non-null, the new space reuses the parent's total virtual range.
// Otherwise the range starts at USER_RANGE_BASE plus a random offset and extends
// up to USER_RANGE_CEILING.
//
// Errors from page directory creation or from allocating the AddressSpace object
// are propagated via TRY().
//
// NOTE(review): `page_directory` and `space` are each declared twice in this view.
// The first pair looks like the pre-patch lines of the diff (they still pass the
// parent's range allocator to try_create_for_userspace()); confirm against the real file.
ErrorOr<NonnullOwnPtr<AddressSpace>> AddressSpace::try_create(AddressSpace const* parent)
{
auto page_directory = TRY(PageDirectory::try_create_for_userspace(parent ? &parent->page_directory().range_allocator() : nullptr));
auto space = TRY(adopt_nonnull_own_or_enomem(new (nothrow) AddressSpace(page_directory)));
auto page_directory = TRY(PageDirectory::try_create_for_userspace());
// Decide which span of virtual addresses this address space is allowed to hand out.
VirtualRange total_range = [&]() -> VirtualRange {
if (parent)
return parent->m_total_range;
constexpr FlatPtr userspace_range_base = USER_RANGE_BASE;
FlatPtr const userspace_range_ceiling = USER_RANGE_CEILING;
// `%` and `*` share precedence and associate left-to-right, so this is
// (random % 32) * MiB, i.e. an offset of 0..31 MiB; the result is then masked with PAGE_MASK.
size_t random_offset = (get_fast_random<u8>() % 32 * MiB) & PAGE_MASK;
FlatPtr base = userspace_range_base + random_offset;
return VirtualRange(VirtualAddress { base }, userspace_range_ceiling - base);
}();
auto space = TRY(adopt_nonnull_own_or_enomem(new (nothrow) AddressSpace(move(page_directory), total_range)));
// Give the page directory a reference back to the address space that now owns it.
space->page_directory().set_space({}, *space);
return space;
}
// Stores the given page directory and the total virtual range that the
// try_allocate_*() functions may hand out addresses from.
// NOTE(review): two signature lines appear here; the one-argument form looks like
// the pre-patch line of the diff, since the initializer list uses `total_range`.
AddressSpace::AddressSpace(NonnullRefPtr<PageDirectory> page_directory)
AddressSpace::AddressSpace(NonnullRefPtr<PageDirectory> page_directory, VirtualRange total_range)
: m_page_directory(move(page_directory))
, m_total_range(total_range)
{
}
@ -78,9 +92,6 @@ ErrorOr<void> AddressSpace::unmap_mmap_range(VirtualAddress addr, size_t size)
auto new_regions = TRY(try_split_region_around_range(*region, range_to_unmap));
// Instead we give back the unwanted VM manually.
page_directory().range_allocator().deallocate(range_to_unmap);
// And finally we map the new region(s) using our page directory (they were just allocated and don't have one).
for (auto* new_region : new_regions) {
// TODO: Ideally we should do this in a way that can be rolled back on failure, as failing here
@ -126,9 +137,6 @@ ErrorOr<void> AddressSpace::unmap_mmap_range(VirtualAddress addr, size_t size)
TRY(new_regions.try_extend(split_regions));
}
// Give back any unwanted VM to the range allocator.
page_directory().range_allocator().deallocate(range_to_unmap);
// And finally map the new region(s) into our page directory.
for (auto* new_region : new_regions) {
// TODO: Ideally we should do this in a way that can be rolled back on failure, as failing here
@ -141,13 +149,121 @@ ErrorOr<void> AddressSpace::unmap_mmap_range(VirtualAddress addr, size_t size)
return {};
}
// Finds an unused virtual range of `size` bytes whose base is aligned to `alignment`.
//
// The region tree is walked in address order, starting at the base of m_total_range.
// Every gap between two neighbouring regions is considered, and finally the gap
// between the last region and the end of m_total_range.
//
// Returns:
// - EINVAL if `size` is zero.
// - EOVERFLOW if `size + alignment` would overflow.
// - ENOMEM if no gap can hold the request.
// - Otherwise a range of exactly `size` bytes.
//
// `size` and `alignment` must both be multiples of PAGE_SIZE (VERIFY'd).
ErrorOr<VirtualRange> AddressSpace::try_allocate_anywhere(size_t size, size_t alignment)
{
    if (!size)
        return EINVAL;

    VERIFY((size % PAGE_SIZE) == 0);
    VERIFY((alignment % PAGE_SIZE) == 0);

    if (Checked<size_t>::addition_would_overflow(size, alignment))
        return EOVERFLOW;

    // A gap is only accepted if it can hold the request plus worst-case alignment padding.
    // FIXME: This check is probably excluding some valid candidates when using a large alignment.
    auto gap_is_large_enough = [&](VirtualRange const& gap) {
        return gap.size() >= (size + alignment);
    };

    // Carves an aligned, `size`-byte range out of the start of a sufficiently large gap.
    auto carve_from_gap = [&](VirtualRange const& gap) {
        FlatPtr aligned_base = round_up_to_power_of_two(gap.base().get(), alignment);
        return VirtualRange { VirtualAddress(aligned_base), size };
    };

    VirtualAddress window_start = m_total_range.base();

    for (auto it = m_regions.begin(); !it.is_end(); ++it) {
        auto& region = *it;

        // No gap in front of this region; continue searching right after it.
        if (window_start == region.vaddr()) {
            window_start = region.range().end();
            continue;
        }

        VirtualRange available_range { window_start, region.vaddr().get() - window_start.get() };
        window_start = region.range().end();

        if (gap_is_large_enough(available_range))
            return carve_from_gap(available_range);
    }

    // Finally, consider the space between the last region (or the base of the total
    // range if there are no regions) and the end of the total range. This gap must go
    // through the same size check and carving as every other gap; handing it back
    // whole would give the caller far more than the `size` bytes that were requested.
    VirtualRange available_range { window_start, m_total_range.end().get() - window_start.get() };
    if (m_total_range.contains(available_range) && gap_is_large_enough(available_range))
        return carve_from_gap(available_range);

    dmesgln("AddressSpace: Failed to allocate anywhere: size={}, alignment={}", size, alignment);
    return ENOMEM;
}
// Checks whether the exact range [base, base + size) is free and returns it if so.
//
// Returns:
// - EINVAL if `size` is zero.
// - ENOMEM if the range is not fully inside m_total_range, or if it overlaps
//   an existing region.
// - Otherwise the requested range.
//
// `base` must be page-aligned and `size` a multiple of PAGE_SIZE (VERIFY'd).
ErrorOr<VirtualRange> AddressSpace::try_allocate_specific(VirtualAddress base, size_t size)
{
    if (!size)
        return EINVAL;

    VERIFY(base.is_page_aligned());
    VERIFY((size % PAGE_SIZE) == 0);

    VirtualRange const range { base, size };
    if (!m_total_range.contains(range))
        return ENOMEM;

    // Look up the highest region that starts at or below the *last byte* of the request.
    // Keying the lookup on `base` instead would miss a region that starts strictly
    // inside the requested range whenever no region starts at or below `base`.
    // Assuming regions in the tree never overlap each other, this is the only region
    // that needs checking: every lower region ends at or before the start of this one.
    FlatPtr const last_byte_of_range = base.get() + (size - 1);
    auto* region = m_regions.find_largest_not_above(last_byte_of_range);
    if (!region) {
        // Every existing region starts above the end of the requested range.
        return range;
    }

    if (region->range().intersects(range)) {
        // Requested range overlaps an existing range.
        return ENOMEM;
    }

    // The nearest region at or below the end of the request does not reach into it.
    return range;
}
// Tries to place a `size`-byte range at a random, `alignment`-aligned address.
//
// Up to a fixed number of random candidates are probed with try_allocate_specific().
// If none of them is usable, the request falls back to try_allocate_anywhere().
//
// Returns EINVAL if `size` is zero; otherwise whatever the successful probe or
// the fallback returns. `size` and `alignment` must be multiples of PAGE_SIZE (VERIFY'd).
ErrorOr<VirtualRange> AddressSpace::try_allocate_randomized(size_t size, size_t alignment)
{
    if (!size)
        return EINVAL;

    VERIFY((size % PAGE_SIZE) == 0);
    VERIFY((alignment % PAGE_SIZE) == 0);

    // FIXME: I'm sure there's a smarter way to do this.
    constexpr size_t max_attempts = 1000;
    FlatPtr const address_ceiling = m_total_range.end().get();

    size_t attempts_made = 0;
    while (attempts_made < max_attempts) {
        ++attempts_made;

        FlatPtr const raw_candidate = get_fast_random<FlatPtr>() % address_ceiling;
        VirtualAddress candidate { round_up_to_power_of_two(raw_candidate, alignment) };

        // Only probe candidates whose whole range lies inside the total range.
        if (m_total_range.contains(candidate, size)) {
            auto probe_result = try_allocate_specific(candidate, size);
            if (!probe_result.is_error())
                return probe_result.release_value();
        }
    }

    // Randomization did not find a free spot; take the first gap that fits instead.
    return try_allocate_anywhere(size, alignment);
}
// Normalizes an allocation request and dispatches it.
//
// `vaddr` is masked with PAGE_MASK and `size` is rounded up with page_round_up()
// (an error from the rounding is propagated via TRY()). A null `vaddr` means
// "anywhere"; any other value requests that exact address.
//
// NOTE(review): two pairs of return statements appear in this view. The pair that
// goes through page_directory().range_allocator() looks like the pre-patch lines of
// the diff; the pair calling try_allocate_anywhere()/try_allocate_specific() directly
// looks like the replacement. Confirm against the real file.
ErrorOr<VirtualRange> AddressSpace::try_allocate_range(VirtualAddress vaddr, size_t size, size_t alignment)
{
vaddr.mask(PAGE_MASK);
size = TRY(page_round_up(size));
if (vaddr.is_null())
return page_directory().range_allocator().try_allocate_anywhere(size, alignment);
return page_directory().range_allocator().try_allocate_specific(vaddr, size);
return try_allocate_anywhere(size, alignment);
return try_allocate_specific(vaddr, size);
}
ErrorOr<Region*> AddressSpace::try_allocate_split_region(Region const& source_region, VirtualRange const& range, size_t offset_in_vmobject)

View file

@ -66,8 +66,12 @@ public:
size_t amount_purgeable_volatile() const;
size_t amount_purgeable_nonvolatile() const;
ErrorOr<VirtualRange> try_allocate_anywhere(size_t size, size_t alignment);
ErrorOr<VirtualRange> try_allocate_specific(VirtualAddress base, size_t size);
ErrorOr<VirtualRange> try_allocate_randomized(size_t size, size_t alignment);
private:
explicit AddressSpace(NonnullRefPtr<PageDirectory>);
AddressSpace(NonnullRefPtr<PageDirectory>, VirtualRange total_range);
void delete_all_regions_assuming_they_are_unmapped();
@ -76,6 +80,7 @@ private:
RefPtr<PageDirectory> m_page_directory;
IntrusiveRedBlackTree<&Region::m_tree_node> m_regions;
VirtualRange const m_total_range;
bool m_enforces_syscall_regions { false };
};

View file

@ -34,21 +34,10 @@ UNMAP_AFTER_INIT NonnullRefPtr<PageDirectory> PageDirectory::must_create_kernel_
return directory;
}
ErrorOr<NonnullRefPtr<PageDirectory>> PageDirectory::try_create_for_userspace(VirtualRangeAllocator const* parent_range_allocator)
ErrorOr<NonnullRefPtr<PageDirectory>> PageDirectory::try_create_for_userspace()
{
constexpr FlatPtr userspace_range_base = USER_RANGE_BASE;
FlatPtr const userspace_range_ceiling = USER_RANGE_CEILING;
auto directory = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) PageDirectory));
if (parent_range_allocator) {
TRY(directory->m_range_allocator.initialize_from_parent(*parent_range_allocator));
} else {
size_t random_offset = (get_fast_random<u8>() % 32 * MiB) & PAGE_MASK;
u32 base = userspace_range_base + random_offset;
TRY(directory->m_range_allocator.initialize_with_range(VirtualAddress(base), userspace_range_ceiling - base));
}
// NOTE: Take the MM lock since we need it for quickmap.
SpinlockLocker lock(s_mm_lock);

View file

@ -21,7 +21,7 @@ class PageDirectory : public RefCounted<PageDirectory> {
friend class MemoryManager;
public:
static ErrorOr<NonnullRefPtr<PageDirectory>> try_create_for_userspace(VirtualRangeAllocator const* parent_range_allocator = nullptr);
static ErrorOr<NonnullRefPtr<PageDirectory>> try_create_for_userspace();
static NonnullRefPtr<PageDirectory> must_create_kernel_page_directory();
static RefPtr<PageDirectory> find_current();

View file

@ -58,7 +58,8 @@ Region::~Region()
if (!is_readable() && !is_writable() && !is_executable()) {
// If the region is "PROT_NONE", we didn't map it in the first place,
// so all we need to do here is deallocate the VM.
m_page_directory->range_allocator().deallocate(range());
if (is_kernel())
m_page_directory->range_allocator().deallocate(range());
} else {
SpinlockLocker mm_locker(s_mm_lock);
unmap_with_locks_held(ShouldDeallocateVirtualRange::Yes, ShouldFlushTLB::Yes, pd_locker, mm_locker);
@ -270,7 +271,8 @@ void Region::unmap_with_locks_held(ShouldDeallocateVirtualRange deallocate_range
if (should_flush_tlb == ShouldFlushTLB::Yes)
MemoryManager::flush_tlb(m_page_directory, vaddr(), page_count());
if (deallocate_range == ShouldDeallocateVirtualRange::Yes) {
m_page_directory->range_allocator().deallocate(range());
if (is_kernel())
m_page_directory->range_allocator().deallocate(range());
}
m_page_directory = nullptr;
}

View file

@ -25,6 +25,18 @@ Vector<VirtualRange, 2> VirtualRange::carve(VirtualRange const& taken) const
parts.append({ taken.end(), end().get() - taken.end().get() });
return parts;
}
// Returns true if this range and `other` overlap, i.e. each of the two ranges
// starts before the other one's end().
bool VirtualRange::intersects(VirtualRange const& other) const
{
    // The overlap test is symmetric in the two ranges, so neither has to be
    // ordered first before comparing.
    bool const this_starts_before_other_ends = base() < other.end();
    bool const other_starts_before_this_ends = other.base() < end();
    return this_starts_before_other_ends && other_starts_before_this_ends;
}
VirtualRange VirtualRange::intersect(VirtualRange const& other) const
{
if (*this == other) {

View file

@ -51,6 +51,8 @@ public:
Vector<VirtualRange, 2> carve(VirtualRange const&) const;
VirtualRange intersect(VirtualRange const&) const;
bool intersects(VirtualRange const&) const;
static ErrorOr<VirtualRange> expand_to_page_boundaries(FlatPtr address, size_t size);
private:

View file

@ -30,6 +30,8 @@ public:
bool contains(VirtualRange const& range) const { return m_total_range.contains(range); }
VirtualRange total_range() const { return m_total_range; }
private:
ErrorOr<void> carve_from_region(VirtualRange const& from, VirtualRange const&);

View file

@ -17,7 +17,7 @@ ErrorOr<FlatPtr> Process::sys$map_time_page()
auto& vmobject = TimeManagement::the().time_page_vmobject();
auto range = TRY(address_space().page_directory().range_allocator().try_allocate_randomized(PAGE_SIZE, PAGE_SIZE));
auto range = TRY(address_space().try_allocate_randomized(PAGE_SIZE, PAGE_SIZE));
auto* region = TRY(address_space().allocate_region_with_vmobject(range, vmobject, 0, "Kernel time page"sv, PROT_READ, true));
return region->vaddr().get();
}

View file

@ -193,7 +193,7 @@ ErrorOr<FlatPtr> Process::sys$mmap(Userspace<Syscall::SC_mmap_params const*> use
auto range = TRY([&]() -> ErrorOr<Memory::VirtualRange> {
if (map_randomized)
return address_space().page_directory().range_allocator().try_allocate_randomized(rounded_size, alignment);
return address_space().try_allocate_randomized(rounded_size, alignment);
// If MAP_FIXED is specified, existing mappings that intersect the requested range are removed.
if (map_fixed)