Kernel: Make the page table quickmaps per-CPU

While the "regular" quickmap (used to temporarily map a physical page
at a known address for quick access) has been per-CPU for a while,
we also have the PD (page directory) and PT (page table) quickmaps
used by the memory management code to edit page tables. These have been
global, which meant that SMP systems had to keep fighting over them.

This patch makes *all* quickmaps per-CPU. We reserve virtual addresses
for up to 64 CPUs' worth of quickmaps for now.

Note that all quickmaps are still protected by the MM lock, and we'll
have to fix that too before seeing any real throughput improvements.
Author: Andreas Kling
Date:   2022-08-22 14:41:08 +02:00
Commit: a838fdfd88 (parent 930dedfbd8)
3 changed files with 21 additions and 34 deletions
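
To illustrate the scheme (this is a sketch, not code from the commit): each CPU owns one reserved virtual page per quickmap kind and picks its slot by processor ID, so two CPUs can never collide on a mapping. The pte_for() helper below is hypothetical; the real code reaches the backing PTE through boot_pd_kernel_pt1023, as the diff shows.

    // Sketch only: temporarily point this CPU's reserved quickmap page at `paddr`.
    u8* quickmap_sketch(PhysicalAddress paddr)
    {
        VirtualAddress slot(KERNEL_QUICKMAP_PER_CPU_BASE + Processor::current_id() * PAGE_SIZE);
        auto& pte = pte_for(slot); // hypothetical helper: the PTE backing `slot`
        pte.set_physical_page_base(paddr.get());
        pte.set_present(true);
        pte.set_writable(true);
        flush_tlb_local(slot); // only this CPU uses the slot, so a local flush suffices
        return reinterpret_cast<u8*>(slot.get());
    }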

@@ -1037,53 +1037,41 @@ void MemoryManager::flush_tlb(PageDirectory const* page_directory, VirtualAddress
 PageDirectoryEntry* MemoryManager::quickmap_pd(PageDirectory& directory, size_t pdpt_index)
 {
     VERIFY_INTERRUPTS_DISABLED();
     VERIFY(s_mm_lock.is_locked_by_current_processor());
-    auto& mm_data = get_data();
-    auto& pte = boot_pd_kernel_pt1023[(KERNEL_QUICKMAP_PD - KERNEL_PT1024_BASE) / PAGE_SIZE];
+    VirtualAddress vaddr(KERNEL_QUICKMAP_PD_PER_CPU_BASE + Processor::current_id() * PAGE_SIZE);
+    size_t pte_index = (vaddr.get() - KERNEL_PT1024_BASE) / PAGE_SIZE;
+    auto& pte = boot_pd_kernel_pt1023[pte_index];
     auto pd_paddr = directory.m_directory_pages[pdpt_index]->paddr();
     if (pte.physical_page_base() != pd_paddr.get()) {
         pte.set_physical_page_base(pd_paddr.get());
         pte.set_present(true);
         pte.set_writable(true);
         pte.set_user_allowed(false);
-        // Because we must continue to hold the MM lock while we use this
-        // mapping, it is sufficient to only flush on the current CPU. Other
-        // CPUs trying to use this API must wait on the MM lock anyway
-        flush_tlb_local(VirtualAddress(KERNEL_QUICKMAP_PD));
-    } else {
-        // Even though we don't allow this to be called concurrently, it's
-        // possible that this PD was mapped on a different CPU and we don't
-        // broadcast the flush. If so, we still need to flush the TLB.
-        if (mm_data.m_last_quickmap_pd != pd_paddr)
-            flush_tlb_local(VirtualAddress(KERNEL_QUICKMAP_PD));
+        flush_tlb_local(vaddr);
     }
-    mm_data.m_last_quickmap_pd = pd_paddr;
-    return (PageDirectoryEntry*)KERNEL_QUICKMAP_PD;
+    return (PageDirectoryEntry*)vaddr.get();
 }

 PageTableEntry* MemoryManager::quickmap_pt(PhysicalAddress pt_paddr)
 {
     VERIFY_INTERRUPTS_DISABLED();
     VERIFY(s_mm_lock.is_locked_by_current_processor());
-    auto& mm_data = get_data();
-    auto& pte = ((PageTableEntry*)boot_pd_kernel_pt1023)[(KERNEL_QUICKMAP_PT - KERNEL_PT1024_BASE) / PAGE_SIZE];
+    VirtualAddress vaddr(KERNEL_QUICKMAP_PT_PER_CPU_BASE + Processor::current_id() * PAGE_SIZE);
+    size_t pte_index = (vaddr.get() - KERNEL_PT1024_BASE) / PAGE_SIZE;
+    auto& pte = ((PageTableEntry*)boot_pd_kernel_pt1023)[pte_index];
     if (pte.physical_page_base() != pt_paddr.get()) {
         pte.set_physical_page_base(pt_paddr.get());
         pte.set_present(true);
         pte.set_writable(true);
         pte.set_user_allowed(false);
-        // Because we must continue to hold the MM lock while we use this
-        // mapping, it is sufficient to only flush on the current CPU. Other
-        // CPUs trying to use this API must wait on the MM lock anyway
-        flush_tlb_local(VirtualAddress(KERNEL_QUICKMAP_PT));
-    } else {
-        // Even though we don't allow this to be called concurrently, it's
-        // possible that this PT was mapped on a different CPU and we don't
-        // broadcast the flush. If so, we still need to flush the TLB.
-        if (mm_data.m_last_quickmap_pt != pt_paddr)
-            flush_tlb_local(VirtualAddress(KERNEL_QUICKMAP_PT));
+        flush_tlb_local(vaddr);
     }
-    mm_data.m_last_quickmap_pt = pt_paddr;
-    return (PageTableEntry*)KERNEL_QUICKMAP_PT;
+    return (PageTableEntry*)vaddr.get();
 }

 u8* MemoryManager::quickmap_page(PhysicalAddress const& physical_address)
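
With per-CPU slots, a caller that edits a page table still takes the MM lock, but it no longer fights other CPUs for the mapping itself. A hypothetical caller could look like this (pt_paddr, index, and new_paddr are placeholders; the real callers are MemoryManager internals such as ensure_pte()):

    // Hypothetical caller sketch; names are placeholders, not from this commit.
    SpinlockLocker mm_lock(s_mm_lock);   // quickmaps are still protected by the MM lock
    auto* pt = quickmap_pt(pt_paddr);    // maps pt_paddr into this CPU's own slot
    pt[index].set_physical_page_base(new_paddr.get());
    pt[index].set_present(true);
    // No explicit unmap: the next quickmap_pt() on this CPU simply repoints
    // the same per-CPU virtual page and flushes the local TLB.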

@@ -91,9 +91,6 @@ struct MemoryManagerData {
     Spinlock m_quickmap_in_use { LockRank::None };
     u32 m_quickmap_prev_flags;
-    PhysicalAddress m_last_quickmap_pd;
-    PhysicalAddress m_last_quickmap_pt;
 };

 // NOLINTNEXTLINE(readability-redundant-declaration) FIXME: Why do we declare this here *and* in Thread.h?

@@ -17,9 +17,11 @@
 #define KERNEL_PD_END (kernel_mapping_base + KERNEL_PD_SIZE)
 #define KERNEL_PT1024_BASE (kernel_mapping_base + 0x3FE00000)
-#define KERNEL_QUICKMAP_PT (KERNEL_PT1024_BASE + 0x6000)
-#define KERNEL_QUICKMAP_PD (KERNEL_PT1024_BASE + 0x7000)
-#define KERNEL_QUICKMAP_PER_CPU_BASE (KERNEL_PT1024_BASE + 0x8000)
+#define KERNEL_MAX_CPU_COUNT 64
+#define KERNEL_QUICKMAP_PT_PER_CPU_BASE (KERNEL_PT1024_BASE + (1 * KERNEL_MAX_CPU_COUNT * PAGE_SIZE))
+#define KERNEL_QUICKMAP_PD_PER_CPU_BASE (KERNEL_PT1024_BASE + (2 * KERNEL_MAX_CPU_COUNT * PAGE_SIZE))
+#define KERNEL_QUICKMAP_PER_CPU_BASE (KERNEL_PT1024_BASE + (3 * KERNEL_MAX_CPU_COUNT * PAGE_SIZE))
 #define USER_RANGE_BASE 0x10000
 #define USER_RANGE_CEILING (kernel_mapping_base - 0x2000000)
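
For reference, here is the arithmetic behind the new windows as a quick constexpr sketch; the names mirror the defines above, and the specific offsets are my own working rather than part of the commit:

    // Standalone arithmetic check, assuming 4 KiB pages.
    constexpr unsigned long PAGE_SIZE_4K = 0x1000;
    constexpr unsigned long MAX_CPUS = 64; // KERNEL_MAX_CPU_COUNT
    constexpr unsigned long pt_quickmaps = 1 * MAX_CPUS * PAGE_SIZE_4K; // +0x40000 from KERNEL_PT1024_BASE
    constexpr unsigned long pd_quickmaps = 2 * MAX_CPUS * PAGE_SIZE_4K; // +0x80000
    constexpr unsigned long regular_quickmaps = 3 * MAX_CPUS * PAGE_SIZE_4K; // +0xC0000
    // CPU n's slot in each window is window_base + n * PAGE_SIZE, and the last
    // window ends at +0x100000 (1 MiB), comfortably inside the 2 MiB region
    // covered by boot_pd_kernel_pt1023.
    static_assert(regular_quickmaps + MAX_CPUS * PAGE_SIZE_4K <= 2 * 1024 * 1024);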