From 3c8064a7872b990c6b48455ed6509d161adf4721 Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Thu, 8 Nov 2018 12:59:16 +0100
Subject: [PATCH] Support basic mmap'ing of a file!

All right, we can now mmap() a file and it gets magically paged in from
fs in response to an NP page fault. This is really cool :^)

I need to refactor this to support sharing of read-only file-backed
pages, but it's cool to just have something working.
---
 Kernel/MemoryManager.cpp | 50 +++++++++++++++++++++++++++++++++----
 Kernel/MemoryManager.h   |  4 ++-
 Kernel/Process.cpp       | 54 ++++++++++++++++++++++++++++++++++++----
 Kernel/Process.h         |  1 +
 Kernel/i386.cpp          | 12 ++++++---
 Kernel/i386.h            |  1 +
 LibC/mman.h              |  2 ++
 Userland/sh.cpp          | 31 +++++++++++++++++++++++
 8 files changed, 141 insertions(+), 14 deletions(-)

diff --git a/Kernel/MemoryManager.cpp b/Kernel/MemoryManager.cpp
index 570cf7d451..e404560c65 100644
--- a/Kernel/MemoryManager.cpp
+++ b/Kernel/MemoryManager.cpp
@@ -254,6 +254,31 @@ bool MemoryManager::copy_on_write(Process& process, Region& region, unsigned pag
     return true;
 }
 
+bool MemoryManager::page_in_from_vnode(Process& process, Region& region, unsigned page_index_in_region)
+{
+    ASSERT(region.physical_pages[page_index_in_region].is_null());
+    region.physical_pages[page_index_in_region] = allocate_physical_page();
+    if (region.physical_pages[page_index_in_region].is_null()) {
+        kprintf("MM: page_in_from_vnode was unable to allocate a physical page\n");
+        return false;
+    }
+    remap_region_page(process.m_page_directory, region, page_index_in_region, true);
+    byte* dest_ptr = region.linearAddress.offset(page_index_in_region * PAGE_SIZE).asPtr();
+    dbgprintf("MM: page_in_from_vnode ready to read from vnode, will write to L%x!\n", dest_ptr);
+    sti(); // Oh god here we go...
+    auto nread = region.m_vnode->fileSystem()->readInodeBytes(region.m_vnode->inode, region.m_file_offset, PAGE_SIZE, dest_ptr, nullptr);
+    if (nread < 0) {
+        kprintf("MM: page_in_from_vnode had error (%d) while reading!\n", nread);
+        return false;
+    }
+    if (nread < PAGE_SIZE) {
+        // If we read less than a page, zero out the rest to avoid leaking uninitialized data.
+        memset(dest_ptr + nread, 0, PAGE_SIZE - nread);
+    }
+    cli();
+    return true;
+}
+
 PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
 {
     ASSERT_INTERRUPTS_DISABLED();
@@ -264,17 +289,21 @@ PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
     ASSERT(region);
     auto page_index_in_region = region->page_index_from_address(fault.laddr());
     if (fault.is_not_present()) {
-        kprintf(" >> NP fault in Region{%p}[%u]\n", region, page_index_in_region);
+        if (region->m_vnode) {
+            dbgprintf("NP(vnode) fault in Region{%p}[%u]\n", region, page_index_in_region);
+            page_in_from_vnode(*current, *region, page_index_in_region);
+            return PageFaultResponse::Continue;
+        } else {
+            kprintf("NP(error) fault in Region{%p}[%u]\n", region, page_index_in_region);
+        }
     } else if (fault.is_protection_violation()) {
         if (region->cow_map.get(page_index_in_region)) {
-#ifdef PAGE_FAULT_DEBUG
-            dbgprintf(" >> PV (COW) fault in Region{%p}[%u]\n", region, page_index_in_region);
-#endif
+            dbgprintf("PV(cow) fault in Region{%p}[%u]\n", region, page_index_in_region);
             bool success = copy_on_write(*current, *region, page_index_in_region);
             ASSERT(success);
             return PageFaultResponse::Continue;
         }
-        kprintf(" >> PV fault in Region{%p}[%u]\n", region, page_index_in_region);
+        kprintf("PV(error) fault in Region{%p}[%u]\n", region, page_index_in_region);
     } else {
         ASSERT_NOT_REACHED();
     }
@@ -284,6 +313,17 @@ PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
     return PageFaultResponse::ShouldCrash;
 }
 
+RetainPtr<PhysicalPage> MemoryManager::allocate_physical_page()
+{
+    InterruptDisabler disabler;
+    if (1 > m_free_physical_pages.size())
+        return { };
+#ifdef MM_DEBUG
+    dbgprintf("MM: allocate_physical_page vending P%x\n", m_free_physical_pages.last()->paddr().get());
+#endif
+    return m_free_physical_pages.takeLast();
+}
+
 Vector<RetainPtr<PhysicalPage>> MemoryManager::allocate_physical_pages(size_t count)
 {
     InterruptDisabler disabler;
diff --git a/Kernel/MemoryManager.h b/Kernel/MemoryManager.h
index ca30f9d6db..946b1ba244 100644
--- a/Kernel/MemoryManager.h
+++ b/Kernel/MemoryManager.h
@@ -72,7 +72,7 @@ struct Region : public Retainable {
         return (laddr - linearAddress).get() / PAGE_SIZE;
     }
 
-    RetainPtr m_file;
+    RetainPtr m_vnode;
     Unix::off_t m_file_offset { 0 };
 
     LinearAddress linearAddress;
@@ -115,6 +115,7 @@ public:
     bool validate_user_write(const Process&, LinearAddress) const;
 
     Vector<RetainPtr<PhysicalPage>> allocate_physical_pages(size_t count);
+    RetainPtr<PhysicalPage> allocate_physical_page();
 
     void remap_region(Process&, Region&);
 
@@ -142,6 +143,7 @@ private:
     static Region* region_from_laddr(Process&, LinearAddress);
 
     bool copy_on_write(Process&, Region&, unsigned page_index_in_region);
+    bool page_in_from_vnode(Process&, Region&, unsigned page_index_in_region);
 
     byte* quickmap_page(PhysicalPage&);
     void unquickmap_page();
diff --git a/Kernel/Process.cpp b/Kernel/Process.cpp
index 48fc568437..aa97ba053b 100644
--- a/Kernel/Process.cpp
+++ b/Kernel/Process.cpp
@@ -106,6 +106,28 @@ Region* Process::allocate_region(LinearAddress laddr, size_t size, String&& name
     return m_regions.last().ptr();
 }
 
+Region* Process::allocate_file_backed_region(LinearAddress laddr, size_t size, RetainPtr&& vnode, String&& name, bool is_readable, bool is_writable)
+{
+    ASSERT(!vnode->isCharacterDevice());
+
+    // FIXME: This needs sanity checks. What if this overlaps existing regions?
+    if (laddr.is_null()) {
+        laddr = m_nextRegion;
+        m_nextRegion = m_nextRegion.offset(size).offset(PAGE_SIZE);
+    }
+
+    laddr.mask(0xfffff000);
+
+    unsigned page_count = ceilDiv(size, PAGE_SIZE);
+    Vector<RetainPtr<PhysicalPage>> physical_pages;
+    physical_pages.resize(page_count); // Start out with no physical pages!
+
+    m_regions.append(adopt(*new Region(laddr, size, move(physical_pages), move(name), is_readable, is_writable)));
+    m_regions.last()->m_vnode = move(vnode);
+    MM.mapRegion(*this, *m_regions.last());
+    return m_regions.last().ptr();
+}
+
 bool Process::deallocate_region(Region& region)
 {
     InterruptDisabler disabler;
@@ -141,19 +163,41 @@ int Process::sys$set_mmap_name(void* addr, size_t size, const char* name)
 void* Process::sys$mmap(const Syscall::SC_mmap_params* params)
 {
     VALIDATE_USER_READ_WITH_RETURN_TYPE(params, sizeof(Syscall::SC_mmap_params), void*);
-    InterruptDisabler disabler;
     void* addr = (void*)params->addr;
     size_t size = params->size;
     int prot = params->prot;
     int flags = params->flags;
     int fd = params->fd;
     Unix::off_t offset = params->offset;
-    // FIXME: Implement mapping at a client-preferred address. Most of the support is already in plcae.
+    if (size == 0)
+        return (void*)-EINVAL;
+    if ((dword)addr & ~PAGE_MASK || size & ~PAGE_MASK)
+        return (void*)-EINVAL;
+    if (flags & MAP_ANONYMOUS) {
+        InterruptDisabler disabler;
+        // FIXME: Implement mapping at a client-specified address. Most of the support is already in place.
+        ASSERT(addr == nullptr);
+        auto* region = allocate_region(LinearAddress(), size, "mmap", prot & PROT_READ, prot & PROT_WRITE);
+        if (!region)
+            return (void*)-ENOMEM;
+        return region->linearAddress.asPtr();
+    }
+    if (offset & ~PAGE_MASK)
+        return (void*)-EINVAL;
+    auto* descriptor = file_descriptor(fd);
+    if (!descriptor)
+        return (void*)-EBADF;
+    if (descriptor->vnode()->isCharacterDevice())
+        return (void*)-ENODEV;
+    // FIXME: If PROT_EXEC, check that the underlying file system isn't mounted noexec.
+    auto region_name = descriptor->absolute_path();
+    InterruptDisabler disabler;
+    // FIXME: Implement mapping at a client-specified address. Most of the support is already in place.
     ASSERT(addr == nullptr);
-    auto* region = allocate_region(LinearAddress(), size, "mmap");
+    auto* region = allocate_file_backed_region(LinearAddress(), size, descriptor->vnode(), move(region_name), prot & PROT_READ, prot & PROT_WRITE);
     if (!region)
-        return (void*)-1;
-    return (void*)region->linearAddress.get();
+        return (void*)-ENOMEM;
+    return region->linearAddress.asPtr();
 }
 
 int Process::sys$munmap(void* addr, size_t size)
diff --git a/Kernel/Process.h b/Kernel/Process.h
index 4a136b3b84..d926fb94da 100644
--- a/Kernel/Process.h
+++ b/Kernel/Process.h
@@ -252,6 +252,7 @@ private:
     TTY* m_tty { nullptr };
 
     Region* allocate_region(LinearAddress, size_t, String&& name, bool is_readable = true, bool is_writable = true);
+    Region* allocate_file_backed_region(LinearAddress laddr, size_t size, RetainPtr&& vnode, String&& name, bool is_readable, bool is_writable);
     bool deallocate_region(Region& region);
     Region* regionFromRange(LinearAddress, size_t);
 
diff --git a/Kernel/i386.cpp b/Kernel/i386.cpp
index f6e46f7345..6b941592cd 100644
--- a/Kernel/i386.cpp
+++ b/Kernel/i386.cpp
@@ -188,10 +188,11 @@ void exception_14_handler(RegisterDumpWithExceptionCode& regs)
     dword faultAddress;
     asm ("movl %%cr2, %%eax":"=a"(faultAddress));
 
-    dbgprintf("Ring%u page fault in %s(%u), %s laddr=%p\n",
-        regs.cs & 3,
+    dbgprintf("%s(%u): ring%u %s page fault, %s L%x\n",
         current->name().characters(),
         current->pid(),
+        regs.cs & 3,
+        regs.exception_code & 1 ? "PV" : "NP",
         regs.exception_code & 2 ? "write" : "read",
         faultAddress);
 
@@ -231,7 +232,12 @@ void exception_14_handler(RegisterDumpWithExceptionCode& regs)
     auto response = MM.handle_page_fault(PageFault(regs.exception_code, LinearAddress(faultAddress)));
 
     if (response == PageFaultResponse::ShouldCrash) {
-        kprintf("Crashing after unresolved page fault\n");
+        kprintf("%s(%u) unrecoverable page fault, %s laddr=%p\n",
+            current->name().characters(),
+            current->pid(),
+            regs.exception_code & 2 ? "write" : "read",
+            faultAddress);
+        kprintf("exception code: %w\n", regs.exception_code);
         kprintf("pc=%w:%x ds=%w es=%w fs=%w gs=%w\n", regs.cs, regs.eip, regs.ds, regs.es, regs.fs, regs.gs);
         kprintf("stk=%w:%x\n", ss, esp);
diff --git a/Kernel/i386.h b/Kernel/i386.h
index 3b56be9e72..714b78fdc2 100644
--- a/Kernel/i386.h
+++ b/Kernel/i386.h
@@ -3,6 +3,7 @@
 #include "types.h"
 
 #define PAGE_SIZE 4096u
+#define PAGE_MASK 0xfffff000
 
 union Descriptor {
     struct {
diff --git a/LibC/mman.h b/LibC/mman.h
index 16c3247591..a2a6ca372c 100644
--- a/LibC/mman.h
+++ b/LibC/mman.h
@@ -14,6 +14,8 @@
 #define PROT_EXEC 0x4
 #define PROT_NONE 0x0
 
+#define MAP_FAILED ((void*)-1)
+
 __BEGIN_DECLS
 
 void* mmap(void* addr, size_t, int prot, int flags, int fd, off_t);
diff --git a/Userland/sh.cpp b/Userland/sh.cpp
index 9a4706dc52..e20dbb9d1a 100644
--- a/Userland/sh.cpp
+++ b/Userland/sh.cpp
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -112,6 +113,32 @@ static int sh_wt(int, const char**)
     return 0;
 }
 
+static int sh_mf(int, const char**)
+{
+    int rc;
+    int fd = open("/Banner.txt", O_RDONLY);
+    if (fd < 0) {
+        perror("open(/Banner.txt)");
+        return 1;
+    }
+    printf("opened /Banner.txt, calling mmap...\n");
+    byte* data = (byte*)mmap(nullptr, getpagesize(), PROT_READ, MAP_PRIVATE, fd, 0);
+    if (data == MAP_FAILED) {
+        perror("mmap()");
+        goto close_it;
+    }
+    printf("mapped file @ %p\n", data);
+    printf("contents: %b %b %b %b\n", data[0], data[1], data[2], data[3]);
+
+    rc = munmap(data, getpagesize());
+    printf("munmap() returned %d\n", rc);
+
+close_it:
+    rc = close(fd);
+    printf("close() returned %d\n", rc);
+    return 0;
+}
+
 static int sh_exit(int, const char**)
 {
     printf("Good-bye!\n");
@@ -190,6 +217,10 @@ static bool handle_builtin(int argc, const char** argv, int& retval)
         retval = sh_wt(argc, argv);
         return true;
     }
+    if (!strcmp(argv[0], "mf")) {
+        retval = sh_mf(argc, argv);
+        return true;
+    }
     if (!strcmp(argv[0], "fork")) {
         retval = sh_fork(argc, argv);
         return true;