Support basic mmap'ing of a file!

All right, we can now mmap() a file and it gets magically paged in from the
file system in response to a not-present (NP) page fault. This is really cool :^)

I need to refactor this to support sharing of read-only file-backed pages,
but it's cool to just have something working.
This commit is contained in:
Andreas Kling 2018-11-08 12:59:16 +01:00
parent fdbd9f1e27
commit 3c8064a787
8 changed files with 141 additions and 14 deletions

View file

@ -254,6 +254,31 @@ bool MemoryManager::copy_on_write(Process& process, Region& region, unsigned pag
return true;
}
// Services a not-present fault on a vnode-backed region: allocates a physical
// page for the faulting slot, maps it, and fills it with file contents.
//
// process              - process whose page directory receives the mapping.
// region               - file-backed region (region.m_vnode must be set).
// page_index_in_region - index of the faulting page within the region.
//
// Returns true if the page was populated, false if no physical page could be
// allocated or the file system read failed. Interrupts are enabled only for
// the duration of the read and are disabled again before returning.
bool MemoryManager::page_in_from_vnode(Process& process, Region& region, unsigned page_index_in_region)
{
    ASSERT(region.physical_pages[page_index_in_region].is_null());
    region.physical_pages[page_index_in_region] = allocate_physical_page();
    if (region.physical_pages[page_index_in_region].is_null()) {
        kprintf("MM: page_in_from_vnode was unable to allocate a physical page\n");
        return false;
    }
    remap_region_page(process.m_page_directory, region, page_index_in_region, true);
    byte* dest_ptr = region.linearAddress.offset(page_index_in_region * PAGE_SIZE).asPtr();
    dbgprintf("MM: page_in_from_vnode ready to read from vnode, will write to L%x!\n", dest_ptr);

    // Page N of the region is backed by the N'th page-sized slice of the file,
    // counted from the region's starting offset within the file.
    Unix::off_t offset_in_file = region.m_file_offset + static_cast<Unix::off_t>(page_index_in_region * PAGE_SIZE);

    sti(); // Oh god here we go...
    auto nread = region.m_vnode->fileSystem()->readInodeBytes(region.m_vnode->inode, offset_in_file, PAGE_SIZE, dest_ptr, nullptr);
    // Restore the interrupts-disabled state right away so every exit path
    // below returns to the fault handler the way it was entered.
    cli();

    if (nread < 0) {
        kprintf("MM: page_in_from_vnode had error (%d) while reading!\n", nread);
        // The page is already mapped; don't expose whatever it held before.
        memset(dest_ptr, 0, PAGE_SIZE);
        return false;
    }
    if (nread < PAGE_SIZE) {
        // If we read less than a page, zero out the rest to avoid leaking uninitialized data.
        memset(dest_ptr + nread, 0, PAGE_SIZE - nread);
    }
    return true;
}
// Decides how to respond to a page fault.
// Not-present faults on regions that have a vnode are paged in from the file;
// protection violations on pages flagged in the region's cow_map are resolved
// via copy_on_write(). Both of those paths return PageFaultResponse::Continue.
// The visible tail of the function returns PageFaultResponse::ShouldCrash.
PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
{
// Must be entered with interrupts disabled.
ASSERT_INTERRUPTS_DISABLED();
@ -264,17 +289,21 @@ PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
ASSERT(region);
auto page_index_in_region = region->page_index_from_address(fault.laddr());
if (fault.is_not_present()) {
kprintf(" >> NP fault in Region{%p}[%u]\n", region, page_index_in_region);
if (region->m_vnode) {
dbgprintf("NP(vnode) fault in Region{%p}[%u]\n", region, page_index_in_region);
// NOTE(review): the result of page_in_from_vnode() is discarded, so
// Continue is returned even when the page-in reports failure.
page_in_from_vnode(*current, *region, page_index_in_region);
return PageFaultResponse::Continue;
} else {
// Not-present fault on a region without a backing vnode: only logged here.
kprintf("NP(error) fault in Region{%p}[%u]\n", region, page_index_in_region);
}
} else if (fault.is_protection_violation()) {
// A set bit in cow_map marks this page as pending copy-on-write.
if (region->cow_map.get(page_index_in_region)) {
#ifdef PAGE_FAULT_DEBUG
dbgprintf(" >> PV (COW) fault in Region{%p}[%u]\n", region, page_index_in_region);
#endif
dbgprintf("PV(cow) fault in Region{%p}[%u]\n", region, page_index_in_region);
bool success = copy_on_write(*current, *region, page_index_in_region);
ASSERT(success);
return PageFaultResponse::Continue;
}
kprintf(" >> PV fault in Region{%p}[%u]\n", region, page_index_in_region);
kprintf("PV(error) fault in Region{%p}[%u]\n", region, page_index_in_region);
} else {
// A fault is expected to be either not-present or a protection violation.
ASSERT_NOT_REACHED();
}
@ -284,6 +313,17 @@ PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
return PageFaultResponse::ShouldCrash;
}
// Hands out one page from the free list, or a null RetainPtr when the list is
// exhausted. Interrupts stay disabled for the duration of the call.
RetainPtr<PhysicalPage> MemoryManager::allocate_physical_page()
{
    InterruptDisabler disabler;

    const bool free_list_exhausted = m_free_physical_pages.size() < 1;
    if (free_list_exhausted)
        return { };

#ifdef MM_DEBUG
    dbgprintf("MM: allocate_physical_page vending P%x\n", m_free_physical_pages.last()->paddr().get());
#endif

    // Pages are vended from the tail of the free list.
    return m_free_physical_pages.takeLast();
}
Vector<RetainPtr<PhysicalPage>> MemoryManager::allocate_physical_pages(size_t count)
{
InterruptDisabler disabler;

View file

@ -72,7 +72,7 @@ struct Region : public Retainable<Region> {
return (laddr - linearAddress).get() / PAGE_SIZE;
}
RetainPtr<VirtualFileSystem::Node> m_file;
RetainPtr<VirtualFileSystem::Node> m_vnode;
Unix::off_t m_file_offset { 0 };
LinearAddress linearAddress;
@ -115,6 +115,7 @@ public:
bool validate_user_write(const Process&, LinearAddress) const;
Vector<RetainPtr<PhysicalPage>> allocate_physical_pages(size_t count);
RetainPtr<PhysicalPage> allocate_physical_page();
void remap_region(Process&, Region&);
@ -142,6 +143,7 @@ private:
static Region* region_from_laddr(Process&, LinearAddress);
bool copy_on_write(Process&, Region&, unsigned page_index_in_region);
bool page_in_from_vnode(Process&, Region&, unsigned page_index_in_region);
byte* quickmap_page(PhysicalPage&);
void unquickmap_page();

View file

@ -106,6 +106,28 @@ Region* Process::allocate_region(LinearAddress laddr, size_t size, String&& name
return m_regions.last().ptr();
}
// Creates a region of `size` bytes associated with `vnode` and maps it into
// this process. The region starts with an empty slot for every page; no
// physical pages are allocated here.
//
// laddr - requested base address; when null, the next free region address of
//         this process is used (and advanced past the new region plus one page).
// vnode - backing file node; must not be a character device.
// name  - region name.
// is_readable / is_writable - access flags forwarded to the Region.
//
// Returns a pointer to the newly appended region.
Region* Process::allocate_file_backed_region(LinearAddress laddr, size_t size, RetainPtr<VirtualFileSystem::Node>&& vnode, String&& name, bool is_readable, bool is_writable)
{
    ASSERT(!vnode->isCharacterDevice());

    // FIXME: This needs sanity checks. What if this overlaps existing regions?
    if (laddr.is_null()) {
        laddr = m_nextRegion;
        m_nextRegion = m_nextRegion.offset(size).offset(PAGE_SIZE);
    }
    laddr.mask(0xfffff000);

    // One null entry per page: nothing is backed by physical memory yet.
    const unsigned pages_needed = ceilDiv(size, PAGE_SIZE);
    Vector<RetainPtr<PhysicalPage>> unpopulated_pages;
    unpopulated_pages.resize(pages_needed);

    m_regions.append(adopt(*new Region(laddr, size, move(unpopulated_pages), move(name), is_readable, is_writable)));

    auto& new_region = *m_regions.last();
    new_region.m_vnode = move(vnode);
    MM.mapRegion(*this, new_region);
    return &new_region;
}
bool Process::deallocate_region(Region& region)
{
InterruptDisabler disabler;
@ -141,19 +163,41 @@ int Process::sys$set_mmap_name(void* addr, size_t size, const char* name)
// mmap() syscall entry point.
//
// params - user-supplied argument block; validated for readability first.
//
// Supports anonymous mappings (MAP_ANONYMOUS) and file-backed mappings of
// non-character-device descriptors. Mapping at a caller-chosen address is not
// implemented yet, so a non-null addr trips an assertion.
//
// Returns the base address of the new region, or a negated errno cast to a
// pointer:
//   -EINVAL  size is zero, or addr/size/offset is not page-aligned
//   -EBADF   fd does not name an open descriptor
//   -ENODEV  fd refers to a character device
//   -ENOMEM  the region could not be allocated
void* Process::sys$mmap(const Syscall::SC_mmap_params* params)
{
    VALIDATE_USER_READ_WITH_RETURN_TYPE(params, sizeof(Syscall::SC_mmap_params), void*);
    void* addr = (void*)params->addr;
    size_t size = params->size;
    int prot = params->prot;
    int flags = params->flags;
    int fd = params->fd;
    Unix::off_t offset = params->offset;
    if (size == 0)
        return (void*)-EINVAL;
    if ((dword)addr & ~PAGE_MASK || size & ~PAGE_MASK)
        return (void*)-EINVAL;
    if (flags & MAP_ANONYMOUS) {
        InterruptDisabler disabler;
        // FIXME: Implement mapping at a client-specified address. Most of the support is already in place.
        ASSERT(addr == nullptr);
        auto* region = allocate_region(LinearAddress(), size, "mmap", prot & PROT_READ, prot & PROT_WRITE);
        if (!region)
            return (void*)-ENOMEM;
        return region->linearAddress.asPtr();
    }
    if (offset & ~PAGE_MASK)
        return (void*)-EINVAL;
    auto* descriptor = file_descriptor(fd);
    if (!descriptor)
        return (void*)-EBADF;
    if (descriptor->vnode()->isCharacterDevice())
        return (void*)-ENODEV;
    // FIXME: If PROT_EXEC, check that the underlying file system isn't mounted noexec.
    auto region_name = descriptor->absolute_path();
    // Interrupts are disabled only around region allocation; the descriptor
    // lookup and path resolution above run with interrupts enabled.
    InterruptDisabler disabler;
    // FIXME: Implement mapping at a client-specified address. Most of the support is already in place.
    ASSERT(addr == nullptr);
    auto* region = allocate_file_backed_region(LinearAddress(), size, descriptor->vnode(), move(region_name), prot & PROT_READ, prot & PROT_WRITE);
    if (!region)
        return (void*)-ENOMEM;
    // Record where in the file the mapping starts, so page-ins read from the
    // requested offset rather than from the beginning of the file.
    region->m_file_offset = offset;
    return region->linearAddress.asPtr();
}
int Process::sys$munmap(void* addr, size_t size)

View file

@ -252,6 +252,7 @@ private:
TTY* m_tty { nullptr };
Region* allocate_region(LinearAddress, size_t, String&& name, bool is_readable = true, bool is_writable = true);
Region* allocate_file_backed_region(LinearAddress laddr, size_t size, RetainPtr<VirtualFileSystem::Node>&& vnode, String&& name, bool is_readable, bool is_writable);
bool deallocate_region(Region& region);
Region* regionFromRange(LinearAddress, size_t);

View file

@ -188,10 +188,11 @@ void exception_14_handler(RegisterDumpWithExceptionCode& regs)
dword faultAddress;
asm ("movl %%cr2, %%eax":"=a"(faultAddress));
dbgprintf("Ring%u page fault in %s(%u), %s laddr=%p\n",
regs.cs & 3,
dbgprintf("%s(%u): ring%u %s page fault, %s L%x\n",
current->name().characters(),
current->pid(),
regs.cs & 3,
regs.exception_code & 1 ? "PV" : "NP",
regs.exception_code & 2 ? "write" : "read",
faultAddress);
@ -231,7 +232,12 @@ void exception_14_handler(RegisterDumpWithExceptionCode& regs)
auto response = MM.handle_page_fault(PageFault(regs.exception_code, LinearAddress(faultAddress)));
if (response == PageFaultResponse::ShouldCrash) {
kprintf("Crashing after unresolved page fault\n");
kprintf("%s(%u) unrecoverable page fault, %s laddr=%p\n",
current->name().characters(),
current->pid(),
regs.exception_code & 2 ? "write" : "read",
faultAddress);
kprintf("exception code: %w\n", regs.exception_code);
kprintf("pc=%w:%x ds=%w es=%w fs=%w gs=%w\n", regs.cs, regs.eip, regs.ds, regs.es, regs.fs, regs.gs);
kprintf("stk=%w:%x\n", ss, esp);

View file

@ -3,6 +3,7 @@
#include "types.h"
#define PAGE_SIZE 4096u
#define PAGE_MASK 0xfffff000
union Descriptor {
struct {

View file

@ -14,6 +14,8 @@
#define PROT_EXEC 0x4
#define PROT_NONE 0x0
#define MAP_FAILED ((void*)-1)
__BEGIN_DECLS
void* mmap(void* addr, size_t, int prot, int flags, int fd, off_t);

View file

@ -6,6 +6,7 @@
#include <LibC/stdlib.h>
#include <LibC/utsname.h>
#include <LibC/pwd.h>
#include <sys/mman.h>
#include <signal.h>
#include <AK/FileSystemPath.h>
@ -112,6 +113,32 @@ static int sh_wt(int, const char**)
return 0;
}
// Shell builtin "mf": smoke test for file-backed mmap().
// Opens /Banner.txt, maps one page of it read-only, prints the first four
// bytes of the mapping, then unmaps it and closes the file.
//
// Returns 0 on success, 1 if the file could not be opened or mapped.
static int sh_mf(int, const char**)
{
    int fd = open("/Banner.txt", O_RDONLY);
    if (fd < 0) {
        perror("open(/Banner.txt)");
        return 1;
    }
    printf("opened /Banner.txt, calling mmap...\n");

    int status = 0;
    auto page_size = getpagesize();
    byte* data = (byte*)mmap(nullptr, page_size, PROT_READ, MAP_PRIVATE, fd, 0);
    if (data == MAP_FAILED) {
        perror("mmap()");
        // Report the failure to the caller, like the open() failure above.
        status = 1;
    } else {
        printf("mapped file @ %p\n", data);
        printf("contents: %b %b %b %b\n", data[0], data[1], data[2], data[3]);
        int rc = munmap(data, page_size);
        printf("munmap() returned %d\n", rc);
    }

    // The descriptor is closed whether or not the mapping succeeded.
    int rc = close(fd);
    printf("close() returned %d\n", rc);
    return status;
}
static int sh_exit(int, const char**)
{
printf("Good-bye!\n");
@ -190,6 +217,10 @@ static bool handle_builtin(int argc, const char** argv, int& retval)
retval = sh_wt(argc, argv);
return true;
}
if (!strcmp(argv[0], "mf")) {
retval = sh_mf(argc, argv);
return true;
}
if (!strcmp(argv[0], "fork")) {
retval = sh_fork(argc, argv);
return true;