i386: Merge PAE and non-PAE pmaps into same kernel.

Effectively all i386 kernels now have two pmaps compiled in: one
managing PAE pagetables, and another managing non-PAE pagetables. The
implementation is selected at cold time depending on the CPU features.
The vm_paddr_t type is always 64bit now. As a result, the nx bit can be
used on all capable CPUs.

Option PAE only affects the bus_addr_t: it is still 32bit for non-PAE
configs, for drivers compatibility. Kernel layout, esp. max kernel
address, low memory PDEs and max user address (same as trampoline
start) are now same for PAE and for non-PAE regardless of the type of
page tables used.

A non-PAE kernel (when using PAE pagetables) can now handle physical
memory up to 24G. Larger memory requires re-tuning the KVA consumers,
so instead the code caps the maximum at 24G. Unfortunately, a lot of
drivers do not use busdma(9) properly, so by default even crossing the
4G barrier is not easy. Two tunables are added: hw.above4g_allow and
hw.above24g_allow. The first one is kept enabled for now to evaluate
the status on HEAD; the second is only for dev use.

i386 now creates three freelists if there is any memory above 4G, to
allow proper bounce pages allocation. Also, VM_KMEM_SIZE_SCALE changed
from 3 to 1.

The PAE_TABLES kernel config option is retired.

In collaboration with: pho
Discussed with:	emaste
Reviewed by:	markj
MFC after:	2 weeks
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D18894
This commit is contained in:
Konstantin Belousov 2019-01-30 02:07:13 +00:00
parent 381c2d2e9a
commit 9a52756044
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=343567
44 changed files with 2815 additions and 1208 deletions

View file

@ -67,14 +67,16 @@ typedef uint64_t i386_pde_pae_t;
_Static_assert(PAGE_SHIFT == I386_PAGE_SHIFT, "PAGE_SHIFT mismatch");
_Static_assert(PAGE_SIZE == I386_PAGE_SIZE, "PAGE_SIZE mismatch");
_Static_assert(PAGE_MASK == I386_PAGE_MASK, "PAGE_MASK mismatch");
#if 0
_Static_assert(NPTEPG == I386_NPTEPG, "NPTEPG mismatch");
_Static_assert(PDRSHIFT == I386_PDRSHIFT, "PDRSHIFT mismatch");
_Static_assert(NBPDR == I386_NBPDR, "NBPDR mismatch");
#endif
_Static_assert(PDRSHIFT_NOPAE == I386_PDRSHIFT, "PDRSHIFT mismatch");
_Static_assert(PG_V == I386_PG_V, "PG_V mismatch");
_Static_assert(PG_PS == I386_PG_PS, "PG_PS mismatch");
_Static_assert((u_int)PG_FRAME == I386_PG_FRAME, "PG_FRAME mismatch");
_Static_assert(PG_PS_FRAME == I386_PG_PS_FRAME, "PG_PS_FRAME mismatch");
_Static_assert((u_int)PG_FRAME_NOPAE == I386_PG_FRAME, "PG_FRAME mismatch");
_Static_assert(PG_PS_FRAME_NOPAE == I386_PG_PS_FRAME, "PG_PS_FRAME mismatch");
#endif
int _i386_native(kvm_t *);

View file

@ -492,12 +492,16 @@ i386/i386/longrun.c optional cpu_enable_longrun
i386/i386/machdep.c standard
i386/i386/mem.c optional mem
i386/i386/minidump_machdep.c standard
i386/i386/minidump_machdep_pae.c standard
i386/i386/minidump_machdep_nopae.c standard
i386/i386/mp_clock.c optional smp
i386/i386/mp_machdep.c optional smp
i386/i386/mpboot.s optional smp
i386/i386/npx.c standard
i386/i386/perfmon.c optional perfmon
i386/i386/pmap.c standard
i386/i386/pmap_base.c standard
i386/i386/pmap_nopae.c standard
i386/i386/pmap_pae.c standard
i386/i386/prof_machdep.c optional profiling-routine
i386/i386/ptrace_machdep.c standard
i386/i386/sigtramp.s standard

View file

@ -33,11 +33,6 @@ KVA_PAGES opt_global.h
# Physical address extensions and support for >4G ram. As above.
PAE opt_global.h
# Use PAE page tables, but limit memory support to 4GB.
# This keeps the i386 non-PAE KBI, in particular, drivers see
# 32bit vm_paddr_t.
PAE_TABLES opt_global.h
TIMER_FREQ opt_clock.h
CPU_ATHLON_SSE_HACK opt_cpu.h

View file

@ -309,7 +309,7 @@ dcons_drv_init(int stage)
* Allow read/write access to dcons buffer.
*/
for (pa = trunc_page(addr); pa < addr + size; pa += PAGE_SIZE)
*vtopte(PMAP_MAP_LOW + pa) |= PG_RW;
pmap_ksetrw(PMAP_MAP_LOW + pa);
invltlb();
#endif
/* XXX P to V */

View file

@ -513,7 +513,7 @@ int genfbioctl(genfb_softc_t *sc, video_adapter_t *adp, u_long cmd,
}
int genfbmmap(genfb_softc_t *sc, video_adapter_t *adp, vm_ooffset_t offset,
vm_offset_t *paddr, int prot, vm_memattr_t *memattr)
vm_paddr_t *paddr, int prot, vm_memattr_t *memattr)
{
return vidd_mmap(adp, offset, paddr, prot, memattr);
}

View file

@ -327,7 +327,7 @@ int genfbwrite(genfb_softc_t *sc, video_adapter_t *adp,
int genfbioctl(genfb_softc_t *sc, video_adapter_t *adp,
u_long cmd, caddr_t arg, int flag, struct thread *td);
int genfbmmap(genfb_softc_t *sc, video_adapter_t *adp,
vm_ooffset_t offset, vm_offset_t *paddr,
vm_ooffset_t offset, vm_paddr_t *paddr,
int prot, vm_memattr_t *memattr);
#endif /* FB_INSTALL_CDEV */

View file

@ -147,7 +147,7 @@ vga_ioctl(struct cdev *dev, vga_softc_t *sc, u_long cmd, caddr_t arg, int flag,
int
vga_mmap(struct cdev *dev, vga_softc_t *sc, vm_ooffset_t offset,
vm_offset_t *paddr, int prot, vm_memattr_t *memattr)
vm_paddr_t *paddr, int prot, vm_memattr_t *memattr)
{
return genfbmmap(&sc->gensc, sc->adp, offset, paddr, prot, memattr);
}

View file

@ -91,7 +91,7 @@ int vga_write(struct cdev *dev, vga_softc_t *sc, struct uio *uio, int flag);
int vga_ioctl(struct cdev *dev, vga_softc_t *sc, u_long cmd, caddr_t arg,
int flag, struct thread *td);
int vga_mmap(struct cdev *dev, vga_softc_t *sc, vm_ooffset_t offset,
vm_offset_t *paddr, int prot, vm_memattr_t *memattr);
vm_paddr_t *paddr, int prot, vm_memattr_t *memattr);
#endif
extern int (*vga_sub_configure)(int flags);

View file

@ -291,7 +291,7 @@ ec_putc(int c)
#ifdef __amd64__
fb = KERNBASE + 0xb8000;
#else /* __i386__ */
fb = PMAP_MAP_LOW + 0xb8000;
fb = pmap_get_map_low() + 0xb8000;
#endif
xsize = 80;
ysize = 25;

View file

@ -134,7 +134,7 @@ table_map(vm_paddr_t pa, int offset, vm_offset_t length)
off = pa & PAGE_MASK;
length = round_page(length + off);
pa = pa & PG_FRAME;
pa = pmap_pg_frame(pa);
va = (vm_offset_t)pmap_kenter_temporary(pa, offset) +
(offset * PAGE_SIZE);
data = (void *)(va + off);

View file

@ -329,9 +329,7 @@ bios16(struct bios_args *args, char *fmt, ...)
va_list ap;
int flags = BIOSCODE_FLAG | BIOSDATA_FLAG;
u_int i, arg_start, arg_end;
pt_entry_t *pte;
pd_entry_t *ptd, orig_ptd;
void *bios16_pmap_handle;
arg_start = 0xffffffff;
arg_end = 0;
@ -388,18 +386,10 @@ bios16(struct bios_args *args, char *fmt, ...)
args->seg.args.limit = 0xffff;
}
args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME;
args->seg.code32.base = pmap_pg_frame((u_int)&bios16_jmp);
args->seg.code32.limit = 0xffff;
/*
* no page table, so create one and install it.
*/
pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
ptd = IdlePTD;
*pte = vm86phystk | PG_RW | PG_V;
orig_ptd = *ptd;
*ptd = vtophys(pte) | PG_RW | PG_V;
pmap_invalidate_all(kernel_pmap); /* XXX insurance for now */
bios16_pmap_handle = pmap_bios16_enter();
stack_top = stack;
va_start(ap, fmt);
@ -451,13 +441,7 @@ bios16(struct bios_args *args, char *fmt, ...)
bioscall_vector.vec16.segment = GSEL(GBIOSCODE16_SEL, SEL_KPL);
i = bios16_call(&args->r, stack_top);
*ptd = orig_ptd; /* remove page table */
/*
* XXX only needs to be invlpg(0) but that doesn't work on the 386
*/
pmap_invalidate_all(kernel_pmap);
free(pte, M_TEMP); /* ... and free it */
pmap_bios16_leave(bios16_pmap_handle);
return (i);
}

View file

@ -47,12 +47,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#if defined(PAE) || defined(PAE_TABLES)
#define KCR3 ((u_int)IdlePDPT)
#else
#define KCR3 ((u_int)IdlePTD)
#endif
int copyin_fast(const void *udaddr, void *kaddr, size_t len, u_int);
static int (*copyin_fast_tramp)(const void *, void *, size_t, u_int);
int copyout_fast(const void *kaddr, void *udaddr, size_t len, u_int);
@ -103,7 +97,6 @@ cp_slow0(vm_offset_t uva, size_t len, bool write,
{
struct pcpu *pc;
vm_page_t m[2];
pt_entry_t *pte;
vm_offset_t kaddr;
int error, i, plen;
bool sleepable;
@ -128,12 +121,7 @@ cp_slow0(vm_offset_t uva, size_t len, bool write,
sx_xlock(&pc->pc_copyout_slock);
kaddr = pc->pc_copyout_saddr;
}
for (i = 0, pte = vtopte(kaddr); i < plen; i++, pte++) {
*pte = PG_V | PG_RW | PG_A | PG_M | VM_PAGE_TO_PHYS(m[i]) |
pmap_cache_bits(kernel_pmap, pmap_page_get_memattr(m[i]),
FALSE);
invlpg(kaddr + ptoa(i));
}
pmap_cp_slow0_map(kaddr, plen, m);
kaddr += uva - trunc_page(uva);
f(kaddr, arg);
sched_unpin();
@ -225,7 +213,7 @@ copyin(const void *udaddr, void *kaddr, size_t len)
(uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
return (EFAULT);
if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
copyin_fast_tramp(udaddr, kaddr, len, KCR3) == 0))
copyin_fast_tramp(udaddr, kaddr, len, pmap_get_kcr3()) == 0))
return (0);
for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) {
@ -260,7 +248,7 @@ copyout(const void *kaddr, void *udaddr, size_t len)
(uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
return (EFAULT);
if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
copyout_fast_tramp(kaddr, udaddr, len, KCR3) == 0))
copyout_fast_tramp(kaddr, udaddr, len, pmap_get_kcr3()) == 0))
return (0);
for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) {
@ -296,7 +284,7 @@ fubyte(volatile const void *base)
(uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout) {
res = fubyte_fast_tramp(base, KCR3);
res = fubyte_fast_tramp(base, pmap_get_kcr3());
if (res != -1)
return (res);
}
@ -322,7 +310,7 @@ fuword16(volatile const void *base)
(uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout) {
res = fuword16_fast_tramp(base, KCR3);
res = fuword16_fast_tramp(base, pmap_get_kcr3());
if (res != -1)
return (res);
}
@ -348,7 +336,7 @@ fueword(volatile const void *base, long *val)
(uintptr_t)base + sizeof(*val) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout) {
if (fueword_fast_tramp(base, val, KCR3) == 0)
if (fueword_fast_tramp(base, val, pmap_get_kcr3()) == 0)
return (0);
}
if (cp_slow0((vm_offset_t)base, sizeof(long), false, fueword_slow0,
@ -383,7 +371,7 @@ subyte(volatile void *base, int byte)
if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base ||
(uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout && subyte_fast_tramp(base, byte, KCR3) == 0)
if (fast_copyout && subyte_fast_tramp(base, byte, pmap_get_kcr3()) == 0)
return (0);
return (cp_slow0((vm_offset_t)base, sizeof(u_char), true, subyte_slow0,
&byte) != 0 ? -1 : 0);
@ -403,7 +391,8 @@ suword16(volatile void *base, int word)
if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base ||
(uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout && suword16_fast_tramp(base, word, KCR3) == 0)
if (fast_copyout && suword16_fast_tramp(base, word, pmap_get_kcr3())
== 0)
return (0);
return (cp_slow0((vm_offset_t)base, sizeof(int16_t), true,
suword16_slow0, &word) != 0 ? -1 : 0);
@ -423,7 +412,7 @@ suword(volatile void *base, long word)
if ((uintptr_t)base + sizeof(word) < (uintptr_t)base ||
(uintptr_t)base + sizeof(word) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout && suword_fast_tramp(base, word, KCR3) == 0)
if (fast_copyout && suword_fast_tramp(base, word, pmap_get_kcr3()) == 0)
return (0);
return (cp_slow0((vm_offset_t)base, sizeof(long), true,
suword_slow0, &word) != 0 ? -1 : 0);

View file

@ -101,21 +101,8 @@ ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
ASSYM(TD0_KSTACK_PAGES, TD0_KSTACK_PAGES);
ASSYM(PAGE_SIZE, PAGE_SIZE);
ASSYM(NPTEPG, NPTEPG);
ASSYM(NPDEPG, NPDEPG);
ASSYM(NPDEPTD, NPDEPTD);
ASSYM(NPGPTD, NPGPTD);
ASSYM(PDESIZE, sizeof(pd_entry_t));
ASSYM(PTESIZE, sizeof(pt_entry_t));
ASSYM(PDESHIFT, PDESHIFT);
ASSYM(PTESHIFT, PTESHIFT);
ASSYM(PAGE_SHIFT, PAGE_SHIFT);
ASSYM(PAGE_MASK, PAGE_MASK);
ASSYM(PDRSHIFT, PDRSHIFT);
ASSYM(PDRMASK, PDRMASK);
ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
ASSYM(KERNBASE, KERNBASE);
ASSYM(KERNLOAD, KERNLOAD);
ASSYM(PCB_CR0, offsetof(struct pcb, pcb_cr0));
ASSYM(PCB_CR2, offsetof(struct pcb, pcb_cr2));
ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
@ -222,6 +209,9 @@ ASSYM(PC_KESP0, offsetof(struct pcpu, pc_kesp0));
ASSYM(PC_TRAMPSTK, offsetof(struct pcpu, pc_trampstk));
ASSYM(PC_COPYOUT_BUF, offsetof(struct pcpu, pc_copyout_buf));
ASSYM(PC_IBPB_SET, offsetof(struct pcpu, pc_ibpb_set));
ASSYM(PMAP_TRM_MIN_ADDRESS, PMAP_TRM_MIN_ADDRESS);
ASSYM(KERNLOAD, KERNLOAD);
ASSYM(KERNBASE, KERNBASE);
#ifdef DEV_APIC
ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
@ -237,7 +227,6 @@ ASSYM(GPROC0_SEL, GPROC0_SEL);
ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
ASSYM(VM86_STACK_SPACE, VM86_STACK_SPACE);
ASSYM(PMAP_TRM_MIN_ADDRESS, PMAP_TRM_MIN_ADDRESS);
ASSYM(TRAMP_COPYOUT_SZ, TRAMP_COPYOUT_SZ);
#ifdef HWPMC_HOOKS

View file

@ -632,6 +632,7 @@ extern int elf32_nxstack;
void
initializecpu(void)
{
uint64_t msr;
switch (cpu) {
#ifdef I486_CPU
@ -744,16 +745,10 @@ initializecpu(void)
load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
cpu_fxsr = hw_instruction_sse = 1;
}
#if defined(PAE) || defined(PAE_TABLES)
if ((amd_feature & AMDID_NX) != 0) {
uint64_t msr;
if (elf32_nxstack) {
msr = rdmsr(MSR_EFER) | EFER_NXE;
wrmsr(MSR_EFER, msr);
pg_nx = PG_NX;
elf32_nxstack = 1;
}
#endif
}
void

View file

@ -53,15 +53,6 @@
#include "assym.inc"
/*
* PTmap is recursive pagemap at top of virtual address space.
* Within PTmap, the page directory can be found (third indirection).
*/
.globl PTmap,PTD,PTDpde
.set PTmap,(PTDPTDI << PDRSHIFT)
.set PTD,PTmap + (PTDPTDI * PAGE_SIZE)
.set PTDpde,PTD + (PTDPTDI * PDESIZE)
/*
* Compiled KERNBASE location and the kernel load address, now identical.
*/

View file

@ -175,6 +175,8 @@ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
int _udatasel, _ucodesel;
u_int basemem;
static int above4g_allow = 1;
static int above24g_allow = 0;
int cold = 1;
@ -1675,6 +1677,7 @@ static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
int *physmap_idxp)
{
uint64_t lim, ign;
int i, insert_idx, physmap_idx;
physmap_idx = *physmap_idxp;
@ -1682,13 +1685,24 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
if (length == 0)
return (1);
#ifndef PAE
if (base > 0xffffffff) {
printf("%uK of memory above 4GB ignored\n",
(u_int)(length / 1024));
lim = 0x100000000; /* 4G */
if (pae_mode && above4g_allow)
lim = above24g_allow ? -1ULL : 0x600000000; /* 24G */
if (base >= lim) {
printf("%uK of memory above %uGB ignored, pae %d "
"above4g_allow %d above24g_allow %d\n",
(u_int)(length / 1024), (u_int)(lim >> 30), pae_mode,
above4g_allow, above24g_allow);
return (1);
}
#endif
if (base + length >= lim) {
ign = base + length - lim;
length -= ign;
printf("%uK of memory above %uGB ignored, pae %d "
"above4g_allow %d above24g_allow %d\n",
(u_int)(ign / 1024), (u_int)(lim >> 30), pae_mode,
above4g_allow, above24g_allow);
}
/*
* Find insertion point while checking for overlap. Start off by
@ -1781,8 +1795,6 @@ add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap,
static void
basemem_setup(void)
{
pt_entry_t *pte;
int i;
if (basemem > 640) {
printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
@ -1790,15 +1802,7 @@ basemem_setup(void)
basemem = 640;
}
/*
* Map pages between basemem and ISA_HOLE_START, if any, r/w into
* the vm86 page table so that vm86 can scribble on them using
* the vm86 map too. XXX: why 2 ways for this and only 1 way for
* page 0, at least as initialized here?
*/
pte = (pt_entry_t *)vm86paddr;
for (i = basemem / 4; i < 160; i++)
pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
pmap_basemem_setup(basemem);
}
/*
@ -1820,7 +1824,6 @@ getmemsize(int first)
int has_smap, off, physmap_idx, pa_indx, da_indx;
u_long memtest;
vm_paddr_t physmap[PHYSMAP_SIZE];
pt_entry_t *pte;
quad_t dcons_addr, dcons_size, physmem_tunable;
int hasbrokenint12, i, res;
u_int extmem;
@ -1841,6 +1844,9 @@ getmemsize(int first)
*/
vm_phys_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));
TUNABLE_INT_FETCH("hw.above4g_allow", &above4g_allow);
TUNABLE_INT_FETCH("hw.above24g_allow", &above24g_allow);
/*
* Check if the loader supplied an SMAP memory map. If so,
* use that and do not make any VM86 calls.
@ -2031,7 +2037,6 @@ getmemsize(int first)
phys_avail[pa_indx++] = physmap[0];
phys_avail[pa_indx] = physmap[0];
dump_avail[da_indx] = physmap[0];
pte = CMAP3;
/*
* Get dcons buffer address
@ -2052,7 +2057,7 @@ getmemsize(int first)
end = trunc_page(physmap[i + 1]);
for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
int tmp, page_bad, full;
int *ptr = (int *)CADDR3;
int *ptr;
full = FALSE;
/*
@ -2076,8 +2081,7 @@ getmemsize(int first)
/*
* map page into kernel: valid, read/write,non-cacheable
*/
*pte = pa | PG_V | PG_RW | PG_N;
invltlb();
ptr = (int *)pmap_cmap3(pa, PG_V | PG_RW | PG_N);
tmp = *(int *)ptr;
/*
@ -2158,8 +2162,7 @@ getmemsize(int first)
break;
}
}
*pte = 0;
invltlb();
pmap_cmap3(0, 0);
/*
* XXX
@ -2414,6 +2417,7 @@ init386(int first)
finishidentcpu(); /* Final stage of CPU initialization */
i386_setidt2();
pmap_set_nx();
initializecpu(); /* Initialize CPU registers */
initializecpucache();
@ -2508,11 +2512,7 @@ init386(int first)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
#if defined(PAE) || defined(PAE_TABLES)
thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
#else
thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
#endif
thread0.td_pcb->pcb_cr3 = pmap_get_kcr3();
thread0.td_pcb->pcb_ext = 0;
thread0.td_frame = &proc0_tf;
@ -2581,11 +2581,7 @@ machdep_init_trampoline(void)
(int)dblfault_stack + PAGE_SIZE;
dblfault_tss->tss_ss = dblfault_tss->tss_ss0 = dblfault_tss->tss_ss1 =
dblfault_tss->tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
#if defined(PAE) || defined(PAE_TABLES)
dblfault_tss->tss_cr3 = (int)IdlePDPT;
#else
dblfault_tss->tss_cr3 = (int)IdlePTD;
#endif
dblfault_tss->tss_cr3 = pmap_get_kcr3();
dblfault_tss->tss_eip = (int)dblfault_handler;
dblfault_tss->tss_eflags = PSL_KERNEL;
dblfault_tss->tss_ds = dblfault_tss->tss_es =

View file

@ -148,7 +148,6 @@ memrw(struct cdev *dev, struct uio *uio, int flags)
error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
pmap_qremove((vm_offset_t)ptvmmap, 1);
sx_xunlock(&memsxlock);
}
return (error);

View file

@ -49,310 +49,11 @@ __FBSDID("$FreeBSD$");
CTASSERT(sizeof(struct kerneldumpheader) == 512);
#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
#define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE)
uint32_t *vm_page_dump;
int vm_page_dump_size;
/* Kernel dump header handed to dump_init_header()/dump_start()/dump_finish(). */
static struct kerneldumpheader kdh;
/* Handle chunked writes. */
/* Bytes of physical pages staged by blk_write() and not yet flushed. */
static size_t fragsz;
/* Address passed to dump_append() when the staged pages are flushed. */
static void *dump_va;
/*
 * counter: bytes processed since the last progress message (a message is
 * printed each time it reaches 1 << 24).
 * progress: bytes still to be written; starts at the computed dump size.
 */
static uint64_t counter, progress;
CTASSERT(sizeof(*vm_page_dump) == 4);
/*
 * Report whether physical address 'pa' lies inside one of the
 * [start, end) ranges recorded in dump_avail[].  The table is walked in
 * pairs and ends at the first pair whose two entries are both zero.
 * Returns 1 when the address is covered, 0 otherwise.
 */
static int
is_dumpable(vm_paddr_t pa)
{
	int idx, found;

	found = 0;
	idx = 0;
	while (dump_avail[idx] != 0 || dump_avail[idx + 1] != 0) {
		if (dump_avail[idx] <= pa && pa < dump_avail[idx + 1]) {
			found = 1;
			break;
		}
		idx += 2;
	}
	return (found);
}
#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
/*
 * Write out the physical pages that blk_write() has staged at dump_va.
 * Nothing is written when no bytes are pending.  The pending count is
 * cleared even when dump_append() reports an error; that error is
 * returned to the caller.
 */
static int
blk_flush(struct dumperinfo *di)
{
	int rv;

	rv = 0;
	if (fragsz != 0) {
		rv = dump_append(di, dump_va, 0, fragsz);
		fragsz = 0;
	}
	return (rv);
}
/*
 * Append 'sz' bytes to the dump.
 *
 * The data comes either from the kernel virtual address 'ptr' or, when
 * 'ptr' is NULL, from the physical address 'pa'.  Virtual data is handed
 * to dump_append() directly, after flushing any physical pages that are
 * still staged.  Physical pages are mapped one at a time with
 * pmap_kenter_temporary() and accumulated (fragsz counts the pending
 * bytes) until maxdumpsz bytes are staged, at which point blk_flush()
 * writes them out; a shorter tail stays staged until the next flush.
 *
 * 'sz' must be a multiple of PAGE_SIZE and 'pa' must be page aligned.
 *
 * Returns 0 on success, EINVAL for a malformed request, ECANCELED when
 * CTRL-C is read from the console, or the error reported by
 * dump_append()/blk_flush().
 */
static int
blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
{
	size_t len;
	int error, i, c;
	u_int maxdumpsz;

	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
	if (maxdumpsz == 0)	/* seatbelt */
		maxdumpsz = PAGE_SIZE;
	error = 0;
	if ((sz % PAGE_SIZE) != 0) {
		printf("size not page aligned\n");
		return (EINVAL);
	}
	if (ptr != NULL && pa != 0) {
		printf("cant have both va and pa!\n");
		return (EINVAL);
	}
	/*
	 * Test pa itself: ptr is necessarily NULL whenever pa is non-zero
	 * at this point, so an alignment check on ptr could never fire.
	 */
	if ((pa & PAGE_MASK) != 0) {
		printf("address not page aligned\n");
		return (EINVAL);
	}
	if (ptr != NULL) {
		/* If we're doing a virtual dump, flush any pre-existing pa pages */
		error = blk_flush(di);
		if (error)
			return (error);
	}
	while (sz) {
		/* Never stage more than maxdumpsz bytes at once. */
		len = maxdumpsz - fragsz;
		if (len > sz)
			len = sz;
		counter += len;
		progress -= len;
		/* Print the remaining megabytes once per 16MB processed. */
		if (counter >> 24) {
			printf(" %lld",
			    (long long)PG2MB(progress >> PAGE_SHIFT));
			counter &= (1<<24) - 1;
		}

		wdog_kern_pat(WD_LASTVAL);

		if (ptr) {
			error = dump_append(di, ptr, 0, len);
			if (error)
				return (error);
			ptr += len;
			sz -= len;
		} else {
			/* Map each page behind the ones already staged. */
			for (i = 0; i < len; i += PAGE_SIZE)
				dump_va = pmap_kenter_temporary(pa + i,
				    (i + fragsz) >> PAGE_SHIFT);
			fragsz += len;
			pa += len;
			sz -= len;
			if (fragsz == maxdumpsz) {
				error = blk_flush(di);
				if (error)
					return (error);
			}
		}

		/* Check for user abort. */
		c = cncheckc();
		if (c == 0x03)
			return (ECANCELED);
		if (c != -1)
			printf(" (CTRL-C to abort) ");
	}

	return (0);
}
/* A fake page table page, to avoid having to handle both 4K and 2M pages */
static pt_entry_t fakept[NPTEPG];
/*
 * Write a minidump of the kernel to the dump device described by 'di'.
 *
 * The kernel page directory is walked from KERNBASE up to kernel_vm_end
 * and every dumpable page found is marked in vm_page_dump.  The total
 * size is then computed and the dump is written in this order: a header
 * page, the message buffer, the vm_page_dump bitmap, one page of page
 * table entries per page directory slot, and finally every page whose
 * bit is set in the bitmap.
 *
 * Returns 0 on success.  On failure a diagnostic is printed and the
 * error is returned as a positive value.
 */
int
minidumpsys(struct dumperinfo *di)
{
uint64_t dumpsize;
uint32_t ptesize;
vm_offset_t va;
int error;
uint32_t bits;
uint64_t pa;
pd_entry_t *pd;
pt_entry_t *pt;
int i, j, k, bit;
struct minidumphdr mdhdr;
/* Restart the byte count blk_write() uses to pace progress output. */
counter = 0;
/* Walk page table pages, set bits in vm_page_dump */
ptesize = 0;
for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
/*
* We always write a page, even if it is zero. Each
* page written corresponds to 2MB of space
*/
/*
* NOTE(review): the 2MB/2M figures in these comments match PAE page
* tables; with non-PAE tables one directory entry presumably spans
* 4MB (NBPDR) -- confirm.
*/
ptesize += PAGE_SIZE;
pd = IdlePTD; /* always mapped! */
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is an entire 2M page. */
pa = pd[j] & PG_PS_FRAME;
for (k = 0; k < NPTEPG; k++) {
if (is_dumpable(pa))
dump_add_page(pa);
pa += PAGE_SIZE;
}
continue;
}
if ((pd[j] & PG_V) == PG_V) {
/* set bit for each valid page in this 2MB block */
pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0);
for (k = 0; k < NPTEPG; k++) {
if ((pt[k] & PG_V) == PG_V) {
pa = pt[k] & PG_FRAME;
if (is_dumpable(pa))
dump_add_page(pa);
}
}
} else {
/* nothing, we're going to dump a null page */
}
}
/* Calculate dump size. */
dumpsize = ptesize;
dumpsize += round_page(msgbufp->msg_size);
dumpsize += round_page(vm_page_dump_size);
for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
bits = vm_page_dump[i];
while (bits) {
bit = bsfl(bits);
/* Bitmap word index and bit number give the page frame number. */
pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
/* Clear out undumpable pages now if needed */
if (is_dumpable(pa)) {
dumpsize += PAGE_SIZE;
} else {
dump_drop_page(pa);
}
/* Done with this bit; continue with the next one that is set. */
bits &= ~(1ul << bit);
}
}
/* One more page for the minidump header written first. */
dumpsize += PAGE_SIZE;
progress = dumpsize;
/* Initialize mdhdr */
bzero(&mdhdr, sizeof(mdhdr));
strcpy(mdhdr.magic, MINIDUMP_MAGIC);
mdhdr.version = MINIDUMP_VERSION;
mdhdr.msgbufsize = msgbufp->msg_size;
mdhdr.bitmapsize = vm_page_dump_size;
mdhdr.ptesize = ptesize;
mdhdr.kernbase = KERNBASE;
#if defined(PAE) || defined(PAE_TABLES)
mdhdr.paemode = 1;
#endif
dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION,
dumpsize);
error = dump_start(di, &kdh);
if (error != 0)
goto fail;
printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
printf("Dumping %llu MB:", (long long)dumpsize >> 20);
/* Dump my header */
/* fakept doubles as a zero-filled page-sized buffer for the header. */
bzero(&fakept, sizeof(fakept));
bcopy(&mdhdr, &fakept, sizeof(mdhdr));
error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
if (error)
goto fail;
/* Dump msgbuf up front */
error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
if (error)
goto fail;
/* Dump bitmap */
error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
if (error)
goto fail;
/* Dump kernel page table pages */
for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
/* We always write a page, even if it is zero */
pd = IdlePTD; /* always mapped! */
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is a single 2M block. Generate a fake PTP */
pa = pd[j] & PG_PS_FRAME;
for (k = 0; k < NPTEPG; k++) {
fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
}
error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
if (error)
goto fail;
/* flush, in case we reuse fakept in the same block */
error = blk_flush(di);
if (error)
goto fail;
continue;
}
if ((pd[j] & PG_V) == PG_V) {
/* A real page table page: write it by physical address. */
pa = pd[j] & PG_FRAME;
error = blk_write(di, 0, pa, PAGE_SIZE);
if (error)
goto fail;
} else {
/* No mapping for this slot: emit an all-zero page table page. */
bzero(fakept, sizeof(fakept));
error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
if (error)
goto fail;
/* flush, in case we reuse fakept in the same block */
error = blk_flush(di);
if (error)
goto fail;
}
}
/* Dump memory chunks */
/* XXX cluster it up and use blk_dump() */
for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
bits = vm_page_dump[i];
while (bits) {
bit = bsfl(bits);
pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
error = blk_write(di, 0, pa, PAGE_SIZE);
if (error)
goto fail;
bits &= ~(1ul << bit);
}
}
/* Push out whatever physical pages are still staged. */
error = blk_flush(di);
if (error)
goto fail;
error = dump_finish(di, &kdh);
if (error != 0)
goto fail;
printf("\nDump complete\n");
return (0);
fail:
/* Report and return the error as a positive value. */
if (error < 0)
error = -error;
if (error == ECANCELED)
printf("\nDump aborted\n");
else if (error == E2BIG || error == ENOSPC)
printf("\nDump failed. Partition too small.\n");
else
printf("\n** DUMP FAILED (ERROR %d) **\n", error);
return (error);
}
void
dump_add_page(vm_paddr_t pa)
{
@ -375,3 +76,10 @@ dump_drop_page(vm_paddr_t pa)
atomic_clear_int(&vm_page_dump[idx], 1ul << bit);
}
/*
 * Minidump entry point: forward the request to the PAE or the non-PAE
 * dumper, as selected by pae_mode, and return its result.
 */
int
minidumpsys(struct dumperinfo *di)
{

	if (pae_mode)
		return (minidumpsys_pae(di));
	return (minidumpsys_nopae(di));
}

View file

@ -0,0 +1,360 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2006 Peter Wemm
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_watchdog.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>
#include <sys/watchdog.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/atomic.h>
#include <machine/elf.h>
#include <machine/md_var.h>
#include <machine/vmparam.h>
#include <machine/minidump.h>
CTASSERT(sizeof(struct kerneldumpheader) == 512);
#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
#define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE)
extern uint32_t *vm_page_dump;
extern int vm_page_dump_size;
/* Kernel dump header handed to dump_init_header()/dump_start()/dump_finish(). */
static struct kerneldumpheader kdh;
/* Handle chunked writes. */
/* Bytes of physical pages staged by blk_write() and not yet flushed. */
static size_t fragsz;
/* Address passed to dump_append() when the staged pages are flushed. */
static void *dump_va;
/*
 * counter: bytes processed since the last progress message (a message is
 * printed each time it reaches 1 << 24).
 * progress: bytes still to be written; starts at the computed dump size.
 */
static uint64_t counter, progress;
CTASSERT(sizeof(*vm_page_dump) == 4);
/*
 * Report whether physical address 'pa' lies inside one of the
 * [start, end) ranges recorded in dump_avail[].  The table is walked in
 * pairs and ends at the first pair whose two entries are both zero.
 * Returns 1 when the address is covered, 0 otherwise.
 */
static int
is_dumpable(vm_paddr_t pa)
{
	int idx, found;

	found = 0;
	idx = 0;
	while (dump_avail[idx] != 0 || dump_avail[idx + 1] != 0) {
		if (dump_avail[idx] <= pa && pa < dump_avail[idx + 1]) {
			found = 1;
			break;
		}
		idx += 2;
	}
	return (found);
}
#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
/*
 * Write out the physical pages that blk_write() has staged at dump_va.
 * Nothing is written when no bytes are pending.  The pending count is
 * cleared even when dump_append() reports an error; that error is
 * returned to the caller.
 */
static int
blk_flush(struct dumperinfo *di)
{
	int rv;

	rv = 0;
	if (fragsz != 0) {
		rv = dump_append(di, dump_va, 0, fragsz);
		fragsz = 0;
	}
	return (rv);
}
/*
 * Append 'sz' bytes to the dump.
 *
 * The data comes either from the kernel virtual address 'ptr' or, when
 * 'ptr' is NULL, from the physical address 'pa'.  Virtual data is handed
 * to dump_append() directly, after flushing any physical pages that are
 * still staged.  Physical pages are mapped one at a time with
 * pmap_kenter_temporary() and accumulated (fragsz counts the pending
 * bytes) until maxdumpsz bytes are staged, at which point blk_flush()
 * writes them out; a shorter tail stays staged until the next flush.
 *
 * 'sz' must be a multiple of PAGE_SIZE and 'pa' must be page aligned.
 *
 * Returns 0 on success, EINVAL for a malformed request, ECANCELED when
 * CTRL-C is read from the console, or the error reported by
 * dump_append()/blk_flush().
 */
static int
blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
{
	size_t len;
	int error, i, c;
	u_int maxdumpsz;

	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
	if (maxdumpsz == 0)	/* seatbelt */
		maxdumpsz = PAGE_SIZE;
	error = 0;
	if ((sz % PAGE_SIZE) != 0) {
		printf("size not page aligned\n");
		return (EINVAL);
	}
	if (ptr != NULL && pa != 0) {
		printf("cant have both va and pa!\n");
		return (EINVAL);
	}
	/*
	 * Test pa itself: ptr is necessarily NULL whenever pa is non-zero
	 * at this point, so an alignment check on ptr could never fire.
	 */
	if ((pa & PAGE_MASK) != 0) {
		printf("address not page aligned\n");
		return (EINVAL);
	}
	if (ptr != NULL) {
		/* If we're doing a virtual dump, flush any pre-existing pa pages */
		error = blk_flush(di);
		if (error)
			return (error);
	}
	while (sz) {
		/* Never stage more than maxdumpsz bytes at once. */
		len = maxdumpsz - fragsz;
		if (len > sz)
			len = sz;
		counter += len;
		progress -= len;
		/* Print the remaining megabytes once per 16MB processed. */
		if (counter >> 24) {
			printf(" %lld",
			    (long long)PG2MB(progress >> PAGE_SHIFT));
			counter &= (1<<24) - 1;
		}

		wdog_kern_pat(WD_LASTVAL);

		if (ptr) {
			error = dump_append(di, ptr, 0, len);
			if (error)
				return (error);
			ptr += len;
			sz -= len;
		} else {
			/* Map each page behind the ones already staged. */
			for (i = 0; i < len; i += PAGE_SIZE)
				dump_va = pmap_kenter_temporary(pa + i,
				    (i + fragsz) >> PAGE_SHIFT);
			fragsz += len;
			pa += len;
			sz -= len;
			if (fragsz == maxdumpsz) {
				error = blk_flush(di);
				if (error)
					return (error);
			}
		}

		/* Check for user abort. */
		c = cncheckc();
		if (c == 0x03)
			return (ECANCELED);
		if (c != -1)
			printf(" (CTRL-C to abort) ");
	}

	return (0);
}
/* A fake page table page, to avoid having to handle both 4K and 2M pages */
static pt_entry_t fakept[NPTEPG];
/*
 * Give the dump routine and the page directory symbol names that are
 * specific to the page table format this code is being built for, so
 * that a PAE and a non-PAE copy can coexist in one kernel.
 * NOTE(review): PMAP_PAE_COMP is presumably defined by the PAE pmap
 * header pulled in by the file that #includes this one -- confirm.
 */
#ifdef PMAP_PAE_COMP
#define minidumpsys minidumpsys_pae
#define IdlePTD IdlePTD_pae
#else
#define minidumpsys minidumpsys_nopae
#define IdlePTD IdlePTD_nopae
#endif
/*
 * Write a minidump of the kernel to the dump device described by 'di'.
 *
 * The kernel page directory is walked from KERNBASE up to kernel_vm_end
 * and every dumpable page found is marked in vm_page_dump.  The total
 * size is then computed and the dump is written in this order: a header
 * page, the message buffer, the vm_page_dump bitmap, one page of page
 * table entries per page directory slot, and finally every page whose
 * bit is set in the bitmap.
 *
 * Returns 0 on success.  On failure a diagnostic is printed and the
 * error is returned as a positive value.
 */
int
minidumpsys(struct dumperinfo *di)
{
uint64_t dumpsize;
uint32_t ptesize;
vm_offset_t va;
int error;
uint32_t bits;
uint64_t pa;
pd_entry_t *pd;
pt_entry_t *pt;
int i, j, k, bit;
struct minidumphdr mdhdr;
/* Restart the byte count blk_write() uses to pace progress output. */
counter = 0;
/* Walk page table pages, set bits in vm_page_dump */
ptesize = 0;
for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
/*
* We always write a page, even if it is zero. Each
* page written corresponds to 2MB of space
*/
/*
* NOTE(review): the 2MB/2M figures in these comments match PAE page
* tables; with non-PAE tables one directory entry presumably spans
* 4MB (NBPDR) -- confirm.
*/
ptesize += PAGE_SIZE;
pd = IdlePTD; /* always mapped! */
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is an entire 2M page. */
pa = pd[j] & PG_PS_FRAME;
for (k = 0; k < NPTEPG; k++) {
if (is_dumpable(pa))
dump_add_page(pa);
pa += PAGE_SIZE;
}
continue;
}
if ((pd[j] & PG_V) == PG_V) {
/* set bit for each valid page in this 2MB block */
pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0);
for (k = 0; k < NPTEPG; k++) {
if ((pt[k] & PG_V) == PG_V) {
pa = pt[k] & PG_FRAME;
if (is_dumpable(pa))
dump_add_page(pa);
}
}
} else {
/* nothing, we're going to dump a null page */
}
}
/* Calculate dump size. */
dumpsize = ptesize;
dumpsize += round_page(msgbufp->msg_size);
dumpsize += round_page(vm_page_dump_size);
for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
bits = vm_page_dump[i];
while (bits) {
bit = bsfl(bits);
/* Bitmap word index and bit number give the page frame number. */
pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
/* Clear out undumpable pages now if needed */
if (is_dumpable(pa)) {
dumpsize += PAGE_SIZE;
} else {
dump_drop_page(pa);
}
/* Done with this bit; continue with the next one that is set. */
bits &= ~(1ul << bit);
}
}
/* One more page for the minidump header written first. */
dumpsize += PAGE_SIZE;
progress = dumpsize;
/* Initialize mdhdr */
bzero(&mdhdr, sizeof(mdhdr));
strcpy(mdhdr.magic, MINIDUMP_MAGIC);
mdhdr.version = MINIDUMP_VERSION;
mdhdr.msgbufsize = msgbufp->msg_size;
mdhdr.bitmapsize = vm_page_dump_size;
mdhdr.ptesize = ptesize;
mdhdr.kernbase = KERNBASE;
/* Record in the header which page table format the dump uses. */
mdhdr.paemode = pae_mode;
dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION,
dumpsize);
error = dump_start(di, &kdh);
if (error != 0)
goto fail;
printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
printf("Dumping %llu MB:", (long long)dumpsize >> 20);
/* Dump my header */
/* fakept doubles as a zero-filled page-sized buffer for the header. */
bzero(&fakept, sizeof(fakept));
bcopy(&mdhdr, &fakept, sizeof(mdhdr));
error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
if (error)
goto fail;
/* Dump msgbuf up front */
error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
if (error)
goto fail;
/* Dump bitmap */
error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
if (error)
goto fail;
/* Dump kernel page table pages */
for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
/* We always write a page, even if it is zero */
pd = IdlePTD; /* always mapped! */
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is a single 2M block. Generate a fake PTP */
pa = pd[j] & PG_PS_FRAME;
for (k = 0; k < NPTEPG; k++) {
fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
}
error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
if (error)
goto fail;
/* flush, in case we reuse fakept in the same block */
error = blk_flush(di);
if (error)
goto fail;
continue;
}
if ((pd[j] & PG_V) == PG_V) {
/* A real page table page: write it by physical address. */
pa = pd[j] & PG_FRAME;
error = blk_write(di, 0, pa, PAGE_SIZE);
if (error)
goto fail;
} else {
/* No mapping for this slot: emit an all-zero page table page. */
bzero(fakept, sizeof(fakept));
error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
if (error)
goto fail;
/* flush, in case we reuse fakept in the same block */
error = blk_flush(di);
if (error)
goto fail;
}
}
/* Dump memory chunks */
/* XXX cluster it up and use blk_dump() */
for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
bits = vm_page_dump[i];
while (bits) {
bit = bsfl(bits);
pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
error = blk_write(di, 0, pa, PAGE_SIZE);
if (error)
goto fail;
bits &= ~(1ul << bit);
}
}
/* Push out whatever physical pages are still staged. */
error = blk_flush(di);
if (error)
goto fail;
error = dump_finish(di, &kdh);
if (error != 0)
goto fail;
printf("\nDump complete\n");
return (0);
fail:
/* Report and return the error as a positive value. */
if (error < 0)
error = -error;
if (error == ECANCELED)
printf("\nDump aborted\n");
else if (error == E2BIG || error == ENOSPC)
printf("\nDump failed. Partition too small.\n");
else
printf("\n** DUMP FAILED (ERROR %d) **\n", error);
return (error);
}

View file

@ -0,0 +1,40 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2018 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <machine/pmap_nopae.h>
#include <vm/pmap.h>
#include "minidump_machdep_base.c"

View file

@ -0,0 +1,41 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2018 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#define PMAP_PAE_COMP
#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <machine/pmap_pae.h>
#include <vm/pmap.h>
#include "minidump_machdep_base.c"

View file

@ -309,9 +309,7 @@ start_all_aps(void)
mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
/* Remap lowest 1MB */
IdlePTD[0] = IdlePTD[1];
load_cr3(rcr3()); /* invalidate TLB */
pmap_remap_lower(true);
/* install the AP 1st level boot code */
install_ap_tramp();
@ -359,9 +357,7 @@ start_all_aps(void)
CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
}
/* Unmap lowest 1MB again */
IdlePTD[0] = 0;
load_cr3(rcr3());
pmap_remap_lower(false);
/* restore the warmstart vector */
*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

View file

@ -81,19 +81,17 @@ NON_GPROF_ENTRY(MPentry)
testl $CPUID_PSE,%edx
jz 1f
orl $CR4_PSE,%eax /* Enable PSE */
1:
testl $CPUID_PGE,%edx
jz 1f
1: testl $CPUID_PGE,%edx
jz 2f
orl $CR4_PGE,%eax /* Enable PGE */
1:
testl $CPUID_VME,%edx
jz 1f
2: testl $CPUID_VME,%edx
jz 3f
orl $CR4_VME,%eax /* Enable VME */
1:
movl %eax,%cr4
3: movl %eax,%cr4
/* Now enable paging mode */
#if defined(PAE) || defined(PAE_TABLES)
cmpl $0, pae_mode
je 4f
movl IdlePDPT, %eax
movl %eax, %cr3
movl %cr4, %eax
@ -103,21 +101,19 @@ NON_GPROF_ENTRY(MPentry)
cpuid
movl $0x80000001, %ebx
cmpl %ebx, %eax
jb 1f
jb 5f
movl %ebx, %eax
cpuid
testl $AMDID_NX, %edx
je 1f
je 5f
movl $MSR_EFER, %ecx
rdmsr
orl $EFER_NXE,%eax
wrmsr
1:
#else
movl IdlePTD, %eax
jmp 5f
4: movl IdlePTD_nopae, %eax
movl %eax,%cr3
#endif
movl %cr0,%eax
5: movl %cr0,%eax
orl $CR0_PE|CR0_PG,%eax /* enable paging */
movl %eax,%cr0 /* let the games begin! */
movl bootSTK,%esp /* boot stack end loc. */

File diff suppressed because it is too large Load diff

954
sys/i386/i386/pmap_base.c Normal file
View file

@ -0,0 +1,954 @@
/*-
* SPDX-License-Identifier: BSD-4-Clause
*
* Copyright (c) 1991 Regents of the University of California.
* All rights reserved.
* Copyright (c) 1994 John S. Dyson
* All rights reserved.
* Copyright (c) 1994 David Greenman
* All rights reserved.
* Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department and William Jolitz of UUNET Technologies Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
*/
/*-
* Copyright (c) 2003 Networks Associates Technology, Inc.
* All rights reserved.
* Copyright (c) 2018 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Jake Burkholder,
* Safeport Network Services, and Network Associates Laboratories, the
* Security Research Division of Network Associates, Inc. under
* DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
* CHATS research program.
*
* Portions of this software were developed by
* Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_pmap.h"
#include "opt_smp.h"
#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#ifdef DEV_APIC
#include <sys/bus.h>
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#endif
#include <x86/ifunc.h>
static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
#include <machine/vmparam.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <machine/pmap_base.h>
/*
 * Global pmap and early-boot state defined in this file.
 * NOTE(review): presumably shared by both the PAE and non-PAE pmap
 * implementations -- confirm against pmap.c.
 */
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
int unmapped_buf_allowed = 1;
int pti;
u_long physfree; /* phys addr of next free page */
u_long vm86phystk; /* PA of vm86/bios stack */
u_long vm86paddr; /* address of vm86 region */
int vm86pa; /* phys addr of vm86 region */
u_long KERNend; /* phys addr end of kernel (just after bss) */
u_long KPTphys; /* phys addr of kernel page tables */
caddr_t ptvmmap = 0;
vm_offset_t kernel_vm_end;
/*
 * NOTE(review): the three i386_pmap_* values look like run-time copies of
 * constants that differ between page table formats -- TODO confirm where
 * they are assigned.
 */
int i386_pmap_VM_NFREEORDER;
int i386_pmap_VM_LEVEL_0_ORDER;
int i386_pmap_PDRSHIFT;
/*
 * Counters and settings exported through sysctl.  Each variable is
 * defined here and published as a read-only OID under vm.pmap,
 * vm.pmap.pde or debug, as named by the first macro argument.
 */
int pat_works = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD,
&pat_works, 1,
"Is page attribute table fully functional?");
int pg_ps_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
&pg_ps_enabled, 0,
"Are large page mappings enabled?");
int pv_entry_max = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD,
&pv_entry_max, 0,
"Max number of PV entries");
int pv_entry_count = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD,
&pv_entry_count, 0,
"Current number of pv entries");
/* PMAP_SHPGPERPROC defaults to 200 unless defined before this point. */
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif
int shpgperproc = PMAP_SHPGPERPROC;
SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD,
&shpgperproc, 0,
"Page share factor per proc");
/* Superpage (2/4MB) mapping statistics, grouped under vm.pmap.pde. */
static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
"2/4MB page mapping counters");
u_long pmap_pde_demotions;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD,
&pmap_pde_demotions, 0,
"2/4MB page demotions");
u_long pmap_pde_mappings;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
&pmap_pde_mappings, 0,
"2/4MB page mappings");
u_long pmap_pde_p_failures;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD,
&pmap_pde_p_failures, 0,
"2/4MB page promotion failures");
u_long pmap_pde_promotions;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD,
&pmap_pde_promotions, 0,
"2/4MB page promotions");
/* pmap_pte_quick statistics, exported under debug. */
#ifdef SMP
int PMAP1changedcpu;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
&PMAP1changedcpu, 0,
"Number of times pmap_pte_quick changed CPU with same PMAP1");
#endif
int PMAP1changed;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
&PMAP1changed, 0,
"Number of times pmap_pte_quick changed PMAP1");
int PMAP1unchanged;
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
&PMAP1unchanged, 0,
"Number of times pmap_pte_quick didn't change PMAP1");
/*
 * Sysctl handler for vm.kvm_size: reports the span between KERNBASE and
 * VM_MAX_KERNEL_ADDRESS.
 */
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long span = VM_MAX_KERNEL_ADDRESS - KERNBASE;

	return (sysctl_handle_long(oidp, &span, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, 0, kvm_size, "IU",
    "Size of KVM");
/*
 * Sysctl handler for vm.kvm_free: reports the span between the current
 * kernel_vm_end and VM_MAX_KERNEL_ADDRESS.
 */
static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long remaining = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return (sysctl_handle_long(oidp, &remaining, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, 0, kvm_free, "IU",
    "Amount of KVM free");
/*
 * Optional pv entry / pv chunk statistics.  These counters and their
 * read-only vm.pmap sysctls exist only when PV_STATS is defined.
 */
#ifdef PV_STATS
int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
long pv_entry_frees, pv_entry_allocs;
int pv_entry_spare;
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD,
&pc_chunk_count, 0,
"Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD,
&pc_chunk_allocs, 0,
"Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD,
&pc_chunk_frees, 0,
"Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD,
&pc_chunk_tryfail, 0,
"Number of times tried to get a chunk page but failed.");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD,
&pv_entry_frees, 0,
"Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD,
&pv_entry_allocs, 0,
"Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD,
&pv_entry_spare, 0,
"Current number of spare pv entries");
#endif
struct pmap kernel_pmap_store;
/*
 * Method table of the pmap implementation in use.  pmap_cold() points it
 * at either pmap_pae_methods or pmap_nopae_methods; the wrappers in this
 * file dereference it unconditionally.
 */
static struct pmap_methods *pmap_methods_ptr;
/*
 * Set up the machine-dependent part of a vm_page: the page starts with
 * the write-back PAT mode and an empty pv list.
 */
void
pmap_page_init(vm_page_t m)
{

	m->md.pat_mode = PAT_WRITE_BACK;
	TAILQ_INIT(&m->md.pv_list);
}
/* Here this is simply a call to invltlb(). */
void
invltlb_glob(void)
{
invltlb();
}
static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
vm_offset_t eva);
static void pmap_invalidate_cache_range_all(vm_offset_t sva,
vm_offset_t eva);
/* Dispatch to the pm_flush_page method of the active pmap implementation. */
void
pmap_flush_page(vm_page_t m)
{
pmap_methods_ptr->pm_flush_page(m);
}
/*
 * Resolver for pmap_invalidate_cache_range(): returns the implementation
 * chosen from the CPU feature bits.  CPUID_SS selects the self-snoop
 * variant; otherwise CPUID_CLFSH selects
 * pmap_force_invalidate_cache_range(); with neither bit set the variant
 * that invalidates the whole cache is returned.
 */
DEFINE_IFUNC(, void, pmap_invalidate_cache_range, (vm_offset_t, vm_offset_t),
static)
{
if ((cpu_feature & CPUID_SS) != 0)
return (pmap_invalidate_cache_range_selfsnoop);
if ((cpu_feature & CPUID_CLFSH) != 0)
return (pmap_force_invalidate_cache_range);
return (pmap_invalidate_cache_range_all);
}
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
/*
 * Assert (via KASSERT) that both ends of the range are page-aligned.
 * Performs no cache operation itself.
 */
static void
pmap_invalidate_cache_range_check_align(vm_offset_t sva, vm_offset_t eva)
{
KASSERT((sva & PAGE_MASK) == 0,
("pmap_invalidate_cache_range: sva not page-aligned"));
KASSERT((eva & PAGE_MASK) == 0,
("pmap_invalidate_cache_range: eva not page-aligned"));
}
/*
 * Variant selected when the CPU reports CPUID_SS: only the alignment
 * assertions run, no flush instruction is issued.
 */
static void
pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva)
{
pmap_invalidate_cache_range_check_align(sva, eva);
}
/*
 * Flush the cache lines covering [sva, eva) with CLFLUSHOPT or CLFLUSH.
 * Ranges of PMAP_CLFLUSH_THRESHOLD bytes or more fall back to a global
 * cache invalidation instead.
 */
void
pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
{
	bool fence_needed;

	/* Round the start down to a cache line boundary. */
	sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);

	/*
	 * A range that reaches the 2MB threshold is handled by
	 * invalidating the whole cache.
	 */
	if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) {
		pmap_invalidate_cache();
		return;
	}

#ifdef DEV_APIC
	/*
	 * XXX: Some CPUs fault, hang, or trash the local APIC
	 * registers if we use CLFLUSH on the local APIC
	 * range.  The local APIC is always uncached, so we
	 * don't need to flush for that range anyway.
	 */
	if (pmap_kextract(sva) == lapic_paddr)
		return;
#endif

	if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) == 0) {
		/*
		 * Plain CLFLUSH path.  Writes are ordered by CLFLUSH on
		 * Intel CPUs, so only other vendors get the fences.
		 */
		fence_needed = cpu_vendor_id != CPU_VENDOR_INTEL;
		if (fence_needed)
			mfence();
		while (sva < eva) {
			clflush(sva);
			sva += cpu_clflush_line_size;
		}
		if (fence_needed)
			mfence();
		return;
	}

	/*
	 * Do per-cache line flush.  Use the sfence instruction to
	 * ensure that previous stores are included in the write-back.
	 * The processor propagates flush to other processors in the
	 * cache coherence domain.
	 */
	sfence();
	while (sva < eva) {
		clflushopt(sva);
		sva += cpu_clflush_line_size;
	}
	sfence();
}
/*
 * Fallback variant: after the alignment assertions, invalidate the whole
 * cache regardless of the size of the range.
 */
static void
pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva)
{
pmap_invalidate_cache_range_check_align(sva, eva);
pmap_invalidate_cache();
}
/*
 * Flush the caches for an array of pages.  Without CLFLUSH support, or
 * when the page count reaches PMAP_CLFLUSH_THRESHOLD worth of pages, the
 * whole cache is invalidated; otherwise each page is flushed on its own.
 */
void
pmap_invalidate_cache_pages(vm_page_t *pages, int count)
{
	int idx;

	if ((cpu_feature & CPUID_CLFSH) == 0 ||
	    count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE) {
		pmap_invalidate_cache();
		return;
	}

	idx = 0;
	while (idx < count) {
		pmap_flush_page(pages[idx]);
		idx++;
	}
}
/* Dispatch to the pm_ksetrw method of the active pmap implementation. */
void
pmap_ksetrw(vm_offset_t va)
{
pmap_methods_ptr->pm_ksetrw(va);
}
/* Dispatch to the pm_remap_lower method of the active pmap implementation. */
void
pmap_remap_lower(bool enable)
{
pmap_methods_ptr->pm_remap_lower(enable);
}
/* Dispatch to the pm_remap_lowptdi method of the active pmap implementation. */
void
pmap_remap_lowptdi(bool enable)
{
pmap_methods_ptr->pm_remap_lowptdi(enable);
}
/*
 * Dispatch to the pm_align_superpage method of the active pmap
 * implementation.  The function returns void, so the method is called as
 * a plain statement rather than through a "return (expr)".
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{

	pmap_methods_ptr->pm_align_superpage(object, offset, addr, size);
}
/*
 * Dispatch to the pm_quick_enter_page method of the active pmap
 * implementation and return its result.
 */
vm_offset_t
pmap_quick_enter_page(vm_page_t m)
{
return (pmap_methods_ptr->pm_quick_enter_page(m));
}
/*
 * Dispatch to the pm_quick_remove_page method of the active pmap
 * implementation.  The function returns void, so the method is called as
 * a plain statement rather than through a "return (expr)".
 */
void
pmap_quick_remove_page(vm_offset_t addr)
{

	pmap_methods_ptr->pm_quick_remove_page(addr);
}
/*
 * Dispatch to the pm_trm_alloc method of the active pmap implementation
 * and return its result.
 */
void *
pmap_trm_alloc(size_t size, int flags)
{
return (pmap_methods_ptr->pm_trm_alloc(size, flags));
}
/* Dispatch to the pm_trm_free method of the active pmap implementation. */
void
pmap_trm_free(void *addr, size_t size)
{
pmap_methods_ptr->pm_trm_free(addr, size);
}
/* Intentionally empty: no work is performed here for any arguments. */
void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
}
/* Return the result of the active implementation's pm_get_map_low method. */
vm_offset_t
pmap_get_map_low(void)
{
return (pmap_methods_ptr->pm_get_map_low());
}
/*
 * Return the result of the active implementation's
 * pm_get_vm_maxuser_address method.
 */
vm_offset_t
pmap_get_vm_maxuser_address(void)
{
return (pmap_methods_ptr->pm_get_vm_maxuser_address());
}
/*
 * Dispatch to the pm_kextract method of the active pmap implementation
 * and return the physical address it yields for va.
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
return (pmap_methods_ptr->pm_kextract(va));
}
/* Return the result of the active implementation's pm_pg_frame method. */
vm_paddr_t
pmap_pg_frame(vm_paddr_t pa)
{
return (pmap_methods_ptr->pm_pg_frame(pa));
}
/* Dispatch to the pm_sf_buf_map method of the active pmap implementation. */
void
pmap_sf_buf_map(struct sf_buf *sf)
{
pmap_methods_ptr->pm_sf_buf_map(sf);
}
/* Dispatch to the pm_cp_slow0_map method of the active pmap implementation. */
void
pmap_cp_slow0_map(vm_offset_t kaddr, int plen, vm_page_t *ma)
{
pmap_methods_ptr->pm_cp_slow0_map(kaddr, plen, ma);
}
/* Return the result of the active implementation's pm_get_kcr3 method. */
u_int
pmap_get_kcr3(void)
{
return (pmap_methods_ptr->pm_get_kcr3());
}
/* Return the result of the active implementation's pm_get_cr3 method. */
u_int
pmap_get_cr3(pmap_t pmap)
{
return (pmap_methods_ptr->pm_get_cr3(pmap));
}
/* Return the result of the active implementation's pm_cmap3 method. */
caddr_t
pmap_cmap3(vm_paddr_t pa, u_int pte_flags)
{
return (pmap_methods_ptr->pm_cmap3(pa, pte_flags));
}
/* Dispatch to the pm_basemem_setup method of the active pmap implementation. */
void
pmap_basemem_setup(u_int basemem)
{
pmap_methods_ptr->pm_basemem_setup(basemem);
}
/* Dispatch to the pm_set_nx method of the active pmap implementation. */
void
pmap_set_nx(void)
{
pmap_methods_ptr->pm_set_nx();
}
/*
 * Dispatch to the pm_bios16_enter method of the active pmap
 * implementation and return the pointer it yields.
 */
void *
pmap_bios16_enter(void)
{
return (pmap_methods_ptr->pm_bios16_enter());
}
/* Dispatch to the pm_bios16_leave method of the active pmap implementation. */
void
pmap_bios16_leave(void *handle)
{
pmap_methods_ptr->pm_bios16_leave(handle);
}
/* Dispatch to the pm_bootstrap method of the active pmap implementation. */
void
pmap_bootstrap(vm_paddr_t firstaddr)
{
pmap_methods_ptr->pm_bootstrap(firstaddr);
}
/*
 * Return the result of the active implementation's pm_is_valid_memattr
 * method.
 */
boolean_t
pmap_is_valid_memattr(pmap_t pmap, vm_memattr_t mode)
{
return (pmap_methods_ptr->pm_is_valid_memattr(pmap, mode));
}
/* Return the result of the active implementation's pm_cache_bits method. */
int
pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde)
{
return (pmap_methods_ptr->pm_cache_bits(pmap, mode, is_pde));
}
/* Return the result of the active implementation's pm_ps_enabled method. */
bool
pmap_ps_enabled(pmap_t pmap)
{
return (pmap_methods_ptr->pm_ps_enabled(pmap));
}
/* Dispatch to the pm_pinit0 method of the active pmap implementation. */
void
pmap_pinit0(pmap_t pmap)
{
pmap_methods_ptr->pm_pinit0(pmap);
}
/* Return the result of the active implementation's pm_pinit method. */
int
pmap_pinit(pmap_t pmap)
{
return (pmap_methods_ptr->pm_pinit(pmap));
}
/* Dispatch to the pm_activate method of the active pmap implementation. */
void
pmap_activate(struct thread *td)
{
pmap_methods_ptr->pm_activate(td);
}
/* Dispatch to the pm_activate_boot method of the active pmap implementation. */
void
pmap_activate_boot(pmap_t pmap)
{
pmap_methods_ptr->pm_activate_boot(pmap);
}
/* Dispatch to the pm_advise method of the active pmap implementation. */
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
pmap_methods_ptr->pm_advise(pmap, sva, eva, advice);
}
/* Dispatch to the pm_clear_modify method of the active pmap implementation. */
void
pmap_clear_modify(vm_page_t m)
{
pmap_methods_ptr->pm_clear_modify(m);
}
/* Return the result of the active implementation's pm_change_attr method. */
int
pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
{
return (pmap_methods_ptr->pm_change_attr(va, size, mode));
}
/* Return the result of the active implementation's pm_mincore method. */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
return (pmap_methods_ptr->pm_mincore(pmap, addr, locked_pa));
}
/* Dispatch to the pm_copy method of the active pmap implementation. */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
pmap_methods_ptr->pm_copy(dst_pmap, src_pmap, dst_addr, len, src_addr);
}
/* Dispatch to the pm_copy_page method of the active pmap implementation. */
void
pmap_copy_page(vm_page_t src, vm_page_t dst)
{
pmap_methods_ptr->pm_copy_page(src, dst);
}
/* Dispatch to the pm_copy_pages method of the active pmap implementation. */
void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
vm_offset_t b_offset, int xfersize)
{
pmap_methods_ptr->pm_copy_pages(ma, a_offset, mb, b_offset, xfersize);
}
/* Dispatch to the pm_zero_page method of the active pmap implementation. */
void
pmap_zero_page(vm_page_t m)
{
pmap_methods_ptr->pm_zero_page(m);
}
/* Dispatch to the pm_zero_page_area method of the active pmap implementation. */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
pmap_methods_ptr->pm_zero_page_area(m, off, size);
}
/* Return the result of the active implementation's pm_enter method. */
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
u_int flags, int8_t psind)
{
return (pmap_methods_ptr->pm_enter(pmap, va, m, prot, flags, psind));
}
/* Dispatch to the pm_enter_object method of the active pmap implementation. */
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_page_t m_start, vm_prot_t prot)
{
pmap_methods_ptr->pm_enter_object(pmap, start, end, m_start, prot);
}
/* Dispatch to the pm_enter_quick method of the active pmap implementation. */
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
pmap_methods_ptr->pm_enter_quick(pmap, va, m, prot);
}
/*
 * Return the result of the active implementation's pm_kenter_temporary
 * method.
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
return (pmap_methods_ptr->pm_kenter_temporary(pa, i));
}
/* Dispatch to the pm_object_init_pt method of the active pmap implementation. */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
vm_pindex_t pindex, vm_size_t size)
{
pmap_methods_ptr->pm_object_init_pt(pmap, addr, object, pindex, size);
}
/* Dispatch to the pm_unwire method of the active pmap implementation. */
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
pmap_methods_ptr->pm_unwire(pmap, sva, eva);
}
/*
 * Return the result of the active implementation's pm_page_exists_quick
 * method.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
return (pmap_methods_ptr->pm_page_exists_quick(pmap, m));
}
/*
 * Return the result of the active implementation's
 * pm_page_wired_mappings method.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
return (pmap_methods_ptr->pm_page_wired_mappings(m));
}
/* Return the result of the active implementation's pm_page_is_mapped method. */
boolean_t
pmap_page_is_mapped(vm_page_t m)
{
return (pmap_methods_ptr->pm_page_is_mapped(m));
}
/* Dispatch to the pm_remove_pages method of the active pmap implementation. */
void
pmap_remove_pages(pmap_t pmap)
{
pmap_methods_ptr->pm_remove_pages(pmap);
}
/* Return the result of the active implementation's pm_is_modified method. */
boolean_t
pmap_is_modified(vm_page_t m)
{
return (pmap_methods_ptr->pm_is_modified(m));
}
/*
 * Return the result of the active implementation's pm_is_prefaultable
 * method.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
return (pmap_methods_ptr->pm_is_prefaultable(pmap, addr));
}
/* Return the result of the active implementation's pm_is_referenced method. */
boolean_t
pmap_is_referenced(vm_page_t m)
{
return (pmap_methods_ptr->pm_is_referenced(m));
}
/* Dispatch to the pm_remove_write method of the active pmap implementation. */
void
pmap_remove_write(vm_page_t m)
{
pmap_methods_ptr->pm_remove_write(m);
}
/* Return the result of the active implementation's pm_ts_referenced method. */
int
pmap_ts_referenced(vm_page_t m)
{
return (pmap_methods_ptr->pm_ts_referenced(m));
}
/*
 * Return the result of the active implementation's pm_mapdev_attr method,
 * passing the caller-supplied mode through unchanged.
 */
void *
pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
{
return (pmap_methods_ptr->pm_mapdev_attr(pa, size, mode));
}
/*
 * Same as pmap_mapdev_attr() with the mode fixed to PAT_UNCACHEABLE; goes
 * straight to the pm_mapdev_attr method.
 */
void *
pmap_mapdev(vm_paddr_t pa, vm_size_t size)
{
return (pmap_methods_ptr->pm_mapdev_attr(pa, size, PAT_UNCACHEABLE));
}
/*
 * Same as pmap_mapdev_attr() with the mode fixed to PAT_WRITE_BACK; goes
 * straight to the pm_mapdev_attr method.
 */
void *
pmap_mapbios(vm_paddr_t pa, vm_size_t size)
{
return (pmap_methods_ptr->pm_mapdev_attr(pa, size, PAT_WRITE_BACK));
}
/* Dispatch to the pm_unmapdev method of the active pmap implementation. */
void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
pmap_methods_ptr->pm_unmapdev(va, size);
}
/*
 * Dispatch to the pm_page_set_memattr method of the active pmap
 * implementation.
 */
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
pmap_methods_ptr->pm_page_set_memattr(m, ma);
}
/* Return the result of the active implementation's pm_extract method. */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
return (pmap_methods_ptr->pm_extract(pmap, va));
}
/*
 * Return the result of the active implementation's pm_extract_and_hold
 * method.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
return (pmap_methods_ptr->pm_extract_and_hold(pmap, va, prot));
}
/* Return the result of the active implementation's pm_map method. */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
return (pmap_methods_ptr->pm_map(virt, start, end, prot));
}
/* Dispatch to the pm_qenter method of the active pmap implementation. */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
pmap_methods_ptr->pm_qenter(sva, ma, count);
}
/* Dispatch to the pm_qremove method of the active pmap implementation. */
void
pmap_qremove(vm_offset_t sva, int count)
{
pmap_methods_ptr->pm_qremove(sva, count);
}
/* Dispatch to the pm_release method of the active pmap implementation. */
void
pmap_release(pmap_t pmap)
{
pmap_methods_ptr->pm_release(pmap);
}
/* Dispatch to the pm_remove method of the active pmap implementation. */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
pmap_methods_ptr->pm_remove(pmap, sva, eva);
}
/* Dispatch to the pm_protect method of the active pmap implementation. */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
pmap_methods_ptr->pm_protect(pmap, sva, eva, prot);
}
/* Dispatch to the pm_remove_all method of the active pmap implementation. */
void
pmap_remove_all(vm_page_t m)
{
pmap_methods_ptr->pm_remove_all(m);
}
/* Dispatch to the pm_init method of the active pmap implementation. */
void
pmap_init(void)
{
pmap_methods_ptr->pm_init();
}
/* Dispatch to the pm_init_pat method of the active pmap implementation. */
void
pmap_init_pat(void)
{
pmap_methods_ptr->pm_init_pat();
}
/* Dispatch to the pm_growkernel method of the active pmap implementation. */
void
pmap_growkernel(vm_offset_t addr)
{
pmap_methods_ptr->pm_growkernel(addr);
}
/*
 * Dispatch to the pm_invalidate_page method of the active pmap
 * implementation.
 */
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
pmap_methods_ptr->pm_invalidate_page(pmap, va);
}
/*
 * Dispatch to the pm_invalidate_range method of the active pmap
 * implementation.
 */
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
pmap_methods_ptr->pm_invalidate_range(pmap, sva, eva);
}
/* Dispatch to the pm_invalidate_all method of the active pmap implementation. */
void
pmap_invalidate_all(pmap_t pmap)
{
pmap_methods_ptr->pm_invalidate_all(pmap);
}
/*
 * Dispatch to the pm_invalidate_cache method of the active pmap
 * implementation.
 */
void
pmap_invalidate_cache(void)
{
pmap_methods_ptr->pm_invalidate_cache();
}
/* Dispatch to the pm_kenter method of the active pmap implementation. */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
pmap_methods_ptr->pm_kenter(va, pa);
}
/* Dispatch to the pm_kremove method of the active pmap implementation. */
void
pmap_kremove(vm_offset_t va)
{
pmap_methods_ptr->pm_kremove(va);
}
/* Method tables provided by the two pmap implementations. */
extern struct pmap_methods pmap_pae_methods, pmap_nopae_methods;
/*
 * Non-zero when the PAE pmap is in use; pmap_cold() sets it to 1 when the
 * CPU advertises CPUID_PAE.  Exported read-only as vm.pmap.pae_mode.
 */
int pae_mode;
SYSCTL_INT(_vm_pmap, OID_AUTO, pae_mode, CTLFLAG_RD,
&pae_mode, 1,
"PAE");
/*
 * Choose the pmap implementation from the CPU feature bits.  Without
 * CPUID_PAE the non-PAE method table is installed and pae_mode is left
 * untouched; with it, pae_mode becomes 1 and the PAE table is installed.
 * In both cases the chosen implementation's cold-start routine then runs.
 */
void
pmap_cold(void)
{

	if ((cpu_feature & CPUID_PAE) == 0) {
		pmap_methods_ptr = &pmap_nopae_methods;
		pmap_nopae_cold();
		return;
	}

	pae_mode = 1;
	pmap_methods_ptr = &pmap_pae_methods;
	pmap_pae_cold();
}

View file

@ -0,0 +1,48 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_pmap.h"
#include "opt_smp.h"
#include "opt_vm.h"
#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#define PMTYPE pmap_nopae_
#include <machine/pmap_nopae.h>
#include <vm/pmap.h>
_Static_assert(sizeof(struct pmap_KBI) >= sizeof(struct pmap), "pmap KBI");
#include "pmap.c"

49
sys/i386/i386/pmap_pae.c Normal file
View file

@ -0,0 +1,49 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2018 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_pmap.h"
#include "opt_smp.h"
#include "opt_vm.h"
#define PMAP_PAE_COMP
#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#define PMTYPE pmap_pae_
#include <machine/pmap_pae.h>
#include <vm/pmap.h>
_Static_assert(sizeof(struct pmap_KBI) >= sizeof(struct pmap), "pmap KBI");
#include "pmap.c"

View file

@ -119,6 +119,7 @@ static void trap_fatal(struct trapframe *, vm_offset_t);
void dblfault_handler(void);
extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall);
extern uint64_t pg_nx;
#define MAX_TRAP_MSG 32
@ -871,10 +872,8 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
*/
if (frame->tf_err & PGEX_W)
ftype = VM_PROT_WRITE;
#if defined(PAE) || defined(PAE_TABLES)
else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
ftype = VM_PROT_EXECUTE;
#endif
else
ftype = VM_PROT_READ;
@ -935,10 +934,8 @@ trap_fatal(frame, eva)
printf("fault code = %s %s%s, %s\n",
code & PGEX_U ? "user" : "supervisor",
code & PGEX_W ? "write" : "read",
#if defined(PAE) || defined(PAE_TABLES)
pg_nx != 0 ?
(code & PGEX_I ? " instruction" : " data") :
#endif
"",
code & PGEX_RSV ? "reserved bits in PTE" :
code & PGEX_P ? "protection violation" : "page not present");

View file

@ -397,8 +397,8 @@ vm86_emulate(struct vm86frame *vmf)
(sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \
INTMAP_SIZE + IOMAP_SIZE + 1)
struct vm86_layout {
pt_entry_t vml_pgtbl[PGTABLE_SIZE];
struct vm86_layout_pae {
uint64_t vml_pgtbl[PGTABLE_SIZE];
struct pcb vml_pcb;
struct pcb_ext vml_ext;
char vml_intmap[INTMAP_SIZE];
@ -406,12 +406,26 @@ struct vm86_layout {
char vml_iomap_trailer;
};
void
vm86_initialize(void)
/*
 * Layout of the vm86 region when non-PAE page tables are in use: the
 * page table array holds 32-bit entries and is followed by the pcb, the
 * pcb extension, the interrupt map, the I/O map and one trailer byte.
 */
struct vm86_layout_nopae {
uint32_t vml_pgtbl[PGTABLE_SIZE];
struct pcb vml_pcb;
struct pcb_ext vml_ext;
char vml_intmap[INTMAP_SIZE];
char vml_iomap[IOMAP_SIZE];
char vml_iomap_trailer;
};
_Static_assert(sizeof(struct vm86_layout_pae) <= ctob(3),
"struct vm86_layout_pae exceeds space allocated in locore.s");
_Static_assert(sizeof(struct vm86_layout_nopae) <= ctob(3),
"struct vm86_layout_nopae exceeds space allocated in locore.s");
static void
vm86_initialize_pae(void)
{
int i;
u_int *addr;
struct vm86_layout *vml = (struct vm86_layout *)vm86paddr;
struct vm86_layout_pae *vml;
struct pcb *pcb;
struct pcb_ext *ext;
struct soft_segment_descriptor ssd = {
@ -425,12 +439,6 @@ vm86_initialize(void)
0 /* granularity */
};
/*
* this should be a compile time error, but cpp doesn't grok sizeof().
*/
if (sizeof(struct vm86_layout) > ctob(3))
panic("struct vm86_layout exceeds space allocated in locore.s");
/*
* Below is the memory layout that we use for the vm86 region.
*
@ -473,6 +481,7 @@ vm86_initialize(void)
#define vm86_frame pcb_ebp
#define pgtable_va pcb_ebx
vml = (struct vm86_layout_pae *)vm86paddr;
pcb = &vml->vml_pcb;
ext = &vml->vml_ext;
@ -482,13 +491,13 @@ vm86_initialize(void)
pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
pcb->pgtable_va = vm86paddr;
pcb->pcb_flags = PCB_VM86CALL;
pcb->pcb_flags = PCB_VM86CALL;
pcb->pcb_ext = ext;
bzero(ext, sizeof(struct pcb_ext));
bzero(ext, sizeof(struct pcb_ext));
ext->ext_tss.tss_esp0 = vm86paddr;
ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
ext->ext_tss.tss_ioopt =
ext->ext_tss.tss_ioopt =
((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
ext->ext_iomap = vml->vml_iomap;
ext->ext_vm86.vm86_intmap = vml->vml_intmap;
@ -502,7 +511,7 @@ vm86_initialize(void)
vml->vml_iomap_trailer = 0xff;
ssd.ssd_base = (u_int)&ext->ext_tss;
ssd.ssd_limit = TSS_SIZE - 1;
ssd.ssd_limit = TSS_SIZE - 1;
ssdtosd(&ssd, &ext->ext_tssd);
vm86pcb = pcb;
@ -517,6 +526,80 @@ vm86_initialize(void)
#endif
}
/*
 * Set up the vm86 region for a kernel using non-PAE page tables.
 *
 * Overlays struct vm86_layout_nopae on vm86paddr, initializes vm86_lock,
 * fills in the vm86 PCB and its extension (TSS, interrupt and I/O bitmaps),
 * builds the TSS descriptor and finally publishes the PCB through vm86pcb.
 */
static void
vm86_initialize_nopae(void)
{
	u_int i;
	u_int *addr;
	struct vm86_layout_nopae *vml;
	struct pcb *pcb;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		0,			/* length (overwritten) */
		SDT_SYS386TSS,		/* segment type */
		0,			/* priority level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 16 size */
		0			/* granularity */
	};

	vml = (struct vm86_layout_nopae *)vm86paddr;
	pcb = &vml->vml_pcb;
	ext = &vml->vml_ext;

	mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);

	/* Start from a zeroed PCB, then fill in the vm86-specific fields. */
	bzero(pcb, sizeof(struct pcb));
	pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
	pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
	pcb->pgtable_va = vm86paddr;
	pcb->pcb_flags = PCB_VM86CALL;
	pcb->pcb_ext = ext;

	bzero(ext, sizeof(struct pcb_ext));
	ext->ext_tss.tss_esp0 = vm86paddr;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	/* Offset of the I/O map from the TSS, stored in the upper 16 bits. */
	ext->ext_tss.tss_ioopt =
	    ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
	ext->ext_iomap = vml->vml_iomap;
	ext->ext_vm86.vm86_intmap = vml->vml_intmap;

	/* vm86_has_vme stays 0 (from the bzero above) without CPUID_VME. */
	if (cpu_feature & CPUID_VME)
		ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);

	/*
	 * Clear the interrupt map and the I/O map in one pass; this relies
	 * on vml_intmap and vml_iomap being adjacent in the layout.
	 */
	addr = (u_int *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
		*addr++ = 0;
	vml->vml_iomap_trailer = 0xff;

	ssd.ssd_base = (u_int)&ext->ext_tss;
	ssd.ssd_limit = TSS_SIZE - 1;
	ssdtosd(&ssd, &ext->ext_tssd);

	vm86pcb = pcb;

#if 0
	/*
	 * use whatever is leftover of the vm86 page layout as a
	 * message buffer so we can capture early output.
	 */
	msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout_nopae),
	    ctob(3) - sizeof(struct vm86_layout_nopae));
#endif
}
void
vm86_initialize(void)
{
if (pae_mode)
vm86_initialize_pae();
else
vm86_initialize_nopae();
}
vm_offset_t
vm86_getpage(struct vm86context *vmc, int pagenum)
{
@ -644,19 +727,31 @@ vm86_intcall(int intnum, struct vm86frame *vmf)
int
vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
{
pt_entry_t *pte;
uint64_t *pte_pae;
uint32_t *pte_nopae;
int (*p)(struct vm86frame *);
vm_paddr_t page;
int i, entry, retval;
pte = (pt_entry_t *)vm86paddr;
mtx_lock(&vm86_lock);
for (i = 0; i < vmc->npages; i++) {
page = vtophys(vmc->pmap[i].kva & PG_FRAME);
entry = vmc->pmap[i].pte_num;
vmc->pmap[i].old_pte = pte[entry];
pte[entry] = page | PG_V | PG_RW | PG_U;
pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
if (pae_mode) {
pte_pae = (uint64_t *)vm86paddr;
for (i = 0; i < vmc->npages; i++) {
page = vtophys(vmc->pmap[i].kva & PG_FRAME_PAE);
entry = vmc->pmap[i].pte_num;
vmc->pmap[i].old_pte = pte_pae[entry];
pte_pae[entry] = page | PG_V | PG_RW | PG_U;
pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
}
} else {
pte_nopae = (uint32_t *)vm86paddr;
for (i = 0; i < vmc->npages; i++) {
page = vtophys(vmc->pmap[i].kva & PG_FRAME_NOPAE);
entry = vmc->pmap[i].pte_num;
vmc->pmap[i].old_pte = pte_nopae[entry];
pte_nopae[entry] = page | PG_V | PG_RW | PG_U;
pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
}
}
vmf->vmf_trapno = intnum;
@ -666,10 +761,18 @@ vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
retval = p(vmf);
critical_exit();
for (i = 0; i < vmc->npages; i++) {
entry = vmc->pmap[i].pte_num;
pte[entry] = vmc->pmap[i].old_pte;
pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
if (pae_mode) {
for (i = 0; i < vmc->npages; i++) {
entry = vmc->pmap[i].pte_num;
pte_pae[entry] = vmc->pmap[i].old_pte;
pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
}
} else {
for (i = 0; i < vmc->npages; i++) {
entry = vmc->pmap[i].pte_num;
pte_nopae[entry] = vmc->pmap[i].old_pte;
pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
}
}
mtx_unlock(&vm86_lock);

View file

@ -101,8 +101,12 @@ ENTRY(vm86_bioscall)
movl %cr3,%eax
pushl %eax /* save address space */
movl IdlePTD,%ecx /* va (and pa) of Idle PTD */
movl %ecx,%ebx
cmpb $0,pae_mode
jne 2f
movl IdlePTD_nopae,%ecx /* va (and pa) of Idle PTD */
jmp 3f
2: movl IdlePTD_pae,%ecx
3: movl %ecx,%ebx
movl 0(%ebx),%eax
pushl %eax /* old ptde != 0 when booting */
pushl %ebx /* keep for reuse */
@ -112,10 +116,10 @@ ENTRY(vm86_bioscall)
movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */
movl %eax,0(%ebx) /* ... install as PTD entry 0 */
#if defined(PAE) || defined(PAE_TABLES)
cmpb $0,pae_mode
je 4f
movl IdlePDPT,%ecx
#endif
movl %ecx,%cr3 /* new page tables */
4: movl %ecx,%cr3 /* new page tables */
movl SCR_VMFRAME(%edx),%esp /* switch to new stack */
pushl %esp

View file

@ -230,11 +230,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
* Set registers for trampoline to user mode. Leave space for the
* return address on stack. These are the kernel mode register values.
*/
#if defined(PAE) || defined(PAE_TABLES)
pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt);
#else
pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);
#endif
pcb2->pcb_cr3 = pmap_get_cr3(vmspace_pmap(p2->p_vmspace));
pcb2->pcb_edi = 0;
pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */
pcb2->pcb_ebp = 0;
@ -572,34 +568,10 @@ kvtop(void *addr)
void
sf_buf_map(struct sf_buf *sf, int flags)
{
pt_entry_t opte, *ptep;
/*
* Update the sf_buf's virtual-to-physical mapping, flushing the
* virtual address from the TLB. Since the reference count for
* the sf_buf's old mapping was zero, that mapping is not
* currently in use. Consequently, there is no need to exchange
* the old and new PTEs atomically, even under PAE.
*/
ptep = vtopte(sf->kva);
opte = *ptep;
*ptep = VM_PAGE_TO_PHYS(sf->m) | PG_RW | PG_V |
pmap_cache_bits(kernel_pmap, sf->m->md.pat_mode, 0);
/*
* Avoid unnecessary TLB invalidations: If the sf_buf's old
* virtual-to-physical mapping was not used, then any processor
* that has invalidated the sf_buf's virtual address from its TLB
* since the last used mapping need not invalidate again.
*/
pmap_sf_buf_map(sf);
#ifdef SMP
if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
CPU_ZERO(&sf->cpumask);
sf_buf_shootdown(sf, flags);
#else
if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
pmap_invalidate_page(kernel_pmap, sf->kva);
#endif
}

View file

@ -69,6 +69,8 @@ void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault));
void fill_based_sd(struct segment_descriptor *sdp, uint32_t base);
void i686_pagezero(void *addr);
void sse2_pagezero(void *addr);
int minidumpsys_nopae(struct dumperinfo *);
int minidumpsys_pae(struct dumperinfo *);
void init_AMD_Elan_sc520(void);
vm_paddr_t kvtop(void *addr);
void panicifcpuunsupported(void);

View file

@ -88,25 +88,23 @@
#define CACHE_LINE_SIZE (1 << CACHE_LINE_SHIFT)
#define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */
#define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */
#define PAGE_MASK (PAGE_SIZE-1)
#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
#define PAGE_SIZE (1 << PAGE_SHIFT) /* bytes/page */
#define PAGE_MASK (PAGE_SIZE - 1)
#define NPTEPG (PAGE_SIZE / sizeof(pt_entry_t))
#if defined(PAE) || defined(PAE_TABLES)
#define NPGPTD 4
#define PDRSHIFT 21 /* LOG2(NBPDR) */
#define NPGPTD_SHIFT 9
#else
#define NPGPTD 1
#define PDRSHIFT 22 /* LOG2(NBPDR) */
#define NPGPTD_SHIFT 10
/* Size in bytes of the page directory */
#define NBPTD (NPGPTD << PAGE_SHIFT)
/* Number of PDEs in page directory, 2048 for PAE, 1024 for non-PAE */
#define NPDEPTD (NBPTD / sizeof(pd_entry_t))
/* Number of PDEs in one page of the page directory, 512 vs. 1024 */
#define NPDEPG (PAGE_SIZE / sizeof(pd_entry_t))
#define PDRMASK (NBPDR - 1)
#ifndef PDRSHIFT
#define PDRSHIFT i386_pmap_PDRSHIFT
#endif
#ifndef NBPDR
#define NBPDR (1 << PDRSHIFT) /* bytes/page dir */
#endif
#define NBPTD (NPGPTD<<PAGE_SHIFT)
#define NPDEPTD (NBPTD/(sizeof (pd_entry_t)))
#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */
#define PDRMASK (NBPDR-1)
#define MAXPAGESIZES 2 /* maximum number of supported page sizes */

View file

@ -65,22 +65,14 @@
#define PG_AVAIL2 0x400 /* < programmers use */
#define PG_AVAIL3 0x800 /* \ */
#define PG_PDE_PAT 0x1000 /* PAT PAT index */
#if defined(PAE) || defined(PAE_TABLES)
#define PG_NX (1ull<<63) /* No-execute */
#endif
/* Our various interpretations of the above */
#define PG_W PG_AVAIL1 /* "Wired" pseudoflag */
#define PG_MANAGED PG_AVAIL2
#define PG_PROMOTED PG_AVAIL3 /* PDE only */
#if defined(PAE) || defined(PAE_TABLES)
#define PG_FRAME (0x000ffffffffff000ull)
#define PG_PS_FRAME (0x000fffffffe00000ull)
#else
#define PG_FRAME (~PAGE_MASK)
#define PG_PS_FRAME (0xffc00000)
#endif
#define PG_PROT (PG_RW|PG_U) /* all protection bits . */
#define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */
@ -105,56 +97,30 @@
#define PGEX_RSV 0x08 /* reserved PTE field is non-zero */
#define PGEX_I 0x10 /* during an instruction fetch */
/*
* Size of Kernel address space. This is the number of page table pages
* (4MB each) to use for the kernel. 256 pages == 1 Gigabyte.
* This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
* For PAE, the page table page unit size is 2MB. This means that 512 pages
* is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE.
*/
#if defined(PAE) || defined(PAE_TABLES)
#define KVA_PAGES (512*4)
#else
#define KVA_PAGES (256*4)
#endif
/*
* Pte related macros
*/
#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT)))
/*
* The initial number of kernel page table pages that are constructed
* by pmap_cold() must be sufficient to map vm_page_array[]. That number can
* be calculated as follows:
* max_phys / PAGE_SIZE * sizeof(struct vm_page) / NBPDR
* PAE: max_phys 16G, sizeof(vm_page) 76, NBPDR 2M, 152 page table pages.
* PAE_TABLES: max_phys 4G, sizeof(vm_page) 68, NBPDR 2M, 36 page table pages.
* Non-PAE: max_phys 4G, sizeof(vm_page) 68, NBPDR 4M, 18 page table pages.
*/
#ifndef NKPT
#if defined(PAE)
#define NKPT 240
#elif defined(PAE_TABLES)
#define NKPT 60
#else
#define NKPT 30
#endif
#endif
#ifndef NKPDE
#define NKPDE (KVA_PAGES) /* number of page tables/pde's */
#endif
#define PDRSHIFT_PAE 21 /* LOG2(NBPDR) */
#define PG_FRAME_PAE (0x000ffffffffff000ull)
#define PG_PS_FRAME_PAE (0x000fffffffe00000ull)
#define PDRSHIFT_NOPAE 22
#define PG_FRAME_NOPAE (~PAGE_MASK)
#define PG_PS_FRAME_NOPAE (0xffc00000)
/*
* The *PTDI values control the layout of virtual memory
*/
#define KPTDI 0 /* start of kernel virtual pde's */
#define LOWPTDI 1 /* low memory map pde */
#define KERNPTDI 2 /* start of kernel text pde */
#define PTDPTDI (NPDEPTD - 1 - NPGPTD) /* ptd entry that points
to ptd! */
#define TRPTDI (NPDEPTD - 1) /* u/k trampoline ptd */
/* ptd entry that points to ptd */
#define PTDPTDI (NPDEPTD - NTRPPTD - NPGPTD)
#define TRPTDI (NPDEPTD - NTRPPTD) /* u/k trampoline ptd */
/*
* XXX doesn't really belong here I guess...
@ -171,50 +137,10 @@
#include <vm/_vm_radix.h>
#if defined(PAE) || defined(PAE_TABLES)
typedef uint64_t pdpt_entry_t;
typedef uint64_t pd_entry_t;
typedef uint64_t pt_entry_t;
#define PTESHIFT (3)
#define PDESHIFT (3)
#else
typedef uint32_t pd_entry_t;
typedef uint32_t pt_entry_t;
#define PTESHIFT (2)
#define PDESHIFT (2)
#endif
/*
* Address of current address space page table maps and directories.
*/
#ifdef _KERNEL
#include <machine/atomic.h>
extern pt_entry_t PTmap[];
extern pd_entry_t PTD[];
extern pd_entry_t PTDpde[];
#if defined(PAE) || defined(PAE_TABLES)
extern pdpt_entry_t *IdlePDPT;
#endif
extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
/*
* Translate a virtual address to the kernel virtual address of its page table
* entry (PTE). This can be used recursively. If the address of a PTE as
* previously returned by this macro is itself given as the argument, then the
* address of the page directory entry (PDE) that maps the PTE will be
* returned.
*
* This macro may be used before pmap_bootstrap() is called.
*/
#define vtopte(va) (PTmap + i386_btop(va))
/*
* Translate a virtual address to its physical address.
@ -223,72 +149,10 @@ extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
*/
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
/*
* KPTmap is a linear mapping of the kernel page table. It differs from the
* recursive mapping in two ways: (1) it only provides access to kernel page
* table pages, and not user page table pages, and (2) it provides access to
* a kernel page table page after the corresponding virtual addresses have
* been promoted to a 2/4MB page mapping.
*
* KPTmap is first initialized by pmap_cold() to support just NPKT page table
* pages. Later, it is reinitialized by pmap_bootstrap() to allow for
* expansion of the kernel page table.
*/
extern pt_entry_t *KPTmap;
#if (defined(PAE) || defined(PAE_TABLES))
#define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte)
#define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0)
#define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte)
#define pte_load(ptep) atomic_load_acq_64_i586(ptep)
extern pt_entry_t pg_nx;
#else /* !(PAE || PAE_TABLES) */
#define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte)
#define pte_load_clear(ptep) atomic_swap_int(ptep, 0)
#define pte_store(ptep, pte) do { \
*(u_int *)(ptep) = (u_int)(pte); \
} while (0)
#define pte_load(ptep) atomic_load_acq_int(ptep)
#endif /* !(PAE || PAE_TABLES) */
#define pte_clear(ptep) pte_store(ptep, 0)
#define pde_store(pdep, pde) pte_store(pdep, pde)
/*
* Extract from the kernel page table the physical address that is mapped by
* the given virtual address "va".
*
* This function may be used before pmap_bootstrap() is called.
*/
static __inline vm_paddr_t
pmap_kextract(vm_offset_t va)
{
vm_paddr_t pa;
if ((pa = pte_load(&PTD[va >> PDRSHIFT])) & PG_PS) {
pa = (pa & PG_PS_FRAME) | (va & PDRMASK);
} else {
/*
* Beware of a concurrent promotion that changes the PDE at
* this point! For example, vtopte() must not be used to
* access the PTE because it would use the new PDE. It is,
* however, safe to use the old PDE because the page table
* page is preserved by the promotion.
*/
pa = KPTmap[i386_btop(va)];
pa = (pa & PG_FRAME) | (va & PAGE_MASK);
}
return (pa);
}
#endif /* _KERNEL */
/*
@ -302,20 +166,30 @@ struct md_page {
int pat_mode;
};
#define PMAP_EXTERN_FIELDS \
cpuset_t pm_active; /* active on cpus */ \
struct mtx pm_mtx; \
struct pmap_statistics pm_stats; /* pmap statistics */
struct pmap_KBI {
PMAP_EXTERN_FIELDS
int32_t pm_fill[32];
};
#ifdef PMTYPE
struct pmap {
struct mtx pm_mtx;
PMAP_EXTERN_FIELDS
pd_entry_t *pm_pdir; /* KVA of page directory */
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
cpuset_t pm_active; /* active on cpus */
struct pmap_statistics pm_stats; /* pmap statistics */
LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
#if defined(PAE) || defined(PAE_TABLES)
pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer
table */
#endif
struct vm_radix pm_root; /* spare page table pages */
vm_page_t pm_ptdpg[NPGPTD];
};
#else
#define pmap pmap_KBI
#endif
typedef struct pmap *pmap_t;
@ -360,8 +234,6 @@ struct pv_chunk {
#ifdef _KERNEL
extern caddr_t CADDR3;
extern pt_entry_t *CMAP3;
extern vm_paddr_t phys_avail[];
extern vm_paddr_t dump_avail[];
extern char *ptvmmap; /* poor name! */
@ -372,27 +244,45 @@ extern vm_offset_t virtual_end;
#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
struct sf_buf;
/*
* Only the following functions or macros may be used before pmap_bootstrap()
* is called: pmap_kenter(), pmap_kextract(), pmap_kremove(), vtophys(), and
* vtopte().
*/
void pmap_activate_boot(pmap_t pmap);
void pmap_basemem_setup(u_int basemem);
void *pmap_bios16_enter(void);
void pmap_bios16_leave(void *handle);
void pmap_bootstrap(vm_paddr_t);
int pmap_cache_bits(pmap_t, int mode, boolean_t is_pde);
int pmap_change_attr(vm_offset_t, vm_size_t, int);
caddr_t pmap_cmap3(vm_paddr_t pa, u_int pte_bits);
void pmap_cp_slow0_map(vm_offset_t kaddr, int plen, vm_page_t *ma);
void pmap_flush_page(vm_page_t m);
u_int pmap_get_kcr3(void);
u_int pmap_get_cr3(pmap_t);
vm_offset_t pmap_get_map_low(void);
vm_offset_t pmap_get_vm_maxuser_address(void);
void pmap_init_pat(void);
void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
void *pmap_kenter_temporary(vm_paddr_t pa, int i);
vm_paddr_t pmap_kextract(vm_offset_t va);
void pmap_kremove(vm_offset_t);
void pmap_ksetrw(vm_offset_t va);
void *pmap_mapbios(vm_paddr_t, vm_size_t);
void *pmap_mapdev(vm_paddr_t, vm_size_t);
void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
boolean_t pmap_page_is_mapped(vm_page_t m);
void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
vm_paddr_t pmap_pg_frame(vm_paddr_t pa);
bool pmap_ps_enabled(pmap_t pmap);
void pmap_remap_lower(bool);
void pmap_remap_lowptdi(bool);
void pmap_set_nx(void);
void pmap_sf_buf_map(struct sf_buf *sf);
void pmap_unmapdev(vm_offset_t, vm_size_t);
pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2;
void pmap_invalidate_page(pmap_t, vm_offset_t);
void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void pmap_invalidate_all(pmap_t);
@ -405,6 +295,13 @@ void pmap_trm_free(void *addr, size_t size);
void invltlb_glob(void);
struct thread;
extern int pae_mode;
extern int i386_pmap_VM_NFREEORDER;
extern int i386_pmap_VM_LEVEL_0_ORDER;
extern int i386_pmap_PDRSHIFT;
#endif /* _KERNEL */
#endif /* !LOCORE */

View file

@ -0,0 +1,124 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2018 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MACHINE_PMAP_BASE_H_
#define _MACHINE_PMAP_BASE_H_
/*
 * Table of pmap operations, one function pointer per entry point.
 * Member names follow the corresponding pmap_*() function with the
 * "pmap_" prefix replaced by "pm_" (e.g. pm_ksetrw for pmap_ksetrw()).
 *
 * NOTE(review): presumably each page table flavour (PAE and non-PAE)
 * supplies one instance of this table and the public pmap_*() functions
 * dispatch through the one selected at boot -- confirm against the pmap
 * implementation files.
 */
struct pmap_methods {
void (*pm_ksetrw)(vm_offset_t);
void (*pm_remap_lower)(bool);
void (*pm_remap_lowptdi)(bool);
void (*pm_align_superpage)(vm_object_t object, vm_ooffset_t offset,
vm_offset_t *addr, vm_size_t size);
vm_offset_t (*pm_quick_enter_page)(vm_page_t m);
void (*pm_quick_remove_page)(vm_offset_t addr);
void *(*pm_trm_alloc)(size_t size, int flags);
void (*pm_trm_free)(void *addr, size_t size);
vm_offset_t (*pm_get_map_low)(void);
vm_offset_t (*pm_get_vm_maxuser_address)(void);
vm_paddr_t (*pm_kextract)(vm_offset_t va);
vm_paddr_t (*pm_pg_frame)(vm_paddr_t pa);
void (*pm_sf_buf_map)(struct sf_buf *sf);
void (*pm_cp_slow0_map)(vm_offset_t kaddr, int plen, vm_page_t *ma);
/* Both return u_int, i.e. a 32-bit value to load into %cr3. */
u_int (*pm_get_kcr3)(void);
u_int (*pm_get_cr3)(pmap_t);
caddr_t (*pm_cmap3)(vm_paddr_t pa, u_int pte_flags);
void (*pm_basemem_setup)(u_int basemem);
void (*pm_set_nx)(void);
void *(*pm_bios16_enter)(void);
void (*pm_bios16_leave)(void *handle);
void (*pm_bootstrap)(vm_paddr_t firstaddr);
boolean_t (*pm_is_valid_memattr)(pmap_t, vm_memattr_t);
int (*pm_cache_bits)(pmap_t, int, boolean_t);
bool (*pm_ps_enabled)(pmap_t);
void (*pm_pinit0)(pmap_t);
int (*pm_pinit)(pmap_t);
void (*pm_activate)(struct thread *);
void (*pm_activate_boot)(pmap_t);
void (*pm_advise)(pmap_t, vm_offset_t, vm_offset_t, int);
void (*pm_clear_modify)(vm_page_t);
int (*pm_change_attr)(vm_offset_t, vm_size_t, int);
int (*pm_mincore)(pmap_t, vm_offset_t, vm_paddr_t *);
void (*pm_copy)(pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t);
void (*pm_copy_page)(vm_page_t, vm_page_t);
void (*pm_copy_pages)(vm_page_t [], vm_offset_t, vm_page_t [],
vm_offset_t, int);
void (*pm_zero_page)(vm_page_t);
void (*pm_zero_page_area)(vm_page_t, int, int);
int (*pm_enter)(pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int,
int8_t);
void (*pm_enter_object)(pmap_t, vm_offset_t, vm_offset_t,
vm_page_t, vm_prot_t);
void (*pm_enter_quick)(pmap_t, vm_offset_t, vm_page_t, vm_prot_t);
void *(*pm_kenter_temporary)(vm_paddr_t pa, int);
void (*pm_object_init_pt)(pmap_t, vm_offset_t, vm_object_t,
vm_pindex_t, vm_size_t);
void (*pm_unwire)(pmap_t, vm_offset_t, vm_offset_t);
boolean_t (*pm_page_exists_quick)(pmap_t, vm_page_t);
int (*pm_page_wired_mappings)(vm_page_t);
boolean_t (*pm_page_is_mapped)(vm_page_t);
void (*pm_remove_pages)(pmap_t);
boolean_t (*pm_is_modified)(vm_page_t);
boolean_t (*pm_is_prefaultable)(pmap_t, vm_offset_t);
boolean_t (*pm_is_referenced)(vm_page_t);
void (*pm_remove_write)(vm_page_t);
int (*pm_ts_referenced)(vm_page_t);
void *(*pm_mapdev_attr)(vm_paddr_t, vm_size_t, int);
void (*pm_unmapdev)(vm_offset_t, vm_size_t);
void (*pm_page_set_memattr)(vm_page_t, vm_memattr_t);
vm_paddr_t (*pm_extract)(pmap_t, vm_offset_t);
vm_page_t (*pm_extract_and_hold)(pmap_t, vm_offset_t, vm_prot_t);
vm_offset_t (*pm_map)(vm_offset_t *, vm_paddr_t, vm_paddr_t, int);
void (*pm_qenter)(vm_offset_t sva, vm_page_t *, int);
void (*pm_qremove)(vm_offset_t, int);
void (*pm_release)(pmap_t);
void (*pm_protect)(pmap_t, vm_offset_t, vm_offset_t, vm_prot_t);
void (*pm_remove)(pmap_t, vm_offset_t, vm_offset_t);
void (*pm_remove_all)(vm_page_t);
void (*pm_init)(void);
void (*pm_init_pat)(void);
void (*pm_growkernel)(vm_offset_t);
void (*pm_invalidate_page)(pmap_t, vm_offset_t);
void (*pm_invalidate_range)(pmap_t, vm_offset_t, vm_offset_t);
void (*pm_invalidate_all)(pmap_t);
void (*pm_invalidate_cache)(void);
void (*pm_flush_page)(vm_page_t);
void (*pm_kenter)(vm_offset_t, vm_paddr_t);
void (*pm_kremove)(vm_offset_t);
};
void pmap_cold(void);
void pmap_pae_cold(void);
void pmap_nopae_cold(void);
#endif

View file

@ -0,0 +1,100 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1991 Regents of the University of California.
* All rights reserved.
*
* Copyright (c) 2018 The FreeBSD Foundation
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department and William Jolitz of UUNET Technologies Inc.
*
* Portions of this software were developed by
* Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Derived from hp300 version by Mike Hibler, this version by William
* Jolitz uses a recursive map [a pde points to the page directory] to
* map the page tables using the pagetables themselves. This is done to
* reduce the impact on kernel virtual memory for lots of sparse address
* space, and to reduce the cost of memory to each process.
*
* from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90
* from: @(#)pmap.h 7.4 (Berkeley) 5/12/91
* $FreeBSD$
*/
#ifndef _MACHINE_PMAP_NOPAE_H
#define _MACHINE_PMAP_NOPAE_H
/*
 * Non-PAE (32-bit page table entry) parameters for the i386 pmap.
 * This header is the counterpart of pmap_pae.h and defines the same set
 * of names with the non-PAE values.
 */
/* Number of PTDs for trampoline mapping */
#define NTRPPTD 1
/* low memory map pde */
#define LOWPTDI 1
/* start of kernel text pde */
#define KERNPTDI 2
/* Num of pages for page directory */
#define NPGPTD 1
#define NPGPTD_SHIFT 10
/*
 * Replace any earlier definitions of PDRSHIFT and NBPDR with the
 * compile-time non-PAE constants.
 */
#undef PDRSHIFT
#define PDRSHIFT PDRSHIFT_NOPAE
#undef NBPDR
#define NBPDR (1 << PDRSHIFT_NOPAE) /* bytes/page dir */
#define PG_FRAME PG_FRAME_NOPAE
#define PG_PS_FRAME PG_PS_FRAME_NOPAE
/*
 * Size of the kernel address space, expressed as the number of page table
 * pages (4MB each without PAE) used for the kernel; 256 pages == 1 Gigabyte.
 */
#define KVA_PAGES (256*4)
/*
 * Initial number of kernel page table pages; may be overridden by
 * defining NKPT before this header is included.
 */
#ifndef NKPT
#define NKPT 30
#endif
/* Page directory and page table entries are 32 bits wide without PAE. */
typedef uint32_t pd_entry_t;
typedef uint32_t pt_entry_t;
typedef uint32_t pdpt_entry_t; /* Only to keep struct pmap layout. */
/* Presumably log2 of the entry size in bytes (4 here, 8 under PAE). */
#define PTESHIFT (2)
#define PDESHIFT (2)
/*
 * PTE/PDE accessors.  Entries fit in an int, so the int-sized atomic
 * primitives are used here; pte_store() is a plain 32-bit store.
 */
#define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte)
#define pte_load_clear(ptep) atomic_swap_int(ptep, 0)
#define pte_store(ptep, pte) do { \
*(u_int *)(ptep) = (u_int)(pte); \
} while (0)
#define pte_load(ptep) atomic_load_int(ptep)
extern pt_entry_t PTmap[];
extern pd_entry_t PTD[];
extern pd_entry_t PTDpde[];
extern pd_entry_t *IdlePTD_nopae;
extern pt_entry_t *KPTmap_nopae;
struct pmap;
/*
 * The function name is built by pasting PMTYPE in front of "pmap_pte";
 * NOTE(review): PMTYPE is expected to be defined by the including file.
 */
pt_entry_t *__CONCAT(PMTYPE, pmap_pte)(struct pmap *, vm_offset_t) __pure2;
#endif

123
sys/i386/include/pmap_pae.h Normal file
View file

@ -0,0 +1,123 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1991 Regents of the University of California.
* All rights reserved.
*
* Copyright (c) 2018 The FreeBSD Foundation
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department and William Jolitz of UUNET Technologies Inc.
*
* Portions of this software were developed by
* Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Derived from hp300 version by Mike Hibler, this version by William
* Jolitz uses a recursive map [a pde points to the page directory] to
* map the page tables using the pagetables themselves. This is done to
* reduce the impact on kernel virtual memory for lots of sparse address
* space, and to reduce the cost of memory to each process.
*
* from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90
* from: @(#)pmap.h 7.4 (Berkeley) 5/12/91
* $FreeBSD$
*/
#ifndef _MACHINE_PMAP_PAE_H
#define _MACHINE_PMAP_PAE_H
#define NTRPPTD 2 /* Number of PTDs for trampoline
mapping */
#define LOWPTDI 2 /* low memory map pde */
#define KERNPTDI 4 /* start of kernel text pde */
#define NPGPTD 4 /* Num of pages for page directory */
#define NPGPTD_SHIFT 9
#undef PDRSHIFT
#define PDRSHIFT PDRSHIFT_PAE
#undef NBPDR
#define NBPDR (1 << PDRSHIFT_PAE) /* bytes/page dir */
#define PG_FRAME PG_FRAME_PAE
#define PG_PS_FRAME PG_PS_FRAME_PAE
/*
* Size of Kernel address space. This is the number of page table pages
* (4MB each) to use for the kernel. 256 pages == 1 Gigabyte.
* This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
* For PAE, the page table page unit size is 2MB. This means that 512 pages
* is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE.
*/
#define KVA_PAGES (512*4)
/*
* The initial number of kernel page table pages that are constructed
* by pmap_cold() must be sufficient to map vm_page_array. That number can
* be calculated as follows:
* max_phys / PAGE_SIZE * sizeof(struct vm_page) / NBPDR
* PAE: max_phys 16G, sizeof(vm_page) 76, NBPDR 2M, 152 page table pages.
* PAE_TABLES: max_phys 4G, sizeof(vm_page) 68, NBPDR 2M, 36 page table pages.
* Non-PAE: max_phys 4G, sizeof(vm_page) 68, NBPDR 4M, 18 page table pages.
*/
#ifndef NKPT
#define NKPT 240
#endif
typedef uint64_t pdpt_entry_t;
typedef uint64_t pd_entry_t;
typedef uint64_t pt_entry_t;
#define PTESHIFT (3)
#define PDESHIFT (3)
#define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte)
#define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0)
#define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte)
#define pte_load(ptep) atomic_load_acq_64_i586(ptep)
extern pdpt_entry_t *IdlePDPT;
extern pt_entry_t pg_nx;
extern pd_entry_t *IdlePTD_pae; /* physical address of "Idle" state directory */
/*
* KPTmap is a linear mapping of the kernel page table. It differs from the
* recursive mapping in two ways: (1) it only provides access to kernel page
* table pages, and not user page table pages, and (2) it provides access to
* a kernel page table page after the corresponding virtual addresses have
* been promoted to a 2/4MB page mapping.
*
* KPTmap is first initialized by pmap_cold() to support just NKPT page table
* pages. Later, it is reinitialized by pmap_bootstrap() to allow for
* expansion of the kernel page table.
*/
extern pt_entry_t *KPTmap_pae;
#endif

View file

@ -111,7 +111,7 @@ struct vm86context {
int flags;
int pte_num;
vm_offset_t kva;
u_int old_pte;
uint64_t old_pte;
} pmap[VM86_PMAPSIZE];
};

View file

@ -95,25 +95,32 @@
#define VM_FREEPOOL_DIRECT 0
/*
* Create two free page lists: VM_FREELIST_DEFAULT is for physical
* pages that are above the largest physical address that is
* accessible by ISA DMA and VM_FREELIST_LOWMEM is for physical pages
* that are below that address.
* Create up to three free page lists: VM_FREELIST_DMA32 is for physical pages
* that have physical addresses below 4G but are not accessible by ISA DMA,
* and VM_FREELIST_LOWMEM is for physical pages that are accessible by ISA
* DMA.
*/
#define VM_NFREELIST 2
#define VM_NFREELIST 3
#define VM_FREELIST_DEFAULT 0
#define VM_FREELIST_LOWMEM 1
#define VM_FREELIST_DMA32 1
#define VM_FREELIST_LOWMEM 2
#define VM_LOWMEM_BOUNDARY (16 << 20) /* 16MB ISA DMA limit */
/*
* Always create DMA32 freelist if there is any memory above 4G.
* Bounce dma is extremely fragile and simultaneously intensively
* used.
*/
#define VM_DMA32_NPAGES_THRESHOLD 1
/*
* The largest allocation size is 2MB under PAE and 4MB otherwise.
*/
#ifdef PAE
#define VM_NFREEORDER 10
#else
#define VM_NFREEORDER 11
#endif
#define VM_NFREEORDER_PAE 10
#define VM_NFREEORDER_NOPAE 11
#define VM_NFREEORDER_MAX VM_NFREEORDER_NOPAE
#define VM_NFREEORDER i386_pmap_VM_NFREEORDER
/*
* Enable superpage reservations: 1 level.
@ -127,18 +134,19 @@
* used, and 1024 pages otherwise.
*/
#ifndef VM_LEVEL_0_ORDER
#if defined(PAE) || defined(PAE_TABLES)
#define VM_LEVEL_0_ORDER 9
#define VM_LEVEL_0_ORDER_PAE 9
#define VM_LEVEL_0_ORDER_NOPAE 10
#define VM_LEVEL_0_ORDER_MAX VM_LEVEL_0_ORDER_NOPAE
#define VM_LEVEL_0_ORDER i386_pmap_VM_LEVEL_0_ORDER
#else
#define VM_LEVEL_0_ORDER 10
#endif
#define VM_LEVEL_0_ORDER_MAX VM_LEVEL_0_ORDER
#endif
/*
* Kernel physical load address.
*/
#ifndef KERNLOAD
#define KERNLOAD (KERNPTDI << PDRSHIFT)
#define KERNLOAD (8 * 1024 * 1024)
#endif /* !defined(KERNLOAD) */
/*
@ -148,7 +156,7 @@
* messy at times, but hey, we'll do anything to save a page :-)
*/
#define VM_MAX_KERNEL_ADDRESS VADDR(PTDPTDI, 0)
#define VM_MAX_KERNEL_ADDRESS (0xffffffffU - 16 * 1024 * 1024 + 1)
#define VM_MIN_KERNEL_ADDRESS 0
@ -157,7 +165,7 @@
#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0)
#define VM_MAXUSER_ADDRESS VADDR(TRPTDI, 0)
#define VM_MAXUSER_ADDRESS (0xffffffff - 4 * 1024 * 1024 + 1)
#define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE)
#define USRSTACK SHAREDPAGE
@ -168,12 +176,13 @@
#define PMAP_TRM_MIN_ADDRESS VM_MAXUSER_ADDRESS
#define PMAP_TRM_MAX_ADDRESS 0xffffffff
#define PMAP_MAP_LOW VADDR(LOWPTDI, 0)
#define PMAP_MAP_LOW (4 * 1024 * 1024)
/*
* KVA layout. The unit of the system allocation is single PDE, which
* represents NBPDR bytes, aligned to NBPDR. NBPDR is 4M for non-PAE
* page tables, and 2M for PAE. Addresses below are shown for non-PAE.
* page tables, and 2M for PAE, so PAE mode requires twice as many PTDs
* to create the same memory map as non-PAE.
*
* 0x00000000 - 0x003fffff Transient identity map of low memory (0-4M),
* normally disabled to catch NULL derefs.
@ -193,7 +202,7 @@
* How many physical pages per kmem arena virtual page.
*/
#ifndef VM_KMEM_SIZE_SCALE
#define VM_KMEM_SIZE_SCALE (3)
#define VM_KMEM_SIZE_SCALE (1)
#endif
/*

View file

@ -490,15 +490,13 @@ pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus)
if (minbus != 0)
return (0);
#ifndef PAE
if (base >= 0x100000000) {
if (!pae_mode && base >= 0x100000000) {
if (bootverbose)
printf(
"PCI: Memory Mapped PCI configuration area base 0x%jx too high\n",
(uintmax_t)base);
return (0);
}
#endif
if (bootverbose)
printf("PCIe: Memory Mapped configuration base @ 0x%jx\n",

View file

@ -194,8 +194,7 @@ acpi_wakeup_cpus(struct acpi_softc *sc)
* cpususpend_handler() and we will release them soon. Then each
* will invalidate its TLB.
*/
PTD[KPTDI] = 0;
invltlb_glob();
pmap_remap_lowptdi(false);
#endif
/* restore the warmstart vector */
@ -277,7 +276,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
* be careful to use the kernel map (PTD[0] is for curthread
* which may be a user thread in deprecated APIs).
*/
PTD[KPTDI] = PTD[LOWPTDI];
pmap_remap_lowptdi(true);
#endif
/* Call ACPICA to enter the desired sleep state */
@ -449,12 +448,7 @@ acpi_install_wakeup_handler(struct acpi_softc *sc)
/* Save pointers to some global data. */
WAKECODE_FIXUP(wakeup_ret, void *, resumectx);
#ifndef __amd64__
#if defined(PAE) || defined(PAE_TABLES)
WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdpt));
#else
WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdir));
#endif
WAKECODE_FIXUP(wakeup_cr3, register_t, pmap_get_kcr3());
#else /* __amd64__ */
/* Create the initial 1GB replicated page tables */
for (i = 0; i < 512; i++) {

View file

@ -135,11 +135,7 @@ typedef __uint64_t __vm_size_t;
#else
typedef __uint32_t __u_register_t;
typedef __uint32_t __vm_offset_t;
#ifdef PAE
typedef __uint64_t __vm_paddr_t;
#else
typedef __uint32_t __vm_paddr_t;
#endif
typedef __uint32_t __vm_size_t;
#endif
typedef int ___wchar_t;

View file

@ -102,23 +102,10 @@ struct trapframe;
*/
typedef void alias_for_inthand_t(void);
/*
* Returns the maximum physical address that can be used with the
* current system.
*/
static __inline vm_paddr_t
cpu_getmaxphyaddr(void)
{
#if defined(__i386__) && !defined(PAE)
return (0xffffffff);
#else
return ((1ULL << cpu_maxphyaddr) - 1);
#endif
}
bool acpi_get_fadt_bootflags(uint16_t *flagsp);
void *alloc_fpusave(int flags);
void busdma_swi(void);
vm_paddr_t cpu_getmaxphyaddr(void);
bool cpu_mwait_usable(void);
void cpu_probe_amdc1e(void);
void cpu_setregs(void);

View file

@ -53,6 +53,9 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/power.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/asmacros.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
@ -2533,3 +2536,18 @@ print_hypervisor_info(void)
if (*hv_vendor)
printf("Hypervisor: Origin = \"%s\"\n", hv_vendor);
}
/*
 * Report the highest physical address usable on the running system.
 *
 * On i386 the answer is capped at 0xffffffff whenever pae_mode is not
 * set; in every other case it is derived from cpu_maxphyaddr, which is
 * used here as a count of physical address bits.
 */
vm_paddr_t
cpu_getmaxphyaddr(void)
{
	vm_paddr_t limit;

#if defined(__i386__)
	/* Without PAE mode only the low 4G are reachable. */
	if (!pae_mode) {
		limit = 0xffffffff;
		return (limit);
	}
#endif
	/* All ones in the low cpu_maxphyaddr bits. */
	limit = (1ULL << cpu_maxphyaddr) - 1;
	return (limit);
}