riscv: smarter DMAP construction

Currently we create the DMAP by mapping the entire range between the
smallest and largest physical memory addresses with L1 superpages. This
is obviously overkill, and we may end up mapping all kinds of ranges that
are not real memory.

In the case of the HiFive Unmatched (obsolete hardware), there is an
erratum resulting in faults when a TLB mapping spans PMP (firmware)
protection regions. So, when our DMAP mapping spans into the memory
reserved by OpenSBI, we get a fatal fault. This highlights the need to
be smarter here.

Therefore, let's attempt to build the DMAP a little more correctly by
walking the physmap array and mapping each range individually (see the
sketch below). It is not perfect, in that we still only respect each
range's boundaries to a 2MB granularity, but this could be improved in
the future.

Reviewed by:	markj
MFC after:	1 month
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D45328
Author:	Mitchell Horne
Date:	2024-06-20 15:32:07 -03:00
Commit:	762a3224cd (parent de09dcebd7)
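Before the diff itself, here is a minimal, standalone sketch of the
range-carving idea described above: round the start of each segment down
to 2MB, map a head of 2MB pages up to the first 1GB boundary, map as many
whole 1GB pages as fit, and finish the tail with 2MB pages. The constants,
the rounding macros, and map_range() are local to this sketch (it compiles
on its own) and are not the kernel code; the real logic is in
pmap_bootstrap_dmap() in the diff below.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define	L1_SIZE		(UINT64_C(1) << 30)	/* 1GB superpage */
#define	L2_SIZE		(UINT64_C(1) << 21)	/* 2MB superpage */
#define	L1_OFFSET	(L1_SIZE - 1)

#define	rounddown(x, y)	(((x) / (y)) * (y))
#define	roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))

/* Count the 2MB and 1GB mappings a single physmap range would receive. */
static void
map_range(uint64_t start, uint64_t end)
{
	uint64_t pa = rounddown(start, L2_SIZE);	/* 2MB granularity */
	unsigned n2mb = 0, n1gb = 0;

	if (roundup(pa, L1_SIZE) + L1_SIZE > end) {
		/* No aligned 1GB page fits: use 2MB pages for everything. */
		for (; pa < end; pa += L2_SIZE)
			n2mb++;
	} else {
		/* Head: 2MB pages up to the first 1GB boundary. */
		for (; (pa & L1_OFFSET) != 0; pa += L2_SIZE)
			n2mb++;
		/* Middle: whole 1GB pages. */
		for (; pa + L1_SIZE - 1 < end; pa += L1_SIZE)
			n1gb++;
		/* Tail: 2MB pages for whatever remains. */
		for (; pa < end; pa += L2_SIZE)
			n2mb++;
	}
	printf("[%#" PRIx64 ", %#" PRIx64 "): %u x 2MB + %u x 1GB, "
	    "mapped through %#" PRIx64 "\n", start, end, n2mb, n1gb, pa);
}

int
main(void)
{
	/* A plausible RAM segment: roughly 3GB starting at 0x80200000. */
	map_range(UINT64_C(0x80200000), UINT64_C(0x140000000));
	return (0);
}

For this example segment the sketch reports 511 x 2MB pages, 2 x 1GB
pages, and an empty tail. The rounded-down start and the 2MB-stepped tail
are exactly the "2MB granularity" caveat mentioned in the message above.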

@@ -540,37 +540,109 @@ pmap_early_alloc_tables(vm_paddr_t *freemempos, int npages)
 	return (pt);
 }
 
-static void
-pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
+/*
+ * Construct the direct map -- a linear mapping of physical memory into
+ * the kernel address space.
+ *
+ * We walk the list of physical memory segments (of arbitrary size and
+ * address) mapping each appropriately using L2 and L1 superpages.
+ * Consequently, the DMAP address space will have unmapped regions
+ * corresponding to any holes between physical memory segments.
+ *
+ * The lowest usable physical address will always be mapped to
+ * DMAP_MIN_ADDRESS.
+ */
+static vm_paddr_t
+pmap_bootstrap_dmap(pd_entry_t *l1, vm_paddr_t freemempos)
 {
+	vm_paddr_t physmap[PHYS_AVAIL_ENTRIES];
 	vm_offset_t va;
-	vm_paddr_t pa;
-	pd_entry_t *l1;
-	u_int l1_slot;
-	pt_entry_t entry;
-	pn_t pn;
+	vm_paddr_t min_pa, max_pa, pa, endpa;
+	pd_entry_t *l2;
+	u_int l1slot, l2slot;
+	int physmap_idx;
 
-	pa = dmap_phys_base = min_pa & ~L1_OFFSET;
-	va = DMAP_MIN_ADDRESS;
-	l1 = (pd_entry_t *)kern_l1;
-	l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);
+	physmap_idx = physmem_avail(physmap, nitems(physmap));
+	min_pa = physmap[0];
+	max_pa = physmap[physmap_idx - 1];
 
-	for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
-	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
-		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
+	printf("physmap_idx %u\n", physmap_idx);
+	printf("min_pa %lx\n", min_pa);
+	printf("max_pa %lx\n", max_pa);
 
-		/* superpages */
-		pn = (pa / PAGE_SIZE);
-		entry = PTE_KERN;
-		entry |= (pn << PTE_PPN0_S);
-		pmap_store(&l1[l1_slot], entry);
+	/* Set the limits of the DMAP region. */
+	dmap_phys_base = rounddown(min_pa, L1_SIZE);
+	dmap_phys_max = max_pa;
+
+	/* Walk the physmap table. */
+	l2 = NULL;
+	l1slot = Ln_ENTRIES; /* sentinel value */
+	for (int idx = 0; idx < physmap_idx; idx += 2) {
+		pa = rounddown(physmap[idx], L2_SIZE);
+		endpa = physmap[idx + 1];
+
+		/* Virtual address for this range. */
+		va = PHYS_TO_DMAP(pa);
+
+		/* Any 1GB possible for this range? */
+		if (roundup(pa, L1_SIZE) + L1_SIZE > endpa)
+			goto l2end;
+
+		/* Loop until the next 1GB boundary. */
+		while ((pa & L1_OFFSET) != 0) {
+			if (l2 == NULL || pmap_l1_index(va) != l1slot) {
+				/* Need to alloc another page table. */
+				l2 = pmap_early_alloc_tables(&freemempos, 1);
+
+				/* Link it. */
+				l1slot = pmap_l1_index(va);
+				pmap_store(&l1[l1slot],
+				    L1_PDE((vm_paddr_t)l2, PTE_V));
+			}
+
+			/* map l2 pages */
+			l2slot = pmap_l2_index(va);
+			pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN));
+
+			pa += L2_SIZE;
+			va += L2_SIZE;
+		}
+
+		/* Map what we can with 1GB superpages. */
+		while (pa + L1_SIZE - 1 < endpa) {
+			/* map l1 pages */
+			l1slot = pmap_l1_index(va);
+			pmap_store(&l1[l1slot], L1_PTE(pa, PTE_KERN));
+
+			pa += L1_SIZE;
+			va += L1_SIZE;
+		}
+
+l2end:
+		while (pa < endpa) {
+			if (l2 == NULL || pmap_l1_index(va) != l1slot) {
+				/* Need to alloc another page table. */
+				l2 = pmap_early_alloc_tables(&freemempos, 1);
+
+				/* Link it. */
+				l1slot = pmap_l1_index(va);
+				pmap_store(&l1[l1slot],
+				    L1_PDE((vm_paddr_t)l2, PTE_V));
+			}
+
+			/* map l2 pages */
+			l2slot = pmap_l2_index(va);
+			pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN));
+
+			pa += L2_SIZE;
+			va += L2_SIZE;
+		}
 	}
 
-	/* Set the upper limit of the DMAP region */
-	dmap_phys_max = pa;
+	/* And finally, the limit on DMAP VA. */
 	dmap_max_addr = va;
 
 	sfence_vma();
+
+	return (freemempos);
 }
 
 /*
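To see when the 1GB fast path is skipped, consider the check
"roundup(pa, L1_SIZE) + L1_SIZE > endpa" with some illustrative numbers
(these addresses are examples, not taken from the patch):

/*
 * Example: a segment [0x80200000, 0xc0000000), i.e. a bit under 1GB of
 * RAM starting just above the 2GB mark.
 *
 *   pa                             = rounddown(0x80200000, L2_SIZE) = 0x80200000
 *   roundup(pa, L1_SIZE)           = 0xc0000000
 *   roundup(pa, L1_SIZE) + L1_SIZE = 0x100000000 > endpa (0xc0000000)
 *
 * No 1GB-aligned, 1GB-sized page fits inside the segment, so the code
 * jumps to l2end and maps the whole range with 2MB pages instead.
 */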
@@ -589,7 +661,7 @@ pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
  */
 static vm_paddr_t
 pmap_create_pagetables(vm_paddr_t kernstart, vm_size_t kernlen,
-    vm_paddr_t min_pa, vm_paddr_t max_pa, vm_paddr_t *root_pt_phys)
+    vm_paddr_t *root_pt_phys)
 {
 	pt_entry_t *l0, *l1, *kern_l2, *kern_l3, *devmap_l3;
 	pd_entry_t *devmap_l2;
@@ -657,6 +729,9 @@ pmap_create_pagetables(vm_paddr_t kernstart, vm_size_t kernlen,
 	nkernl3 = 16;
 	kern_l3 = pmap_early_alloc_tables(&freemempos, nkernl3);
 
+	/* Bootstrap the direct map. */
+	freemempos = pmap_bootstrap_dmap(l1, freemempos);
+
 	/* Allocations are done. */
 	if (freemempos < roundup2(kernend, L2_SIZE))
 		freemempos = roundup2(kernend, L2_SIZE);
@@ -714,9 +789,6 @@ pmap_create_pagetables(vm_paddr_t kernstart, vm_size_t kernlen,
 	pa = (vm_paddr_t)devmap_l2;
 	pmap_store(&l1[slot], L1_PDE(pa, PTE_V));
 
-	/* Bootstrap the direct map. */
-	pmap_bootstrap_dmap((vm_offset_t)l1, min_pa, max_pa);
-
 	/* Return the next position of free memory */
 	return (freemempos);
 }
@@ -727,14 +799,11 @@ pmap_create_pagetables(vm_paddr_t kernstart, vm_size_t kernlen,
 void
 pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
 {
-	vm_paddr_t physmap[PHYS_AVAIL_ENTRIES];
-	vm_paddr_t freemempos;
-	vm_paddr_t max_pa, min_pa, pa;
+	vm_paddr_t freemempos, pa;
 	vm_paddr_t root_pt_phys;
 	vm_offset_t freeva;
 	vm_offset_t dpcpu, msgbufpv;
 	pt_entry_t *pte;
-	u_int physmap_idx;
 	int i;
 
 	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
@@ -752,31 +821,8 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
 	 */
 	CPU_SET(PCPU_GET(hart), &kernel_pmap->pm_active);
 
-	/* Assume the address we were loaded to is a valid physical address. */
-	min_pa = max_pa = kernstart;
-
-	physmap_idx = physmem_avail(physmap, nitems(physmap));
-	physmap_idx /= 2;
-
-	/*
-	 * Find the minimum physical address. physmap is sorted,
-	 * but may contain empty ranges.
-	 */
-	for (i = 0; i < physmap_idx * 2; i += 2) {
-		if (physmap[i] == physmap[i + 1])
-			continue;
-		if (physmap[i] <= min_pa)
-			min_pa = physmap[i];
-		if (physmap[i + 1] > max_pa)
-			max_pa = physmap[i + 1];
-	}
-
-	printf("physmap_idx %u\n", physmap_idx);
-	printf("min_pa %lx\n", min_pa);
-	printf("max_pa %lx\n", max_pa);
-
 	/* Create a new set of pagetables to run the kernel in. */
-	freemempos = pmap_create_pagetables(kernstart, kernlen, min_pa, max_pa,
-	    &root_pt_phys);
+	freemempos = pmap_create_pagetables(kernstart, kernlen, &root_pt_phys);
 
 	/* Switch to the newly created page tables. */
 	kernel_pmap->pm_stage = PM_STAGE1;
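One consequence called out in the new header comment is that holes between
physical memory segments stay unmapped. Assuming the usual FreeBSD-style
PHYS_TO_DMAP() translation (an offset from dmap_phys_base added to
DMAP_MIN_ADDRESS; the real macro lives in vmparam.h and is not part of
this diff), a simplified stand-alone model with illustrative addresses
looks like this:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define	DMAP_MIN_ADDRESS	UINT64_C(0xffffffd000000000)	/* illustrative */

/* As in the patch: rounddown(min_pa, L1_SIZE) for min_pa = 0x80200000. */
static uint64_t dmap_phys_base = UINT64_C(0x80000000);

/* Simplified stand-in for the kernel's PHYS_TO_DMAP() macro. */
static uint64_t
phys_to_dmap(uint64_t pa)
{
	return (DMAP_MIN_ADDRESS + (pa - dmap_phys_base));
}

int
main(void)
{
	/* Two example segments with a hole in between. */
	printf("seg0 starts at DMAP va %#" PRIx64 "\n",
	    phys_to_dmap(UINT64_C(0x80200000)));
	printf("seg1 starts at DMAP va %#" PRIx64 "\n",
	    phys_to_dmap(UINT64_C(0x100000000)));
	/* The VAs covering the hole between seg0 and seg1 are never mapped. */
	return (0);
}

Each segment's DMAP virtual address preserves its distance from
dmap_phys_base, so the virtual range corresponding to the hole between the
two segments simply never receives an L1 or L2 entry under the new scheme.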