i386 pmap: Adapt recent amd64/arm64 superpage improvements

Don't recompute mpte during promotion.

Optimize MADV_WILLNEED on existing superpages.
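
A condensed sketch of the caller-side handling in pmap_enter_object(), taken
from the diff below: pmap_enter_4mpage() now returns KERN_NO_SPACE when a 2 or
4 MB mapping already exists, and that case is treated like success, so the
existing superpage mapping is kept instead of being replaced page by page.

	/*
	 * KERN_NO_SPACE means that an equivalent superpage mapping already
	 * exists; skip ahead rather than mapping the range with 4 KB pages.
	 */
	if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
	    m->psind == 1 && pg_ps_enabled &&
	    ((rv = pmap_enter_4mpage(pmap, va, m, prot)) == KERN_SUCCESS ||
	    rv == KERN_NO_SPACE))
		m = &m[NBPDR / PAGE_SIZE - 1];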

Standardize promotion conditions across amd64, arm64, and i386.

Stop requiring the accessed bit for superpage promotion.

Tidy up pmap_promote_pde() calls.
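
With these changes, the promotion attempt at both 4 KB call sites,
pmap_enter() and pmap_enter_quick_locked(), follows one pattern (condensed
from the diff below; the pg_ps_enabled check now lives inside
pmap_promote_pde() itself):

	/*
	 * If both the PTP and the reservation are fully populated, then
	 * attempt promotion.
	 */
	if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
	    (m->flags & PG_FICTITIOUS) == 0 &&
	    vm_reserv_level_iffullpop(m) == 0)
		(void)pmap_promote_pde(pmap, pde, va, mpte);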

Retire PMAP_INLINE.  It's no longer used.

Note: Some of these changes are a prerequisite to fixing a panic that
arises when attempting to create a wired superpage mapping by
pmap_enter(psind=1) (as opposed to promotion).
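
For context, such a mapping would be requested through the machine-independent
pmap interface roughly as follows.  This is only an illustrative sketch of a
psind == 1 call; the fix itself, and the surrounding fault-handling code, are
not part of this commit.

	/*
	 * Hypothetical caller: install a wired 2 or 4 MB mapping directly,
	 * rather than relying on promotion of 4 KB mappings.  "m" is assumed
	 * to be the first page of a fully populated, superpage-aligned run.
	 */
	rv = pmap_enter(pmap, trunc_4mpage(va), m, prot,
	    prot | PMAP_ENTER_WIRED, /* psind */ 1);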

Reviewed by:	kib
MFC after:	1 week
Differential Revision:	https://reviews.freebsd.org/D41944
commit 902ed64fec
parent 29a55fd09b
Author:	Alan Cox
Date:	2023-09-24 13:21:36 -05:00

 2 files changed, 137 insertions(+), 65 deletions(-)


@ -159,16 +159,6 @@
#endif
#include <machine/pmap_base.h>
-#if !defined(DIAGNOSTIC)
-#ifdef __GNUC_GNU_INLINE__
-#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
-#else
-#define PMAP_INLINE extern inline
-#endif
-#else
-#define PMAP_INLINE
-#endif
#ifdef PV_STATS
#define PV_STAT(x) do { x ; } while (0)
#else
@ -311,13 +301,14 @@ static int pmap_pvh_wired_mappings(struct md_page *pvh, int count);
static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
-static bool pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
+static int pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot);
static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
u_int flags, vm_page_t m);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
-static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool allpte_PG_A_set);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
@ -327,7 +318,8 @@ static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
#if VM_NRESERVLEVEL > 0
-static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static bool pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+vm_page_t mpte);
#endif
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
vm_prot_t prot);
@ -993,7 +985,7 @@ __CONCAT(PMTYPE, init)(void)
*/
if (pseflag != 0 &&
KERNBASE <= i << PDRSHIFT && i << PDRSHIFT < KERNend &&
-pmap_insert_pt_page(kernel_pmap, mpte, true))
+pmap_insert_pt_page(kernel_pmap, mpte, true, true))
panic("pmap_init: pmap_insert_pt_page failed");
}
PMAP_UNLOCK(kernel_pmap);
@ -1928,14 +1920,26 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
* for mapping a distinct range of virtual addresses. The pmap's collection is
* ordered by this virtual address range.
*
-* If "promoted" is false, then the page table page "mpte" must be zero filled.
+* If "promoted" is false, then the page table page "mpte" must be zero filled;
+* "mpte"'s valid field will be set to 0.
+*
+* If "promoted" is true and "allpte_PG_A_set" is false, then "mpte" must
+* contain valid mappings with identical attributes except for PG_A; "mpte"'s
+* valid field will be set to 1.
+*
+* If "promoted" and "allpte_PG_A_set" are both true, then "mpte" must contain
+* valid mappings with identical attributes including PG_A; "mpte"'s valid
+* field will be set to VM_PAGE_BITS_ALL.
*/
static __inline int
-pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool allpte_PG_A_set)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
+KASSERT(promoted || !allpte_PG_A_set,
+("a zero-filled PTP can't have PG_A set in every PTE"));
+mpte->valid = promoted ? (allpte_PG_A_set ? VM_PAGE_BITS_ALL : 1) : 0;
return (vm_radix_insert(&pmap->pm_root, mpte));
}
@ -2843,10 +2847,11 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
newpte ^= PG_PDE_PAT | PG_PTE_PAT;
/*
-* If the page table page is not leftover from an earlier promotion,
-* initialize it.
+* If the PTP is not leftover from an earlier promotion or it does not
+* have PG_A set in every PTE, then fill it. The new PTEs will all
+* have PG_A set.
*/
-if (vm_page_none_valid(mpte))
+if (!vm_page_all_valid(mpte))
pmap_fill_ptp(firstpte, newpte);
KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
@ -2854,8 +2859,7 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
" addresses"));
/*
-* If the mapping has changed attributes, update the page table
-* entries.
+* If the mapping has changed attributes, update the PTEs.
*/
if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
pmap_fill_ptp(firstpte, newpte);
@ -2985,7 +2989,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
} else {
mpte = pmap_remove_pt_page(pmap, sva);
if (mpte != NULL) {
-KASSERT(vm_page_all_valid(mpte),
+KASSERT(vm_page_any_valid(mpte),
("pmap_remove_pde: pte page not promoted"));
pmap->pm_stats.resident_count--;
KASSERT(mpte->ref_count == NPTEPG,
@ -3469,38 +3473,56 @@ __CONCAT(PMTYPE, protect)(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
* pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel
* pmap.
*/
-static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
+static bool
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte)
{
pd_entry_t newpde;
-pt_entry_t *firstpte, oldpte, pa, *pte;
+pt_entry_t allpte_PG_A, *firstpte, oldpte, pa, *pte;
#ifdef KTR
vm_offset_t oldpteva;
#endif
-vm_page_t mpte;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+if (!pg_ps_enabled)
+return (false);
/*
* Examine the first PTE in the specified PTP. Abort if this PTE is
-either invalid, unused, or does not map the first 4KB physical page
+either invalid or does not map the first 4KB physical page
* within a 2- or 4MB page.
*/
firstpte = pmap_pte_quick(pmap, trunc_4mpage(va));
setpde:
newpde = *firstpte;
-if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
+if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) {
pmap_pde_p_failures++;
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
" in pmap %p", va, pmap);
-return;
+return (false);
}
if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) {
pmap_pde_p_failures++;
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
" in pmap %p", va, pmap);
-return;
+return (false);
}
+/*
+* Both here and in the below "for" loop, to allow for repromotion
+* after MADV_FREE, conditionally write protect a clean PTE before
+* possibly aborting the promotion due to other PTE attributes. Why?
+* Suppose that MADV_FREE is applied to a part of a superpage, the
+* address range [S, E). pmap_advise() will demote the superpage
+* mapping, destroy the 4KB page mapping at the end of [S, E), and
+* clear PG_M and PG_A in the PTEs for the rest of [S, E). Later,
+* imagine that the memory in [S, E) is recycled, but the last 4KB
+* page in [S, E) is not the last to be rewritten, or simply accessed.
+* In other words, there is still a 4KB page in [S, E), call it P,
+* that is writeable but PG_M and PG_A are clear in P's PTE. Unless
+* we write protect P before aborting the promotion, if and when P is
+* finally rewritten, there won't be a page fault to trigger
+* repromotion.
+*/
if ((newpde & (PG_M | PG_RW)) == PG_RW) {
/*
* When PG_M is already clear, PG_RW can be cleared without
@ -3510,6 +3532,8 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
~PG_RW))
goto setpde;
newpde &= ~PG_RW;
+CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx"
+" in pmap %p", va & ~PDRMASK, pmap);
}
/*
@ -3517,15 +3541,16 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
* PTE maps an unexpected 4KB physical page or does not have identical
* characteristics to the first PTE.
*/
-pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
+allpte_PG_A = newpde & PG_A;
+pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE;
for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
setpte:
oldpte = *pte;
-if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
+if ((oldpte & (PG_FRAME | PG_V)) != pa) {
pmap_pde_p_failures++;
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
" in pmap %p", va, pmap);
-return;
+return (false);
}
if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
/*
@ -3547,28 +3572,39 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
pmap_pde_p_failures++;
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
" in pmap %p", va, pmap);
-return;
+return (false);
}
+allpte_PG_A &= oldpte;
pa -= PAGE_SIZE;
}
/*
-* Save the page table page in its current state until the PDE
-* mapping the superpage is demoted by pmap_demote_pde() or
-* destroyed by pmap_remove_pde().
+* Unless all PTEs have PG_A set, clear it from the superpage mapping,
+* so that promotions triggered by speculative mappings, such as
+* pmap_enter_quick(), don't automatically mark the underlying pages
+* as referenced.
*/
-mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+newpde &= ~PG_A | allpte_PG_A;
+/*
+* Save the PTP in its current state until the PDE mapping the
+* superpage is demoted by pmap_demote_pde() or destroyed by
+* pmap_remove_pde(). If PG_A is not set in every PTE, then request
+* that the PTP be refilled on demotion.
+*/
+if (mpte == NULL)
+mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
KASSERT(mpte >= vm_page_array &&
mpte < &vm_page_array[vm_page_array_size],
("pmap_promote_pde: page table page is out of range"));
KASSERT(mpte->pindex == va >> PDRSHIFT,
("pmap_promote_pde: page table page's pindex is wrong"));
-if (pmap_insert_pt_page(pmap, mpte, true)) {
+if (pmap_insert_pt_page(pmap, mpte, true, allpte_PG_A != 0)) {
pmap_pde_p_failures++;
CTR2(KTR_PMAP,
"pmap_promote_pde: failure for va %#x in pmap %p", va,
pmap);
-return;
+return (false);
}
/*
@ -3596,6 +3632,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
pmap_pde_promotions++;
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x"
" in pmap %p", va, pmap);
+return (true);
}
#endif /* VM_NRESERVLEVEL > 0 */
@ -3859,9 +3896,9 @@ __CONCAT(PMTYPE, enter)(pmap_t pmap, vm_offset_t va, vm_page_t m,
* populated, then attempt promotion.
*/
if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
-pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
+(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0)
-pmap_promote_pde(pmap, pde, va);
+(void)pmap_promote_pde(pmap, pde, va, mpte);
#endif
rv = KERN_SUCCESS;
@ -3874,11 +3911,11 @@ __CONCAT(PMTYPE, enter)(pmap_t pmap, vm_offset_t va, vm_page_t m,
/*
* Tries to create a read- and/or execute-only 2 or 4 MB page mapping. Returns
-true if successful. Returns false if (1) a mapping already exists at the
-specified virtual address or (2) a PV entry cannot be allocated without
-reclaiming another PV entry.
+KERN_SUCCESS if the mapping was created. Otherwise, returns an error
+value. See pmap_enter_pde() for the possible error values when "no sleep",
+"no replace", and "no reclaim" are specified.
*/
-static bool
+static int
pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
pd_entry_t newpde;
@ -3895,8 +3932,7 @@ pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
if (pmap != kernel_pmap)
newpde |= PG_U;
return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP |
-PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL) ==
-KERN_SUCCESS);
+PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL));
}
/*
@ -3919,11 +3955,14 @@ pmap_every_pte_zero(vm_offset_t va)
/*
* Tries to create the specified 2 or 4 MB page mapping. Returns KERN_SUCCESS
-if the mapping was created, and either KERN_FAILURE or
-KERN_RESOURCE_SHORTAGE otherwise. Returns KERN_FAILURE if
-PMAP_ENTER_NOREPLACE was specified and a mapping already exists at the
-specified virtual address. Returns KERN_RESOURCE_SHORTAGE if
-PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
+if the mapping was created, and one of KERN_FAILURE, KERN_NO_SPACE,
+or KERN_RESOURCE_SHORTAGE otherwise. Returns KERN_FAILURE if
+PMAP_ENTER_NOREPLACE was specified and a 4 KB page mapping already exists
+within the 2 or 4 MB virtual address range starting at the specified virtual
+address. Returns KERN_NO_SPACE if PMAP_ENTER_NOREPLACE was specified and a
+2 or 4 MB page mapping already exists at the specified virtual address.
+Returns KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NORECLAIM was specified and a
+PV entry allocation failed.
*
* The parameter "m" is only used when creating a managed, writeable mapping.
*/
@ -3944,12 +3983,19 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
pde = pmap_pde(pmap, va);
oldpde = *pde;
if ((oldpde & PG_V) != 0) {
-if ((flags & PMAP_ENTER_NOREPLACE) != 0 && (pmap !=
-kernel_pmap || (oldpde & PG_PS) != 0 ||
-!pmap_every_pte_zero(va))) {
-CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
-" in pmap %p", va, pmap);
-return (KERN_FAILURE);
+if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
+if ((oldpde & PG_PS) != 0) {
+CTR2(KTR_PMAP,
+"pmap_enter_pde: no space for va %#lx"
+" in pmap %p", va, pmap);
+return (KERN_NO_SPACE);
+} else if (pmap != kernel_pmap ||
+!pmap_every_pte_zero(va)) {
+CTR2(KTR_PMAP,
+"pmap_enter_pde: failure for va %#lx"
+" in pmap %p", va, pmap);
+return (KERN_FAILURE);
+}
}
/* Break the existing mapping(s). */
SLIST_INIT(&free);
@ -3978,7 +4024,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
* leave the kernel page table page zero filled.
*/
mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
-if (pmap_insert_pt_page(pmap, mt, false))
+if (pmap_insert_pt_page(pmap, mt, false, false))
panic("pmap_enter_pde: trie insert failed");
}
}
@ -4035,6 +4081,7 @@ __CONCAT(PMTYPE, enter_object)(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_offset_t va;
vm_page_t m, mpte;
vm_pindex_t diff, psize;
+int rv;
VM_OBJECT_ASSERT_LOCKED(m_start->object);
@ -4047,7 +4094,8 @@ __CONCAT(PMTYPE, enter_object)(pmap_t pmap, vm_offset_t start, vm_offset_t end,
va = start + ptoa(diff);
if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
m->psind == 1 && pg_ps_enabled &&
-pmap_enter_4mpage(pmap, va, m, prot))
+((rv = pmap_enter_4mpage(pmap, va, m, prot)) ==
+KERN_SUCCESS || rv == KERN_NO_SPACE))
m = &m[NBPDR / PAGE_SIZE - 1];
else
mpte = pmap_enter_quick_locked(pmap, va, m, prot,
@ -4084,12 +4132,14 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot, vm_page_t mpte)
{
pt_entry_t newpte, *pte;
+pd_entry_t *pde;
KASSERT(pmap != kernel_pmap || !VA_IS_CLEANMAP(va) ||
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+pde = NULL;
/*
* In the case that a page table page is not
@ -4109,7 +4159,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
/*
* Get the page directory entry
*/
-ptepa = pmap->pm_pdir[ptepindex];
+pde = &pmap->pm_pdir[ptepindex];
+ptepa = *pde;
/*
* If the page table page is mapped, we just increment
@ -4167,6 +4218,27 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
if (pmap != kernel_pmap)
newpte |= PG_U;
pte_store_zero(pte, newpte);
+#if VM_NRESERVLEVEL > 0
+/*
+* If both the PTP and the reservation are fully populated, then
+* attempt promotion.
+*/
+if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
+(m->flags & PG_FICTITIOUS) == 0 &&
+vm_reserv_level_iffullpop(m) == 0) {
+if (pde == NULL)
+pde = pmap_pde(pmap, va);
+/*
+* If promotion succeeds, then the next call to this function
+* should not be given the unmapped PTP as a hint.
+*/
+if (pmap_promote_pde(pmap, pde, va, mpte))
+mpte = NULL;
+}
+#endif
sched_unpin();
return (mpte);
}
@ -4836,7 +4908,7 @@ __CONCAT(PMTYPE, remove_pages)(pmap_t pmap)
}
mpte = pmap_remove_pt_page(pmap, pv->pv_va);
if (mpte != NULL) {
-KASSERT(vm_page_all_valid(mpte),
+KASSERT(vm_page_any_valid(mpte),
("pmap_remove_pages: pte page not promoted"));
pmap->pm_stats.resident_count--;
KASSERT(mpte->ref_count == NPTEPG,


@ -83,7 +83,7 @@
* 4KB (PTE) page mappings have identical settings for the following fields:
*/
#define PG_PTE_PROMOTE (PG_MANAGED | PG_W | PG_G | PG_PTE_PAT | \
-PG_M | PG_A | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V)
+PG_M | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V)
/*
* Page Protection Exception bits