Make v_wire_count a per-cpu counter(9) counter.  This eliminates a
significant source of cache line contention from vm_page_alloc().  Use
accessors and vm_page_unwire_noq() so that the mechanism can be easily
changed in the future.

Reviewed by:	markj
Discussed with:	kib, glebius
Tested by:	pho (earlier version)
Sponsored by:	Netflix, Dell/EMC Isilon
Differential Revision:	https://reviews.freebsd.org/D14273
commit e958ad4cf3 (parent 487340b004)
Author:	Jeff Roberson
Date:	2018-02-12 22:53:00 +00:00
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=329187

21 changed files with 65 additions and 47 deletions
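
For context on the mechanism: a counter(9) counter gives each CPU its own 64-bit slot, so an update only touches the local CPU's cache line, and a total is produced on demand by summing the slots. The fragment below is a minimal, self-contained sketch of that usage pattern, not code from this commit; the dyn_wired counter and the mod_wire_* helpers are invented names, while counter_u64_alloc(), counter_u64_add() and counter_u64_fetch() are the real counter(9) interfaces.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/counter.h>
#include <sys/malloc.h>

/* Hypothetical module-local counter, for illustration only. */
static counter_u64_t dyn_wired;

static void
mod_wire_init(void *arg __unused)
{
	/* Allocates one uint64_t per CPU; M_WAITOK cannot fail. */
	dyn_wired = counter_u64_alloc(M_WAITOK);
}
SYSINIT(mod_wire, SI_SUB_LAST, SI_ORDER_ANY, mod_wire_init, NULL);

static void
mod_wire_page(void)
{
	/* Updates only the current CPU's slot; no shared cache line. */
	counter_u64_add(dyn_wired, 1);
}

static uint64_t
mod_wire_total(void)
{
	/* Sums the per-CPU slots: O(ncpu) and only approximately current. */
	return (counter_u64_fetch(dyn_wired));
}

The trade-off is that reads become more expensive and are not exactly consistent, which is acceptable for a statistics counter such as v_wire_count; it is also why readers in the hunks below switch to the vm_wire_count() accessor rather than loading a field directly.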


@ -74,8 +74,7 @@ efi_destroy_1t1_map(void)
VM_OBJECT_RLOCK(obj_1t1_pt);
TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq)
m->wire_count = 0;
atomic_subtract_int(&vm_cnt.v_wire_count,
obj_1t1_pt->resident_page_count);
vm_wire_sub(obj_1t1_pt->resident_page_count);
VM_OBJECT_RUNLOCK(obj_1t1_pt);
vm_object_deallocate(obj_1t1_pt);
}


@ -1246,7 +1246,7 @@ pmap_init(void)
mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
mpte->wire_count = 1;
}
atomic_add_int(&vm_cnt.v_wire_count, nkpt);
vm_wire_add(nkpt);
/*
* If the kernel is running on a virtual machine, then it must assume
@ -2381,7 +2381,7 @@ pmap_free_zero_pages(struct spglist *free)
/* Preserve the page's PG_ZERO setting. */
vm_page_free_toq(m);
}
atomic_subtract_int(&vm_cnt.v_wire_count, count);
vm_wire_sub(count);
}
/*


@ -2634,11 +2634,12 @@ pmap_unwire_pt2pg(pmap_t pmap, vm_offset_t va, vm_page_t m)
pmap->pm_stats.resident_count--;
/*
* This is a release store so that the ordinary store unmapping
* This barrier is so that the ordinary store unmapping
* the L2 page table page is globally performed before TLB shoot-
* down is begun.
*/
atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
wmb();
vm_wire_sub(1);
}
/*
@ -2945,7 +2946,7 @@ pmap_pv_reclaim(pmap_t locked_pmap)
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
/* Recycle a freed page table page. */
m_pc->wire_count = 1;
atomic_add_int(&vm_cnt.v_wire_count, 1);
vm_wire_add(1);
}
pmap_free_zero_pages(&free);
return (m_pc);
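
The pmap_unwire_pt2pg() hunk above is the one place where the conversion is not purely mechanical: atomic_subtract_rel_int() carried release semantics, while vm_wire_sub() is a plain per-CPU add with no ordering guarantees of its own, so an explicit wmb() is added to keep the unmapping store ordered before the TLB shootdown. Below is a hedged illustration of the resulting pattern; demo_unwire_pt() and the bare pointer store are stand-ins for the real page table manipulation, not code from this file.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <machine/atomic.h>

static void
demo_unwire_pt(volatile uint32_t *ptep)
{
	*ptep = 0;		/* ordinary store that unmaps the page */
	wmb();			/* make that store globally visible first */
	vm_wire_sub(1);		/* per-CPU update; implies no barrier, so
				 * the wmb() above must come before any
				 * TLB shootdown that follows */
}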


@ -75,8 +75,7 @@ efi_destroy_1t1_map(void)
VM_OBJECT_RLOCK(obj_1t1_pt);
TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq)
m->wire_count = 0;
atomic_subtract_int(&vm_cnt.v_wire_count,
obj_1t1_pt->resident_page_count);
vm_wire_sub(obj_1t1_pt->resident_page_count);
VM_OBJECT_RUNLOCK(obj_1t1_pt);
vm_object_deallocate(obj_1t1_pt);
}


@ -1362,7 +1362,7 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
}
pmap_invalidate_page(pmap, va);
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_wire_sub(1);
/*
* Put page on a list so that it is released after
@ -1907,7 +1907,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
/* Recycle a freed page table page. */
m_pc->wire_count = 1;
atomic_add_int(&vm_cnt.v_wire_count, 1);
vm_wire_add(1);
}
pmap_free_zero_pages(&free);
return (m_pc);
@ -1958,7 +1958,7 @@ free_pv_chunk(struct pv_chunk *pc)
/* entire chunk is free, return it */
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
dump_drop_page(m->phys_addr);
vm_page_unwire(m, PQ_NONE);
vm_page_unwire_noq(m);
vm_page_free(m);
}
@ -2264,9 +2264,9 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
pmap_resident_count_dec(pmap, 1);
KASSERT(ml3->wire_count == NL3PG,
("pmap_remove_pages: l3 page wire count error"));
ml3->wire_count = 0;
ml3->wire_count = 1;
vm_page_unwire_noq(ml3);
pmap_add_delayed_free_list(ml3, free, FALSE);
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
}
return (pmap_unuse_pt(pmap, sva, l1e, free));
}
@ -3711,11 +3711,10 @@ pmap_remove_pages(pmap_t pmap)
pmap_resident_count_dec(pmap,1);
KASSERT(ml3->wire_count == NL3PG,
("pmap_remove_pages: l3 page wire count error"));
ml3->wire_count = 0;
ml3->wire_count = 1;
vm_page_unwire_noq(ml3);
pmap_add_delayed_free_list(ml3,
&free, FALSE);
atomic_subtract_int(
&vm_cnt.v_wire_count, 1);
}
break;
case 2:


@ -163,7 +163,7 @@ linprocfs_domeminfo(PFS_FILL_ARGS)
* is very little memory left, so we cheat and tell them that
* all memory that isn't wired down is free.
*/
memused = vm_cnt.v_wire_count * PAGE_SIZE;
memused = vm_wire_count() * PAGE_SIZE;
memfree = memtotal - memused;
swap_pager_status(&i, &j);
swaptotal = (unsigned long long)i * PAGE_SIZE;


@ -165,7 +165,7 @@ linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
sysinfo.totalram = physmem * PAGE_SIZE;
sysinfo.freeram = sysinfo.totalram - vm_cnt.v_wire_count * PAGE_SIZE;
sysinfo.freeram = sysinfo.totalram - vm_wire_count() * PAGE_SIZE;
sysinfo.sharedram = 0;
mtx_lock(&vm_object_list_mtx);


@ -1718,7 +1718,7 @@ pmap_free_zero_pages(struct spglist *free)
/* Preserve the page's PG_ZERO setting. */
vm_page_free_toq(m);
}
atomic_subtract_int(&vm_cnt.v_wire_count, count);
vm_wire_sub(count);
}
/*


@ -206,7 +206,7 @@ sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
{
u_long val;
val = ctob(physmem - vm_cnt.v_wire_count);
val = ctob(physmem - vm_wire_count());
return (sysctl_handle_long(oidp, &val, 0, req));
}


@ -151,7 +151,7 @@ pcpu_zones_startup(void)
pcpu_zone_ptr = uma_zcreate("ptr pcpu", sizeof(void *),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
}
SYSINIT(pcpu_zones, SI_SUB_KMEM, SI_ORDER_ANY, pcpu_zones_startup, NULL);
SYSINIT(pcpu_zones, SI_SUB_VM, SI_ORDER_ANY, pcpu_zones_startup, NULL);
/*
* First-fit extent based allocator for allocating space in the per-cpu


@ -4552,7 +4552,7 @@ vm_hold_free_pages(struct buf *bp, int newbsize)
p->wire_count--;
vm_page_free(p);
}
atomic_subtract_int(&vm_cnt.v_wire_count, bp->b_npages - newnpages);
vm_wire_sub(bp->b_npages - newnpages);
bp->b_npages = newnpages;
}


@ -1009,7 +1009,7 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
* If the page is finally unwired, simply free it.
*/
vm_page_free_zero(m);
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_wire_sub(1);
}
/*


@ -681,7 +681,7 @@ pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx)
pa = pte_vatopa(mmu, kernel_pmap, va);
m = PHYS_TO_VM_PAGE(pa);
vm_page_free_zero(m);
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_wire_sub(1);
pmap_kremove(va);
}
@ -786,7 +786,7 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx,
ptbl_free_pmap_ptbl(pmap, ptbl);
for (j = 0; j < i; j++)
vm_page_free(mtbl[j]);
atomic_subtract_int(&vm_cnt.v_wire_count, i);
vm_wire_sub(i);
return (NULL);
}
VM_WAIT;
@ -828,7 +828,7 @@ ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx)
pa = pte_vatopa(mmu, kernel_pmap, va);
m = PHYS_TO_VM_PAGE(pa);
vm_page_free_zero(m);
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_wire_sub(1);
pmap_kremove(va);
}
@ -1030,7 +1030,7 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep)
ptbl_free_pmap_ptbl(pmap, ptbl);
for (j = 0; j < i; j++)
vm_page_free(mtbl[j]);
atomic_subtract_int(&vm_cnt.v_wire_count, i);
vm_wire_sub(i);
return (NULL);
}
VM_WAIT;
@ -1091,7 +1091,7 @@ ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
pa = pte_vatopa(mmu, kernel_pmap, va);
m = PHYS_TO_VM_PAGE(pa);
vm_page_free_zero(m);
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_wire_sub(1);
mmu_booke_kremove(mmu, va);
}


@ -1153,7 +1153,7 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
}
pmap_invalidate_page(pmap, va);
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_wire_sub(1);
/*
* Put page on a list so that it is released after


@ -1308,8 +1308,7 @@ pmap_release(pmap_t pm)
while (!TAILQ_EMPTY(&obj->memq)) {
m = TAILQ_FIRST(&obj->memq);
m->md.pmap = NULL;
m->wire_count--;
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_page_unwire_noq(m);
vm_page_free_zero(m);
}
VM_OBJECT_WUNLOCK(obj);


@ -125,6 +125,7 @@ struct vmmeter {
counter_u64_t v_vforkpages; /* (p) pages affected by vfork() */
counter_u64_t v_rforkpages; /* (p) pages affected by rfork() */
counter_u64_t v_kthreadpages; /* (p) ... and by kernel fork() */
counter_u64_t v_wire_count; /* (p) pages wired down */
#define VM_METER_NCOUNTERS \
(offsetof(struct vmmeter, v_page_size) / sizeof(counter_u64_t))
/*
@ -139,7 +140,6 @@ struct vmmeter {
u_int v_pageout_free_min; /* (c) min pages reserved for kernel */
u_int v_interrupt_free_min; /* (c) reserved pages for int code */
u_int v_free_severe; /* (c) severe page depletion point */
u_int v_wire_count VMMETER_ALIGNED; /* (a) pages wired down */
};
#endif /* _KERNEL || _WANT_VMMETER */
@ -155,7 +155,27 @@ extern domainset_t vm_severe_domains;
#define VM_CNT_INC(var) VM_CNT_ADD(var, 1)
#define VM_CNT_FETCH(var) counter_u64_fetch(vm_cnt.var)
static inline void
vm_wire_add(int cnt)
{
VM_CNT_ADD(v_wire_count, cnt);
}
static inline void
vm_wire_sub(int cnt)
{
VM_CNT_ADD(v_wire_count, -cnt);
}
u_int vm_free_count(void);
static inline u_int
vm_wire_count(void)
{
return (VM_CNT_FETCH(v_wire_count));
}
/*
* Return TRUE if we are under our severe low-free-pages threshold


@ -209,7 +209,8 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
mtx_lock(&sw_dev_mtx);
r = swap_reserved + incr;
if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) {
s = vm_cnt.v_page_count - vm_cnt.v_free_reserved - vm_cnt.v_wire_count;
s = vm_cnt.v_page_count - vm_cnt.v_free_reserved -
vm_wire_count();
s *= PAGE_SIZE;
} else
s = 0;


@ -191,7 +191,7 @@ vslock(void *addr, size_t len)
* Also, the sysctl code, which is the only present user
* of vslock(), does a hard loop on EAGAIN.
*/
if (npages + vm_cnt.v_wire_count > vm_page_max_wired)
if (npages + vm_wire_count() > vm_page_max_wired)
return (EAGAIN);
#endif
error = vm_map_wire(&curproc->p_vmspace->vm_map, start, end,


@ -96,6 +96,7 @@ struct vmmeter __exclusive_cache_line vm_cnt = {
.v_vforkpages = EARLY_COUNTER,
.v_rforkpages = EARLY_COUNTER,
.v_kthreadpages = EARLY_COUNTER,
.v_wire_count = EARLY_COUNTER,
};
static void
@ -105,7 +106,7 @@ vmcounter_startup(void)
COUNTER_ARRAY_ALLOC(cnt, VM_METER_NCOUNTERS, M_WAITOK);
}
SYSINIT(counter, SI_SUB_CPU, SI_ORDER_FOURTH + 1, vmcounter_startup, NULL);
SYSINIT(counter, SI_SUB_KMEM, SI_ORDER_FIRST, vmcounter_startup, NULL);
SYSCTL_UINT(_vm, VM_V_FREE_MIN, v_free_min,
CTLFLAG_RW, &vm_cnt.v_free_min, 0, "Minimum low-free-pages threshold");
@ -403,7 +404,7 @@ VM_STATS_UINT(v_free_reserved, "Pages reserved for deadlock");
VM_STATS_UINT(v_free_target, "Pages desired free");
VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold");
VM_STATS_PROC(v_free_count, "Free pages", vm_free_count);
VM_STATS_UINT(v_wire_count, "Wired pages");
VM_STATS_PROC(v_wire_count, "Wired pages", vm_wire_count);
VM_STATS_PROC(v_active_count, "Active pages", vm_active_count);
VM_STATS_UINT(v_inactive_target, "Desired inactive pages");
VM_STATS_PROC(v_inactive_count, "Inactive pages", vm_inactive_count);
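
The vmcounter_startup() and VM_STATS hunk above handles two consequences of the switch. First, v_wire_count now starts life as EARLY_COUNTER, and vmcounter_startup() is pulled forward from SI_SUB_CPU to SI_SUB_KMEM, with the pcpu_zones_startup() SYSINIT moving from SI_SUB_KMEM to SI_SUB_VM in an earlier hunk so the backing UMA zones exist by then -- presumably so the real per-CPU counters are in place before the bulk of early page wiring happens. Second, the v_wire_count sysctl can no longer export a plain u_int field and instead goes through a fetch function. Below is a hedged sketch of what such a fetch-backed read-only sysctl can look like; it is not the actual VM_STATS_PROC expansion, and the handler and node names are invented.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

SYSCTL_DECL(_vm_stats_vm);

/* Truncate the 64-bit per-CPU sum to the u_int the "IU" format expects. */
static int
sysctl_demo_wire_count(SYSCTL_HANDLER_ARGS)
{
	u_int val;

	val = vm_wire_count();
	return (sysctl_handle_int(oidp, &val, 0, req));
}
SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_wire_count_demo,
    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_demo_wire_count, "IU", "Wired pages (illustrative handler)");

Keeping the "IU" format preserves the 32-bit vm.stats.vm.v_wire_count interface that existing userland consumers expect, even though the counter is now 64 bits wide internally.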


@ -1002,7 +1002,7 @@ kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len)
return (ENOMEM);
}
PROC_UNLOCK(proc);
if (npages + vm_cnt.v_wire_count > vm_page_max_wired)
if (npages + vm_wire_count() > vm_page_max_wired)
return (EAGAIN);
#ifdef RACCT
if (racct_enable) {


@ -1796,7 +1796,7 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
* The page lock is not required for wiring a page until that
* page is inserted into the object.
*/
atomic_add_int(&vm_cnt.v_wire_count, 1);
vm_wire_add(1);
m->wire_count = 1;
}
m->act_count = 0;
@ -1805,7 +1805,7 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
if (vm_page_insert_after(m, object, pindex, mpred)) {
pagedaemon_wakeup(domain);
if (req & VM_ALLOC_WIRED) {
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_wire_sub(1);
m->wire_count = 0;
}
KASSERT(m->object == NULL, ("page %p has object", m));
@ -1989,7 +1989,7 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
if ((req & VM_ALLOC_SBUSY) != 0)
busy_lock = VPB_SHARERS_WORD(1);
if ((req & VM_ALLOC_WIRED) != 0)
atomic_add_int(&vm_cnt.v_wire_count, npages);
vm_wire_add(npages);
if (object != NULL) {
if (object->memattr != VM_MEMATTR_DEFAULT &&
memattr == VM_MEMATTR_DEFAULT)
@ -2007,8 +2007,7 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
if (vm_page_insert_after(m, object, pindex, mpred)) {
pagedaemon_wakeup(domain);
if ((req & VM_ALLOC_WIRED) != 0)
atomic_subtract_int(
&vm_cnt.v_wire_count, npages);
vm_wire_sub(npages);
KASSERT(m->object == NULL,
("page %p has object", m));
mpred = m;
@ -2133,7 +2132,7 @@ vm_page_alloc_freelist_domain(int domain, int freelist, int req)
* The page lock is not required for wiring a page that does
* not belong to an object.
*/
atomic_add_int(&vm_cnt.v_wire_count, 1);
vm_wire_add(1);
m->wire_count = 1;
}
/* Unmanaged pages don't use "act_count". */
@ -3256,7 +3255,7 @@ vm_page_wire(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0 ||
m->queue == PQ_NONE,
("vm_page_wire: unmanaged page %p is queued", m));
atomic_add_int(&vm_cnt.v_wire_count, 1);
vm_wire_add(1);
}
m->wire_count++;
KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m));
@ -3331,7 +3330,7 @@ vm_page_unwire_noq(vm_page_t m)
panic("vm_page_unwire: page %p's wire count is zero", m);
m->wire_count--;
if (m->wire_count == 0) {
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_wire_sub(1);
return (true);
} else
return (false);
@ -4157,7 +4156,7 @@ DB_SHOW_COMMAND(page, vm_page_print_page_info)
db_printf("vm_cnt.v_inactive_count: %d\n", vm_inactive_count());
db_printf("vm_cnt.v_active_count: %d\n", vm_active_count());
db_printf("vm_cnt.v_laundry_count: %d\n", vm_laundry_count());
db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count);
db_printf("vm_cnt.v_wire_count: %d\n", vm_wire_count());
db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target);