vm_phys: fix freelist_contig

vm_phys_find_freelist_contig is called to search a list of max-sized
free page blocks and find one that, when joined with adjacent blocks
in memory, can satisfy a request for a memory allocation bigger than
any single max-sized free page block. In commit
fa8a6585c7, I defined this function in
order to offer two improvements: 1) reduce the worst-case search time,
and 2) allow solutions that include less-than max-sized free page
blocks at the front or back of the giant allocation. However, it turns
out that this change introduced an error, reported in In Bug
274592. That error concerns failing to check segment boundaries. This
change fixes an error in vm_phys_find_freelist_config that resolves
that bug. It also abandons improvement 2), because the value of that
improvement is small and because preserving it would require more
testing than I am able to do.

PR:		274592
Reported by:	shafaisal.us@gmail.com
Reviewed by:	alc, markj
Tested by:	shafaisal.us@gmail.com
Fixes:	fa8a6585c7 vm_phys: avoid waste in multipage allocation
MFC after:	10 days
Differential Revision:	https://reviews.freebsd.org/D42509
This commit is contained in:
Doug Moore 2023-11-15 03:25:45 -06:00
parent ede077bf2a
commit 2a4897bd4e

View file

@ -1360,108 +1360,75 @@ vm_phys_unfree_page(vm_page_t m)
}
/*
* Find a run of contiguous physical pages from the specified page list.
* Find a run of contiguous physical pages, meeting alignment requirements, from
* a list of max-sized page blocks, where we need at least two consecutive
* blocks to satisfy the (large) page request.
*/
static vm_page_t
vm_phys_find_freelist_contig(struct vm_freelist *fl, int oind, u_long npages,
vm_phys_find_freelist_contig(struct vm_freelist *fl, u_long npages,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
struct vm_phys_seg *seg;
vm_paddr_t frag, lbound, pa, page_size, pa_end, pa_pre, size;
vm_page_t m, m_listed, m_ret;
int order;
vm_page_t m, m_iter, m_ret;
vm_paddr_t max_size, size;
int max_order;
KASSERT(npages > 0, ("npages is 0"));
KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
/* Search for a run satisfying the specified conditions. */
page_size = PAGE_SIZE;
max_order = VM_NFREEORDER - 1;
size = npages << PAGE_SHIFT;
frag = (npages & ~(~0UL << oind)) << PAGE_SHIFT;
TAILQ_FOREACH(m_listed, &fl[oind].pl, listq) {
max_size = (vm_paddr_t)1 << (PAGE_SHIFT + max_order);
KASSERT(size > max_size, ("size is too small"));
/*
* In order to avoid examining any free max-sized page block more than
* twice, identify the ones that are first in a physically-contiguous
* sequence of such blocks, and only for those walk the sequence to
* check if there are enough free blocks starting at a properly aligned
* block. Thus, no block is checked for free-ness more than twice.
*/
TAILQ_FOREACH(m, &fl[max_order].pl, listq) {
/*
* Determine if the address range starting at pa is
* too low.
* Skip m unless it is first in a sequence of free max page
* blocks >= low in its segment.
*/
pa = VM_PAGE_TO_PHYS(m_listed);
if (pa < low)
seg = &vm_phys_segs[m->segind];
if (VM_PAGE_TO_PHYS(m) < MAX(low, seg->start))
continue;
if (VM_PAGE_TO_PHYS(m) >= max_size &&
VM_PAGE_TO_PHYS(m) - max_size >= MAX(low, seg->start) &&
max_order == m[-1 << max_order].order)
continue;
/*
* If this is not the first free oind-block in this range, bail
* out. We have seen the first free block already, or will see
* it before failing to find an appropriate range.
* Advance m_ret from m to the first of the sequence, if any,
* that satisfies alignment conditions and might leave enough
* space.
*/
seg = &vm_phys_segs[m_listed->segind];
lbound = low > seg->start ? low : seg->start;
pa_pre = pa - (page_size << oind);
m = &seg->first_page[atop(pa_pre - seg->start)];
if (pa != 0 && pa_pre >= lbound && m->order == oind)
m_ret = m;
while (!vm_addr_ok(VM_PAGE_TO_PHYS(m_ret),
size, alignment, boundary) &&
VM_PAGE_TO_PHYS(m_ret) + size <= MIN(high, seg->end) &&
max_order == m_ret[1 << max_order].order)
m_ret += 1 << max_order;
/*
* Skip m unless some block m_ret in the sequence is properly
* aligned, and begins a sequence of enough pages less than
* high, and in the same segment.
*/
if (VM_PAGE_TO_PHYS(m_ret) + size > MIN(high, seg->end))
continue;
if (!vm_addr_align_ok(pa, alignment))
/* Advance to satisfy alignment condition. */
pa = roundup2(pa, alignment);
else if (frag != 0 && lbound + frag <= pa) {
/*
* Back up to the first aligned free block in this
* range, without moving below lbound.
*/
pa_end = pa;
for (order = oind - 1; order >= 0; order--) {
pa_pre = pa_end - (page_size << order);
if (!vm_addr_align_ok(pa_pre, alignment))
break;
m = &seg->first_page[atop(pa_pre - seg->start)];
if (pa_pre >= lbound && m->order == order)
pa_end = pa_pre;
}
/*
* If the extra small blocks are enough to complete the
* fragment, use them. Otherwise, look to allocate the
* fragment at the other end.
*/
if (pa_end + frag <= pa)
pa = pa_end;
/*
* Skip m unless the blocks to allocate starting at m_ret are
* all free.
*/
for (m_iter = m_ret;
m_iter < m_ret + npages && max_order == m_iter->order;
m_iter += 1 << max_order) {
}
/* Advance as necessary to satisfy boundary conditions. */
if (!vm_addr_bound_ok(pa, size, boundary))
pa = roundup2(pa + 1, boundary);
pa_end = pa + size;
/*
* Determine if the address range is valid (without overflow in
* pa_end calculation), and fits within the segment.
*/
if (pa_end < pa || seg->end < pa_end)
if (m_iter < m_ret + npages)
continue;
m_ret = &seg->first_page[atop(pa - seg->start)];
/*
* Determine whether there are enough free oind-blocks here to
* satisfy the allocation request.
*/
pa = VM_PAGE_TO_PHYS(m_listed);
do {
pa += page_size << oind;
if (pa >= pa_end)
return (m_ret);
m = &seg->first_page[atop(pa - seg->start)];
} while (oind == m->order);
/*
* Determine if an additional series of free blocks of
* diminishing size can help to satisfy the allocation request.
*/
while (m->order < oind &&
pa + 2 * (page_size << m->order) > pa_end) {
pa += page_size << m->order;
if (pa >= pa_end)
return (m_ret);
m = &seg->first_page[atop(pa - seg->start)];
}
return (m_ret);
}
return (NULL);
}
@ -1508,11 +1475,10 @@ vm_phys_find_queues_contig(
}
if (order < VM_NFREEORDER)
return (NULL);
/* Search for a long-enough sequence of small blocks. */
oind = VM_NFREEORDER - 1;
/* Search for a long-enough sequence of max-order blocks. */
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
fl = (*queues)[pind];
m_ret = vm_phys_find_freelist_contig(fl, oind, npages,
m_ret = vm_phys_find_freelist_contig(fl, npages,
low, high, alignment, boundary);
if (m_ret != NULL)
return (m_ret);
@ -1593,6 +1559,18 @@ vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
/* Return excess pages to the free lists. */
fl = (*queues)[VM_FREEPOOL_DEFAULT];
vm_phys_enq_range(&m_run[npages], m - &m_run[npages], fl, 0);
/* Return page verified to satisfy conditions of request. */
pa_start = VM_PAGE_TO_PHYS(m_run);
KASSERT(low <= pa_start,
("memory allocated below minimum requested range"));
KASSERT(pa_start + ptoa(npages) <= high,
("memory allocated above maximum requested range"));
seg = &vm_phys_segs[m_run->segind];
KASSERT(seg->domain == domain,
("memory not allocated from specified domain"));
KASSERT(vm_addr_ok(pa_start, ptoa(npages), alignment, boundary),
("memory alignment/boundary constraints not satisfied"));
return (m_run);
}