arm64: Teach bus_dma on arm64 about NUMA

When allocating memory, we should try to allocate from the NUMA node
closest to the device to reduce cross-domain memory traffic. Teach the
arm64 bus_dma code to do this.
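
The pattern applied throughout the diff below is to pass a
preferred-domain domainset to the kernel allocators instead of relying on
the default policy. A minimal sketch of that pattern (illustration only,
not part of this change; alloc_near_domain is a hypothetical helper):

#include <sys/param.h>
#include <sys/domainset.h>
#include <sys/malloc.h>

/*
 * Allocate 'size' bytes, preferring pages from 'domain' but falling back
 * to other domains if that domain is exhausted.
 */
static void *
alloc_near_domain(size_t size, int domain)
{
        return (malloc_domainset(size, M_DEVBUF, DOMAINSET_PREF(domain),
            M_WAITOK | M_ZERO));
}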

While here, use mallocarray(9) to guard against an unlikely integer
overflow.
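
For reference, the difference is that mallocarray(9) checks the
nmemb * size multiplication for overflow before allocating, whereas a bare
malloc(nmemb * size) can silently wrap. A minimal sketch (alloc_segments
is a hypothetical helper, not part of this change):

#include <sys/param.h>
#include <sys/malloc.h>
#include <machine/bus.h>

static bus_dma_segment_t *
alloc_segments(int nsegments)
{
        /*
         * Equivalent to malloc(sizeof(bus_dma_segment_t) * nsegments, ...)
         * except that the multiplication is checked for overflow first.
         */
        return (mallocarray(nsegments, sizeof(bus_dma_segment_t), M_DEVBUF,
            M_NOWAIT));
}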

Reviewed by:	markj
Sponsored by:	Arm Ltd
Differential Revision:	https://reviews.freebsd.org/D42187
commit 271e669ed5 (parent 26ac295c05)
Andrew Turner  2023-10-12 17:01:01 +01:00

2 changed files with 93 additions and 46 deletions

@@ -34,6 +34,7 @@
 #include <sys/cdefs.h>
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/domainset.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
@@ -117,6 +118,7 @@ static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
 static MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata");
 #define dmat_alignment(dmat) ((dmat)->common.alignment)
+#define dmat_domain(dmat) ((dmat)->common.domain)
 #define dmat_flags(dmat) ((dmat)->common.flags)
 #define dmat_lowaddr(dmat) ((dmat)->common.lowaddr)
 #define dmat_lockfunc(dmat) ((dmat)->common.lockfunc)
@@ -124,6 +126,40 @@ static MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata");
 #include "../../kern/subr_busdma_bounce.c"
 
+static int
+bounce_bus_dma_zone_setup(bus_dma_tag_t dmat)
+{
+        struct bounce_zone *bz;
+        bus_size_t maxsize;
+        int error;
+
+        /*
+         * Round size up to a full page, and add one more page because
+         * there can always be one more boundary crossing than the
+         * number of pages in a transfer.
+         */
+        maxsize = roundup2(dmat->common.maxsize, PAGE_SIZE) + PAGE_SIZE;
+
+        /* Must bounce */
+        if ((error = alloc_bounce_zone(dmat)) != 0)
+                return (error);
+        bz = dmat->bounce_zone;
+
+        if (ptoa(bz->total_bpages) < maxsize) {
+                int pages;
+
+                pages = atop(maxsize) + 1 - bz->total_bpages;
+
+                /* Add pages to our bounce pool */
+                if (alloc_bounce_pages(dmat, pages) < pages)
+                        return (ENOMEM);
+        }
+        /* Performed initial allocation */
+        dmat->bounce_flags |= BF_MIN_ALLOC_COMP;
+
+        return (error);
+}
+
 /*
  * Return true if the DMA should bounce because the start or end does not fall
  * on a cacheline boundary (which would require a partial cacheline flush).
@@ -257,34 +293,9 @@ bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
             newtag->common.alignment > 1)
                 newtag->bounce_flags |= BF_COULD_BOUNCE;
 
-        if ((flags & BUS_DMA_ALLOCNOW) != 0) {
-                struct bounce_zone *bz;
-
-                /*
-                 * Round size up to a full page, and add one more page because
-                 * there can always be one more boundary crossing than the
-                 * number of pages in a transfer.
-                 */
-                maxsize = roundup2(maxsize, PAGE_SIZE) + PAGE_SIZE;
-
-                /* Must bounce */
-                if ((error = alloc_bounce_zone(newtag)) != 0) {
-                        free(newtag, M_DEVBUF);
-                        return (error);
-                }
-                bz = newtag->bounce_zone;
-
-                if (ptoa(bz->total_bpages) < maxsize) {
-                        int pages;
-
-                        pages = atop(maxsize) + 1 - bz->total_bpages;
-
-                        /* Add pages to our bounce pool */
-                        if (alloc_bounce_pages(newtag, pages) < pages)
-                                error = ENOMEM;
-                }
-                /* Performed initial allocation */
-                newtag->bounce_flags |= BF_MIN_ALLOC_COMP;
-        } else
+        if ((flags & BUS_DMA_ALLOCNOW) != 0)
+                error = bounce_bus_dma_zone_setup(newtag);
+        else
                 error = 0;
+
         if (error != 0)
@@ -339,6 +350,23 @@ bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat)
         return (error);
 }
 
+/*
+ * Update the domain for the tag. We may need to reallocate the zone and
+ * bounce pages.
+ */
+static int
+bounce_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
+{
+
+        KASSERT(dmat->map_count == 0,
+            ("bounce_bus_dma_tag_set_domain: Domain set after use.\n"));
+        if ((dmat->bounce_flags & BF_COULD_BOUNCE) == 0 ||
+            dmat->bounce_zone == NULL)
+                return (0);
+        dmat->bounce_flags &= ~BF_MIN_ALLOC_COMP;
+        return (bounce_bus_dma_zone_setup(dmat));
+}
+
 static bool
 bounce_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
 {
@@ -356,7 +384,8 @@ alloc_dmamap(bus_dma_tag_t dmat, int flags)
         mapsize = sizeof(*map);
         mapsize += sizeof(struct sync_list) * dmat->common.nsegments;
-        map = malloc(mapsize, M_DEVBUF, flags | M_ZERO);
+        map = malloc_domainset(mapsize, M_DEVBUF,
+            DOMAINSET_PREF(dmat->common.domain), flags | M_ZERO);
         if (map == NULL)
                 return (NULL);
@@ -379,9 +408,9 @@ bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
         error = 0;
         if (dmat->segments == NULL) {
-                dmat->segments = (bus_dma_segment_t *)malloc(
-                    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
-                    M_DEVBUF, M_NOWAIT);
+                dmat->segments = mallocarray_domainset(dmat->common.nsegments,
+                    sizeof(bus_dma_segment_t), M_DEVBUF,
+                    DOMAINSET_PREF(dmat->common.domain), M_NOWAIT);
                 if (dmat->segments == NULL) {
                         CTR3(KTR_BUSDMA, "%s: tag %p error %d",
                             __func__, dmat, ENOMEM);
@@ -490,9 +519,9 @@ bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
                 mflags = M_WAITOK;
         if (dmat->segments == NULL) {
-                dmat->segments = (bus_dma_segment_t *)malloc(
-                    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
-                    M_DEVBUF, mflags);
+                dmat->segments = mallocarray_domainset(dmat->common.nsegments,
+                    sizeof(bus_dma_segment_t), M_DEVBUF,
+                    DOMAINSET_PREF(dmat->common.domain), mflags);
                 if (dmat->segments == NULL) {
                         CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
                             __func__, dmat, dmat->common.flags, ENOMEM);
@@ -536,9 +565,10 @@ bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
         /*
          * Allocate the buffer from the malloc(9) allocator if...
-         *  - It's small enough to fit into a single power of two sized bucket.
-         *  - The alignment is less than or equal to the maximum size
+         *  - It's small enough to fit into a single page.
+         *  - Its alignment requirement is also smaller than the page size.
          *  - The low address requirement is fulfilled.
+         *  - Default cache attributes are requested (WB).
          * else allocate non-contiguous pages if...
          *  - The page count that could get allocated doesn't exceed
          *    nsegments also when the maximum segment size is less
@@ -555,23 +585,28 @@ bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
          *
          * In the meantime warn the user if malloc gets it wrong.
          */
-        if ((dmat->alloc_size <= PAGE_SIZE) &&
-            (dmat->alloc_alignment <= dmat->alloc_size) &&
+        if (dmat->alloc_size <= PAGE_SIZE &&
+            dmat->alloc_alignment <= PAGE_SIZE &&
             dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) &&
             attr == VM_MEMATTR_DEFAULT) {
-                *vaddr = malloc(dmat->alloc_size, M_DEVBUF, mflags);
+                *vaddr = malloc_domainset_aligned(dmat->alloc_size,
+                    dmat->alloc_alignment, M_DEVBUF,
+                    DOMAINSET_PREF(dmat->common.domain), mflags);
         } else if (dmat->common.nsegments >=
             howmany(dmat->alloc_size, MIN(dmat->common.maxsegsz, PAGE_SIZE)) &&
             dmat->alloc_alignment <= PAGE_SIZE &&
             (dmat->common.boundary % PAGE_SIZE) == 0) {
                 /* Page-based multi-segment allocations allowed */
-                *vaddr = kmem_alloc_attr(dmat->alloc_size, mflags,
-                    0ul, dmat->common.lowaddr, attr);
+                *vaddr = kmem_alloc_attr_domainset(
+                    DOMAINSET_PREF(dmat->common.domain), dmat->alloc_size,
+                    mflags, 0ul, dmat->common.lowaddr, attr);
                 dmat->bounce_flags |= BF_KMEM_ALLOC;
         } else {
-                *vaddr = kmem_alloc_contig(dmat->alloc_size, mflags,
-                    0ul, dmat->common.lowaddr, dmat->alloc_alignment != 0 ?
-                    dmat->alloc_alignment : 1ul, dmat->common.boundary, attr);
+                *vaddr = kmem_alloc_contig_domainset(
+                    DOMAINSET_PREF(dmat->common.domain), dmat->alloc_size,
+                    mflags, 0ul, dmat->common.lowaddr,
+                    dmat->alloc_alignment != 0 ? dmat->alloc_alignment : 1ul,
+                    dmat->common.boundary, attr);
                 dmat->bounce_flags |= BF_KMEM_ALLOC;
         }
         if (*vaddr == NULL) {
@@ -1147,6 +1182,7 @@ bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
 struct bus_dma_impl bus_dma_bounce_impl = {
         .tag_create = bounce_bus_dma_tag_create,
         .tag_destroy = bounce_bus_dma_tag_destroy,
+        .tag_set_domain = bounce_bus_dma_tag_set_domain,
         .id_mapped = bounce_bus_dma_id_mapped,
         .map_create = bounce_bus_dmamap_create,
         .map_destroy = bounce_bus_dmamap_destroy,

@@ -44,6 +44,7 @@
 #include <sys/uio.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
+#include <vm/vm_phys.h>
 #include <vm/pmap.h>
 #include <machine/bus.h>
@@ -142,8 +143,11 @@ common_bus_dma_tag_create(struct bus_dma_tag_common *parent,
                         common->filterarg = parent->filterarg;
                         common->parent = parent->parent;
                 }
+                common->domain = parent->domain;
                 atomic_add_int(&parent->ref_count, 1);
         }
+        common->domain = vm_phys_domain_match(common->domain, 0ul,
+            common->lowaddr);
         *dmat = common;
         return (0);
 }
@@ -209,6 +213,13 @@ bus_dma_tag_destroy(bus_dma_tag_t dmat)
 int
 bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain)
 {
+        struct bus_dma_tag_common *tc;
 
-        return (0);
+        tc = (struct bus_dma_tag_common *)dmat;
+        domain = vm_phys_domain_match(domain, 0ul, tc->lowaddr);
+        /* Only call the callback if it changes. */
+        if (domain == tc->domain)
+                return (0);
+        tc->domain = domain;
+        return (tc->impl->tag_set_domain(dmat));
 }
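
Illustration (not part of this commit): with the bounce implementation's
tag_set_domain callback in place, a driver that knows its device's NUMA
domain can steer the tag's segment array, maps and bounce pages toward
that domain. A minimal, hypothetical consumer sketch, assuming a device_t
dev and otherwise default DMA tag parameters:

#include <sys/param.h>
#include <sys/bus.h>
#include <machine/bus.h>

static int
example_create_tag(device_t dev, bus_dma_tag_t *tagp)
{
        int domain, error;

        error = bus_dma_tag_create(bus_get_dma_tag(dev),
            1, 0,                    /* alignment, boundary */
            BUS_SPACE_MAXADDR,       /* lowaddr */
            BUS_SPACE_MAXADDR,       /* highaddr */
            NULL, NULL,              /* filter, filterarg */
            PAGE_SIZE, 1, PAGE_SIZE, /* maxsize, nsegments, maxsegsz */
            0, NULL, NULL,           /* flags, lockfunc, lockfuncarg */
            tagp);
        if (error != 0)
                return (error);

        /*
         * Best effort: if the device's domain is known, prefer it for the
         * tag's allocations; otherwise leave the default domain selection
         * alone.
         */
        if (bus_get_domain(dev, &domain) == 0)
                (void)bus_dma_tag_set_domain(*tagp, domain);

        return (0);
}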