From a77e1f0f81df5fa3b4a6a38728ebace599cb18a4 Mon Sep 17 00:00:00 2001
From: Mitchell Horne
Date: Wed, 14 Feb 2024 12:56:13 -0400
Subject: [PATCH] busdma: better handling of small segment bouncing

Typically, when a DMA transaction requires bouncing, we will break up
the request into segments that are, at maximum, page-sized. However, in
the atypical case of a driver whose maximum segment size is smaller
than PAGE_SIZE, we end up inefficiently assigning each segment its own
bounce page. For example, the dwmmc driver has a maximum segment size
of 2048 (PAGE_SIZE / 2); a 4-page transfer ends up requiring 8 bounce
pages in the current scheme.

We should attempt to batch segments into bounce pages more efficiently.
This is achieved by pushing all considerations of the maximum segment
size into the new _bus_dmamap_addsegs() function, which wraps
_bus_dmamap_addseg(). Thus we allocate the minimal number of bounce
pages required to complete the entire transfer, while still performing
the transfer with smaller-sized transactions.

For most drivers with a segment size >= PAGE_SIZE, this will have no
impact. For drivers like dwmmc mentioned above, this improves the
memory and performance efficiency when bouncing a large transfer.
Co-authored-by: jhb Reviewed by: jhb MFC after: 1 month Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D45048 --- sys/arm/arm/busdma_machdep.c | 21 +++++-------- sys/arm64/arm64/busdma_bounce.c | 24 ++++++--------- sys/kern/subr_busdma_bounce.c | 22 +++++++++++++ sys/powerpc/powerpc/busdma_machdep.c | 26 ++++++---------- sys/riscv/riscv/busdma_bounce.c | 27 ++++++---------- sys/x86/x86/busdma_bounce.c | 46 ++++++++++------------------ 6 files changed, 75 insertions(+), 91 deletions(-) diff --git a/sys/arm/arm/busdma_machdep.c b/sys/arm/arm/busdma_machdep.c index 17cde2e60e37..13af7eb682d6 100644 --- a/sys/arm/arm/busdma_machdep.c +++ b/sys/arm/arm/busdma_machdep.c @@ -796,7 +796,7 @@ _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, */ curaddr = buf; while (buflen != 0) { - sgsize = MIN(buflen, dmat->maxsegsz); + sgsize = buflen; if (must_bounce(dmat, map, curaddr, sgsize) != 0) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); @@ -833,7 +833,6 @@ _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map, while (vaddr < vendaddr) { sg_len = MIN(vendaddr - vaddr, (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK))); - sg_len = MIN(sg_len, dmat->maxsegsz); if (__predict_true(pmap == kernel_pmap)) paddr = pmap_kextract(vaddr); else @@ -884,7 +883,7 @@ _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, while (buflen > 0) { curaddr = buf; - sgsize = MIN(buflen, dmat->maxsegsz); + sgsize = buflen; if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, sgsize)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); @@ -908,9 +907,8 @@ _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, } else sl->datacount += sgsize; } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) + if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs, + segp)) break; buf += sgsize; buflen -= sgsize; @@ 
-1000,11 +998,7 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, /* * Compute the segment size, and adjust counts. */ - sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); - if (sgsize > dmat->maxsegsz) - sgsize = dmat->maxsegsz; - if (buflen < sgsize) - sgsize = buflen; + sgsize = MIN(buflen, PAGE_SIZE - (curaddr & PAGE_MASK)); if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, sgsize)) { @@ -1037,9 +1031,8 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, } else sl->datacount += sgsize; } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) + if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs, + segp)) break; vaddr += sgsize; buflen -= sgsize; diff --git a/sys/arm64/arm64/busdma_bounce.c b/sys/arm64/arm64/busdma_bounce.c index 385e26e3bd8e..f218bc062642 100644 --- a/sys/arm64/arm64/busdma_bounce.c +++ b/sys/arm64/arm64/busdma_bounce.c @@ -643,7 +643,7 @@ _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, count = 0; curaddr = buf; while (buflen != 0) { - sgsize = MIN(buflen, dmat->common.maxsegsz); + sgsize = buflen; if (must_bounce(dmat, map, curaddr, sgsize)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); @@ -696,15 +696,13 @@ _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { - sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); - sg_len = MIN(sg_len, dmat->common.maxsegsz); + sg_len = MIN(vendaddr - vaddr, + PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); - if (must_bounce(dmat, map, paddr, - min(vendaddr - vaddr, (PAGE_SIZE - ((vm_offset_t)vaddr & - PAGE_MASK)))) != 0) { + if (must_bounce(dmat, map, paddr, sg_len) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); map->pagesneeded++; @@ -746,7 +744,7 @@ 
bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, while (buflen > 0) { curaddr = buf; - sgsize = MIN(buflen, dmat->common.maxsegsz); + sgsize = buflen; if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, sgsize)) { /* @@ -780,9 +778,8 @@ bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, } else sl->datacount += sgsize; } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) + if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs, + segp)) break; buf += sgsize; buflen -= sgsize; @@ -858,7 +855,7 @@ bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, /* * Compute the segment size, and adjust counts. */ - sgsize = MIN(buflen, dmat->common.maxsegsz); + sgsize = buflen; if ((map->flags & DMAMAP_FROM_DMAMEM) == 0) sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); @@ -897,9 +894,8 @@ bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, } else sl->datacount += sgsize; } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) + if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs, + segp)) break; vaddr += sgsize; buflen -= sgsize; diff --git a/sys/kern/subr_busdma_bounce.c b/sys/kern/subr_busdma_bounce.c index a9cddcb3f39f..a27bc423c23a 100644 --- a/sys/kern/subr_busdma_bounce.c +++ b/sys/kern/subr_busdma_bounce.c @@ -499,6 +499,28 @@ _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, return (sgsize); } +/* + * Add a contiguous physical range to the segment list, respecting the tag's + * maximum segment size and splitting it into multiple segments as necessary. 
+ */ +static bool +_bus_dmamap_addsegs(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, + bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) +{ + bus_size_t done, todo; + + while (sgsize > 0) { + todo = MIN(sgsize, dmat_maxsegsz(dmat)); + done = _bus_dmamap_addseg(dmat, map, curaddr, todo, segs, + segp); + if (done == 0) + return (false); + curaddr += done; + sgsize -= done; + } + return (true); +} + static void busdma_thread(void *dummy __unused) { diff --git a/sys/powerpc/powerpc/busdma_machdep.c b/sys/powerpc/powerpc/busdma_machdep.c index fad22d49a7f5..b023e7f353b9 100644 --- a/sys/powerpc/powerpc/busdma_machdep.c +++ b/sys/powerpc/powerpc/busdma_machdep.c @@ -487,7 +487,7 @@ _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, */ curaddr = buf; while (buflen != 0) { - sgsize = MIN(buflen, dmat->maxsegsz); + sgsize = buflen; if (must_bounce(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); @@ -523,8 +523,8 @@ _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, while (vaddr < vendaddr) { bus_size_t sg_len; - sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); - sg_len = MIN(sg_len, dmat->maxsegsz); + sg_len = MIN(vendaddr - vaddr, + PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else @@ -569,15 +569,14 @@ _bus_dmamap_load_phys(bus_dma_tag_t dmat, while (buflen > 0) { curaddr = buf; - sgsize = MIN(buflen, dmat->maxsegsz); + sgsize = buflen; if (map->pagesneeded != 0 && must_bounce(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) + if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs, + segp)) break; buf += sgsize; buflen -= sgsize; @@ -632,8 +631,6 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat, vaddr = (vm_offset_t)buf; while (buflen > 0) { - 
bus_size_t max_sgsize; - /* * Get the physical address for this segment. */ @@ -648,20 +645,15 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat, /* * Compute the segment size, and adjust counts. */ - max_sgsize = MIN(buflen, dmat->maxsegsz); - sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); + sgsize = MIN(buflen, PAGE_SIZE - (curaddr & PAGE_MASK)); if (map->pagesneeded != 0 && must_bounce(dmat, curaddr)) { sgsize = roundup2(sgsize, dmat->alignment); - sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); - } else { - sgsize = MIN(sgsize, max_sgsize); } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) + if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs, + segp)) break; vaddr += sgsize; buflen -= sgsize; diff --git a/sys/riscv/riscv/busdma_bounce.c b/sys/riscv/riscv/busdma_bounce.c index cb0d7ea29ecd..e1c217f1d12e 100644 --- a/sys/riscv/riscv/busdma_bounce.c +++ b/sys/riscv/riscv/busdma_bounce.c @@ -497,7 +497,7 @@ _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, */ curaddr = buf; while (buflen != 0) { - sgsize = MIN(buflen, dmat->common.maxsegsz); + sgsize = buflen; if (addr_needs_bounce(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); @@ -534,8 +534,8 @@ _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { - sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); - sg_len = MIN(sg_len, dmat->common.maxsegsz); + sg_len = MIN(vendaddr - vaddr, + PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else @@ -582,7 +582,7 @@ bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, while (buflen > 0) { curaddr = buf; - sgsize = MIN(buflen, dmat->common.maxsegsz); + sgsize = buflen; if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && addr_needs_bounce(dmat, curaddr)) 
{ @@ -607,9 +607,8 @@ bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, } else sl->datacount += sgsize; } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) + if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs, + segp)) break; buf += sgsize; buflen -= sgsize; @@ -631,7 +630,7 @@ bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, int *segp) { struct sync_list *sl; - bus_size_t sgsize, max_sgsize; + bus_size_t sgsize; bus_addr_t curaddr, sl_pend; vm_offset_t kvaddr, vaddr, sl_vend; int error; @@ -668,17 +667,14 @@ bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, /* * Compute the segment size, and adjust counts. */ - max_sgsize = MIN(buflen, dmat->common.maxsegsz); - sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); + sgsize = MIN(buflen, PAGE_SIZE - (curaddr & PAGE_MASK)); if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && addr_needs_bounce(dmat, curaddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); - sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else if ((dmat->bounce_flags & BF_COHERENT) == 0) { - sgsize = MIN(sgsize, max_sgsize); if (map->sync_count > 0) { sl_pend = sl->paddr + sl->datacount; sl_vend = sl->vaddr + sl->datacount; @@ -704,12 +700,9 @@ bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, sl->datacount = sgsize; } else sl->datacount += sgsize; - } else { - sgsize = MIN(sgsize, max_sgsize); } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) + if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs, + segp)) break; vaddr += sgsize; buflen -= sgsize; diff --git a/sys/x86/x86/busdma_bounce.c b/sys/x86/x86/busdma_bounce.c index 07fbfd6dffcb..5aa4ffcff3cc 100644 --- a/sys/x86/x86/busdma_bounce.c +++ b/sys/x86/x86/busdma_bounce.c @@ -511,7 +511,7 @@ _bus_dmamap_pagesneeded(bus_dma_tag_t 
dmat, vm_paddr_t buf, bus_size_t buflen, count = 0; curaddr = buf; while (buflen != 0) { - sgsize = MIN(buflen, dmat->common.maxsegsz); + sgsize = buflen; if (must_bounce(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); @@ -563,8 +563,8 @@ _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { - sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); - sg_len = MIN(sg_len, dmat->common.maxsegsz); + sg_len = MIN(vendaddr - vaddr, + PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else @@ -584,7 +584,7 @@ static void _bus_dmamap_count_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, int ma_offs, bus_size_t buflen, int flags) { - bus_size_t sg_len, max_sgsize; + bus_size_t sg_len; int page_index; vm_paddr_t paddr; @@ -604,12 +604,10 @@ _bus_dmamap_count_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, while (buflen > 0) { paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs; sg_len = PAGE_SIZE - ma_offs; - max_sgsize = MIN(buflen, dmat->common.maxsegsz); - sg_len = MIN(sg_len, max_sgsize); + sg_len = MIN(sg_len, buflen); if (must_bounce(dmat, paddr)) { sg_len = roundup2(sg_len, dmat->common.alignment); - sg_len = MIN(sg_len, max_sgsize); KASSERT(vm_addr_align_ok(sg_len, dmat->common.alignment), ("Segment size is not aligned")); @@ -656,7 +654,7 @@ bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, while (buflen > 0) { curaddr = buf; - sgsize = MIN(buflen, dmat->common.maxsegsz); + sgsize = buflen; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 && map->pagesneeded != 0 && must_bounce(dmat, curaddr)) { @@ -664,9 +662,9 @@ bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, curaddr = add_bounce_page(dmat, map, 0, curaddr, 0, sgsize); } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) + + if 
(!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs, + segp)) break; buf += sgsize; buflen -= sgsize; @@ -687,7 +685,7 @@ bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { - bus_size_t sgsize, max_sgsize; + bus_size_t sgsize; vm_paddr_t curaddr; vm_offset_t kvaddr, vaddr; int error; @@ -723,21 +721,16 @@ bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, /* * Compute the segment size, and adjust counts. */ - max_sgsize = MIN(buflen, dmat->common.maxsegsz); - sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); + sgsize = MIN(buflen, PAGE_SIZE - (curaddr & PAGE_MASK)); if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 && map->pagesneeded != 0 && must_bounce(dmat, curaddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); - sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, 0, sgsize); - } else { - sgsize = MIN(sgsize, max_sgsize); } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) + if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs, + segp)) break; vaddr += sgsize; buflen -= sgsize; @@ -756,7 +749,7 @@ bounce_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, { vm_paddr_t paddr, next_paddr; int error, page_index; - bus_size_t sgsize, max_sgsize; + bus_size_t sgsize; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* @@ -790,13 +783,11 @@ bounce_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, * Compute the segment size, and adjust counts. 
*/ paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs; - max_sgsize = MIN(buflen, dmat->common.maxsegsz); - sgsize = PAGE_SIZE - ma_offs; + sgsize = MIN(buflen, PAGE_SIZE - ma_offs); if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 && map->pagesneeded != 0 && must_bounce(dmat, paddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); - sgsize = MIN(sgsize, max_sgsize); KASSERT(vm_addr_align_ok(sgsize, dmat->common.alignment), ("Segment size is not aligned")); @@ -811,12 +802,9 @@ bounce_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, next_paddr = 0; paddr = add_bounce_page(dmat, map, 0, paddr, next_paddr, sgsize); - } else { - sgsize = MIN(sgsize, max_sgsize); } - sgsize = _bus_dmamap_addseg(dmat, map, paddr, sgsize, segs, - segp); - if (sgsize == 0) + if (!_bus_dmamap_addsegs(dmat, map, paddr, sgsize, segs, + segp)) break; KASSERT(buflen >= sgsize, ("Segment length overruns original buffer"));