dma-mapping updates for Linux 6.8

- reduce area lock contention for non-primary IO TLB pools (Petr Tesarik)
  - don't store redundant offsets in the dma_ranges structures
    (Robin Murphy)
  - clear dev->dma_mem when freeing per-device pools (Joakim Zhang)
 -----BEGIN PGP SIGNATURE-----
 
 iQI/BAABCgApFiEEgdbnc3r/njty3Iq9D55TZVIEUYMFAmWcGxQLHGhjaEBsc3Qu
 ZGUACgkQD55TZVIEUYNf6hAAi9wP1ehnIqHCczCxpideyJnE76r+LgjInjudQUqE
 cnpl7E+dO/e/7Trk+L7hIrzi5uz8m9e+DZgL9wUY4h5mvJ+8ELet3Ec62UMVL1g0
 cWSYOtlMdUZn9Oy+qy2TTCa//1HyzcWQdplVwcOqD7zCLO4PavUR18+Vw5eDUpBR
 TE8EB+7P8ta8XSFnsryZS4zI1AhTTjZfh8ZgPdp+niBh7XAqOFNn3WiGK4qvA9o6
 nIjIV6ydBjZYkyYPeDsqszqmZG64mEeGUZhLWmjAyg1/c8so7uFviNfJ05od34js
 aWpmFxrM9Mm4BaBiU3FsSQkMBGCGaD/H2UXjIl1Qayt+pzUfaP7+8UWWo/T7Mj35
 RFKe9xzlPY8rqOszdBBvy6lCWguHXw4d4IFoqOz+YoUaxlV+RAbFOCHtW2BNvtPe
 b1YCr/FKNQ8NxsJWnbcehDtClY461pqBbaDrio3K7eTJgG10biAoWBfPhV+5VEer
 aB14krQcn7v1vXjfLu2huSrPt1ZjXuWVfXA3nO3Mt3VWxZWat82gLkFyt5N6ZfQ4
 juaDMX3Vzlz3VPf4MHFC+yFRx55b/9X26lC1BlSoo4tAknoo746Lvy/PasZarILC
 sGPt+2BVlPQ466zkjky4GtoNof1TMNuPF0Xr/mNCEjxCYUGQcTIdmOvA9y8mu/V+
 7M4=
 =v5F+
 -----END PGP SIGNATURE-----

Merge tag 'dma-mapping-6.8-2024-01-08' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

 - reduce area lock contention for non-primary IO TLB pools (Petr
   Tesarik)

 - don't store redundant offsets in the dma_ranges structures (Robin
   Murphy)

 - clear dev->dma_mem when freeing per-device pools (Joakim Zhang)

* tag 'dma-mapping-6.8-2024-01-08' of git://git.infradead.org/users/hch/dma-mapping:
  dma-mapping: clear dev->dma_mem to NULL after freeing it
  swiotlb: reduce area lock contention for non-primary IO TLB pools
  dma-mapping: don't store redundant offsets
This commit is contained in:
Linus Torvalds 2024-01-11 13:46:50 -08:00
commit 893e2f9eac
6 changed files with 70 additions and 46 deletions

View file

@ -1532,7 +1532,6 @@ int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map)
r->cpu_start = rentry->res->start;
r->dma_start = rentry->res->start - rentry->offset;
r->size = resource_size(rentry->res);
r->offset = rentry->offset;
r++;
}
}

View file

@ -955,7 +955,6 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map)
r->cpu_start = range.cpu_addr;
r->dma_start = range.bus_addr;
r->size = range.size;
r->offset = range.cpu_addr - range.bus_addr;
r++;
}
out:

View file

@ -21,7 +21,6 @@ struct bus_dma_region {
phys_addr_t cpu_start;
dma_addr_t dma_start;
u64 size;
u64 offset;
};
static inline dma_addr_t translate_phys_to_dma(struct device *dev,
@ -29,9 +28,12 @@ static inline dma_addr_t translate_phys_to_dma(struct device *dev,
{
const struct bus_dma_region *m;
for (m = dev->dma_range_map; m->size; m++)
if (paddr >= m->cpu_start && paddr - m->cpu_start < m->size)
return (dma_addr_t)paddr - m->offset;
for (m = dev->dma_range_map; m->size; m++) {
u64 offset = paddr - m->cpu_start;
if (paddr >= m->cpu_start && offset < m->size)
return m->dma_start + offset;
}
/* make sure dma_capable fails when no translation is available */
return DMA_MAPPING_ERROR;
@ -42,9 +44,12 @@ static inline phys_addr_t translate_dma_to_phys(struct device *dev,
{
const struct bus_dma_region *m;
for (m = dev->dma_range_map; m->size; m++)
if (dma_addr >= m->dma_start && dma_addr - m->dma_start < m->size)
return (phys_addr_t)dma_addr + m->offset;
for (m = dev->dma_range_map; m->size; m++) {
u64 offset = dma_addr - m->dma_start;
if (dma_addr >= m->dma_start && offset < m->size)
return m->cpu_start + offset;
}
return (phys_addr_t)-1;
}

View file

@ -132,8 +132,10 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
void dma_release_coherent_memory(struct device *dev)
{
if (dev)
if (dev) {
_dma_release_coherent_memory(dev->dma_mem);
dev->dma_mem = NULL;
}
}
static void *__dma_alloc_from_coherent(struct device *dev,

View file

@ -677,7 +677,6 @@ int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start,
return -ENOMEM;
map[0].cpu_start = cpu_start;
map[0].dma_start = dma_start;
map[0].offset = offset;
map[0].size = size;
dev->dma_range_map = map;
return 0;

View file

@ -957,7 +957,7 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
#endif /* CONFIG_DEBUG_FS */
/**
* swiotlb_area_find_slots() - search for slots in one IO TLB memory area
* swiotlb_search_pool_area() - search one memory area in one pool
* @dev: Device which maps the buffer.
* @pool: Memory pool to be searched.
* @area_index: Index of the IO TLB memory area to be searched.
@ -972,7 +972,7 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
*
* Return: Index of the first allocated slot, or -1 on error.
*/
static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool,
static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool,
int area_index, phys_addr_t orig_addr, size_t alloc_size,
unsigned int alloc_align_mask)
{
@ -1066,41 +1066,50 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool,
return slot_index;
}
#ifdef CONFIG_SWIOTLB_DYNAMIC
/**
* swiotlb_pool_find_slots() - search for slots in one memory pool
* swiotlb_search_area() - search one memory area in all pools
* @dev: Device which maps the buffer.
* @pool: Memory pool to be searched.
* @start_cpu: Start CPU number.
* @cpu_offset: Offset from @start_cpu.
* @orig_addr: Original (non-bounced) IO buffer address.
* @alloc_size: Total requested size of the bounce buffer,
* including initial alignment padding.
* @alloc_align_mask: Required alignment of the allocated buffer.
* @retpool: Used memory pool, updated on return.
*
* Search through one memory pool to find a sequence of slots that match the
* Search one memory area in all pools for a sequence of slots that match the
* allocation constraints.
*
* Return: Index of the first allocated slot, or -1 on error.
*/
static int swiotlb_pool_find_slots(struct device *dev, struct io_tlb_pool *pool,
phys_addr_t orig_addr, size_t alloc_size,
unsigned int alloc_align_mask)
static int swiotlb_search_area(struct device *dev, int start_cpu,
int cpu_offset, phys_addr_t orig_addr, size_t alloc_size,
unsigned int alloc_align_mask, struct io_tlb_pool **retpool)
{
int start = raw_smp_processor_id() & (pool->nareas - 1);
int i = start, index;
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
struct io_tlb_pool *pool;
int area_index;
int index = -1;
do {
index = swiotlb_area_find_slots(dev, pool, i, orig_addr,
alloc_size, alloc_align_mask);
if (index >= 0)
return index;
if (++i >= pool->nareas)
i = 0;
} while (i != start);
return -1;
rcu_read_lock();
list_for_each_entry_rcu(pool, &mem->pools, node) {
if (cpu_offset >= pool->nareas)
continue;
area_index = (start_cpu + cpu_offset) & (pool->nareas - 1);
index = swiotlb_search_pool_area(dev, pool, area_index,
orig_addr, alloc_size,
alloc_align_mask);
if (index >= 0) {
*retpool = pool;
break;
}
}
rcu_read_unlock();
return index;
}
#ifdef CONFIG_SWIOTLB_DYNAMIC
/**
* swiotlb_find_slots() - search for slots in the whole swiotlb
* @dev: Device which maps the buffer.
@ -1124,18 +1133,17 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
unsigned long nslabs;
unsigned long flags;
u64 phys_limit;
int cpu, i;
int index;
rcu_read_lock();
list_for_each_entry_rcu(pool, &mem->pools, node) {
index = swiotlb_pool_find_slots(dev, pool, orig_addr,
alloc_size, alloc_align_mask);
if (index >= 0) {
rcu_read_unlock();
cpu = raw_smp_processor_id();
for (i = 0; i < default_nareas; ++i) {
index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size,
alloc_align_mask, &pool);
if (index >= 0)
goto found;
}
}
rcu_read_unlock();
if (!mem->can_grow)
return -1;
@ -1148,8 +1156,8 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
if (!pool)
return -1;
index = swiotlb_pool_find_slots(dev, pool, orig_addr,
alloc_size, alloc_align_mask);
index = swiotlb_search_pool_area(dev, pool, 0, orig_addr,
alloc_size, alloc_align_mask);
if (index < 0) {
swiotlb_dyn_free(&pool->rcu);
return -1;
@ -1192,9 +1200,21 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
size_t alloc_size, unsigned int alloc_align_mask,
struct io_tlb_pool **retpool)
{
*retpool = &dev->dma_io_tlb_mem->defpool;
return swiotlb_pool_find_slots(dev, *retpool,
orig_addr, alloc_size, alloc_align_mask);
struct io_tlb_pool *pool;
int start, i;
int index;
*retpool = pool = &dev->dma_io_tlb_mem->defpool;
i = start = raw_smp_processor_id() & (pool->nareas - 1);
do {
index = swiotlb_search_pool_area(dev, pool, i, orig_addr,
alloc_size, alloc_align_mask);
if (index >= 0)
return index;
if (++i >= pool->nareas)
i = 0;
} while (i != start);
return -1;
}
#endif /* CONFIG_SWIOTLB_DYNAMIC */