iommu: Handle freelists when using deferred flushing in iommu drivers

Allow the iommu_unmap_fast to return newly freed page table pages and
pass the freelist to queue_iova in the dma-iommu ops path.

This is useful for iommu drivers (in this case the intel iommu driver)
which need to wait for the ioTLB to be flushed before newly
free/unmapped page table pages can be freed. This way we can still batch
ioTLB free operations and handle the freelists.

Signed-off-by: Tom Murphy <murphyt7@tcd.ie>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Logan Gunthorpe <logang@deltatee.com>
Link: https://lore.kernel.org/r/20201124082057.2614359-2-baolu.lu@linux.intel.com
Signed-off-by: Will Deacon <will@kernel.org>
This commit is contained in:
Tom Murphy 2020-11-24 16:20:51 +08:00 committed by Will Deacon
parent 66930e7e1e
commit 2a2b8eaa5b
3 changed files with 58 additions and 27 deletions

View file

@ -49,6 +49,18 @@ struct iommu_dma_cookie {
struct iommu_domain *fq_domain;
};
static void iommu_dma_entry_dtor(unsigned long data)
{
struct page *freelist = (struct page *)data;
while (freelist) {
unsigned long p = (unsigned long)page_address(freelist);
freelist = freelist->freelist;
free_page(p);
}
}
static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
{
if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
@ -343,7 +355,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
NULL))
iommu_dma_entry_dtor))
pr_warn("iova flush queue initialization failed\n");
else
cookie->fq_domain = domain;
@ -440,7 +452,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
}
static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
dma_addr_t iova, size_t size)
dma_addr_t iova, size_t size, struct page *freelist)
{
struct iova_domain *iovad = &cookie->iovad;
@ -449,7 +461,8 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
cookie->msi_iova -= size;
else if (cookie->fq_domain) /* non-strict mode */
queue_iova(iovad, iova_pfn(iovad, iova),
size >> iova_shift(iovad), 0);
size >> iova_shift(iovad),
(unsigned long)freelist);
else
free_iova_fast(iovad, iova_pfn(iovad, iova),
size >> iova_shift(iovad));
@ -474,7 +487,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
if (!cookie->fq_domain)
iommu_iotlb_sync(domain, &iotlb_gather);
iommu_dma_free_iova(cookie, dma_addr, size);
iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
}
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@ -496,7 +509,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
return DMA_MAPPING_ERROR;
if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
iommu_dma_free_iova(cookie, iova, size);
iommu_dma_free_iova(cookie, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
return iova + iova_off;
@ -649,7 +662,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
out_free_sg:
sg_free_table(&sgt);
out_free_iova:
iommu_dma_free_iova(cookie, iova, size);
iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@ -900,7 +913,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
return __finalise_sg(dev, sg, nents, iova);
out_free_iova:
iommu_dma_free_iova(cookie, iova, iova_len);
iommu_dma_free_iova(cookie, iova, iova_len, NULL);
out_restore_sg:
__invalidate_sg(sg, nents);
return 0;
@ -1228,7 +1241,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
return msi_page;
out_free_iova:
iommu_dma_free_iova(cookie, iova, size);
iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_page:
kfree(msi_page);
return NULL;

View file

@ -1243,17 +1243,17 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
unsigned long start_pfn,
unsigned long last_pfn)
unsigned long last_pfn,
struct page *freelist)
{
struct page *freelist;
BUG_ON(!domain_pfn_supported(domain, start_pfn));
BUG_ON(!domain_pfn_supported(domain, last_pfn));
BUG_ON(start_pfn > last_pfn);
/* we don't need lock here; nobody else touches the iova range */
freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
domain->pgd, 0, start_pfn, last_pfn, NULL);
domain->pgd, 0, start_pfn, last_pfn,
freelist);
/* free pgd */
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
@ -2011,7 +2011,8 @@ static void domain_exit(struct dmar_domain *domain)
if (domain->pgd) {
struct page *freelist;
freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
freelist = domain_unmap(domain, 0,
DOMAIN_MAX_PFN(domain->gaw), NULL);
dma_free_pagelist(freelist);
}
@ -3570,7 +3571,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
if (dev_is_pci(dev))
pdev = to_pci_dev(dev);
freelist = domain_unmap(domain, start_pfn, last_pfn);
freelist = domain_unmap(domain, start_pfn, last_pfn, NULL);
if (intel_iommu_strict || (pdev && pdev->untrusted) ||
!has_iova_flush_queue(&domain->iovad)) {
iommu_flush_iotlb_psi(iommu, domain, start_pfn,
@ -4636,7 +4637,8 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
struct page *freelist;
freelist = domain_unmap(si_domain,
start_vpfn, last_vpfn);
start_vpfn, last_vpfn,
NULL);
rcu_read_lock();
for_each_active_iommu(iommu, drhd)
@ -5608,10 +5610,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct page *freelist = NULL;
unsigned long start_pfn, last_pfn;
unsigned int npages;
int iommu_id, level = 0;
int level = 0;
/* Cope with horrid API which requires us to unmap more than the
size argument if it happens to be a large-page mapping. */
@ -5623,22 +5623,38 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
start_pfn = iova >> VTD_PAGE_SHIFT;
last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
npages = last_pfn - start_pfn + 1;
for_each_domain_iommu(iommu_id, dmar_domain)
iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
start_pfn, npages, !freelist, 0);
dma_free_pagelist(freelist);
gather->freelist = domain_unmap(dmar_domain, start_pfn,
last_pfn, gather->freelist);
if (dmar_domain->max_addr == iova + size)
dmar_domain->max_addr = iova;
iommu_iotlb_gather_add_page(domain, gather, iova, size);
return size;
}
static void intel_iommu_tlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long iova_pfn = IOVA_PFN(gather->start);
size_t size = gather->end - gather->start;
unsigned long start_pfn, last_pfn;
unsigned long nrpages;
int iommu_id;
nrpages = aligned_nrpages(gather->start, size);
start_pfn = mm_to_dma_pfn(iova_pfn);
last_pfn = start_pfn + nrpages - 1;
for_each_domain_iommu(iommu_id, dmar_domain)
iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
start_pfn, nrpages, !gather->freelist, 0);
dma_free_pagelist(gather->freelist);
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
@ -6098,6 +6114,7 @@ const struct iommu_ops intel_iommu_ops = {
.aux_get_pasid = intel_iommu_aux_get_pasid,
.map = intel_iommu_map,
.unmap = intel_iommu_unmap,
.iotlb_sync = intel_iommu_tlb_sync,
.iova_to_phys = intel_iommu_iova_to_phys,
.probe_device = intel_iommu_probe_device,
.probe_finalize = intel_iommu_probe_finalize,

View file

@ -180,6 +180,7 @@ struct iommu_iotlb_gather {
unsigned long start;
unsigned long end;
size_t pgsize;
struct page *freelist;
};
/**