From aa977833dee5de583ac5dbc902e39983ec6302ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 14 Feb 2023 03:25:53 +0000 Subject: [PATCH 01/82] iommu: Make kobj_type structure constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit ee6d3dd4ed48 ("driver core: make kobj_type constant.") the driver core allows the usage of const struct kobj_type. Take advantage of this to constify the structure definition to prevent modification at runtime. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20230214-kobj_type-iommu-v1-1-e7392834b9d0@weissschuh.net Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 10db680acaed..ace9d418a115 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -739,7 +739,7 @@ static void iommu_group_release(struct kobject *kobj) kfree(group); } -static struct kobj_type iommu_group_ktype = { +static const struct kobj_type iommu_group_ktype = { .sysfs_ops = &iommu_group_sysfs_ops, .release = iommu_group_release, }; From 08632365b274e4be6c43053eba7589c811e6e572 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Fri, 24 Feb 2023 15:07:51 +0000 Subject: [PATCH 02/82] iommu/sun50i: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Luis Chamberlain Cc: linux-modules@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Hitomi Hasegawa Cc: Joerg Roedel Cc: Will Deacon Cc: Chen-Yu Tsai Cc: Jernej Skrabec Cc: Samuel Holland Cc: Philipp Zabel Cc: iommu@lists.linux.dev Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@lists.linux.dev Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20230224150811.80316-8-nick.alcock@oracle.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 2d993d0cea7d..74c5cb93e900 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -1076,4 +1076,3 @@ builtin_platform_driver_probe(sun50i_iommu_driver, sun50i_iommu_probe); MODULE_DESCRIPTION("Allwinner H6 IOMMU driver"); MODULE_AUTHOR("Maxime Ripard "); MODULE_AUTHOR("zhuxianbin "); -MODULE_LICENSE("Dual BSD/GPL"); From efe37fda9d52c59ac7b5837a877353486d8cb45c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Mar 2023 17:30:29 +0100 Subject: [PATCH 03/82] iommu/ipmmu-vmsa: remove R-Car H3 ES1.* handling R-Car H3 ES1.* was only available to an internal development group and needed a lot of quirks and workarounds. These become a maintenance burden now, so our development group decided to remove upstream support and disable booting for this SoC. Public users only have ES2 onwards. Reviewed-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20230307163041.3815-2-wsa+renesas@sang-engineering.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index bdf1a4e5eae0..ba42001a6f57 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -697,7 +697,6 @@ static const struct soc_device_attribute soc_needs_opt_in[] = { static const struct soc_device_attribute soc_denylist[] = { { .soc_id = "r8a774a1", }, - { .soc_id = "r8a7795", .revision = "ES1.*" }, { .soc_id = "r8a7795", .revision = "ES2.*" }, { .soc_id = "r8a7796", }, { /* sentinel */ } From 829a79556fc983f2d4a3d687d4bc60fe8deb56fd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 7 Mar 2023 19:45:04 -0800 Subject: [PATCH 04/82] iommu/fsl: fix all kernel-doc warnings in fsl_pamu.c Fix kernel-doc warnings as reported by the kernel test robot: fsl_pamu.c:192: warning: expecting prototype for pamu_config_paace(). Prototype was for pamu_config_ppaace() instead fsl_pamu.c:239: warning: Function parameter or member 'omi_index' not described in 'get_ome_index' fsl_pamu.c:239: warning: Function parameter or member 'dev' not described in 'get_ome_index' fsl_pamu.c:332: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Setup operation mapping and stash destinations for QMAN and QMAN portal. fsl_pamu.c:361: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Setup the operation mapping table for various devices. This is a static Fixes: 695093e38c3e ("iommu/fsl: Freescale PAMU driver and iommu implementation.") Fixes: cd70d4659ff3 ("iommu/fsl: Various cleanups") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: lore.kernel.org/r/202302281151.B1WtZvSC-lkp@intel.com Cc: Aditya Srivastava Cc: Joerg Roedel Cc: Will Deacon Cc: Robin Murphy Cc: iommu@lists.linux.dev Cc: Timur Tabi Cc: Varun Sethi Cc: Emil Medve Reviewed-by: Lu Baolu Acked-by: Timur Tabi Link: https://lore.kernel.org/r/20230308034504.9985-1-rdunlap@infradead.org Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 05d820fb1d0b..f37d3b044131 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -178,7 +178,7 @@ int pamu_update_paace_stash(int liodn, u32 value) } /** - * pamu_config_paace() - Sets up PPAACE entry for specified liodn + * pamu_config_ppaace() - Sets up PPAACE entry for specified liodn * * @liodn: Logical IO device number * @omi: Operation mapping index -- if ~omi == 0 then omi not defined @@ -232,7 +232,8 @@ int pamu_config_ppaace(int liodn, u32 omi, u32 stashid, int prot) /** * get_ome_index() - Returns the index in the operation mapping table * for device. - * @*omi_index: pointer for storing the index value + * @omi_index: pointer for storing the index value + * @dev: target device * */ void get_ome_index(u32 *omi_index, struct device *dev) @@ -328,7 +329,7 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) #define QMAN_PORTAL_PAACE 2 #define BMAN_PAACE 3 -/** +/* * Setup operation mapping and stash destinations for QMAN and QMAN portal. * Memory accesses to QMAN and BMAN private memory need not be coherent, so * clear the PAACE entry coherency attribute for them. @@ -357,7 +358,7 @@ static void setup_qbman_paace(struct paace *ppaace, int paace_type) } } -/** +/* * Setup the operation mapping table for various devices. This is a static * table where each table index corresponds to a particular device. PAMU uses * this table to translate device transaction to appropriate corenet From 1b0b5f50dc83351fa2d2158a183cbe258178cb3f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 8 Mar 2023 18:17:08 +0100 Subject: [PATCH 05/82] iommu: Spelling s/cpmxchg64/cmpxchg64/ Fix misspellings of "cmpxchg64" Fixes: d286a58bc8f4d5cf ("iommu: Tidy up io-pgtable dependencies") Signed-off-by: Geert Uytterhoeven Acked-by: Robin Murphy Link: https://lore.kernel.org/r/eab156858147249d44463662eb9192202c39ab9f.1678295792.git.geert+renesas@glider.be Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 889c7efd050b..c4928514e5e2 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -33,7 +33,7 @@ config IOMMU_IO_PGTABLE_LPAE bool "ARMv7/v8 Long Descriptor Format" select IOMMU_IO_PGTABLE depends on ARM || ARM64 || COMPILE_TEST - depends on !GENERIC_ATOMIC64 # for cpmxchg64() + depends on !GENERIC_ATOMIC64 # for cmpxchg64() help Enable support for the ARM long descriptor pagetable format. This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page @@ -72,7 +72,7 @@ config IOMMU_IO_PGTABLE_DART bool "Apple DART Formats" select IOMMU_IO_PGTABLE depends on ARM64 || COMPILE_TEST - depends on !GENERIC_ATOMIC64 # for cpmxchg64() + depends on !GENERIC_ATOMIC64 # for cmpxchg64() help Enable support for the Apple DART pagetable formats. These include the t8020 and t6000/t8110 DART formats used in Apple M1/M2 family From a6c9e3874e5759af089c07ab80c87105babc9727 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:09 -0600 Subject: [PATCH 06/82] iommu: Use of_property_present() for testing DT property presence It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. As part of this, convert of_get_property/of_find_property calls to the recently added of_property_present() helper when we just want to test for presence of a property and nothing more. Signed-off-by: Rob Herring Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20230310144709.1542910-1-robh@kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 2 +- drivers/iommu/ipmmu-vmsa.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 2ff7a72cf377..d0843caf8760 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -139,7 +139,7 @@ static int arm_smmu_register_legacy_master(struct device *dev, int err; np = dev_get_dev_node(dev); - if (!np || !of_find_property(np, "#stream-id-cells", NULL)) { + if (!np || !of_property_present(np, "#stream-id-cells")) { of_node_put(np); return -ENODEV; } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index bdf1a4e5eae0..f4470303d906 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1014,7 +1014,7 @@ static int ipmmu_probe(struct platform_device *pdev) * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property. */ if (!mmu->features->has_cache_leaf_nodes || - !of_find_property(pdev->dev.of_node, "renesas,ipmmu-main", NULL)) + !of_property_present(pdev->dev.of_node, "renesas,ipmmu-main")) mmu->root = mmu; else mmu->root = ipmmu_find_root(); From 0c0431646116b4fbba75c4784e1bda0781dc8073 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:09 -0600 Subject: [PATCH 07/82] iommu/omap: Use of_property_read_bool() for boolean properties It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. Convert reading boolean properties to of_property_read_bool(). Signed-off-by: Rob Herring Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20230310144709.1542980-1-robh@kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 3ab078112a7c..baafd6211ac4 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1191,7 +1191,7 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) return -EINVAL; - if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) + if (of_property_read_bool(of, "ti,iommu-bus-err-back")) obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; obj->dev = &pdev->dev; From b67ab6fb63bbbe6d2c0edebd28c27ea425c8b55b Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 13 Mar 2023 21:40:26 +0900 Subject: [PATCH 08/82] dt-bindings: iommu: renesas, ipmmu-vmsa: Update for R-Car Gen4 Since R-Car Gen4 does not have the main IPMMU IMSSTR register, update the bindings to drop the interrupt bit number from the renesas,ipmmu-main property. Signed-off-by: Yoshihiro Shimoda [geert: Re-add removed items level, add minItems/maxItems constraints] Signed-off-by: Geert Uytterhoeven Acked-by: Rob Herring Link: https://lore.kernel.org/r/20230313124026.954514-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Joerg Roedel --- .../bindings/iommu/renesas,ipmmu-vmsa.yaml | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml index 72308a4c14e7..be90f68c11d1 100644 --- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml +++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml @@ -74,16 +74,16 @@ properties: renesas,ipmmu-main: $ref: /schemas/types.yaml#/definitions/phandle-array items: - - items: + - minItems: 1 + items: - description: phandle to main IPMMU - - description: the interrupt bit number associated with the particular - cache IPMMU device. The interrupt bit number needs to match the main - IPMMU IMSSTR register. Only used by cache IPMMU instances. + - description: + The interrupt bit number associated with the particular cache + IPMMU device. If present, the interrupt bit number needs to match + the main IPMMU IMSSTR register. Only used by cache IPMMU + instances. description: - Reference to the main IPMMU phandle plus 1 cell. The cell is - the interrupt bit number associated with the particular cache IPMMU - device. The interrupt bit number needs to match the main IPMMU IMSSTR - register. Only used by cache IPMMU instances. + Reference to the main IPMMU. required: - compatible @@ -109,6 +109,22 @@ allOf: required: - power-domains + - if: + properties: + compatible: + contains: + const: renesas,rcar-gen4-ipmmu-vmsa + then: + properties: + renesas,ipmmu-main: + items: + - maxItems: 1 + else: + properties: + renesas,ipmmu-main: + items: + - minItems: 2 + examples: - | #include From 0d571dcbe7c6d36dcfcb8e04a49cc01fe462d171 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Tue, 21 Mar 2023 09:23:47 +0000 Subject: [PATCH 09/82] iommu/amd: Allocate page table using numa locality info Introduce 'struct protection_domain->nid' variable. It will contain IOMMU NUMA node ID. And allocate page table pages using IOMMU numa locality info. This optimizes page table walk by IOMMU. Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20230321092348.6127-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 8 ++++++++ drivers/iommu/amd/amd_iommu_types.h | 1 + drivers/iommu/amd/io_pgtable.c | 4 ++-- drivers/iommu/amd/io_pgtable_v2.c | 16 ++++++---------- drivers/iommu/amd/iommu.c | 6 ++++++ 5 files changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index c160a332ce33..20a142b54498 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -122,6 +122,14 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev) return PCI_SEG_DEVID_TO_SBDF(seg, devid); } +static inline void *alloc_pgtable_page(int nid, gfp_t gfp) +{ + struct page *page; + + page = alloc_pages_node(nid, gfp | __GFP_ZERO, 0); + return page ? page_address(page) : NULL; +} + extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool amd_iommu_is_attach_deferred(struct device *dev); extern int __init add_special_device(u8 type, u8 id, u32 *devid, diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 3d684190b4d5..e5cfdeaaad48 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -549,6 +549,7 @@ struct protection_domain { spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int glx; /* Number of levels for GCR3 table */ + int nid; /* Node ID */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ unsigned dev_cnt; /* devices assigned to this domain */ diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index ace0e9b8b913..1b67116882be 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -156,7 +156,7 @@ static bool increase_address_space(struct protection_domain *domain, bool ret = true; u64 *pte; - pte = (void *)get_zeroed_page(gfp); + pte = alloc_pgtable_page(domain->nid, gfp); if (!pte) return false; @@ -250,7 +250,7 @@ static u64 *alloc_pte(struct protection_domain *domain, if (!IOMMU_PTE_PRESENT(__pte) || pte_level == PAGE_MODE_NONE) { - page = (u64 *)get_zeroed_page(gfp); + page = alloc_pgtable_page(domain->nid, gfp); if (!page) return NULL; diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 8638ddf6fb3b..9f8b423f7630 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -46,11 +46,6 @@ static inline bool is_large_pte(u64 pte) return (pte & IOMMU_PAGE_PSE); } -static inline void *alloc_pgtable_page(void) -{ - return (void *)get_zeroed_page(GFP_KERNEL); -} - static inline u64 set_pgtable_attr(u64 *page) { u64 prot; @@ -138,8 +133,8 @@ static void free_pgtable(u64 *pt, int level) } /* Allocate page table */ -static u64 *v2_alloc_pte(u64 *pgd, unsigned long iova, - unsigned long pg_size, bool *updated) +static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, + unsigned long pg_size, gfp_t gfp, bool *updated) { u64 *pte, *page; int level, end_level; @@ -162,7 +157,7 @@ static u64 *v2_alloc_pte(u64 *pgd, unsigned long iova, } if (!IOMMU_PTE_PRESENT(__pte)) { - page = alloc_pgtable_page(); + page = alloc_pgtable_page(nid, gfp); if (!page) return NULL; @@ -262,7 +257,8 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, while (mapped_size < size) { map_size = get_alloc_page_size(pgsize); - pte = v2_alloc_pte(pdom->iop.pgd, iova, map_size, &updated); + pte = v2_alloc_pte(pdom->nid, pdom->iop.pgd, + iova, map_size, gfp, &updated); if (!pte) { ret = -EINVAL; goto out; @@ -384,7 +380,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo struct protection_domain *pdom = (struct protection_domain *)cookie; int ret; - pgtable->pgd = alloc_pgtable_page(); + pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC); if (!pgtable->pgd) return NULL; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 5a505ba5467e..5452d0dd6569 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1662,6 +1662,10 @@ static void do_attach(struct iommu_dev_data *dev_data, dev_data->domain = domain; list_add(&dev_data->list, &domain->dev_list); + /* Update NUMA Node ID */ + if (domain->nid == NUMA_NO_NODE) + domain->nid = dev_to_node(dev_data->dev); + /* Do reference counting */ domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; @@ -2097,6 +2101,8 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) if (type == IOMMU_DOMAIN_IDENTITY) return domain; + domain->nid = NUMA_NO_NODE; + pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); if (!pgtbl_ops) { domain_id_free(domain->id); From 4d4a0dbab2b998692f54fa6f7a9d2eb675110743 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Tue, 21 Mar 2023 09:23:48 +0000 Subject: [PATCH 10/82] iommu/amd: Allocate IOMMU irqs using numa locality info Use numa information to allocate irq resources and also to set irq affinity. This optimizes the IOMMU interrupt handling. Reviewed-by: Suravee Suthikulpanit Reviewed-by: Alexey Kardashevskiy Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20230321092348.6127-3-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 19a46b9f7357..141eca29c951 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2383,6 +2383,7 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) struct irq_domain *domain; struct irq_alloc_info info; int irq, ret; + int node = dev_to_node(&iommu->dev->dev); domain = iommu_get_irqdomain(); if (!domain) @@ -2392,7 +2393,7 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) info.type = X86_IRQ_ALLOC_TYPE_AMDVI; info.data = iommu; - irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); + irq = irq_domain_alloc_irqs(domain, 1, node, &info); if (irq < 0) { irq_domain_remove(domain); return irq; From 24dfb197c3b7d274e5f78fce5e63392526a933e9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Mar 2023 14:49:51 +0800 Subject: [PATCH 11/82] iommu/ipmmu-vmsa: Call arm_iommu_release_mapping() in release path In the iommu driver's release_device operation, the driver should detach the device from any attached domain and release the resources allocated in the probe_device and probe_finalize paths. Replace arm_iommu_detach_device() with arm_iommu_release_mapping() in the release path of the ipmmu-vmsa driver. The device_release callback is called in device_del(), this device is not coming back. Zeroing out pointers and testing for a condition which cannot be true by construction is simply a waste of time and code. The bonus is that it also removes a obstacle of arm_iommu_detach_device() re-entering the iommu core during release_device. With this removed, the iommu core code could be simplified a lot. Signed-off-by: Jason Gunthorpe Suggested-by: Robin Murphy Link: https://lore.kernel.org/linux-iommu/7b248ba1-3967-5cd8-82e9-0268c706d320@arm.com/ Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230322064956.263419-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index f4470303d906..7fcbef8802de 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -30,7 +30,6 @@ #define arm_iommu_create_mapping(...) NULL #define arm_iommu_attach_device(...) -ENODEV #define arm_iommu_release_mapping(...) do {} while (0) -#define arm_iommu_detach_device(...) do {} while (0) #endif #define IPMMU_CTX_MAX 16U @@ -820,7 +819,18 @@ static void ipmmu_probe_finalize(struct device *dev) static void ipmmu_release_device(struct device *dev) { - arm_iommu_detach_device(dev); + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); + unsigned int i; + + for (i = 0; i < fwspec->num_ids; ++i) { + unsigned int utlb = fwspec->ids[i]; + + ipmmu_imuctr_write(mmu, utlb, 0); + mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID; + } + + arm_iommu_release_mapping(mmu->mapping); } static struct iommu_group *ipmmu_find_group(struct device *dev) From 293f2564f3dd76b749fc31ae4226bd818f860e4f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Mar 2023 14:49:52 +0800 Subject: [PATCH 12/82] iommu: Split iommu_group_remove_device() into helpers So that code could be re-used by iommu_release_device() in the subsequent change. No intention for functionality change. Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230322064956.263419-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 64 +++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ace9d418a115..942bab8b9798 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -453,6 +453,46 @@ int iommu_probe_device(struct device *dev) } +/* + * Remove a device from a group's device list and return the group device + * if successful. + */ +static struct group_device * +__iommu_group_remove_device(struct iommu_group *group, struct device *dev) +{ + struct group_device *device; + + lockdep_assert_held(&group->mutex); + list_for_each_entry(device, &group->devices, list) { + if (device->dev == dev) { + list_del(&device->list); + return device; + } + } + + return NULL; +} + +/* + * Release a device from its group and decrements the iommu group reference + * count. + */ +static void __iommu_group_release_device(struct iommu_group *group, + struct group_device *grp_dev) +{ + struct device *dev = grp_dev->dev; + + sysfs_remove_link(group->devices_kobj, grp_dev->name); + sysfs_remove_link(&dev->kobj, "iommu_group"); + + trace_remove_device_from_group(group->id, dev); + + kfree(grp_dev->name); + kfree(grp_dev); + dev->iommu_group = NULL; + kobject_put(group->devices_kobj); +} + void iommu_release_device(struct device *dev) { const struct iommu_ops *ops; @@ -1068,7 +1108,7 @@ EXPORT_SYMBOL_GPL(iommu_group_add_device); void iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; - struct group_device *tmp_device, *device = NULL; + struct group_device *device; if (!group) return; @@ -1076,27 +1116,11 @@ void iommu_group_remove_device(struct device *dev) dev_info(dev, "Removing from iommu group %d\n", group->id); mutex_lock(&group->mutex); - list_for_each_entry(tmp_device, &group->devices, list) { - if (tmp_device->dev == dev) { - device = tmp_device; - list_del(&device->list); - break; - } - } + device = __iommu_group_remove_device(group, dev); mutex_unlock(&group->mutex); - if (!device) - return; - - sysfs_remove_link(group->devices_kobj, device->name); - sysfs_remove_link(&dev->kobj, "iommu_group"); - - trace_remove_device_from_group(group->id, dev); - - kfree(device->name); - kfree(device); - dev->iommu_group = NULL; - kobject_put(group->devices_kobj); + if (device) + __iommu_group_release_device(group, device); } EXPORT_SYMBOL_GPL(iommu_group_remove_device); From dba9ca9d41f5266194a5e8007db50985d271d0a4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Mar 2023 14:49:53 +0800 Subject: [PATCH 13/82] iommu: Same critical region for device release and removal In a non-driver context, it is crucial to ensure the consistency of a device's iommu ops. Otherwise, it may result in a situation where a device is released but it's iommu ops are still used. Put the ops->release_device and __iommu_group_remove_device() in a same group->mutext critical region, so that, as long as group->mutex is held and the device is in its group's device list, its iommu ops are always consistent. Add check of group ownership if the released device is the last one. Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230322064956.263419-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 942bab8b9798..419d5655ce61 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -495,18 +495,44 @@ static void __iommu_group_release_device(struct iommu_group *group, void iommu_release_device(struct device *dev) { + struct iommu_group *group = dev->iommu_group; + struct group_device *device; const struct iommu_ops *ops; - if (!dev->iommu) + if (!dev->iommu || !group) return; iommu_device_unlink(dev->iommu->iommu_dev, dev); + mutex_lock(&group->mutex); + device = __iommu_group_remove_device(group, dev); + + /* + * If the group has become empty then ownership must have been released, + * and the current domain must be set back to NULL or the default + * domain. + */ + if (list_empty(&group->devices)) + WARN_ON(group->owner_cnt || + group->domain != group->default_domain); + + /* + * release_device() must stop using any attached domain on the device. + * If there are still other devices in the group they are not effected + * by this callback. + * + * The IOMMU driver must set the device to either an identity or + * blocking translation and stop using any domain pointer, as it is + * going to be freed. + */ ops = dev_iommu_ops(dev); if (ops->release_device) ops->release_device(dev); + mutex_unlock(&group->mutex); + + if (device) + __iommu_group_release_device(group, device); - iommu_group_remove_device(dev); module_put(ops->owner); dev_iommu_free(dev); } From 33793748de8fde338c11277d3d16dadc79ba65bf Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Mar 2023 14:49:54 +0800 Subject: [PATCH 14/82] iommu: Move lock from iommu_change_dev_def_domain() to its caller The intention is to make it possible to put group ownership check and default domain change in a same critical region protected by the group's mutex lock. No intentional functional change. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230322064956.263419-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 419d5655ce61..32c51e6a3491 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2889,7 +2889,7 @@ static int iommu_change_dev_def_domain(struct iommu_group *group, int ret, dev_def_dom; struct device *dev; - mutex_lock(&group->mutex); + lockdep_assert_held(&group->mutex); if (group->default_domain != group->domain) { dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n"); @@ -2978,28 +2978,15 @@ static int iommu_change_dev_def_domain(struct iommu_group *group, goto free_new_domain; group->domain = group->default_domain; - - /* - * Release the mutex here because ops->probe_finalize() call-back of - * some vendor IOMMU drivers calls arm_iommu_attach_device() which - * in-turn might call back into IOMMU core code, where it tries to take - * group->mutex, resulting in a deadlock. - */ - mutex_unlock(&group->mutex); - - /* Make sure dma_ops is appropriatley set */ - iommu_group_do_probe_finalize(dev, group->default_domain); iommu_domain_free(prev_dom); + return 0; free_new_domain: iommu_domain_free(group->default_domain); group->default_domain = prev_dom; group->domain = prev_dom; - out: - mutex_unlock(&group->mutex); - return ret; } @@ -3089,7 +3076,19 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, goto out; } + mutex_lock(&group->mutex); ret = iommu_change_dev_def_domain(group, dev, req_type); + /* + * Release the mutex here because ops->probe_finalize() call-back of + * some vendor IOMMU drivers calls arm_iommu_attach_device() which + * in-turn might call back into IOMMU core code, where it tries to take + * group->mutex, resulting in a deadlock. + */ + mutex_unlock(&group->mutex); + + /* Make sure dma_ops is appropriatley set */ + if (!ret) + iommu_group_do_probe_finalize(dev, group->default_domain); ret = ret ?: count; out: From 49a22aae7d9cfd1e8060b7ea4e0a1537f60dd39d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Mar 2023 14:49:55 +0800 Subject: [PATCH 15/82] iommu: Replace device_lock() with group->mutex device_lock() was used in iommu_group_store_type() to prevent the devices in an iommu group from being attached by any device driver. On the other hand, in order to avoid lock race between group->mutex and device_lock(), it limited the usage scenario to the singleton groups. We already have the DMA ownership scheme to avoid driver attachment and group->mutex ensures that device ops are always valid, there's no need for device_lock() anymore. Remove device_lock() and the singleton group limitation. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230322064956.263419-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 81 ++++++++++--------------------------------- 1 file changed, 18 insertions(+), 63 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 32c51e6a3491..6538c72b5ae1 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2956,14 +2956,6 @@ static int iommu_change_dev_def_domain(struct iommu_group *group, goto out; } - /* We can bring up a flush queue without tearing down the domain */ - if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) { - ret = iommu_dma_init_fq(prev_dom); - if (!ret) - prev_dom->type = IOMMU_DOMAIN_DMA_FQ; - goto out; - } - /* Sets group->default_domain to the newly allocated domain */ ret = iommu_group_alloc_default_domain(dev->bus, group, type); if (ret) @@ -2996,7 +2988,7 @@ static int iommu_change_dev_def_domain(struct iommu_group *group, * transition. Return failure if this isn't met. * * We need to consider the race between this and the device release path. - * device_lock(dev) is used here to guarantee that the device release path + * group->mutex is used here to guarantee that the device release path * will not be entered at the same time. */ static ssize_t iommu_group_store_type(struct iommu_group *group, @@ -3023,61 +3015,29 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, else return -EINVAL; - /* - * Lock/Unlock the group mutex here before device lock to - * 1. Make sure that the iommu group has only one device (this is a - * prerequisite for step 2) - * 2. Get struct *dev which is needed to lock device - */ mutex_lock(&group->mutex); - if (iommu_group_device_count(group) != 1) { + /* We can bring up a flush queue without tearing down the domain. */ + if (req_type == IOMMU_DOMAIN_DMA_FQ && + group->default_domain->type == IOMMU_DOMAIN_DMA) { + ret = iommu_dma_init_fq(group->default_domain); + if (!ret) + group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; mutex_unlock(&group->mutex); - pr_err_ratelimited("Cannot change default domain: Group has more than one device\n"); - return -EINVAL; + + return ret ?: count; + } + + /* Otherwise, ensure that device exists and no driver is bound. */ + if (list_empty(&group->devices) || group->owner_cnt) { + mutex_unlock(&group->mutex); + return -EPERM; } - /* Since group has only one device */ grp_dev = list_first_entry(&group->devices, struct group_device, list); dev = grp_dev->dev; - get_device(dev); - /* - * Don't hold the group mutex because taking group mutex first and then - * the device lock could potentially cause a deadlock as below. Assume - * two threads T1 and T2. T1 is trying to change default domain of an - * iommu group and T2 is trying to hot unplug a device or release [1] VF - * of a PCIe device which is in the same iommu group. T1 takes group - * mutex and before it could take device lock assume T2 has taken device - * lock and is yet to take group mutex. Now, both the threads will be - * waiting for the other thread to release lock. Below, lock order was - * suggested. - * device_lock(dev); - * mutex_lock(&group->mutex); - * iommu_change_dev_def_domain(); - * mutex_unlock(&group->mutex); - * device_unlock(dev); - * - * [1] Typical device release path - * device_lock() from device/driver core code - * -> bus_notifier() - * -> iommu_bus_notifier() - * -> iommu_release_device() - * -> ops->release_device() vendor driver calls back iommu core code - * -> mutex_lock() from iommu core code - */ - mutex_unlock(&group->mutex); - - /* Check if the device in the group still has a driver bound to it */ - device_lock(dev); - if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ && - group->default_domain->type == IOMMU_DOMAIN_DMA)) { - pr_err_ratelimited("Device is still bound to driver\n"); - ret = -EBUSY; - goto out; - } - - mutex_lock(&group->mutex); ret = iommu_change_dev_def_domain(group, dev, req_type); + /* * Release the mutex here because ops->probe_finalize() call-back of * some vendor IOMMU drivers calls arm_iommu_attach_device() which @@ -3088,14 +3048,9 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, /* Make sure dma_ops is appropriatley set */ if (!ret) - iommu_group_do_probe_finalize(dev, group->default_domain); - ret = ret ?: count; + __iommu_group_dma_finalize(group); -out: - device_unlock(dev); - put_device(dev); - - return ret; + return ret ?: count; } static bool iommu_is_default_domain(struct iommu_group *group) From 4c8444f19ed05971c91cd93252f4a8e3dcb37995 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Mar 2023 14:49:56 +0800 Subject: [PATCH 16/82] iommu: Cleanup iommu_change_dev_def_domain() As the singleton group limitation has been removed, cleanup the code in iommu_change_dev_def_domain() accordingly. Documentation is also updated. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230322064956.263419-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- .../ABI/testing/sysfs-kernel-iommu_groups | 1 - drivers/iommu/iommu.c | 83 +++++-------------- 2 files changed, 21 insertions(+), 63 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups index b15af6a5bc08..a42d4383d999 100644 --- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups +++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups @@ -53,7 +53,6 @@ Description: /sys/kernel/iommu_groups//type shows the type of default The default domain type of a group may be modified only when - - The group has only one device. - The device in the group is not bound to any device driver. So, the users must unbind the appropriate driver before changing the default domain type. diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6538c72b5ae1..9fccfc69e4c5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2867,11 +2867,10 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); /* - * Changes the default domain of an iommu group that has *only* one device + * Changes the default domain of an iommu group * * @group: The group for which the default domain should be changed - * @prev_dev: The device in the group (this is used to make sure that the device - * hasn't changed after the caller has called this function) + * @dev: The first device in the group * @type: The type of the new default domain that gets associated with the group * * Returns 0 on success and error code on failure @@ -2882,103 +2881,63 @@ EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); * Please take a closer look if intended to use for other purposes. */ static int iommu_change_dev_def_domain(struct iommu_group *group, - struct device *prev_dev, int type) + struct device *dev, int type) { + struct __group_domain_type gtype = {NULL, 0}; struct iommu_domain *prev_dom; - struct group_device *grp_dev; - int ret, dev_def_dom; - struct device *dev; + int ret; lockdep_assert_held(&group->mutex); - if (group->default_domain != group->domain) { - dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n"); - ret = -EBUSY; - goto out; - } - - /* - * iommu group wasn't locked while acquiring device lock in - * iommu_group_store_type(). So, make sure that the device count hasn't - * changed while acquiring device lock. - * - * Changing default domain of an iommu group with two or more devices - * isn't supported because there could be a potential deadlock. Consider - * the following scenario. T1 is trying to acquire device locks of all - * the devices in the group and before it could acquire all of them, - * there could be another thread T2 (from different sub-system and use - * case) that has already acquired some of the device locks and might be - * waiting for T1 to release other device locks. - */ - if (iommu_group_device_count(group) != 1) { - dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n"); - ret = -EINVAL; - goto out; - } - - /* Since group has only one device */ - grp_dev = list_first_entry(&group->devices, struct group_device, list); - dev = grp_dev->dev; - - if (prev_dev != dev) { - dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n"); - ret = -EBUSY; - goto out; - } - prev_dom = group->default_domain; - if (!prev_dom) { - ret = -EINVAL; - goto out; - } - - dev_def_dom = iommu_get_def_domain_type(dev); + __iommu_group_for_each_dev(group, >ype, + probe_get_default_domain_type); if (!type) { /* * If the user hasn't requested any specific type of domain and * if the device supports both the domains, then default to the * domain the device was booted with */ - type = dev_def_dom ? : iommu_def_domain_type; - } else if (dev_def_dom && type != dev_def_dom) { - dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n", + type = gtype.type ? : iommu_def_domain_type; + } else if (gtype.type && type != gtype.type) { + dev_err_ratelimited(dev, "Device cannot be in %s domain\n", iommu_domain_type_str(type)); - ret = -EINVAL; - goto out; + return -EINVAL; } /* * Switch to a new domain only if the requested domain type is different * from the existing default domain type */ - if (prev_dom->type == type) { - ret = 0; - goto out; - } + if (prev_dom->type == type) + return 0; + + group->default_domain = NULL; + group->domain = NULL; /* Sets group->default_domain to the newly allocated domain */ ret = iommu_group_alloc_default_domain(dev->bus, group, type); if (ret) - goto out; + goto restore_old_domain; - ret = iommu_create_device_direct_mappings(group, dev); + ret = iommu_group_create_direct_mappings(group); if (ret) goto free_new_domain; - ret = __iommu_attach_device(group->default_domain, dev); + ret = __iommu_attach_group(group->default_domain, group); if (ret) goto free_new_domain; - group->domain = group->default_domain; iommu_domain_free(prev_dom); return 0; free_new_domain: iommu_domain_free(group->default_domain); +restore_old_domain: group->default_domain = prev_dom; group->domain = prev_dom; -out: + return ret; } From c33fcc13ee0d02efe5729588e8fba8b899c8879a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Mar 2023 20:34:21 +0800 Subject: [PATCH 17/82] iommu: Use sysfs_emit() for sysfs show Use sysfs_emit() instead of the sprintf() for sysfs entries. sysfs_emit() knows the maximum of the temporary buffer used for outputting sysfs content and avoids overrunning the buffer length. Prefer 'long long' over 'long long int' as suggested by checkpatch.pl. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230322123421.278852-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 4 ++-- drivers/iommu/intel/iommu.c | 17 +++++++++-------- drivers/iommu/iommu.c | 31 +++++++++++++++---------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 19a46b9f7357..a98202018140 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1941,7 +1941,7 @@ static ssize_t amd_iommu_show_cap(struct device *dev, char *buf) { struct amd_iommu *iommu = dev_to_amd_iommu(dev); - return sprintf(buf, "%x\n", iommu->cap); + return sysfs_emit(buf, "%x\n", iommu->cap); } static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); @@ -1950,7 +1950,7 @@ static ssize_t amd_iommu_show_features(struct device *dev, char *buf) { struct amd_iommu *iommu = dev_to_amd_iommu(dev); - return sprintf(buf, "%llx:%llx\n", iommu->features2, iommu->features); + return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features); } static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7c2f4bd33582..a366790fb781 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3760,8 +3760,8 @@ static ssize_t version_show(struct device *dev, { struct intel_iommu *iommu = dev_to_intel_iommu(dev); u32 ver = readl(iommu->reg + DMAR_VER_REG); - return sprintf(buf, "%d:%d\n", - DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); + return sysfs_emit(buf, "%d:%d\n", + DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); } static DEVICE_ATTR_RO(version); @@ -3769,7 +3769,7 @@ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%llx\n", iommu->reg_phys); + return sysfs_emit(buf, "%llx\n", iommu->reg_phys); } static DEVICE_ATTR_RO(address); @@ -3777,7 +3777,7 @@ static ssize_t cap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%llx\n", iommu->cap); + return sysfs_emit(buf, "%llx\n", iommu->cap); } static DEVICE_ATTR_RO(cap); @@ -3785,7 +3785,7 @@ static ssize_t ecap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%llx\n", iommu->ecap); + return sysfs_emit(buf, "%llx\n", iommu->ecap); } static DEVICE_ATTR_RO(ecap); @@ -3793,7 +3793,7 @@ static ssize_t domains_supported_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap)); + return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap)); } static DEVICE_ATTR_RO(domains_supported); @@ -3801,8 +3801,9 @@ static ssize_t domains_used_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids, - cap_ndoms(iommu->cap))); + return sysfs_emit(buf, "%d\n", + bitmap_weight(iommu->domain_ids, + cap_ndoms(iommu->cap))); } static DEVICE_ATTR_RO(domains_used); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9fccfc69e4c5..7abee83610b6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -616,7 +616,7 @@ static void iommu_group_remove_file(struct iommu_group *group, static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) { - return sprintf(buf, "%s\n", group->name); + return sysfs_emit(buf, "%s\n", group->name); } /** @@ -729,52 +729,51 @@ static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, { struct iommu_resv_region *region, *next; struct list_head group_resv_regions; - char *str = buf; + int offset = 0; INIT_LIST_HEAD(&group_resv_regions); iommu_get_group_resv_regions(group, &group_resv_regions); list_for_each_entry_safe(region, next, &group_resv_regions, list) { - str += sprintf(str, "0x%016llx 0x%016llx %s\n", - (long long int)region->start, - (long long int)(region->start + - region->length - 1), - iommu_group_resv_type_string[region->type]); + offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n", + (long long)region->start, + (long long)(region->start + + region->length - 1), + iommu_group_resv_type_string[region->type]); kfree(region); } - return (str - buf); + return offset; } static ssize_t iommu_group_show_type(struct iommu_group *group, char *buf) { - char *type = "unknown\n"; + char *type = "unknown"; mutex_lock(&group->mutex); if (group->default_domain) { switch (group->default_domain->type) { case IOMMU_DOMAIN_BLOCKED: - type = "blocked\n"; + type = "blocked"; break; case IOMMU_DOMAIN_IDENTITY: - type = "identity\n"; + type = "identity"; break; case IOMMU_DOMAIN_UNMANAGED: - type = "unmanaged\n"; + type = "unmanaged"; break; case IOMMU_DOMAIN_DMA: - type = "DMA\n"; + type = "DMA"; break; case IOMMU_DOMAIN_DMA_FQ: - type = "DMA-FQ\n"; + type = "DMA-FQ"; break; } } mutex_unlock(&group->mutex); - strcpy(buf, type); - return strlen(type); + return sysfs_emit(buf, "%s\n", type); } static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); From 7f061c19f6521ef2b3e6f784ae344ebb562a5343 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 8 Feb 2023 10:13:58 +0100 Subject: [PATCH 18/82] dt-bindings: arm-smmu: Add compatible for SM8550 SoC Add the SoC specific compatible for SM8550 implementing arm,mmu-500. Signed-off-by: Abel Vesa Signed-off-by: Neil Armstrong Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230207-topic-sm8550-upstream-smmu-bindings-v3-1-cb15a7123cfe@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 807cb511fe18..ea81e9b1860c 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -53,6 +53,7 @@ properties: - qcom,sm8250-smmu-500 - qcom,sm8350-smmu-500 - qcom,sm8450-smmu-500 + - qcom,sm8550-smmu-500 - const: qcom,smmu-500 - const: arm,mmu-500 @@ -389,6 +390,7 @@ allOf: - qcom,sm6375-smmu-500 - qcom,sm8350-smmu-500 - qcom,sm8450-smmu-500 + - qcom,sm8550-smmu-500 then: properties: clock-names: false From 5c3686616b1840b3143b227eb58fb1c1621d204e Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 13 Mar 2023 17:44:16 +0100 Subject: [PATCH 19/82] dt-bindings: arm-smmu: Use qcom,smmu compatible for MMU500 adreno SMMUs qcom,smmu-500 was introduced to prevent people from adding new compatibles for what seems to roughly be the same hardware. Use it for qcom,adreno-smmu-compatible targets as well. While at it, fix the "arm,smmu-500" -> "arm,mmu-500" typo in the comment. Acked-by: Krzysztof Kozlowski Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230313-topic-gpu_smmu_bindings-v3-1-66ab655fbfd5@linaro.org Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.yaml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index ea81e9b1860c..b3a8f5864648 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -76,9 +76,19 @@ properties: - qcom,sm8350-smmu-500 - qcom,sm8450-smmu-500 - const: arm,mmu-500 - - - description: Qcom Adreno GPUs implementing "arm,smmu-500" + - description: Qcom Adreno GPUs implementing "qcom,smmu-500" and "arm,mmu-500" items: + - enum: + - qcom,sc7280-smmu-500 + - qcom,sm8150-smmu-500 + - qcom,sm8250-smmu-500 + - const: qcom,adreno-smmu + - const: qcom,smmu-500 + - const: arm,mmu-500 + - description: Qcom Adreno GPUs implementing "arm,mmu-500" (legacy binding) + deprecated: true + items: + # Do not add additional SoC to this list. Instead use previous list. - enum: - qcom,sc7280-smmu-500 - qcom,sm8150-smmu-500 From 16d1646871fbe800c9751f0816a970f9126a6586 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 13 Mar 2023 17:44:17 +0100 Subject: [PATCH 20/82] dt-bindings: arm-smmu: Add SM8350 Adreno SMMU Document the Adreno SMMU present on SM8350. Acked-by: Krzysztof Kozlowski Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230313-topic-gpu_smmu_bindings-v3-2-66ab655fbfd5@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index b3a8f5864648..f45e4296a5ea 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -82,6 +82,7 @@ properties: - qcom,sc7280-smmu-500 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 + - qcom,sm8350-smmu-500 - const: qcom,adreno-smmu - const: qcom,smmu-500 - const: arm,mmu-500 From 3ad6585509dc8157e598dbd06b71efed2e45fee8 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 15 Mar 2023 11:52:08 +0100 Subject: [PATCH 21/82] dt-bindings: arm-smmu: Document SM61[12]5 GPU SMMU Both of these SoCs have a Qualcomm MMU500 implementation of SMMU in front of their GPUs that expect 3 clocks. Both of them also have an APPS SMMU that expects no clocks. Remove qcom,sm61[12]5-smmu-500 from the "no clocks" list (intentionally 'breaking' the schema checks of APPS SMMU, as now it *can* accept clocks - with the current structure of this file it would have taken a wastefully-long time to sort this out properly..) and add necessary yaml to describe the clocks required by the GPU SMMUs. Signed-off-by: Konrad Dybcio Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230315-topic-kamorta_adrsmmu-v1-1-d1c0dea90bd9@linaro.org Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.yaml | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index f45e4296a5ea..ba677d401e24 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -80,6 +80,8 @@ properties: items: - enum: - qcom,sc7280-smmu-500 + - qcom,sm6115-smmu-500 + - qcom,sm6125-smmu-500 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 - qcom,sm8350-smmu-500 @@ -376,6 +378,30 @@ allOf: - description: interface clock required to access smmu's registers through the TCU's programming interface. + - if: + properties: + compatible: + items: + - enum: + - qcom,sm6115-smmu-500 + - qcom,sm6125-smmu-500 + - const: qcom,adreno-smmu + - const: qcom,smmu-500 + - const: arm,mmu-500 + then: + properties: + clock-names: + items: + - const: mem + - const: hlos + - const: iface + + clocks: + items: + - description: GPU memory bus clock + - description: Voter clock required for HLOS SMMU access + - description: Interface clock required for register access + # Disallow clocks for all other platforms with specific compatibles - if: properties: @@ -395,8 +421,6 @@ allOf: - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 - - qcom,sm6115-smmu-500 - - qcom,sm6125-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 - qcom,sm8350-smmu-500 From 8c153645fa402ee96ec3c2fef9db3a087b3667ac Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 21 Mar 2023 10:06:00 +0000 Subject: [PATCH 22/82] iommu/arm-smmu-v3: Explain why ATS stays disabled with bypass The SMMU does not support enabling ATS for a bypass stream. Add a comment. Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20230321100559.341981-1-jean-philippe@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f2425b0f0cd6..a8410c132f1e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2447,6 +2447,13 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) master->domain = smmu_domain; + /* + * The SMMU does not support enabling ATS with bypass. When the STE is + * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and + * Translated transactions are denied as though ATS is disabled for the + * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and + * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry). + */ if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) master->ats_enabled = arm_smmu_ats_supported(master); From 12261134732689b7e30c59db9978f81230965181 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 27 Mar 2023 13:30:29 +0530 Subject: [PATCH 23/82] iommu/arm-smmu-qcom: Limit the SMR groups to 128 Some platforms support more than 128 stream matching groups than what is defined by the ARM SMMU architecture specification. But due to some unknown reasons, those additional groups don't exhibit the same behavior as the architecture supported ones. For instance, the additional groups will not detect the quirky behavior of some firmware versions intercepting writes to S2CR register, thus skipping the quirk implemented in the driver and causing boot crash. So let's limit the groups to 128 for now until the issue with those groups are fixed and issue a notice to users in that case. Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20230327080029.11584-1-manivannan.sadhasivam@linaro.org [will: Reworded the comment slightly] Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index d1b296b95c86..ae09c627bc84 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -268,12 +268,26 @@ static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain, static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) { - unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + unsigned int last_s2cr; u32 reg; u32 smr; int i; + /* + * Some platforms support more than the Arm SMMU architected maximum of + * 128 stream matching groups. For unknown reasons, the additional + * groups don't exhibit the same behavior as the architected registers, + * so limit the groups to 128 until the behavior is fixed for the other + * groups. + */ + if (smmu->num_mapping_groups > 128) { + dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n"); + smmu->num_mapping_groups = 128; + } + + last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); + /* * With some firmware versions writes to S2CR of type FAULT are * ignored, and writing BYPASS will end up written as FAULT in the From f045e9df6537175d02565f21616ac1a9dd59b61c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 16 Mar 2023 18:14:45 +0800 Subject: [PATCH 24/82] iommu/mediatek: Set dma_mask for PGTABLE_PA_35_EN When we enable PGTABLE_PA_35_EN, the PA for pgtable may be 35bits. Thus add dma_mask for it. Fixes: 301c3ca12576 ("iommu/mediatek: Allow page table PA up to 35bit") Signed-off-by: Chengci.Xu Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230316101445.12443-1-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index d5a4955910ff..6a00ce208dc2 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1258,6 +1258,14 @@ static int mtk_iommu_probe(struct platform_device *pdev) return PTR_ERR(data->bclk); } + if (MTK_IOMMU_HAS_FLAG(data->plat_data, PGTABLE_PA_35_EN)) { + ret = dma_set_mask(dev, DMA_BIT_MASK(35)); + if (ret) { + dev_err(dev, "Failed to set dma_mask 35.\n"); + return ret; + } + } + pm_runtime_enable(dev); if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { From f594496403fa383259aa7dfad92f383a2ee07e1b Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Fri, 10 Mar 2023 09:00:00 +0000 Subject: [PATCH 25/82] iommu/amd: Add 5 level guest page table support Newer AMD IOMMU supports 5 level guest page table (v2 page table). If both processor and IOMMU supports 5 level page table then enable it. Otherwise fall back to 4 level page table. Co-developed-by: Wei Huang Signed-off-by: Wei Huang Reviewed-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20230310090000.1117786-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/amd_iommu_types.h | 7 +++++++ drivers/iommu/amd/init.c | 23 +++++++++++++++++++++-- drivers/iommu/amd/io_pgtable_v2.c | 9 ++++++--- drivers/iommu/amd/iommu.c | 5 +++++ 5 files changed, 40 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 20a142b54498..e98f20a9bdd8 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -34,6 +34,7 @@ extern int amd_iommu_reenable(int); extern int amd_iommu_enable_faulting(void); extern int amd_iommu_guest_ir; extern enum io_pgtable_fmt amd_iommu_pgtable; +extern int amd_iommu_gpt_level; /* IOMMUv2 specific functions */ struct iommu_domain; diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index e5cfdeaaad48..3cda781b6c7f 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -93,6 +93,8 @@ #define FEATURE_GA (1ULL<<7) #define FEATURE_HE (1ULL<<8) #define FEATURE_PC (1ULL<<9) +#define FEATURE_GATS_SHIFT (12) +#define FEATURE_GATS_MASK (3ULL) #define FEATURE_GAM_VAPIC (1ULL<<21) #define FEATURE_GIOSUP (1ULL<<48) #define FEATURE_EPHSUP (1ULL<<50) @@ -305,6 +307,9 @@ #define PAGE_MODE_6_LEVEL 0x06 #define PAGE_MODE_7_LEVEL 0x07 +#define GUEST_PGTABLE_4_LEVEL 0x00 +#define GUEST_PGTABLE_5_LEVEL 0x01 + #define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) #define PM_LEVEL_SIZE(x) (((x) < 6) ? \ ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ @@ -398,6 +403,8 @@ #define DTE_GCR3_SHIFT_B 16 #define DTE_GCR3_SHIFT_C 43 +#define DTE_GPT_LEVEL_SHIFT 54 + #define GCR3_VALID 0x01ULL #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 141eca29c951..c46ee218d387 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -153,6 +153,8 @@ bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; +/* Guest page table level */ +int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; @@ -306,6 +308,11 @@ static bool check_feature_on_all_iommus(u64 mask) return !!(amd_iommu_efr & mask); } +static inline int check_feature_gpt_level(void) +{ + return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK); +} + /* * For IVHD type 0x11/0x40, EFR is also available via IVHD. * Default to IVHD EFR since it is available sooner @@ -2155,8 +2162,10 @@ static void print_iommu_info(void) if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) pr_info("X2APIC enabled\n"); } - if (amd_iommu_pgtable == AMD_IOMMU_V2) - pr_info("V2 page table enabled\n"); + if (amd_iommu_pgtable == AMD_IOMMU_V2) { + pr_info("V2 page table enabled (Paging mode : %d level)\n", + amd_iommu_gpt_level); + } } static int __init amd_iommu_init_pci(void) @@ -3026,6 +3035,11 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + /* 5 level guest page table */ + if (cpu_feature_enabled(X86_FEATURE_LA57) && + check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL) + amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; + /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -3557,6 +3571,11 @@ __setup("ivrs_acpihid", parse_ivrs_acpihid); bool amd_iommu_v2_supported(void) { + /* CPU page table size should match IOMMU guest page table size */ + if (cpu_feature_enabled(X86_FEATURE_LA57) && + amd_iommu_gpt_level != PAGE_MODE_5_LEVEL) + return false; + /* * Since DTE[Mode]=0 is prohibited on SNP-enabled system * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 9f8b423f7630..27c3015947e6 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -37,8 +37,7 @@ static inline int get_pgtable_level(void) { - /* 5 level page table is not supported */ - return PAGE_MODE_4_LEVEL; + return amd_iommu_gpt_level; } static inline bool is_large_pte(u64 pte) @@ -379,6 +378,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); struct protection_domain *pdom = (struct protection_domain *)cookie; int ret; + int ias = IOMMU_IN_ADDR_BIT_SIZE; pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC); if (!pgtable->pgd) @@ -388,12 +388,15 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo if (ret) goto err_free_pgd; + if (get_pgtable_level() == PAGE_MODE_5_LEVEL) + ias = 57; + pgtable->iop.ops.map_pages = iommu_v2_map_pages; pgtable->iop.ops.unmap_pages = iommu_v2_unmap_pages; pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys; cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2, - cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, + cfg->ias = ias, cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, cfg->tlb = &v2_flush_ops; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 5452d0dd6569..e290f2034252 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1611,6 +1611,11 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C; flags |= tmp; + if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) { + dev_table[devid].data[2] |= + ((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT); + } + if (domain->flags & PD_GIOV_MASK) pte_root |= DTE_FLAG_GIOV; } From 67ea0b7ce41844eae7c10bb04dfe66a23318c224 Mon Sep 17 00:00:00 2001 From: Tomas Krcka Date: Wed, 29 Mar 2023 12:34:19 +0000 Subject: [PATCH 26/82] iommu/arm-smmu-v3: Acknowledge pri/event queue overflow if any When an overflow occurs in the PRI queue, the SMMU toggles the overflow flag in the PROD register. To exit the overflow condition, the PRI thread is supposed to acknowledge it by toggling this flag in the CONS register. Unacknowledged overflow causes the queue to stop adding anything new. Currently, the priq thread always writes the CONS register back to the SMMU after clearing the queue. The writeback is not necessary if the OVFLG in the PROD register has not been changed, no overflow has occured. This commit checks the difference of the overflow flag between CONS and PROD register. If it's different, toggles the OVACKFLG flag in the CONS register and write it to the SMMU. The situation is similar for the event queue. The acknowledge register is also toggled after clearing the event queue but never propagated to the hardware. This would only be done the next time when executing evtq thread. Unacknowledged event queue overflow doesn't affect the event queue, because the SMMU still adds elements to that queue when the overflow condition is active. But it feel nicer to keep SMMU in sync when possible, so use the same way here as well. Signed-off-by: Tomas Krcka Link: https://lore.kernel.org/r/20230329123420.34641-1-tomas.krcka@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index a8410c132f1e..e00e92c71239 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -152,6 +152,18 @@ static void queue_inc_cons(struct arm_smmu_ll_queue *q) q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); } +static void queue_sync_cons_ovf(struct arm_smmu_queue *q) +{ + struct arm_smmu_ll_queue *llq = &q->llq; + + if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons))) + return; + + llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | + Q_IDX(llq, llq->cons); + queue_sync_cons_out(q); +} + static int queue_sync_prod_in(struct arm_smmu_queue *q) { u32 prod; @@ -1577,8 +1589,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) } while (!queue_empty(llq)); /* Sync our overflow flag, as we believe we're up to speed */ - llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | - Q_IDX(llq, llq->cons); + queue_sync_cons_ovf(q); return IRQ_HANDLED; } @@ -1636,9 +1647,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) } while (!queue_empty(llq)); /* Sync our overflow flag, as we believe we're up to speed */ - llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | - Q_IDX(llq, llq->cons); - queue_sync_cons_out(q); + queue_sync_cons_ovf(q); return IRQ_HANDLED; } From a2972cb89935160bfe515b15d28a77694723ac06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 21 Mar 2023 09:41:16 +0100 Subject: [PATCH 27/82] iommu/arm-smmu: Drop if with an always false condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The remove and shutdown callback are only called after probe completed successfully. In this case platform_set_drvdata() was called with a non-NULL argument and so smmu is never NULL. Other functions in this driver also don't check for smmu being non-NULL before using it. Also note that returning an error code from a remove callback doesn't result in the device staying bound. It's still removed and devm allocated resources are freed (among others *smmu and the register mapping). So after an early exit to iommu device stayed around and using it probably oopses. Signed-off-by: Uwe Kleine-König Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20230321084125.337021-2-u.kleine-koenig@pengutronix.de Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 2ff7a72cf377..f4a36533ae47 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -2195,9 +2195,6 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); - if (!smmu) - return; - if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_notice(&pdev->dev, "disabling translation\n"); @@ -2218,9 +2215,6 @@ static int arm_smmu_device_remove(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); - if (!smmu) - return -ENODEV; - iommu_device_unregister(&smmu->iommu); iommu_device_sysfs_remove(&smmu->iommu); From f80473183b40bb98a7499c61091c3c55b646f387 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 21 Mar 2023 09:41:17 +0100 Subject: [PATCH 28/82] iommu/apple-dart: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230321084125.337021-3-u.kleine-koenig@pengutronix.de Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 06169d36eab8..8af64b57f048 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -1150,7 +1150,7 @@ static int apple_dart_probe(struct platform_device *pdev) return ret; } -static int apple_dart_remove(struct platform_device *pdev) +static void apple_dart_remove(struct platform_device *pdev) { struct apple_dart *dart = platform_get_drvdata(pdev); @@ -1161,8 +1161,6 @@ static int apple_dart_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&dart->iommu); clk_bulk_disable_unprepare(dart->num_clks, dart->clks); - - return 0; } static const struct apple_dart_hw apple_dart_hw_t8103 = { @@ -1296,7 +1294,7 @@ static struct platform_driver apple_dart_driver = { .pm = pm_sleep_ptr(&apple_dart_pm_ops), }, .probe = apple_dart_probe, - .remove = apple_dart_remove, + .remove_new = apple_dart_remove, }; module_platform_driver(apple_dart_driver); From 66c7076f7636f197d8a67416961037d6d2d7b4df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 21 Mar 2023 09:41:18 +0100 Subject: [PATCH 29/82] iommu/arm-smmu-v3: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230321084125.337021-4-u.kleine-koenig@pengutronix.de Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f2425b0f0cd6..d99e1a8dbe44 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3844,7 +3844,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return 0; } -static int arm_smmu_device_remove(struct platform_device *pdev) +static void arm_smmu_device_remove(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); @@ -3852,8 +3852,6 @@ static int arm_smmu_device_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&smmu->iommu); arm_smmu_device_disable(smmu); iopf_queue_free(smmu->evtq.iopf); - - return 0; } static void arm_smmu_device_shutdown(struct platform_device *pdev) @@ -3882,7 +3880,7 @@ static struct platform_driver arm_smmu_driver = { .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, - .remove = arm_smmu_device_remove, + .remove_new = arm_smmu_device_remove, .shutdown = arm_smmu_device_shutdown, }; module_driver(arm_smmu_driver, platform_driver_register, From 62565a77c2323d32f2be737455729ac7d3efe6ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 21 Mar 2023 09:41:19 +0100 Subject: [PATCH 30/82] iommu/arm-smmu: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230321084125.337021-5-u.kleine-koenig@pengutronix.de Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 6 ++---- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 12 ++++-------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index f4a36533ae47..b30a4bd7c79d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -2211,7 +2211,7 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev) clk_bulk_unprepare(smmu->num_clks, smmu->clks); } -static int arm_smmu_device_remove(struct platform_device *pdev) +static void arm_smmu_device_remove(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); @@ -2219,8 +2219,6 @@ static int arm_smmu_device_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&smmu->iommu); arm_smmu_device_shutdown(pdev); - - return 0; } static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) @@ -2296,7 +2294,7 @@ static struct platform_driver arm_smmu_driver = { .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, - .remove = arm_smmu_device_remove, + .remove_new = arm_smmu_device_remove, .shutdown = arm_smmu_device_shutdown, }; module_platform_driver(arm_smmu_driver); diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index c8b70f476cd8..a503ed758ec3 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -682,7 +682,7 @@ static int qcom_iommu_ctx_probe(struct platform_device *pdev) return 0; } -static int qcom_iommu_ctx_remove(struct platform_device *pdev) +static void qcom_iommu_ctx_remove(struct platform_device *pdev) { struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(pdev->dev.parent); struct qcom_iommu_ctx *ctx = platform_get_drvdata(pdev); @@ -690,8 +690,6 @@ static int qcom_iommu_ctx_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); qcom_iommu->ctxs[ctx->asid - 1] = NULL; - - return 0; } static const struct of_device_id ctx_of_match[] = { @@ -706,7 +704,7 @@ static struct platform_driver qcom_iommu_ctx_driver = { .of_match_table = ctx_of_match, }, .probe = qcom_iommu_ctx_probe, - .remove = qcom_iommu_ctx_remove, + .remove_new = qcom_iommu_ctx_remove, }; static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu) @@ -824,7 +822,7 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) return ret; } -static int qcom_iommu_device_remove(struct platform_device *pdev) +static void qcom_iommu_device_remove(struct platform_device *pdev) { struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); @@ -832,8 +830,6 @@ static int qcom_iommu_device_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); iommu_device_sysfs_remove(&qcom_iommu->iommu); iommu_device_unregister(&qcom_iommu->iommu); - - return 0; } static int __maybe_unused qcom_iommu_resume(struct device *dev) @@ -870,7 +866,7 @@ static struct platform_driver qcom_iommu_driver = { .pm = &qcom_iommu_pm_ops, }, .probe = qcom_iommu_device_probe, - .remove = qcom_iommu_device_remove, + .remove_new = qcom_iommu_device_remove, }; static int __init qcom_iommu_init(void) From 7471ea50eae448953126c285e8ee0351db5e2b44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 21 Mar 2023 09:41:20 +0100 Subject: [PATCH 31/82] iommu/ipmmu-vmsa: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230321084125.337021-6-u.kleine-koenig@pengutronix.de Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index bdf1a4e5eae0..813dbd1f7ba0 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1073,7 +1073,7 @@ static int ipmmu_probe(struct platform_device *pdev) return 0; } -static int ipmmu_remove(struct platform_device *pdev) +static void ipmmu_remove(struct platform_device *pdev) { struct ipmmu_vmsa_device *mmu = platform_get_drvdata(pdev); @@ -1083,8 +1083,6 @@ static int ipmmu_remove(struct platform_device *pdev) arm_iommu_release_mapping(mmu->mapping); ipmmu_device_reset(mmu); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -1131,6 +1129,6 @@ static struct platform_driver ipmmu_driver = { .pm = DEV_PM_OPS, }, .probe = ipmmu_probe, - .remove = ipmmu_remove, + .remove_new = ipmmu_remove, }; builtin_platform_driver(ipmmu_driver); From 816a4afce133e75f5977bbf0f4c24c02280b1a99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 21 Mar 2023 09:41:21 +0100 Subject: [PATCH 32/82] iommu/msm: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230321084125.337021-7-u.kleine-koenig@pengutronix.de Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 454f6331c889..79d89bad5132 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -811,13 +811,12 @@ static const struct of_device_id msm_iommu_dt_match[] = { {} }; -static int msm_iommu_remove(struct platform_device *pdev) +static void msm_iommu_remove(struct platform_device *pdev) { struct msm_iommu_dev *iommu = platform_get_drvdata(pdev); clk_unprepare(iommu->clk); clk_unprepare(iommu->pclk); - return 0; } static struct platform_driver msm_iommu_driver = { @@ -826,6 +825,6 @@ static struct platform_driver msm_iommu_driver = { .of_match_table = msm_iommu_dt_match, }, .probe = msm_iommu_probe, - .remove = msm_iommu_remove, + .remove_new = msm_iommu_remove, }; builtin_platform_driver(msm_iommu_driver); From d8149d39299ec89125a1cb35cdd77defd7a79deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 21 Mar 2023 09:41:22 +0100 Subject: [PATCH 33/82] iommu/mtk: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230321084125.337021-8-u.kleine-koenig@pengutronix.de Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index d5a4955910ff..bcc7ccdb2d6e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1316,7 +1316,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) return ret; } -static int mtk_iommu_remove(struct platform_device *pdev) +static void mtk_iommu_remove(struct platform_device *pdev) { struct mtk_iommu_data *data = platform_get_drvdata(pdev); struct mtk_iommu_bank_data *bank; @@ -1338,7 +1338,6 @@ static int mtk_iommu_remove(struct platform_device *pdev) continue; devm_free_irq(&pdev->dev, bank->irq, bank); } - return 0; } static int __maybe_unused mtk_iommu_runtime_suspend(struct device *dev) @@ -1595,7 +1594,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = { static struct platform_driver mtk_iommu_driver = { .probe = mtk_iommu_probe, - .remove = mtk_iommu_remove, + .remove_new = mtk_iommu_remove, .driver = { .name = "mtk-iommu", .of_match_table = mtk_iommu_of_ids, From 85e1049e50da9409678fc247ebad4c019d68041f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 21 Mar 2023 09:41:23 +0100 Subject: [PATCH 34/82] iommu/mtk_iommu_v1: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230321084125.337021-9-u.kleine-koenig@pengutronix.de Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu_v1.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 43e4c8f89e23..8a0a5e5d049f 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -703,7 +703,7 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev) return ret; } -static int mtk_iommu_v1_remove(struct platform_device *pdev) +static void mtk_iommu_v1_remove(struct platform_device *pdev) { struct mtk_iommu_v1_data *data = platform_get_drvdata(pdev); @@ -713,7 +713,6 @@ static int mtk_iommu_v1_remove(struct platform_device *pdev) clk_disable_unprepare(data->bclk); devm_free_irq(&pdev->dev, data->irq, data); component_master_del(&pdev->dev, &mtk_iommu_v1_com_ops); - return 0; } static int __maybe_unused mtk_iommu_v1_suspend(struct device *dev) @@ -752,7 +751,7 @@ static const struct dev_pm_ops mtk_iommu_v1_pm_ops = { static struct platform_driver mtk_iommu_v1_driver = { .probe = mtk_iommu_v1_probe, - .remove = mtk_iommu_v1_remove, + .remove_new = mtk_iommu_v1_remove, .driver = { .name = "mtk-iommu-v1", .of_match_table = mtk_iommu_v1_of_ids, From 5930df68aec6cc63ea8525c75561dfb203a9441f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 21 Mar 2023 09:41:24 +0100 Subject: [PATCH 35/82] iommu/omap: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230321084125.337021-10-u.kleine-koenig@pengutronix.de Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 3ab078112a7c..dec7cdcfbe9c 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1257,7 +1257,7 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; } -static int omap_iommu_remove(struct platform_device *pdev) +static void omap_iommu_remove(struct platform_device *pdev) { struct omap_iommu *obj = platform_get_drvdata(pdev); @@ -1274,7 +1274,6 @@ static int omap_iommu_remove(struct platform_device *pdev) pm_runtime_disable(obj->dev); dev_info(&pdev->dev, "%s removed\n", obj->name); - return 0; } static const struct dev_pm_ops omap_iommu_pm_ops = { @@ -1295,7 +1294,7 @@ static const struct of_device_id omap_iommu_of_match[] = { static struct platform_driver omap_iommu_driver = { .probe = omap_iommu_probe, - .remove = omap_iommu_remove, + .remove_new = omap_iommu_remove, .driver = { .name = "omap-iommu", .pm = &omap_iommu_pm_ops, From 421b6093f5ac3edf9cb79f36e79b7e2d51d6182b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 21 Mar 2023 09:41:25 +0100 Subject: [PATCH 36/82] iommu/sprd: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230321084125.337021-11-u.kleine-koenig@pengutronix.de Signed-off-by: Joerg Roedel --- drivers/iommu/sprd-iommu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index ae94d74b73f4..907569e76d59 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -507,7 +507,7 @@ static int sprd_iommu_probe(struct platform_device *pdev) return ret; } -static int sprd_iommu_remove(struct platform_device *pdev) +static void sprd_iommu_remove(struct platform_device *pdev) { struct sprd_iommu_device *sdev = platform_get_drvdata(pdev); @@ -519,8 +519,6 @@ static int sprd_iommu_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); iommu_device_sysfs_remove(&sdev->iommu); iommu_device_unregister(&sdev->iommu); - - return 0; } static struct platform_driver sprd_iommu_driver = { @@ -530,7 +528,7 @@ static struct platform_driver sprd_iommu_driver = { .suppress_bind_attrs = true, }, .probe = sprd_iommu_probe, - .remove = sprd_iommu_remove, + .remove_new = sprd_iommu_remove, }; module_platform_driver(sprd_iommu_driver); From 760f41d182ec94a651977e70045fd61b57973408 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 22 Mar 2023 13:07:57 -0700 Subject: [PATCH 37/82] iommu/vt-d: Remove virtual command interface Virtual command interface was introduced to allow using host PASIDs inside VMs. It is unused and abandoned due to architectural change. With this patch, we can safely remove this feature and the related helpers. Link: https://lore.kernel.org/r/20230210230206.3160144-2-jacob.jun.pan@linux.intel.com Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20230322200803.869130-2-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/cap_audit.c | 2 - drivers/iommu/intel/dmar.c | 2 - drivers/iommu/intel/iommu.c | 85 --------------------------------- drivers/iommu/intel/iommu.h | 2 - 4 files changed, 91 deletions(-) diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c index 806986696841..9862dc20b35e 100644 --- a/drivers/iommu/intel/cap_audit.c +++ b/drivers/iommu/intel/cap_audit.c @@ -54,7 +54,6 @@ static inline void check_dmar_capabilities(struct intel_iommu *a, CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK); CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK); CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, vcs, ECAP_VCS_MASK); CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK); CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK); CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK); @@ -101,7 +100,6 @@ static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, vcs, ECAP_VCS_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK); diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 6acfe879589c..0f348439ef0e 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -993,8 +993,6 @@ static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd) warn_invalid_dmar(phys_addr, " returns all ones"); goto unmap; } - if (ecap_vcs(iommu->ecap)) - iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG); /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a366790fb781..6c2d1ffb5b0a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1722,9 +1722,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); } - if (vccap_pasid(iommu->vccap)) - ioasid_unregister_allocator(&iommu->pasid_allocator); - #endif } @@ -2797,85 +2794,6 @@ static int copy_translation_tables(struct intel_iommu *iommu) return ret; } -#ifdef CONFIG_INTEL_IOMMU_SVM -static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data) -{ - struct intel_iommu *iommu = data; - ioasid_t ioasid; - - if (!iommu) - return INVALID_IOASID; - /* - * VT-d virtual command interface always uses the full 20 bit - * PASID range. Host can partition guest PASID range based on - * policies but it is out of guest's control. - */ - if (min < PASID_MIN || max > intel_pasid_max_id) - return INVALID_IOASID; - - if (vcmd_alloc_pasid(iommu, &ioasid)) - return INVALID_IOASID; - - return ioasid; -} - -static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data) -{ - struct intel_iommu *iommu = data; - - if (!iommu) - return; - /* - * Sanity check the ioasid owner is done at upper layer, e.g. VFIO - * We can only free the PASID when all the devices are unbound. - */ - if (ioasid_find(NULL, ioasid, NULL)) { - pr_alert("Cannot free active IOASID %d\n", ioasid); - return; - } - vcmd_free_pasid(iommu, ioasid); -} - -static void register_pasid_allocator(struct intel_iommu *iommu) -{ - /* - * If we are running in the host, no need for custom allocator - * in that PASIDs are allocated from the host system-wide. - */ - if (!cap_caching_mode(iommu->cap)) - return; - - if (!sm_supported(iommu)) { - pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n"); - return; - } - - /* - * Register a custom PASID allocator if we are running in a guest, - * guest PASID must be obtained via virtual command interface. - * There can be multiple vIOMMUs in each guest but only one allocator - * is active. All vIOMMU allocators will eventually be calling the same - * host allocator. - */ - if (!vccap_pasid(iommu->vccap)) - return; - - pr_info("Register custom PASID allocator\n"); - iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc; - iommu->pasid_allocator.free = intel_vcmd_ioasid_free; - iommu->pasid_allocator.pdata = (void *)iommu; - if (ioasid_register_allocator(&iommu->pasid_allocator)) { - pr_warn("Custom PASID allocator failed, scalable mode disabled\n"); - /* - * Disable scalable mode on this IOMMU if there - * is no custom allocator. Mixing SM capable vIOMMU - * and non-SM vIOMMU are not supported. - */ - intel_iommu_sm = 0; - } -} -#endif - static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; @@ -2964,9 +2882,6 @@ static int __init init_dmars(void) */ for_each_active_iommu(iommu, drhd) { iommu_flush_write_buffer(iommu); -#ifdef CONFIG_INTEL_IOMMU_SVM - register_pasid_allocator(iommu); -#endif iommu_set_root_entry(iommu); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d6df3b865812..a2010fb120e2 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -198,7 +198,6 @@ #define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) #define ecap_slads(e) (((e) >> 45) & 0x1) -#define ecap_vcs(e) (((e) >> 44) & 0x1) #define ecap_smts(e) (((e) >> 43) & 0x1) #define ecap_dit(e) (((e) >> 41) & 0x1) #define ecap_pds(e) (((e) >> 42) & 0x1) @@ -676,7 +675,6 @@ struct intel_iommu { unsigned char prq_name[16]; /* Name for PRQ interrupt */ unsigned long prq_seq_number; struct completion prq_complete; - struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */ #endif struct iopf_queue *iopf_queue; unsigned char iopfq_name[16]; From cd3891158a77685aee6129f7374a018d13540b2c Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 22 Mar 2023 13:07:58 -0700 Subject: [PATCH 38/82] iommu/sva: Move PASID helpers to sva code Preparing to remove IOASID infrastructure, PASID management will be under SVA code. Decouple mm code from IOASID. Reviewed-by: Jason Gunthorpe Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230322200803.869130-3-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 10 +++++++++- include/linux/ioasid.h | 11 +---------- include/linux/iommu.h | 14 +++++++++++++- include/linux/sched/mm.h | 26 -------------------------- kernel/fork.c | 1 + 5 files changed, 24 insertions(+), 38 deletions(-) diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 24bf9b2b58aa..fcfdc80a3939 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -44,7 +44,7 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) if (!pasid_valid(pasid)) ret = -ENOMEM; else - mm_pasid_set(mm, pasid); + mm->pasid = pasid; out: mutex_unlock(&iommu_sva_lock); return ret; @@ -238,3 +238,11 @@ iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) return status; } + +void mm_pasid_drop(struct mm_struct *mm) +{ + if (pasid_valid(mm->pasid)) { + ioasid_free(mm->pasid); + mm->pasid = INVALID_IOASID; + } +} diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index af1c9d62e642..734bf0e036ee 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -4,8 +4,8 @@ #include #include +#include -#define INVALID_IOASID ((ioasid_t)-1) typedef unsigned int ioasid_t; typedef ioasid_t (*ioasid_alloc_fn_t)(ioasid_t min, ioasid_t max, void *data); typedef void (*ioasid_free_fn_t)(ioasid_t ioasid, void *data); @@ -40,10 +40,6 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); int ioasid_set_data(ioasid_t ioasid, void *data); -static inline bool pasid_valid(ioasid_t ioasid) -{ - return ioasid != INVALID_IOASID; -} #else /* !CONFIG_IOASID */ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, @@ -74,10 +70,5 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data) return -ENOTSUPP; } -static inline bool pasid_valid(ioasid_t ioasid) -{ - return false; -} - #endif /* CONFIG_IOASID */ #endif /* __LINUX_IOASID_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6595454d4f48..d3f81dc6e4dd 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #define IOMMU_READ (1 << 0) @@ -192,6 +191,8 @@ enum iommu_dev_features { }; #define IOMMU_PASID_INVALID (-1U) +typedef unsigned int ioasid_t; +#define INVALID_IOASID ((ioasid_t)-1) #ifdef CONFIG_IOMMU_API @@ -1172,7 +1173,16 @@ static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream return false; } +static inline bool pasid_valid(ioasid_t ioasid) +{ + return ioasid != INVALID_IOASID; +} #ifdef CONFIG_IOMMU_SVA +static inline void mm_pasid_init(struct mm_struct *mm) +{ + mm->pasid = INVALID_IOASID; +} +void mm_pasid_drop(struct mm_struct *mm); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); @@ -1192,6 +1202,8 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) { return IOMMU_PASID_INVALID; } +static inline void mm_pasid_init(struct mm_struct *mm) {} +static inline void mm_pasid_drop(struct mm_struct *mm) {} #endif /* CONFIG_IOMMU_SVA */ #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 2a243616f222..da9712a3ba73 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -8,7 +8,6 @@ #include #include #include -#include /* * Routines for handling mm_structs @@ -451,29 +450,4 @@ static inline void membarrier_update_current_mm(struct mm_struct *next_mm) } #endif -#ifdef CONFIG_IOMMU_SVA -static inline void mm_pasid_init(struct mm_struct *mm) -{ - mm->pasid = INVALID_IOASID; -} - -/* Associate a PASID with an mm_struct: */ -static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) -{ - mm->pasid = pasid; -} - -static inline void mm_pasid_drop(struct mm_struct *mm) -{ - if (pasid_valid(mm->pasid)) { - ioasid_free(mm->pasid); - mm->pasid = INVALID_IOASID; - } -} -#else -static inline void mm_pasid_init(struct mm_struct *mm) {} -static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} -static inline void mm_pasid_drop(struct mm_struct *mm) {} -#endif - #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/fork.c b/kernel/fork.c index d8cda4c6de6c..9f14e4084fc0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include From 2bef9ba8aefc211674427a0db046a773001b3329 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 22 Mar 2023 13:07:59 -0700 Subject: [PATCH 39/82] iommu/sva: Remove PASID to mm lookup function There is no user of iommu_sva_find() function, remove it so that PASID allocator can be a simple IDA. Device drivers are expected to store and keep track of their own PASID metadata. Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230322200803.869130-4-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 20 -------------------- drivers/iommu/iommu-sva.h | 1 - 2 files changed, 21 deletions(-) diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index fcfdc80a3939..4f357ef14f04 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -51,26 +51,6 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) } EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); -/* ioasid_find getter() requires a void * argument */ -static bool __mmget_not_zero(void *mm) -{ - return mmget_not_zero(mm); -} - -/** - * iommu_sva_find() - Find mm associated to the given PASID - * @pasid: Process Address Space ID assigned to the mm - * - * On success a reference to the mm is taken, and must be released with mmput(). - * - * Returns the mm corresponding to this PASID, or an error if not found. - */ -struct mm_struct *iommu_sva_find(ioasid_t pasid) -{ - return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero); -} -EXPORT_SYMBOL_GPL(iommu_sva_find); - /** * iommu_sva_bind_device() - Bind a process address space to a device * @dev: the device diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h index 7215a761b962..102eae1817a2 100644 --- a/drivers/iommu/iommu-sva.h +++ b/drivers/iommu/iommu-sva.h @@ -9,7 +9,6 @@ #include int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max); -struct mm_struct *iommu_sva_find(ioasid_t pasid); /* I/O Page fault */ struct device; From 4e14176ab13fb6986dd079c711d46b70712da2f1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 22 Mar 2023 13:08:00 -0700 Subject: [PATCH 40/82] iommu/sva: Stop using ioasid_set for SVA Instead SVA drivers can use a simple global IDA to allocate PASIDs for each mm_struct. Future work would be to allow drivers using the SVA APIs to reserve global PASIDs from this IDA for their internal use, eg with the DMA API PASID support. Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230322200803.869130-5-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 41 +++++++++++++-------------------------- drivers/iommu/iommu-sva.h | 2 -- 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 4f357ef14f04..48e8a15ddc9b 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -9,47 +9,34 @@ #include "iommu-sva.h" static DEFINE_MUTEX(iommu_sva_lock); -static DECLARE_IOASID_SET(iommu_sva_pasid); +static DEFINE_IDA(iommu_global_pasid_ida); -/** - * iommu_sva_alloc_pasid - Allocate a PASID for the mm - * @mm: the mm - * @min: minimum PASID value (inclusive) - * @max: maximum PASID value (inclusive) - * - * Try to allocate a PASID for this mm, or take a reference to the existing one - * provided it fits within the [@min, @max] range. On success the PASID is - * available in mm->pasid and will be available for the lifetime of the mm. - * - * Returns 0 on success and < 0 on error. - */ -int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) +/* Allocate a PASID for the mm within range (inclusive) */ +static int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) { int ret = 0; - ioasid_t pasid; - if (min == INVALID_IOASID || max == INVALID_IOASID || + if (!pasid_valid(min) || !pasid_valid(max) || min == 0 || max < min) return -EINVAL; mutex_lock(&iommu_sva_lock); /* Is a PASID already associated with this mm? */ if (pasid_valid(mm->pasid)) { - if (mm->pasid < min || mm->pasid >= max) + if (mm->pasid < min || mm->pasid > max) ret = -EOVERFLOW; goto out; } - pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); - if (!pasid_valid(pasid)) - ret = -ENOMEM; - else - mm->pasid = pasid; + ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_ATOMIC); + if (ret < 0) + goto out; + mm->pasid = ret; + ret = 0; out: mutex_unlock(&iommu_sva_lock); return ret; } -EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); /** * iommu_sva_bind_device() - Bind a process address space to a device @@ -221,8 +208,8 @@ iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) void mm_pasid_drop(struct mm_struct *mm) { - if (pasid_valid(mm->pasid)) { - ioasid_free(mm->pasid); - mm->pasid = INVALID_IOASID; - } + if (likely(!pasid_valid(mm->pasid))) + return; + + ida_free(&iommu_global_pasid_ida, mm->pasid); } diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h index 102eae1817a2..c22d0174ad61 100644 --- a/drivers/iommu/iommu-sva.h +++ b/drivers/iommu/iommu-sva.h @@ -8,8 +8,6 @@ #include #include -int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max); - /* I/O Page fault */ struct device; struct iommu_fault; From 1a14bf0fc7ed9476284cd6ab358c783fd9a0cb5b Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 22 Mar 2023 13:08:01 -0700 Subject: [PATCH 41/82] iommu/sva: Use GFP_KERNEL for pasid allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We’re not using spinlock-protected IOASID allocation anymore, there’s no need for GFP_ATOMIC. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230322200803.869130-6-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 48e8a15ddc9b..c434b95dc8eb 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -28,8 +28,8 @@ static int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t ma goto out; } - ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_ATOMIC); - if (ret < 0) + ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_KERNEL); + if (ret < min) goto out; mm->pasid = ret; ret = 0; From fffaed1e24b8d114e958d180cb4a8aed3febbb5a Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 22 Mar 2023 13:08:02 -0700 Subject: [PATCH 42/82] iommu/ioasid: Rename INVALID_IOASID INVALID_IOASID and IOMMU_PASID_INVALID are duplicated. Rename INVALID_IOASID and consolidate since we are moving away from IOASID infrastructure. Reviewed-by: Dave Jiang Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230322200803.869130-7-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- Documentation/x86/sva.rst | 2 +- arch/x86/kernel/traps.c | 2 +- drivers/dma/idxd/device.c | 8 ++++---- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 2 +- drivers/dma/idxd/irq.c | 2 +- drivers/iommu/intel/dmar.c | 4 ++-- drivers/iommu/intel/iommu.c | 2 +- drivers/iommu/intel/svm.c | 2 +- include/linux/ioasid.h | 1 + include/linux/iommu.h | 6 +++--- mm/init-mm.c | 4 ++-- 12 files changed, 19 insertions(+), 17 deletions(-) diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst index 2e9b8b0f9a0f..33cb05005982 100644 --- a/Documentation/x86/sva.rst +++ b/Documentation/x86/sva.rst @@ -107,7 +107,7 @@ process share the same page tables, thus the same MSR value. PASID Life Cycle Management =========================== -PASID is initialized as INVALID_IOASID (-1) when a process is created. +PASID is initialized as IOMMU_PASID_INVALID (-1) when a process is created. Only processes that access SVA-capable devices need to have a PASID allocated. This allocation happens when a process opens/binds an SVA-capable diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d317dc3d06a3..492a60febb11 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5f321f3b4242..6fca8fa8d3a8 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1194,7 +1194,7 @@ static void idxd_device_set_perm_entry(struct idxd_device *idxd, { union msix_perm mperm; - if (ie->pasid == INVALID_IOASID) + if (ie->pasid == IOMMU_PASID_INVALID) return; mperm.bits = 0; @@ -1224,7 +1224,7 @@ void idxd_wq_free_irq(struct idxd_wq *wq) idxd_device_clear_perm_entry(idxd, ie); ie->vector = -1; ie->int_handle = INVALID_INT_HANDLE; - ie->pasid = INVALID_IOASID; + ie->pasid = IOMMU_PASID_INVALID; } int idxd_wq_request_irq(struct idxd_wq *wq) @@ -1240,7 +1240,7 @@ int idxd_wq_request_irq(struct idxd_wq *wq) ie = &wq->ie; ie->vector = pci_irq_vector(pdev, ie->id); - ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID; + ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : IOMMU_PASID_INVALID; idxd_device_set_perm_entry(idxd, ie); rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie); @@ -1265,7 +1265,7 @@ int idxd_wq_request_irq(struct idxd_wq *wq) free_irq(ie->vector, ie); err_irq: idxd_device_clear_perm_entry(idxd, ie); - ie->pasid = INVALID_IOASID; + ie->pasid = IOMMU_PASID_INVALID; return rc; } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 7ced8d283d98..417e602a46b6 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "registers.h" diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 640d3048368e..e6ee267da0ff 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -105,7 +105,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) ie = idxd_get_ie(idxd, msix_idx); ie->id = msix_idx; ie->int_handle = INVALID_INT_HANDLE; - ie->pasid = INVALID_IOASID; + ie->pasid = IOMMU_PASID_INVALID; spin_lock_init(&ie->list_lock); init_llist_head(&ie->pending_llist); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index aa314ebec587..242f1f0b9f09 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -80,7 +80,7 @@ static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) desc.opcode = DSA_OPCODE_DRAIN; desc.priv = 1; - if (ie->pasid != INVALID_IOASID) + if (ie->pasid != IOMMU_PASID_INVALID) desc.pasid = ie->pasid; desc.int_handle = ie->int_handle; portal = idxd_wq_portal_addr(wq); diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 0f348439ef0e..10bb20ff50fd 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1958,7 +1958,7 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, return 0; } - if (pasid == INVALID_IOASID) + if (pasid == IOMMU_PASID_INVALID) pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n", type ? "DMA Read" : "DMA Write", source_id >> 8, PCI_SLOT(source_id & 0xFF), @@ -2039,7 +2039,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) if (!ratelimited) /* Using pasid -1 if pasid is not present */ dmar_fault_do_one(iommu, type, fault_reason, - pasid_present ? pasid : INVALID_IOASID, + pasid_present ? pasid : IOMMU_PASID_INVALID, source_id, guest_addr); fault_index++; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6c2d1ffb5b0a..350c33605fd3 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -876,7 +876,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, return; } /* For request-without-pasid, get the pasid from context entry */ - if (intel_iommu_sm && pasid == INVALID_IOASID) + if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID) pasid = PASID_RID2PASID; dir_index = pasid >> PASID_PDE_SHIFT; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7367f56c3bad..3848a1b0800c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -273,7 +273,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, if (WARN_ON(!mutex_is_locked(&pasid_mutex))) return -EINVAL; - if (pasid == INVALID_IOASID || pasid >= PASID_MAX) + if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX) return -EINVAL; svm = pasid_private_find(pasid); diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index 734bf0e036ee..bbc9c6fd3310 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -7,6 +7,7 @@ #include typedef unsigned int ioasid_t; +#define INVALID_IOASID ((ioasid_t)-1) typedef ioasid_t (*ioasid_alloc_fn_t)(ioasid_t min, ioasid_t max, void *data); typedef void (*ioasid_free_fn_t)(ioasid_t ioasid, void *data); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d3f81dc6e4dd..54f535ff9868 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -192,7 +192,6 @@ enum iommu_dev_features { #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; -#define INVALID_IOASID ((ioasid_t)-1) #ifdef CONFIG_IOMMU_API @@ -1175,12 +1174,13 @@ static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream static inline bool pasid_valid(ioasid_t ioasid) { - return ioasid != INVALID_IOASID; + return ioasid != IOMMU_PASID_INVALID; } + #ifdef CONFIG_IOMMU_SVA static inline void mm_pasid_init(struct mm_struct *mm) { - mm->pasid = INVALID_IOASID; + mm->pasid = IOMMU_PASID_INVALID; } void mm_pasid_drop(struct mm_struct *mm); struct iommu_sva *iommu_sva_bind_device(struct device *dev, diff --git a/mm/init-mm.c b/mm/init-mm.c index c9327abb771c..a084039f55d8 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #ifndef INIT_MM_CONTEXT @@ -40,7 +40,7 @@ struct mm_struct init_mm = { .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, #ifdef CONFIG_IOMMU_SVA - .pasid = INVALID_IOASID, + .pasid = IOMMU_PASID_INVALID, #endif INIT_MM_CONTEXT(init_mm) }; From 99b5726b44230329f35b4c4d7fe1577d4f4edb31 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 22 Mar 2023 13:08:03 -0700 Subject: [PATCH 43/82] iommu: Remove ioasid infrastructure This has no use anymore, delete it all. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230322200803.869130-8-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/dma/idxd/idxd.h | 1 - drivers/iommu/Kconfig | 5 - drivers/iommu/Makefile | 1 - drivers/iommu/intel/iommu.h | 1 - drivers/iommu/intel/svm.c | 1 - drivers/iommu/ioasid.c | 422 ------------------------------------ drivers/iommu/iommu-sva.h | 1 - include/linux/ioasid.h | 75 ------- 8 files changed, 507 deletions(-) delete mode 100644 drivers/iommu/ioasid.c delete mode 100644 include/linux/ioasid.h diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 417e602a46b6..dd2a6ed8949b 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c4928514e5e2..db98c3f86e8c 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -3,10 +3,6 @@ config IOMMU_IOVA tristate -# The IOASID library may also be used by non-IOMMU_API users -config IOASID - tristate - # IOMMU_API always gets selected by whoever wants it. config IOMMU_API bool @@ -160,7 +156,6 @@ config IOMMU_DMA # Shared Virtual Addressing config IOMMU_SVA bool - select IOASID config FSL_PAMU bool "Freescale IOMMU support" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index f461d0651385..769e43d780ce 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -9,7 +9,6 @@ obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o obj-$(CONFIG_IOMMU_IO_PGTABLE_DART) += io-pgtable-dart.o -obj-$(CONFIG_IOASID) += ioasid.o obj-$(CONFIG_IOMMU_IOVA) += iova.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index a2010fb120e2..65b15be72878 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 3848a1b0800c..e95b339e9cdc 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c deleted file mode 100644 index a786c034907c..000000000000 --- a/drivers/iommu/ioasid.c +++ /dev/null @@ -1,422 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * I/O Address Space ID allocator. There is one global IOASID space, split into - * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and - * free IOASIDs with ioasid_alloc() and ioasid_free(). - */ -#include -#include -#include -#include -#include - -struct ioasid_data { - ioasid_t id; - struct ioasid_set *set; - void *private; - struct rcu_head rcu; -}; - -/* - * struct ioasid_allocator_data - Internal data structure to hold information - * about an allocator. There are two types of allocators: - * - * - Default allocator always has its own XArray to track the IOASIDs allocated. - * - Custom allocators may share allocation helpers with different private data. - * Custom allocators that share the same helper functions also share the same - * XArray. - * Rules: - * 1. Default allocator is always available, not dynamically registered. This is - * to prevent race conditions with early boot code that want to register - * custom allocators or allocate IOASIDs. - * 2. Custom allocators take precedence over the default allocator. - * 3. When all custom allocators sharing the same helper functions are - * unregistered (e.g. due to hotplug), all outstanding IOASIDs must be - * freed. Otherwise, outstanding IOASIDs will be lost and orphaned. - * 4. When switching between custom allocators sharing the same helper - * functions, outstanding IOASIDs are preserved. - * 5. When switching between custom allocator and default allocator, all IOASIDs - * must be freed to ensure unadulterated space for the new allocator. - * - * @ops: allocator helper functions and its data - * @list: registered custom allocators - * @slist: allocators share the same ops but different data - * @flags: attributes of the allocator - * @xa: xarray holds the IOASID space - * @rcu: used for kfree_rcu when unregistering allocator - */ -struct ioasid_allocator_data { - struct ioasid_allocator_ops *ops; - struct list_head list; - struct list_head slist; -#define IOASID_ALLOCATOR_CUSTOM BIT(0) /* Needs framework to track results */ - unsigned long flags; - struct xarray xa; - struct rcu_head rcu; -}; - -static DEFINE_SPINLOCK(ioasid_allocator_lock); -static LIST_HEAD(allocators_list); - -static ioasid_t default_alloc(ioasid_t min, ioasid_t max, void *opaque); -static void default_free(ioasid_t ioasid, void *opaque); - -static struct ioasid_allocator_ops default_ops = { - .alloc = default_alloc, - .free = default_free, -}; - -static struct ioasid_allocator_data default_allocator = { - .ops = &default_ops, - .flags = 0, - .xa = XARRAY_INIT(ioasid_xa, XA_FLAGS_ALLOC), -}; - -static struct ioasid_allocator_data *active_allocator = &default_allocator; - -static ioasid_t default_alloc(ioasid_t min, ioasid_t max, void *opaque) -{ - ioasid_t id; - - if (xa_alloc(&default_allocator.xa, &id, opaque, XA_LIMIT(min, max), GFP_ATOMIC)) { - pr_err("Failed to alloc ioasid from %d to %d\n", min, max); - return INVALID_IOASID; - } - - return id; -} - -static void default_free(ioasid_t ioasid, void *opaque) -{ - struct ioasid_data *ioasid_data; - - ioasid_data = xa_erase(&default_allocator.xa, ioasid); - kfree_rcu(ioasid_data, rcu); -} - -/* Allocate and initialize a new custom allocator with its helper functions */ -static struct ioasid_allocator_data *ioasid_alloc_allocator(struct ioasid_allocator_ops *ops) -{ - struct ioasid_allocator_data *ia_data; - - ia_data = kzalloc(sizeof(*ia_data), GFP_ATOMIC); - if (!ia_data) - return NULL; - - xa_init_flags(&ia_data->xa, XA_FLAGS_ALLOC); - INIT_LIST_HEAD(&ia_data->slist); - ia_data->flags |= IOASID_ALLOCATOR_CUSTOM; - ia_data->ops = ops; - - /* For tracking custom allocators that share the same ops */ - list_add_tail(&ops->list, &ia_data->slist); - - return ia_data; -} - -static bool use_same_ops(struct ioasid_allocator_ops *a, struct ioasid_allocator_ops *b) -{ - return (a->free == b->free) && (a->alloc == b->alloc); -} - -/** - * ioasid_register_allocator - register a custom allocator - * @ops: the custom allocator ops to be registered - * - * Custom allocators take precedence over the default xarray based allocator. - * Private data associated with the IOASID allocated by the custom allocators - * are managed by IOASID framework similar to data stored in xa by default - * allocator. - * - * There can be multiple allocators registered but only one is active. In case - * of runtime removal of a custom allocator, the next one is activated based - * on the registration ordering. - * - * Multiple allocators can share the same alloc() function, in this case the - * IOASID space is shared. - */ -int ioasid_register_allocator(struct ioasid_allocator_ops *ops) -{ - struct ioasid_allocator_data *ia_data; - struct ioasid_allocator_data *pallocator; - int ret = 0; - - spin_lock(&ioasid_allocator_lock); - - ia_data = ioasid_alloc_allocator(ops); - if (!ia_data) { - ret = -ENOMEM; - goto out_unlock; - } - - /* - * No particular preference, we activate the first one and keep - * the later registered allocators in a list in case the first one gets - * removed due to hotplug. - */ - if (list_empty(&allocators_list)) { - WARN_ON(active_allocator != &default_allocator); - /* Use this new allocator if default is not active */ - if (xa_empty(&active_allocator->xa)) { - rcu_assign_pointer(active_allocator, ia_data); - list_add_tail(&ia_data->list, &allocators_list); - goto out_unlock; - } - pr_warn("Default allocator active with outstanding IOASID\n"); - ret = -EAGAIN; - goto out_free; - } - - /* Check if the allocator is already registered */ - list_for_each_entry(pallocator, &allocators_list, list) { - if (pallocator->ops == ops) { - pr_err("IOASID allocator already registered\n"); - ret = -EEXIST; - goto out_free; - } else if (use_same_ops(pallocator->ops, ops)) { - /* - * If the new allocator shares the same ops, - * then they will share the same IOASID space. - * We should put them under the same xarray. - */ - list_add_tail(&ops->list, &pallocator->slist); - goto out_free; - } - } - list_add_tail(&ia_data->list, &allocators_list); - - spin_unlock(&ioasid_allocator_lock); - return 0; -out_free: - kfree(ia_data); -out_unlock: - spin_unlock(&ioasid_allocator_lock); - return ret; -} -EXPORT_SYMBOL_GPL(ioasid_register_allocator); - -/** - * ioasid_unregister_allocator - Remove a custom IOASID allocator ops - * @ops: the custom allocator to be removed - * - * Remove an allocator from the list, activate the next allocator in - * the order it was registered. Or revert to default allocator if all - * custom allocators are unregistered without outstanding IOASIDs. - */ -void ioasid_unregister_allocator(struct ioasid_allocator_ops *ops) -{ - struct ioasid_allocator_data *pallocator; - struct ioasid_allocator_ops *sops; - - spin_lock(&ioasid_allocator_lock); - if (list_empty(&allocators_list)) { - pr_warn("No custom IOASID allocators active!\n"); - goto exit_unlock; - } - - list_for_each_entry(pallocator, &allocators_list, list) { - if (!use_same_ops(pallocator->ops, ops)) - continue; - - if (list_is_singular(&pallocator->slist)) { - /* No shared helper functions */ - list_del(&pallocator->list); - /* - * All IOASIDs should have been freed before - * the last allocator that shares the same ops - * is unregistered. - */ - WARN_ON(!xa_empty(&pallocator->xa)); - if (list_empty(&allocators_list)) { - pr_info("No custom IOASID allocators, switch to default.\n"); - rcu_assign_pointer(active_allocator, &default_allocator); - } else if (pallocator == active_allocator) { - rcu_assign_pointer(active_allocator, - list_first_entry(&allocators_list, - struct ioasid_allocator_data, list)); - pr_info("IOASID allocator changed"); - } - kfree_rcu(pallocator, rcu); - break; - } - /* - * Find the matching shared ops to delete, - * but keep outstanding IOASIDs - */ - list_for_each_entry(sops, &pallocator->slist, list) { - if (sops == ops) { - list_del(&ops->list); - break; - } - } - break; - } - -exit_unlock: - spin_unlock(&ioasid_allocator_lock); -} -EXPORT_SYMBOL_GPL(ioasid_unregister_allocator); - -/** - * ioasid_set_data - Set private data for an allocated ioasid - * @ioasid: the ID to set data - * @data: the private data - * - * For IOASID that is already allocated, private data can be set - * via this API. Future lookup can be done via ioasid_find. - */ -int ioasid_set_data(ioasid_t ioasid, void *data) -{ - struct ioasid_data *ioasid_data; - int ret = 0; - - spin_lock(&ioasid_allocator_lock); - ioasid_data = xa_load(&active_allocator->xa, ioasid); - if (ioasid_data) - rcu_assign_pointer(ioasid_data->private, data); - else - ret = -ENOENT; - spin_unlock(&ioasid_allocator_lock); - - /* - * Wait for readers to stop accessing the old private data, so the - * caller can free it. - */ - if (!ret) - synchronize_rcu(); - - return ret; -} -EXPORT_SYMBOL_GPL(ioasid_set_data); - -/** - * ioasid_alloc - Allocate an IOASID - * @set: the IOASID set - * @min: the minimum ID (inclusive) - * @max: the maximum ID (inclusive) - * @private: data private to the caller - * - * Allocate an ID between @min and @max. The @private pointer is stored - * internally and can be retrieved with ioasid_find(). - * - * Return: the allocated ID on success, or %INVALID_IOASID on failure. - */ -ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, - void *private) -{ - struct ioasid_data *data; - void *adata; - ioasid_t id; - - data = kzalloc(sizeof(*data), GFP_ATOMIC); - if (!data) - return INVALID_IOASID; - - data->set = set; - data->private = private; - - /* - * Custom allocator needs allocator data to perform platform specific - * operations. - */ - spin_lock(&ioasid_allocator_lock); - adata = active_allocator->flags & IOASID_ALLOCATOR_CUSTOM ? active_allocator->ops->pdata : data; - id = active_allocator->ops->alloc(min, max, adata); - if (id == INVALID_IOASID) { - pr_err("Failed ASID allocation %lu\n", active_allocator->flags); - goto exit_free; - } - - if ((active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) && - xa_alloc(&active_allocator->xa, &id, data, XA_LIMIT(id, id), GFP_ATOMIC)) { - /* Custom allocator needs framework to store and track allocation results */ - pr_err("Failed to alloc ioasid from %d\n", id); - active_allocator->ops->free(id, active_allocator->ops->pdata); - goto exit_free; - } - data->id = id; - - spin_unlock(&ioasid_allocator_lock); - return id; -exit_free: - spin_unlock(&ioasid_allocator_lock); - kfree(data); - return INVALID_IOASID; -} -EXPORT_SYMBOL_GPL(ioasid_alloc); - -/** - * ioasid_free - Free an ioasid - * @ioasid: the ID to remove - */ -void ioasid_free(ioasid_t ioasid) -{ - struct ioasid_data *ioasid_data; - - spin_lock(&ioasid_allocator_lock); - ioasid_data = xa_load(&active_allocator->xa, ioasid); - if (!ioasid_data) { - pr_err("Trying to free unknown IOASID %u\n", ioasid); - goto exit_unlock; - } - - active_allocator->ops->free(ioasid, active_allocator->ops->pdata); - /* Custom allocator needs additional steps to free the xa element */ - if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) { - ioasid_data = xa_erase(&active_allocator->xa, ioasid); - kfree_rcu(ioasid_data, rcu); - } - -exit_unlock: - spin_unlock(&ioasid_allocator_lock); -} -EXPORT_SYMBOL_GPL(ioasid_free); - -/** - * ioasid_find - Find IOASID data - * @set: the IOASID set - * @ioasid: the IOASID to find - * @getter: function to call on the found object - * - * The optional getter function allows to take a reference to the found object - * under the rcu lock. The function can also check if the object is still valid: - * if @getter returns false, then the object is invalid and NULL is returned. - * - * If the IOASID exists, return the private pointer passed to ioasid_alloc. - * Private data can be NULL if not set. Return an error if the IOASID is not - * found, or if @set is not NULL and the IOASID does not belong to the set. - */ -void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, - bool (*getter)(void *)) -{ - void *priv; - struct ioasid_data *ioasid_data; - struct ioasid_allocator_data *idata; - - rcu_read_lock(); - idata = rcu_dereference(active_allocator); - ioasid_data = xa_load(&idata->xa, ioasid); - if (!ioasid_data) { - priv = ERR_PTR(-ENOENT); - goto unlock; - } - if (set && ioasid_data->set != set) { - /* data found but does not belong to the set */ - priv = ERR_PTR(-EACCES); - goto unlock; - } - /* Now IOASID and its set is verified, we can return the private data */ - priv = rcu_dereference(ioasid_data->private); - if (getter && !getter(priv)) - priv = NULL; -unlock: - rcu_read_unlock(); - - return priv; -} -EXPORT_SYMBOL_GPL(ioasid_find); - -MODULE_AUTHOR("Jean-Philippe Brucker "); -MODULE_AUTHOR("Jacob Pan "); -MODULE_DESCRIPTION("IO Address Space ID (IOASID) allocator"); -MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h index c22d0174ad61..54946b5a7caf 100644 --- a/drivers/iommu/iommu-sva.h +++ b/drivers/iommu/iommu-sva.h @@ -5,7 +5,6 @@ #ifndef _IOMMU_SVA_H #define _IOMMU_SVA_H -#include #include /* I/O Page fault */ diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h deleted file mode 100644 index bbc9c6fd3310..000000000000 --- a/include/linux/ioasid.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_IOASID_H -#define __LINUX_IOASID_H - -#include -#include -#include - -typedef unsigned int ioasid_t; -#define INVALID_IOASID ((ioasid_t)-1) -typedef ioasid_t (*ioasid_alloc_fn_t)(ioasid_t min, ioasid_t max, void *data); -typedef void (*ioasid_free_fn_t)(ioasid_t ioasid, void *data); - -struct ioasid_set { - int dummy; -}; - -/** - * struct ioasid_allocator_ops - IOASID allocator helper functions and data - * - * @alloc: helper function to allocate IOASID - * @free: helper function to free IOASID - * @list: for tracking ops that share helper functions but not data - * @pdata: data belong to the allocator, provided when calling alloc() - */ -struct ioasid_allocator_ops { - ioasid_alloc_fn_t alloc; - ioasid_free_fn_t free; - struct list_head list; - void *pdata; -}; - -#define DECLARE_IOASID_SET(name) struct ioasid_set name = { 0 } - -#if IS_ENABLED(CONFIG_IOASID) -ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, - void *private); -void ioasid_free(ioasid_t ioasid); -void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, - bool (*getter)(void *)); -int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); -void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); -int ioasid_set_data(ioasid_t ioasid, void *data); - -#else /* !CONFIG_IOASID */ -static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, - ioasid_t max, void *private) -{ - return INVALID_IOASID; -} - -static inline void ioasid_free(ioasid_t ioasid) { } - -static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, - bool (*getter)(void *)) -{ - return NULL; -} - -static inline int ioasid_register_allocator(struct ioasid_allocator_ops *allocator) -{ - return -ENOTSUPP; -} - -static inline void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator) -{ -} - -static inline int ioasid_set_data(ioasid_t ioasid, void *data) -{ - return -ENOTSUPP; -} - -#endif /* CONFIG_IOASID */ -#endif /* __LINUX_IOASID_H */ From 9afea57384d4ae7b2034593eac7fa76c7122762a Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 31 Mar 2023 11:31:23 +0800 Subject: [PATCH 44/82] iommu/sprd: Release dma buffer to avoid memory leak When attaching to a domain, the driver would alloc a DMA buffer which is used to store address mapping table, and it need to be released when the IOMMU domain is freed. Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20230331033124.864691-2-zhang.lyra@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sprd-iommu.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index ae94d74b73f4..7df1f730c778 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -151,13 +151,6 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) return &dom->domain; } -static void sprd_iommu_domain_free(struct iommu_domain *domain) -{ - struct sprd_iommu_domain *dom = to_sprd_domain(domain); - - kfree(dom); -} - static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom) { struct sprd_iommu_device *sdev = dom->sdev; @@ -230,6 +223,28 @@ static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool en) sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val); } +static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom) +{ + size_t pgt_size; + + /* Nothing need to do if the domain hasn't been attached */ + if (!dom->sdev) + return; + + pgt_size = sprd_iommu_pgt_size(&dom->domain); + dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); + dom->sdev = NULL; + sprd_iommu_hw_en(dom->sdev, false); +} + +static void sprd_iommu_domain_free(struct iommu_domain *domain) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + + sprd_iommu_cleanup(dom); + kfree(dom); +} + static int sprd_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { From 816c698c052471f525bea92ff0aeeda8a5844a85 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 31 Mar 2023 11:31:24 +0800 Subject: [PATCH 45/82] iommu/sprd: Add support for reattaching an existing domain This IOMMU driver should allow a domain to be attached more than once. If IOMMU is reattaching to the same domain which is attached, there's nothing to be done. If reattching to a previously-used domain, do not alloc DMA buffer again which stores address mapping table to avoid memory leak. Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20230331033124.864691-3-zhang.lyra@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sprd-iommu.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 7df1f730c778..3513b2b108bf 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -62,6 +62,7 @@ enum sprd_iommu_version { * @eb: gate clock which controls IOMMU access */ struct sprd_iommu_device { + struct sprd_iommu_domain *dom; enum sprd_iommu_version ver; u32 *prot_page_va; dma_addr_t prot_page_pa; @@ -252,15 +253,27 @@ static int sprd_iommu_attach_device(struct iommu_domain *domain, struct sprd_iommu_domain *dom = to_sprd_domain(domain); size_t pgt_size = sprd_iommu_pgt_size(domain); - if (dom->sdev) - return -EINVAL; + /* The device is attached to this domain */ + if (sdev->dom == dom) + return 0; - dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL); - if (!dom->pgt_va) - return -ENOMEM; + /* The first time that domain is attaching to a device */ + if (!dom->pgt_va) { + dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL); + if (!dom->pgt_va) + return -ENOMEM; - dom->sdev = sdev; + dom->sdev = sdev; + } + sdev->dom = dom; + + /* + * One sprd IOMMU serves one client device only, disabled it before + * configure mapping table to avoid access conflict in case other + * mapping table is stored in. + */ + sprd_iommu_hw_en(sdev, false); sprd_iommu_first_ppn(dom); sprd_iommu_first_vpn(dom); sprd_iommu_vpn_range(dom); From 5e799a7ceed815e71e364b22d0bc5efe3a9ed39f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Mar 2023 14:37:50 +0200 Subject: [PATCH 46/82] iommu/exynos: Use the devm_clk_get_optional() helper Use devm_clk_get_optional() instead of hand writing it. This saves some loC and improves the semantic. Signed-off-by: Christophe JAILLET Acked-by: Marek Szyprowski Link: https://lore.kernel.org/r/99c0d5ce643737ee0952df41fd60433a0bbeb447.1679834256.git.christophe.jaillet@wanadoo.fr Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 483aaaeb6dae..867f409e0325 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -747,22 +747,16 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) return ret; } - data->clk = devm_clk_get(dev, "sysmmu"); - if (PTR_ERR(data->clk) == -ENOENT) - data->clk = NULL; - else if (IS_ERR(data->clk)) + data->clk = devm_clk_get_optional(dev, "sysmmu"); + if (IS_ERR(data->clk)) return PTR_ERR(data->clk); - data->aclk = devm_clk_get(dev, "aclk"); - if (PTR_ERR(data->aclk) == -ENOENT) - data->aclk = NULL; - else if (IS_ERR(data->aclk)) + data->aclk = devm_clk_get_optional(dev, "aclk"); + if (IS_ERR(data->aclk)) return PTR_ERR(data->aclk); - data->pclk = devm_clk_get(dev, "pclk"); - if (PTR_ERR(data->pclk) == -ENOENT) - data->pclk = NULL; - else if (IS_ERR(data->pclk)) + data->pclk = devm_clk_get_optional(dev, "pclk"); + if (IS_ERR(data->pclk)) return PTR_ERR(data->pclk); if (!data->clk && (!data->aclk || !data->pclk)) { @@ -770,10 +764,8 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) return -ENOSYS; } - data->clk_master = devm_clk_get(dev, "master"); - if (PTR_ERR(data->clk_master) == -ENOENT) - data->clk_master = NULL; - else if (IS_ERR(data->clk_master)) + data->clk_master = devm_clk_get_optional(dev, "master"); + if (IS_ERR(data->clk_master)) return PTR_ERR(data->clk_master); data->sysmmu = dev; From 25c2325575cc7f960c9f81f2b77606478f6b911f Mon Sep 17 00:00:00 2001 From: Steven Price Date: Fri, 31 Mar 2023 10:51:54 +0100 Subject: [PATCH 47/82] iommu/rockchip: Add missing set_platform_dma_ops callback Similar to exynos, we need a set_platform_dma_ops() callback for proper operation on ARM 32 bit after recent changes in the IOMMU framework (detach ops removal). But also the use of a NULL domain is confusing. Rework the code to add support for IOMMU_DOMAIN_IDENTITY and a singleton rk_identity_domain which is assigned to domain when using an identity mapping rather than "detaching". This makes the code easier to reason about. Signed-off-by: Steven Price Acked-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230331095154.2671129-1-steven.price@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 61 ++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index f30db22ea5d7..ea5a3088bb7e 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -124,6 +124,7 @@ struct rk_iommudata { static struct device *dma_dev; static const struct rk_iommu_ops *rk_ops; +static struct iommu_domain rk_identity_domain; static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma, unsigned int count) @@ -646,7 +647,7 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id) * Ignore the return code, though, since we always zap cache * and clear the page fault anyway. */ - if (iommu->domain) + if (iommu->domain != &rk_identity_domain) report_iommu_fault(iommu->domain, iommu->dev, iova, flags); else @@ -980,26 +981,27 @@ static int rk_iommu_enable(struct rk_iommu *iommu) return ret; } -static void rk_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) +static int rk_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct rk_iommu *iommu; - struct rk_iommu_domain *rk_domain = to_rk_domain(domain); + struct rk_iommu_domain *rk_domain; unsigned long flags; int ret; /* Allow 'virtual devices' (eg drm) to detach from domain */ iommu = rk_iommu_from_dev(dev); if (!iommu) - return; + return -ENODEV; + + rk_domain = to_rk_domain(iommu->domain); dev_dbg(dev, "Detaching from iommu domain\n"); - /* iommu already detached */ - if (iommu->domain != domain) - return; + if (iommu->domain == identity_domain) + return 0; - iommu->domain = NULL; + iommu->domain = identity_domain; spin_lock_irqsave(&rk_domain->iommus_lock, flags); list_del_init(&iommu->node); @@ -1011,8 +1013,31 @@ static void rk_iommu_detach_device(struct iommu_domain *domain, rk_iommu_disable(iommu); pm_runtime_put(iommu->dev); } + + return 0; } +static void rk_iommu_identity_free(struct iommu_domain *domain) +{ +} + +static struct iommu_domain_ops rk_identity_ops = { + .attach_dev = rk_iommu_identity_attach, + .free = rk_iommu_identity_free, +}; + +static struct iommu_domain rk_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &rk_identity_ops, +}; + +#ifdef CONFIG_ARM +static void rk_iommu_set_platform_dma(struct device *dev) +{ + WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev)); +} +#endif + static int rk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -1035,8 +1060,9 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, if (iommu->domain == domain) return 0; - if (iommu->domain) - rk_iommu_detach_device(iommu->domain, dev); + ret = rk_iommu_identity_attach(&rk_identity_domain, dev); + if (ret) + return ret; iommu->domain = domain; @@ -1050,7 +1076,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, ret = rk_iommu_enable(iommu); if (ret) - rk_iommu_detach_device(iommu->domain, dev); + WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev)); pm_runtime_put(iommu->dev); @@ -1061,6 +1087,9 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) { struct rk_iommu_domain *rk_domain; + if (type == IOMMU_DOMAIN_IDENTITY) + return &rk_identity_domain; + if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) return NULL; @@ -1176,6 +1205,7 @@ static int rk_iommu_of_xlate(struct device *dev, iommu_dev = of_find_device_by_node(args->np); data->iommu = platform_get_drvdata(iommu_dev); + data->iommu->domain = &rk_identity_domain; dev_iommu_priv_set(dev, data); platform_device_put(iommu_dev); @@ -1188,6 +1218,9 @@ static const struct iommu_ops rk_iommu_ops = { .probe_device = rk_iommu_probe_device, .release_device = rk_iommu_release_device, .device_group = rk_iommu_device_group, +#ifdef CONFIG_ARM + .set_platform_dma_ops = rk_iommu_set_platform_dma, +#endif .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, .of_xlate = rk_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { @@ -1343,7 +1376,7 @@ static int __maybe_unused rk_iommu_suspend(struct device *dev) { struct rk_iommu *iommu = dev_get_drvdata(dev); - if (!iommu->domain) + if (iommu->domain == &rk_identity_domain) return 0; rk_iommu_disable(iommu); @@ -1354,7 +1387,7 @@ static int __maybe_unused rk_iommu_resume(struct device *dev) { struct rk_iommu *iommu = dev_get_drvdata(dev); - if (!iommu->domain) + if (iommu->domain == &rk_identity_domain) return 0; return rk_iommu_enable(iommu); From 8f880d19e6ad645a4b8066d5ff091c980b3231e7 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Tue, 4 Apr 2023 00:27:42 -0700 Subject: [PATCH 48/82] iommu/amd: Set page size bitmap during V2 domain allocation With the addition of the V2 page table support, the domain page size bitmap needs to be set prior to iommu core setting up direct mappings for reserved regions. When reserved regions are mapped, if this is not done, it will be looking at the V1 page size bitmap when determining the page size to use in iommu_pgsize(). When it gets into the actual amd mapping code, a check of see if the page size is supported can fail, because at that point it is checking it against the V2 page size bitmap which only supports 4K, 2M, and 1G. Add a check to __iommu_domain_alloc() to not override the bitmap if it was already set by the iommu ops domain_alloc() code path. Cc: Vasant Hegde Cc: Suravee Suthikulpanit Cc: Robin Murphy Cc: Will Deacon Cc: Joerg Roedel Fixes: 4db6c41f0946 ("iommu/amd: Add support for using AMD IOMMU v2 page table for DMA-API") Signed-off-by: Jerry Snitselaar Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20230404072742.1895252-1-jsnitsel@redhat.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 6 ++---- drivers/iommu/iommu.c | 9 +++++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 5a505ba5467e..167da5b1a5e3 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1666,10 +1666,6 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; - /* Override supported page sizes */ - if (domain->flags & PD_GIOV_MASK) - domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; - /* Update device table */ set_dte_entry(iommu, dev_data->devid, domain, ats, dev_data->iommu_v2); @@ -2048,6 +2044,8 @@ static int protection_domain_init_v2(struct protection_domain *domain) domain->flags |= PD_GIOV_MASK; + domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; + if (domain_enable_v2(domain, 1)) { domain_id_free(domain->id); return -ENOMEM; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 10db680acaed..256a38371120 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1964,8 +1964,13 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, return NULL; domain->type = type; - /* Assume all sizes by default; the driver may override this later */ - domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + /* + * If not already set, assume all sizes by default; the driver + * may override this later + */ + if (!domain->pgsize_bitmap) + domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + if (!domain->ops) domain->ops = bus->iommu_ops->default_domain_ops; From ccc62b827775915a9b82db42a29813d04f92df7a Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Wed, 5 Apr 2023 13:03:17 +0000 Subject: [PATCH 49/82] iommu/amd: Fix "Guest Virtual APIC Table Root Pointer" configuration in IRTE commit b9c6ff94e43a ("iommu/amd: Re-factor guest virtual APIC (de-)activation code") while refactoring guest virtual APIC activation/de-activation code, stored information for activate/de-activate in "struct amd_ir_data". It used 32-bit integer data type for storing the "Guest Virtual APIC Table Root Pointer" (ga_root_ptr), though the "ga_root_ptr" is actually a 40-bit field in IRTE (Interrupt Remapping Table Entry). This causes interrupts from PCIe devices to not reach the guest in the case of PCIe passthrough with SME (Secure Memory Encryption) enabled as _SME_ bit in the "ga_root_ptr" is lost before writing it to the IRTE. Fix it by using 64-bit data type for storing the "ga_root_ptr". While at that also change the data type of "ga_tag" to u32 in order to match the IOMMU spec. Fixes: b9c6ff94e43a ("iommu/amd: Re-factor guest virtual APIC (de-)activation code") Cc: stable@vger.kernel.org # v5.4+ Reported-by: Alejandro Jimenez Reviewed-by: Suravee Suthikulpanit Signed-off-by: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/20230405130317.9351-1-kvijayab@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 3d684190b4d5..f7cb1ce0f9bb 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -1001,8 +1001,8 @@ struct amd_ir_data { */ struct irq_cfg *cfg; int ga_vector; - int ga_root_ptr; - int ga_tag; + u64 ga_root_ptr; + u32 ga_tag; }; struct amd_irte_ops { From e494245c756e270e3450983b654c38e6f8aa0b18 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:31 +0800 Subject: [PATCH 50/82] dt-bindings: media: mediatek,vcodec: Remove dma-ranges property After commit f1ad5338a4d5 ("of: Fix "dma-ranges" handling for bus controllers"), the dma-ranges of the leaf node doesn't work. Remove it for vcodec here. For mediatek,vcodec-decoder.yaml and mediatek,vcodec-encoder.yaml, this property is in the leaf node, it is invalid as the above comment. Currently there is only mt8195 VENC node has this property in upstream. Indeed, VENC is affected, but it is not a fatal issue. Originally it expects its iova range locate at 4GB-8GB. However after that commit, its expectation doesn't come true, it will fall back to 0-4GB iova and also could work well. Cc: Tiffany Lin Cc: Andrew-CT Chen Cc: Yunfei Dong Cc: Mauro Carvalho Chehab Cc: Matthias Brugger Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Acked-by: Rob Herring Acked-by: Hans Verkuil Link: https://lore.kernel.org/r/20230411093144.2690-2-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- .../devicetree/bindings/media/mediatek,vcodec-decoder.yaml | 5 ----- .../devicetree/bindings/media/mediatek,vcodec-encoder.yaml | 5 ----- 2 files changed, 10 deletions(-) diff --git a/Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml b/Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml index aa55ca65d6ed..fad59b486d5d 100644 --- a/Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml +++ b/Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml @@ -56,11 +56,6 @@ properties: List of the hardware port in respective IOMMU block for current Socs. Refer to bindings/iommu/mediatek,iommu.yaml. - dma-ranges: - maxItems: 1 - description: | - Describes the physical address space of IOMMU maps to memory. - mediatek,vpu: $ref: /schemas/types.yaml#/definitions/phandle description: diff --git a/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml b/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml index 0f2ea8d9a10c..a2051b31fa29 100644 --- a/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml +++ b/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml @@ -49,11 +49,6 @@ properties: List of the hardware port in respective IOMMU block for current Socs. Refer to bindings/iommu/mediatek,iommu.yaml. - dma-ranges: - maxItems: 1 - description: | - Describes the physical address space of IOMMU maps to memory. - mediatek,vpu: $ref: /schemas/types.yaml#/definitions/phandle description: From 559549b1f250a1b5bec3c6c3a9354deead371bde Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:32 +0800 Subject: [PATCH 51/82] dt-bindings: media: mediatek,jpeg: Remove dma-ranges property After commit f1ad5338a4d5 ("of: Fix "dma-ranges" handling for bus controllers"), the dma-ranges of the leaf node doesn't work. Remove it for jpeg here. Currently there is only mt8195 jpeg node has this property in upstream, and it already uses parent-child node, this property did work. But instead, MediaTek iommu will control the masters' iova ranges by the master's larb/port id internally, then this property is unnecessary. Cc: Mauro Carvalho Chehab Cc: Matthias Brugger Cc: Bin Liu Cc: kyrie wu Cc: Xia Jiang Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Acked-by: Rob Herring Reviewed-by: Hans Verkuil Link: https://lore.kernel.org/r/20230411093144.2690-3-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- .../devicetree/bindings/media/mediatek,mt8195-jpegdec.yaml | 7 ------- .../devicetree/bindings/media/mediatek,mt8195-jpegenc.yaml | 7 ------- .../devicetree/bindings/media/mediatek-jpeg-encoder.yaml | 5 ----- 3 files changed, 19 deletions(-) diff --git a/Documentation/devicetree/bindings/media/mediatek,mt8195-jpegdec.yaml b/Documentation/devicetree/bindings/media/mediatek,mt8195-jpegdec.yaml index 71595c013dbb..e5448c60e3eb 100644 --- a/Documentation/devicetree/bindings/media/mediatek,mt8195-jpegdec.yaml +++ b/Documentation/devicetree/bindings/media/mediatek,mt8195-jpegdec.yaml @@ -26,11 +26,6 @@ properties: Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details. Ports are according to the HW. - dma-ranges: - maxItems: 1 - description: | - Describes the physical address space of IOMMU maps to memory. - "#address-cells": const: 2 @@ -89,7 +84,6 @@ required: - compatible - power-domains - iommus - - dma-ranges - ranges additionalProperties: false @@ -115,7 +109,6 @@ examples: <&iommu_vpp M4U_PORT_L19_JPGDEC_BSDMA1>, <&iommu_vpp M4U_PORT_L19_JPGDEC_BUFF_OFFSET1>, <&iommu_vpp M4U_PORT_L19_JPGDEC_BUFF_OFFSET0>; - dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>; #address-cells = <2>; #size-cells = <2>; ranges; diff --git a/Documentation/devicetree/bindings/media/mediatek,mt8195-jpegenc.yaml b/Documentation/devicetree/bindings/media/mediatek,mt8195-jpegenc.yaml index 95990539f7c0..596186497b68 100644 --- a/Documentation/devicetree/bindings/media/mediatek,mt8195-jpegenc.yaml +++ b/Documentation/devicetree/bindings/media/mediatek,mt8195-jpegenc.yaml @@ -26,11 +26,6 @@ properties: Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details. Ports are according to the HW. - dma-ranges: - maxItems: 1 - description: | - Describes the physical address space of IOMMU maps to memory. - "#address-cells": const: 2 @@ -89,7 +84,6 @@ required: - compatible - power-domains - iommus - - dma-ranges - ranges additionalProperties: false @@ -113,7 +107,6 @@ examples: <&iommu_vpp M4U_PORT_L20_JPGENC_C_RDMA>, <&iommu_vpp M4U_PORT_L20_JPGENC_Q_TABLE>, <&iommu_vpp M4U_PORT_L20_JPGENC_BSDMA>; - dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>; #address-cells = <2>; #size-cells = <2>; ranges; diff --git a/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml b/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml index c8412e8ab353..37800e1908cc 100644 --- a/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml +++ b/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml @@ -44,11 +44,6 @@ properties: Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details. Ports are according to the HW. - dma-ranges: - maxItems: 1 - description: | - Describes the physical address space of IOMMU maps to memory. - required: - compatible - reg From ae6693453ac665015929d2b05104d5a2951375af Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:33 +0800 Subject: [PATCH 52/82] iommu/mediatek: Improve comment for the current region/bank No functional change. Just add more comment about the current region/bank in the code. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-4-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 6a00ce208dc2..efc247a3989e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -197,12 +197,33 @@ struct mtk_iommu_plat_data { char *pericfg_comp_str; struct list_head *hw_list; - unsigned int iova_region_nr; - const struct mtk_iommu_iova_region *iova_region; - u8 banks_num; - bool banks_enable[MTK_IOMMU_BANK_MAX]; - unsigned int banks_portmsk[MTK_IOMMU_BANK_MAX]; + /* + * The IOMMU HW may support 16GB iova. In order to balance the IOVA ranges, + * different masters will be put in different iova ranges, for example vcodec + * is in 4G-8G and cam is in 8G-12G. Meanwhile, some masters may have the + * special IOVA range requirement, like CCU can only support the address + * 0x40000000-0x44000000. + * Here list the iova ranges this SoC supports and which larbs/ports are in + * which region. + * + * 16GB iova all use one pgtable, but each a region is a iommu group. + */ + struct { + unsigned int iova_region_nr; + const struct mtk_iommu_iova_region *iova_region; + }; + + /* + * The IOMMU HW may have 5 banks. Each bank has a independent pgtable. + * Here list how many banks this SoC supports/enables and which ports are in which bank. + */ + struct { + u8 banks_num; + bool banks_enable[MTK_IOMMU_BANK_MAX]; + unsigned int banks_portmsk[MTK_IOMMU_BANK_MAX]; + }; + unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; }; From b2a6876d215b3e980d8ac89ccc704667ad266205 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:34 +0800 Subject: [PATCH 53/82] iommu/mediatek: Get regionid from larb/port id After commit f1ad5338a4d5 ("of: Fix "dma-ranges" handling for bus controllers"), the dma-ranges is not allowed for dts leaf node. but we still would like to separate to different masters into different iova regions. Thus we have to separate it by the HW larbid and portid. For example, larb1/2 are in region2 and larb3 is in region3. The problem is that some ports inside a larb are in region4 while some ports inside this larb are in region5. Therefore I define a "iova_region_larb_msk" to help record the information for each a port. Take a example for a larb: [1] = ~0: means all ports in this larb are in region1; [2] = BIT(3) | BIT(4): means port3/4 in this larb are region2; [3] = ~(BIT(3) | BIT(4)): means all the other ports except port3/4 in this larb are region3. This method also avoids the users forget/abuse the iova regions. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-5-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 45 ++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index efc247a3989e..2bbcce783624 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -212,6 +211,15 @@ struct mtk_iommu_plat_data { struct { unsigned int iova_region_nr; const struct mtk_iommu_iova_region *iova_region; + /* + * Indicate the correspondance between larbs, ports and regions. + * + * The index is the same as iova_region and larb port numbers are + * described as bit positions. + * For example, storing BIT(0) at index 2,1 means "larb 1, port0 is in region 2". + * [2] = { [1] = BIT(0) } + */ + const u32 (*iova_region_larb_msk)[MTK_LARB_NR_MAX]; }; /* @@ -529,30 +537,29 @@ static unsigned int mtk_iommu_get_bank_id(struct device *dev, static int mtk_iommu_get_iova_region_id(struct device *dev, const struct mtk_iommu_plat_data *plat_data) { - const struct mtk_iommu_iova_region *rgn = plat_data->iova_region; - const struct bus_dma_region *dma_rgn = dev->dma_range_map; - int i, candidate = -1; - dma_addr_t dma_end; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + unsigned int portidmsk = 0, larbid; + const u32 *rgn_larb_msk; + int i; - if (!dma_rgn || plat_data->iova_region_nr == 1) + if (plat_data->iova_region_nr == 1) return 0; - dma_end = dma_rgn->dma_start + dma_rgn->size - 1; - for (i = 0; i < plat_data->iova_region_nr; i++, rgn++) { - /* Best fit. */ - if (dma_rgn->dma_start == rgn->iova_base && - dma_end == rgn->iova_base + rgn->size - 1) + larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); + for (i = 0; i < fwspec->num_ids; i++) + portidmsk |= BIT(MTK_M4U_TO_PORT(fwspec->ids[i])); + + for (i = 0; i < plat_data->iova_region_nr; i++) { + rgn_larb_msk = plat_data->iova_region_larb_msk[i]; + if (!rgn_larb_msk) + continue; + + if ((rgn_larb_msk[larbid] & portidmsk) == portidmsk) return i; - /* ok if it is inside this region. */ - if (dma_rgn->dma_start >= rgn->iova_base && - dma_end < rgn->iova_base + rgn->size) - candidate = i; } - if (candidate >= 0) - return candidate; - dev_err(dev, "Can NOT find the iommu domain id(%pad 0x%llx).\n", - &dma_rgn->dma_start, dma_rgn->size); + dev_err(dev, "Can NOT find the region for larb(%d-%x).\n", + larbid, portidmsk); return -EINVAL; } From 6b1317f92874cc82719d9f38b7c298dadfc16a6b Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:35 +0800 Subject: [PATCH 54/82] iommu/mediatek: mt8192: Add iova_region_larb_msk Add iova_region_larb_msk for mt8192. We separate the 16GB iova regions by each device's larbid/portid. Note: larb3/6/8/10/12/15 connect nothing in this SoC. Refer to the comment in include/dt-bindings/memory/mt8192-larb-port.h Define a new macro MT8192_MULTI_REGION_NR_MAX to indicate the index of mt8xxx_larb_region_msk and "struct mtk_iommu_iova_region mt8192_multi_dom" are the same. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-6-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2bbcce783624..ed692c2f51d2 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -336,7 +336,12 @@ static const struct mtk_iommu_iova_region single_domain[] = { {.iova_base = 0, .size = SZ_4G}, }; -static const struct mtk_iommu_iova_region mt8192_multi_dom[] = { +#define MT8192_MULTI_REGION_NR_MAX 6 + +#define MT8192_MULTI_REGION_NR (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) ? \ + MT8192_MULTI_REGION_NR_MAX : 1) + +static const struct mtk_iommu_iova_region mt8192_multi_dom[MT8192_MULTI_REGION_NR] = { { .iova_base = 0x0, .size = SZ_4G}, /* 0 ~ 4G */ #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) { .iova_base = SZ_4G, .size = SZ_4G}, /* 4G ~ 8G */ @@ -1542,6 +1547,17 @@ static const struct mtk_iommu_plat_data mt8186_data_mm = { .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), }; +static const unsigned int mt8192_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { + [0] = {~0, ~0}, /* Region0: larb0/1 */ + [1] = {0, 0, 0, 0, ~0, ~0, 0, ~0}, /* Region1: larb4/5/7 */ + [2] = {0, 0, ~0, 0, 0, 0, 0, 0, /* Region2: larb2/9/11/13/14/16/17/18/19/20 */ + 0, ~0, 0, ~0, 0, ~(u32)(BIT(9) | BIT(10)), ~(u32)(BIT(4) | BIT(5)), 0, + ~0, ~0, ~0, ~0, ~0}, + [3] = {0}, + [4] = {[13] = BIT(9) | BIT(10)}, /* larb13 port9/10 */ + [5] = {[14] = BIT(4) | BIT(5)}, /* larb14 port4/5 */ +}; + static const struct mtk_iommu_plat_data mt8192_data = { .m4u_plat = M4U_MT8192, .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | @@ -1551,6 +1567,7 @@ static const struct mtk_iommu_plat_data mt8192_data = { .banks_enable = {true}, .iova_region = mt8192_multi_dom, .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .iova_region_larb_msk = mt8192_larb_region_msk, .larbid_remap = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20}, {0, 14, 16}, {0, 13, 18, 17}}, }; From a43e767d4e1b913a22a80074a703bd29c644a41b Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:36 +0800 Subject: [PATCH 55/82] iommu/mediatek: mt8195: Add iova_region_larb_msk Add iova_region_larb_msk for mt8195. We separate the 16GB iova regions by each device's larbid/portid. Refer to include/dt-bindings/memory/mt8195-memory-port.h Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-7-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index ed692c2f51d2..35e64f65b164 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1587,6 +1587,21 @@ static const struct mtk_iommu_plat_data mt8195_data_infra = { .iova_region_nr = ARRAY_SIZE(single_domain), }; +static const unsigned int mt8195_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { + [0] = {~0, ~0, ~0, ~0}, /* Region0: all ports for larb0/1/2/3 */ + [1] = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, ~0, ~0, ~0, ~0, ~0, /* Region1: larb19/20/21/22/23/24 */ + ~0}, + [2] = {0, 0, 0, 0, ~0, ~0, ~0, ~0, /* Region2: the other larbs. */ + ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, + ~0, ~0, 0, 0, 0, 0, 0, 0, + 0, ~0, ~0, ~0, ~0}, + [3] = {0}, + [4] = {[18] = BIT(0) | BIT(1)}, /* Only larb18 port0/1 */ + [5] = {[18] = BIT(2) | BIT(3)}, /* Only larb18 port2/3 */ +}; + static const struct mtk_iommu_plat_data mt8195_data_vdo = { .m4u_plat = M4U_MT8195, .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | @@ -1597,6 +1612,7 @@ static const struct mtk_iommu_plat_data mt8195_data_vdo = { .banks_enable = {true}, .iova_region = mt8192_multi_dom, .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .iova_region_larb_msk = mt8195_larb_region_msk, .larbid_remap = {{2, 0}, {21}, {24}, {7}, {19}, {9, 10, 11}, {13, 17, 15/* 17b */, 25}, {5}}, }; @@ -1611,6 +1627,7 @@ static const struct mtk_iommu_plat_data mt8195_data_vpp = { .banks_enable = {true}, .iova_region = mt8192_multi_dom, .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .iova_region_larb_msk = mt8195_larb_region_msk, .larbid_remap = {{1}, {3}, {22, MTK_INVALID_LARBID, MTK_INVALID_LARBID, MTK_INVALID_LARBID, 23}, {8}, {20}, {12}, From f5d4233ad373a1b0ab7c5d10f9b6e79619ecf194 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:37 +0800 Subject: [PATCH 56/82] iommu/mediatek: mt8186: Add iova_region_larb_msk Add iova_region_larb_msk for mt8186. We separate the 16GB iova regions by each device's larbid/portid. Note: larb5/6/10/12/14/15/18 connect nothing in this SoC. Refer to include/dt-bindings/memory/mt8186-memory-port.h Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-8-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 35e64f65b164..c8e2840cda15 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1533,6 +1533,18 @@ static const struct mtk_iommu_plat_data mt8183_data = { .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, }; +static const unsigned int mt8186_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { + [0] = {~0, ~0, ~0}, /* Region0: all ports for larb0/1/2 */ + [1] = {0, 0, 0, 0, ~0, 0, 0, ~0}, /* Region1: larb4/7 */ + [2] = {0, 0, 0, 0, 0, 0, 0, 0, /* Region2: larb8/9/11/13/16/17/19/20 */ + ~0, ~0, 0, ~0, 0, ~(u32)(BIT(9) | BIT(10)), 0, 0, + /* larb13: the other ports except port9/10 */ + ~0, ~0, 0, ~0, ~0}, + [3] = {0}, + [4] = {[13] = BIT(9) | BIT(10)}, /* larb13 port9/10 */ + [5] = {[14] = ~0}, /* larb14 */ +}; + static const struct mtk_iommu_plat_data mt8186_data_mm = { .m4u_plat = M4U_MT8186, .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | @@ -1545,6 +1557,7 @@ static const struct mtk_iommu_plat_data mt8186_data_mm = { .banks_enable = {true}, .iova_region = mt8192_multi_dom, .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .iova_region_larb_msk = mt8186_larb_region_msk, }; static const unsigned int mt8192_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { From 3df9bdd4ae8e41b526906f2f6e9fa2470a9bbbe0 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:38 +0800 Subject: [PATCH 57/82] iommu/mediatek: Add a gap for the iova regions As the removed property in the vcodec dt-binding, the property is: dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>; The length is 0xfff0_0000 rather than 0x1_0000_0000, this means it requires 1M as a gap. This is because the end address for some vcodec HW is (address + size). If the size is 4G, the end address may be 0x2_0000_0000, and the width for vcodec register only is 32, then the HW may get the ZERO address. Currently the consumer's dma-ranges property doesn't work, IOMMU has to consider this case. Add a bigger gap(8M) for all the regions to avoid it. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-9-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index c8e2840cda15..9fee4b6efb7b 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -332,8 +332,10 @@ static LIST_HEAD(m4ulist); /* List all the M4U HWs */ #define for_each_m4u(data, head) list_for_each_entry(data, head, list) +#define MTK_IOMMU_IOVA_SZ_4G (SZ_4G - SZ_8M) /* 8M as gap */ + static const struct mtk_iommu_iova_region single_domain[] = { - {.iova_base = 0, .size = SZ_4G}, + {.iova_base = 0, .size = MTK_IOMMU_IOVA_SZ_4G}, }; #define MT8192_MULTI_REGION_NR_MAX 6 @@ -342,11 +344,11 @@ static const struct mtk_iommu_iova_region single_domain[] = { MT8192_MULTI_REGION_NR_MAX : 1) static const struct mtk_iommu_iova_region mt8192_multi_dom[MT8192_MULTI_REGION_NR] = { - { .iova_base = 0x0, .size = SZ_4G}, /* 0 ~ 4G */ + { .iova_base = 0x0, .size = MTK_IOMMU_IOVA_SZ_4G}, /* 0 ~ 4G, */ #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) - { .iova_base = SZ_4G, .size = SZ_4G}, /* 4G ~ 8G */ - { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* 8G ~ 12G */ - { .iova_base = SZ_4G * 3, .size = SZ_4G}, /* 12G ~ 16G */ + { .iova_base = SZ_4G, .size = MTK_IOMMU_IOVA_SZ_4G}, /* 4G ~ 8G */ + { .iova_base = SZ_4G * 2, .size = MTK_IOMMU_IOVA_SZ_4G}, /* 8G ~ 12G */ + { .iova_base = SZ_4G * 3, .size = MTK_IOMMU_IOVA_SZ_4G}, /* 12G ~ 16G */ { .iova_base = 0x240000000ULL, .size = 0x4000000}, /* CCU0 */ { .iova_base = 0x244000000ULL, .size = 0x4000000}, /* CCU1 */ From f7da2da8675b061f9397f2701c6cc21cb908291b Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:39 +0800 Subject: [PATCH 58/82] iommu/mediatek: Set dma_mask for the master devices MediaTek iommu arranges dma ranges for all the masters, this patch is to help them set dma mask. This is to avoid each master setting their own mask, but also to avoid a real issue, such as JPEG uses "mediatek,mtk-jpgenc" for 2701/8183/8186/8188, then JPEG could ignore its different dma_mask in different SoC to achieve common code. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-10-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 9fee4b6efb7b..95b1bdb58cbe 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -738,6 +738,14 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, } mutex_unlock(&data->mutex); + if (region_id > 0) { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(34)); + if (ret) { + dev_err(m4udev, "Failed to set dma_mask for %s(%d).\n", dev_name(dev), ret); + return ret; + } + } + return mtk_iommu_config(data, dev, true, region_id); err_unlock: From 7d35584d9c6d2a78d8ab95450fafec40620ddf19 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:40 +0800 Subject: [PATCH 59/82] media: mtk-jpegdec: Remove the setting for dma_mask In order to simplify the masters to set their respective dma masks, MTK IOMMU helps to centralize the processing. Because all the dma ranges is set in IOMMU, IOMMU knows well the dma mask requirements of masters. After this patch, the masters code does not need care dma-ranges/dma_mask related information. Cc: Bin Liu Cc: Mauro Carvalho Chehab Cc: kyrie wu Signed-off-by: Yong Wu Acked-by: Hans Verkuil Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-11-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c index 969516a940ba..9b96d2436311 100644 --- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c +++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c @@ -1757,9 +1757,6 @@ static int mtk_jpeg_probe(struct platform_device *pdev) jpeg->vdev->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_M2M_MPLANE; - if (of_get_property(pdev->dev.of_node, "dma-ranges", NULL)) - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34)); - ret = video_register_device(jpeg->vdev, VFL_TYPE_VIDEO, -1); if (ret) { v4l2_err(&jpeg->v4l2_dev, "Failed to register video device\n"); From aa0ee7b4d5221127a0fe80afb716a1ac72473d72 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:41 +0800 Subject: [PATCH 60/82] media: mediatek: vcodec: Remove the setting for dma_mask In order to simplify the masters to set their respective dma masks, MTK IOMMU helps to centralize the processing. Because all the dma ranges is set in IOMMU, IOMMU knows well the dma mask requirements of masters. After this patch, the masters(codec here) code does not need care dma-ranges/dma_mask related information. Cc: Tiffany Lin Cc: Andrew-CT Chen Cc: Yunfei Dong Cc: Mauro Carvalho Chehab Cc: irui wang Signed-off-by: Yong Wu Acked-by: Hans Verkuil Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-12-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- .../media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c | 8 -------- .../media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c | 3 --- 2 files changed, 11 deletions(-) diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c index 174a6eec2f54..11583405cf61 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c @@ -321,14 +321,6 @@ static int mtk_vcodec_probe(struct platform_device *pdev) } } - if (of_get_property(pdev->dev.of_node, "dma-ranges", NULL)) { - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34)); - if (ret) { - mtk_v4l2_err("Failed to set mask"); - goto err_core_workq; - } - } - for (i = 0; i < MTK_VDEC_HW_MAX; i++) mutex_init(&dev->dec_mutex[i]); mutex_init(&dev->dev_mutex); diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c index 9095186d5495..50e5571608cd 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c @@ -344,9 +344,6 @@ static int mtk_vcodec_probe(struct platform_device *pdev) goto err_event_workq; } - if (of_get_property(pdev->dev.of_node, "dma-ranges", NULL)) - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34)); - ret = video_register_device(vfd_enc, VFL_TYPE_VIDEO, -1); if (ret) { mtk_v4l2_err("Failed to register video device"); From 2aa6e5f63ea2b1768afbde763a948534c190254e Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:42 +0800 Subject: [PATCH 61/82] arm64: dts: mt8195: Remove the unnecessary dma-ranges After we add the dma-ranges in the parent "soc" node, this property is unnecessary for the leaf node. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-13-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 8f1264d5290b..89f469ba9f6c 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -2299,7 +2299,6 @@ venc: video-codec@1a020000 { power-domains = <&spm MT8195_POWER_DOMAIN_VENC>; #address-cells = <2>; #size-cells = <2>; - dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>; }; jpgdec-master { @@ -2311,7 +2310,6 @@ jpgdec-master { <&iommu_vdo M4U_PORT_L19_JPGDEC_BSDMA1>, <&iommu_vdo M4U_PORT_L19_JPGDEC_BUFF_OFFSET1>, <&iommu_vdo M4U_PORT_L19_JPGDEC_BUFF_OFFSET0>; - dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>; #address-cells = <2>; #size-cells = <2>; ranges; @@ -2383,7 +2381,6 @@ jpgenc-master { <&iommu_vpp M4U_PORT_L20_JPGENC_C_RDMA>, <&iommu_vpp M4U_PORT_L20_JPGENC_Q_TABLE>, <&iommu_vpp M4U_PORT_L20_JPGENC_BSDMA>; - dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>; #address-cells = <2>; #size-cells = <2>; ranges; From 88c531b42a986c02a2630b84521d3dfe19b01686 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:43 +0800 Subject: [PATCH 62/82] arm64: dts: mt8195: Add dma-ranges for the parent "soc" node After commit f1ad5338a4d5 ("of: Fix "dma-ranges" handling for bus controllers"), the dma-ranges property is not allowed for the leaf node. But our iommu/dma-ranges is 16GB, we still expect separate the 16GB dma-range like: a) display is in 0 - 4GB; b) vcodec is in 4GB - 8GB; c) camera is in 8GB - 12GB. We can not expect all the masters add a parent node for them, especial for the existed drivers/nodes. Thus, we add whole the 16GB dma-ranges in the parent "soc" node. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-14-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 89f469ba9f6c..7d87cbabc9f1 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -364,6 +364,7 @@ soc { #size-cells = <2>; compatible = "simple-bus"; ranges; + dma-ranges = <0x0 0x0 0x0 0x0 0x4 0x0>; gic: interrupt-controller@c000000 { compatible = "arm,gic-v3"; From f543028451d1361b7e6ab45b42503a31047c3102 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 11 Apr 2023 17:31:44 +0800 Subject: [PATCH 63/82] arm64: dts: mt8186: Add dma-ranges for the parent "soc" node Prepare for the MM nodes whose dma-ranges(iova range) is 16GB. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230411093144.2690-15-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- arch/arm64/boot/dts/mediatek/mt8186.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi index a0d3e1f731bd..251eace411c0 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi @@ -324,6 +324,7 @@ soc { #address-cells = <2>; #size-cells = <2>; compatible = "simple-bus"; + dma-ranges = <0x0 0x0 0x0 0x0 0x4 0x0>; ranges; gic: interrupt-controller@c000000 { From 84c9ef72b64d9bd0d8a219a75019ec4c2da77a08 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 13 Apr 2023 12:06:29 +0800 Subject: [PATCH 64/82] dmaengine: idxd: Add enable/disable device IOPF feature The iommu subsystem requires IOMMU_DEV_FEAT_IOPF must be enabled before and disabled after IOMMU_DEV_FEAT_SVA, if device's I/O page faults rely on the IOMMU. Add explicit IOMMU_DEV_FEAT_IOPF enabling/disabling in this driver. At present, missing IOPF enabling/disabling doesn't cause any real issue, because the IOMMU driver places the IOPF enabling/disabling in the path of SVA feature handling. But this may change. Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Reviewed-by: Kevin Tian Acked-by: Vinod Koul Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230324120234.313643-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/dma/idxd/init.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 640d3048368e..09ef62aa0635 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -516,6 +516,27 @@ static void idxd_disable_system_pasid(struct idxd_device *idxd) idxd->sva = NULL; } +static int idxd_enable_sva(struct pci_dev *pdev) +{ + int ret; + + ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); + if (ret) + return ret; + + ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + if (ret) + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); + + return ret; +} + +static void idxd_disable_sva(struct pci_dev *pdev) +{ + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); +} + static int idxd_probe(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; @@ -530,7 +551,7 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD reset complete\n"); if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { - if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) { + if (idxd_enable_sva(pdev)) { dev_warn(dev, "Unable to turn on user SVA feature.\n"); } else { set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); @@ -578,21 +599,19 @@ static int idxd_probe(struct idxd_device *idxd) if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); if (device_user_pasid_enabled(idxd)) - iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); + idxd_disable_sva(pdev); return rc; } static void idxd_cleanup(struct idxd_device *idxd) { - struct device *dev = &idxd->pdev->dev; - perfmon_pmu_remove(idxd); idxd_cleanup_interrupts(idxd); idxd_cleanup_internals(idxd); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); if (device_user_pasid_enabled(idxd)) - iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); + idxd_disable_sva(idxd->pdev); } static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -710,7 +729,7 @@ static void idxd_remove(struct pci_dev *pdev) pci_free_irq_vectors(pdev); pci_iounmap(pdev, idxd->reg_base); if (device_user_pasid_enabled(idxd)) - iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + idxd_disable_sva(pdev); pci_disable_device(pdev); destroy_workqueue(idxd->wq); perfmon_pmu_remove(idxd); From a86fb7717320bf2c05701cbcfaab5afc452d1480 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 13 Apr 2023 12:06:30 +0800 Subject: [PATCH 65/82] iommu/vt-d: Allow SVA with device-specific IOPF Currently enabling SVA requires IOPF support from the IOMMU and device PCI PRI. However, some devices can handle IOPF by itself without ever sending PCI page requests nor advertising PRI capability. Allow SVA support with IOPF handled either by IOMMU (PCI PRI) or device driver (device-specific IOPF). As long as IOPF could be handled, SVA should continue to work. Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230324120234.313643-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7c2f4bd33582..caf664448ee9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4650,7 +4650,21 @@ static int intel_iommu_enable_sva(struct device *dev) if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) return -ENODEV; - if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + if (!info->pasid_enabled || !info->ats_enabled) + return -EINVAL; + + /* + * Devices having device-specific I/O fault handling should not + * support PCI/PRI. The IOMMU side has no means to check the + * capability of device-specific IOPF. Therefore, IOMMU can only + * default that if the device driver enables SVA on a non-PRI + * device, it will handle IOPF in its own way. + */ + if (!info->pri_supported) + return 0; + + /* Devices supporting PRI should have it enabled. */ + if (!info->pri_enabled) return -EINVAL; ret = iopf_queue_add_device(iommu->iopf_queue, dev); From 3d4c7cc3d168dc728dd3472688b2360b6db124e3 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 13 Apr 2023 12:06:31 +0800 Subject: [PATCH 66/82] iommu/vt-d: Move iopf code from SVA to IOPF enabling path Generally enabling IOMMU_DEV_FEAT_SVA requires IOMMU_DEV_FEAT_IOPF, but some devices manage I/O Page Faults themselves instead of relying on the IOMMU. Move IOPF related code from SVA to IOPF enabling path. For the device drivers that relies on the IOMMU for IOPF through PCI/PRI, IOMMU_DEV_FEAT_IOPF must be enabled before and disabled after IOMMU_DEV_FEAT_SVA. Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230324120234.313643-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index caf664448ee9..a6f07c74da2d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4638,7 +4638,6 @@ static int intel_iommu_enable_sva(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu; - int ret; if (!info || dmar_disabled) return -EINVAL; @@ -4667,6 +4666,21 @@ static int intel_iommu_enable_sva(struct device *dev) if (!info->pri_enabled) return -EINVAL; + return 0; +} + +static int intel_iommu_enable_iopf(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu; + int ret; + + if (!info || !info->ats_enabled || !info->pri_enabled) + return -ENODEV; + iommu = info->iommu; + if (!iommu) + return -EINVAL; + ret = iopf_queue_add_device(iommu->iopf_queue, dev); if (ret) return ret; @@ -4678,7 +4692,7 @@ static int intel_iommu_enable_sva(struct device *dev) return ret; } -static int intel_iommu_disable_sva(struct device *dev) +static int intel_iommu_disable_iopf(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; @@ -4695,16 +4709,6 @@ static int intel_iommu_disable_sva(struct device *dev) return ret; } -static int intel_iommu_enable_iopf(struct device *dev) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - - if (info && info->pri_supported) - return 0; - - return -ENODEV; -} - static int intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) { @@ -4725,10 +4729,10 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { case IOMMU_DEV_FEAT_IOPF: - return 0; + return intel_iommu_disable_iopf(dev); case IOMMU_DEV_FEAT_SVA: - return intel_iommu_disable_sva(dev); + return 0; default: return -ENODEV; From 5ae4008055fed503f7bf42aeed45d3b032a79876 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 13 Apr 2023 12:06:32 +0800 Subject: [PATCH 67/82] iommu/vt-d: Move pfsid and ats_qdep calculation to device probe path They should be part of the per-device iommu private data initialization. Reviewed-by: Jacob Pan Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230324120234.313643-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a6f07c74da2d..6d77b4072fdd 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1406,20 +1406,6 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); - /* For IOMMU that supports device IOTLB throttling (DIT), we assign - * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge - * queue depth at PF level. If DIT is not set, PFSID will be treated as - * reserved, which should be set to 0. - */ - if (!ecap_dit(info->iommu->ecap)) - info->pfsid = 0; - else { - struct pci_dev *pf_pdev; - - /* pdev will be returned if device is not a vf */ - pf_pdev = pci_physfn(pdev); - info->pfsid = pci_dev_id(pf_pdev); - } /* The PCIe spec, in its wisdom, declares that the behaviour of the device if you enable PASID support after ATS support is @@ -1438,7 +1424,6 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; domain_update_iotlb(info->domain); - info->ats_qdep = pci_ats_queue_depth(pdev); } } @@ -4521,6 +4506,17 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) dmar_ats_supported(pdev, iommu)) { info->ats_supported = 1; info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); + + /* + * For IOMMU that supports device IOTLB throttling + * (DIT), we assign PFSID to the invalidation desc + * of a VF such that IOMMU HW can gauge queue depth + * at PF level. If DIT is not set, PFSID will be + * treated as reserved, which should be set to 0. + */ + if (ecap_dit(iommu->ecap)) + info->pfsid = pci_dev_id(pci_physfn(pdev)); + info->ats_qdep = pci_ats_queue_depth(pdev); } if (sm_supported(iommu)) { if (pasid_supported(iommu)) { From fbcde5bb92bd3d299688d615dce26c59d3f63694 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 13 Apr 2023 12:06:33 +0800 Subject: [PATCH 68/82] iommu/vt-d: Move PRI handling to IOPF feature path PRI is only used for IOPF. With this move, the PCI/PRI feature could be controlled by the device driver through iommu_dev_enable/disable_feature() interfaces. Reviewed-by: Jacob Pan Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230324120234.313643-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 53 ++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6d77b4072fdd..cd3a3c4b5e64 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1415,11 +1415,6 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (info->pri_supported && - (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) && - !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH)) - info->pri_enabled = 1; - if (info->ats_supported && pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; @@ -1442,11 +1437,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) domain_update_iotlb(info->domain); } - if (info->pri_enabled) { - pci_disable_pri(pdev); - info->pri_enabled = 0; - } - if (info->pasid_enabled) { pci_disable_pasid(pdev); info->pasid_enabled = 0; @@ -4667,23 +4657,48 @@ static int intel_iommu_enable_sva(struct device *dev) static int intel_iommu_enable_iopf(struct device *dev) { + struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu; int ret; - if (!info || !info->ats_enabled || !info->pri_enabled) + if (!pdev || !info || !info->ats_enabled || !info->pri_supported) return -ENODEV; + + if (info->pri_enabled) + return -EBUSY; + iommu = info->iommu; if (!iommu) return -EINVAL; + /* PASID is required in PRG Response Message. */ + if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) + return -EINVAL; + + ret = pci_reset_pri(pdev); + if (ret) + return ret; + ret = iopf_queue_add_device(iommu->iopf_queue, dev); if (ret) return ret; ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); if (ret) - iopf_queue_remove_device(iommu->iopf_queue, dev); + goto iopf_remove_device; + + ret = pci_enable_pri(pdev, PRQ_DEPTH); + if (ret) + goto iopf_unregister_handler; + info->pri_enabled = 1; + + return 0; + +iopf_unregister_handler: + iommu_unregister_device_fault_handler(dev); +iopf_remove_device: + iopf_queue_remove_device(iommu->iopf_queue, dev); return ret; } @@ -4694,6 +4709,20 @@ static int intel_iommu_disable_iopf(struct device *dev) struct intel_iommu *iommu = info->iommu; int ret; + if (!info->pri_enabled) + return -EINVAL; + + /* + * PCIe spec states that by clearing PRI enable bit, the Page + * Request Interface will not issue new page requests, but has + * outstanding page requests that have been transmitted or are + * queued for transmission. This is supposed to be called after + * the device driver has stopped DMA, all PASIDs have been + * unbound and the outstanding PRQs have been drained. + */ + pci_disable_pri(to_pci_dev(dev)); + info->pri_enabled = 0; + ret = iommu_unregister_device_fault_handler(dev); if (ret) return ret; From 7b8aa998d627afeef576ab61bf8e286f56439bb2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 13 Apr 2023 12:06:34 +0800 Subject: [PATCH 69/82] iommu/vt-d: Remove unnecessary checks in iopf disabling path iommu_unregister_device_fault_handler() and iopf_queue_remove_device() are called after device has stopped issuing new page falut requests and all outstanding page requests have been drained. They should never fail. Trigger a warning if it happens unfortunately. Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230324120234.313643-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index cd3a3c4b5e64..c771233d6f2a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4707,7 +4707,6 @@ static int intel_iommu_disable_iopf(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; - int ret; if (!info->pri_enabled) return -EINVAL; @@ -4723,15 +4722,15 @@ static int intel_iommu_disable_iopf(struct device *dev) pci_disable_pri(to_pci_dev(dev)); info->pri_enabled = 0; - ret = iommu_unregister_device_fault_handler(dev); - if (ret) - return ret; + /* + * With PRI disabled and outstanding PRQs drained, unregistering + * fault handler and removing device from iopf queue should never + * fail. + */ + WARN_ON(iommu_unregister_device_fault_handler(dev)); + WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev)); - ret = iopf_queue_remove_device(iommu->iopf_queue, dev); - if (ret) - iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); - - return ret; + return 0; } static int From 41d71e09a155a05bd9bec08dd195d8d8e0146ad8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 13 Apr 2023 12:06:35 +0800 Subject: [PATCH 70/82] iommu/vt-d: Do not use GFP_ATOMIC when not needed There is no need to use GFP_ATOMIC here. GFP_KERNEL is already used for some other memory allocations just a few lines above. Commit e3a981d61d15 ("iommu/vt-d: Convert allocations to GFP_KERNEL") has changed the other memory allocation flags. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/e2a8a1019ffc8a86b4b4ed93def3623f60581274.1675542576.git.christophe.jaillet@wanadoo.fr Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/irq_remapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 6d01fa078c36..013ccc25ee4f 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -552,7 +552,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) goto out_free_table; } - bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_ATOMIC); + bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_KERNEL); if (bitmap == NULL) { pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id); goto out_free_pages; From a06c2ecec113fa882f48eb71e9d7e8fb520b1c78 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 13 Apr 2023 12:06:36 +0800 Subject: [PATCH 71/82] iommu/vt-d: Remove extern from function prototypes The kernel coding style does not require 'extern' in function prototypes in .h files, so remove them from drivers/iommu/intel/iommu.h as they are not needed. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230331045452.500265-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d6df3b865812..f843493dc4f2 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -796,18 +796,18 @@ static inline bool context_present(struct context_entry *context) return (context->lo & 1); } -extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); +struct dmar_drhd_unit *dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern int dmar_enable_qi(struct intel_iommu *iommu); -extern void dmar_disable_qi(struct intel_iommu *iommu); -extern int dmar_reenable_qi(struct intel_iommu *iommu); -extern void qi_global_iec(struct intel_iommu *iommu); +int dmar_enable_qi(struct intel_iommu *iommu); +void dmar_disable_qi(struct intel_iommu *iommu); +int dmar_reenable_qi(struct intel_iommu *iommu); +void qi_global_iec(struct intel_iommu *iommu); -extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, - u8 fm, u64 type); -extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type); -extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, +void qi_flush_context(struct intel_iommu *iommu, u16 did, + u16 sid, u8 fm, u64 type); +void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); +void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, u16 qdep, u64 addr, unsigned mask); void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, @@ -830,7 +830,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, */ #define QI_OPT_WAIT_DRAIN BIT(0) -extern int dmar_ir_support(void); +int dmar_ir_support(void); void *alloc_pgtable_page(int node, gfp_t gfp); void free_pgtable_page(void *vaddr); @@ -838,9 +838,9 @@ void iommu_flush_write_buffer(struct intel_iommu *iommu); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); #ifdef CONFIG_INTEL_IOMMU_SVM -extern void intel_svm_check(struct intel_iommu *iommu); -extern int intel_svm_enable_prq(struct intel_iommu *iommu); -extern int intel_svm_finish_prq(struct intel_iommu *iommu); +void intel_svm_check(struct intel_iommu *iommu); +int intel_svm_enable_prq(struct intel_iommu *iommu); +int intel_svm_finish_prq(struct intel_iommu *iommu); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(void); @@ -887,8 +887,8 @@ extern const struct iommu_ops intel_iommu_ops; #ifdef CONFIG_INTEL_IOMMU extern int intel_iommu_sm; -extern int iommu_calculate_agaw(struct intel_iommu *iommu); -extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); +int iommu_calculate_agaw(struct intel_iommu *iommu); +int iommu_calculate_max_sagaw(struct intel_iommu *iommu); int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob); static inline bool ecmd_has_pmu_essential(struct intel_iommu *iommu) From a7050fbde36e0634f89d41e6384c140fe4ae011e Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 13 Apr 2023 12:06:37 +0800 Subject: [PATCH 72/82] iommu/vt-d: Use non-privileged mode for all PASIDs Supervisor Request Enable (SRE) bit in a PASID entry is for permission checking on DMA requests. When SRE = 0, DMA with supervisor privilege will be blocked. However, for in-kernel DMA this is not necessary in that we are targeting kernel memory anyway. There's no need to differentiate user and kernel for in-kernel DMA. Let's use non-privileged (user) permission for all PASIDs used in kernel, it will be consistent with DMA without PASID (RID_PASID) as well. Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230331231137.1947675-2-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index c771233d6f2a..f4e536fd5a28 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2316,8 +2316,6 @@ static int domain_setup_first_level(struct intel_iommu *iommu, if (level != 4 && level != 5) return -EINVAL; - if (pasid != PASID_RID2PASID) - flags |= PASID_FLAG_SUPERVISOR_MODE; if (level == 5) flags |= PASID_FLAG_FL5LP; From 113a031becc8a1b904d7fd873fb3d329f203d2f0 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 13 Apr 2023 12:06:38 +0800 Subject: [PATCH 73/82] iommu/vt-d: Remove PASID supervisor request support There's no more usage, remove PASID supervisor support. Suggested-by: Lu Baolu Reviewed-by: Lu Baolu Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230331231137.1947675-3-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 43 ------------------------------------- drivers/iommu/intel/pasid.h | 7 ------ 2 files changed, 50 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 633e0a4a01e7..c5d479770e12 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -335,15 +335,6 @@ static inline void pasid_set_fault_enable(struct pasid_entry *pe) pasid_set_bits(&pe->val[0], 1 << 1, 0); } -/* - * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a - * scalable mode PASID entry. - */ -static inline void pasid_set_sre(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 0, 1); -} - /* * Setup the WPE(Write Protect Enable) field (Bit 132) of a * scalable mode PASID entry. @@ -521,23 +512,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, return -EINVAL; } - if (flags & PASID_FLAG_SUPERVISOR_MODE) { -#ifdef CONFIG_X86 - unsigned long cr0 = read_cr0(); - - /* CR0.WP is normally set but just to be sure */ - if (unlikely(!(cr0 & X86_CR0_WP))) { - pr_err("No CPU write protect!\n"); - return -EINVAL; - } -#endif - if (!ecap_srs(iommu->ecap)) { - pr_err("No supervisor request support on %s\n", - iommu->name); - return -EINVAL; - } - } - if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) { pr_err("No 5-level paging support for first-level on %s\n", iommu->name); @@ -560,10 +534,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, /* Setup the first level page table pointer: */ pasid_set_flptr(pte, (u64)__pa(pgd)); - if (flags & PASID_FLAG_SUPERVISOR_MODE) { - pasid_set_sre(pte); - pasid_set_wpe(pte); - } if (flags & PASID_FLAG_FL5LP) pasid_set_flpm(pte, 1); @@ -658,12 +628,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); - /* - * Since it is a second level only translation setup, we should - * set SRE bit as well (addresses are expected to be GPAs). - */ - if (pasid != PASID_RID2PASID && ecap_srs(iommu->ecap)) - pasid_set_sre(pte); pasid_set_present(pte); spin_unlock(&iommu->lock); @@ -700,13 +664,6 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT); pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); - - /* - * We should set SRE bit as well since the addresses are expected - * to be GPAs. - */ - if (ecap_srs(iommu->ecap)) - pasid_set_sre(pte); pasid_set_present(pte); spin_unlock(&iommu->lock); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 20c54e50f533..d6b7d21244b1 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -41,13 +41,6 @@ #define FLPT_DEFAULT_DID 1 #define NUM_RESERVED_DID 2 -/* - * The SUPERVISOR_MODE flag indicates a first level translation which - * can be used for access to kernel addresses. It is valid only for - * access to the kernel's static 1:1 mapping of physical memory — not - * to vmalloc or even module mappings. - */ -#define PASID_FLAG_SUPERVISOR_MODE BIT(0) #define PASID_FLAG_NESTED BIT(1) #define PASID_FLAG_PAGE_SNOOP BIT(2) From b31064f881eec6047874ef58df4376b20432859c Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 13 Apr 2023 12:06:39 +0800 Subject: [PATCH 74/82] iommu/vt-d: Make size of operands same in bitwise operations This addresses the following issue reported by klocwork tool: - operands of different size in bitwise operations Suggested-by: Yongwei Ma Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20230406065944.2773296-2-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 2 +- drivers/iommu/intel/iommu.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 6acfe879589c..01d0ca0019f2 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1689,7 +1689,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) * is present. */ if (ecap_smts(iommu->ecap)) - val |= (1 << 11) | 1; + val |= BIT_ULL(11) | BIT_ULL(0); raw_spin_lock_irqsave(&iommu->register_lock, flags); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f4e536fd5a28..acbf82fa90e7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1870,7 +1870,7 @@ context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) */ static inline void context_set_sm_dte(struct context_entry *context) { - context->lo |= (1 << 2); + context->lo |= BIT_ULL(2); } /* @@ -1879,7 +1879,7 @@ static inline void context_set_sm_dte(struct context_entry *context) */ static inline void context_set_sm_pre(struct context_entry *context) { - context->lo |= (1 << 4); + context->lo |= BIT_ULL(4); } /* Convert value to context PASID directory size field coding. */ From 35dc5d8998efa0f87a25bb184a9c54c46100fec3 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 13 Apr 2023 12:06:40 +0800 Subject: [PATCH 75/82] iommu/vt-d: Remove BUG_ON on checking valid pfn range When encountering an unexpected invalid pfn range, the kernel should attempt recovery and proceed with execution. Therefore, using WARN_ON to replace BUG_ON to avoid halting the machine. Besides, one redundant checking is reduced. Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20230406065944.2773296-3-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index acbf82fa90e7..c4847a5aaf52 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1005,9 +1005,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned int large_page; struct dma_pte *first_pte, *pte; - BUG_ON(!domain_pfn_supported(domain, start_pfn)); - BUG_ON(!domain_pfn_supported(domain, last_pfn)); - BUG_ON(start_pfn > last_pfn); + if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) || + WARN_ON(start_pfn > last_pfn)) + return; /* we don't need lock here; nobody else touches the iova range */ do { @@ -1166,9 +1166,9 @@ static void dma_pte_clear_level(struct dmar_domain *domain, int level, static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn, struct list_head *freelist) { - BUG_ON(!domain_pfn_supported(domain, start_pfn)); - BUG_ON(!domain_pfn_supported(domain, last_pfn)); - BUG_ON(start_pfn > last_pfn); + if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) || + WARN_ON(start_pfn > last_pfn)) + return; /* we don't need lock here; nobody else touches the iova range */ dma_pte_clear_level(domain, agaw_to_level(domain->agaw), From 4a627a2593b457336047c6803b5d5c097183a9ca Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 13 Apr 2023 12:06:41 +0800 Subject: [PATCH 76/82] iommu/vt-d: Remove BUG_ON in handling iotlb cache invalidation VT-d iotlb cache invalidation request with unexpected type is considered as a bug to developers, which can be fixed. So, when such kind of issue comes out, it needs to be reported through the kernel log, instead of halting the system. Replacing BUG_ON with warning reporting. Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20230406065944.2773296-4-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index c4847a5aaf52..dd61bb554aa7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1272,7 +1272,9 @@ static void __iommu_flush_context(struct intel_iommu *iommu, | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); break; default: - BUG(); + pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n", + iommu->name, type); + return; } val |= DMA_CCMD_ICC; @@ -1308,7 +1310,9 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, val_iva = size_order | addr; break; default: - BUG(); + pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n", + iommu->name, type); + return; } /* Note: set drain read/write */ #if 0 @@ -1483,7 +1487,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; u16 did = domain_id_iommu(domain, iommu); - BUG_ON(pages == 0); + if (WARN_ON(!pages)) + return; if (ih) ih = 1 << 6; From 998d4c2db30c7279ce6070faf06b914c98f2ae30 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 13 Apr 2023 12:06:42 +0800 Subject: [PATCH 77/82] iommu/vt-d: Remove BUG_ON when domain->pgd is NULL When performing domain_context_mapping or getting dma_pte of a pfn, the availability of the domain page table directory is ensured. Therefore, the domain->pgd checkings are unnecessary. Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20230406065944.2773296-5-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index dd61bb554aa7..f11347a590d7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -915,8 +915,6 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, int level = agaw_to_level(domain->agaw); int offset; - BUG_ON(!domain->pgd); - if (!domain_pfn_supported(domain, pfn)) /* Address beyond IOMMU's addressing capabilities. */ return NULL; @@ -1910,8 +1908,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - BUG_ON(!domain->pgd); - spin_lock(&iommu->lock); ret = -ENOMEM; context = iommu_context_addr(iommu, bus, devfn, 1); From cbf2f9e8badd483e2ee17a56ad8dbd1cfdcead20 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 13 Apr 2023 12:06:43 +0800 Subject: [PATCH 78/82] iommu/vt-d: Remove BUG_ON in map/unmap() Domain map/unmap with invalid parameters shouldn't crash the kernel. Therefore, using if() replaces the BUG_ON. Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20230406065944.2773296-6-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f11347a590d7..ab21ef1ddb3c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2159,7 +2159,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, phys_addr_t pteval; u64 attr; - BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); + if (unlikely(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1))) + return -EINVAL; if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; @@ -4314,8 +4315,9 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ - BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, - GFP_ATOMIC)); + if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, + &level, GFP_ATOMIC))) + return 0; if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); From ff45ab96465f013bee39ac88b756bb1129a7bc9d Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 13 Apr 2023 12:06:44 +0800 Subject: [PATCH 79/82] iommu/vt-d: Remove a useless BUG_ON(dev->is_virtfn) When dmar_alloc_pci_notify_info() is being invoked, the invoker has ensured the dev->is_virtfn is false. So, remove the useless BUG_ON in dmar_alloc_pci_notify_info(). Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20230406065944.2773296-7-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 01d0ca0019f2..9684c96247f8 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -127,8 +127,6 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) struct pci_dev *tmp; struct dmar_pci_notify_info *info; - BUG_ON(dev->is_virtfn); - /* * Ignore devices that have a domain number higher than what can * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000 From e60d63e32d239c4c20b370106b57079d7f0994cf Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 13 Apr 2023 12:06:45 +0800 Subject: [PATCH 80/82] iommu/vt-d: Remove BUG_ON in dmar_insert_dev_scope() The dmar_insert_dev_scope() could fail if any unexpected condition is encountered. However, in this situation, the kernel should attempt recovery and proceed with execution. Remove BUG_ON with WARN_ON, so that kernel can avoid being crashed when an unexpected condition occurs. Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20230406065944.2773296-8-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 9684c96247f8..7eceee081235 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -262,7 +262,8 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, get_device(dev)); return 1; } - BUG_ON(i >= devices_cnt); + if (WARN_ON(i >= devices_cnt)) + return -EINVAL; } return 0; From e223864f8257afde5e23eca4c006a0d69581a7a2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 12 Apr 2023 11:10:45 -0300 Subject: [PATCH 81/82] iommu: Make iommu_release_device() static This is not called outside the core code, and indeed cannot be called correctly outside the bus notifier. Make it static. Signed-off-by: Jason Gunthorpe Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/0-v1-c3da18124d2d+56-rm_iommu_release_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 3 ++- include/linux/iommu.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7abee83610b6..435fc902df19 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -87,6 +87,7 @@ static const char * const iommu_group_resv_type_string[] = { static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data); +static void iommu_release_device(struct device *dev); static int iommu_alloc_default_domain(struct iommu_group *group, struct device *dev); static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, @@ -493,7 +494,7 @@ static void __iommu_group_release_device(struct iommu_group *group, kobject_put(group->devices_kobj); } -void iommu_release_device(struct device *dev) +static void iommu_release_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; struct group_device *device; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 54f535ff9868..c892e06f8357 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -699,7 +699,6 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv) } int iommu_probe_device(struct device *dev); -void iommu_release_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); From f7f9c054a227ad4922070d748b1f4fc4b5657329 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 12 Apr 2023 11:11:58 -0300 Subject: [PATCH 82/82] iommu: Remove iommu_group_get_by_id() This is never called. Signed-off-by: Jason Gunthorpe Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/0-v1-60bbc66d7e92+24-rm_iommu_get_by_id_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 29 ----------------------------- include/linux/iommu.h | 6 ------ 2 files changed, 35 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 435fc902df19..f35058f1d68e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -882,35 +882,6 @@ struct iommu_group *iommu_group_alloc(void) } EXPORT_SYMBOL_GPL(iommu_group_alloc); -struct iommu_group *iommu_group_get_by_id(int id) -{ - struct kobject *group_kobj; - struct iommu_group *group; - const char *name; - - if (!iommu_group_kset) - return NULL; - - name = kasprintf(GFP_KERNEL, "%d", id); - if (!name) - return NULL; - - group_kobj = kset_find_obj(iommu_group_kset, name); - kfree(name); - - if (!group_kobj) - return NULL; - - group = container_of(group_kobj, struct iommu_group, kobj); - BUG_ON(group->id != id); - - kobject_get(group->devices_kobj); - kobject_put(&group->kobj); - - return group; -} -EXPORT_SYMBOL_GPL(iommu_group_get_by_id); - /** * iommu_group_get_iommudata - retrieve iommu_data registered for a group * @group: the group diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c892e06f8357..7dbdd13d7ce0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -460,7 +460,6 @@ extern bool iommu_present(struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); -extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); @@ -746,11 +745,6 @@ static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) return NULL; } -static inline struct iommu_group *iommu_group_get_by_id(int id) -{ - return NULL; -} - static inline void iommu_domain_free(struct iommu_domain *domain) { }