From cce8365fc47b053ab1ea9dcc8cd8c8466e53fcde Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 6 Jul 2023 14:30:31 -0600 Subject: [PATCH 01/62] arm64: errata: Group all Cortex-A510 errata together There are 2 sections of Cortex-A510 errata. As the ordering within vendors is in order by CPU/IP name, move the 2nd section up to the 1st section of A510 errata. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230706203030.276437-1-robh@kernel.org Signed-off-by: Will Deacon --- Documentation/arch/arm64/silicon-errata.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 496cdca5cb99..e3e4450c1c98 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -63,6 +63,14 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | @@ -109,14 +117,6 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | -+----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 | -+----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 | -+----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 | -+----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | From d0999555e306db8d4f973f7316baa5650495a01f Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 11 Jul 2023 14:34:58 +0530 Subject: [PATCH 02/62] arm64/mm: Replace an open coding with ID_AA64MMFR1_EL1_HAFDBS_MASK Replace '0xf' with ID_AA64MMFR1_EL1_HAFDBS_MASK while evaluating if the cpu supports implicit page table entry access flag update in HW. 
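For reference, a sketch of the field encoding involved (illustrative; HAFDBS is the lowest four-bit field of ID_AA64MMFR1_EL1, so the generated mask covers the same bits as the old '#0xf' literal):

	/* Illustrative only: what the tools-generated definitions amount to. */
	#define ID_AA64MMFR1_EL1_HAFDBS_SHIFT	0
	#define ID_AA64MMFR1_EL1_HAFDBS_MASK	GENMASK_ULL(3, 0)

Using the named mask keeps the assembly self-documenting without changing the value being tested.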
Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20230711090458.238346-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 2baeec419f62..14fdf645edc8 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -447,7 +447,7 @@ SYM_FUNC_START(__cpu_setup) * via capabilities. */ mrs x9, ID_AA64MMFR1_EL1 - and x9, x9, #0xf + and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK cbz x9, 1f orr tcr, tcr, #TCR_HA // hardware Access flag update 1: From 62ce7af97ba5096ecb60bc28d6a619e6eb77ebda Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 11 Jul 2023 14:50:55 +0530 Subject: [PATCH 03/62] arm64/mm: Directly use ID_AA64MMFR2_EL1_VARange_MASK The tools-generated register fields have in-place mask macros which can be used directly, instead of shifting the older right-aligned masks. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20230711092055.245756-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 757a0de07f91..7b236994f0e1 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -113,7 +113,7 @@ SYM_CODE_START(primary_entry) */ #if VA_BITS > 48 mrs_s x0, SYS_ID_AA64MMFR2_EL1 - tst x0, #0xf << ID_AA64MMFR2_EL1_VARange_SHIFT + tst x0, ID_AA64MMFR2_EL1_VARange_MASK mov x0, #VA_BITS mov x25, #VA_BITS_MIN csel x25, x25, x0, eq @@ -756,7 +756,7 @@ SYM_FUNC_START(__cpu_secondary_check52bitva) b.ne 2f mrs_s x0, SYS_ID_AA64MMFR2_EL1 - and x0, x0, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT) + and x0, x0, ID_AA64MMFR2_EL1_VARange_MASK cbnz x0, 2f update_early_cpu_boot_status \ From 5f69ca4229c7d8e23f238174827ee7aa49b0bcb2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 17 Jul 2023 19:55:05 +0200 Subject: [PATCH 04/62] arm64/ptrace: Clean up error handling path in sve_set_common() All error handling paths go to 'out', except this one. Be consistent and also branch to 'out' here. Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers") Signed-off-by: Christophe JAILLET Reviewed-by: Mark Brown Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/aa61301ed2dfd079b74b37f7fede5f179ac3087a.1689616473.git.christophe.jaillet@wanadoo.fr Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d7f4f0d1ae12..9bc23f1b499e 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -884,7 +884,8 @@ static int sve_set_common(struct task_struct *target, break; default: WARN_ON_ONCE(1); - return -EINVAL; + ret = -EINVAL; + goto out; } /* From ce33cea5d833c24fe2564e3f0416c7f2720a9c9c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 18 Jul 2023 20:45:44 +0100 Subject: [PATCH 05/62] arm64/cpufeature: Use ARM64_CPUID_FIELDS() to match EVT The recently added Enhanced Virtualization Traps cpufeature does not use the ARM64_CPUID_FIELDS() helper; convert it to do so. No functional change.
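For illustration, the ARM64_CPUID_FIELDS() helper used below stands in for the open-coded initialisers it removes; roughly:

	/* Sketch of the equivalence (the exact expansion lives in cpufeature.h): */
	ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
	/* ~= */
	.sys_reg = SYS_ID_AA64MMFR2_EL1,
	.sign = FTR_UNSIGNED,
	.field_pos = ID_AA64MMFR2_EL1_EVT_SHIFT,
	.field_width = 4,
	.min_field_value = ID_AA64MMFR2_EL1_EVT_IMP,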
Signed-off-by: Mark Brown Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20230718-arm64-evt-cpuid-helper-v1-1-68375d1e6b92@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f9d456fe132d..bcb006390e55 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2708,12 +2708,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .desc = "Enhanced Virtualization Traps", .capability = ARM64_HAS_EVT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .sys_reg = SYS_ID_AA64MMFR2_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR2_EL1_EVT_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64MMFR2_EL1_EVT_IMP, .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP) }, {}, }; From d0ba961217e0ef8608e751a46902259ce4ff52b7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 13 Jul 2023 14:50:04 +0530 Subject: [PATCH 06/62] arm64/mm: Add pte_rdonly() helper This replaces the open-coded PTE_RDONLY check with a new helper, pte_rdonly(). No functional change is intended here. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Reviewed-by: David Hildenbrand Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20230713092004.693749-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0bd18de9fd97..02b7abcf1f9f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -103,6 +103,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) +#define pte_rdonly(pte) (!!(pte_val(pte) & PTE_RDONLY)) #define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) @@ -120,7 +121,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) (__boundary - 1 < (end) - 1) ? __boundary : (end); \ }) -#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) +#define pte_hw_dirty(pte) (pte_write(pte) && !pte_rdonly(pte)) #define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) From 6477c3886ae101367aae2e808caf71f7b0fcc18e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 13 Jul 2023 12:45:18 +0530 Subject: [PATCH 07/62] arm64/mm: Set only the PTE_DIRTY bit while preserving the HW dirty state pte_mkdirty() creates the dirty state in both the SW and HW bits, which is not required when pte_wrprotect() or pte_modify() merely needs to preserve the HW dirty state. pte_mkdirty() sets PTE_DIRTY and also clears PTE_RDONLY, since pte_write() always evaluates to true there - otherwise pte_hw_dirty() would not have been true in the first place. Clearing PTE_RDONLY again is not required here because the pte is already pte_hw_dirty(), but it might soon lose its dirty state, thus requiring preservation in the SW dirty bit, i.e. PTE_DIRTY.
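In other words, of the two steps pte_mkdirty() performs, only the first is needed here (a sketch using the helpers from the hunks below):

	/* pte_mkdirty() would do both; the second is redundant when the pte
	 * is already pte_hw_dirty(), i.e. PTE_RDONLY is known to be clear. */
	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
	pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));	/* skipped */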
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Reviewed-by: David Hildenbrand Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20230713071518.628440-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 02b7abcf1f9f..72c2e8431360 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -213,7 +213,7 @@ static inline pte_t pte_wrprotect(pte_t pte) * clear), set the PTE_DIRTY bit. */ if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); @@ -824,7 +824,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) PTE_ATTRINDX_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); + pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } From 42501f6d4d5d4e5b69fa083193c8247918e8c393 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 20 Jul 2023 22:35:55 +0800 Subject: [PATCH 08/62] arm64: Remove unused extern declaration init_mem_pgprot() Commit a501e32430d4 ("arm64: Clean up the default pgprot setting") left this behind. Signed-off-by: YueHaibing Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20230720143555.26044-1-yuehaibing@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 4384eaa0aeb7..94b68850cb9f 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -64,7 +64,6 @@ extern void arm64_memblock_init(void); extern void paging_init(void); extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); -extern void init_mem_pgprot(void); extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, From a96a7a7ddf9559eb20e608c0de30d1867d755a33 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 27 Jul 2023 01:36:19 +0800 Subject: [PATCH 09/62] arm64: vdso: remove two .altinstructions related symbols The two symbols __alt_instructions and __alt_instructions_end are not used, since the vDSO patching code looks for the '.altinstructions' ELF section directly. Remove the unused linker symbols. Fixes: 4e3bca8f7cdd ("arm64: alternative: patch alternatives in the vDSO") Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20230726173619.3732-1-jszhang@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/vdso.lds.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 6028f1fe2d1c..45354f2ddf70 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -50,9 +50,7 @@ SECTIONS .
= ALIGN(4); .altinstructions : { - __alt_instructions = .; *(.altinstructions) - __alt_instructions_end = .; } .dynamic : { *(.dynamic) } :text :dynamic From cbbc6fdd85be6cd748d6c6db23c1d5be6b04161e Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 20 Jun 2023 15:12:33 +0800 Subject: [PATCH 10/62] driver/perf: Add identifier sysfs file for Yitian 710 DDR To allow userspace to identify the specific implementation of the device, add an "identifier" sysfs file. The perf tool can match the Yitian 710 DDR metric through the identifier. Signed-off-by: Jing Zhang Acked-by: Ian Rogers Reviewed-by: Shuai Xue Link: https://lore.kernel.org/r/1687245156-61215-2-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/alibaba_uncore_drw_pmu.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c index 5c5be9fc1b15..19d459a36be5 100644 --- a/drivers/perf/alibaba_uncore_drw_pmu.c +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -236,10 +236,37 @@ static const struct attribute_group ali_drw_pmu_cpumask_attr_group = { .attrs = ali_drw_pmu_cpumask_attrs, }; +static ssize_t ali_drw_pmu_identifier_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%s\n", "ali_drw_pmu"); +} + +static umode_t ali_drw_pmu_identifier_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + return attr->mode; +} + +static struct device_attribute ali_drw_pmu_identifier_attr = + __ATTR(identifier, 0444, ali_drw_pmu_identifier_show, NULL); + +static struct attribute *ali_drw_pmu_identifier_attrs[] = { + &ali_drw_pmu_identifier_attr.attr, + NULL +}; + +static const struct attribute_group ali_drw_pmu_identifier_attr_group = { + .attrs = ali_drw_pmu_identifier_attrs, + .is_visible = ali_drw_pmu_identifier_attr_visible +}; + static const struct attribute_group *ali_drw_pmu_attr_groups[] = { &ali_drw_pmu_events_attr_group, &ali_drw_pmu_cpumask_attr_group, &ali_drw_pmu_format_group, + &ali_drw_pmu_identifier_attr_group, NULL, }; From c47ea342d85db6fde628dc55996d852d5244bda3 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 4 Jul 2023 17:35:55 +0800 Subject: [PATCH 11/62] perf: xgene_pmu: Convert to devm_platform_ioremap_resource() Use devm_platform_ioremap_resource() to simplify code. 
Signed-off-by: Yangtao Li Link: https://lore.kernel.org/r/20230704093556.17926-1-frank.li@vivo.com Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 0c32dffc7ede..9972bfc11a5c 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1833,7 +1833,6 @@ static int xgene_pmu_probe(struct platform_device *pdev) const struct xgene_pmu_data *dev_data; const struct of_device_id *of_id; struct xgene_pmu *xgene_pmu; - struct resource *res; int irq, rc; int version; @@ -1883,8 +1882,7 @@ static int xgene_pmu_probe(struct platform_device *pdev) xgene_pmu->version = version; dev_info(&pdev->dev, "X-Gene PMU version %d\n", xgene_pmu->version); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - xgene_pmu->pcppmu_csr = devm_ioremap_resource(&pdev->dev, res); + xgene_pmu->pcppmu_csr = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(xgene_pmu->pcppmu_csr)) { dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n"); return PTR_ERR(xgene_pmu->pcppmu_csr); From 7c3f204e544dfa376bf1b34ebaa5552304a2b7d9 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Thu, 6 Jul 2023 11:23:05 +0200 Subject: [PATCH 12/62] perf/smmuv3: Remove build dependency on ACPI This driver supports working without ACPI since commit 3f7be43561766 ("perf/smmuv3: Add devicetree support"), so remove the build dependency. Signed-off-by: Vincent Whitchurch Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20230706-smmuv3-pmu-noacpi-v1-1-7083ef189158@axis.com Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index f4572a5cca72..273d67ecf6d2 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -92,7 +92,7 @@ config ARM_PMU_ACPI config ARM_SMMU_V3_PMU tristate "ARM SMMUv3 Performance Monitors Extension" - depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT) + depends on ARM64 || (COMPILE_TEST && 64BIT) depends on GENERIC_MSI_IRQ help Provides support for the ARM SMMUv3 Performance Monitor Counter From 039768b558537af23734c4cc6fd688ed575158c8 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 6 Jul 2023 14:55:03 -0600 Subject: [PATCH 13/62] dt-bindings: arm: pmu: Add Cortex A520, A715, A720, X3, and X4 Add compatible strings for the Arm Cortex-A520, Cortex-A715, Cortex-A720, Cortex-X3, and Cortex-X4 CPU PMUs. 
Acked-by: Conor Dooley Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230706205505.308523-1-robh@kernel.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index e14358bf0b9c..99b5e9530707 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -49,9 +49,14 @@ properties: - arm,cortex-a77-pmu - arm,cortex-a78-pmu - arm,cortex-a510-pmu + - arm,cortex-a520-pmu - arm,cortex-a710-pmu + - arm,cortex-a715-pmu + - arm,cortex-a720-pmu - arm,cortex-x1-pmu - arm,cortex-x2-pmu + - arm,cortex-x3-pmu + - arm,cortex-x4-pmu - arm,neoverse-e1-pmu - arm,neoverse-n1-pmu - arm,neoverse-n2-pmu From 989567fc0f3d5ba1c2a48cdb857a3965b7e1a276 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 6 Jul 2023 14:55:04 -0600 Subject: [PATCH 14/62] perf: pmuv3: Add Cortex A520, A715, A720, X3 and X4 PMUs Add support for the Arm Cortex-A520, Cortex-A715, Cortex-A720, Cortex-X3, and Cortex-X4 CPU PMUs. They are straight-forward additions with just new compatible strings. Acked-by: Mark Rutland Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230706205505.308523-2-robh@kernel.org Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 08b3a1bf0ef6..d2dffb4e9d07 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1266,9 +1266,14 @@ PMUV3_INIT_SIMPLE(armv8_cortex_a76) PMUV3_INIT_SIMPLE(armv8_cortex_a77) PMUV3_INIT_SIMPLE(armv8_cortex_a78) PMUV3_INIT_SIMPLE(armv9_cortex_a510) +PMUV3_INIT_SIMPLE(armv9_cortex_a520) PMUV3_INIT_SIMPLE(armv9_cortex_a710) +PMUV3_INIT_SIMPLE(armv9_cortex_a715) +PMUV3_INIT_SIMPLE(armv9_cortex_a720) PMUV3_INIT_SIMPLE(armv8_cortex_x1) PMUV3_INIT_SIMPLE(armv9_cortex_x2) +PMUV3_INIT_SIMPLE(armv9_cortex_x3) +PMUV3_INIT_SIMPLE(armv9_cortex_x4) PMUV3_INIT_SIMPLE(armv8_neoverse_e1) PMUV3_INIT_SIMPLE(armv8_neoverse_n1) PMUV3_INIT_SIMPLE(armv9_neoverse_n2) @@ -1334,9 +1339,14 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init}, {.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init}, {.compatible = "arm,cortex-a510-pmu", .data = armv9_cortex_a510_pmu_init}, + {.compatible = "arm,cortex-a520-pmu", .data = armv9_cortex_a520_pmu_init}, {.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init}, + {.compatible = "arm,cortex-a715-pmu", .data = armv9_cortex_a715_pmu_init}, + {.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init}, {.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init}, {.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init}, + {.compatible = "arm,cortex-x3-pmu", .data = armv9_cortex_x3_pmu_init}, + {.compatible = "arm,cortex-x4-pmu", .data = armv9_cortex_x4_pmu_init}, {.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init}, {.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init}, {.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init}, From 918dc87b746e0114eb64d6e478da7f370e266d5a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 14 Jul 2023 11:48:31 -0600 Subject: [PATCH 15/62] drivers/perf: Explicitly include correct DT includes The DT of_device.h and of_platform.h date back to 
the separate of_platform_bus_type before it was merged into the regular platform bus. As part of that merge prepping Arm DT support 13 years ago, they "temporarily" include each other. They also include platform_device.h and of.h. As a result, there's a pretty much random mix of those include files used throughout the tree. In order to untangle these headers and replace the implicit includes with struct declarations, users need to explicitly include the correct includes. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230714174832.4061752-1-robh@kernel.org Signed-off-by: Will Deacon --- drivers/perf/amlogic/meson_ddr_pmu_core.c | 2 -- drivers/perf/arm-cci.c | 5 +---- drivers/perf/arm_dsu_pmu.c | 2 +- drivers/perf/arm_pmu_platform.c | 1 - drivers/perf/arm_spe_pmu.c | 3 +-- drivers/perf/fsl_imx8_ddr_perf.c | 3 +-- drivers/perf/fsl_imx9_ddr_perf.c | 4 +--- drivers/perf/marvell_cn10k_ddr_pmu.c | 3 +-- drivers/perf/marvell_cn10k_tad_pmu.c | 3 +-- 9 files changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index 0b24dee1ed3c..bbc7285fd934 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -9,8 +9,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 998259f1d973..61de861eaf91 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -7,10 +7,7 @@ #include #include #include -#include -#include -#include -#include +#include #include #include #include diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index fe2abb412c00..8223c49bd082 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 933b96e243b8..3596db36cbff 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index b9ba4c4fe5a2..d2b0cbf0e0c4 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -25,8 +25,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 5222ba1e79d0..1cb3861ab0e0 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -10,10 +10,9 @@ #include #include #include -#include -#include #include #include +#include #include #define COUNTER_CNTL 0x0 diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c index 71d5b07e3aff..5cf770a1bc31 100644 --- a/drivers/perf/fsl_imx9_ddr_perf.c +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -7,9 +7,7 @@ #include #include #include -#include -#include -#include +#include #include /* Performance monitor configuration */ diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c index b94a5f6cc22b..524ba82bfce2 100644 --- a/drivers/perf/marvell_cn10k_ddr_pmu.c +++ b/drivers/perf/marvell_cn10k_ddr_pmu.c @@ -8,11 +8,10 @@ #include #include #include -#include -#include #include #include #include +#include /* Performance Counters Operating Mode Control Registers */ #define DDRC_PERF_CNT_OP_MODE_CTRL 0x8020 diff --git
a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c index 3972197e2210..fec8e82edb95 100644 --- a/drivers/perf/marvell_cn10k_tad_pmu.c +++ b/drivers/perf/marvell_cn10k_tad_pmu.c @@ -6,10 +6,9 @@ #define pr_fmt(fmt) "tad_pmu: " fmt +#include #include #include -#include -#include #include #include #include From 0c7c237b1c35011ef0b8d30c1d5c20bc6ae7b69b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 20 Jul 2023 19:38:59 +0100 Subject: [PATCH 16/62] kselftest/arm64: Add a test case for SVE VL changes with SME active We just fixed an issue where changing the SVE VL while SME was active could result in us attempting to save the streaming mode SVE vectors without any backing storage. Add a test case which provokes that issue; ideally we should also verify that the contents of ZA are unaffected by any of this. Note that since we need to keep streaming mode enabled we can't use any syscalls to trigger the issue; we have to sit in a loop in userspace and hope to be preempted. The chosen numbers trigger with defconfig on all the virtual platforms for me; this won't be 100% reliable on all systems, but it avoids an overcomplicated test implementation. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230720-arm64-fix-sve-sme-vl-change-v2-2-8eea06b82d57@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/vec-syscfg.c | 105 +++++++++++++++++- 1 file changed, 102 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index 9bcfcdc34ee9..58ea4bde5be7 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -39,9 +40,11 @@ struct vec_data { int max_vl; }; +#define VEC_SVE 0 +#define VEC_SME 1 static struct vec_data vec_data[] = { - { + [VEC_SVE] = { .name = "SVE", .hwcap_type = AT_HWCAP, .hwcap = HWCAP_SVE, @@ -51,7 +54,7 @@ static struct vec_data vec_data[] = { .prctl_set = PR_SVE_SET_VL, .default_vl_file = "/proc/sys/abi/sve_default_vector_length", }, - { + [VEC_SME] = { .name = "SME", .hwcap_type = AT_HWCAP2, .hwcap = HWCAP2_SME, @@ -644,18 +647,107 @@ static const test_type tests[] = { prctl_set_all_vqs, }; +static inline void smstart(void) +{ + asm volatile("msr S0_3_C4_C7_3, xzr"); +} + +static inline void smstart_sm(void) +{ + asm volatile("msr S0_3_C4_C3_3, xzr"); +} + +static inline void smstop(void) +{ + asm volatile("msr S0_3_C4_C6_3, xzr"); +} + + +/* + * Verify we can change the SVE vector length while SME is active and + * continue to use SME afterwards.
+ */ +static void change_sve_with_za(void) +{ + struct vec_data *sve_data = &vec_data[VEC_SVE]; + bool pass = true; + int ret, i; + + if (sve_data->min_vl == sve_data->max_vl) { + ksft_print_msg("Only one SVE VL supported, can't change\n"); + ksft_test_result_skip("change_sve_while_sme\n"); + return; + } + + /* Ensure we will trigger a change when we set the maximum */ + ret = prctl(sve_data->prctl_set, sve_data->min_vl); + if (ret != sve_data->min_vl) { + ksft_print_msg("Failed to set SVE VL %d: %d\n", + sve_data->min_vl, ret); + pass = false; + } + + /* Enable SM and ZA */ + smstart(); + + /* Trigger another VL change */ + ret = prctl(sve_data->prctl_set, sve_data->max_vl); + if (ret != sve_data->max_vl) { + ksft_print_msg("Failed to set SVE VL %d: %d\n", + sve_data->max_vl, ret); + pass = false; + } + + /* + * Spin for a bit with SM enabled to try to trigger another + * save/restore. We can't use syscalls without exiting + * streaming mode. + */ + for (i = 0; i < 100000000; i++) + smstart_sm(); + + /* + * TODO: Verify that ZA was preserved over the VL change and + * spin. + */ + + /* Clean up after ourselves */ + smstop(); + ret = prctl(sve_data->prctl_set, sve_data->default_vl); + if (ret != sve_data->default_vl) { + ksft_print_msg("Failed to restore SVE VL %d: %d\n", + sve_data->default_vl, ret); + pass = false; + } + + ksft_test_result(pass, "change_sve_with_za\n"); +} + +typedef void (*test_all_type)(void); + +static const struct { + const char *name; + test_all_type test; +} all_types_tests[] = { + { "change_sve_with_za", change_sve_with_za }, +}; + int main(void) { + bool all_supported = true; int i, j; ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data)); + ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data) + + ARRAY_SIZE(all_types_tests)); for (i = 0; i < ARRAY_SIZE(vec_data); i++) { struct vec_data *data = &vec_data[i]; unsigned long supported; supported = getauxval(data->hwcap_type) & data->hwcap; + if (!supported) + all_supported = false; for (j = 0; j < ARRAY_SIZE(tests); j++) { if (supported) @@ -666,5 +758,12 @@ int main(void) } } + for (i = 0; i < ARRAY_SIZE(all_types_tests); i++) { + if (all_supported) + all_types_tests[i].test(); + else + ksft_test_result_skip("%s\n", all_types_tests[i].name); + } + ksft_exit_pass(); } From 0aeead9bb240a6965ede2bed68096c4381fd46a3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 20 Jul 2023 19:39:00 +0100 Subject: [PATCH 17/62] kselftest/arm64: Validate that changing one VL type does not affect another On a system with both SVE and SME when we change one of the VLs this should not result in a change in the other VL. Add a check that this is in fact the case to vec-syscfg. 
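The property being checked can also be illustrated through the prctl() interface alone (a minimal sketch, independent of the selftest's rdvl() helpers; the low bits of the prctl return value carry the VL):

	#include <sys/prctl.h>
	#include <assert.h>

	/* Sketch: changing the SVE VL must leave the SME VL untouched. */
	static void check_vl_independence(int new_sve_vl)
	{
		int before = prctl(PR_SME_GET_VL) & PR_SME_VL_LEN_MASK;

		prctl(PR_SVE_SET_VL, new_sve_vl);
		assert((prctl(PR_SME_GET_VL) & PR_SME_VL_LEN_MASK) == before);
	}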
Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230720-arm64-fix-sve-sme-vl-change-v2-3-8eea06b82d57@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/vec-syscfg.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index 58ea4bde5be7..5f648b97a06f 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -554,7 +554,8 @@ static void prctl_set_onexec(struct vec_data *data) /* For each VQ verify that setting via prctl() does the right thing */ static void prctl_set_all_vqs(struct vec_data *data) { - int ret, vq, vl, new_vl; + int ret, vq, vl, new_vl, i; + int orig_vls[ARRAY_SIZE(vec_data)]; int errors = 0; if (!data->min_vl || !data->max_vl) { @@ -563,6 +564,9 @@ static void prctl_set_all_vqs(struct vec_data *data) return; } + for (i = 0; i < ARRAY_SIZE(vec_data); i++) + orig_vls[i] = vec_data[i].rdvl(); + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { vl = sve_vl_from_vq(vq); @@ -585,6 +589,22 @@ static void prctl_set_all_vqs(struct vec_data *data) errors++; } + /* Did any other VLs change? */ + for (i = 0; i < ARRAY_SIZE(vec_data); i++) { + if (&vec_data[i] == data) + continue; + + if (!(getauxval(vec_data[i].hwcap_type) & vec_data[i].hwcap)) + continue; + + if (vec_data[i].rdvl() != orig_vls[i]) { + ksft_print_msg("%s VL changed from %d to %d\n", + vec_data[i].name, orig_vls[i], + vec_data[i].rdvl()); + errors++; + } + } + /* Was that the VL we asked for? */ if (new_vl == vl) continue; From c3651feff2969b13c45f87444befcb96a0137b8d Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 24 Jul 2023 11:45:39 +0100 Subject: [PATCH 18/62] Documentation: arm64: Correct SME ZA macros name It should be ZA_PT_ZA*. ZA_PT_ZA_OFFSET is one example. It is not ZA_PT_ZA_* because there is one macro ZA_PT_ZAV_OFFSET that doesn't fit that pattern. Fixes: 96d32e630935 ("arm64/sme: Provide ABI documentation for SME") Signed-off-by: David Spickett Reviewed-by: Mark Brown Cc: Mark Brown Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- Documentation/arch/arm64/sme.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst index ba529a1dc606..3d0e53ecac4f 100644 --- a/Documentation/arch/arm64/sme.rst +++ b/Documentation/arch/arm64/sme.rst @@ -322,7 +322,7 @@ The regset data starts with struct user_za_header, containing: VL is supported. * The size and layout of the payload depends on the header fields. The - SME_PT_ZA_*() macros are provided to facilitate access to the data. + ZA_PT_ZA*() macros are provided to facilitate access to the data. * In either case, for SETREGSET it is permissible to omit the payload, in which case the vector length and flags are changed and PSTATE.ZA is set to 0 From 64a0b90a3c1ed0aa15e75f8c688f8e55f1f12999 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 27 Jul 2023 10:03:24 +0800 Subject: [PATCH 19/62] arm64/Kconfig: Sort the RCpc feature under the ARMv8.3 features menu Moving the LDAPR detection config under the ARMv8.3 menu is more reasonable than leaving it under ARMv8.1, since this feature was released together with the ARMv8.3 feature set.
Signed-off-by: Zeng Heng Link: https://lore.kernel.org/r/20230727020324.2149960-1-zengheng4@huawei.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a2511b30d0f6..29db061db9bb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1793,9 +1793,6 @@ config ARM64_PAN The feature is detected at runtime, and will remain as a 'nop' instruction if the cpu does not implement the feature. -config AS_HAS_LDAPR - def_bool $(as-instr,.arch_extension rcpc) - config AS_HAS_LSE_ATOMICS def_bool $(as-instr,.arch_extension lse) @@ -1933,6 +1930,9 @@ config AS_HAS_ARMV8_3 config AS_HAS_CFI_NEGATE_RA_STATE def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n) +config AS_HAS_LDAPR + def_bool $(as-instr,.arch_extension rcpc) + endmenu # "ARMv8.3 architectural features" menu "ARMv8.4 architectural features" From 00df90934c9effc52b60749788b0eeb131d1bfd7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 7 Jul 2023 17:38:11 +0100 Subject: [PATCH 20/62] perf/arm-cmn: Remove spurious event aliases As the name suggests, the "partial DAT flit" event is only counted for the DAT channel, and furthermore is only applicable to device ports, not mesh links (strictly it's only device ports with CHI-A requesters connected, but detecting that degree of detail is more bother than it's worth). Stop generating spurious event aliases for other combinations which aren't meaningful. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/b01a58e3ff05c322547fbfd015f6dbfedf555ed3.1688746690.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index b8c15878bc86..a007648dbf1c 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -791,16 +791,22 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_HNF(_model, _name##_read, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \ _CMN_EVENT_HNF(_model, _name##_write, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL) -#define _CMN_EVENT_XP(_name, _event) \ +#define _CMN_EVENT_XP_MESH(_name, _event) \ __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \ __CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)), \ __CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)), \ - __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)), \ + __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)) + +#define _CMN_EVENT_XP_PORT(_name, _event) \ __CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)), \ __CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)), \ __CMN_EVENT_XP(p2_##_name, (_event) | (6 << 2)), \ __CMN_EVENT_XP(p3_##_name, (_event) | (7 << 2)) +#define _CMN_EVENT_XP(_name, _event) \ + _CMN_EVENT_XP_MESH(_name, _event), \ + _CMN_EVENT_XP_PORT(_name, _event) + /* Good thing there are only 3 fundamental XP events... 
*/ #define CMN_EVENT_XP(_name, _event) \ _CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)), \ @@ -813,6 +819,10 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)), \ _CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5)) +#define CMN_EVENT_XP_DAT(_name, _event) \ + _CMN_EVENT_XP_PORT(dat_##_name, (_event) | (3 << 5)), \ + _CMN_EVENT_XP_PORT(dat2_##_name, (_event) | (6 << 5)) + static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_DTC(cycles), @@ -943,7 +953,7 @@ static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_XP(txflit_valid, 0x01), CMN_EVENT_XP(txflit_stall, 0x02), - CMN_EVENT_XP(partial_dat_flit, 0x03), + CMN_EVENT_XP_DAT(partial_dat_flit, 0x03), /* We treat watchpoints as a special made-up class of XP events */ CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP), CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN), From b1b7dc38e4827bf842ed763a4dfb4c0d72259ad5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 7 Jul 2023 17:38:12 +0100 Subject: [PATCH 21/62] perf/arm-cmn: Refactor HN-F event selector macros Refactor the macros for defining HN-F events with additional selectors, so they can be shared with another upcoming similar-but-distinct HN type. No functional change intended. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/0f05327941e06c665dbfd47e03fad29276b9e63c.1688746690.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 49 +++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index a007648dbf1c..1efe9b72c0e6 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -742,8 +742,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup, _fsel) #define CMN_EVENT_DTC(_name) \ CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0) -#define _CMN_EVENT_HNF(_model, _name, _event, _occup, _fsel) \ - _CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event, _occup, _fsel) +#define CMN_EVENT_HNF(_model, _name, _event) \ + CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event) #define CMN_EVENT_HNI(_name, _event) \ CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event) #define CMN_EVENT_HNP(_name, _event) \ @@ -775,21 +775,34 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_DVM(_model, _name##_all, _event, 0, SEL_OCCUP1ID), \ _CMN_EVENT_DVM(_model, _name##_dvmop, _event, 1, SEL_OCCUP1ID), \ _CMN_EVENT_DVM(_model, _name##_dvmsync, _event, 2, SEL_OCCUP1ID) -#define CMN_EVENT_HNF(_model, _name, _event) \ - _CMN_EVENT_HNF(_model, _name, _event, 0, SEL_NONE) + +#define CMN_EVENT_HN_OCC(_model, _name, _type, _event) \ + _CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 1, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 2, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(_model, _name##_atomic, _type, _event, 3, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(_model, _name##_stash, _type, _event, 4, SEL_OCCUP1ID) +#define CMN_EVENT_HN_CLS(_model, _name, _type, _event) \ + _CMN_EVENT_ATTR(_model, _name##_class0, _type, _event, 0, SEL_CLASS_OCCUP_ID), \ + _CMN_EVENT_ATTR(_model, _name##_class1, _type, _event, 1, SEL_CLASS_OCCUP_ID), \ + _CMN_EVENT_ATTR(_model, _name##_class2, _type, _event, 2, SEL_CLASS_OCCUP_ID), \ + _CMN_EVENT_ATTR(_model, 
_name##_class3, _type, _event, 3, SEL_CLASS_OCCUP_ID) +#define CMN_EVENT_HN_SNT(_model, _name, _type, _event) \ + _CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_group0_read, _type, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_group0_write, _type, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_group1_read, _type, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_group1_write, _type, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL) + +#define CMN_EVENT_HNF_OCC(_model, _name, _event) \ + CMN_EVENT_HN_OCC(_model, hnf_##_name, CMN_TYPE_HNF, _event) #define CMN_EVENT_HNF_CLS(_model, _name, _event) \ - _CMN_EVENT_HNF(_model, _name##_class0, _event, 0, SEL_CLASS_OCCUP_ID), \ - _CMN_EVENT_HNF(_model, _name##_class1, _event, 1, SEL_CLASS_OCCUP_ID), \ - _CMN_EVENT_HNF(_model, _name##_class2, _event, 2, SEL_CLASS_OCCUP_ID), \ - _CMN_EVENT_HNF(_model, _name##_class3, _event, 3, SEL_CLASS_OCCUP_ID) + CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNS, _event) #define CMN_EVENT_HNF_SNT(_model, _name, _event) \ - _CMN_EVENT_HNF(_model, _name##_all, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_group0_read, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_group0_write, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_group1_read, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_group1_write, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_read, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_write, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL) + CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event) + #define _CMN_EVENT_XP_MESH(_name, _event) \ __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \ @@ -872,11 +885,7 @@ static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_HNF(CMN_ANY, mc_retries, 0x0c), CMN_EVENT_HNF(CMN_ANY, mc_reqs, 0x0d), CMN_EVENT_HNF(CMN_ANY, qos_hh_retry, 0x0e), - _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_all, 0x0f, 0, SEL_OCCUP1ID), - _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_read, 0x0f, 1, SEL_OCCUP1ID), - _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_write, 0x0f, 2, SEL_OCCUP1ID), - _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_atomic, 0x0f, 3, SEL_OCCUP1ID), - _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_stash, 0x0f, 4, SEL_OCCUP1ID), + CMN_EVENT_HNF_OCC(CMN_ANY, qos_pocq_occupancy, 0x0f), CMN_EVENT_HNF(CMN_ANY, pocq_addrhaz, 0x10), CMN_EVENT_HNF(CMN_ANY, pocq_atomic_addrhaz, 0x11), CMN_EVENT_HNF(CMN_ANY, ld_st_swp_adq_full, 0x12), From ac18ea1a8935920a6b3284f24e58f903b95dac20 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 7 Jul 2023 17:38:13 +0100 Subject: [PATCH 22/62] perf/arm-cmn: Add CMN-700 r3 support CMN-700 r3 has a special configuration option for a so-called "Super Home Node", which is a superset of the standard HN-F that also manages remote-chip coherency for multi-chip setups. As such it has a similar but expanded set of PMU events compared to HN-F, with some additional filtering options to boot. 
Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/49153b72253f6af0e625cb55b9e1b825b110c49c.1688746690.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 94 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 1efe9b72c0e6..913dc04b3a40 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -72,6 +72,8 @@ /* For most nodes, this is all there is */ #define CMN_PMU_EVENT_SEL 0x000 #define CMN__PMU_CBUSY_SNTHROTTLE_SEL GENMASK_ULL(44, 42) +#define CMN__PMU_SN_HOME_SEL GENMASK_ULL(40, 39) +#define CMN__PMU_HBT_LBT_SEL GENMASK_ULL(38, 37) #define CMN__PMU_CLASS_OCCUP_ID GENMASK_ULL(36, 35) /* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */ #define CMN__PMU_OCCUP1_ID GENMASK_ULL(34, 32) @@ -226,6 +228,7 @@ enum cmn_revision { REV_CMN700_R0P0 = 0, REV_CMN700_R1P0, REV_CMN700_R2P0, + REV_CMN700_R3P0, REV_CI700_R0P0 = 0, REV_CI700_R1P0, REV_CI700_R2P0, @@ -254,6 +257,9 @@ enum cmn_node_type { CMN_TYPE_CCHA, CMN_TYPE_CCLA, CMN_TYPE_CCLA_RNI, + CMN_TYPE_HNS = 0x200, + CMN_TYPE_HNS_MPAM_S, + CMN_TYPE_HNS_MPAM_NS, /* Not a real node type */ CMN_TYPE_WP = 0x7770 }; @@ -263,6 +269,8 @@ enum cmn_filter_select { SEL_OCCUP1ID, SEL_CLASS_OCCUP_ID, SEL_CBUSY_SNTHROTTLE_SEL, + SEL_HBT_LBT_SEL, + SEL_SN_HOME_SEL, SEL_MAX }; @@ -768,6 +776,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event) #define CMN_EVENT_CCLA_RNI(_name, _event) \ CMN_EVENT_ATTR(CMN_ANY, ccla_rni_##_name, CMN_TYPE_CCLA_RNI, _event) +#define CMN_EVENT_HNS(_name, _event) \ + CMN_EVENT_ATTR(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event) #define CMN_EVENT_DVM(_model, _name, _event) \ _CMN_EVENT_DVM(_model, _name, _event, 0, SEL_NONE) @@ -803,6 +813,23 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, #define CMN_EVENT_HNF_SNT(_model, _name, _event) \ CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event) +#define CMN_EVENT_HNS_OCC(_name, _event) \ + CMN_EVENT_HN_OCC(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_rxsnp, CMN_TYPE_HNS, _event, 5, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 6, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 7, SEL_OCCUP1ID) +#define CMN_EVENT_HNS_CLS( _name, _event) \ + CMN_EVENT_HN_CLS(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event) +#define CMN_EVENT_HNS_SNT(_name, _event) \ + CMN_EVENT_HN_SNT(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event) +#define CMN_EVENT_HNS_HBT(_name, _event) \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_HBT_LBT_SEL), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 1, SEL_HBT_LBT_SEL), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 2, SEL_HBT_LBT_SEL) +#define CMN_EVENT_HNS_SNH(_name, _event) \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_SN_HOME_SEL), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_sn, CMN_TYPE_HNS, _event, 1, SEL_SN_HOME_SEL), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_home, CMN_TYPE_HNS, _event, 2, SEL_SN_HOME_SEL) #define _CMN_EVENT_XP_MESH(_name, _event) \ __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \ @@ -1151,6 +1178,66 @@ static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_CCLA(pfwd_sndr_stalls_static_crd, 0x2a), CMN_EVENT_CCLA(pfwd_sndr_stalls_dynmaic_crd, 0x2b), + 
CMN_EVENT_HNS_HBT(cache_miss, 0x01), + CMN_EVENT_HNS_HBT(slc_sf_cache_access, 0x02), + CMN_EVENT_HNS_HBT(cache_fill, 0x03), + CMN_EVENT_HNS_HBT(pocq_retry, 0x04), + CMN_EVENT_HNS_HBT(pocq_reqs_recvd, 0x05), + CMN_EVENT_HNS_HBT(sf_hit, 0x06), + CMN_EVENT_HNS_HBT(sf_evictions, 0x07), + CMN_EVENT_HNS(dir_snoops_sent, 0x08), + CMN_EVENT_HNS(brd_snoops_sent, 0x09), + CMN_EVENT_HNS_HBT(slc_eviction, 0x0a), + CMN_EVENT_HNS_HBT(slc_fill_invalid_way, 0x0b), + CMN_EVENT_HNS(mc_retries_local, 0x0c), + CMN_EVENT_HNS_SNH(mc_reqs_local, 0x0d), + CMN_EVENT_HNS(qos_hh_retry, 0x0e), + CMN_EVENT_HNS_OCC(qos_pocq_occupancy, 0x0f), + CMN_EVENT_HNS(pocq_addrhaz, 0x10), + CMN_EVENT_HNS(pocq_atomic_addrhaz, 0x11), + CMN_EVENT_HNS(ld_st_swp_adq_full, 0x12), + CMN_EVENT_HNS(cmp_adq_full, 0x13), + CMN_EVENT_HNS(txdat_stall, 0x14), + CMN_EVENT_HNS(txrsp_stall, 0x15), + CMN_EVENT_HNS(seq_full, 0x16), + CMN_EVENT_HNS(seq_hit, 0x17), + CMN_EVENT_HNS(snp_sent, 0x18), + CMN_EVENT_HNS(sfbi_dir_snp_sent, 0x19), + CMN_EVENT_HNS(sfbi_brd_snp_sent, 0x1a), + CMN_EVENT_HNS(intv_dirty, 0x1c), + CMN_EVENT_HNS(stash_snp_sent, 0x1d), + CMN_EVENT_HNS(stash_data_pull, 0x1e), + CMN_EVENT_HNS(snp_fwded, 0x1f), + CMN_EVENT_HNS(atomic_fwd, 0x20), + CMN_EVENT_HNS(mpam_hardlim, 0x21), + CMN_EVENT_HNS(mpam_softlim, 0x22), + CMN_EVENT_HNS(snp_sent_cluster, 0x23), + CMN_EVENT_HNS(sf_imprecise_evict, 0x24), + CMN_EVENT_HNS(sf_evict_shared_line, 0x25), + CMN_EVENT_HNS_CLS(pocq_class_occup, 0x26), + CMN_EVENT_HNS_CLS(pocq_class_retry, 0x27), + CMN_EVENT_HNS_CLS(class_mc_reqs_local, 0x28), + CMN_EVENT_HNS_CLS(class_cgnt_cmin, 0x29), + CMN_EVENT_HNS_SNT(sn_throttle, 0x2a), + CMN_EVENT_HNS_SNT(sn_throttle_min, 0x2b), + CMN_EVENT_HNS(sf_precise_to_imprecise, 0x2c), + CMN_EVENT_HNS(snp_intv_cln, 0x2d), + CMN_EVENT_HNS(nc_excl, 0x2e), + CMN_EVENT_HNS(excl_mon_ovfl, 0x2f), + CMN_EVENT_HNS(snp_req_recvd, 0x30), + CMN_EVENT_HNS(snp_req_byp_pocq, 0x31), + CMN_EVENT_HNS(dir_ccgha_snp_sent, 0x32), + CMN_EVENT_HNS(brd_ccgha_snp_sent, 0x33), + CMN_EVENT_HNS(ccgha_snp_stall, 0x34), + CMN_EVENT_HNS(lbt_req_hardlim, 0x35), + CMN_EVENT_HNS(hbt_req_hardlim, 0x36), + CMN_EVENT_HNS(sf_reupdate, 0x37), + CMN_EVENT_HNS(excl_sf_imprecise, 0x38), + CMN_EVENT_HNS(snp_pocq_addrhaz, 0x39), + CMN_EVENT_HNS(mc_retries_remote, 0x3a), + CMN_EVENT_HNS_SNH(mc_reqs_remote, 0x3b), + CMN_EVENT_HNS_CLS(class_mc_reqs_remote, 0x3c), + NULL }; @@ -1392,6 +1479,10 @@ static int arm_cmn_set_event_sel_hi(struct arm_cmn_node *dn, dn->occupid[fsel].val = occupid; reg = FIELD_PREP(CMN__PMU_CBUSY_SNTHROTTLE_SEL, dn->occupid[SEL_CBUSY_SNTHROTTLE_SEL].val) | + FIELD_PREP(CMN__PMU_SN_HOME_SEL, + dn->occupid[SEL_SN_HOME_SEL].val) | + FIELD_PREP(CMN__PMU_HBT_LBT_SEL, + dn->occupid[SEL_HBT_LBT_SEL].val) | FIELD_PREP(CMN__PMU_CLASS_OCCUP_ID, dn->occupid[SEL_CLASS_OCCUP_ID].val) | FIELD_PREP(CMN__PMU_OCCUP1_ID, @@ -2219,6 +2310,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) case CMN_TYPE_CCRA: case CMN_TYPE_CCHA: case CMN_TYPE_CCLA: + case CMN_TYPE_HNS: dn++; break; /* Nothing to see here */ @@ -2226,6 +2318,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) case CMN_TYPE_MPAM_NS: case CMN_TYPE_RNSAM: case CMN_TYPE_CXLA: + case CMN_TYPE_HNS_MPAM_S: + case CMN_TYPE_HNS_MPAM_NS: break; /* * Split "optimised" combination nodes into separate From b9d6012497400aa65ef0f2cc537ce7720615dea7 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 14 Jul 2023 11:40:20 -0600 Subject: [PATCH 23/62] arm64: Explicitly include correct DT includes Remove 
unused 'of*.h' header inclusions from the arm64 arch code to allow for the eventual untangling of 'of_device.h' and 'of_platform.h', which currently include each other. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230714174021.4039807-1-robh@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuidle.c | 2 -- arch/arm64/kernel/pci.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index d1f68599c29f..f372295207fb 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -9,8 +9,6 @@ #include #include #include -#include -#include #include #ifdef CONFIG_ACPI_PROCESSOR_IDLE diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 2276689b5411..f872c57e9909 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include #include #include From 4e0bacd65e72f7f9b1b60e48e85e6d4187474644 Mon Sep 17 00:00:00 2001 From: Zhang Jianhua Date: Fri, 4 Aug 2023 15:56:15 +0800 Subject: [PATCH 24/62] arm64: fix build warning for ARM64_MEMSTART_SHIFT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with W=1, the following warning occurs. arch/arm64/include/asm/kernel-pgtable.h:129:41: error: "PUD_SHIFT" is not defined, evaluates to 0 [-Werror=undef] 129 | #define ARM64_MEMSTART_SHIFT PUD_SHIFT | ^~~~~~~~~ arch/arm64/include/asm/kernel-pgtable.h:142:5: note: in expansion of macro ‘ARM64_MEMSTART_SHIFT’ 142 | #if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS | ^~~~~~~~~~~~~~~~~~~~ The generic PUD_SHIFT is defined in include/asm-generic/pgtable-nopud.h; however, the #ifndef __ASSEMBLY__ guard in that header makes it unavailable to assembly files. When a .S file includes <asm/kernel-pgtable.h>, the build warning occurs. Move the ARM64_MEMSTART_SHIFT and ARM64_MEMSTART_ALIGN macros to arch/arm64/mm/init.c, the only place they are used, to avoid this issue. Signed-off-by: Zhang Jianhua Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20230804075615.3334756-1-chris.zjh@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/kernel-pgtable.h | 27 ------------------------- arch/arm64/mm/init.c | 27 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 577773870b66..85d26143faa5 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -118,31 +118,4 @@ #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) #endif -/* - * To make optimal use of block mappings when laying out the linear - * mapping, round down the base of physical memory to a size that can - * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE - * (64k granule), or a multiple that can be mapped using contiguous bits - * in the page tables: 32 * PMD_SIZE (16k granule) - */ -#if defined(CONFIG_ARM64_4K_PAGES) -#define ARM64_MEMSTART_SHIFT PUD_SHIFT -#elif defined(CONFIG_ARM64_16K_PAGES) -#define ARM64_MEMSTART_SHIFT CONT_PMD_SHIFT -#else -#define ARM64_MEMSTART_SHIFT PMD_SHIFT -#endif -/* - * sparsemem vmemmap imposes an additional requirement on the alignment of - * memstart_addr, due to the fact that the base of the vmemmap region - * has a direct correspondence, and needs to appear sufficiently aligned - * in the virtual address space.
- */ -#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS -#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS) -#else -#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT) -#endif - #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d31c3a9290c5..4fcb88a445ef 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -73,6 +73,33 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +/* + * To make optimal use of block mappings when laying out the linear + * mapping, round down the base of physical memory to a size that can + * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE + * (64k granule), or a multiple that can be mapped using contiguous bits + * in the page tables: 32 * PMD_SIZE (16k granule) + */ +#if defined(CONFIG_ARM64_4K_PAGES) +#define ARM64_MEMSTART_SHIFT PUD_SHIFT +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ARM64_MEMSTART_SHIFT CONT_PMD_SHIFT +#else +#define ARM64_MEMSTART_SHIFT PMD_SHIFT +#endif + +/* + * sparsemem vmemmap imposes an additional requirement on the alignment of + * memstart_addr, due to the fact that the base of the vmemmap region + * has a direct correspondence, and needs to appear sufficiently aligned + * in the virtual address space. + */ +#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS +#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS) +#else +#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT) +#endif + static int __init reserve_crashkernel_low(unsigned long long low_size) { unsigned long long low_base; From 90d68677226ac7cf344648919df2016686b3e2ab Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 2 Aug 2023 14:38:53 +0530 Subject: [PATCH 25/62] perf: pmuv3: Remove comments from armv8pmu_[enable|disable]_event() The comments in armv8pmu_[enable|disable]_event() are blindingly obvious and do not contribute to making things any better. Let's drop them. No functional change is intended. Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Mark Rutland Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20230802090853.1190391-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index d2dffb4e9d07..e5a2ac4155f6 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -721,38 +721,15 @@ static void armv8pmu_enable_event(struct perf_event *event) * Enable counter and interrupt, and set the counter to count * the event that we're interested in. */ - - /* - * Disable counter - */ armv8pmu_disable_event_counter(event); - - /* - * Set event. - */ armv8pmu_write_event_type(event); - - /* - * Enable interrupt for this counter - */ armv8pmu_enable_event_irq(event); - - /* - * Enable counter - */ armv8pmu_enable_event_counter(event); } static void armv8pmu_disable_event(struct perf_event *event) { - /* - * Disable counter - */ armv8pmu_disable_event_counter(event); - - /* - * Disable interrupt for this counter - */ armv8pmu_disable_event_irq(event); } From 7f86d128e437990fd08d9e66ae7c1571666cff8a Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 4 Aug 2023 15:37:45 +0100 Subject: [PATCH 26/62] arm64: add HWCAP for FEAT_HBC (hinted conditional branches) Add a HWCAP for FEAT_HBC, so that userspace can make a decision on using this feature.
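Userspace consumption of the new hwcap is the usual getauxval() check; a minimal sketch:

	#include <sys/auxv.h>
	#include <asm/hwcap.h>

	/* Sketch: gate use of hinted conditional branches (BC.cond) on the hwcap. */
	static int have_hbc(void)
	{
		return !!(getauxval(AT_HWCAP2) & HWCAP2_HBC);
	}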
Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20230804143746.3900803-2-joey.gouly@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 3 ++- arch/arm64/kernel/cpuinfo.c | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 692b1ec663b2..521267478d18 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -138,6 +138,7 @@ #define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16) #define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16) #define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS) +#define KERNEL_HWCAP_HBC __khwcap2_feature(HBC) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index a2cac4305b1e..53026f45a509 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -103,5 +103,6 @@ #define HWCAP2_SME_B16B16 (1UL << 41) #define HWCAP2_SME_F16F16 (1UL << 42) #define HWCAP2_MOPS (1UL << 43) +#define HWCAP2_HBC (1UL << 44) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index bcb006390e55..a5f533f63b60 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -222,7 +222,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), @@ -2840,6 +2840,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS), + HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 58622dc85917..98fda8500535 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -126,6 +126,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_B16B16] = "smeb16b16", [KERNEL_HWCAP_SME_F16F16] = "smef16f16", [KERNEL_HWCAP_MOPS] = "mops", + [KERNEL_HWCAP_HBC] = "hbc", }; #ifdef CONFIG_COMPAT From d70175b1470c4bdc8643fd744d722a00c7f0fb17 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 4 Aug 2023 15:37:46 +0100 Subject: [PATCH 27/62] selftests/arm64: add HWCAP2_HBC test Add a test for the newly added HWCAP2_HBC. 
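For reference, the raw encoding exercised by the new hbc_sigill() below is hand-assembled, since toolchains without FEAT_HBC support cannot emit the mnemonic:

    0x54000030 = BC.EQ .+4    /* B.cond layout with bit 4 set: imm19 = 1, cond = 0b0000 (EQ) */

Without FEAT_HBC this encoding is unallocated and raises SIGILL, which is why the test can set sigill_reliable.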
Signed-off-by: Joey Gouly Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20230804143746.3900803-3-joey.gouly@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index d4ad813fed10..fabeac9a1b5e 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -208,6 +208,13 @@ static void svebf16_sigill(void) asm volatile(".inst 0x658aa000" : : : "z0"); } +static void hbc_sigill(void) +{ + /* BC.EQ +4 */ + asm volatile("cmp xzr, xzr\n" + ".inst 0x54000030" : : : "cc"); +} + static const struct hwcap_data { const char *name; unsigned long at_hwcap; @@ -386,6 +393,14 @@ static const struct hwcap_data { .hwcap_bit = HWCAP2_SVE_EBF16, .cpuinfo = "sveebf16", }, + { + .name = "HBC", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_HBC, + .cpuinfo = "hbc", + .sigill_fn = hbc_sigill, + .sigill_reliable = true, + }, }; static bool seen_sigill; From d1890517ac751a427b962caddda91ecc2bbf21d0 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 3 Aug 2023 21:39:05 +0800 Subject: [PATCH 28/62] kselftest/arm64: add RCpc load-acquire to hwcap test Add the RCpc and various features check in the set of hwcap tests. Signed-off-by: Zeng Heng Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230803133905.971697-1-zengheng4@huawei.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 26 +++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index d4ad813fed10..6a0adf916028 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -39,6 +39,18 @@ static void cssc_sigill(void) asm volatile(".inst 0xdac01c00" : : : "x0"); } +static void ilrcpc_sigill(void) +{ + /* LDAPUR W0, [SP, #8] */ + asm volatile(".inst 0x994083e0" : : : ); +} + +static void lrcpc_sigill(void) +{ + /* LDAPR W0, [SP, #0] */ + asm volatile(".inst 0xb8bfc3e0" : : : ); +} + static void mops_sigill(void) { char dst[1], src[1]; @@ -223,6 +235,20 @@ static const struct hwcap_data { .cpuinfo = "cssc", .sigill_fn = cssc_sigill, }, + { + .name = "LRCPC", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_LRCPC, + .cpuinfo = "lrcpc", + .sigill_fn = lrcpc_sigill, + }, + { + .name = "LRCPC2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_ILRCPC, + .cpuinfo = "ilrcpc", + .sigill_fn = ilrcpc_sigill, + }, { .name = "MOPS", .at_hwcap = AT_HWCAP2, From 5cd474e57368f0957c343bb21e309cf82826b1ef Mon Sep 17 00:00:00 2001 From: D Scott Phillips Date: Mon, 26 Jun 2023 17:29:39 -0700 Subject: [PATCH 29/62] arm64: sdei: abort running SDEI handlers during crash Interrupts are blocked in SDEI context, per the SDEI spec: "The client interrupts cannot preempt the event handler." If we crashed in the SDEI handler-running context (as with ACPI's AGDI) then we need to clean up the SDEI state before proceeding to the crash kernel so that the crash kernel can have working interrupts. Track the active SDEI handler per-cpu so that we can COMPLETE_AND_RESUME the handler, discarding the interrupted context. 
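The resulting crash path can be summarised with the following sketch (simplified from the smp.c and entry.S changes below):

    crash_smp_send_stop()
        -> stop the other online CPUs via IPI (skipped if none)
        -> sdei_mask_local_cpu()    /* no new SDEI events on this CPU */
        -> sdei_handler_abort()     /* COMPLETE_AND_RESUME any handler we
                                       crashed in, critical then normal */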
Fixes: f5df26961853 ("arm64: kernel: Add arch-specific SDEI entry code and CPU masking") Signed-off-by: D Scott Phillips Cc: stable@vger.kernel.org Reviewed-by: James Morse Tested-by: Mihai Carabas Link: https://lore.kernel.org/r/20230627002939.2758-1-scott@os.amperecomputing.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sdei.h | 6 ++++++ arch/arm64/kernel/entry.S | 27 +++++++++++++++++++++++++-- arch/arm64/kernel/sdei.c | 3 +++ arch/arm64/kernel/smp.c | 8 ++++---- drivers/firmware/arm_sdei.c | 19 +++++++++++++++++++ include/linux/arm_sdei.h | 2 ++ 6 files changed, 59 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index 4292d9bafb9d..484cb6972e99 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -17,6 +17,9 @@ #include +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); + extern unsigned long sdei_exit_mode; /* Software Delegated Exception entry point from firmware*/ @@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num, unsigned long pc, unsigned long pstate); +/* Abort a running handler. Context is discarded. */ +void __sdei_handler_abort(void); + /* * The above entry point does the minimum to call C code. This function does * anything else, before calling the driver. diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a40e5e50fa55..6ad61de03d0a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -986,9 +986,13 @@ SYM_CODE_START(__sdei_asm_handler) mov x19, x1 -#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK) + /* Store the registered-event for crash_smp_send_stop() */ ldrb w4, [x19, #SDEI_EVENT_PRIORITY] -#endif + cbnz w4, 1f + adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6 + b 2f +1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 +2: str x19, [x5] #ifdef CONFIG_VMAP_STACK /* @@ -1055,6 +1059,14 @@ SYM_CODE_START(__sdei_asm_handler) ldr_l x2, sdei_exit_mode + /* Clear the registered-event seen by crash_smp_send_stop() */ + ldrb w3, [x4, #SDEI_EVENT_PRIORITY] + cbnz w3, 1f + adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6 + b 2f +1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 +2: str xzr, [x5] + alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 sdei_handler_exit exit_mode=x2 alternative_else_nop_endif @@ -1065,4 +1077,15 @@ alternative_else_nop_endif #endif SYM_CODE_END(__sdei_asm_handler) NOKPROBE(__sdei_asm_handler) + +SYM_CODE_START(__sdei_handler_abort) + mov_q x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME + adr x1, 1f + ldr_l x2, sdei_exit_mode + sdei_handler_exit exit_mode=x2 + // exit the handler and jump to the next instruction. + // Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx. 
+1: ret +SYM_CODE_END(__sdei_handler_abort) +NOKPROBE(__sdei_handler_abort) #endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 830be01af32d..255d12f881c2 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -47,6 +47,9 @@ DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr); DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr); #endif +DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); +DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); + static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu) { unsigned long *p; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index edd63894d61e..960b98b43506 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1044,10 +1044,8 @@ void crash_smp_send_stop(void) * If this cpu is the only one alive at this point in time, online or * not, there are no stop messages to be sent around, so just back out. */ - if (num_other_online_cpus() == 0) { - sdei_mask_local_cpu(); - return; - } + if (num_other_online_cpus() == 0) + goto skip_ipi; cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); @@ -1066,7 +1064,9 @@ void crash_smp_send_stop(void) pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", cpumask_pr_args(&mask)); +skip_ipi: sdei_mask_local_cpu(); + sdei_handler_abort(); } bool smp_crash_stop_failed(void) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index f9040bd61081..285fe7ad490d 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1095,3 +1095,22 @@ int sdei_event_handler(struct pt_regs *regs, return err; } NOKPROBE_SYMBOL(sdei_event_handler); + +void sdei_handler_abort(void) +{ + /* + * If the crash happened in an SDEI event handler then we need to + * finish the handler with the firmware so that we can have working + * interrupts in the crash kernel. + */ + if (__this_cpu_read(sdei_active_critical_event)) { + pr_warn("still in SDEI critical event context, attempting to finish handler.\n"); + __sdei_handler_abort(); + __this_cpu_write(sdei_active_critical_event, NULL); + } + if (__this_cpu_read(sdei_active_normal_event)) { + pr_warn("still in SDEI normal event context, attempting to finish handler.\n"); + __sdei_handler_abort(); + __this_cpu_write(sdei_active_normal_event, NULL); + } +} diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 14dc461b0e82..255701e1251b 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -47,10 +47,12 @@ int sdei_unregister_ghes(struct ghes *ghes); int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); void __init sdei_init(void); +void sdei_handler_abort(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } static inline void sdei_init(void) { } +static inline void sdei_handler_abort(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */ From d6da04b6fbabf4b464bfe29e34ff10c62024d1e4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 28 Jul 2023 00:26:12 +0100 Subject: [PATCH 30/62] kselftest/arm64: Exit streaming mode after collecting signal context When we collect a signal context with one of the SME modes enabled we will have enabled that mode behind the compiler and libc's back so they may issue some instructions not valid in streaming mode, causing spurious failures. 
For the code prior to issuing the BRK to trigger signal handling we need to stay in streaming mode if we were already there since that's a part of the signal context the caller is trying to collect. Unfortunately this code includes a memset() which is likely to be heavily optimised and is likely to use FP instructions incompatible with streaming mode. We can avoid this happening by open coding the memset(), inserting a volatile assembly statement to avoid the compiler recognising what's being done and doing something in optimisation. This code is not performance critical so the inefficiency should not be an issue. After collecting the context we can simply exit streaming mode, avoiding these issues. Use a full SMSTOP for safety to prevent any issues appearing with ZA. Reported-by: Will Deacon Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-1-0c1290db5d46@kernel.org Signed-off-by: Will Deacon --- .../arm64/signal/test_signals_utils.h | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 222093f51b67..c7f5627171dd 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -60,13 +60,25 @@ static __always_inline bool get_current_context(struct tdescr *td, size_t dest_sz) { static volatile bool seen_already; + int i; + char *uc = (char *)dest_uc; assert(td && dest_uc); /* it's a genuine invocation..reinit */ seen_already = 0; td->live_uc_valid = 0; td->live_sz = dest_sz; - memset(dest_uc, 0x00, td->live_sz); + + /* + * This is a memset() but we don't want the compiler to + * optimise it into either instructions or a library call + * which might be incompatible with streaming mode. + */ + for (i = 0; i < td->live_sz; i++) { + uc[i] = 0; + __asm__ ("" : "=r" (uc[i]) : "0" (uc[i])); + } + td->live_uc = dest_uc; /* * Grab ucontext_t triggering a SIGTRAP. @@ -103,6 +115,17 @@ static __always_inline bool get_current_context(struct tdescr *td, : : "memory"); + /* + * If we were grabbing a streaming mode context then we may + * have entered streaming mode behind the system's back and + * libc or compiler generated code might decide to do + * something invalid in streaming mode, or potentially even + * the state of ZA. Issue a SMSTOP to exit both now we have + * grabbed the state. + */ + if (td->feats_supported & FEAT_SME) + asm volatile("msr S0_3_C4_C6_3, xzr"); + /* * If we get here with seen_already==1 it implies the td->live_uc * context has been used to get back here....this probably means From e5d51a6650211baf38daa894d06c47748978e141 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 28 Jul 2023 00:26:13 +0100 Subject: [PATCH 31/62] tools compiler.h: Add OPTIMIZER_HIDE_VAR() Port over the definition of OPTIMIZER_HIDE_VAR() so we can use it in kselftests. 
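A usage sketch (hypothetical, mirroring the open-coded loop from the previous patch) of what the macro enables:

    #include <linux/compiler.h>    /* the tools/include copy */

    static void zero_buf(char *buf, unsigned long sz)
    {
            unsigned long i;

            /*
             * Hiding each byte from the optimizer stops the compiler from
             * collapsing the loop back into a memset() call or vector
             * stores, which may be invalid in SME streaming mode.
             */
            for (i = 0; i < sz; i++) {
                    buf[i] = 0;
                    OPTIMIZER_HIDE_VAR(buf[i]);
            }
    }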
Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-2-0c1290db5d46@kernel.org Signed-off-by: Will Deacon --- tools/include/linux/compiler.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 9d36c8ce1fe7..f75cced41d59 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -190,4 +190,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #define ___PASTE(a, b) a##b #define __PASTE(a, b) ___PASTE(a, b) +#ifndef OPTIMIZER_HIDE_VAR +/* Make the optimizer believe the variable can be manipulated arbitrarily. */ +#define OPTIMIZER_HIDE_VAR(var) \ + __asm__ ("" : "=r" (var) : "0" (var)) +#endif + #endif /* _TOOLS_LINUX_COMPILER_H */ From 51e6ac1fa45160d400321fa06d7808b30d9d453c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 28 Jul 2023 00:26:14 +0100 Subject: [PATCH 32/62] tools include: Add some common function attributes We don't have definitions of __always_unused or __noreturn in the tools version of compiler.h, add them so we can use them in kselftests. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-3-0c1290db5d46@kernel.org Signed-off-by: Will Deacon --- tools/include/linux/compiler.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index f75cced41d59..1684216e826a 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -42,6 +42,18 @@ # define __always_inline inline __attribute__((always_inline)) #endif +#ifndef __always_unused +#define __always_unused __attribute__((__unused__)) +#endif + +#ifndef __noreturn +#define __noreturn __attribute__((__noreturn__)) +#endif + +#ifndef unreachable +#define unreachable() __builtin_unreachable() +#endif + #ifndef noinline #define noinline #endif From 35d7bc983a74db7067d874be2708a7d3bc269f13 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 28 Jul 2023 00:26:15 +0100 Subject: [PATCH 33/62] kselftest/arm64: Make the tools/include headers available Make the generic tools/include headers available to the arm64 selftests so we can reduce some duplication. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-4-0c1290db5d46@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index ace8b67fb22d..28b93cab8c0d 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -19,6 +19,8 @@ CFLAGS += -I$(top_srcdir)/tools/testing/selftests/ CFLAGS += $(KHDR_INCLUDES) +CFLAGS += -I$(top_srcdir)/tools/include + export CFLAGS export top_srcdir From db7a89f706d6080121b61099b1d69a752ce0c61e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 28 Jul 2023 00:26:16 +0100 Subject: [PATCH 34/62] kselftest/arm64: Use shared OPTIMIZER_HIDE_VAR() definition We had open coded the definition of OPTIMIZER_HIDE_VAR() as a fix, but now that the generic tools/include headers are available and provide a definition of OPTIMIZER_HIDE_VAR(), we can switch to that define.
Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-5-0c1290db5d46@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/signal/test_signals_utils.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index c7f5627171dd..762c8fe9c54a 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -8,6 +8,8 @@ #include #include +#include + #include "test_signals.h" int test_init(struct tdescr *td); @@ -76,7 +78,7 @@ static __always_inline bool get_current_context(struct tdescr *td, */ for (i = 0; i < td->live_sz; i++) { uc[i] = 0; - __asm__ ("" : "=r" (uc[i]) : "0" (uc[i])); + OPTIMIZER_HIDE_VAR(uc[0]); } td->live_uc = dest_uc; From 672dbf97f6123ffe61ee46bd8bfbbe1cae6de815 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 28 Jul 2023 00:26:17 +0100 Subject: [PATCH 35/62] kselftest/arm64: Use the tools/include compiler.h rather than our own The BTI test program started life as a standalone program outside the kselftest suite, so it provided its own compiler.h. Now that we have updated the tools/include compiler.h to have all the definitions that we are using and the arm64 selftests pull in tools/include, let's drop our custom version. __unreachable() is named unreachable() there, requiring an update in the code. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-6-0c1290db5d46@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/bti/compiler.h | 21 -------------------- tools/testing/selftests/arm64/bti/system.c | 4 +--- tools/testing/selftests/arm64/bti/system.h | 4 ++-- tools/testing/selftests/arm64/bti/test.c | 1 - 4 files changed, 3 insertions(+), 27 deletions(-) delete mode 100644 tools/testing/selftests/arm64/bti/compiler.h diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h deleted file mode 100644 index ebb6204f447a..000000000000 --- a/tools/testing/selftests/arm64/bti/compiler.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2019 Arm Limited - * Original author: Dave Martin - */ - -#ifndef COMPILER_H -#define COMPILER_H - -#define __always_unused __attribute__((__unused__)) -#define __noreturn __attribute__((__noreturn__)) -#define __unreachable() __builtin_unreachable() - -/* curse(e) has value e, but the compiler cannot assume so */ -#define curse(e) ({ \ - __typeof__(e) __curse_e = (e); \ - asm ("" : "+r" (__curse_e)); \ - __curse_e; \ -}) - -#endif /* !
COMPILER_H */ diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c index 6385d8d4973b..93d772b00bfe 100644 --- a/tools/testing/selftests/arm64/bti/system.c +++ b/tools/testing/selftests/arm64/bti/system.c @@ -8,12 +8,10 @@ #include -#include "compiler.h" - void __noreturn exit(int n) { syscall(__NR_exit, n); - __unreachable(); + unreachable(); } ssize_t write(int fd, const void *buf, size_t size) diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h index aca118589705..2e9ee1284a0c 100644 --- a/tools/testing/selftests/arm64/bti/system.h +++ b/tools/testing/selftests/arm64/bti/system.h @@ -14,12 +14,12 @@ typedef __kernel_size_t size_t; typedef __kernel_ssize_t ssize_t; #include +#include + #include #include #include -#include "compiler.h" - long syscall(int nr, ...); void __noreturn exit(int n); diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c index 2cd8dcee5aec..28a8e8a28a84 100644 --- a/tools/testing/selftests/arm64/bti/test.c +++ b/tools/testing/selftests/arm64/bti/test.c @@ -17,7 +17,6 @@ typedef struct ucontext ucontext_t; #include "btitest.h" -#include "compiler.h" #include "signal.h" #define EXPECTED_TESTS 18 From 01948b09edc3fecf8486c57c2d2fb8b80886f3d0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 31 Jul 2023 14:58:48 +0100 Subject: [PATCH 36/62] arm64/fpsimd: Only provide the length to cpufeature for xCR registers For both SVE and SME we abuse the generic register field comparison support in the cpufeature code as part of our detection of unsupported variations in the vector lengths available to PEs, reporting the maximum vector lengths via ZCR_EL1.LEN and SMCR_EL1.LEN. Since these are configuration registers rather than identification registers the assumptions the cpufeature code makes about how unknown bitfields behave are invalid, leading to warnings when SME features like FA64 are enabled and we hotplug a CPU: CPU features: SANITY CHECK: Unexpected variation in SYS_SMCR_EL1. Boot CPU: 0x0000000000000f, CPU3: 0x0000008000000f CPU features: Unsupported CPU feature variation detected. SVE has no controls other than the vector length so is not yet impacted but the same issue will apply there if any are defined. Since the only field we are interested in having the cpufeature code handle is the length field and we use a custom read function to obtain the value we can avoid these warnings by filtering out all other bits when we return the register value, if we're doing that we don't need to bother reading the register at all and can simply use the RDVL/RDSVL value we were filling in instead. 
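Illustrative arithmetic (example values, not taken from the patch): on a PE whose maximum SVE vector length is 256 bits,

    sve_get_vl()        == 32    /* bytes */
    sve_vq_from_vl(32)  == 2     /* 128-bit quadwords */
    read_zcr_features() == 1     /* the ZCR_EL1.LEN value selecting the maximum VL */

so only the LEN field is ever reported, and stray bits from unknown ZCR or SMCR controls can no longer leak into the cpufeature comparison.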
Fixes: 2e0f2478ea37 ("arm64/sve: Probe SVE capabilities and usable vector lengths") Fixes: b42990d3bf77 ("arm64/sme: Identify supported SME vector lengths at boot") Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20230731-arm64-sme-fa64-hotplug-v2-1-7714c00dd902@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 89d54a5242d1..8e9a3451af4c 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1178,9 +1178,6 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) */ u64 read_zcr_features(void) { - u64 zcr; - unsigned int vq_max; - /* * Set the maximum possible VL, and write zeroes to all other * bits to see if they stick. @@ -1188,12 +1185,8 @@ u64 read_zcr_features(void) sve_kernel_enable(NULL); write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1); - zcr = read_sysreg_s(SYS_ZCR_EL1); - zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */ - vq_max = sve_vq_from_vl(sve_get_vl()); - zcr |= vq_max - 1; /* set LEN field to maximum effective value */ - - return zcr; + /* Return LEN value that would be written to get the maximum VL */ + return sve_vq_from_vl(sve_get_vl()) - 1; } void __init sve_setup(void) @@ -1348,9 +1341,6 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) */ u64 read_smcr_features(void) { - u64 smcr; - unsigned int vq_max; - sme_kernel_enable(NULL); /* @@ -1359,12 +1349,8 @@ u64 read_smcr_features(void) write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK, SYS_SMCR_EL1); - smcr = read_sysreg_s(SYS_SMCR_EL1); - smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */ - vq_max = sve_vq_from_vl(sme_get_vl()); - smcr |= vq_max - 1; /* set LEN field to maximum effective value */ - - return smcr; + /* Return LEN value that would be written to get the maximum VL */ + return sve_vq_from_vl(sme_get_vl()) - 1; } void __init sme_setup(void) From f130ac0ae4412dbe4cbe32d0449a6b694b459ce6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 8 Aug 2023 11:11:48 +0100 Subject: [PATCH 37/62] arm64: syscall: unmask DAIF earlier for SVCs For a number of historical reasons, when handling SVCs we don't unmask DAIF in el0_svc() or el0_svc_compat(), and instead do so later in el0_svc_common(). This is unfortunate and makes it harder to make changes to the DAIF management in entry-common.c as we'd like to do as cleanup and preparation for FEAT_NMI support. We can move the DAIF unmasking to entry-common.c as long as we also hoist the fp_user_discard() logic, as reasoned below. We converted the syscall trace logic from assembly to C in commit: f37099b6992a0b81 ("arm64: convert syscall trace logic to C") ... which was intended to have no functional change, and mirrored the existing assembly logic to avoid the risk of any functional regression. With the logic in C, it's clear that there is currently no reason to unmask DAIF so late within el0_svc_common(): * The thread flags are read prior to unmasking DAIF, but are not consumed until after DAIF is unmasked, and we don't perform a read-modify-write sequence of the thread flags for which we might need to serialize against an IPI modifying the flags. Similarly, for any thread flags set by other threads, whether DAIF is masked or not has no impact.
The read_thread_flags() helpers performs a single-copy-atomic read of the flags, and so this can safely be moved after unmasking DAIF. * The pt_regs::orig_x0 and pt_regs::syscallno fields are neither consumed nor modified by the handler for any DAIF exception (e.g. these do not exist in the `perf_event_arm_regs` enum and are not sampled by perf in its IRQ handler). Thus, the manipulation of pt_regs::orig_x0 and pt_regs::syscallno can safely be moved after unmasking DAIF. Given the above, we can safely hoist unmasking of DAIF out of el0_svc_common(), and into its immediate callers: do_el0_svc() and do_el0_svc_compat(). Further: * In do_el0_svc(), we sample the syscall number from pt_regs::regs[8]. This is not modified by the handler for any DAIF exception, and thus can safely be moved after unmasking DAIF. As fp_user_discard() operates on the live FP/SVE/SME register state, this needs to occur before we clear DAIF.IF, as interrupts could result in preemption which would cause this state to become foreign. As fp_user_discard() is the first function called within do_el0_svc(), it has no dependency on other parts of do_el0_svc() and can be moved earlier so long as it is called prior to unmasking DAIF.IF. * In do_el0_svc_compat(), we sample the syscall number from pt_regs::regs[7]. This is not modified by the handler for any DAIF exception, and thus can safely be moved after unmasking DAIF. Compat threads cannot use SVE or SME, so there's no need for el0_svc_compat() to call fp_user_discard(). Given the above, we can safely hoist the unmasking of DAIF out of do_el0_svc() and do_el0_svc_compat(), and into their immediate callers: el0_svc() and el0_svc_compat(), so long a we also hoist fp_user_discard() into el0_svc(). Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230808101148.1064172-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 32 +++++++++++++++++++++++++++++++ arch/arm64/kernel/syscall.c | 33 -------------------------------- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 6b2e0c367702..0fc94207e69a 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -355,6 +355,35 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) } #endif /* CONFIG_ARM64_ERRATUM_1463225 */ +/* + * As per the ABI exit SME streaming mode and clear the SVE state not + * shared with FPSIMD on syscall entry. + */ +static inline void fp_user_discard(void) +{ + /* + * If SME is active then exit streaming mode. If ZA is active + * then flush the SVE registers but leave userspace access to + * both SVE and SME enabled, otherwise disable SME for the + * task and fall through to disabling SVE too. This means + * that after a syscall we never have any streaming mode + * register state to track, if this changes the KVM code will + * need updating. 
+ */ + if (system_supports_sme()) + sme_smstop_sm(); + + if (!system_supports_sve()) + return; + + if (test_thread_flag(TIF_SVE)) { + unsigned int sve_vq_minus_one; + + sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1; + sve_flush_live(true, sve_vq_minus_one); + } +} + UNHANDLED(el1t, 64, sync) UNHANDLED(el1t, 64, irq) UNHANDLED(el1t, 64, fiq) @@ -644,6 +673,8 @@ static void noinstr el0_svc(struct pt_regs *regs) { enter_from_user_mode(regs); cortex_a76_erratum_1463225_svc_handler(); + fp_user_discard(); + local_daif_restore(DAIF_PROCCTX); do_el0_svc(regs); exit_to_user_mode(regs); } @@ -783,6 +814,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) { enter_from_user_mode(regs); cortex_a76_erratum_1463225_svc_handler(); + local_daif_restore(DAIF_PROCCTX); do_el0_svc_compat(regs); exit_to_user_mode(regs); } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index b1ae2f2eaf77..9a70d9746b66 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -101,8 +100,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * (Similarly for HVC and SMC elsewhere.) */ - local_daif_restore(DAIF_PROCCTX); - if (flags & _TIF_MTE_ASYNC_FAULT) { /* * Process the asynchronous tag check fault before the actual @@ -153,38 +150,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, syscall_trace_exit(regs); } -/* - * As per the ABI exit SME streaming mode and clear the SVE state not - * shared with FPSIMD on syscall entry. - */ -static inline void fp_user_discard(void) -{ - /* - * If SME is active then exit streaming mode. If ZA is active - * then flush the SVE registers but leave userspace access to - * both SVE and SME enabled, otherwise disable SME for the - * task and fall through to disabling SVE too. This means - * that after a syscall we never have any streaming mode - * register state to track, if this changes the KVM code will - * need updating. - */ - if (system_supports_sme()) - sme_smstop_sm(); - - if (!system_supports_sve()) - return; - - if (test_thread_flag(TIF_SVE)) { - unsigned int sve_vq_minus_one; - - sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1; - sve_flush_live(true, sve_vq_minus_one); - } -} - void do_el0_svc(struct pt_regs *regs) { - fp_user_discard(); el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); } From eb27c76ac9e1cf5e278703db8f2fea1ed5d3f78a Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 8 Aug 2023 21:40:32 +0800 Subject: [PATCH 38/62] kselftest/arm64: add float-point feature to hwcap test Add the FP feature check in the set of hwcap tests. 
Signed-off-by: Zeng Heng Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230808134036.668954-2-zengheng4@huawei.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 6a0adf916028..ec247587f2bf 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -39,6 +39,11 @@ static void cssc_sigill(void) asm volatile(".inst 0xdac01c00" : : : "x0"); } +static void fp_sigill(void) +{ + asm volatile("fmov s0, #1"); +} + static void ilrcpc_sigill(void) { /* LDAPUR W0, [SP, #8] */ @@ -235,6 +240,13 @@ static const struct hwcap_data { .cpuinfo = "cssc", .sigill_fn = cssc_sigill, }, + { + .name = "FP", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_FP, + .cpuinfo = "fp", + .sigill_fn = fp_sigill, + }, { .name = "LRCPC", .at_hwcap = AT_HWCAP, From 09d2e95a04ad6fd258082ea3ba9a5bf52bbd5229 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 8 Aug 2023 21:40:33 +0800 Subject: [PATCH 39/62] kselftest/arm64: add crc32 feature to hwcap test Add the CRC32 feature check in the set of hwcap tests. Signed-off-by: Zeng Heng Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230808134036.668954-3-zengheng4@huawei.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index ec247587f2bf..70850bdb0e35 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -33,6 +33,11 @@ */ typedef void (*sigill_fn)(void); +static void crc32_sigill(void) +{ + asm volatile("crc32w w0, w0, w1"); +} + static void cssc_sigill(void) { /* CNT x0, x0 */ @@ -233,6 +238,13 @@ static const struct hwcap_data { sigill_fn sigill_fn; bool sigill_reliable; } hwcaps[] = { + { + .name = "CRC32", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_CRC32, + .cpuinfo = "crc32", + .sigill_fn = crc32_sigill, + }, { .name = "CSSC", .at_hwcap = AT_HWCAP2, From 71b634aba36201fa24e214db1be7ce50cf6e1863 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 8 Aug 2023 21:40:34 +0800 Subject: [PATCH 40/62] kselftest/arm64: add DEF_SIGHANDLER_FUNC() and DEF_INST_RAISE_SIG() helpers Add macro definition functions DEF_SIGHANDLER_FUNC() and DEF_INST_RAISE_SIG() helpers. Furthermore, there is no need to modify the default SIGILL handling function throughout the entire testing lifecycle in the main() function. It is reasonable to narrow the scope to the context of the sig_fn function only. This is a pre-patch for the subsequent SIGBUS handler patch. 
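For clarity, hand-expanding the first instantiation of the new signal handler macro (equivalent to the open-coded handler it replaces) gives:

    static bool seen_sigill;
    static void handle_sigill(int sig, siginfo_t *info, void *context)
    {
            ucontext_t *uc = context;

            seen_sigill = true;
            /* Skip over the offending instruction */
            uc->uc_mcontext.pc += 4;
    }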
Signed-off-by: Zeng Heng Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230808134036.668954-4-zengheng4@huawei.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 116 ++++++++++++++-------- 1 file changed, 74 insertions(+), 42 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 70850bdb0e35..141e50c16f21 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -438,18 +438,21 @@ static const struct hwcap_data { }, }; -static bool seen_sigill; +typedef void (*sighandler_fn)(int, siginfo_t *, void *); -static void handle_sigill(int sig, siginfo_t *info, void *context) -{ - ucontext_t *uc = context; - - seen_sigill = true; - - /* Skip over the offending instruction */ - uc->uc_mcontext.pc += 4; +#define DEF_SIGHANDLER_FUNC(SIG, NUM) \ +static bool seen_##SIG; \ +static void handle_##SIG(int sig, siginfo_t *info, void *context) \ +{ \ + ucontext_t *uc = context; \ + \ + seen_##SIG = true; \ + /* Skip over the offending instruction */ \ + uc->uc_mcontext.pc += 4; \ } +DEF_SIGHANDLER_FUNC(sigill, SIGILL); + bool cpuinfo_present(const char *name) { FILE *f; @@ -492,25 +495,77 @@ bool cpuinfo_present(const char *name) return false; } -int main(void) +static int install_sigaction(int signum, sighandler_fn handler) { - const struct hwcap_data *hwcap; - int i, ret; - bool have_cpuinfo, have_hwcap; + int ret; struct sigaction sa; - ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP); - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handle_sigill; + sa.sa_sigaction = handler; sa.sa_flags = SA_RESTART | SA_SIGINFO; sigemptyset(&sa.sa_mask); - ret = sigaction(SIGILL, &sa, NULL); + ret = sigaction(signum, &sa, NULL); if (ret < 0) ksft_exit_fail_msg("Failed to install SIGILL handler: %s (%d)\n", strerror(errno), errno); + return ret; +} + +static void uninstall_sigaction(int signum) +{ + if (sigaction(signum, NULL, NULL) < 0) + ksft_exit_fail_msg("Failed to uninstall SIGILL handler: %s (%d)\n", + strerror(errno), errno); +} + +#define DEF_INST_RAISE_SIG(SIG, NUM) \ +static bool inst_raise_##SIG(const struct hwcap_data *hwcap, \ + bool have_hwcap) \ +{ \ + if (!hwcap->SIG##_fn) { \ + ksft_test_result_skip(#SIG"_%s\n", hwcap->name); \ + /* assume that it would raise exception in default */ \ + return true; \ + } \ + \ + install_sigaction(NUM, handle_##SIG); \ + \ + seen_##SIG = false; \ + hwcap->SIG##_fn(); \ + \ + if (have_hwcap) { \ + /* Should be able to use the extension */ \ + ksft_test_result(!seen_##SIG, \ + #SIG"_%s\n", hwcap->name); \ + } else if (hwcap->SIG##_reliable) { \ + /* Guaranteed a SIGNAL */ \ + ksft_test_result(seen_##SIG, \ + #SIG"_%s\n", hwcap->name); \ + } else { \ + /* Missing SIGNAL might be fine */ \ + ksft_print_msg(#SIG"_%sreported for %s\n", \ + seen_##SIG ? 
"" : "not ", \ + hwcap->name); \ + ksft_test_result_skip(#SIG"_%s\n", \ + hwcap->name); \ + } \ + \ + uninstall_sigaction(NUM); \ + return seen_##SIG; \ +} + +DEF_INST_RAISE_SIG(sigill, SIGILL); + +int main(void) +{ + int i; + const struct hwcap_data *hwcap; + bool have_cpuinfo, have_hwcap; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP); + for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { hwcap = &hwcaps[i]; @@ -523,30 +578,7 @@ int main(void) ksft_test_result(have_hwcap == have_cpuinfo, "cpuinfo_match_%s\n", hwcap->name); - if (hwcap->sigill_fn) { - seen_sigill = false; - hwcap->sigill_fn(); - - if (have_hwcap) { - /* Should be able to use the extension */ - ksft_test_result(!seen_sigill, "sigill_%s\n", - hwcap->name); - } else if (hwcap->sigill_reliable) { - /* Guaranteed a SIGILL */ - ksft_test_result(seen_sigill, "sigill_%s\n", - hwcap->name); - } else { - /* Missing SIGILL might be fine */ - ksft_print_msg("SIGILL %sreported for %s\n", - seen_sigill ? "" : "not ", - hwcap->name); - ksft_test_result_skip("sigill_%s\n", - hwcap->name); - } - } else { - ksft_test_result_skip("sigill_%s\n", - hwcap->name); - } + inst_raise_sigill(hwcap, have_hwcap); } ksft_print_cnts(); From fd49cf088867f8fad7515b5424d301ca5d147070 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 8 Aug 2023 21:40:35 +0800 Subject: [PATCH 41/62] kselftest/arm64: add test item that support to capturing the SIGBUS signal Some enhanced features, such as the LSE2 feature, do not result in SILLILL if LSE2 is missing and LSE is present, but will generate a SIGBUS exception when atomic access unaligned. Therefore, we add test item to test this type of features. Notice that testing for SIGBUS only makes sense after make sure that the instruction does not cause a SIGILL signal. Signed-off-by: Zeng Heng Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230808134036.668954-5-zengheng4@huawei.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 33 ++++++++++++++++------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 141e50c16f21..69234a8e8c59 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -19,19 +19,20 @@ #include "../../kselftest.h" -#define TESTS_PER_HWCAP 2 +#define TESTS_PER_HWCAP 3 /* - * Function expected to generate SIGILL when the feature is not - * supported and return when it is supported. If SIGILL is generated - * then the handler must be able to skip over the instruction safely. + * Function expected to generate exception when the feature is not + * supported and return when it is supported. If the specific exception + * is generated then the handler must be able to skip over the + * instruction safely. * * Note that it is expected that for many architecture extensions * there are no specific traps due to no architecture state being * added so we may not fault if running on a kernel which doesn't know * to add the hwcap. 
*/ -typedef void (*sigill_fn)(void); +typedef void (*sig_fn)(void); static void crc32_sigill(void) { @@ -235,8 +236,10 @@ static const struct hwcap_data { unsigned long at_hwcap; unsigned long hwcap_bit; const char *cpuinfo; - sigill_fn sigill_fn; + sig_fn sigill_fn; bool sigill_reliable; + sig_fn sigbus_fn; + bool sigbus_reliable; } hwcaps[] = { { .name = "CRC32", @@ -452,6 +455,7 @@ static void handle_##SIG(int sig, siginfo_t *info, void *context) \ } DEF_SIGHANDLER_FUNC(sigill, SIGILL); +DEF_SIGHANDLER_FUNC(sigbus, SIGBUS); bool cpuinfo_present(const char *name) { @@ -506,7 +510,7 @@ static int install_sigaction(int signum, sighandler_fn handler) sigemptyset(&sa.sa_mask); ret = sigaction(signum, &sa, NULL); if (ret < 0) - ksft_exit_fail_msg("Failed to install SIGILL handler: %s (%d)\n", + ksft_exit_fail_msg("Failed to install SIGNAL handler: %s (%d)\n", strerror(errno), errno); return ret; @@ -515,7 +519,7 @@ static int install_sigaction(int signum, sighandler_fn handler) static void uninstall_sigaction(int signum) { if (sigaction(signum, NULL, NULL) < 0) - ksft_exit_fail_msg("Failed to uninstall SIGILL handler: %s (%d)\n", + ksft_exit_fail_msg("Failed to uninstall SIGNAL handler: %s (%d)\n", strerror(errno), errno); } @@ -556,12 +560,13 @@ static bool inst_raise_##SIG(const struct hwcap_data *hwcap, \ } DEF_INST_RAISE_SIG(sigill, SIGILL); +DEF_INST_RAISE_SIG(sigbus, SIGBUS); int main(void) { int i; const struct hwcap_data *hwcap; - bool have_cpuinfo, have_hwcap; + bool have_cpuinfo, have_hwcap, raise_sigill; ksft_print_header(); ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP); @@ -578,7 +583,15 @@ int main(void) ksft_test_result(have_hwcap == have_cpuinfo, "cpuinfo_match_%s\n", hwcap->name); - inst_raise_sigill(hwcap, have_hwcap); + /* + * Testing for SIGBUS only makes sense after make sure + * that the instruction does not cause a SIGILL signal. + */ + raise_sigill = inst_raise_sigill(hwcap, have_hwcap); + if (!raise_sigill) + inst_raise_sigbus(hwcap, have_hwcap); + else + ksft_test_result_skip("sigbus_%s\n", hwcap->name); } ksft_print_cnts(); From 82e7882b141c177b1b09ae66e1b2ae791a9a3918 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 8 Aug 2023 21:40:36 +0800 Subject: [PATCH 42/62] kselftest/arm64: add lse and lse2 features to hwcap test Add the LSE and various features check in the set of hwcap tests. As stated in the ARM manual, the LSE2 feature allows for atomic access to unaligned memory. Therefore, for processors that only have the LSE feature, we register .sigbus_fn to test their ability to perform unaligned access. 
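As a standalone illustration of the probe (a sketch assuming a toolchain with LSE enabled, e.g. -march=armv8.1-a; the selftest itself uses raw .inst encodings to avoid that requirement):

    #include <stdint.h>

    static uint32_t buf[2] __attribute__((aligned(8)));

    static void stadd_unaligned(void)
    {
            /*
             * A 4-byte LSE atomic on a 2-byte-aligned address: raises
             * SIGBUS with FEAT_LSE only, succeeds with FEAT_LSE2.
             */
            uint32_t *p = (uint32_t *)((char *)buf + 2);
            uint32_t add = 1;

            __asm__ volatile("stadd %w1, [%0]" : : "r" (p), "r" (add) : "memory");
    }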
Signed-off-by: Zeng Heng Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230808134036.668954-6-zengheng4@huawei.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 30 +++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 69234a8e8c59..112b6a219382 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -34,6 +34,12 @@ */ typedef void (*sig_fn)(void); +static void atomics_sigill(void) +{ + /* STADD W0, [SP] */ + asm volatile(".inst 0xb82003ff" : : : ); +} + static void crc32_sigill(void) { asm volatile("crc32w w0, w0, w1"); @@ -231,6 +237,14 @@ static void svebf16_sigill(void) asm volatile(".inst 0x658aa000" : : : "z0"); } +static void uscat_sigbus(void) +{ + /* unaligned atomic access */ + asm volatile("ADD x1, sp, #2" : : : ); + /* STADD W0, [X1] */ + asm volatile(".inst 0xb820003f" : : : ); +} + static const struct hwcap_data { const char *name; unsigned long at_hwcap; @@ -276,6 +290,22 @@ static const struct hwcap_data { .cpuinfo = "ilrcpc", .sigill_fn = ilrcpc_sigill, }, + { + .name = "LSE", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_ATOMICS, + .cpuinfo = "atomics", + .sigill_fn = atomics_sigill, + }, + { + .name = "LSE2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_USCAT, + .cpuinfo = "uscat", + .sigill_fn = atomics_sigill, + .sigbus_fn = uscat_sigbus, + .sigbus_reliable = true, + }, { .name = "MOPS", .at_hwcap = AT_HWCAP2, From 358b763ee64b11f45287c286e1f6145729ed49ab Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 10 Aug 2023 12:39:19 +0100 Subject: [PATCH 43/62] kselftest/arm64: Size sycall-abi buffers for the actual maximum VL Our ABI opts to provide future proofing by defining a much larger SVE_VQ_MAX than the architecture actually supports. Since we use this define to control the size of our vector data buffers this results in a lot of overhead when we initialise which can be a very noticable problem in emulation, we fill buffers that are orders of magnitude larger than we will ever actually use even with virtual platforms that provide the full range of architecturally supported vector lengths. Define and use the actual architecture maximum to mitigate this. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230810-arm64-syscall-abi-perf-v1-1-6a0d7656359c@kernel.org Signed-off-by: Will Deacon --- .../testing/selftests/arm64/abi/syscall-abi.c | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index 18cc123e2347..d704511a0955 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -20,12 +20,20 @@ #include "syscall-abi.h" +/* + * The kernel defines a much larger SVE_VQ_MAX than is expressable in + * the architecture, this creates a *lot* of overhead filling the + * buffers (especially ZA) on emulated platforms so use the actual + * architectural maximum instead. 
+ */ +#define ARCH_SVE_VQ_MAX 16 + static int default_sme_vl; static int sve_vl_count; -static unsigned int sve_vls[SVE_VQ_MAX]; +static unsigned int sve_vls[ARCH_SVE_VQ_MAX]; static int sme_vl_count; -static unsigned int sme_vls[SVE_VQ_MAX]; +static unsigned int sme_vls[ARCH_SVE_VQ_MAX]; extern void do_syscall(int sve_vl, int sme_vl); @@ -130,9 +138,9 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, #define SVE_Z_SHARED_BYTES (128 / 8) -static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)]; -uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; -uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; +static uint8_t z_zero[__SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)]; +uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)]; +uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)]; static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) @@ -190,8 +198,8 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, return errors; } -uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; -uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; +uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)]; +uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)]; static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) @@ -222,8 +230,8 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, return errors; } -uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)]; -uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)]; +uint8_t ffr_in[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)]; +uint8_t ffr_out[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)]; static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) @@ -300,8 +308,8 @@ static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, return errors; } -uint8_t za_in[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)]; -uint8_t za_out[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)]; +uint8_t za_in[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)]; +uint8_t za_out[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)]; static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) @@ -470,9 +478,9 @@ void sve_count_vls(void) return; /* - * Enumerate up to SVE_VQ_MAX vector lengths + * Enumerate up to ARCH_SVE_VQ_MAX vector lengths */ - for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) { + for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) { vl = prctl(PR_SVE_SET_VL, vq * 16); if (vl == -1) ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", @@ -496,9 +504,9 @@ void sme_count_vls(void) return; /* - * Enumerate up to SVE_VQ_MAX vector lengths + * Enumerate up to ARCH_SVE_VQ_MAX vector lengths */ - for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) { + for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) { vl = prctl(PR_SME_SET_VL, vq * 16); if (vl == -1) ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", From 0242737dc4eb9f6e9a5ea594b3f93efa0b12f28d Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 14 Aug 2023 20:40:12 +0800 Subject: [PATCH 44/62] perf/smmuv3: Enable HiSilicon Erratum 162001900 quirk for HIP08/09 Some HiSilicon SMMU PMCGs suffer from erratum 162001900: the PMU disable control sometimes fails to disable the counters. This leads to wrong or inaccurate data, because before we enable the counters each counter is still counting the event used in the last perf session. Fix this by hardening the global disable process: before disabling the PMU, write an invalid event type (0xffff) to each used counter to forcibly stop it, and correspondingly restore each event on pmu::pmu_enable().
Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20230814124012.58013-1-yangyicong@huawei.com Signed-off-by: Will Deacon --- Documentation/arch/arm64/silicon-errata.rst | 3 ++ drivers/acpi/arm64/iort.c | 5 ++- drivers/perf/arm_smmuv3_pmu.c | 46 ++++++++++++++++++++- include/linux/acpi_iort.h | 1 + 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 496cdca5cb99..d54626cfcbda 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -195,6 +195,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| Hisilicon | Hip08 SMMU PMCG | #162001900 | N/A | +| | Hip09 SMMU PMCG | | | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 3631230a61c8..2c1640fd2b16 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1711,7 +1711,10 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res, static struct acpi_platform_list pmcg_plat_info[] __initdata = { /* HiSilicon Hip08 Platform */ {"HISI ", "HIP08 ", 0, ACPI_SIG_IORT, greater_than_or_equal, - "Erratum #162001800", IORT_SMMU_V3_PMCG_HISI_HIP08}, + "Erratum #162001800, Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP08}, + /* HiSilicon Hip09 Platform */ + {"HISI ", "HIP09 ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, { } }; diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 25a269d431e4..0e17c57ddb87 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -115,6 +115,7 @@ #define SMMU_PMCG_PA_SHIFT 12 #define SMMU_PMCG_EVCNTR_RDONLY BIT(0) +#define SMMU_PMCG_HARDEN_DISABLE BIT(1) static int cpuhp_state_num; @@ -159,6 +160,20 @@ static inline void smmu_pmu_enable(struct pmu *pmu) writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR); } +static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu, + struct perf_event *event, int idx); + +static inline void smmu_pmu_enable_quirk_hip08_09(struct pmu *pmu) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); + unsigned int idx; + + for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters) + smmu_pmu_apply_event_filter(smmu_pmu, smmu_pmu->events[idx], idx); + + smmu_pmu_enable(pmu); +} + static inline void smmu_pmu_disable(struct pmu *pmu) { struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); @@ -167,6 +182,22 @@ static inline void smmu_pmu_disable(struct pmu *pmu) writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL); } +static inline void smmu_pmu_disable_quirk_hip08_09(struct pmu *pmu) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); + unsigned int idx; + + /* + * The global disable of PMU sometimes fail to stop the counting. + * Harden this by writing an invalid event type to each used counter + * to forcibly stop counting. 
+ */ + for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters) + writel(0xffff, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx)); + + smmu_pmu_disable(pmu); +} + static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu, u32 idx, u64 value) { @@ -765,7 +796,10 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu) switch (model) { case IORT_SMMU_V3_PMCG_HISI_HIP08: /* HiSilicon Erratum 162001800 */ - smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY; + smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY | SMMU_PMCG_HARDEN_DISABLE; + break; + case IORT_SMMU_V3_PMCG_HISI_HIP09: + smmu_pmu->options |= SMMU_PMCG_HARDEN_DISABLE; break; } @@ -890,6 +924,16 @@ static int smmu_pmu_probe(struct platform_device *pdev) if (!dev->of_node) smmu_pmu_get_acpi_options(smmu_pmu); + /* + * For platforms suffer this quirk, the PMU disable sometimes fails to + * stop the counters. This will leads to inaccurate or error counting. + * Forcibly disable the counters with these quirk handler. + */ + if (smmu_pmu->options & SMMU_PMCG_HARDEN_DISABLE) { + smmu_pmu->pmu.pmu_enable = smmu_pmu_enable_quirk_hip08_09; + smmu_pmu->pmu.pmu_disable = smmu_pmu_disable_quirk_hip08_09; + } + /* Pick one CPU to be the preferred one to use */ smmu_pmu->on_cpu = raw_smp_processor_id(); WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu))); diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index ee7cb6aaff71..1cb65592c95d 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -21,6 +21,7 @@ */ #define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */ #define IORT_SMMU_V3_PMCG_HISI_HIP08 0x00000001 /* HiSilicon HIP08 PMCG */ +#define IORT_SMMU_V3_PMCG_HISI_HIP09 0x00000002 /* HiSilicon HIP09 PMCG */ int iort_register_domain_token(int trans_id, phys_addr_t base, struct fwnode_handle *fw_node); From 1b0e3ea9301a422003d385cda8f8dee6c878ad05 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 14 Aug 2023 21:16:42 +0800 Subject: [PATCH 45/62] perf/smmuv3: Add MODULE_ALIAS for module auto loading On my ACPI based arm64 server, if the SMMUv3 PMU is configured as module it won't be loaded automatically after booting even if the device has already been scanned and added. It's because the module lacks a platform alias, the uevent mechanism and userspace tools like udevd make use of this to find the target driver module of the device. This patch adds the missing platform alias of the module, then module will be loaded automatically if device exists. 
Before this patch: [root@localhost tmp]# modinfo arm_smmuv3_pmu | grep alias alias: of:N*T*Carm,smmu-v3-pmcgC* alias: of:N*T*Carm,smmu-v3-pmcg After this patch: [root@localhost tmp]# modinfo arm_smmuv3_pmu | grep alias alias: platform:arm-smmu-v3-pmcg alias: of:N*T*Carm,smmu-v3-pmcgC* alias: of:N*T*Carm,smmu-v3-pmcg Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20230814131642.65263-1-yangyicong@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 0e17c57ddb87..6303b82566f9 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -1028,6 +1028,7 @@ static void __exit arm_smmu_pmu_exit(void) module_exit(arm_smmu_pmu_exit); +MODULE_ALIAS("platform:arm-smmu-v3-pmcg"); MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension"); MODULE_AUTHOR("Neil Leeder "); MODULE_AUTHOR("Shameer Kolothum "); From 4c1d2f56d685406fc6b452ca5f797bda62a06609 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 12 Aug 2023 19:55:49 -0400 Subject: [PATCH 46/62] perf/arm-dmc620: Fix dmc620_pmu_irqs_lock/cpu_hotplug_lock circular lock dependency The following circular locking dependency was reported when running cpus online/offline test on an arm64 system. [ 84.195923] Chain exists of: dmc620_pmu_irqs_lock --> cpu_hotplug_lock --> cpuhp_state-down [ 84.207305] Possible unsafe locking scenario: [ 84.213212] CPU0 CPU1 [ 84.217729] ---- ---- [ 84.222247] lock(cpuhp_state-down); [ 84.225899] lock(cpu_hotplug_lock); [ 84.232068] lock(cpuhp_state-down); [ 84.238237] lock(dmc620_pmu_irqs_lock); [ 84.242236] *** DEADLOCK *** The following locking order happens when dmc620_pmu_get_irq() calls cpuhp_state_add_instance_nocalls(). lock(dmc620_pmu_irqs_lock) --> lock(cpu_hotplug_lock) On the other hand, the calling sequence cpuhp_thread_fun() => cpuhp_invoke_callback() => dmc620_pmu_cpu_teardown() leads to the locking sequence lock(cpuhp_state-down) => lock(dmc620_pmu_irqs_lock) Here dmc620_pmu_irqs_lock protects both the dmc620_pmu_irqs and the pmus_node lists in various dmc620_pmu instances. dmc620_pmu_get_irq() requires protected access to dmc620_pmu_irqs whereas dmc620_pmu_cpu_teardown() needs protection to the pmus_node lists. Break this circular locking dependency by using two separate locks to protect dmc620_pmu_irqs list and the pmus_node lists respectively. 
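As a userspace toy of the resulting locking shape (hypothetical names,
not the driver code, and pthread mutexes standing in for kernel
mutexes): the pmus_node lists get their own lock, so the path invoked
under the hotplug locks never takes the mutex that is held around the
hotplug-instance registration:

  #include <pthread.h>
  #include <stddef.h>

  struct pmu_node { struct pmu_node *next; };

  /* pmus_node lists get a dedicated lock; a second mutex (the old
   * irqs lock) keeps protecting the irqs list, and the two locks
   * are never nested inside one another. */
  static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
  static struct pmu_node *pmus_head;

  static void pmus_node_add(struct pmu_node *n)
  {
          /* Formerly done under the irqs lock, which is also held
           * around hotplug registration; now fully independent. */
          pthread_mutex_lock(&node_lock);
          n->next = pmus_head;
          pmus_head = n;
          pthread_mutex_unlock(&node_lock);
  }

  static void teardown_walk(void (*fn)(struct pmu_node *))
  {
          /* Hotplug-callback path: takes only node_lock, so no cycle
           * through the hotplug locks can form. */
          struct pmu_node *n;

          pthread_mutex_lock(&node_lock);
          for (n = pmus_head; n; n = n->next)
                  fn(n);
          pthread_mutex_unlock(&node_lock);
  }

  static void noop(struct pmu_node *n) { (void)n; }

  int main(void)
  {
          struct pmu_node a = { NULL };

          pmus_node_add(&a);
          teardown_walk(noop); /* would migrate perf contexts in the driver */
          return 0;
  }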
Suggested-by: Robin Murphy
Signed-off-by: Waiman Long
Link: https://lore.kernel.org/r/20230812235549.494174-1-longman@redhat.com
Signed-off-by: Will Deacon
---
 drivers/perf/arm_dmc620_pmu.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 9d0f01c4455a..30cea6859574 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -66,8 +66,13 @@
 #define DMC620_PMU_COUNTERn_OFFSET(n) \
 	(DMC620_PMU_COUNTERS_BASE + 0x28 * (n))
 
-static LIST_HEAD(dmc620_pmu_irqs);
+/*
+ * dmc620_pmu_irqs_lock: protects dmc620_pmu_irqs list
+ * dmc620_pmu_node_lock: protects pmus_node lists in all dmc620_pmu instances
+ */
 static DEFINE_MUTEX(dmc620_pmu_irqs_lock);
+static DEFINE_MUTEX(dmc620_pmu_node_lock);
+static LIST_HEAD(dmc620_pmu_irqs);
 
 struct dmc620_pmu_irq {
 	struct hlist_node node;
@@ -475,9 +480,9 @@ static int dmc620_pmu_get_irq(struct dmc620_pmu *dmc620_pmu, int irq_num)
 		return PTR_ERR(irq);
 
 	dmc620_pmu->irq = irq;
-	mutex_lock(&dmc620_pmu_irqs_lock);
+	mutex_lock(&dmc620_pmu_node_lock);
 	list_add_rcu(&dmc620_pmu->pmus_node, &irq->pmus_node);
-	mutex_unlock(&dmc620_pmu_irqs_lock);
+	mutex_unlock(&dmc620_pmu_node_lock);
 
 	return 0;
 }
@@ -486,9 +491,11 @@ static void dmc620_pmu_put_irq(struct dmc620_pmu *dmc620_pmu)
 {
 	struct dmc620_pmu_irq *irq = dmc620_pmu->irq;
 
-	mutex_lock(&dmc620_pmu_irqs_lock);
+	mutex_lock(&dmc620_pmu_node_lock);
 	list_del_rcu(&dmc620_pmu->pmus_node);
+	mutex_unlock(&dmc620_pmu_node_lock);
 
+	mutex_lock(&dmc620_pmu_irqs_lock);
 	if (!refcount_dec_and_test(&irq->refcount)) {
 		mutex_unlock(&dmc620_pmu_irqs_lock);
 		return;
@@ -638,10 +645,10 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu,
 		return 0;
 
 	/* We're only reading, but this isn't the place to be involving RCU */
-	mutex_lock(&dmc620_pmu_irqs_lock);
+	mutex_lock(&dmc620_pmu_node_lock);
 	list_for_each_entry(dmc620_pmu, &irq->pmus_node, pmus_node)
 		perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
-	mutex_unlock(&dmc620_pmu_irqs_lock);
+	mutex_unlock(&dmc620_pmu_node_lock);
 
 	WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
 	irq->cpu = target;

From 46862da15e37efedb7d2d21e167f506c0b533772 Mon Sep 17 00:00:00 2001
From: Ding Xiang
Date: Tue, 15 Aug 2023 15:49:15 +0800
Subject: [PATCH 47/62] kselftest/arm64: fix a memleak in zt_regs_run()

If memcmp() does not return 0, "zeros" needs to be freed to prevent a
memory leak.

Signed-off-by: Ding Xiang
Link: https://lore.kernel.org/r/20230815074915.245528-1-dingxiang@cmss.chinamobile.com
Signed-off-by: Will Deacon
---
 tools/testing/selftests/arm64/signal/testcases/zt_regs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
index e1eb4d5c027a..2e384d731618 100644
--- a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
@@ -65,6 +65,7 @@ int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
 	if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET,
 		   ZT_SIG_REGS_SIZE(zt->nregs)) != 0) {
 		fprintf(stderr, "ZT data invalid\n");
+		free(zeros);
 		return 1;
 	}

From 83a6d80c2bfd1d348e5e7079af21a924fdc5c972 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Tue, 15 Aug 2023 21:10:10 +0800
Subject: [PATCH 48/62] drivers/perf: hisi: Schedule perf session according to locality

The PCIe PMUs are located on different NUMA nodes, but currently we
don't take this into account and will likely stack all the sessions on the
same CPU:

[root@localhost tmp]# cat /sys/devices/hisi_pcie*/cpumask
0
0
0
0
0
0

This can be optimized a bit to use a local CPU for the PMU.

Signed-off-by: Yicong Yang
Link: https://lore.kernel.org/r/20230815131010.2147-1-yangyicong@huawei.com
Signed-off-by: Will Deacon
---
 drivers/perf/hisilicon/hisi_pcie_pmu.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index e10fc7cb9493..5a00adb2de8c 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -665,8 +665,8 @@ static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
 	struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
 
 	if (pcie_pmu->on_cpu == -1) {
-		pcie_pmu->on_cpu = cpu;
-		WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
+		pcie_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(&pcie_pmu->pdev->dev));
+		WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(pcie_pmu->on_cpu)));
 	}
 
 	return 0;
@@ -676,14 +676,23 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 {
 	struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
 	unsigned int target;
+	cpumask_t mask;
+	int numa_node;
 
 	/* Nothing to do if this CPU doesn't own the PMU */
 	if (pcie_pmu->on_cpu != cpu)
 		return 0;
 
 	pcie_pmu->on_cpu = -1;
-	/* Choose a new CPU from all online cpus. */
-	target = cpumask_any_but(cpu_online_mask, cpu);
+
+	/* Choose a local CPU from all online cpus. */
+	numa_node = dev_to_node(&pcie_pmu->pdev->dev);
+	if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) &&
+	    cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
+		target = cpumask_any(&mask);
+	else
+		target = cpumask_any_but(cpu_online_mask, cpu);
+
 	if (target >= nr_cpu_ids) {
 		pci_err(pcie_pmu->pdev, "There is no CPU to set\n");
 		return 0;

From e89ecd8368860bf05437eabd07d292c316221cfc Mon Sep 17 00:00:00 2001
From: Xu Yang
Date: Fri, 11 Aug 2023 09:54:37 +0800
Subject: [PATCH 49/62] perf/imx_ddr: speed up overflow frequency of cycle

For i.MX8MP, we cannot ensure that cycle counter overflow occurs at
least 4 times as often as other events. Because the byte counters
count for any configured event, they will overflow more often, and if
a byte counter overflows, the related counters stop, since they share
COUNTER_CNTL. We can speed up the cycle counter overflow frequency by
setting the counter parameter (CP) field of the cycle counter. In this
way, we avoid the byte counters stopping while no interrupt has come
in, and the byte counters can be fetched or updated on each cycle
counter overflow interrupt.

Because we initialize the CP field to shorten the counter0 overflow
time, the cycle counter will start counting from a fixed/base value
each time. We need to remove that base from the result too, so that we
get a precise result from the cycle counter.
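As a back-of-the-envelope check (an inference from the constants in
this patch, CNTL_CP_MASK and CYCLES_COUNTER_MASK, not authoritative
hardware documentation): if CP = 0xf0 seeds the top byte of the
counter, each period starts at 0xf0000000 and overflows after
2^32 - 0xf0000000 = 2^28 cycles, i.e. 16 times as often as a counter
starting from zero, and masking the raw read with 0x0FFFFFFF strips
that base again:

  #include <stdint.h>
  #include <stdio.h>

  #define CYCLES_COUNTER_MASK 0x0FFFFFFFu

  int main(void)
  {
          uint32_t start = 0xf0u << 24;            /* 0xf0000000 */
          uint64_t period = (1ull << 32) - start;  /* 0x10000000 cycles */
          uint32_t raw = start + 0x1234u;          /* sample raw read */

          printf("period = %#llx\n", (unsigned long long)period);
          printf("count  = %#x\n", raw & CYCLES_COUNTER_MASK); /* 0x1234 */
          return 0;
  }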
Signed-off-by: Xu Yang
Reviewed-by: Frank Li
Link: https://lore.kernel.org/r/20230811015438.1999307-1-xu.yang_2@nxp.com
Signed-off-by: Will Deacon
---
 drivers/perf/fsl_imx8_ddr_perf.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 1cb3861ab0e0..7d4e49565738 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -27,6 +27,8 @@
 
 #define CNTL_CLEAR_MASK		0xFFFFFFFD
 #define CNTL_OVER_MASK		0xFFFFFFFE
+#define CNTL_CP_SHIFT		16
+#define CNTL_CP_MASK		(0xFF << CNTL_CP_SHIFT)
 #define CNTL_CSV_SHIFT		24
 #define CNTL_CSV_MASK		(0xFFU << CNTL_CSV_SHIFT)
 
@@ -34,6 +36,8 @@
 #define EVENT_CYCLES_COUNTER	0
 #define NUM_COUNTERS		4
 
+/* For removing bias if cycle counter CNTL.CP is set to 0xf0 */
+#define CYCLES_COUNTER_MASK	0x0FFFFFFF
 #define AXI_MASKING_REVERT	0xffff0000	/* AXI_MASKING(MSB 16bits) + AXI_ID(LSB 16bits) */
 
 #define to_ddr_pmu(p)		container_of(p, struct ddr_pmu, pmu)
@@ -426,6 +430,17 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
 		writel(0, pmu->base + reg);
 		val = CNTL_EN | CNTL_CLEAR;
 		val |= FIELD_PREP(CNTL_CSV_MASK, config);
+
+		/*
+		 * On i.MX8MP we need to bias the cycle counter to overflow more often.
+		 * We do this by initializing bits [23:16] of the counter value via the
+		 * COUNTER_CTRL Counter Parameter (CP) field.
+		 */
+		if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+			if (counter == EVENT_CYCLES_COUNTER)
+				val |= FIELD_PREP(CNTL_CP_MASK, 0xf0);
+		}
+
 		writel(val, pmu->base + reg);
 	} else {
 		/* Disable counter */
@@ -465,6 +480,12 @@ static void ddr_perf_event_update(struct perf_event *event)
 	int ret;
 
 	new_raw_count = ddr_perf_read_counter(pmu, counter);
+	/* Remove the bias applied in ddr_perf_counter_enable(). */
+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+		if (counter == EVENT_CYCLES_COUNTER)
+			new_raw_count &= CYCLES_COUNTER_MASK;
+	}
+
 	local64_add(new_raw_count, &event->count);
 
 	/*

From f4e2bd91ddf5e8543cbe7ad80b3fba3d2dc63fa3 Mon Sep 17 00:00:00 2001
From: Xu Yang
Date: Fri, 11 Aug 2023 09:54:38 +0800
Subject: [PATCH 50/62] perf/imx_ddr: don't enable counter0 if none of 4 counters are used

In the current driver, counter0 is enabled after ddr_perf_pmu_enable()
is called even when none of the 4 counters is in use. This causes
counter0 to continue counting until ddr_perf_pmu_disable() is called.
If the PMU is never disabled, the PMU interrupt is asserted from time
to time because counter0 overflows and the irq handler clears it. This
is not the expected behavior. This patch does not enable counter0 if
none of the 4 counters is used.
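A minimal sketch of the scheme (cycle_counter_switch() is a stand-in
for the real ddr_perf_counter_enable() call): the shared cycle counter
is switched on by the first event that starts and off by the last one
that stops, instead of unconditionally in the PMU-wide hooks:

  #include <stdio.h>

  static int active_counter;

  static void cycle_counter_switch(int on)  /* stand-in for the real enable */
  {
          printf("cycle counter %s\n", on ? "on" : "off");
  }

  static void event_start(void)
  {
          if (!active_counter++)        /* 0 -> 1: first user, switch on */
                  cycle_counter_switch(1);
  }

  static void event_stop(void)
  {
          if (!--active_counter)        /* 1 -> 0: last user, switch off */
                  cycle_counter_switch(0);
  }

  int main(void)
  {
          event_start();   /* on */
          event_start();   /* already on, refcount only */
          event_stop();
          event_stop();    /* off */
          return 0;
  }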
Fixes: 9a66d36cc7ac ("drivers/perf: imx_ddr: Add DDR performance counter support to perf") Signed-off-by: Xu Yang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20230811015438.1999307-2-xu.yang_2@nxp.com Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 7d4e49565738..92611c98120f 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -104,6 +104,7 @@ struct ddr_pmu { const struct fsl_ddr_devtype_data *devtype_data; int irq; int id; + int active_counter; }; static ssize_t ddr_perf_identifier_show(struct device *dev, @@ -515,6 +516,10 @@ static void ddr_perf_event_start(struct perf_event *event, int flags) ddr_perf_counter_enable(pmu, event->attr.config, counter, true); + if (!pmu->active_counter++) + ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID, + EVENT_CYCLES_COUNTER, true); + hwc->state = 0; } @@ -568,6 +573,10 @@ static void ddr_perf_event_stop(struct perf_event *event, int flags) ddr_perf_counter_enable(pmu, event->attr.config, counter, false); ddr_perf_event_update(event); + if (!--pmu->active_counter) + ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID, + EVENT_CYCLES_COUNTER, false); + hwc->state |= PERF_HES_STOPPED; } @@ -585,25 +594,10 @@ static void ddr_perf_event_del(struct perf_event *event, int flags) static void ddr_perf_pmu_enable(struct pmu *pmu) { - struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); - - /* enable cycle counter if cycle is not active event list */ - if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL) - ddr_perf_counter_enable(ddr_pmu, - EVENT_CYCLES_ID, - EVENT_CYCLES_COUNTER, - true); } static void ddr_perf_pmu_disable(struct pmu *pmu) { - struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); - - if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL) - ddr_perf_counter_enable(ddr_pmu, - EVENT_CYCLES_ID, - EVENT_CYCLES_COUNTER, - false); } static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, From e08302ee46c9a5101e3902350ba5e27fcb18894b Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 15 Aug 2023 15:59:31 +0100 Subject: [PATCH 51/62] kselftest/arm64: build BTI tests in output directory The arm64 BTI selftests are currently built in the source directory, then the generated binaries are copied to the output directory. This leaves the object files around in a potentially otherwise pristine source tree, tainting it for out-of-tree kernel builds. Prepend $(OUTPUT) to every reference to an object file in the Makefile, and remove the extra handling and copying. This puts all generated files under the output directory. 
Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230815145931.2522557-1-andre.przywara@arm.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/bti/Makefile | 45 +++++++++---------- .../selftests/arm64/bti/gen/.gitignore | 2 - 2 files changed, 20 insertions(+), 27 deletions(-) delete mode 100644 tools/testing/selftests/arm64/bti/gen/.gitignore diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile index ccdac414ad94..05e4ee523a53 100644 --- a/tools/testing/selftests/arm64/bti/Makefile +++ b/tools/testing/selftests/arm64/bti/Makefile @@ -2,8 +2,6 @@ TEST_GEN_PROGS := btitest nobtitest -PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS)) - # These tests are built as freestanding binaries since otherwise BTI # support in ld.so is required which is not currently widespread; when # it is available it will still be useful to test this separately as the @@ -18,44 +16,41 @@ CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS) BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $< NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $< -%-bti.o: %.c +$(OUTPUT)/%-bti.o: %.c $(BTI_CC_COMMAND) -%-bti.o: %.S +$(OUTPUT)/%-bti.o: %.S $(BTI_CC_COMMAND) -%-nobti.o: %.c +$(OUTPUT)/%-nobti.o: %.c $(NOBTI_CC_COMMAND) -%-nobti.o: %.S +$(OUTPUT)/%-nobti.o: %.S $(NOBTI_CC_COMMAND) BTI_OBJS = \ - test-bti.o \ - signal-bti.o \ - start-bti.o \ - syscall-bti.o \ - system-bti.o \ - teststubs-bti.o \ - trampoline-bti.o -gen/btitest: $(BTI_OBJS) + $(OUTPUT)/test-bti.o \ + $(OUTPUT)/signal-bti.o \ + $(OUTPUT)/start-bti.o \ + $(OUTPUT)/syscall-bti.o \ + $(OUTPUT)/system-bti.o \ + $(OUTPUT)/teststubs-bti.o \ + $(OUTPUT)/trampoline-bti.o +$(OUTPUT)/btitest: $(BTI_OBJS) $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^ NOBTI_OBJS = \ - test-nobti.o \ - signal-nobti.o \ - start-nobti.o \ - syscall-nobti.o \ - system-nobti.o \ - teststubs-nobti.o \ - trampoline-nobti.o -gen/nobtitest: $(NOBTI_OBJS) + $(OUTPUT)/test-nobti.o \ + $(OUTPUT)/signal-nobti.o \ + $(OUTPUT)/start-nobti.o \ + $(OUTPUT)/syscall-nobti.o \ + $(OUTPUT)/system-nobti.o \ + $(OUTPUT)/teststubs-nobti.o \ + $(OUTPUT)/trampoline-nobti.o +$(OUTPUT)/nobtitest: $(NOBTI_OBJS) $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^ # Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list # to account for any OUTPUT target-dirs optionally provided by # the toplevel makefile include ../../lib.mk - -$(TEST_GEN_PROGS): $(PROGS) - cp $(PROGS) $(OUTPUT)/ diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore deleted file mode 100644 index 73869fabada4..000000000000 --- a/tools/testing/selftests/arm64/bti/gen/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -btitest -nobtitest From 18b8f57a7f51b4c834e5e974ec38133c02e9eb58 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 15 Aug 2023 15:06:39 +0100 Subject: [PATCH 52/62] arm64: sysreg: Generate C compiler warnings on {read,write}_sysreg_s arguments Evaluate the register before the asm section so that the C compiler generates warnings when there is an issue with the register argument. This will prevent possible future issues such as the one seen here [1] where a missing bracket caused the shift and addition operators to be evaluated in the wrong order, but no warning was emitted. 
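As a toy illustration (not kernel code) of the precedence hazard: '+'
binds tighter than '>>' in C, so the two expressions below differ, and
clang's -Wshift-op-parentheses flags the first one, whereas an
assembler would accept either silently:

  #include <stdio.h>

  int main(void)
  {
          unsigned int a = 16 >> 2 + 1;   /* parsed as 16 >> (2 + 1) == 2 */
          unsigned int b = (16 >> 2) + 1; /* == 5 */

          printf("%u %u\n", a, b);
          return 0;
  }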
The GNU assembler has no warning for when expressions evaluate differently to C due to different operator precedence, but the C compiler has some warnings that may suggest something is wrong. For example in this case the following warning would have been emitted: error: operator '>>' has lower precedence than '+'; '+' will be evaluated first [-Werror,-Wshift-op-parentheses] There are currently no existing warnings that need to be fixed. [1]: https://lore.kernel.org/linux-perf-users/20230728162011.GA22050@willie-the-truck/ Signed-off-by: James Clark Link: https://lore.kernel.org/r/20230815140639.614769-1-james.clark@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b481935e9314..16464bf9a8aa 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -803,15 +803,21 @@ /* * For registers without architectural names, or simply unsupported by * GAS. + * + * __check_r forces warnings to be generated by the compiler when + * evaluating r which wouldn't normally happen due to being passed to + * the assembler via __stringify(r). */ #define read_sysreg_s(r) ({ \ u64 __val; \ + u32 __maybe_unused __check_r = (u32)(r); \ asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ __val; \ }) #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ + u32 __maybe_unused __check_r = (u32)(r); \ asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ } while (0) From 7eb4ee66739ea187d3855f6dbf2ab48a512b4ae0 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 15 Aug 2023 12:09:12 +0800 Subject: [PATCH 53/62] kselftest/arm64: add SHA1 and related features to hwcap test Add the SHA1 and related features check in the set of hwcap tests. 
Signed-off-by: Zeng Heng Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230815040915.3966955-2-zengheng4@huawei.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 39 +++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 112b6a219382..83d3b6dcc0d8 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -87,6 +87,24 @@ static void rng_sigill(void) asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0"); } +static void sha1_sigill(void) +{ + /* SHA1H S0, S0 */ + asm volatile(".inst 0x5e280800" : : : ); +} + +static void sha2_sigill(void) +{ + /* SHA256H Q0, Q0, V0.4S */ + asm volatile(".inst 0x5e004000" : : : ); +} + +static void sha512_sigill(void) +{ + /* SHA512H Q0, Q0, V0.2D */ + asm volatile(".inst 0xce608000" : : : ); +} + static void sme_sigill(void) { /* RDSVL x0, #0 */ @@ -327,6 +345,27 @@ static const struct hwcap_data { .hwcap_bit = HWCAP2_RPRFM, .cpuinfo = "rprfm", }, + { + .name = "SHA1", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SHA1, + .cpuinfo = "sha1", + .sigill_fn = sha1_sigill, + }, + { + .name = "SHA2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SHA2, + .cpuinfo = "sha2", + .sigill_fn = sha2_sigill, + }, + { + .name = "SHA512", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SHA512, + .cpuinfo = "sha512", + .sigill_fn = sha512_sigill, + }, { .name = "SME", .at_hwcap = AT_HWCAP2, From 3fc3c0d1246cfe440d9c44f2134fde432b311ab3 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 15 Aug 2023 12:09:13 +0800 Subject: [PATCH 54/62] kselftest/arm64: add AES feature check to hwcap test Add the AES feature check in the set of hwcap tests. Signed-off-by: Zeng Heng Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230815040915.3966955-3-zengheng4@huawei.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 83d3b6dcc0d8..96c2611f1ac0 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -34,6 +34,12 @@ */ typedef void (*sig_fn)(void); +static void aes_sigill(void) +{ + /* AESE V0.16B, V0.16B */ + asm volatile(".inst 0x4e284800" : : : ); +} + static void atomics_sigill(void) { /* STADD W0, [SP] */ @@ -273,6 +279,13 @@ static const struct hwcap_data { sig_fn sigbus_fn; bool sigbus_reliable; } hwcaps[] = { + { + .name = "AES", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_AES, + .cpuinfo = "aes", + .sigill_fn = aes_sigill, + }, { .name = "CRC32", .at_hwcap = AT_HWCAP, From 2c3ce0e7dd5154ffa017cd4dab3b14c09272c4a5 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 15 Aug 2023 12:09:14 +0800 Subject: [PATCH 55/62] kselftest/arm64: add pmull feature to hwcap test Add the pmull feature check in the set of hwcap tests. 
Signed-off-by: Zeng Heng
Reviewed-by: Mark Brown
Link: https://lore.kernel.org/r/20230815040915.3966955-4-zengheng4@huawei.com
Signed-off-by: Will Deacon
---
 tools/testing/selftests/arm64/abi/hwcap.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 96c2611f1ac0..b3492a944503 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -88,6 +88,12 @@ static void mops_sigill(void)
 		     : "cc", "memory");
 }
 
+static void pmull_sigill(void)
+{
+	/* PMULL V0.1Q, V0.1D, V0.1D */
+	asm volatile(".inst 0x0ee0e000" : : : );
+}
+
 static void rng_sigill(void)
 {
 	asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
@@ -345,6 +351,13 @@ static const struct hwcap_data {
 		.sigill_fn = mops_sigill,
 		.sigill_reliable = true,
 	},
+	{
+		.name = "PMULL",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_PMULL,
+		.cpuinfo = "pmull",
+		.sigill_fn = pmull_sigill,
+	},
 	{
 		.name = "RNG",
 		.at_hwcap = AT_HWCAP2,

From fcb0b51a5dca1f69f408dfe5fd970cf167d41538 Mon Sep 17 00:00:00 2001
From: Zeng Heng
Date: Tue, 15 Aug 2023 12:09:15 +0800
Subject: [PATCH 56/62] kselftest/arm64: add jscvt feature to hwcap test

Add the jscvt feature check in the set of hwcap tests.

Because the jscvt instruction requires a compiler configured for v8.3
or above to assemble, a hand-encoded instruction is used here instead.

Signed-off-by: Zeng Heng
Reviewed-by: Mark Brown
Link: https://lore.kernel.org/r/20230815040915.3966955-5-zengheng4@huawei.com
Signed-off-by: Will Deacon
---
 tools/testing/selftests/arm64/abi/hwcap.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index b3492a944503..fe844b825a1e 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -68,6 +68,12 @@ static void ilrcpc_sigill(void)
 	asm volatile(".inst 0x994083e0" : : : );
 }
 
+static void jscvt_sigill(void)
+{
+	/* FJCVTZS W0, D0 */
+	asm volatile(".inst 0x1e7e0000" : : : );
+}
+
 static void lrcpc_sigill(void)
 {
 	/* LDAPR W0, [SP, #0] */
@@ -313,6 +319,13 @@ static const struct hwcap_data {
 		.cpuinfo = "fp",
 		.sigill_fn = fp_sigill,
 	},
+	{
+		.name = "JSCVT",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_JSCVT,
+		.cpuinfo = "jscvt",
+		.sigill_fn = jscvt_sigill,
+	},
 	{
 		.name = "LRCPC",
 		.at_hwcap = AT_HWCAP,

From d232606773a0b09ec7f1ffc25f63abe801d011fd Mon Sep 17 00:00:00 2001
From: Justin Stitt
Date: Fri, 11 Aug 2023 16:33:51 +0000
Subject: [PATCH 57/62] arm64/sysreg: refactor deprecated strncpy

`strncpy` is deprecated for use on NUL-terminated destination strings
[1], which seems to be the case here given the forceful setting of
`buf`'s tail to 0.

A suitable replacement is `strscpy` [2] due to the fact that it
guarantees NUL-termination on its destination buffer argument, which is
_not_ the case for `strncpy`!

In this case, we can simplify the logic and also check for any silent
truncation by using `strscpy`'s return value.

This should have no functional change and yet uses a more robust and
less ambiguous interface whilst reducing code complexity.
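A userspace sketch of the semantics relied on here (a strscpy()-like
helper written for illustration; the real kernel implementation
differs): the destination is always NUL-terminated and truncation is
reported as -E2BIG, unlike strncpy(), which can leave the destination
unterminated:

  #include <errno.h>
  #include <string.h>
  #include <sys/types.h>

  static ssize_t strscpy_sketch(char *dest, const char *src, size_t count)
  {
          size_t len = strnlen(src, count);

          if (!count)
                  return -E2BIG;
          if (len == count) {            /* doesn't fit: truncate + terminate */
                  memcpy(dest, src, count - 1);
                  dest[count - 1] = '\0';
                  return -E2BIG;
          }
          memcpy(dest, src, len + 1);    /* fits, including the NUL */
          return len;
  }

  int main(void)
  {
          char buf[8];

          /* returns -E2BIG; buf still holds a NUL-terminated prefix */
          return strscpy_sketch(buf, "longer-than-eight-bytes",
                                sizeof(buf)) == -E2BIG ? 0 : 1;
  }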
Link: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings[1] Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2] Link: https://github.com/KSPP/linux/issues/90 Suggested-by: Kees Cook Cc: linux-hardening@vger.kernel.org Signed-off-by: Justin Stitt Link: https://lore.kernel.org/r/20230811-strncpy-arch-arm64-v2-1-ba84eabffadb@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/idreg-override.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 2fe2491b692c..aee12c75b738 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -262,9 +262,9 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases) if (!len) return; - len = min(len, ARRAY_SIZE(buf) - 1); - strncpy(buf, cmdline, len); - buf[len] = 0; + len = strscpy(buf, cmdline, ARRAY_SIZE(buf)); + if (len == -E2BIG) + len = ARRAY_SIZE(buf) - 1; if (strcmp(buf, "--") == 0) return; From d11a69873d9a7435fe6a48531e165ab80a8b1221 Mon Sep 17 00:00:00 2001 From: Tomislav Novak Date: Mon, 5 Jun 2023 12:19:23 -0700 Subject: [PATCH 58/62] hw_breakpoint: fix single-stepping when using bpf_overflow_handler Arm platforms use is_default_overflow_handler() to determine if the hw_breakpoint code should single-step over the breakpoint trigger or let the custom handler deal with it. Since bpf_overflow_handler() currently isn't recognized as a default handler, attaching a BPF program to a PERF_TYPE_BREAKPOINT event causes it to keep firing (the instruction triggering the data abort exception is never skipped). For example: # bpftrace -e 'watchpoint:0x10000:4:w { print("hit") }' -c ./test Attaching 1 probe... hit hit [...] ^C (./test performs a single 4-byte store to 0x10000) This patch replaces the check with uses_default_overflow_handler(), which accounts for the bpf_overflow_handler() case by also testing if one of the perf_event_output functions gets invoked indirectly, via orig_default_handler. Signed-off-by: Tomislav Novak Tested-by: Samuel Gosselin # arm64 Reviewed-by: Catalin Marinas Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/linux-arm-kernel/20220923203644.2731604-1-tnovak@fb.com/ Link: https://lore.kernel.org/r/20230605191923.1219974-1-tnovak@meta.com Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 8 ++++---- arch/arm64/kernel/hw_breakpoint.c | 4 ++-- include/linux/perf_event.h | 22 +++++++++++++++++++--- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 054e9199f30d..dc0fb7a81371 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -626,7 +626,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, hw->address &= ~alignment_mask; hw->ctrl.len <<= offset; - if (is_default_overflow_handler(bp)) { + if (uses_default_overflow_handler(bp)) { /* * Mismatch breakpoints are required for single-stepping * breakpoints. @@ -798,7 +798,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * Otherwise, insert a temporary mismatch breakpoint so that * we can single-step over the watchpoint trigger. 
*/ - if (!is_default_overflow_handler(wp)) + if (!uses_default_overflow_handler(wp)) continue; step: enable_single_step(wp, instruction_pointer(regs)); @@ -811,7 +811,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, info->trigger = addr; pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); perf_bp_event(wp, regs); - if (is_default_overflow_handler(wp)) + if (uses_default_overflow_handler(wp)) enable_single_step(wp, instruction_pointer(regs)); } @@ -886,7 +886,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) info->trigger = addr; pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); - if (is_default_overflow_handler(bp)) + if (uses_default_overflow_handler(bp)) enable_single_step(bp, addr); goto unlock; } diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index db2a1861bb97..35225632d70a 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -654,7 +654,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr, perf_bp_event(bp, regs); /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(bp)) + if (uses_default_overflow_handler(bp)) step = 1; unlock: rcu_read_unlock(); @@ -733,7 +733,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, static int watchpoint_report(struct perf_event *wp, unsigned long addr, struct pt_regs *regs) { - int step = is_default_overflow_handler(wp); + int step = uses_default_overflow_handler(wp); struct arch_hw_breakpoint *info = counter_arch_bp(wp); info->trigger = addr; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2166a69e3bf2..e657916c9509 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1316,15 +1316,31 @@ extern int perf_event_output(struct perf_event *event, struct pt_regs *regs); static inline bool -is_default_overflow_handler(struct perf_event *event) +__is_default_overflow_handler(perf_overflow_handler_t overflow_handler) { - if (likely(event->overflow_handler == perf_event_output_forward)) + if (likely(overflow_handler == perf_event_output_forward)) return true; - if (unlikely(event->overflow_handler == perf_event_output_backward)) + if (unlikely(overflow_handler == perf_event_output_backward)) return true; return false; } +#define is_default_overflow_handler(event) \ + __is_default_overflow_handler((event)->overflow_handler) + +#ifdef CONFIG_BPF_SYSCALL +static inline bool uses_default_overflow_handler(struct perf_event *event) +{ + if (likely(is_default_overflow_handler(event))) + return true; + + return __is_default_overflow_handler(event->orig_overflow_handler); +} +#else +#define uses_default_overflow_handler(event) \ + is_default_overflow_handler(event) +#endif + extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, From 94f23ac36f8b3c2140b00d78131c8df41cd9ac5d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Aug 2023 19:39:32 +0100 Subject: [PATCH 59/62] kselftest/arm64: Fix hwcaps selftest build The hwcaps selftest currently relies on the assembler being able to assemble the crc32w instruction but this is not in the base v8.0 so is not accepted by the standard GCC configurations used by many distributions. Switch to manually encoding to fix the build. 
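For reference, a sketch of where a constant like this comes from (field
layout per the Arm ARM; treat this as an illustration rather than a
reference): CRC32W Wd, Wn, Wm is a fixed bit pattern with the three
register numbers packed into fields, so the value used by the patch can
be reproduced as:

  #include <stdint.h>
  #include <stdio.h>

  static uint32_t crc32w(unsigned int rd, unsigned int rn, unsigned int rm)
  {
          /* base CRC32W opcode, plus Rm at [20:16], Rn at [9:5], Rd at [4:0] */
          return 0x1ac04800u | (rm << 16) | (rn << 5) | rd;
  }

  int main(void)
  {
          printf("%#x\n", crc32w(0, 0, 1)); /* 0x1ac14800: CRC32W W0, W0, W1 */
          return 0;
  }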
Fixes: 09d2e95a04ad ("kselftest/arm64: add crc32 feature to hwcap test") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230816-arm64-fix-crc32-build-v1-1-40165c1290f2@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/hwcap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index fe844b825a1e..0593192c0753 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -48,7 +48,8 @@ static void atomics_sigill(void) static void crc32_sigill(void) { - asm volatile("crc32w w0, w0, w1"); + /* CRC32W W0, W0, W1 */ + asm volatile(".inst 0x1ac14800" : : : ); } static void cssc_sigill(void) From 81e5ee471609848ee1ebf3beb2a46788113fe0eb Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 17 Aug 2023 11:24:02 +0530 Subject: [PATCH 60/62] arm_pmu: acpi: Refactor arm_spe_acpi_register_device() Sanity checking all the GICC tables for same interrupt number, and ensuring a homogeneous ACPI based machine, could be used for other platform devices as well. Hence this refactors arm_spe_acpi_register_device() into a common helper arm_acpi_register_pmu_device(). Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Co-developed-by: Will Deacon Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20230817055405.249630-2-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmu_acpi.c | 104 ++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 40 deletions(-) diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 90815ad762eb..48bd62d3993e 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -69,6 +69,62 @@ static void arm_pmu_acpi_unregister_irq(int cpu) acpi_unregister_gsi(gsi); } +static int __maybe_unused +arm_acpi_register_pmu_device(struct platform_device *pdev, u8 len, + u16 (*parse_gsi)(struct acpi_madt_generic_interrupt *)) +{ + int cpu, this_hetid, hetid, irq, ret; + u16 this_gsi = 0, gsi = 0; + + /* + * Ensure that platform device must have IORESOURCE_IRQ + * resource to hold gsi interrupt. + */ + if (pdev->num_resources != 1) + return -ENXIO; + + if (pdev->resource[0].flags != IORESOURCE_IRQ) + return -ENXIO; + + /* + * Sanity check all the GICC tables for the same interrupt + * number. For now, only support homogeneous ACPI machines. + */ + for_each_possible_cpu(cpu) { + struct acpi_madt_generic_interrupt *gicc; + + gicc = acpi_cpu_get_madt_gicc(cpu); + if (gicc->header.length < len) + return gsi ? 
-ENXIO : 0;
+
+		this_gsi = parse_gsi(gicc);
+		this_hetid = find_acpi_cpu_topology_hetero_id(cpu);
+		if (!gsi) {
+			hetid = this_hetid;
+			gsi = this_gsi;
+		} else if (hetid != this_hetid || gsi != this_gsi) {
+			pr_warn("ACPI: %s: must be homogeneous\n", pdev->name);
+			return -ENXIO;
+		}
+	}
+
+	if (!this_gsi)
+		return 0;
+
+	irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH);
+	if (irq < 0) {
+		pr_warn("ACPI: %s Unable to register interrupt: %d\n", pdev->name, gsi);
+		return -ENXIO;
+	}
+
+	pdev->resource[0].start = irq;
+	ret = platform_device_register(pdev);
+	if (ret)
+		acpi_unregister_gsi(gsi);
+
+	return ret;
+}
+
 #if IS_ENABLED(CONFIG_ARM_SPE_PMU)
 static struct resource spe_resources[] = {
 	{
@@ -84,6 +140,11 @@ static struct platform_device spe_dev = {
 	.num_resources = ARRAY_SIZE(spe_resources)
 };
 
+static u16 arm_spe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+	return gicc->spe_interrupt;
+}
+
 /*
  * For lack of a better place, hook the normal PMU MADT walk
  * and create a SPE device if we detect a recent MADT with
@@ -91,47 +152,10 @@ static struct platform_device spe_dev = {
  */
 static void arm_spe_acpi_register_device(void)
 {
-	int cpu, hetid, irq, ret;
-	bool first = true;
-	u16 gsi = 0;
-
-	/*
-	 * Sanity check all the GICC tables for the same interrupt number.
-	 * For now, we only support homogeneous ACPI/SPE machines.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct acpi_madt_generic_interrupt *gicc;
-
-		gicc = acpi_cpu_get_madt_gicc(cpu);
-		if (gicc->header.length < ACPI_MADT_GICC_SPE)
-			return;
-
-		if (first) {
-			gsi = gicc->spe_interrupt;
-			if (!gsi)
-				return;
-			hetid = find_acpi_cpu_topology_hetero_id(cpu);
-			first = false;
-		} else if ((gsi != gicc->spe_interrupt) ||
-			   (hetid != find_acpi_cpu_topology_hetero_id(cpu))) {
-			pr_warn("ACPI: SPE must be homogeneous\n");
-			return;
-		}
-	}
-
-	irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
-				ACPI_ACTIVE_HIGH);
-	if (irq < 0) {
-		pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi);
-		return;
-	}
-
-	spe_resources[0].start = irq;
-	ret = platform_device_register(&spe_dev);
-	if (ret < 0) {
+	int ret = arm_acpi_register_pmu_device(&spe_dev, ACPI_MADT_GICC_SPE,
+					       arm_spe_parse_gsi);
+	if (ret)
 		pr_warn("ACPI: SPE: Unable to register device\n");
-		acpi_unregister_gsi(gsi);
-	}
 }
 #else
 static inline void arm_spe_acpi_register_device(void)

From 1aa3d0274a4aac338ee45a3dfc3b17c944bcc2bc Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Thu, 17 Aug 2023 11:24:03 +0530
Subject: [PATCH 61/62] arm_pmu: acpi: Add a representative platform device for TRBE

ACPI TRBE does not have a HID for identification that could be used to
create and add a platform device to the platform bus. Also, without a
platform device, it cannot be probed and bound to a platform driver.

This creates a dummy platform device for TRBE after ascertaining that
ACPI provides the required interrupts uniformly across all CPUs on the
system. This device gets created inside drivers/perf/arm_pmu_acpi.c to
accommodate TRBE being built as a module.
Cc: Catalin Marinas
Cc: Will Deacon
Cc: Mark Rutland
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual
Link: https://lore.kernel.org/r/20230817055405.249630-3-anshuman.khandual@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/acpi.h |  3 +++
 drivers/perf/arm_pmu_acpi.c   | 35 +++++++++++++++++++++++++++++++++++
 include/linux/perf/arm_pmu.h  |  1 +
 3 files changed, 39 insertions(+)

diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index bd68e1b7f29f..4d537d56eb84 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -42,6 +42,9 @@
 #define ACPI_MADT_GICC_SPE	(offsetof(struct acpi_madt_generic_interrupt, \
 	spe_interrupt) + sizeof(u16))
 
+#define ACPI_MADT_GICC_TRBE	(offsetof(struct acpi_madt_generic_interrupt, \
+	trbe_interrupt) + sizeof(u16))
+
 /* Basic configuration for ACPI */
 #ifdef	CONFIG_ACPI
 pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 48bd62d3993e..05dda19c5359 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -163,6 +163,40 @@ static inline void arm_spe_acpi_register_device(void)
 }
 #endif /* CONFIG_ARM_SPE_PMU */
 
+#if IS_ENABLED(CONFIG_CORESIGHT_TRBE)
+static struct resource trbe_resources[] = {
+	{
+		/* irq */
+		.flags          = IORESOURCE_IRQ,
+	}
+};
+
+static struct platform_device trbe_dev = {
+	.name = ARMV8_TRBE_PDEV_NAME,
+	.id = -1,
+	.resource = trbe_resources,
+	.num_resources = ARRAY_SIZE(trbe_resources)
+};
+
+static u16 arm_trbe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+	return gicc->trbe_interrupt;
+}
+
+static void arm_trbe_acpi_register_device(void)
+{
+	int ret = arm_acpi_register_pmu_device(&trbe_dev, ACPI_MADT_GICC_TRBE,
+					       arm_trbe_parse_gsi);
+	if (ret)
+		pr_warn("ACPI: TRBE: Unable to register device\n");
+}
+#else
+static inline void arm_trbe_acpi_register_device(void)
+{
+
+}
+#endif /* CONFIG_CORESIGHT_TRBE */
+
 static int arm_pmu_acpi_parse_irqs(void)
 {
 	int irq, cpu, irq_cpu, err;
@@ -398,6 +432,7 @@ static int arm_pmu_acpi_init(void)
 		return 0;
 
 	arm_spe_acpi_register_device();
+	arm_trbe_acpi_register_device();
 
 	return 0;
 }
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index a0801f68762b..143fbc10ecfe 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -187,5 +187,6 @@ void armpmu_free_irq(int irq, int cpu);
 #endif /* CONFIG_ARM_PMU */
 
 #define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
+#define ARMV8_TRBE_PDEV_NAME "arm,trbe"
 
 #endif /* __ARM_PMU_H__ */

From 21b61fe48c2fc43d98ebb67a1f3832e0478fa523 Mon Sep 17 00:00:00 2001
From: Jijie Shao
Date: Thu, 24 Aug 2023 10:41:35 +0800
Subject: [PATCH 62/62] drivers/perf: hisi: Update HiSilicon PMU maintainers

Since Guangbin and Shaokun have left HiSilicon and will no longer
maintain the drivers, update the maintainer information, and thank
them for their work.
Signed-off-by: Jijie Shao Acked-by: Jonathan Cameron Acked-by: Yicong Yang Link: https://lore.kernel.org/r/20230824024135.1291459-1-shaojijie@huawei.com [will: left the HNS3 title as-is to avoid the churn of resorting the entries] Signed-off-by: Will Deacon --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d516295978a4..ce4d209a5bf5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9306,7 +9306,7 @@ F: drivers/crypto/hisilicon/hpre/hpre_crypto.c F: drivers/crypto/hisilicon/hpre/hpre_main.c HISILICON HNS3 PMU DRIVER -M: Guangbin Huang +M: Jijie Shao S: Supported F: Documentation/admin-guide/perf/hns3-pmu.rst F: drivers/perf/hisilicon/hns3_pmu.c @@ -9344,7 +9344,7 @@ F: Documentation/devicetree/bindings/net/hisilicon*.txt F: drivers/net/ethernet/hisilicon/ HISILICON PMU DRIVER -M: Shaokun Zhang +M: Yicong Yang M: Jonathan Cameron S: Supported W: http://www.hisilicon.com