From 3dfbd2d26b646271c4a0291dd7325d5b06a17a21 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sun, 10 Mar 2024 11:21:26 +0000 Subject: [PATCH 01/42] riscv: Remove unused asm/signal.h file When riscv moved to common entry the definition and usage of do_work_pending was removed. This unused header file remains. Remove the header file as it is not used. I have tested compiling the kernel with this patch applied and saw no issues. Noticed when auditing how different ports handle signals related to saving FPU state. Fixes: f0bddf50586d ("riscv: entry: Convert to generic entry") Signed-off-by: Stafford Horne Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/20240310112129.376134-1-shorne@gmail.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/signal.h | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 arch/riscv/include/asm/signal.h diff --git a/arch/riscv/include/asm/signal.h b/arch/riscv/include/asm/signal.h deleted file mode 100644 index 956ae0a01bad..000000000000 --- a/arch/riscv/include/asm/signal.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#ifndef __ASM_SIGNAL_H -#define __ASM_SIGNAL_H - -#include -#include - -asmlinkage __visible -void do_work_pending(struct pt_regs *regs, unsigned long thread_info_flags); - -#endif From 3b938e231b660a278de2988ee77b832d665c5326 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 23 Mar 2024 20:35:00 +0900 Subject: [PATCH 02/42] riscv: merge two if-blocks for KBUILD_IMAGE In arch/riscv/Makefile, KBUILD_IMAGE is assigned in two separate if-blocks. When CONFIG_XIP_KERNEL is disabled, the decision made by the first if-block is overwritten by the second one, which is redundant and unreadable. Merge the two if-blocks. Signed-off-by: Masahiro Yamada Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240323113500.1249272-1-masahiroy@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 252d63942f34..a74e70405d3f 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -133,7 +133,15 @@ boot := arch/riscv/boot ifeq ($(CONFIG_XIP_KERNEL),y) KBUILD_IMAGE := $(boot)/xipImage else +ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy) +KBUILD_IMAGE := $(boot)/loader.bin +else +ifeq ($(CONFIG_EFI_ZBOOT),) KBUILD_IMAGE := $(boot)/Image.gz +else +KBUILD_IMAGE := $(boot)/vmlinuz.efi +endif +endif endif libs-y += arch/riscv/lib/ @@ -153,17 +161,6 @@ endif vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so -ifneq ($(CONFIG_XIP_KERNEL),y) -ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy) -KBUILD_IMAGE := $(boot)/loader.bin -else -ifeq ($(CONFIG_EFI_ZBOOT),) -KBUILD_IMAGE := $(boot)/Image.gz -else -KBUILD_IMAGE := $(boot)/vmlinuz.efi -endif -endif -endif BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi all: $(notdir $(KBUILD_IMAGE)) From 36d37f11f555812b0ded5d15aa686a6b9da57f61 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 23 Mar 2024 18:06:15 +0900 Subject: [PATCH 03/42] export.h: remove include/asm-generic/export.h Commit 3a6dd5f614a1 ("riscv: remove unneeded #include ") removed the last use of include/asm-generic/export.h. This deprecated header can go away. 
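For reference, the removed header was nothing but a redirect shim; its shape was roughly the following (a sketch of the deprecation-shim pattern only, the deleted hunk below is the authoritative content):

  /* SPDX-License-Identifier: GPL-2.0-only */
  #ifndef __ASM_GENERIC_EXPORT_H
  #define __ASM_GENERIC_EXPORT_H

  /* Deprecated wrapper: include <linux/export.h> directly instead. */
  #include <linux/export.h>

  #endif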
Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20240323090615.1244904-1-masahiroy@kernel.org Signed-off-by: Palmer Dabbelt --- include/asm-generic/export.h | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 include/asm-generic/export.h diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h deleted file mode 100644 index 570cd4da7210..000000000000 --- a/include/asm-generic/export.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef __ASM_GENERIC_EXPORT_H -#define __ASM_GENERIC_EXPORT_H - -/* - * and are deprecated. - * Please include directly. - */ -#include - -#endif From 84c3a079ab98f729e9a968a201d9aa8d196195a1 Mon Sep 17 00:00:00 2001 From: "tanzirh@google.com" Date: Tue, 12 Dec 2023 00:20:14 +0000 Subject: [PATCH 04/42] riscv: remove unused header arch/riscv/kernel/sys_riscv.c builds without using anything from asm-generic/mman-common.h. There seems to be no reason to include this file. Signed-off-by: Tanzir Hasan Fixes: 9e2e6042a7ec ("riscv: Allow PROT_WRITE-only mmap()") Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Link: https://lore.kernel.org/r/20231212-removeasmgeneric-v2-1-a0e6d7df34a7@google.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index f1c1416a9f1e..64155323cc92 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -7,7 +7,6 @@ #include #include -#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, From 542124fc0d5cccea273bccf2ee58545ac17aad3f Mon Sep 17 00:00:00 2001 From: Yangyu Chen Date: Wed, 10 Jan 2024 02:48:59 +0800 Subject: [PATCH 05/42] RISC-V: only flush icache when it has VM_EXEC set As I-Cache flush on current RISC-V needs to send IPIs to every CPU cores in the system is very costly, limiting flush_icache_mm to be called only when vma->vm_flags has VM_EXEC can help minimize the frequency of these operations. It improves performance and reduces disturbances when copy_from_user_page is needed such as profiling with perf. For I-D coherence concerns, it will not fail if such a page adds VM_EXEC flags in the future since we have checked it in the __set_pte_at function. Signed-off-by: Yangyu Chen Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/tencent_6D851035F6F2FD0B5A69FB391AE39AC6300A@qq.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index a129dac4521d..dd8d07146116 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -33,8 +33,11 @@ static inline void flush_dcache_page(struct page *page) * so instead we just flush the whole thing. 
*/ #define flush_icache_range(start, end) flush_icache_all() -#define flush_icache_user_page(vma, pg, addr, len) \ - flush_icache_mm(vma->vm_mm, 0) +#define flush_icache_user_page(vma, pg, addr, len) \ +do { \ + if (vma->vm_flags & VM_EXEC) \ + flush_icache_mm(vma->vm_mm, 0); \ +} while (0) #ifdef CONFIG_64BIT #define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end) From 4bfa185fe3f0b20ebb6e7224dae771fcb657f260 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 3 Jan 2024 13:31:59 -0300 Subject: [PATCH 06/42] riscv/cmpxchg: Deduplicate xchg() asm functions In this header every xchg define (_relaxed, _acquire, _release, vanilla) contain it's own asm file, both for 4-byte variables an 8-byte variables, on a total of 8 versions of mostly the same asm. This is usually bad, as it means any change may be done in up to 8 different places. Unify those versions by creating a new define with enough parameters to generate any version of the previous 8. Then unify the result under a more general define, and simplify arch_xchg* generation. (This did not cause any change in generated asm) Signed-off-by: Leonardo Bras Reviewed-by: Guo Ren Reviewed-by: Andrea Parri Tested-by: Guo Ren Link: https://lore.kernel.org/r/20240103163203.72768-3-leobras@redhat.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cmpxchg.h | 140 ++++++------------------------- 1 file changed, 24 insertions(+), 116 deletions(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 2f4726d3cfcc..48478a8eecee 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -11,140 +11,48 @@ #include #include -#define __xchg_relaxed(ptr, new, size) \ +#define __arch_xchg(sfx, prepend, append, r, p, n) \ +({ \ + __asm__ __volatile__ ( \ + prepend \ + " amoswap" sfx " %0, %2, %1\n" \ + append \ + : "=r" (r), "+A" (*(p)) \ + : "r" (n) \ + : "memory"); \ +}) + +#define _arch_xchg(ptr, new, sfx, prepend, append) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ - __typeof__(new) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - switch (size) { \ + __typeof__(*(__ptr)) __new = (new); \ + __typeof__(*(__ptr)) __ret; \ + switch (sizeof(*__ptr)) { \ case 4: \ - __asm__ __volatile__ ( \ - " amoswap.w %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ + __arch_xchg(".w" sfx, prepend, append, \ + __ret, __ptr, __new); \ break; \ case 8: \ - __asm__ __volatile__ ( \ - " amoswap.d %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ + __arch_xchg(".d" sfx, prepend, append, \ + __ret, __ptr, __new); \ break; \ default: \ BUILD_BUG(); \ } \ - __ret; \ + (__typeof__(*(__ptr)))__ret; \ }) #define arch_xchg_relaxed(ptr, x) \ -({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ - _x_, sizeof(*(ptr))); \ -}) - -#define __xchg_acquire(ptr, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(new) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - " amoswap.w %0, %2, %1\n" \ - RISCV_ACQUIRE_BARRIER \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - " amoswap.d %0, %2, %1\n" \ - RISCV_ACQUIRE_BARRIER \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) + _arch_xchg(ptr, x, "", "", "") #define arch_xchg_acquire(ptr, x) \ -({ \ - __typeof__(*(ptr)) _x_ = (x); \ - 
(__typeof__(*(ptr))) __xchg_acquire((ptr), \ - _x_, sizeof(*(ptr))); \ -}) - -#define __xchg_release(ptr, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(new) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - RISCV_RELEASE_BARRIER \ - " amoswap.w %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - RISCV_RELEASE_BARRIER \ - " amoswap.d %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) + _arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER) #define arch_xchg_release(ptr, x) \ -({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg_release((ptr), \ - _x_, sizeof(*(ptr))); \ -}) - -#define __arch_xchg(ptr, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(new) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - " amoswap.w.aqrl %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - " amoswap.d.aqrl %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) + _arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "") #define arch_xchg(ptr, x) \ -({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __arch_xchg((ptr), _x_, sizeof(*(ptr))); \ -}) + _arch_xchg(ptr, x, ".aqrl", "", "") #define xchg32(ptr, x) \ ({ \ From 07a0a41cb77d582e4db05bd9e79daa145d5d6ea4 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 3 Jan 2024 13:32:00 -0300 Subject: [PATCH 07/42] riscv/cmpxchg: Deduplicate cmpxchg() asm and macros In this header every cmpxchg define (_relaxed, _acquire, _release, vanilla) contain it's own asm file, both for 4-byte variables an 8-byte variables, on a total of 8 versions of mostly the same asm. This is usually bad, as it means any change may be done in up to 8 different places. Unify those versions by creating a new define with enough parameters to generate any version of the previous 8. Then unify the result under a more general define, and simplify arch_cmpxchg* generation (This did not cause any change in generated asm) Signed-off-by: Leonardo Bras Reviewed-by: Guo Ren Reviewed-by: Andrea Parri Tested-by: Guo Ren Link: https://lore.kernel.org/r/20240103163203.72768-4-leobras@redhat.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cmpxchg.h | 199 ++++++------------------------- 1 file changed, 35 insertions(+), 164 deletions(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 48478a8eecee..e3e0ac7ba061 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -71,190 +71,61 @@ * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. 
*/ -#define __cmpxchg_relaxed(ptr, old, new, size) \ + + +#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ +({ \ + register unsigned int __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + "0: lr" lr_sfx " %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc" sc_sfx " %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + append \ + "1:\n" \ + : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ + : "rJ" (co o), "rJ" (n) \ + : "memory"); \ +}) + +#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - register unsigned int __rc; \ - switch (size) { \ + __typeof__(*(__ptr)) __old = (old); \ + __typeof__(*(__ptr)) __new = (new); \ + __typeof__(*(__ptr)) __ret; \ + \ + switch (sizeof(*__ptr)) { \ case 4: \ - __asm__ __volatile__ ( \ - "0: lr.w %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.w %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" ((long)__old), "rJ" (__new) \ - : "memory"); \ + __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ + __ret, __ptr, (long), __old, __new); \ break; \ case 8: \ - __asm__ __volatile__ ( \ - "0: lr.d %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.d %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ - : "memory"); \ + __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \ + __ret, __ptr, /**/, __old, __new); \ break; \ default: \ BUILD_BUG(); \ } \ - __ret; \ + (__typeof__(*(__ptr)))__ret; \ }) #define arch_cmpxchg_relaxed(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \ - _o_, _n_, sizeof(*(ptr))); \ -}) - -#define __cmpxchg_acquire(ptr, old, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - register unsigned int __rc; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - "0: lr.w %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.w %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - RISCV_ACQUIRE_BARRIER \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" ((long)__old), "rJ" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - "0: lr.d %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.d %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - RISCV_ACQUIRE_BARRIER \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) + _arch_cmpxchg((ptr), (o), (n), "", "", "") #define arch_cmpxchg_acquire(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \ - _o_, _n_, sizeof(*(ptr))); \ -}) - -#define __cmpxchg_release(ptr, old, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - register unsigned int __rc; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - RISCV_RELEASE_BARRIER \ - "0: lr.w %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.w %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" ((long)__old), "rJ" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - RISCV_RELEASE_BARRIER \ - "0: lr.d 
%0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.d %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) + _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER) #define arch_cmpxchg_release(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg_release((ptr), \ - _o_, _n_, sizeof(*(ptr))); \ -}) - -#define __cmpxchg(ptr, old, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - register unsigned int __rc; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - "0: lr.w %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.w.rl %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - " fence rw, rw\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" ((long)__old), "rJ" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - "0: lr.d %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.d.rl %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - " fence rw, rw\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) + _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "") #define arch_cmpxchg(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg((ptr), \ - _o_, _n_, sizeof(*(ptr))); \ -}) + _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") #define arch_cmpxchg_local(ptr, o, n) \ - (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr)))) + arch_cmpxchg_relaxed((ptr), (o), (n)) #define arch_cmpxchg64(ptr, o, n) \ ({ \ From 9061237392721f0e9b399161876fa36ddb6c4226 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 3 Jan 2024 13:32:01 -0300 Subject: [PATCH 08/42] riscv/atomic.h : Deduplicate arch_atomic.* Some functions use mostly the same asm for 32-bit and 64-bit versions. Make a macro that is generic enough and avoid code duplication. (This did not cause any change in generated asm) Signed-off-by: Leonardo Bras Reviewed-by: Guo Ren Reviewed-by: Andrea Parri Tested-by: Guo Ren Link: https://lore.kernel.org/r/20240103163203.72768-5-leobras@redhat.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/atomic.h | 164 +++++++++++++++----------------- 1 file changed, 76 insertions(+), 88 deletions(-) diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index f5dfef6c2153..80cca7ac16fd 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -196,22 +196,28 @@ ATOMIC_OPS(xor, xor, i) #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN +#define _arch_atomic_fetch_add_unless(_prev, _rc, counter, _a, _u, sfx) \ +({ \ + __asm__ __volatile__ ( \ + "0: lr." sfx " %[p], %[c]\n" \ + " beq %[p], %[u], 1f\n" \ + " add %[rc], %[p], %[a]\n" \ + " sc." sfx ".rl %[rc], %[rc], %[c]\n" \ + " bnez %[rc], 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \ + : [a]"r" (_a), [u]"r" (_u) \ + : "memory"); \ +}) + /* This is required to provide a full barrier on success. 
*/ static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) { int prev, rc; - __asm__ __volatile__ ( - "0: lr.w %[p], %[c]\n" - " beq %[p], %[u], 1f\n" - " add %[rc], %[p], %[a]\n" - " sc.w.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : [a]"r" (a), [u]"r" (u) - : "memory"); + _arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "w"); + return prev; } #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless @@ -222,77 +228,86 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 prev; long rc; - __asm__ __volatile__ ( - "0: lr.d %[p], %[c]\n" - " beq %[p], %[u], 1f\n" - " add %[rc], %[p], %[a]\n" - " sc.d.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : [a]"r" (a), [u]"r" (u) - : "memory"); + _arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "d"); + return prev; } #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless #endif +#define _arch_atomic_inc_unless_negative(_prev, _rc, counter, sfx) \ +({ \ + __asm__ __volatile__ ( \ + "0: lr." sfx " %[p], %[c]\n" \ + " bltz %[p], 1f\n" \ + " addi %[rc], %[p], 1\n" \ + " sc." sfx ".rl %[rc], %[rc], %[c]\n" \ + " bnez %[rc], 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \ + : \ + : "memory"); \ +}) + static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v) { int prev, rc; - __asm__ __volatile__ ( - "0: lr.w %[p], %[c]\n" - " bltz %[p], 1f\n" - " addi %[rc], %[p], 1\n" - " sc.w.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_inc_unless_negative(prev, rc, v->counter, "w"); + return !(prev < 0); } #define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative +#define _arch_atomic_dec_unless_positive(_prev, _rc, counter, sfx) \ +({ \ + __asm__ __volatile__ ( \ + "0: lr." sfx " %[p], %[c]\n" \ + " bgtz %[p], 1f\n" \ + " addi %[rc], %[p], -1\n" \ + " sc." sfx ".rl %[rc], %[rc], %[c]\n" \ + " bnez %[rc], 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \ + : \ + : "memory"); \ +}) + static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v) { int prev, rc; - __asm__ __volatile__ ( - "0: lr.w %[p], %[c]\n" - " bgtz %[p], 1f\n" - " addi %[rc], %[p], -1\n" - " sc.w.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_dec_unless_positive(prev, rc, v->counter, "w"); + return !(prev > 0); } #define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive +#define _arch_atomic_dec_if_positive(_prev, _rc, counter, sfx) \ +({ \ + __asm__ __volatile__ ( \ + "0: lr." sfx " %[p], %[c]\n" \ + " addi %[rc], %[p], -1\n" \ + " bltz %[rc], 1f\n" \ + " sc." 
sfx ".rl %[rc], %[rc], %[c]\n" \ + " bnez %[rc], 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \ + : \ + : "memory"); \ +}) + static __always_inline int arch_atomic_dec_if_positive(atomic_t *v) { int prev, rc; - __asm__ __volatile__ ( - "0: lr.w %[p], %[c]\n" - " addi %[rc], %[p], -1\n" - " bltz %[rc], 1f\n" - " sc.w.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_dec_if_positive(prev, rc, v->counter, "w"); + return prev - 1; } @@ -304,17 +319,8 @@ static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v) s64 prev; long rc; - __asm__ __volatile__ ( - "0: lr.d %[p], %[c]\n" - " bltz %[p], 1f\n" - " addi %[rc], %[p], 1\n" - " sc.d.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_inc_unless_negative(prev, rc, v->counter, "d"); + return !(prev < 0); } @@ -325,17 +331,8 @@ static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v) s64 prev; long rc; - __asm__ __volatile__ ( - "0: lr.d %[p], %[c]\n" - " bgtz %[p], 1f\n" - " addi %[rc], %[p], -1\n" - " sc.d.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_dec_unless_positive(prev, rc, v->counter, "d"); + return !(prev > 0); } @@ -346,17 +343,8 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) s64 prev; long rc; - __asm__ __volatile__ ( - "0: lr.d %[p], %[c]\n" - " addi %[rc], %[p], -1\n" - " bltz %[rc], 1f\n" - " sc.d.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_dec_if_positive(prev, rc, v->counter, "d"); + return prev - 1; } From 54280ca64626f73ce39ba8f7befa9139a6786ffd Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 3 Jan 2024 13:32:02 -0300 Subject: [PATCH 09/42] riscv/cmpxchg: Implement cmpxchg for variables of size 1 and 2 cmpxchg for variables of size 1-byte and 2-bytes is not yet available for riscv, even though its present in other architectures such as arm64 and x86. This could lead to not being able to implement some locking mechanisms or requiring some rework to make it work properly. Implement 1-byte and 2-bytes cmpxchg in order to achieve parity with other architectures. Signed-off-by: Leonardo Bras Tested-by: Guo Ren Link: https://lore.kernel.org/r/20240103163203.72768-6-leobras@redhat.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cmpxchg.h | 34 ++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index e3e0ac7ba061..ac9d0eeb74e6 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -72,6 +72,35 @@ * indicated by comparing RETURN with OLD. 
*/ +#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \ +({ \ + u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ + ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ + ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ + << __s; \ + ulong __newx = (ulong)(n) << __s; \ + ulong __oldx = (ulong)(o) << __s; \ + ulong __retx; \ + ulong __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + "0: lr.w %0, %2\n" \ + " and %1, %0, %z5\n" \ + " bne %1, %z3, 1f\n" \ + " and %1, %0, %z6\n" \ + " or %1, %1, %z4\n" \ + " sc.w" sc_sfx " %1, %1, %2\n" \ + " bnez %1, 0b\n" \ + append \ + "1:\n" \ + : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ + : "rJ" ((long)__oldx), "rJ" (__newx), \ + "rJ" (__mask), "rJ" (~__mask) \ + : "memory"); \ + \ + r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ +}) #define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ ({ \ @@ -98,6 +127,11 @@ __typeof__(*(__ptr)) __ret; \ \ switch (sizeof(*__ptr)) { \ + case 1: \ + case 2: \ + __arch_cmpxchg_masked(sc_sfx, prepend, append, \ + __ret, __ptr, __old, __new); \ + break; \ case 4: \ __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ __ret, __ptr, (long), __old, __new); \ From a8ed2b7a2c13cb8a613cc9a8862688a1385b942d Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Wed, 3 Jan 2024 13:32:03 -0300 Subject: [PATCH 10/42] riscv/cmpxchg: Implement xchg for variables of size 1 and 2 xchg for variables of size 1-byte and 2-bytes is not yet available for riscv, even though its present in other architectures such as arm64 and x86. This could lead to not being able to implement some locking mechanisms or requiring some rework to make it work properly. Implement 1-byte and 2-bytes xchg in order to achieve parity with other architectures. 
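The masking arithmetic is easier to follow in plain C. Below is a minimal, non-atomic sketch of what the new macro computes for a 1-byte access on little-endian RISC-V; the function name is illustrative only, and the real code performs the load/modify/store as an lr.w/sc.w retry loop:

  static unsigned char xchg_u8_sketch(unsigned char *p, unsigned char newv)
  {
          unsigned long addr = (unsigned long)p;
          unsigned int *word = (unsigned int *)(addr & ~0x3UL);  /* aligned 32-bit container word */
          unsigned int shift = (addr & 0x3) * 8;                 /* bit position of the byte      */
          unsigned int mask = 0xffU << shift;
          unsigned int old, new;

          old = *word;                                           /* lr.w in the real code          */
          new = (old & ~mask) | ((unsigned int)newv << shift);
          *word = new;                                           /* sc.w, retried until it sticks  */

          return (old & mask) >> shift;                          /* previous value of the byte     */
  }

The cmpxchg variant in the previous patch adds the expected-value comparison inside the same loop, masking out the neighbouring bytes before comparing.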
Signed-off-by: Leonardo Bras Tested-by: Guo Ren Link: https://lore.kernel.org/r/20240103163203.72768-7-leobras@redhat.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cmpxchg.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index ac9d0eeb74e6..26cea2395aae 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -11,6 +11,31 @@ #include #include +#define __arch_xchg_masked(prepend, append, r, p, n) \ +({ \ + u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ + ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ + ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ + << __s; \ + ulong __newx = (ulong)(n) << __s; \ + ulong __retx; \ + ulong __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + "0: lr.w %0, %2\n" \ + " and %1, %0, %z4\n" \ + " or %1, %1, %z3\n" \ + " sc.w %1, %1, %2\n" \ + " bnez %1, 0b\n" \ + append \ + : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ + : "rJ" (__newx), "rJ" (~__mask) \ + : "memory"); \ + \ + r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ +}) + #define __arch_xchg(sfx, prepend, append, r, p, n) \ ({ \ __asm__ __volatile__ ( \ @@ -27,7 +52,13 @@ __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(__ptr)) __new = (new); \ __typeof__(*(__ptr)) __ret; \ + \ switch (sizeof(*__ptr)) { \ + case 1: \ + case 2: \ + __arch_xchg_masked(prepend, append, \ + __ret, __ptr, __new); \ + break; \ case 4: \ __arch_xchg(".w" sfx, prepend, append, \ __ret, __ptr, __new); \ From 9c4319d697448e738b1e20d187d9391164c84a89 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 26 Feb 2024 16:34:48 -0800 Subject: [PATCH 11/42] riscv: Remove MMU dependency from Zbb and Zicboz The Zbb and Zicboz ISA extensions have no dependency on an MMU and are equally useful on NOMMU kernels. Signed-off-by: Samuel Holland Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240227003630.3634533-4-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bffbd869a068..ef53c00470d6 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -581,7 +581,6 @@ config TOOLCHAIN_HAS_ZBB config RISCV_ISA_ZBB bool "Zbb extension support for bit manipulation instructions" depends on TOOLCHAIN_HAS_ZBB - depends on MMU depends on RISCV_ALTERNATIVE default y help @@ -613,7 +612,6 @@ config RISCV_ISA_ZICBOM config RISCV_ISA_ZICBOZ bool "Zicboz extension support for faster zeroing of memory" - depends on MMU depends on RISCV_ALTERNATIVE default y help From f862bbf4cdca696ef3073c5cf3d340b778a3e42a Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 26 Feb 2024 16:34:49 -0800 Subject: [PATCH 12/42] riscv: Allow NOMMU kernels to run in S-mode For ease of testing, it is convenient to run NOMMU kernels in supervisor mode. The only required change is to offset the kernel load address, since the beginning of RAM is usually reserved for M-mode firmware. 
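Concretely, with the defaults below (the addresses are the Kconfig values; reading the 2 MiB gap as room for the SBI firmware at the base of RAM reflects the usual layout and is not something this patch enforces, and the macro names are only for illustration):

  #define DRAM_BASE            0x80000000UL  /* start of RAM on many RISC-V platforms    */
  #define NOMMU_M_MODE_BASE    0x80000000UL  /* M-mode kernel: nothing runs below it     */
  #define NOMMU_S_MODE_BASE    0x80200000UL  /* S-mode kernel: skips the firmware region */

The kernel expresses this choice through CONFIG_PAGE_OFFSET, as the hunk below shows.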
Signed-off-by: Samuel Holland Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240227003630.3634533-5-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index ef53c00470d6..0dc09b2ac2f6 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -64,7 +64,7 @@ config RISCV select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU select BUILDTIME_TABLE_SORT if MMU - select CLINT_TIMER if !MMU + select CLINT_TIMER if RISCV_M_MODE select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND @@ -220,8 +220,12 @@ config ARCH_MMAP_RND_COMPAT_BITS_MAX # set if we run in machine mode, cleared if we run in supervisor mode config RISCV_M_MODE - bool - default !MMU + bool "Build a kernel that runs in machine mode" + depends on !MMU + default y + help + Select this option if you want to run the kernel in M-mode, + without the assistance of any other firmware. # set if we are running in S-mode and can use SBI calls config RISCV_SBI @@ -238,8 +242,9 @@ config MMU config PAGE_OFFSET hex - default 0xC0000000 if 32BIT && MMU - default 0x80000000 if !MMU + default 0x80000000 if !MMU && RISCV_M_MODE + default 0x80200000 if !MMU + default 0xc0000000 if 32BIT default 0xff60000000000000 if 64BIT config KASAN_SHADOW_OFFSET From 29cee75fb66e6f2845360e0598974253bf79181a Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 29 Feb 2024 13:10:55 +0100 Subject: [PATCH 13/42] riscv: Remove superfluous smp_mb() This memory barrier is not needed and not documented so simply remove it. Suggested-by: Andrea Parri Signed-off-by: Alexandre Ghiti Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20240229121056.203419-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/patch.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 37e87fdcf6a0..0b5c16dfe3f4 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -239,7 +239,6 @@ static int patch_text_cb(void *data) } else { while (atomic_read(&patch->cpu_count) <= num_online_cpus()) cpu_relax(); - smp_mb(); } return ret; From c97bf629963e52b205ed5fbaf151e5bd342f9c63 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 29 Feb 2024 13:10:56 +0100 Subject: [PATCH 14/42] riscv: Fix text patching when IPI are used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now, we use stop_machine() to patch the text and when we use IPIs for remote icache flushes (which is emitted in patch_text_nosync()), the system hangs. So instead, make sure every CPU executes the stop_machine() patching function and emit a local icache flush there. 
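The rendezvous used by both call sites can be condensed as follows (a sketch, not the literal kernel code; struct my_patch and do_the_patch() are placeholders for the real payload and write-out):

  #include <linux/atomic.h>
  #include <linux/cpumask.h>
  #include <asm/cacheflush.h>     /* local_flush_icache_all() */
  #include <asm/processor.h>      /* cpu_relax()              */

  struct my_patch {               /* placeholder payload */
          atomic_t cpu_count;
          /* ... addresses and instructions to write ... */
  };

  static void do_the_patch(struct my_patch *p);   /* placeholder, provided elsewhere */

  static int patch_rendezvous(void *data)
  {
          struct my_patch *p = data;

          if (atomic_inc_return(&p->cpu_count) == num_online_cpus()) {
                  do_the_patch(p);
                  /* Release increment: publish the new text to the spinning CPUs. */
                  atomic_inc_return_release(&p->cpu_count);
          } else {
                  while (atomic_read(&p->cpu_count) <= num_online_cpus())
                          cpu_relax();
          }

          /* Every CPU, the patcher included, resynchronizes its own icache. */
          local_flush_icache_all();
          return 0;
  }

Running this through stop_machine() on cpu_online_mask means each online CPU performs the flush itself, so no cross-CPU IPI is needed while the other harts are spinning with interrupts disabled.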
Co-developed-by: Björn Töpel Signed-off-by: Björn Töpel Signed-off-by: Alexandre Ghiti Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20240229121056.203419-3-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/patch.h | 1 + arch/riscv/kernel/ftrace.c | 44 ++++++++++++++++++++++++++++++---- arch/riscv/kernel/patch.c | 16 +++++++++---- 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h index e88b52d39eac..9f5d6e14c405 100644 --- a/arch/riscv/include/asm/patch.h +++ b/arch/riscv/include/asm/patch.h @@ -6,6 +6,7 @@ #ifndef _ASM_RISCV_PATCH_H #define _ASM_RISCV_PATCH_H +int patch_insn_write(void *addr, const void *insn, size_t len); int patch_text_nosync(void *addr, const void *insns, size_t len); int patch_text_set_nosync(void *addr, u8 c, size_t len); int patch_text(void *addr, u32 *insns, int ninsns); diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index f5aa24d9e1c1..4f4987a6d83d 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -75,8 +76,7 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, make_call_t0(hook_pos, target, call); /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */ - if (patch_text_nosync - ((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) return -EPERM; return 0; @@ -88,7 +88,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) make_call_t0(rec->ip, addr, call); - if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)rec->ip, call, MCOUNT_INSN_SIZE)) return -EPERM; return 0; @@ -99,7 +99,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, { unsigned int nops[2] = {NOP4, NOP4}; - if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) return -EPERM; return 0; @@ -134,6 +134,42 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } + +struct ftrace_modify_param { + int command; + atomic_t cpu_count; +}; + +static int __ftrace_modify_code(void *data) +{ + struct ftrace_modify_param *param = data; + + if (atomic_inc_return(¶m->cpu_count) == num_online_cpus()) { + ftrace_modify_all_code(param->command); + /* + * Make sure the patching store is effective *before* we + * increment the counter which releases all waiting CPUs + * by using the release variant of atomic increment. The + * release pairs with the call to local_flush_icache_all() + * on the waiting CPU. 
+ */ + atomic_inc_return_release(¶m->cpu_count); + } else { + while (atomic_read(¶m->cpu_count) <= num_online_cpus()) + cpu_relax(); + } + + local_flush_icache_all(); + + return 0; +} + +void arch_ftrace_update_code(int command) +{ + struct ftrace_modify_param param = { command, ATOMIC_INIT(0) }; + + stop_machine(__ftrace_modify_code, ¶m, cpu_online_mask); +} #endif #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 0b5c16dfe3f4..9a1bce1adf5a 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -188,7 +188,7 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) } NOKPROBE_SYMBOL(patch_text_set_nosync); -static int patch_insn_write(void *addr, const void *insn, size_t len) +int patch_insn_write(void *addr, const void *insn, size_t len) { size_t patched = 0; size_t size; @@ -232,15 +232,23 @@ static int patch_text_cb(void *data) if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { for (i = 0; ret == 0 && i < patch->ninsns; i++) { len = GET_INSN_LENGTH(patch->insns[i]); - ret = patch_text_nosync(patch->addr + i * len, - &patch->insns[i], len); + ret = patch_insn_write(patch->addr + i * len, &patch->insns[i], len); } - atomic_inc(&patch->cpu_count); + /* + * Make sure the patching store is effective *before* we + * increment the counter which releases all waiting CPUs + * by using the release variant of atomic increment. The + * release pairs with the call to local_flush_icache_all() + * on the waiting CPU. + */ + atomic_inc_return_release(&patch->cpu_count); } else { while (atomic_read(&patch->cpu_count) <= num_online_cpus()) cpu_relax(); } + local_flush_icache_all(); + return ret; } NOKPROBE_SYMBOL(patch_text_cb); From bebc345413f5fb4c8fafb59ff0bd8509197627e6 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Tue, 12 Mar 2024 16:53:40 -0700 Subject: [PATCH 15/42] riscv: Remove unnecessary irqflags processor.h include This include is not used and can lead to circular dependencies. Signed-off-by: Charlie Jenkins Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20240312-fencei-v13-1-4b6bdc2bbf32@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/irqflags.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/include/asm/irqflags.h b/arch/riscv/include/asm/irqflags.h index 08d4d6a5b7e9..6fd8cbfcfcc7 100644 --- a/arch/riscv/include/asm/irqflags.h +++ b/arch/riscv/include/asm/irqflags.h @@ -7,7 +7,6 @@ #ifndef _ASM_RISCV_IRQFLAGS_H #define _ASM_RISCV_IRQFLAGS_H -#include #include /* read interrupt enabled status */ From 6b9391b581fddd8579239dad4de4f0393149e10a Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Tue, 12 Mar 2024 16:53:41 -0700 Subject: [PATCH 16/42] riscv: Include riscv_set_icache_flush_ctx prctl Support new prctl with key PR_RISCV_SET_ICACHE_FLUSH_CTX to enable optimization of cross modifying code. This prctl enables userspace code to use icache flushing instructions such as fence.i with the guarantee that the icache will continue to be clean after thread migration. 
Signed-off-by: Charlie Jenkins Reviewed-by: Atish Patra Reviewed-by: Alexandre Ghiti Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20240312-fencei-v13-2-4b6bdc2bbf32@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 2 + arch/riscv/include/asm/processor.h | 10 +++ arch/riscv/include/asm/switch_to.h | 23 ++++++ arch/riscv/mm/cacheflush.c | 113 +++++++++++++++++++++++++++++ arch/riscv/mm/context.c | 19 +++-- include/uapi/linux/prctl.h | 6 ++ kernel/sys.c | 6 ++ 7 files changed, 171 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 355504b37f8e..60be458e94da 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,8 @@ typedef struct { #ifdef CONFIG_SMP /* A local icache flush is needed before user execution can resume. */ cpumask_t icache_stale_mask; + /* Force local icache flush on all migrations. */ + bool force_icache_flush; #endif #ifdef CONFIG_BINFMT_ELF_FDPIC unsigned long exec_fdpic_loadmap; diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index a8509cc31ab2..cca62013c3c0 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -69,6 +69,7 @@ #endif #ifndef __ASSEMBLY__ +#include struct task_struct; struct pt_regs; @@ -123,6 +124,12 @@ struct thread_struct { struct __riscv_v_ext_state vstate; unsigned long align_ctl; struct __riscv_v_ext_state kernel_vstate; +#ifdef CONFIG_SMP + /* Flush the icache on migration */ + bool force_icache_flush; + /* A forced icache flush is not needed if migrating to the previous cpu. */ + unsigned int prev_cpu; +#endif }; /* Whitelist the fstate from the task_struct for hardened usercopy */ @@ -184,6 +191,9 @@ extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); #define GET_UNALIGN_CTL(tsk, addr) get_unalign_ctl((tsk), (addr)) #define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val)) +#define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) riscv_set_icache_flush_ctx(arg1, arg2) +extern int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PROCESSOR_H */ diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 7efdb0584d47..7594df37cc9f 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -72,14 +73,36 @@ static __always_inline bool has_fpu(void) { return false; } extern struct task_struct *__switch_to(struct task_struct *, struct task_struct *); +static inline bool switch_to_should_flush_icache(struct task_struct *task) +{ +#ifdef CONFIG_SMP + bool stale_mm = task->mm && task->mm->context.force_icache_flush; + bool stale_thread = task->thread.force_icache_flush; + bool thread_migrated = smp_processor_id() != task->thread.prev_cpu; + + return thread_migrated && (stale_mm || stale_thread); +#else + return false; +#endif +} + +#ifdef CONFIG_SMP +#define __set_prev_cpu(thread) ((thread).prev_cpu = smp_processor_id()) +#else +#define __set_prev_cpu(thread) +#endif + #define switch_to(prev, next, last) \ do { \ struct task_struct *__prev = (prev); \ struct task_struct *__next = (next); \ + __set_prev_cpu(__prev->thread); \ if (has_fpu()) \ __switch_to_fpu(__prev, __next); \ if (has_vector()) \ __switch_to_vector(__prev, __next); \ + if (switch_to_should_flush_icache(__next)) \ + local_flush_icache_all(); \ ((last) = 
__switch_to(__prev, __next)); \ } while (0) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 55a34f2020a8..15a95578e670 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -152,3 +153,115 @@ void __init riscv_init_cbo_blocksizes(void) if (cboz_block_size) riscv_cboz_block_size = cboz_block_size; } + +#ifdef CONFIG_SMP +static void set_icache_stale_mask(void) +{ + cpumask_t *mask; + bool stale_cpu; + + /* + * Mark every other hart's icache as needing a flush for + * this MM. Maintain the previous value of the current + * cpu to handle the case when this function is called + * concurrently on different harts. + */ + mask = ¤t->mm->context.icache_stale_mask; + stale_cpu = cpumask_test_cpu(smp_processor_id(), mask); + + cpumask_setall(mask); + assign_bit(cpumask_check(smp_processor_id()), cpumask_bits(mask), stale_cpu); +} +#endif + +/** + * riscv_set_icache_flush_ctx() - Enable/disable icache flushing instructions in + * userspace. + * @ctx: Set the type of icache flushing instructions permitted/prohibited in + * userspace. Supported values described below. + * + * Supported values for ctx: + * + * * %PR_RISCV_CTX_SW_FENCEI_ON: Allow fence.i in user space. + * + * * %PR_RISCV_CTX_SW_FENCEI_OFF: Disallow fence.i in user space. All threads in + * a process will be affected when ``scope == PR_RISCV_SCOPE_PER_PROCESS``. + * Therefore, caution must be taken; use this flag only when you can guarantee + * that no thread in the process will emit fence.i from this point onward. + * + * @scope: Set scope of where icache flushing instructions are allowed to be + * emitted. Supported values described below. + * + * Supported values for scope: + * + * * %PR_RISCV_SCOPE_PER_PROCESS: Ensure the icache of any thread in this process + * is coherent with instruction storage upon + * migration. + * + * * %PR_RISCV_SCOPE_PER_THREAD: Ensure the icache of the current thread is + * coherent with instruction storage upon + * migration. + * + * When ``scope == PR_RISCV_SCOPE_PER_PROCESS``, all threads in the process are + * permitted to emit icache flushing instructions. Whenever any thread in the + * process is migrated, the corresponding hart's icache will be guaranteed to be + * consistent with instruction storage. This does not enforce any guarantees + * outside of migration. If a thread modifies an instruction that another thread + * may attempt to execute, the other thread must still emit an icache flushing + * instruction before attempting to execute the potentially modified + * instruction. This must be performed by the user-space program. + * + * In per-thread context (eg. ``scope == PR_RISCV_SCOPE_PER_THREAD``) only the + * thread calling this function is permitted to emit icache flushing + * instructions. When the thread is migrated, the corresponding hart's icache + * will be guaranteed to be consistent with instruction storage. + * + * On kernels configured without SMP, this function is a nop as migrations + * across harts will not occur. 
+ */ +int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long scope) +{ +#ifdef CONFIG_SMP + switch (ctx) { + case PR_RISCV_CTX_SW_FENCEI_ON: + switch (scope) { + case PR_RISCV_SCOPE_PER_PROCESS: + current->mm->context.force_icache_flush = true; + break; + case PR_RISCV_SCOPE_PER_THREAD: + current->thread.force_icache_flush = true; + break; + default: + return -EINVAL; + } + break; + case PR_RISCV_CTX_SW_FENCEI_OFF: + switch (scope) { + case PR_RISCV_SCOPE_PER_PROCESS: + current->mm->context.force_icache_flush = false; + + set_icache_stale_mask(); + break; + case PR_RISCV_SCOPE_PER_THREAD: + current->thread.force_icache_flush = false; + + set_icache_stale_mask(); + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + return 0; +#else + switch (ctx) { + case PR_RISCV_CTX_SW_FENCEI_ON: + case PR_RISCV_CTX_SW_FENCEI_OFF: + return 0; + default: + return -EINVAL; + } +#endif +} diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 217fd4de6134..beef30f42d5c 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_MMU @@ -297,21 +298,23 @@ static inline void set_mm(struct mm_struct *prev, * * The "cpu" argument must be the current local CPU number. */ -static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu) +static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu, + struct task_struct *task) { #ifdef CONFIG_SMP - cpumask_t *mask = &mm->context.icache_stale_mask; - - if (cpumask_test_cpu(cpu, mask)) { - cpumask_clear_cpu(cpu, mask); + if (cpumask_test_and_clear_cpu(cpu, &mm->context.icache_stale_mask)) { /* * Ensure the remote hart's writes are visible to this hart. * This pairs with a barrier in flush_icache_mm. */ smp_mb(); - local_flush_icache_all(); - } + /* + * If cache will be flushed in switch_to, no need to flush here. 
+ */ + if (!(task && switch_to_should_flush_icache(task))) + local_flush_icache_all(); + } #endif } @@ -332,5 +335,5 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, set_mm(prev, next, cpu); - flush_icache_deferred(next, cpu); + flush_icache_deferred(next, cpu, task); } diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 370ed14b1ae0..524d546d697b 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -306,4 +306,10 @@ struct prctl_mm_map { # define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc # define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f +#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71 +# define PR_RISCV_CTX_SW_FENCEI_ON 0 +# define PR_RISCV_CTX_SW_FENCEI_OFF 1 +# define PR_RISCV_SCOPE_PER_PROCESS 0 +# define PR_RISCV_SCOPE_PER_THREAD 1 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index e219fcfa112d..69afdd8b430f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -146,6 +146,9 @@ #ifndef RISCV_V_GET_CONTROL # define RISCV_V_GET_CONTROL() (-EINVAL) #endif +#ifndef RISCV_SET_ICACHE_FLUSH_CTX +# define RISCV_SET_ICACHE_FLUSH_CTX(a, b) (-EINVAL) +#endif /* * this is where the system-wide overflow UID and GID are defined, for @@ -2743,6 +2746,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_RISCV_V_GET_CONTROL: error = RISCV_V_GET_CONTROL(); break; + case PR_RISCV_SET_ICACHE_FLUSH_CTX: + error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3); + break; default: error = -EINVAL; break; From 6a08e4709c58cb324a6324f07acec54e7764c32f Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Tue, 12 Mar 2024 16:53:42 -0700 Subject: [PATCH 17/42] documentation: Document PR_RISCV_SET_ICACHE_FLUSH_CTX prctl Provide documentation that explains how to properly do CMODX in riscv. Signed-off-by: Charlie Jenkins Reviewed-by: Atish Patra Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240312-fencei-v13-3-4b6bdc2bbf32@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/arch/riscv/cmodx.rst | 98 ++++++++++++++++++++++++++++++ Documentation/arch/riscv/index.rst | 1 + 2 files changed, 99 insertions(+) create mode 100644 Documentation/arch/riscv/cmodx.rst diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst new file mode 100644 index 000000000000..1c0ca06b6c97 --- /dev/null +++ b/Documentation/arch/riscv/cmodx.rst @@ -0,0 +1,98 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================================== +Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux +============================================================================== + +CMODX is a programming technique where a program executes instructions that were +modified by the program itself. Instruction storage and the instruction cache +(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the +program must enforce its own synchronization with the unprivileged fence.i +instruction. + +However, the default Linux ABI prohibits the use of fence.i in userspace +applications. At any point the scheduler may migrate a task onto a new hart. If +migration occurs after the userspace synchronized the icache and instruction +storage with fence.i, the icache on the new hart will no longer be clean. This +is due to the behavior of fence.i only affecting the hart that it is called on. +Thus, the hart that the task has been migrated to may not have synchronized +instruction storage and icache. 
+ +There are two ways to solve this problem: use the riscv_flush_icache() syscall, +or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in +userspace. The syscall performs a one-off icache flushing operation. The prctl +changes the Linux ABI to allow userspace to emit icache flushing operations. + +As an aside, "deferred" icache flushes can sometimes be triggered in the kernel. +At the time of writing, this only occurs during the riscv_flush_icache() syscall +and when the kernel uses copy_to_user_page(). These deferred flushes happen only +when the memory map being used by a hart changes. If the prctl() context caused +an icache flush, this deferred icache flush will be skipped as it is redundant. +Therefore, there will be no additional flush when using the riscv_flush_icache() +syscall inside of the prctl() context. + +prctl() Interface +--------------------- + +Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The +remaining arguments will be delegated to the riscv_set_icache_flush_ctx +function detailed below. + +.. kernel-doc:: arch/riscv/mm/cacheflush.c + :identifiers: riscv_set_icache_flush_ctx + +Example usage: + +The following files are meant to be compiled and linked with each other. The +modify_instruction() function replaces an add with 0 with an add with one, +causing the instruction sequence in get_value() to change from returning a zero +to returning a one. + +cmodx.c:: + + #include + #include + + extern int get_value(); + extern void modify_instruction(); + + int main() + { + int value = get_value(); + printf("Value before cmodx: %d\n", value); + + // Call prctl before first fence.i is called inside modify_instruction + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS); + modify_instruction(); + // Call prctl after final fence.i is called in process + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_OFF, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS); + + value = get_value(); + printf("Value after cmodx: %d\n", value); + return 0; + } + +cmodx.S:: + + .option norvc + + .text + .global modify_instruction + modify_instruction: + lw a0, new_insn + lui a5,%hi(old_insn) + sw a0,%lo(old_insn)(a5) + fence.i + ret + + .section modifiable, "awx" + .global get_value + get_value: + li a0, 0 + old_insn: + addi a0, a0, 0 + ret + + .data + new_insn: + addi a0, a0, 1 diff --git a/Documentation/arch/riscv/index.rst b/Documentation/arch/riscv/index.rst index 4dab0cb4b900..eecf347ce849 100644 --- a/Documentation/arch/riscv/index.rst +++ b/Documentation/arch/riscv/index.rst @@ -13,6 +13,7 @@ RISC-V architecture patch-acceptance uabi vector + cmodx features From decde1fa209323c77a7498e803350c5f3e992d62 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Tue, 12 Mar 2024 16:53:43 -0700 Subject: [PATCH 18/42] cpumask: Add assign cpu Standardize an assign_cpu function for cpumasks. 
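A usage sketch for the new helper (the function and parameter names here are illustrative, not taken from the tree):

  #include <linux/cpumask.h>

  static void note_cpu_state(int cpu, struct cpumask *mask, bool set)
  {
          /* Sets the bit when "set" is true, clears it otherwise; equivalent to
           * assign_bit(cpumask_check(cpu), cpumask_bits(mask), set). */
          cpumask_assign_cpu(cpu, mask, set);
  }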
Signed-off-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240312-fencei-v13-4-4b6bdc2bbf32@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 2 +- include/linux/cpumask.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 15a95578e670..9dad35f0ce8b 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -170,7 +170,7 @@ static void set_icache_stale_mask(void) stale_cpu = cpumask_test_cpu(smp_processor_id(), mask); cpumask_setall(mask); - assign_bit(cpumask_check(smp_processor_id()), cpumask_bits(mask), stale_cpu); + cpumask_assign_cpu(smp_processor_id(), mask, stale_cpu); } #endif diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index cfb545841a2c..1b85e09c4ba5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -492,6 +492,22 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) __clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } +/** + * cpumask_assign_cpu - assign a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + * @bool: the value to assign + */ +static __always_inline void cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value) +{ + assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value); +} + +static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value) +{ + __assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value); +} + /** * cpumask_test_cpu - test for a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) From eb1e5037294652ddf1437f62292c0727183f11ae Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 25 Mar 2024 19:10:37 +0800 Subject: [PATCH 19/42] riscv: select ARCH_USE_CMPXCHG_LOCKREF Select ARCH_USE_CMPXCHG_LOCKREF to enable the cmpxchg-based lockless lockref implementation for riscv. Using Linus' test case[1] on TH1520 platform, I see a 11.2% improvement. On JH7110 platform, I see 12.0% improvement. Link: http://marc.info/?l=linux-fsdevel&m=137782380714721&w=4 [1] Signed-off-by: Jisheng Zhang Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20240325111038.1700-2-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bffbd869a068..b8b96baad058 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -50,6 +50,7 @@ config RISCV select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_SUPPORTS_PER_VMA_LOCK if MMU select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK + select ARCH_USE_CMPXCHG_LOCKREF if 64BIT select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USES_CFI_TRAPS if CFI_CLANG From 79d6e4eae9662b9103fecf94d52b44deca56743c Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 25 Mar 2024 19:10:38 +0800 Subject: [PATCH 20/42] riscv: cmpxchg: implement arch_cmpxchg64_{relaxed|acquire|release} After selecting ARCH_USE_CMPXCHG_LOCKREF, one straight futher optimization is implementing the arch_cmpxchg64_relaxed() because the lockref code does not need the cmpxchg to have barrier semantics. At the same time, implement arch_cmpxchg64_acquire and arch_cmpxchg64_release as well. However, on both TH1520 and JH7110 platforms, I didn't see obvious performance improvement with Linus' test case [1]. IMHO, this may be related with the fence and lr.d/sc.d hw implementations. 
In theory, lr/sc without fence could give performance improvement over lr/sc plus fence, so add the code here to leave performance improvement room on newer HW platforms. Link: http://marc.info/?l=linux-fsdevel&m=137782380714721&w=4 [1] Signed-off-by: Jisheng Zhang Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20240325111038.1700-3-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cmpxchg.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 2f4726d3cfcc..6318187f426f 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -360,4 +360,22 @@ arch_cmpxchg_relaxed((ptr), (o), (n)); \ }) +#define arch_cmpxchg64_relaxed(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_cmpxchg_relaxed((ptr), (o), (n)); \ +}) + +#define arch_cmpxchg64_acquire(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_cmpxchg_acquire((ptr), (o), (n)); \ +}) + +#define arch_cmpxchg64_release(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_cmpxchg_release((ptr), (o), (n)); \ +}) + #endif /* _ASM_RISCV_CMPXCHG_H */ From 70a57b247251aabadd67795c3097c0fcc616e533 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 9 Apr 2024 18:25:16 +0100 Subject: [PATCH 21/42] RISC-V: enable building 64-bit kernels with rust support The rust modules work on 64-bit RISC-V, with no twiddling required. Select HAVE_RUST and provide the required flags to kbuild so that the modules can be used. The Makefile and Kconfig changes are lifted from work done by Miguel in the Rust-for-Linux tree, hence his authorship. Following the rabbit hole, the Makefile changes originated in a script, created based on config files originally added by Gary, hence his co-authorship. 32-bit is broken in core rust code, so support is limited to 64-bit: ld.lld: error: undefined symbol: __udivdi3 As 64-bit RISC-V is now supported, add it to the arch support table. Co-developed-by: Gary Guo Signed-off-by: Gary Guo Signed-off-by: Miguel Ojeda Co-developed-by: Conor Dooley Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20240409-silencer-book-ce1320f06aab@spud Signed-off-by: Palmer Dabbelt --- Documentation/rust/arch-support.rst | 1 + arch/riscv/Kconfig | 1 + arch/riscv/Makefile | 7 +++++++ scripts/generate_rust_target.rs | 6 ++++++ 4 files changed, 15 insertions(+) diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst index 5c4fa9f5d1cd..4d1495ded2aa 100644 --- a/Documentation/rust/arch-support.rst +++ b/Documentation/rust/arch-support.rst @@ -17,6 +17,7 @@ Architecture Level of support Constraints ============= ================ ============================================== ``arm64`` Maintained Little Endian only. ``loongarch`` Maintained - +``riscv`` Maintained ``riscv64`` only. ``um`` Maintained ``x86_64`` only. ``x86`` Maintained ``x86_64`` only. 
============= ================ ============================================== diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index be09c8836d56..cad31864fd0f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -155,6 +155,7 @@ config RISCV select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RETHOOK if !XIP_KERNEL select HAVE_RSEQ + select HAVE_RUST if 64BIT select HAVE_SAMPLE_FTRACE_DIRECT select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_STACKPROTECTOR diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index a74e70405d3f..1a2dca1e6a96 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -34,6 +34,9 @@ ifeq ($(CONFIG_ARCH_RV64I),y) KBUILD_AFLAGS += -mabi=lp64 KBUILD_LDFLAGS += -melf64lriscv + + KBUILD_RUSTFLAGS += -Ctarget-cpu=generic-rv64 --target=riscv64imac-unknown-none-elf \ + -Cno-redzone else BITS := 32 UTS_MACHINE := riscv32 @@ -68,6 +71,10 @@ riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c riscv-march-$(CONFIG_RISCV_ISA_V) := $(riscv-march-y)v +ifneq ($(CONFIG_RISCV_ISA_C),y) + KBUILD_RUSTFLAGS += -Ctarget-feature=-c +endif + ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC KBUILD_CFLAGS += -Wa,-misa-spec=2.2 KBUILD_AFLAGS += -Wa,-misa-spec=2.2 diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 54919cf48621..8f7846b9029a 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -150,6 +150,12 @@ fn main() { // `llvm-target`s are taken from `scripts/Makefile.clang`. if cfg.has("ARM64") { panic!("arm64 uses the builtin rustc aarch64-unknown-none target"); + } else if cfg.has("RISCV") { + if cfg.has("64BIT") { + panic!("64-bit RISC-V uses the builtin rustc riscv64-unknown-none-elf target"); + } else { + panic!("32-bit RISC-V is an unsupported architecture"); + } } else if cfg.has("X86_64") { ts.push("arch", "x86_64"); ts.push( From fa7d7339016ab7850258e85d6adfd4c4abca5498 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 12 Mar 2024 12:56:38 -0700 Subject: [PATCH 22/42] riscv: Do not save the scratch CSR during suspend While the processor is executing kernel code, the value of the scratch CSR is always zero, so there is no need to save the value. Continue to write the CSR during the resume flow, so we do not rely on firmware to initialize it. 
Signed-off-by: Samuel Holland Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20240312195641.1830521-1-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/suspend.h | 1 - arch/riscv/kernel/suspend.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h index 4718096fa5e3..4ffb022b097f 100644 --- a/arch/riscv/include/asm/suspend.h +++ b/arch/riscv/include/asm/suspend.h @@ -13,7 +13,6 @@ struct suspend_context { /* Saved and restored by low-level functions */ struct pt_regs regs; /* Saved and restored by high-level functions */ - unsigned long scratch; unsigned long envcfg; unsigned long tvec; unsigned long ie; diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 8a327b485b90..c8cec0cc5833 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -14,7 +14,6 @@ void suspend_save_csrs(struct suspend_context *context) { - context->scratch = csr_read(CSR_SCRATCH); if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) context->envcfg = csr_read(CSR_ENVCFG); context->tvec = csr_read(CSR_TVEC); @@ -37,7 +36,7 @@ void suspend_save_csrs(struct suspend_context *context) void suspend_restore_csrs(struct suspend_context *context) { - csr_write(CSR_SCRATCH, context->scratch); + csr_write(CSR_SCRATCH, 0); if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) csr_write(CSR_ENVCFG, context->envcfg); csr_write(CSR_TVEC, context->tvec); From 441381506ba7ca1cb8b44e651b130ab791d2e298 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 6 Feb 2024 16:40:59 +0100 Subject: [PATCH 23/42] riscv: misaligned: remove CONFIG_RISCV_M_MODE specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While reworking code to fix sparse errors, it appears that the RISCV_M_MODE specific could actually be removed and use the one for normal mode. Even though RISCV_M_MODE can do direct user memory access, using the user uaccess helpers is also going to work. Since there is no need anymore for specific accessors (load_u8()/store_u8()), we can directly use memcpy()/copy_{to/from}_user() and get rid of the copy loop entirely. __read_insn() is also fixed to use an unsigned long instead of a pointer which was cast in __user address space. The insn_addr parameter is now cast from unsigned lnog to the correct address space directly. 
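For reference, a condensed sketch of the resulting access pattern (the helper
name misaligned_copy_from() is invented for this illustration; the hunks below
open-code the same branch directly in the load and store handlers):

    /* One branch on user_mode() replaces the per-byte load_u8()/store_u8() loops. */
    static int misaligned_copy_from(struct pt_regs *regs, void *dst,
                                    unsigned long addr, int len)
    {
        if (user_mode(regs))
            return raw_copy_from_user(dst, (void __user *)addr, len) ? -1 : 0;

        memcpy(dst, (void *)addr, len);
        return 0;
    }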
Signed-off-by: Clément Léger Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240206154104.896809-1-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps_misaligned.c | 106 +++++---------------------- 1 file changed, 17 insertions(+), 89 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 2adb7c3e4dd5..b62d5a2f4541 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -264,86 +264,14 @@ static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, #define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs)) #define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs)) -#ifdef CONFIG_RISCV_M_MODE -static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) -{ - u8 val; - - asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr)); - *r_val = val; - - return 0; -} - -static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) -{ - asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr)); - - return 0; -} - -static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn) -{ - register ulong __mepc asm ("a2") = mepc; - ulong val, rvc_mask = 3, tmp; - - asm ("and %[tmp], %[addr], 2\n" - "bnez %[tmp], 1f\n" -#if defined(CONFIG_64BIT) - __stringify(LWU) " %[insn], (%[addr])\n" -#else - __stringify(LW) " %[insn], (%[addr])\n" -#endif - "and %[tmp], %[insn], %[rvc_mask]\n" - "beq %[tmp], %[rvc_mask], 2f\n" - "sll %[insn], %[insn], %[xlen_minus_16]\n" - "srl %[insn], %[insn], %[xlen_minus_16]\n" - "j 2f\n" - "1:\n" - "lhu %[insn], (%[addr])\n" - "and %[tmp], %[insn], %[rvc_mask]\n" - "bne %[tmp], %[rvc_mask], 2f\n" - "lhu %[tmp], 2(%[addr])\n" - "sll %[tmp], %[tmp], 16\n" - "add %[insn], %[insn], %[tmp]\n" - "2:" - : [insn] "=&r" (val), [tmp] "=&r" (tmp) - : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), - [xlen_minus_16] "i" (XLEN_MINUS_16)); - - *r_insn = val; - - return 0; -} -#else -static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) -{ - if (user_mode(regs)) { - return __get_user(*r_val, (u8 __user *)addr); - } else { - *r_val = *addr; - return 0; - } -} - -static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) -{ - if (user_mode(regs)) { - return __put_user(val, (u8 __user *)addr); - } else { - *addr = val; - return 0; - } -} - -#define __read_insn(regs, insn, insn_addr) \ +#define __read_insn(regs, insn, insn_addr, type) \ ({ \ int __ret; \ \ if (user_mode(regs)) { \ - __ret = __get_user(insn, insn_addr); \ + __ret = __get_user(insn, (type __user *) insn_addr); \ } else { \ - insn = *(__force u16 *)insn_addr; \ + insn = *(type *)insn_addr; \ __ret = 0; \ } \ \ @@ -356,9 +284,8 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) if (epc & 0x2) { ulong tmp = 0; - u16 __user *insn_addr = (u16 __user *)epc; - if (__read_insn(regs, insn, insn_addr)) + if (__read_insn(regs, insn, epc, u16)) return -EFAULT; /* __get_user() uses regular "lw" which sign extend the loaded * value make sure to clear higher order bits in case we "or" it @@ -369,16 +296,14 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) *r_insn = insn; return 0; } - insn_addr++; - if (__read_insn(regs, tmp, insn_addr)) + epc += sizeof(u16); + if (__read_insn(regs, tmp, epc, u16)) return -EFAULT; *r_insn = (tmp << 16) | insn; return 0; } else { - u32 __user *insn_addr = (u32 __user *)epc; - - if (__read_insn(regs, insn, insn_addr)) + if (__read_insn(regs, insn, epc, 
u32)) return -EFAULT; if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) { *r_insn = insn; @@ -390,7 +315,6 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) return 0; } } -#endif union reg_data { u8 data_bytes[8]; @@ -409,7 +333,7 @@ int handle_misaligned_load(struct pt_regs *regs) unsigned long epc = regs->epc; unsigned long insn; unsigned long addr = regs->badaddr; - int i, fp = 0, shift = 0, len = 0; + int fp = 0, shift = 0, len = 0; perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); @@ -492,9 +416,11 @@ int handle_misaligned_load(struct pt_regs *regs) return -EOPNOTSUPP; val.data_u64 = 0; - for (i = 0; i < len; i++) { - if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i])) + if (user_mode(regs)) { + if (raw_copy_from_user(&val, (u8 __user *)addr, len)) return -1; + } else { + memcpy(&val, (u8 *)addr, len); } if (!fp) @@ -515,7 +441,7 @@ int handle_misaligned_store(struct pt_regs *regs) unsigned long epc = regs->epc; unsigned long insn; unsigned long addr = regs->badaddr; - int i, len = 0, fp = 0; + int len = 0, fp = 0; perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); @@ -588,9 +514,11 @@ int handle_misaligned_store(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_FPU) && fp) return -EOPNOTSUPP; - for (i = 0; i < len; i++) { - if (store_u8(regs, (void *)(addr + i), val.data_bytes[i])) + if (user_mode(regs)) { + if (raw_copy_to_user((u8 __user *)addr, &val, len)) return -1; + } else { + memcpy((u8 *)addr, &val, len); } regs->epc = epc + INSN_LEN(insn); From 63f93a3ca891fd90353cf81f5d2fc4cbc3508f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 21 Feb 2024 09:31:06 +0100 Subject: [PATCH 24/42] riscv: hwprobe: export Zihintpause ISA extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export the Zihintpause ISA extension through hwprobe which allows using "pause" instructions. Some userspace applications (OpenJDK for instance) uses this to handle some locking back-off. Signed-off-by: Clément Léger Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20240221083108.1235311-1-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/arch/riscv/hwprobe.rst | 4 ++++ arch/riscv/include/uapi/asm/hwprobe.h | 1 + arch/riscv/kernel/sys_hwprobe.c | 1 + 3 files changed, 6 insertions(+) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index b2bcc9eed9aa..204cd4433af5 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -188,6 +188,10 @@ The following keys are defined: manual starting from commit 95cf1f9 ("Add changes requested by Ved during signoff") + * :c:macro:`RISCV_HWPROBE_EXT_ZIHINTPAUSE`: The Zihintpause extension is + supported as defined in the RISC-V ISA manual starting from commit + d8ab5c78c207 ("Zihintpause is ratified"). + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance information about the selected set of processors. 
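As a usage illustration (not part of this patch), a userspace program could
test for the new bit roughly as follows; this sketch assumes a libc that
exposes __NR_riscv_hwprobe and the uapi <asm/hwprobe.h> header, and keeps
error handling minimal:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <asm/hwprobe.h>

    int main(void)
    {
        struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 };

        /* cpusetsize = 0, cpus = NULL: query behavior common to all online CPUs. */
        if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
            return 1;

        if (pair.value & RISCV_HWPROBE_EXT_ZIHINTPAUSE)
            printf("Zihintpause available: pause usable for spin-wait back-off\n");

        return 0;
    }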
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 9f2a8e3ff204..31c570cbd1c5 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -59,6 +59,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33) #define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34) #define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35) +#define RISCV_HWPROBE_EXT_ZIHINTPAUSE (1ULL << 36) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 8cae41a502dd..969ef3d59dbe 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -111,6 +111,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZTSO); EXT_KEY(ZACAS); EXT_KEY(ZICOND); + EXT_KEY(ZIHINTPAUSE); if (has_vector()) { EXT_KEY(ZVBB); From 58661a30f1bcc748475ffd9be6d2fc9e4e6be679 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:42 -0700 Subject: [PATCH 25/42] riscv: Flush the instruction cache during SMP bringup Instruction cache flush IPIs are sent only to CPUs in cpu_online_mask, so they will not target a CPU until it calls set_cpu_online() earlier in smp_callin(). As a result, if instruction memory is modified between the CPU coming out of reset and that point, then its instruction cache may contain stale data. Therefore, the instruction cache must be flushed after the set_cpu_online() synchronization point. Fixes: 08f051eda33b ("RISC-V: Flush I$ when making a dirty page executable") Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-2-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smpboot.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index d41090fc3203..4b3c50da48ba 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include @@ -234,9 +234,10 @@ asmlinkage __visible void smp_callin(void) riscv_user_isa_enable(); /* - * Remote TLB flushes are ignored while the CPU is offline, so emit - * a local TLB flush right now just in case. + * Remote cache and TLB flushes are ignored while the CPU is offline, + * so flush them both right now just in case. */ + local_flush_icache_all(); local_flush_tlb_all(); complete(&cpu_running); /* From aaa56c8f378dd798f4a7f633cbf2eb129e98e6a4 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:43 -0700 Subject: [PATCH 26/42] riscv: Factor out page table TLB synchronization The logic is the same for all page table levels. See commit 69be3fb111e7 ("riscv: enable MMU_GATHER_RCU_TABLE_FREE for SMP && MMU"). 
Signed-off-by: Samuel Holland Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327045035.368512-3-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgalloc.h | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index deaf971253a2..b34587da8882 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -15,6 +15,14 @@ #define __HAVE_ARCH_PUD_FREE #include +static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) +{ + if (riscv_use_ipi_for_rfence()) + tlb_remove_page_ptdesc(tlb, pt); + else + tlb_remove_ptdesc(tlb, pt); +} + static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -102,10 +110,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, struct ptdesc *ptdesc = virt_to_ptdesc(pud); pagetable_pud_dtor(ptdesc); - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, ptdesc); - else - tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, ptdesc); } } @@ -139,12 +144,8 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr) { - if (pgtable_l5_enabled) { - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, virt_to_ptdesc(p4d)); - else - tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); - } + if (pgtable_l5_enabled) + riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -176,10 +177,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, struct ptdesc *ptdesc = virt_to_ptdesc(pmd); pagetable_pmd_dtor(ptdesc); - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, ptdesc); - else - tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, ptdesc); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -190,10 +188,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, struct ptdesc *ptdesc = page_ptdesc(pte); pagetable_pte_dtor(ptdesc); - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, ptdesc); - else - tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, ptdesc); } #endif /* CONFIG_MMU */ From dc892fb443224da7018891a5fac9cb6ac50c14b3 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:44 -0700 Subject: [PATCH 27/42] riscv: Use IPIs for remote cache/TLB flushes by default An IPI backend is always required in an SMP configuration, but an SBI implementation is not. For example, SBI will be unavailable when the kernel runs in M mode. For this reason, consider IPI delivery of cache and TLB flushes to be the base case, and any other implementation (such as the SBI remote fence extension) to be an optimization. Generally, if IPIs can be delivered without firmware assistance, they are assumed to be faster than SBI calls due to the SBI context switch overhead. However, when SBI is used as the IPI backend, then the context switch cost must be paid anyway, and performing the cache/TLB flush directly in the SBI implementation is more efficient than injecting an interrupt to S-mode. This is the only existing scenario where riscv_ipi_set_virq_range() is called with use_for_rfence set to false. sbi_ipi_init() already checks riscv_ipi_have_virq_range(), so it only calls riscv_ipi_set_virq_range() when no other IPI device is available. 
This allows moving the static key and dropping the use_for_rfence parameter. This decouples the static key from the irqchip driver probe order. Furthermore, the static branch only makes sense when CONFIG_RISCV_SBI is enabled. Optherwise, IPIs must be used. Add a fallback definition of riscv_use_sbi_for_rfence() which handles this case and removes the need to check CONFIG_RISCV_SBI elsewhere, such as in cacheflush.c. Reviewed-by: Anup Patel Signed-off-by: Samuel Holland Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327045035.368512-4-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgalloc.h | 7 ++++--- arch/riscv/include/asm/sbi.h | 4 ++++ arch/riscv/include/asm/smp.h | 15 ++------------- arch/riscv/kernel/sbi-ipi.c | 11 ++++++++++- arch/riscv/kernel/smp.c | 11 +---------- arch/riscv/mm/cacheflush.c | 5 ++--- arch/riscv/mm/tlbflush.c | 31 ++++++++++++++----------------- drivers/clocksource/timer-clint.c | 2 +- 8 files changed, 38 insertions(+), 48 deletions(-) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index b34587da8882..f52264304f77 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -8,6 +8,7 @@ #define _ASM_RISCV_PGALLOC_H #include +#include #include #ifdef CONFIG_MMU @@ -17,10 +18,10 @@ static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, pt); - else + if (riscv_use_sbi_for_rfence()) tlb_remove_ptdesc(tlb, pt); + else + tlb_remove_page_ptdesc(tlb, pt); } static inline void pmd_populate_kernel(struct mm_struct *mm, diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6e68f8dff76b..ea84392ca9d7 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -375,8 +375,12 @@ unsigned long riscv_cached_marchid(unsigned int cpu_id); unsigned long riscv_cached_mimpid(unsigned int cpu_id); #if IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_RISCV_SBI) +DECLARE_STATIC_KEY_FALSE(riscv_sbi_for_rfence); +#define riscv_use_sbi_for_rfence() \ + static_branch_unlikely(&riscv_sbi_for_rfence) void sbi_ipi_init(void); #else +static inline bool riscv_use_sbi_for_rfence(void) { return false; } static inline void sbi_ipi_init(void) { } #endif diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index 0d555847cde6..7ac80e9f2288 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -49,12 +49,7 @@ void riscv_ipi_disable(void); bool riscv_ipi_have_virq_range(void); /* Set the IPI interrupt numbers for arch (called by irqchip drivers) */ -void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence); - -/* Check if we can use IPIs for remote FENCEs */ -DECLARE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); -#define riscv_use_ipi_for_rfence() \ - static_branch_unlikely(&riscv_ipi_for_rfence) +void riscv_ipi_set_virq_range(int virq, int nr); /* Check other CPUs stop or not */ bool smp_crash_stop_failed(void); @@ -104,16 +99,10 @@ static inline bool riscv_ipi_have_virq_range(void) return false; } -static inline void riscv_ipi_set_virq_range(int virq, int nr, - bool use_for_rfence) +static inline void riscv_ipi_set_virq_range(int virq, int nr) { } -static inline bool riscv_use_ipi_for_rfence(void) -{ - return false; -} - #endif /* CONFIG_SMP */ #if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP) diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index a4559695ce62..1026e22955cc 100644 --- 
a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -13,6 +13,9 @@ #include #include +DEFINE_STATIC_KEY_FALSE(riscv_sbi_for_rfence); +EXPORT_SYMBOL_GPL(riscv_sbi_for_rfence); + static int sbi_ipi_virq; static void sbi_ipi_handle(struct irq_desc *desc) @@ -72,6 +75,12 @@ void __init sbi_ipi_init(void) "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); - riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false); + riscv_ipi_set_virq_range(virq, BITS_PER_BYTE); pr_info("providing IPIs using SBI IPI extension\n"); + + /* + * Use the SBI remote fence extension to avoid + * the extra context switch needed to handle IPIs. + */ + static_branch_enable(&riscv_sbi_for_rfence); } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 45dd4035416e..8e6eb64459af 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -171,10 +171,7 @@ bool riscv_ipi_have_virq_range(void) return (ipi_virq_base) ? true : false; } -DEFINE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); -EXPORT_SYMBOL_GPL(riscv_ipi_for_rfence); - -void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) +void riscv_ipi_set_virq_range(int virq, int nr) { int i, err; @@ -197,12 +194,6 @@ void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) /* Enabled IPIs for boot CPU immediately */ riscv_ipi_enable(); - - /* Update RFENCE static key */ - if (use_for_rfence) - static_branch_enable(&riscv_ipi_for_rfence); - else - static_branch_disable(&riscv_ipi_for_rfence); } static const char * const ipi_names[] = { diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index bc61ee5975e4..d76fc73e594b 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -21,7 +21,7 @@ void flush_icache_all(void) { local_flush_icache_all(); - if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence()) + if (riscv_use_sbi_for_rfence()) sbi_remote_fence_i(NULL); else on_each_cpu(ipi_remote_fence_i, NULL, 1); @@ -69,8 +69,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local) * with flush_icache_deferred(). 
*/ smp_mb(); - } else if (IS_ENABLED(CONFIG_RISCV_SBI) && - !riscv_use_ipi_for_rfence()) { + } else if (riscv_use_sbi_for_rfence()) { sbi_remote_fence_i(&others); } else { on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 893566e004b7..0435605b07d0 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -79,10 +79,10 @@ static void __ipi_flush_tlb_all(void *info) void flush_tlb_all(void) { - if (riscv_use_ipi_for_rfence()) - on_each_cpu(__ipi_flush_tlb_all, NULL, 1); - else + if (riscv_use_sbi_for_rfence()) sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); + else + on_each_cpu(__ipi_flush_tlb_all, NULL, 1); } struct flush_tlb_range_data { @@ -103,7 +103,6 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, unsigned long start, unsigned long size, unsigned long stride) { - struct flush_tlb_range_data ftd; bool broadcast; if (cpumask_empty(cmask)) @@ -119,20 +118,18 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, broadcast = true; } - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = asid; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range_asid, - &ftd, 1); - } else - sbi_remote_sfence_vma_asid(cmask, - start, size, asid); - } else { + if (!broadcast) { local_flush_tlb_range_asid(start, size, stride, asid); + } else if (riscv_use_sbi_for_rfence()) { + sbi_remote_sfence_vma_asid(cmask, start, size, asid); + } else { + struct flush_tlb_range_data ftd; + + ftd.asid = asid; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid, &ftd, 1); } if (cmask != cpu_online_mask) diff --git a/drivers/clocksource/timer-clint.c b/drivers/clocksource/timer-clint.c index 09fd292eb83d..0bdd9d7ec545 100644 --- a/drivers/clocksource/timer-clint.c +++ b/drivers/clocksource/timer-clint.c @@ -251,7 +251,7 @@ static int __init clint_timer_init_dt(struct device_node *np) } irq_set_chained_handler(clint_ipi_irq, clint_ipi_interrupt); - riscv_ipi_set_virq_range(rc, BITS_PER_BYTE, true); + riscv_ipi_set_virq_range(rc, BITS_PER_BYTE); clint_clear_ipi(); #endif From 038ac18aae935c89c874649acde5a82f588a12b4 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:45 -0700 Subject: [PATCH 28/42] riscv: mm: Broadcast kernel TLB flushes only when needed __flush_tlb_range() avoids broadcasting TLB flushes when an mm context is only active on the local CPU. Apply this same optimization to TLB flushes of kernel memory when only one CPU is online. This check can be constant-folded when SMP is disabled. 
Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-5-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/tlbflush.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 0435605b07d0..da821315d43e 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -103,22 +103,15 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, unsigned long start, unsigned long size, unsigned long stride) { - bool broadcast; + unsigned int cpu; if (cpumask_empty(cmask)) return; - if (cmask != cpu_online_mask) { - unsigned int cpuid; + cpu = get_cpu(); - cpuid = get_cpu(); - /* check if the tlbflush needs to be sent to other CPUs */ - broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; - } else { - broadcast = true; - } - - if (!broadcast) { + /* Check if the TLB flush needs to be sent to other CPUs. */ + if (cpumask_any_but(cmask, cpu) >= nr_cpu_ids) { local_flush_tlb_range_asid(start, size, stride, asid); } else if (riscv_use_sbi_for_rfence()) { sbi_remote_sfence_vma_asid(cmask, start, size, asid); @@ -132,8 +125,7 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid, &ftd, 1); } - if (cmask != cpu_online_mask) - put_cpu(); + put_cpu(); } static inline unsigned long get_mm_asid(struct mm_struct *mm) From 9546f00410ed88b8117388cfd74ea59f4616a158 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:46 -0700 Subject: [PATCH 29/42] riscv: Only send remote fences when some other CPU is online If no other CPU is online, a local cache or TLB flush is sufficient. These checks can be constant-folded when SMP is disabled. Signed-off-by: Samuel Holland Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327045035.368512-6-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 4 +++- arch/riscv/mm/tlbflush.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index d76fc73e594b..f5be1fec8191 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -21,7 +21,9 @@ void flush_icache_all(void) { local_flush_icache_all(); - if (riscv_use_sbi_for_rfence()) + if (num_online_cpus() < 2) + return; + else if (riscv_use_sbi_for_rfence()) sbi_remote_fence_i(NULL); else on_each_cpu(ipi_remote_fence_i, NULL, 1); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index da821315d43e..0901aa47b58f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -79,7 +79,9 @@ static void __ipi_flush_tlb_all(void *info) void flush_tlb_all(void) { - if (riscv_use_sbi_for_rfence()) + if (num_online_cpus() < 2) + local_flush_tlb_all(); + else if (riscv_use_sbi_for_rfence()) sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); else on_each_cpu(__ipi_flush_tlb_all, NULL, 1); From c6026d35b6abb5bd954788478bfa800a942e2033 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:47 -0700 Subject: [PATCH 30/42] riscv: mm: Combine the SMP and UP TLB flush code In SMP configurations, all TLB flushing narrower than flush_tlb_all() goes through __flush_tlb_range(). Do the same in UP configurations. 
This allows UP configurations to take advantage of recent improvements to the code in tlbflush.c, such as support for huge pages and flushing multiple-page ranges. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Reviewed-by: Yunhui Cui Link: https://lore.kernel.org/r/20240327045035.368512-7-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- arch/riscv/include/asm/tlbflush.h | 31 +++---------------------------- arch/riscv/mm/Makefile | 5 +---- 3 files changed, 5 insertions(+), 33 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index be09c8836d56..442532819a44 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -60,7 +60,7 @@ config RISCV select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USES_CFI_TRAPS if CFI_CLANG - select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if MMU select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 4112cc8d1d69..4f86424b1ba5 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -27,12 +27,7 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } -#else /* CONFIG_MMU */ -#define local_flush_tlb_all() do { } while (0) -#define local_flush_tlb_page(addr) do { } while (0) -#endif /* CONFIG_MMU */ -#if defined(CONFIG_SMP) && defined(CONFIG_MMU) void flush_tlb_all(void); void flush_tlb_mm(struct mm_struct *mm); void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, @@ -54,28 +49,8 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, unsigned long uaddr); void arch_flush_tlb_batched_pending(struct mm_struct *mm); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); - -#else /* CONFIG_SMP && CONFIG_MMU */ - -#define flush_tlb_all() local_flush_tlb_all() -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr) - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - local_flush_tlb_all(); -} - -/* Flush a range of kernel pages */ -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - local_flush_tlb_all(); -} - -#define flush_tlb_mm(mm) flush_tlb_all() -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() -#define local_flush_tlb_kernel_range(start, end) flush_tlb_all() -#endif /* !CONFIG_SMP || !CONFIG_MMU */ +#else /* CONFIG_MMU */ +#define local_flush_tlb_all() do { } while (0) +#endif /* CONFIG_MMU */ #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 2c869f8026a8..cbe4d775ef56 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -13,14 +13,11 @@ endif KCOV_INSTRUMENT_init.o := n obj-y += init.o -obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o +obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o tlbflush.o obj-y += cacheflush.o obj-y += context.o obj-y += pmem.o -ifeq ($(CONFIG_MMU),y) -obj-$(CONFIG_SMP) += tlbflush.o -endif obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_KASAN) += kasan_init.o From 20e03d702e00a3e0269a1d6f9549c2e370492054 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:48 -0700 Subject: [PATCH 31/42] 
riscv: Apply SiFive CIP-1200 workaround to single-ASID sfence.vma commit 3f1e782998cd ("riscv: add ASID-based tlbflushing methods") added calls to the sfence.vma instruction with rs2 != x0. These single-ASID instruction variants are also affected by SiFive errata CIP-1200. Until now, the errata workaround was not needed for the single-ASID sfence.vma variants, because they were only used when the ASID allocator was enabled, and the affected SiFive platforms do not support multiple ASIDs. However, we are going to start using those sfence.vma variants regardless of ASID support, so now we need alternatives covering them. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-8-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/errata_list.h | 12 +++++++++++- arch/riscv/include/asm/tlbflush.h | 19 ++++++++++++++++++- arch/riscv/mm/tlbflush.c | 23 ----------------------- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 1f2dbfb8a8bf..35ce26899960 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -43,11 +43,21 @@ ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \ CONFIG_ERRATA_SIFIVE_CIP_453) #else /* !__ASSEMBLY__ */ -#define ALT_FLUSH_TLB_PAGE(x) \ +#define ALT_SFENCE_VMA_ASID(asid) \ +asm(ALTERNATIVE("sfence.vma x0, %0", "sfence.vma", SIFIVE_VENDOR_ID, \ + ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ + : : "r" (asid) : "memory") + +#define ALT_SFENCE_VMA_ADDR(addr) \ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \ ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ : : "r" (addr) : "memory") +#define ALT_SFENCE_VMA_ADDR_ASID(addr, asid) \ +asm(ALTERNATIVE("sfence.vma %0, %1", "sfence.vma", SIFIVE_VENDOR_ID, \ + ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ + : : "r" (addr), "r" (asid) : "memory") + /* * _val is marked as "will be overwritten", so need to set it to 0 * in the default case. 
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 4f86424b1ba5..463b615d7728 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -22,10 +22,27 @@ static inline void local_flush_tlb_all(void) __asm__ __volatile__ ("sfence.vma" : : : "memory"); } +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + if (asid != FLUSH_TLB_NO_ASID) + ALT_SFENCE_VMA_ASID(asid); + else + local_flush_tlb_all(); +} + /* Flush one page from local TLB */ static inline void local_flush_tlb_page(unsigned long addr) { - ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); + ALT_SFENCE_VMA_ADDR(addr); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + if (asid != FLUSH_TLB_NO_ASID) + ALT_SFENCE_VMA_ADDR_ASID(addr, asid); + else + local_flush_tlb_page(addr); } void flush_tlb_all(void); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 0901aa47b58f..ad7bdcfcc219 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -7,29 +7,6 @@ #include #include -static inline void local_flush_tlb_all_asid(unsigned long asid) -{ - if (asid != FLUSH_TLB_NO_ASID) - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); - else - local_flush_tlb_all(); -} - -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - if (asid != FLUSH_TLB_NO_ASID) - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); - else - local_flush_tlb_page(addr); -} - /* * Flush entire TLB if number of entries to be flushed is greater * than the threshold below. From d6dcdabafcd7c612b164079d00da6d9775863a0b Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:49 -0700 Subject: [PATCH 32/42] riscv: Avoid TLB flush loops when affected by SiFive CIP-1200 Implementations affected by SiFive errata CIP-1200 have a bug which forces the kernel to always use the global variant of the sfence.vma instruction. When affected by this errata, do not attempt to flush a range of addresses; each iteration of the loop would actually flush the whole TLB instead. Instead, minimize the overall number of sfence.vma instructions. 
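For illustration, the range-flush helper in tlbflush.c already falls back to a
full flush once a range exceeds tlb_flush_all_threshold, so forcing the
threshold to zero collapses every range flush into a single sfence.vma. A
rough sketch of that check (paraphrased, not the exact kernel code; the
function name is invented):

    static void sketch_range_flush(unsigned long start, unsigned long size,
                                   unsigned long stride, unsigned long asid)
    {
        unsigned long i, nr = DIV_ROUND_UP(size, stride);

        /* Threshold forced to 0 on affected parts: always one full flush. */
        if (nr > tlb_flush_all_threshold) {
            local_flush_tlb_all_asid(asid);
            return;
        }

        /* Unaffected parts: per-page loop as before. */
        for (i = 0; i < nr; i++, start += stride)
            local_flush_tlb_page_asid(start, asid);
    }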
Signed-off-by: Samuel Holland Reviewed-by: Yunhui Cui Link: https://lore.kernel.org/r/20240327045035.368512-9-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/sifive/errata.c | 5 +++++ arch/riscv/include/asm/tlbflush.h | 2 ++ arch/riscv/mm/tlbflush.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 3d9a32d791f7..716cfedad3a2 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -42,6 +42,11 @@ static bool errata_cip_1200_check_func(unsigned long arch_id, unsigned long imp return false; if ((impid & 0xffffff) > 0x200630 || impid == 0x1200626) return false; + +#ifdef CONFIG_MMU + tlb_flush_all_threshold = 0; +#endif + return true; } diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 463b615d7728..8e329721375b 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -66,6 +66,8 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, unsigned long uaddr); void arch_flush_tlb_batched_pending(struct mm_struct *mm); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); + +extern unsigned long tlb_flush_all_threshold; #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #endif /* CONFIG_MMU */ diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index ad7bdcfcc219..18af7b5053af 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -11,7 +11,7 @@ * Flush entire TLB if number of entries to be flushed is greater * than the threshold below. */ -static unsigned long tlb_flush_all_threshold __read_mostly = 64; +unsigned long tlb_flush_all_threshold __read_mostly = 64; static void local_flush_tlb_range_threshold_asid(unsigned long start, unsigned long size, From 74cd17792d28162fe692d5a25fe5cc081203ad19 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:50 -0700 Subject: [PATCH 33/42] riscv: mm: Introduce cntx2asid/cntx2version helper macros When using the ASID allocator, the MM context ID contains two values: the ASID in the lower bits, and the allocator version number in the remaining bits. Use macros to make this separation more obvious. 
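A condensed restatement of the check in set_mm_asid() below, to show what the
two macros pull apart (sketch only; the helper name is invented):

    /*
     * context.id layout: [ allocator version | hardware ASID ]
     * cntx2asid()    -> low bits, programmed into satp
     * cntx2version() -> high bits, compared against current_version
     */
    static bool sketch_context_is_current(struct mm_struct *mm)
    {
        unsigned long cntx = atomic_long_read(&mm->context.id);

        return cntx2version(cntx) == atomic_long_read(&current_version);
    }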
Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-10-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 3 +++ arch/riscv/mm/context.c | 12 ++++++------ arch/riscv/mm/tlbflush.c | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 355504b37f8e..a550fbf770be 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -26,6 +26,9 @@ typedef struct { #endif } mm_context_t; +#define cntx2asid(cntx) ((cntx) & asid_mask) +#define cntx2version(cntx) ((cntx) & ~asid_mask) + void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot); #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index ba8eb3944687..b562b3c44487 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -81,7 +81,7 @@ static void __flush_context(void) if (cntx == 0) cntx = per_cpu(reserved_context, i); - __set_bit(cntx & asid_mask, context_asid_map); + __set_bit(cntx2asid(cntx), context_asid_map); per_cpu(reserved_context, i) = cntx; } @@ -102,7 +102,7 @@ static unsigned long __new_context(struct mm_struct *mm) lockdep_assert_held(&context_lock); if (cntx != 0) { - unsigned long newcntx = ver | (cntx & asid_mask); + unsigned long newcntx = ver | cntx2asid(cntx); /* * If our current CONTEXT was active during a rollover, we @@ -115,7 +115,7 @@ static unsigned long __new_context(struct mm_struct *mm) * We had a valid CONTEXT in a previous life, so try to * re-use it if possible. */ - if (!__test_and_set_bit(cntx & asid_mask, context_asid_map)) + if (!__test_and_set_bit(cntx2asid(cntx), context_asid_map)) return newcntx; } @@ -168,7 +168,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) */ old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu)); if (old_active_cntx && - ((cntx & ~asid_mask) == atomic_long_read(¤t_version)) && + (cntx2version(cntx) == atomic_long_read(¤t_version)) && atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu), old_active_cntx, cntx)) goto switch_mm_fast; @@ -177,7 +177,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) /* Check that our ASID belongs to the current_version. */ cntx = atomic_long_read(&mm->context.id); - if ((cntx & ~asid_mask) != atomic_long_read(¤t_version)) { + if (cntx2version(cntx) != atomic_long_read(¤t_version)) { cntx = __new_context(mm); atomic_long_set(&mm->context.id, cntx); } @@ -191,7 +191,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) switch_mm_fast: csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | - ((cntx & asid_mask) << SATP_ASID_SHIFT) | + (cntx2asid(cntx) << SATP_ASID_SHIFT) | satp_mode); if (need_flush_tlb) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 18af7b5053af..35266dd9a9a2 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -110,7 +110,7 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, static inline unsigned long get_mm_asid(struct mm_struct *mm) { return static_branch_unlikely(&use_asid_allocator) ? 
- atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; + cntx2asid(atomic_long_read(&mm->context.id)) : FLUSH_TLB_NO_ASID; } void flush_tlb_mm(struct mm_struct *mm) From f58e5dc45fa93a2f2a0028381b2e06fe74ae9e2c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:51 -0700 Subject: [PATCH 34/42] riscv: mm: Use a fixed layout for the MM context ID Currently, the size of the ASID field in the MM context ID dynamically depends on the number of hardware-supported ASID bits. This requires reading a global variable to extract either field from the context ID. Instead, allocate the maximum possible number of bits to the ASID field, so the layout of the context ID is known at compile-time. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-11-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 4 ++-- arch/riscv/include/asm/tlbflush.h | 2 -- arch/riscv/mm/context.c | 6 ++---- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index a550fbf770be..dc0273f7905f 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -26,8 +26,8 @@ typedef struct { #endif } mm_context_t; -#define cntx2asid(cntx) ((cntx) & asid_mask) -#define cntx2version(cntx) ((cntx) & ~asid_mask) +#define cntx2asid(cntx) ((cntx) & SATP_ASID_MASK) +#define cntx2version(cntx) ((cntx) & ~SATP_ASID_MASK) void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot); diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 8e329721375b..72e559934952 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -15,8 +15,6 @@ #define FLUSH_TLB_NO_ASID ((unsigned long)-1) #ifdef CONFIG_MMU -extern unsigned long asid_mask; - static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index b562b3c44487..5315af06cd4d 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -22,7 +22,6 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator); static unsigned long asid_bits; static unsigned long num_asids; -unsigned long asid_mask; static atomic_long_t current_version; @@ -128,7 +127,7 @@ static unsigned long __new_context(struct mm_struct *mm) goto set_asid; /* We're out of ASIDs, so increment current_version */ - ver = atomic_long_add_return_relaxed(num_asids, ¤t_version); + ver = atomic_long_add_return_relaxed(BIT(SATP_ASID_BITS), ¤t_version); /* Flush everything */ __flush_context(); @@ -247,7 +246,6 @@ static int __init asids_init(void) /* Pre-compute ASID details */ if (asid_bits) { num_asids = 1 << asid_bits; - asid_mask = num_asids - 1; } /* @@ -255,7 +253,7 @@ static int __init asids_init(void) * at-least twice more than CPUs */ if (num_asids > (2 * num_possible_cpus())) { - atomic_long_set(¤t_version, num_asids); + atomic_long_set(¤t_version, BIT(SATP_ASID_BITS)); context_asid_map = bitmap_zalloc(num_asids, GFP_KERNEL); if (!context_asid_map) From 8d3e7613f97e4c117467be126d9c0013e9937f77 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:52 -0700 Subject: [PATCH 35/42] riscv: mm: Make asid_bits a local variable This variable is only used inside asids_init(). 
Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-12-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/context.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 5315af06cd4d..0bf6d0070a14 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -20,7 +20,6 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator); -static unsigned long asid_bits; static unsigned long num_asids; static atomic_long_t current_version; @@ -226,7 +225,7 @@ static inline void set_mm(struct mm_struct *prev, static int __init asids_init(void) { - unsigned long old; + unsigned long asid_bits, old; /* Figure-out number of ASID bits in HW */ old = csr_read(CSR_SATP); From 8fc21cc672e8ea7954fdc6b59d9ce350730e9c92 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:53 -0700 Subject: [PATCH 36/42] riscv: mm: Preserve global TLB entries when switching contexts If the CPU does not support multiple ASIDs, all MM contexts use ASID 0. In this case, it is still beneficial to flush the TLB by ASID, as the single-ASID variant of the sfence.vma instruction preserves TLB entries for global (kernel) pages. This optimization is recommended by the RISC-V privileged specification: If the implementation does not provide ASIDs, or software chooses to always use ASID 0, then after every satp write, software should execute SFENCE.VMA with rs1=x0. In the common case that no global translations have been modified, rs2 should be set to a register other than x0 but which contains the value zero, so that global translations are not flushed. It is not possible to apply this optimization when using the ASID allocator, because that code must flush the TLB for all ASIDs at once when incrementing the version number. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-13-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 0bf6d0070a14..60cb0b82240e 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -200,7 +200,7 @@ static void set_mm_noasid(struct mm_struct *mm) { /* Switch the page table and blindly nuke entire local TLB */ csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode); - local_flush_tlb_all(); + local_flush_tlb_all_asid(0); } static inline void set_mm(struct mm_struct *prev, From daef19263fc102551d734f39d9ad417098ffb360 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:54 -0700 Subject: [PATCH 37/42] riscv: mm: Always use an ASID to flush mm contexts Even if multiple ASIDs are not supported, using the single-ASID variant of the sfence.vma instruction preserves TLB entries for global (kernel) pages. So it is always more efficient to use the single-ASID code path. 
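To make the trade-off concrete, here is a sketch of the two local-flush forms
involved (illustrative inline asm mirroring the helpers in asm/tlbflush.h; the
sketch_* names are invented and this is not new kernel code):

    /* Flushes everything, including global (kernel) mappings. */
    static inline void sketch_flush_all(void)
    {
        __asm__ __volatile__ ("sfence.vma" : : : "memory");
    }

    /* Flushes only non-global mappings for @asid; kernel entries survive. */
    static inline void sketch_flush_asid(unsigned long asid)
    {
        __asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (asid) : "memory");
    }

With this change and the previous one, the context-switch and flush paths pass
ASID 0 to the second form instead of issuing the first, per the
privileged-specification text quoted in the previous patch.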
Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-14-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/tlbflush.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 35266dd9a9a2..44e7ed4e194f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -109,8 +109,7 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, static inline unsigned long get_mm_asid(struct mm_struct *mm) { - return static_branch_unlikely(&use_asid_allocator) ? - cntx2asid(atomic_long_read(&mm->context.id)) : FLUSH_TLB_NO_ASID; + return cntx2asid(atomic_long_read(&mm->context.id)); } void flush_tlb_mm(struct mm_struct *mm) From 13953e381ae2891d1699a73c20e8e7fa31699426 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 20 Mar 2024 14:47:11 +0800 Subject: [PATCH 38/42] riscv: Remove redundant CONFIG_64BIT from pgtable_l{4,5}_enabled IS_ENABLED(CONFIG_64BIT) in initialization of pgtable_l{4,5}_enabled is redundant, remove it. Signed-off-by: Dawei Li Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240320064712.442579-2-dawei.li@shingroup.cn Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 968761843203..9961864850cb 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -49,8 +49,8 @@ u64 satp_mode __ro_after_init = SATP_MODE_32; EXPORT_SYMBOL(satp_mode); #ifdef CONFIG_64BIT -bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); -bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l4_enabled = !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l5_enabled = !IS_ENABLED(CONFIG_XIP_KERNEL); EXPORT_SYMBOL(pgtable_l4_enabled); EXPORT_SYMBOL(pgtable_l5_enabled); #endif From 0fdbb06379b1126a8c69ceec28e6e506088614a2 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 20 Mar 2024 14:47:12 +0800 Subject: [PATCH 39/42] riscv: Annotate pgtable_l{4,5}_enabled with __ro_after_init pgtable_l{4,5}_enabled are read only after initialization, make explicit annotation of __ro_after_init on them. 
Signed-off-by: Dawei Li Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240320064712.442579-3-dawei.li@shingroup.cn Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9961864850cb..8631b33368ec 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -49,8 +49,8 @@ u64 satp_mode __ro_after_init = SATP_MODE_32; EXPORT_SYMBOL(satp_mode); #ifdef CONFIG_64BIT -bool pgtable_l4_enabled = !IS_ENABLED(CONFIG_XIP_KERNEL); -bool pgtable_l5_enabled = !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l4_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l5_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL); EXPORT_SYMBOL(pgtable_l4_enabled); EXPORT_SYMBOL(pgtable_l5_enabled); #endif From dcb2743d1e701fc1a986c187adc11f6148316d21 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 25 Mar 2024 19:00:36 +0800 Subject: [PATCH 40/42] riscv: mm: still create swiotlb buffer for kmalloc() bouncing if required After commit f51f7a0fc2f4 ("riscv: enable DMA_BOUNCE_UNALIGNED_KMALLOC for !dma_coherent"), for non-coherent platforms with less than 4GB memory, we rely on users to pass "swiotlb=mmnn,force" kernel parameters to enable DMA bouncing for unaligned kmalloc() buffers. Now let's go further: If no bouncing needed for ZONE_DMA, let kernel automatically allocate 1MB swiotlb buffer per 1GB of RAM for kmalloc() bouncing on non-coherent platforms, so that no need to pass "swiotlb=mmnn,force" any more. The math of "1MB swiotlb buffer per 1GB of RAM for kmalloc() bouncing" is taken from arm64. Users can still force smaller swiotlb buffer by passing "swiotlb=mmnn". Signed-off-by: Jisheng Zhang Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240325110036.1564-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cache.h | 2 +- arch/riscv/mm/init.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/cache.h b/arch/riscv/include/asm/cache.h index 2174fe7bac9a..570e9d8acad1 100644 --- a/arch/riscv/include/asm/cache.h +++ b/arch/riscv/include/asm/cache.h @@ -26,8 +26,8 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_RISCV_DMA_NONCOHERENT extern int dma_cache_alignment; +#ifdef CONFIG_RISCV_DMA_NONCOHERENT #define dma_get_cache_alignment dma_get_cache_alignment static inline int dma_get_cache_alignment(void) { diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 8631b33368ec..2574f6a3b0e7 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -161,11 +161,25 @@ static void print_vm_layout(void) { } void __init mem_init(void) { + bool swiotlb = max_pfn > PFN_DOWN(dma32_phys_limit); #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif /* CONFIG_FLATMEM */ - swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE); + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb && + dma_cache_alignment != 1) { + /* + * If no bouncing needed for ZONE_DMA, allocate 1MB swiotlb + * buffer per 1GB of RAM for kmalloc() bouncing on + * non-coherent platforms. 
+ */ + unsigned long size = + DIV_ROUND_UP(memblock_phys_mem_size(), 1024); + swiotlb_adjust_size(min(swiotlb_size_or_default(), size)); + swiotlb = true; + } + + swiotlb_init(swiotlb, SWIOTLB_VERBOSE); memblock_free_all(); print_vm_layout(); From 48b4fc66939d5ed49da305cad9788dcd953b5cd2 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 25 Mar 2024 18:58:23 +0800 Subject: [PATCH 41/42] riscv: select ARCH_HAS_FAST_MULTIPLIER Currently, riscv linux requires at least IMA, so all platforms have a multiplier. And I assume the 'mul' efficiency is comparable or better than a sequence of five or so register-dependent arithmetic instructions. Select ARCH_HAS_FAST_MULTIPLIER to get slightly nicer codegen. Refer to commit f9b4192923fa ("[PATCH] bitops: hweight() speedup") for more details. In a simple benchmark test calling hweight64() in a loop, it got: about 14% performance improvement on JH7110, tested on Milkv Mars. about 23% performance improvement on TH1520 and SG2042, tested on Sipeed LPI4A and SG2042 platform. a slight performance drop on CV1800B, tested on milkv duo. Among all riscv platforms in my hands, this is the only one which sees a slight performance drop. It means the 'mul' isn't quick enough. However, the situation exists on x86 too, for example, P4 doesn't have fast integer multiplies as said in the above commit, x86 also selects ARCH_HAS_FAST_MULTIPLIER. So let's select ARCH_HAS_FAST_MULTIPLIER which can benefit almost riscv platforms. Samuel also provided some performance numbers: On Unmatched: 20% speedup for __sw_hweight32 and 30% speedup for __sw_hweight64. On D1: 8% speedup for __sw_hweight32 and 8% slowdown for __sw_hweight64. Signed-off-by: Jisheng Zhang Reviewed-by: Samuel Holland Tested-by: Samuel Holland Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240325105823.1483-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 2dac484ea645..6bec1bce6586 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -23,6 +23,7 @@ config RISCV select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX + select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE From 92cce91949a497a8a4615f9ba5813b03f7a1f1d5 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 11 Apr 2024 20:12:40 +0800 Subject: [PATCH 42/42] riscv: defconfig: Enable CONFIG_CLK_SOPHGO_CV1800 CONFIG_CLK_SOPHGO_CV1800 is required when booting the minimum system for CV1800 series board. Signed-off-by: Inochi Amaoto Link: https://lore.kernel.org/r/IA1PR20MB49537E8B2D1FAAA7D5B8BDA2BB052@IA1PR20MB4953.namprd20.prod.outlook.com Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index fc0ec2ee13bc..8c8a77f8eac2 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -233,6 +233,7 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_RENESAS_OSTM=y +CONFIG_CLK_SOPHGO_CV1800=y CONFIG_SUN8I_DE2_CCU=m CONFIG_SUN50I_IOMMU=y CONFIG_RPMSG_CHAR=y