linux/arch/x86/lib/hweight.S
Linus Torvalds 5780e39edb x86 assembly code improvements for v6.7 are:
- Micro-optimize the x86 bitops code
 - Define target-specific {raw,this}_cpu_try_cmpxchg{64,128}() to improve code generation
 - Define and use raw_cpu_try_cmpxchg() preempt_count_set()
 - Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op
 - Remove the unused __sw_hweight64() implementation on x86-32
 - Misc fixes and cleanups
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmU9BJkRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1iBKBAAl++uUEmjJM2nfS1AtKS5Rn0YJGoj7ZLy
 MMjFJwYzw7nWqbkCZJqQATicmOVvdEUacYibYYfX4QkH3ylC9Av3ta1HeUEzqLpX
 qG8W4jJPu/qlAOtGI4mQkq/yVminea6xy6l0vcCF+pezUKnwADP/YSL2Zsg03UsX
 Nelty29NrpN/qCcLUJk40CHRjBhfYBVEk+HtCMahnftzLSNZWHYTgoYA9x4zm4Hg
 LGiION+dwJKwaafaNw8/k1ikRJYfc5c1ZImi7YoOeCXXtBq7VHOHf76axok42m4s
 2FJ7QefioQ/Gs1Gojxd99080F4H/Elt6uj05hR2493ncN4WVNKqYBOMezrlG686D
 CuoOZvMaJ1LyaEntu1YWF3dtHIDTVjHhe5dyVclgu2a+v1gP/16xTLIf2z/cWtuX
 whOcalFc7AdGXnLtGACHnhbc5Yh/Ex9Y49+6WYgBrDcgNDCGdTOa/m8kFmKFgOjh
 x8Ot1xUjFI3utGgMlfKpYqw281ws+DjPiVvGi+fmUf8eBsq2IJuO9/f9oyfFfFSj
 1bzwrL5Qop/ccZAOxtuLnVVVZ+Cz/5wHmMTI0rRE5BsoiVUrtWZfD+E0/vqdavjG
 0eabDTdwUgXNBp6ex0TajXKtQY7FKg3tzIuPqHRvhRCvlt77MoSdcWRCc37ac5GJ
 M6mAeLcitu8=
 =7WUt
 -----END PGP SIGNATURE-----

Merge tag 'x86-asm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 assembly code updates from Ingo Molnar:

 - Micro-optimize the x86 bitops code

 - Define target-specific {raw,this}_cpu_try_cmpxchg{64,128}() to
   improve code generation

 - Define and use raw_cpu_try_cmpxchg() preempt_count_set()

 - Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op

 - Remove the unused __sw_hweight64() implementation on x86-32

 - Misc fixes and cleanups

* tag 'x86-asm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/lib: Address kernel-doc warnings
  x86/entry: Fix typos in comments
  x86/entry: Remove unused argument %rsi passed to exc_nmi()
  x86/bitops: Remove unused __sw_hweight64() assembly implementation on x86-32
  x86/percpu: Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op
  x86/percpu: Use raw_cpu_try_cmpxchg() in preempt_count_set()
  x86/percpu: Define raw_cpu_try_cmpxchg and this_cpu_try_cmpxchg()
  x86/percpu: Define {raw,this}_cpu_try_cmpxchg{64,128}
  x86/asm/bitops: Use __builtin_clz{l|ll} to evaluate constant expressions
2023-10-30 14:18:00 -10:00

76 lines
2.2 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asm.h>
/*
* unsigned int __sw_hweight32(unsigned int w)
* %rdi: w
*/
SYM_FUNC_START(__sw_hweight32)
#ifdef CONFIG_X86_64
movl %edi, %eax # w
#endif
__ASM_SIZE(push,) %__ASM_REG(dx)
movl %eax, %edx # w -> t
shrl %edx # t >>= 1
andl $0x55555555, %edx # t &= 0x55555555
subl %edx, %eax # w -= t
movl %eax, %edx # w -> t
shrl $2, %eax # w_tmp >>= 2
andl $0x33333333, %edx # t &= 0x33333333
andl $0x33333333, %eax # w_tmp &= 0x33333333
addl %edx, %eax # w = w_tmp + t
movl %eax, %edx # w -> t
shrl $4, %edx # t >>= 4
addl %edx, %eax # w_tmp += t
andl $0x0f0f0f0f, %eax # w_tmp &= 0x0f0f0f0f
imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101
shrl $24, %eax # w = w_tmp >> 24
__ASM_SIZE(pop,) %__ASM_REG(dx)
RET
SYM_FUNC_END(__sw_hweight32)
EXPORT_SYMBOL(__sw_hweight32)
/*
* No 32-bit variant, because it's implemented as an inline wrapper
* on top of __arch_hweight32():
*/
#ifdef CONFIG_X86_64
SYM_FUNC_START(__sw_hweight64)
pushq %rdi
pushq %rdx
movq %rdi, %rdx # w -> t
movabsq $0x5555555555555555, %rax
shrq %rdx # t >>= 1
andq %rdx, %rax # t &= 0x5555555555555555
movabsq $0x3333333333333333, %rdx
subq %rax, %rdi # w -= t
movq %rdi, %rax # w -> t
shrq $2, %rdi # w_tmp >>= 2
andq %rdx, %rax # t &= 0x3333333333333333
andq %rdi, %rdx # w_tmp &= 0x3333333333333333
addq %rdx, %rax # w = w_tmp + t
movq %rax, %rdx # w -> t
shrq $4, %rdx # t >>= 4
addq %rdx, %rax # w_tmp += t
movabsq $0x0f0f0f0f0f0f0f0f, %rdx
andq %rdx, %rax # w_tmp &= 0x0f0f0f0f0f0f0f0f
movabsq $0x0101010101010101, %rdx
imulq %rdx, %rax # w_tmp *= 0x0101010101010101
shrq $56, %rax # w = w_tmp >> 56
popq %rdx
popq %rdi
RET
SYM_FUNC_END(__sw_hweight64)
EXPORT_SYMBOL(__sw_hweight64)
#endif