From 8614c525b301b2b993bf99269a01975ea072b086 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 8 Nov 2022 15:46:05 -0600 Subject: [PATCH] crypto/aes: On ppc64le, use better instructions when available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several operations emulate instructions available on power9. Use the GOPPC64_power9 macro provided by the compiler to select the native instructions if the minimum cpu requirements are met. Likewise rework the LXSDX_BE to simplify usage when overriding it. It is only used in one place. All three configurations are tested via CI. On POWER9: pkg:crypto/cipher goos:linux goarch:ppc64le AESCBCEncrypt1K 949MB/s ± 0% 957MB/s ± 0% +0.83% AESCBCDecrypt1K 1.82GB/s ± 0% 1.99GB/s ± 0% +8.93% pkg:crypto/aes goos:linux goarch:ppc64le Encrypt 1.01GB/s ± 0% 1.05GB/s ± 0% +4.36% Decrypt 987MB/s ± 0% 1024MB/s ± 0% +3.77% Change-Id: I56d0eb845647dd3c43bcad71eb281b499e1d1789 Reviewed-on: https://go-review.googlesource.com/c/go/+/449116 Reviewed-by: Lynn Boger Auto-Submit: Paul Murphy TryBot-Result: Gopher Robot Reviewed-by: Joedian Reid Reviewed-by: Bryan Mills Run-TryBot: Paul Murphy --- src/crypto/aes/asm_ppc64x.s | 42 ++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/crypto/aes/asm_ppc64x.s b/src/crypto/aes/asm_ppc64x.s index c838d53923..8ac97ec281 100644 --- a/src/crypto/aes/asm_ppc64x.s +++ b/src/crypto/aes/asm_ppc64x.s @@ -48,7 +48,7 @@ // For P9 instruction emulation #define ESPERM V21 // Endian swapping permute into BE -#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXV +#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXVB16X // For {en,de}cryptBlockAsm #define BLK_INP R3 @@ -69,8 +69,15 @@ DATA ·rcon+0x40(SB)/8, $0x0000000000000000 DATA ·rcon+0x48(SB)/8, $0x0000000000000000 GLOBL ·rcon(SB), RODATA, $80 -// Emulate unaligned BE vector load/stores on LE targets #ifdef GOARCH_ppc64le +# ifdef GOPPC64_power9 +#define P8_LXVB16X(RA,RB,VT) LXVB16X (RA+RB), VT +#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB) +#define XXBRD_ON_LE(VA,VT) XXBRD VA, VT +# else +// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned +// doublewords and byte-swapping each doubleword to emulate BE load/stores. +#define NEEDS_ESPERM #define P8_LXVB16X(RA,RB,VT) \ LXVD2X (RA+RB), VT \ VPERM VT, VT, ESPERM, VT @@ -79,19 +86,15 @@ GLOBL ·rcon(SB), RODATA, $80 VPERM VS, VS, ESPERM, TMP2 \ STXVD2X TMP2, (RA+RB) -#define LXSDX_BE(RA,RB,VT) \ - LXSDX (RA+RB), VT \ - VPERM VT, VT, ESPERM, VT +#define XXBRD_ON_LE(VA,VT) \ + VPERM VA, VA, ESPERM, VT + +# endif // defined(GOPPC64_power9) #else -#define P8_LXVB16X(RA,RB,VT) \ - LXVD2X (RA+RB), VT - -#define P8_STXVB16X(VS,RA,RB) \ - STXVD2X VS, (RA+RB) - -#define LXSDX_BE(RA,RB,VT) \ - LXSDX (RA+RB), VT -#endif +#define P8_LXVB16X(RA,RB,VT) LXVD2X (RA+RB), VT +#define P8_STXVB16X(VS,RA,RB) STXVD2X VS, (RA+RB) +#define XXBRD_ON_LE(VA, VT) +#endif // defined(GOARCH_ppc64le) // func setEncryptKeyAsm(nr int, key *byte, enc *uint32, dec *uint32) TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0 @@ -101,7 +104,7 @@ TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0 MOVD enc+16(FP), OUTENC MOVD dec+24(FP), OUTDEC -#ifdef GOARCH_ppc64le +#ifdef NEEDS_ESPERM MOVD $·rcon(SB), PTR // PTR points to rcon addr LVX (PTR), ESPERM ADD $0x10, PTR @@ -191,7 +194,8 @@ loop128: RET l192: - LXSDX_BE(INP, R0, IN1) // Load next 8 bytes into upper half of VSR in BE order. + LXSDX (INP+R0), IN1 // Load next 8 bytes into upper half of VSR. + XXBRD_ON_LE(IN1, IN1) // and convert to BE ordering on LE hosts. MOVD $4, CNT // li 7,4 STXVD2X IN0, (R0+OUTENC) STXVD2X IN0, (R0+OUTDEC) @@ -309,7 +313,7 @@ TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0 MOVD xk+8(FP), R5 // Key pointer MOVD dst+16(FP), R3 // Dest pointer MOVD src+24(FP), R4 // Src pointer -#ifdef GOARCH_ppc64le +#ifdef NEEDS_ESPERM MOVD $·rcon(SB), R7 LVX (R7), ESPERM // Permute value for P8_ macros. #endif @@ -404,7 +408,7 @@ TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0 MOVD xk+8(FP), R5 // Key pointer MOVD dst+16(FP), R3 // Dest pointer MOVD src+24(FP), R4 // Src pointer -#ifdef GOARCH_ppc64le +#ifdef NEEDS_ESPERM MOVD $·rcon(SB), R7 LVX (R7), ESPERM // Permute value for P8_ macros. #endif @@ -622,7 +626,7 @@ TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0 MOVD enc+40(FP), ENC MOVD nr+48(FP), ROUNDS -#ifdef GOARCH_ppc64le +#ifdef NEEDS_ESPERM MOVD $·rcon(SB), R11 LVX (R11), ESPERM // Permute value for P8_ macros. #endif