diff --git a/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-aes.yaml b/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-aes.yaml new file mode 100644 index 000000000000..ee2c099981b2 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-aes.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/intel,keembay-ocs-aes.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Intel Keem Bay OCS AES Device Tree Bindings + +maintainers: + - Daniele Alessandrelli + +description: + The Intel Keem Bay Offload and Crypto Subsystem (OCS) AES engine provides + hardware-accelerated AES/SM4 encryption/decryption. + +properties: + compatible: + const: intel,keembay-ocs-aes + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + #include + crypto@30008000 { + compatible = "intel,keembay-ocs-aes"; + reg = <0x30008000 0x1000>; + interrupts = ; + clocks = <&scmi_clk 95>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 80881fb36404..fb75813e23d4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8016,7 +8016,7 @@ F: drivers/staging/hikey9xx/ HISILICON TRUE RANDOM NUMBER GENERATOR V2 SUPPORT M: Zaibo Xu S: Maintained -F: drivers/char/hw_random/hisi-trng-v2.c +F: drivers/crypto/hisilicon/trng/trng.c HISILICON V3XX SPI NOR FLASH Controller Driver M: John Garry @@ -8975,13 +8975,23 @@ M: Deepak Saxena S: Maintained F: drivers/char/hw_random/ixp4xx-rng.c -INTEL KEEMBAY DRM DRIVER +INTEL KEEM BAY DRM DRIVER M: Anitha Chrisanthus M: Edmund Dea S: Maintained F: Documentation/devicetree/bindings/display/intel,kmb_display.yaml F: drivers/gpu/drm/kmb/ +INTEL KEEM BAY OCS AES/SM4 CRYPTO DRIVER +M: Daniele Alessandrelli +S: Maintained +F: Documentation/devicetree/bindings/crypto/intel,keembay-ocs-aes.yaml +F: drivers/crypto/keembay/Kconfig +F: drivers/crypto/keembay/Makefile +F: drivers/crypto/keembay/keembay-ocs-aes-core.c +F: drivers/crypto/keembay/ocs-aes.c +F: drivers/crypto/keembay/ocs-aes.h + INTEL MANAGEMENT ENGINE (mei) M: Tomas Winkler L: linux-kernel@vger.kernel.org diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 4d1707388d94..312428d83eed 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -386,20 +386,32 @@ ENTRY(ce_aes_ctr_encrypt) .Lctrloop4x: subs r4, r4, #4 bmi .Lctr1x - add r6, r6, #1 + + /* + * NOTE: the sequence below has been carefully tweaked to avoid + * a silicon erratum that exists in Cortex-A57 (#1742098) and + * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs + * may produce an incorrect result if they take their input from a + * register of which a single 32-bit lane has been updated the last + * time it was modified. To work around this, the lanes of registers + * q0-q3 below are not manipulated individually, and the different + * counter values are prepared by successive manipulations of q7. + */ + add ip, r6, #1 vmov q0, q7 + rev ip, ip + add lr, r6, #2 + vmov s31, ip @ set lane 3 of q1 via q7 + add ip, r6, #3 + rev lr, lr vmov q1, q7 - rev ip, r6 - add r6, r6, #1 + vmov s31, lr @ set lane 3 of q2 via q7 + rev ip, ip vmov q2, q7 - vmov s7, ip - rev ip, r6 - add r6, r6, #1 + vmov s31, ip @ set lane 3 of q3 via q7 + add r6, r6, #4 vmov q3, q7 - vmov s11, ip - rev ip, r6 - add r6, r6, #1 - vmov s15, ip + vld1.8 {q4-q5}, [r1]! vld1.8 {q6}, [r1]! vld1.8 {q15}, [r1]! diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index bda8bf17631e..f70af1d0514b 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -19,7 +19,7 @@ MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("ecb(aes)"); -MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("cbc(aes)-all"); MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)"); @@ -191,7 +191,8 @@ static int cbc_init(struct crypto_skcipher *tfm) struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); unsigned int reqsize; - ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); + ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->enc_tfm)) return PTR_ERR(ctx->enc_tfm); @@ -441,7 +442,8 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_flags = CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_NEED_FALLBACK, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index 59da6c0b63b6..7b5cf8430c6d 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -23,7 +23,7 @@ asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, int nrounds); asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, - int nrounds); + int nrounds, unsigned int nbytes); asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); @@ -42,24 +42,24 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, { u8 buf[CHACHA_BLOCK_SIZE]; - while (bytes >= CHACHA_BLOCK_SIZE * 4) { - chacha_4block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 4; - src += CHACHA_BLOCK_SIZE * 4; - dst += CHACHA_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA_BLOCK_SIZE) { - chacha_block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE; - src += CHACHA_BLOCK_SIZE; - dst += CHACHA_BLOCK_SIZE; - state[12]++; + while (bytes > CHACHA_BLOCK_SIZE) { + unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); + + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= l; + src += l; + dst += l; + state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); } if (bytes) { - memcpy(buf, src, bytes); - chacha_block_xor_neon(state, buf, buf, nrounds); - memcpy(dst, buf, bytes); + const u8 *s = src; + u8 *d = dst; + + if (bytes != CHACHA_BLOCK_SIZE) + s = d = memcpy(buf, src, bytes); + chacha_block_xor_neon(state, d, s, nrounds); + if (d != dst) + memcpy(dst, buf, bytes); } } diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S index eb22926d4912..13d12f672656 100644 --- a/arch/arm/crypto/chacha-neon-core.S +++ b/arch/arm/crypto/chacha-neon-core.S @@ -47,6 +47,7 @@ */ #include +#include .text .fpu neon @@ -205,7 +206,7 @@ ENDPROC(hchacha_block_neon) .align 5 ENTRY(chacha_4block_xor_neon) - push {r4-r5} + push {r4, lr} mov r4, sp // preserve the stack pointer sub ip, sp, #0x20 // allocate a 32 byte buffer bic ip, ip, #0x1f // aligned to 32 bytes @@ -229,10 +230,10 @@ ENTRY(chacha_4block_xor_neon) vld1.32 {q0-q1}, [r0] vld1.32 {q2-q3}, [ip] - adr r5, .Lctrinc + adr lr, .Lctrinc vdup.32 q15, d7[1] vdup.32 q14, d7[0] - vld1.32 {q4}, [r5, :128] + vld1.32 {q4}, [lr, :128] vdup.32 q13, d6[1] vdup.32 q12, d6[0] vdup.32 q11, d5[1] @@ -455,7 +456,7 @@ ENTRY(chacha_4block_xor_neon) // Re-interleave the words in the first two rows of each block (x0..7). // Also add the counter values 0-3 to x12[0-3]. - vld1.32 {q8}, [r5, :128] // load counter values 0-3 + vld1.32 {q8}, [lr, :128] // load counter values 0-3 vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1) vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3) vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5) @@ -493,6 +494,8 @@ ENTRY(chacha_4block_xor_neon) // Re-interleave the words in the last two rows of each block (x8..15). vld1.32 {q8-q9}, [sp, :256] + mov sp, r4 // restore original stack pointer + ldr r4, [r4, #8] // load number of bytes vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13) vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15) vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9) @@ -520,41 +523,121 @@ ENTRY(chacha_4block_xor_neon) // XOR the rest of the data with the keystream vld1.8 {q0-q1}, [r2]! + subs r4, r4, #96 veor q0, q0, q8 veor q1, q1, q12 + ble .Lle96 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q2 veor q1, q1, q6 + ble .Lle128 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q10 veor q1, q1, q14 + ble .Lle160 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q4 veor q1, q1, q5 + ble .Lle192 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q9 veor q1, q1, q13 + ble .Lle224 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q3 veor q1, q1, q7 + blt .Llt256 +.Lout: vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2] - mov sp, r4 // restore original stack pointer veor q0, q0, q11 veor q1, q1, q15 vst1.8 {q0-q1}, [r1] - pop {r4-r5} - bx lr + pop {r4, pc} + +.Lle192: + vmov q4, q9 + vmov q5, q13 + +.Lle160: + // nothing to do + +.Lfinalblock: + // Process the final block if processing less than 4 full blocks. + // Entered with 32 bytes of ChaCha cipher stream in q4-q5, and the + // previous 32 byte output block that still needs to be written at + // [r1] in q0-q1. + beq .Lfullblock + +.Lpartialblock: + adr lr, .Lpermute + 32 + add r2, r2, r4 + add lr, lr, r4 + add r4, r4, r1 + + vld1.8 {q2-q3}, [lr] + vld1.8 {q6-q7}, [r2] + + add r4, r4, #32 + + vtbl.8 d4, {q4-q5}, d4 + vtbl.8 d5, {q4-q5}, d5 + vtbl.8 d6, {q4-q5}, d6 + vtbl.8 d7, {q4-q5}, d7 + + veor q6, q6, q2 + veor q7, q7, q3 + + vst1.8 {q6-q7}, [r4] // overlapping stores + vst1.8 {q0-q1}, [r1] + pop {r4, pc} + +.Lfullblock: + vmov q11, q4 + vmov q15, q5 + b .Lout +.Lle96: + vmov q4, q2 + vmov q5, q6 + b .Lfinalblock +.Lle128: + vmov q4, q10 + vmov q5, q14 + b .Lfinalblock +.Lle224: + vmov q4, q3 + vmov q5, q7 + b .Lfinalblock +.Llt256: + vmov q4, q11 + vmov q5, q15 + b .Lpartialblock ENDPROC(chacha_4block_xor_neon) + + .align L1_CACHE_SHIFT +.Lpermute: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 + .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 + .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c index e79b1fb4b4dc..de9100c67b37 100644 --- a/arch/arm/crypto/sha1-ce-glue.c +++ b/arch/arm/crypto/sha1-ce-glue.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/crypto/sha1.h b/arch/arm/crypto/sha1.h index 758db3e9ff0a..b1b7e21da2c3 100644 --- a/arch/arm/crypto/sha1.h +++ b/arch/arm/crypto/sha1.h @@ -3,7 +3,7 @@ #define ASM_ARM_CRYPTO_SHA1_H #include -#include +#include extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, unsigned int len); diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index 4e954b3f7ecd..6c2b849e459d 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c index 0071e5e4411a..cfe36ae0f3f5 100644 --- a/arch/arm/crypto/sha1_neon_glue.c +++ b/arch/arm/crypto/sha1_neon_glue.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c index 87f0b62386c6..c62ce89dd3e0 100644 --- a/arch/arm/crypto/sha2-ce-glue.c +++ b/arch/arm/crypto/sha2-ce-glue.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index b8a4f79020cf..433ee4ddce6c 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c index 79820b9e2541..701706262ef3 100644 --- a/arch/arm/crypto/sha256_neon_glue.c +++ b/arch/arm/crypto/sha256_neon_glue.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c index 8775aa42bbbe..0635a65aa488 100644 --- a/arch/arm/crypto/sha512-glue.c +++ b/arch/arm/crypto/sha512-glue.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c index 96cb94403540..c879ad32db51 100644 --- a/arch/arm/crypto/sha512-neon-glue.c +++ b/arch/arm/crypto/sha512-neon-glue.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 5cfe3cf6f2ac..5e7d86cf5dfa 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1082,6 +1082,7 @@ CONFIG_CRYPTO_DEV_CCREE=m CONFIG_CRYPTO_DEV_HISI_SEC2=m CONFIG_CRYPTO_DEV_HISI_ZIP=m CONFIG_CRYPTO_DEV_HISI_HPRE=m +CONFIG_CRYPTO_DEV_HISI_TRNG=m CONFIG_CMA_SIZE_MBYTES=32 CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 395bbf64b2ab..34b8a89197be 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S index e90386a7db8e..b70ac76f2610 100644 --- a/arch/arm64/crypto/chacha-neon-core.S +++ b/arch/arm64/crypto/chacha-neon-core.S @@ -195,7 +195,6 @@ SYM_FUNC_START(chacha_4block_xor_neon) adr_l x10, .Lpermute and x5, x4, #63 add x10, x10, x5 - add x11, x10, #64 // // This function encrypts four consecutive ChaCha blocks by loading @@ -645,11 +644,11 @@ CPU_BE( rev a15, a15 ) zip2 v31.4s, v14.4s, v15.4s eor a15, a15, w9 - mov x3, #64 + add x3, x2, x4 + sub x3, x3, #128 // start of last block + subs x5, x4, #128 - add x6, x5, x2 - csel x3, x3, xzr, ge - csel x2, x2, x6, ge + csel x2, x2, x3, ge // interleave 64-bit words in state n, n+2 zip1 v0.2d, v16.2d, v18.2d @@ -658,13 +657,10 @@ CPU_BE( rev a15, a15 ) zip1 v8.2d, v17.2d, v19.2d zip2 v12.2d, v17.2d, v19.2d stp a2, a3, [x1, #-56] - ld1 {v16.16b-v19.16b}, [x2], x3 subs x6, x4, #192 - ccmp x3, xzr, #4, lt - add x7, x6, x2 - csel x3, x3, xzr, eq - csel x2, x2, x7, eq + ld1 {v16.16b-v19.16b}, [x2], #64 + csel x2, x2, x3, ge zip1 v1.2d, v20.2d, v22.2d zip2 v5.2d, v20.2d, v22.2d @@ -672,13 +668,10 @@ CPU_BE( rev a15, a15 ) zip1 v9.2d, v21.2d, v23.2d zip2 v13.2d, v21.2d, v23.2d stp a6, a7, [x1, #-40] - ld1 {v20.16b-v23.16b}, [x2], x3 subs x7, x4, #256 - ccmp x3, xzr, #4, lt - add x8, x7, x2 - csel x3, x3, xzr, eq - csel x2, x2, x8, eq + ld1 {v20.16b-v23.16b}, [x2], #64 + csel x2, x2, x3, ge zip1 v2.2d, v24.2d, v26.2d zip2 v6.2d, v24.2d, v26.2d @@ -686,12 +679,10 @@ CPU_BE( rev a15, a15 ) zip1 v10.2d, v25.2d, v27.2d zip2 v14.2d, v25.2d, v27.2d stp a10, a11, [x1, #-24] - ld1 {v24.16b-v27.16b}, [x2], x3 subs x8, x4, #320 - ccmp x3, xzr, #4, lt - add x9, x8, x2 - csel x2, x2, x9, eq + ld1 {v24.16b-v27.16b}, [x2], #64 + csel x2, x2, x3, ge zip1 v3.2d, v28.2d, v30.2d zip2 v7.2d, v28.2d, v30.2d @@ -699,151 +690,105 @@ CPU_BE( rev a15, a15 ) zip1 v11.2d, v29.2d, v31.2d zip2 v15.2d, v29.2d, v31.2d stp a14, a15, [x1, #-8] + + tbnz x5, #63, .Lt128 ld1 {v28.16b-v31.16b}, [x2] // xor with corresponding input, write to output - tbnz x5, #63, 0f eor v16.16b, v16.16b, v0.16b eor v17.16b, v17.16b, v1.16b eor v18.16b, v18.16b, v2.16b eor v19.16b, v19.16b, v3.16b - st1 {v16.16b-v19.16b}, [x1], #64 - cbz x5, .Lout - tbnz x6, #63, 1f + tbnz x6, #63, .Lt192 + eor v20.16b, v20.16b, v4.16b eor v21.16b, v21.16b, v5.16b eor v22.16b, v22.16b, v6.16b eor v23.16b, v23.16b, v7.16b - st1 {v20.16b-v23.16b}, [x1], #64 - cbz x6, .Lout - tbnz x7, #63, 2f + st1 {v16.16b-v19.16b}, [x1], #64 + tbnz x7, #63, .Lt256 + eor v24.16b, v24.16b, v8.16b eor v25.16b, v25.16b, v9.16b eor v26.16b, v26.16b, v10.16b eor v27.16b, v27.16b, v11.16b - st1 {v24.16b-v27.16b}, [x1], #64 - cbz x7, .Lout - tbnz x8, #63, 3f + st1 {v20.16b-v23.16b}, [x1], #64 + tbnz x8, #63, .Lt320 + eor v28.16b, v28.16b, v12.16b eor v29.16b, v29.16b, v13.16b eor v30.16b, v30.16b, v14.16b eor v31.16b, v31.16b, v15.16b + + st1 {v24.16b-v27.16b}, [x1], #64 st1 {v28.16b-v31.16b}, [x1] .Lout: frame_pop ret - // fewer than 128 bytes of in/output -0: ld1 {v8.16b}, [x10] - ld1 {v9.16b}, [x11] - movi v10.16b, #16 - sub x2, x1, #64 - add x1, x1, x5 - ld1 {v16.16b-v19.16b}, [x2] - tbl v4.16b, {v0.16b-v3.16b}, v8.16b - tbx v20.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v5.16b, {v0.16b-v3.16b}, v8.16b - tbx v21.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v6.16b, {v0.16b-v3.16b}, v8.16b - tbx v22.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v7.16b, {v0.16b-v3.16b}, v8.16b - tbx v23.16b, {v16.16b-v19.16b}, v9.16b - - eor v20.16b, v20.16b, v4.16b - eor v21.16b, v21.16b, v5.16b - eor v22.16b, v22.16b, v6.16b - eor v23.16b, v23.16b, v7.16b - st1 {v20.16b-v23.16b}, [x1] - b .Lout - // fewer than 192 bytes of in/output -1: ld1 {v8.16b}, [x10] - ld1 {v9.16b}, [x11] - movi v10.16b, #16 - add x1, x1, x6 - tbl v0.16b, {v4.16b-v7.16b}, v8.16b - tbx v20.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v1.16b, {v4.16b-v7.16b}, v8.16b - tbx v21.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v2.16b, {v4.16b-v7.16b}, v8.16b - tbx v22.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v3.16b, {v4.16b-v7.16b}, v8.16b - tbx v23.16b, {v16.16b-v19.16b}, v9.16b +.Lt192: cbz x5, 1f // exactly 128 bytes? + ld1 {v28.16b-v31.16b}, [x10] + add x5, x5, x1 + tbl v28.16b, {v4.16b-v7.16b}, v28.16b + tbl v29.16b, {v4.16b-v7.16b}, v29.16b + tbl v30.16b, {v4.16b-v7.16b}, v30.16b + tbl v31.16b, {v4.16b-v7.16b}, v31.16b - eor v20.16b, v20.16b, v0.16b - eor v21.16b, v21.16b, v1.16b - eor v22.16b, v22.16b, v2.16b - eor v23.16b, v23.16b, v3.16b - st1 {v20.16b-v23.16b}, [x1] +0: eor v20.16b, v20.16b, v28.16b + eor v21.16b, v21.16b, v29.16b + eor v22.16b, v22.16b, v30.16b + eor v23.16b, v23.16b, v31.16b + st1 {v20.16b-v23.16b}, [x5] // overlapping stores +1: st1 {v16.16b-v19.16b}, [x1] b .Lout + // fewer than 128 bytes of in/output +.Lt128: ld1 {v28.16b-v31.16b}, [x10] + add x5, x5, x1 + sub x1, x1, #64 + tbl v28.16b, {v0.16b-v3.16b}, v28.16b + tbl v29.16b, {v0.16b-v3.16b}, v29.16b + tbl v30.16b, {v0.16b-v3.16b}, v30.16b + tbl v31.16b, {v0.16b-v3.16b}, v31.16b + ld1 {v16.16b-v19.16b}, [x1] // reload first output block + b 0b + // fewer than 256 bytes of in/output -2: ld1 {v4.16b}, [x10] - ld1 {v5.16b}, [x11] - movi v6.16b, #16 - add x1, x1, x7 +.Lt256: cbz x6, 2f // exactly 192 bytes? + ld1 {v4.16b-v7.16b}, [x10] + add x6, x6, x1 tbl v0.16b, {v8.16b-v11.16b}, v4.16b - tbx v24.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v1.16b, {v8.16b-v11.16b}, v4.16b - tbx v25.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v2.16b, {v8.16b-v11.16b}, v4.16b - tbx v26.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v3.16b, {v8.16b-v11.16b}, v4.16b - tbx v27.16b, {v20.16b-v23.16b}, v5.16b - - eor v24.16b, v24.16b, v0.16b - eor v25.16b, v25.16b, v1.16b - eor v26.16b, v26.16b, v2.16b - eor v27.16b, v27.16b, v3.16b - st1 {v24.16b-v27.16b}, [x1] - b .Lout - - // fewer than 320 bytes of in/output -3: ld1 {v4.16b}, [x10] - ld1 {v5.16b}, [x11] - movi v6.16b, #16 - add x1, x1, x8 - tbl v0.16b, {v12.16b-v15.16b}, v4.16b - tbx v28.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v1.16b, {v12.16b-v15.16b}, v4.16b - tbx v29.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v2.16b, {v12.16b-v15.16b}, v4.16b - tbx v30.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v3.16b, {v12.16b-v15.16b}, v4.16b - tbx v31.16b, {v24.16b-v27.16b}, v5.16b + tbl v1.16b, {v8.16b-v11.16b}, v5.16b + tbl v2.16b, {v8.16b-v11.16b}, v6.16b + tbl v3.16b, {v8.16b-v11.16b}, v7.16b eor v28.16b, v28.16b, v0.16b eor v29.16b, v29.16b, v1.16b eor v30.16b, v30.16b, v2.16b eor v31.16b, v31.16b, v3.16b - st1 {v28.16b-v31.16b}, [x1] + st1 {v28.16b-v31.16b}, [x6] // overlapping stores +2: st1 {v20.16b-v23.16b}, [x1] + b .Lout + + // fewer than 320 bytes of in/output +.Lt320: cbz x7, 3f // exactly 256 bytes? + ld1 {v4.16b-v7.16b}, [x10] + add x7, x7, x1 + tbl v0.16b, {v12.16b-v15.16b}, v4.16b + tbl v1.16b, {v12.16b-v15.16b}, v5.16b + tbl v2.16b, {v12.16b-v15.16b}, v6.16b + tbl v3.16b, {v12.16b-v15.16b}, v7.16b + + eor v28.16b, v28.16b, v0.16b + eor v29.16b, v29.16b, v1.16b + eor v30.16b, v30.16b, v2.16b + eor v31.16b, v31.16b, v3.16b + st1 {v28.16b-v31.16b}, [x7] // overlapping stores +3: st1 {v24.16b-v27.16b}, [x1] b .Lout SYM_FUNC_END(chacha_4block_xor_neon) @@ -851,7 +796,7 @@ SYM_FUNC_END(chacha_4block_xor_neon) .align L1_CACHE_SHIFT .Lpermute: .set .Li, 0 - .rept 192 + .rept 128 .byte (.Li - 64) .set .Li, .Li + 1 .endr diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index 6b958dcdf136..7868330dd54e 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -544,7 +544,22 @@ CPU_LE( rev w8, w8 ) ext XL.16b, XL.16b, XL.16b, #8 rev64 XL.16b, XL.16b eor XL.16b, XL.16b, KS0.16b + + .if \enc == 1 st1 {XL.16b}, [x10] // store tag + .else + ldp x11, x12, [sp, #40] // load tag pointer and authsize + adr_l x17, .Lpermute_table + ld1 {KS0.16b}, [x11] // load supplied tag + add x17, x17, x12 + ld1 {KS1.16b}, [x17] // load permute vector + + cmeq XL.16b, XL.16b, KS0.16b // compare tags + mvn XL.16b, XL.16b // -1 for fail, 0 for pass + tbl XL.16b, {XL.16b}, KS1.16b // keep authsize bytes only + sminv b0, XL.16b // signed minimum across XL + smov w0, v0.b[0] // return b0 + .endif 4: ldp x29, x30, [sp], #32 ret diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 8536008e3e35..720cd3a58da3 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -55,10 +55,10 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[], u64 const h[][2], u64 dg[], u8 ctr[], u32 const rk[], int rounds, u8 tag[]); - -asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[], - u64 const h[][2], u64 dg[], u8 ctr[], - u32 const rk[], int rounds, u8 tag[]); +asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[], + u64 const h[][2], u64 dg[], u8 ctr[], + u32 const rk[], int rounds, const u8 l[], + const u8 tag[], u64 authsize); static int ghash_init(struct shash_desc *desc) { @@ -168,7 +168,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) put_unaligned_be64(ctx->digest[1], dst); put_unaligned_be64(ctx->digest[0], dst + 8); - *ctx = (struct ghash_desc_ctx){}; + memzero_explicit(ctx, sizeof(*ctx)); return 0; } @@ -458,6 +458,7 @@ static int gcm_decrypt(struct aead_request *req) unsigned int authsize = crypto_aead_authsize(aead); int nrounds = num_rounds(&ctx->aes_key); struct skcipher_walk walk; + u8 otag[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u8 iv[AES_BLOCK_SIZE]; u64 dg[2] = {}; @@ -474,9 +475,15 @@ static int gcm_decrypt(struct aead_request *req) memcpy(iv, req->iv, GCM_IV_SIZE); put_unaligned_be32(2, iv + GCM_IV_SIZE); + scatterwalk_map_and_copy(otag, req->src, + req->assoclen + req->cryptlen - authsize, + authsize, 0); + err = skcipher_walk_aead_decrypt(&walk, req, false); if (likely(crypto_simd_usable())) { + int ret; + do { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; @@ -493,9 +500,10 @@ static int gcm_decrypt(struct aead_request *req) } kernel_neon_begin(); - pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, nrounds, - tag); + ret = pmull_gcm_decrypt(nbytes, dst, src, + ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, + nrounds, tag, otag, authsize); kernel_neon_end(); if (unlikely(!nbytes)) @@ -507,6 +515,11 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } while (walk.nbytes); + + if (err) + return err; + if (ret) + return -EBADMSG; } else { while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -548,23 +561,20 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_done(&walk, 0); } + if (err) + return err; + put_unaligned_be64(dg[1], tag); put_unaligned_be64(dg[0], tag + 8); put_unaligned_be32(1, iv + GCM_IV_SIZE); aes_encrypt(&ctx->aes_key, iv, iv); crypto_xor(tag, iv, AES_BLOCK_SIZE); + + if (crypto_memneq(tag, otag, authsize)) { + memzero_explicit(tag, AES_BLOCK_SIZE); + return -EBADMSG; + } } - - if (err) - return err; - - /* compare calculated auth tag with the stored one */ - scatterwalk_map_and_copy(buf, req->src, - req->assoclen + req->cryptlen - authsize, - authsize, 0); - - if (crypto_memneq(tag, buf, authsize)) - return -EBADMSG; return 0; } diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl index 6e5576d19af8..cbc980fb02e3 100644 --- a/arch/arm64/crypto/poly1305-armv8.pl +++ b/arch/arm64/crypto/poly1305-armv8.pl @@ -840,7 +840,6 @@ poly1305_blocks_neon: ldp d14,d15,[sp,#64] addp $ACC2,$ACC2,$ACC2 ldr x30,[sp,#8] - .inst 0xd50323bf // autiasp //////////////////////////////////////////////////////////////// // lazy reduction, but without narrowing @@ -882,6 +881,7 @@ poly1305_blocks_neon: str x4,[$ctx,#8] // set is_base2_26 ldr x29,[sp],#80 + .inst 0xd50323bf // autiasp ret .size poly1305_blocks_neon,.-poly1305_blocks_neon diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped index 8d1c4e420ccd..fb2822abf63a 100644 --- a/arch/arm64/crypto/poly1305-core.S_shipped +++ b/arch/arm64/crypto/poly1305-core.S_shipped @@ -779,7 +779,6 @@ poly1305_blocks_neon: ldp d14,d15,[sp,#64] addp v21.2d,v21.2d,v21.2d ldr x30,[sp,#8] - .inst 0xd50323bf // autiasp //////////////////////////////////////////////////////////////// // lazy reduction, but without narrowing @@ -821,6 +820,7 @@ poly1305_blocks_neon: str x4,[x0,#8] // set is_base2_26 ldr x29,[sp],#80 + .inst 0xd50323bf // autiasp ret .size poly1305_blocks_neon,.-poly1305_blocks_neon diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c index f33ada70c4ed..683de671741a 100644 --- a/arch/arm64/crypto/poly1305-glue.c +++ b/arch/arm64/crypto/poly1305-glue.c @@ -177,7 +177,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) } poly1305_emit(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; + memzero_explicit(dctx, sizeof(*dctx)); } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index c63b99211db3..c93121bcfdeb 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 5e956d7582a5..31ba3da5e61b 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 77bc6e72abae..9462f6088b3f 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index 9a4bbfc45f40..e5a2936f0886 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -94,7 +94,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out) if (digest_size & 4) put_unaligned_le32(sctx->st[i], (__le32 *)digest); - *sctx = (struct sha3_state){}; + memzero_explicit(sctx, sizeof(*sctx)); return 0; } diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index dc890a719f54..faa83f6cf376 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c index 370ccb29602f..2acff1c7df5d 100644 --- a/arch/arm64/crypto/sha512-glue.c +++ b/arch/arm64/crypto/sha512-glue.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h index 7315cc307397..cb68f9e284bb 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h @@ -41,7 +41,7 @@ do { \ */ #define read_octeon_64bit_hash_dword(index) \ ({ \ - u64 __value; \ + __be64 __value; \ \ __asm__ __volatile__ ( \ "dmfc2 %[rt],0x0048+" STR(index) \ diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c index 8c8ea139653e..5ee4ade99b99 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-md5.c +++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c @@ -68,10 +68,11 @@ static int octeon_md5_init(struct shash_desc *desc) { struct md5_state *mctx = shash_desc_ctx(desc); - mctx->hash[0] = cpu_to_le32(MD5_H0); - mctx->hash[1] = cpu_to_le32(MD5_H1); - mctx->hash[2] = cpu_to_le32(MD5_H2); - mctx->hash[3] = cpu_to_le32(MD5_H3); + mctx->hash[0] = MD5_H0; + mctx->hash[1] = MD5_H1; + mctx->hash[2] = MD5_H2; + mctx->hash[3] = MD5_H3; + cpu_to_le32_array(mctx->hash, 4); mctx->byte_count = 0; return 0; @@ -139,8 +140,9 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out) } memset(p, 0, padding); - mctx->block[14] = cpu_to_le32(mctx->byte_count << 3); - mctx->block[15] = cpu_to_le32(mctx->byte_count >> 29); + mctx->block[14] = mctx->byte_count << 3; + mctx->block[15] = mctx->byte_count >> 29; + cpu_to_le32_array(mctx->block + 14, 2); octeon_md5_transform(mctx->block); octeon_md5_read_hash(mctx); diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c index 75e79b47abfe..30f1d75208a5 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-sha1.c +++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c @@ -14,7 +14,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c index a682ce76716a..36cb92895d72 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-sha256.c +++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c index 50722a0cfb53..359f039820d8 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-sha512.c +++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c @@ -14,7 +14,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c index cb57be4ada61..b1e577cbf00c 100644 --- a/arch/powerpc/crypto/sha1-spe-glue.c +++ b/arch/powerpc/crypto/sha1-spe-glue.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c index b40dc50a6908..7a55d790cdb1 100644 --- a/arch/powerpc/crypto/sha1.c +++ b/arch/powerpc/crypto/sha1.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include void powerpc_sha_transform(u32 *state, const u8 *src); diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c index ceb0b6c980b3..a6e650a97d8f 100644 --- a/arch/powerpc/crypto/sha256-spe-glue.c +++ b/arch/powerpc/crypto/sha256-spe-glue.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -177,7 +177,7 @@ static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out) static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out) { - u32 D[SHA256_DIGEST_SIZE >> 2]; + __be32 D[SHA256_DIGEST_SIZE >> 2]; __be32 *dst = (__be32 *)out; ppc_spe_sha256_final(desc, (u8 *)D); diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index ada2f98c27b7..65ea12fc87a1 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -11,7 +11,8 @@ #define _CRYPTO_ARCH_S390_SHA_H #include -#include +#include +#include #include /* must be big enough for the largest SHA variant */ diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 698b1e6d3c14..a3fabf310a38 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include "sha.h" diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index b52c87e44939..24983f175676 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include "sha.h" diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c index 460cbbbaa44a..30ac49b635bf 100644 --- a/arch/s390/crypto/sha3_256_s390.c +++ b/arch/s390/crypto/sha3_256_s390.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c index 72cf460a53e5..e70d50f7620f 100644 --- a/arch/s390/crypto/sha3_512_s390.c +++ b/arch/s390/crypto/sha3_512_s390.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index ad29db085a18..29a6bd404c59 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -8,7 +8,7 @@ * Author(s): Jan Glauber (jang@de.ibm.com) */ #include -#include +#include #include #include #include diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index 0a423bcf6746..030efda05dbe 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include int verify_sha256_digest(void) diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index 4e9323229e71..82efb7f81c28 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -35,7 +35,7 @@ static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, if (keylen != sizeof(u32)) return -EINVAL; - *(__le32 *)mctx = le32_to_cpup((__le32 *)key); + *mctx = le32_to_cpup((__le32 *)key); return 0; } diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c index 111283fe837e..511db98d590a 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c @@ -33,10 +33,11 @@ static int md5_sparc64_init(struct shash_desc *desc) { struct md5_state *mctx = shash_desc_ctx(desc); - mctx->hash[0] = cpu_to_le32(MD5_H0); - mctx->hash[1] = cpu_to_le32(MD5_H1); - mctx->hash[2] = cpu_to_le32(MD5_H2); - mctx->hash[3] = cpu_to_le32(MD5_H3); + mctx->hash[0] = MD5_H0; + mctx->hash[1] = MD5_H1; + mctx->hash[2] = MD5_H2; + mctx->hash[3] = MD5_H3; + le32_to_cpu_array(mctx->hash, 4); mctx->byte_count = 0; return 0; diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c index dc017782be52..86a654cce5ab 100644 --- a/arch/sparc/crypto/sha1_glue.c +++ b/arch/sparc/crypto/sha1_glue.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c index ca2547df9652..60ec524cf9ca 100644 --- a/arch/sparc/crypto/sha256_glue.c +++ b/arch/sparc/crypto/sha256_glue.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c index 3b2ca732ff7a..273ce21918c1 100644 --- a/arch/sparc/crypto/sha512_glue.c +++ b/arch/sparc/crypto/sha512_glue.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c deleted file mode 100644 index 7b7dc05fa1a4..000000000000 --- a/arch/x86/crypto/aes_glue.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 1852b19a73a0..d1436c37008b 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -318,7 +318,7 @@ _initial_blocks_\@: # Main loop - Encrypt/Decrypt remaining blocks - cmp $0, %r13 + test %r13, %r13 je _zero_cipher_left_\@ sub $64, %r13 je _four_cipher_left_\@ @@ -437,7 +437,7 @@ _multiple_of_16_bytes_\@: mov PBlockLen(%arg2), %r12 - cmp $0, %r12 + test %r12, %r12 je _partial_done\@ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 @@ -474,7 +474,7 @@ _T_8_\@: add $8, %r10 sub $8, %r11 psrldq $8, %xmm0 - cmp $0, %r11 + test %r11, %r11 je _return_T_done_\@ _T_4_\@: movd %xmm0, %eax @@ -482,7 +482,7 @@ _T_4_\@: add $4, %r10 sub $4, %r11 psrldq $4, %xmm0 - cmp $0, %r11 + test %r11, %r11 je _return_T_done_\@ _T_123_\@: movd %xmm0, %eax @@ -619,7 +619,7 @@ _get_AAD_blocks\@: /* read the last <16B of AAD */ _get_AAD_rest\@: - cmp $0, %r11 + test %r11, %r11 je _get_AAD_done\@ READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 @@ -640,7 +640,7 @@ _get_AAD_done\@: .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ AAD_HASH operation mov PBlockLen(%arg2), %r13 - cmp $0, %r13 + test %r13, %r13 je _partial_block_done_\@ # Leave Macro if no partial blocks # Read in input data without over reading cmp $16, \PLAIN_CYPH_LEN @@ -692,7 +692,7 @@ _no_extra_mask_1_\@: pshufb %xmm2, %xmm3 pxor %xmm3, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_1_\@ # GHASH computation for the last <16 Byte block @@ -727,7 +727,7 @@ _no_extra_mask_2_\@: pshufb %xmm2, %xmm9 pxor %xmm9, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_2_\@ # GHASH computation for the last <16 Byte block @@ -747,7 +747,7 @@ _encode_done_\@: pshufb %xmm2, %xmm9 .endif # output encrypted Bytes - cmp $0, %r10 + test %r10, %r10 jl _partial_fill_\@ mov %r13, %r12 mov $16, %r13 @@ -2720,7 +2720,7 @@ SYM_FUNC_END(aesni_ctr_enc) */ SYM_FUNC_START(aesni_xts_crypt8) FRAME_BEGIN - cmpb $0, %cl + testb %cl, %cl movl $0, %ecx movl $240, %r10d leaq _aesni_enc4, %r11 diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 5fee47956f3b..2cf8e94d986a 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -369,7 +369,7 @@ _initial_num_blocks_is_0\@: _initial_blocks_encrypted\@: - cmp $0, %r13 + test %r13, %r13 je _zero_cipher_left\@ sub $128, %r13 @@ -528,7 +528,7 @@ _multiple_of_16_bytes\@: vmovdqu HashKey(arg2), %xmm13 mov PBlockLen(arg2), %r12 - cmp $0, %r12 + test %r12, %r12 je _partial_done\@ #GHASH computation for the last <16 Byte block @@ -573,7 +573,7 @@ _T_8\@: add $8, %r10 sub $8, %r11 vpsrldq $8, %xmm9, %xmm9 - cmp $0, %r11 + test %r11, %r11 je _return_T_done\@ _T_4\@: vmovd %xmm9, %eax @@ -581,7 +581,7 @@ _T_4\@: add $4, %r10 sub $4, %r11 vpsrldq $4, %xmm9, %xmm9 - cmp $0, %r11 + test %r11, %r11 je _return_T_done\@ _T_123\@: vmovd %xmm9, %eax @@ -625,7 +625,7 @@ _get_AAD_blocks\@: cmp $16, %r11 jge _get_AAD_blocks\@ vmovdqu \T8, \T7 - cmp $0, %r11 + test %r11, %r11 je _get_AAD_done\@ vpxor \T7, \T7, \T7 @@ -644,7 +644,7 @@ _get_AAD_rest8\@: vpxor \T1, \T7, \T7 jmp _get_AAD_rest8\@ _get_AAD_rest4\@: - cmp $0, %r11 + test %r11, %r11 jle _get_AAD_rest0\@ mov (%r10), %eax movq %rax, \T1 @@ -749,7 +749,7 @@ _done_read_partial_block_\@: .macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ AAD_HASH ENC_DEC mov PBlockLen(arg2), %r13 - cmp $0, %r13 + test %r13, %r13 je _partial_block_done_\@ # Leave Macro if no partial blocks # Read in input data without over reading cmp $16, \PLAIN_CYPH_LEN @@ -801,7 +801,7 @@ _no_extra_mask_1_\@: vpshufb %xmm2, %xmm3, %xmm3 vpxor %xmm3, \AAD_HASH, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_1_\@ # GHASH computation for the last <16 Byte block @@ -836,7 +836,7 @@ _no_extra_mask_2_\@: vpshufb %xmm2, %xmm9, %xmm9 vpxor %xmm9, \AAD_HASH, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_2_\@ # GHASH computation for the last <16 Byte block @@ -856,7 +856,7 @@ _encode_done_\@: vpshufb %xmm2, %xmm9, %xmm9 .endif # output encrypted Bytes - cmp $0, %r10 + test %r10, %r10 jl _partial_fill_\@ mov %r13, %r12 mov $16, %r13 diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 7d568012cc15..71fae5a09e56 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -251,7 +251,7 @@ $code.=<<___; mov %rax,8($ctx) mov %rax,16($ctx) - cmp \$0,$inp + test $inp,$inp je .Lno_key ___ $code.=<<___ if (!$kernel); diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index c44aba290fbb..646da46e8d10 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -210,7 +210,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) } poly1305_simd_emit(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; + memzero_explicit(dctx, sizeof(*dctx)); } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 18200135603f..44340a1139e0 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index dd06249229e1..3a5f6be7dbba 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 63470fd6ae32..684d58c8bc4f 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -278,7 +278,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE # "blocks" is the message length in SHA512 blocks ######################################################################## SYM_FUNC_START(sha512_transform_avx) - cmp $0, msglen + test msglen, msglen je nowork # Allocate Stack Space diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index 7946a1bee85b..50812af0b083 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -280,7 +280,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE ######################################################################## SYM_FUNC_START(sha512_transform_ssse3) - cmp $0, msglen + test msglen, msglen je nowork # Allocate Stack Space diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index b0b05c93409e..30e70f4fe2f7 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 7b37a412f829..f03b64d9cb51 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include "../boot/string.h" diff --git a/crypto/Kconfig b/crypto/Kconfig index 094ef56ab7b4..a367fcfeb5d4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -145,7 +145,7 @@ config CRYPTO_MANAGER_DISABLE_TESTS config CRYPTO_MANAGER_EXTRA_TESTS bool "Enable extra run-time crypto self tests" - depends on DEBUG_KERNEL && !CRYPTO_MANAGER_DISABLE_TESTS + depends on DEBUG_KERNEL && !CRYPTO_MANAGER_DISABLE_TESTS && CRYPTO_MANAGER help Enable extra run-time self tests of registered crypto algorithms, including randomized fuzz tests. @@ -201,7 +201,7 @@ config CRYPTO_AUTHENC config CRYPTO_TEST tristate "Testing module" - depends on m + depends on m || EXPERT select CRYPTO_MANAGER help Quick & dirty crypto test module. diff --git a/crypto/aegis128-core.c b/crypto/aegis128-core.c index 44fb4956f0dd..89dc1c559689 100644 --- a/crypto/aegis128-core.c +++ b/crypto/aegis128-core.c @@ -67,9 +67,11 @@ void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst, const u8 *src, unsigned int size); void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst, const u8 *src, unsigned int size); -void crypto_aegis128_final_simd(struct aegis_state *state, - union aegis_block *tag_xor, - u64 assoclen, u64 cryptlen); +int crypto_aegis128_final_simd(struct aegis_state *state, + union aegis_block *tag_xor, + unsigned int assoclen, + unsigned int cryptlen, + unsigned int authsize); static void crypto_aegis128_update(struct aegis_state *state) { @@ -84,9 +86,10 @@ static void crypto_aegis128_update(struct aegis_state *state) } static void crypto_aegis128_update_a(struct aegis_state *state, - const union aegis_block *msg) + const union aegis_block *msg, + bool do_simd) { - if (aegis128_do_simd()) { + if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) && do_simd) { crypto_aegis128_update_simd(state, msg); return; } @@ -95,9 +98,10 @@ static void crypto_aegis128_update_a(struct aegis_state *state, crypto_aegis_block_xor(&state->blocks[0], msg); } -static void crypto_aegis128_update_u(struct aegis_state *state, const void *msg) +static void crypto_aegis128_update_u(struct aegis_state *state, const void *msg, + bool do_simd) { - if (aegis128_do_simd()) { + if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) && do_simd) { crypto_aegis128_update_simd(state, msg); return; } @@ -126,27 +130,28 @@ static void crypto_aegis128_init(struct aegis_state *state, crypto_aegis_block_xor(&state->blocks[4], &crypto_aegis_const[1]); for (i = 0; i < 5; i++) { - crypto_aegis128_update_a(state, key); - crypto_aegis128_update_a(state, &key_iv); + crypto_aegis128_update_a(state, key, false); + crypto_aegis128_update_a(state, &key_iv, false); } } static void crypto_aegis128_ad(struct aegis_state *state, - const u8 *src, unsigned int size) + const u8 *src, unsigned int size, + bool do_simd) { if (AEGIS_ALIGNED(src)) { const union aegis_block *src_blk = (const union aegis_block *)src; while (size >= AEGIS_BLOCK_SIZE) { - crypto_aegis128_update_a(state, src_blk); + crypto_aegis128_update_a(state, src_blk, do_simd); size -= AEGIS_BLOCK_SIZE; src_blk++; } } else { while (size >= AEGIS_BLOCK_SIZE) { - crypto_aegis128_update_u(state, src); + crypto_aegis128_update_u(state, src, do_simd); size -= AEGIS_BLOCK_SIZE; src += AEGIS_BLOCK_SIZE; @@ -154,6 +159,12 @@ static void crypto_aegis128_ad(struct aegis_state *state, } } +static void crypto_aegis128_wipe_chunk(struct aegis_state *state, u8 *dst, + const u8 *src, unsigned int size) +{ + memzero_explicit(dst, size); +} + static void crypto_aegis128_encrypt_chunk(struct aegis_state *state, u8 *dst, const u8 *src, unsigned int size) { @@ -172,7 +183,7 @@ static void crypto_aegis128_encrypt_chunk(struct aegis_state *state, u8 *dst, crypto_aegis_block_xor(&tmp, &state->blocks[1]); crypto_aegis_block_xor(&tmp, src_blk); - crypto_aegis128_update_a(state, src_blk); + crypto_aegis128_update_a(state, src_blk, false); *dst_blk = tmp; @@ -188,7 +199,7 @@ static void crypto_aegis128_encrypt_chunk(struct aegis_state *state, u8 *dst, crypto_aegis_block_xor(&tmp, &state->blocks[1]); crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE); - crypto_aegis128_update_u(state, src); + crypto_aegis128_update_u(state, src, false); memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE); @@ -207,7 +218,7 @@ static void crypto_aegis128_encrypt_chunk(struct aegis_state *state, u8 *dst, crypto_aegis_block_xor(&tmp, &state->blocks[4]); crypto_aegis_block_xor(&tmp, &state->blocks[1]); - crypto_aegis128_update_a(state, &msg); + crypto_aegis128_update_a(state, &msg, false); crypto_aegis_block_xor(&msg, &tmp); @@ -233,7 +244,7 @@ static void crypto_aegis128_decrypt_chunk(struct aegis_state *state, u8 *dst, crypto_aegis_block_xor(&tmp, &state->blocks[1]); crypto_aegis_block_xor(&tmp, src_blk); - crypto_aegis128_update_a(state, &tmp); + crypto_aegis128_update_a(state, &tmp, false); *dst_blk = tmp; @@ -249,7 +260,7 @@ static void crypto_aegis128_decrypt_chunk(struct aegis_state *state, u8 *dst, crypto_aegis_block_xor(&tmp, &state->blocks[1]); crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE); - crypto_aegis128_update_a(state, &tmp); + crypto_aegis128_update_a(state, &tmp, false); memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE); @@ -271,7 +282,7 @@ static void crypto_aegis128_decrypt_chunk(struct aegis_state *state, u8 *dst, memset(msg.bytes + size, 0, AEGIS_BLOCK_SIZE - size); - crypto_aegis128_update_a(state, &msg); + crypto_aegis128_update_a(state, &msg, false); memcpy(dst, msg.bytes, size); } @@ -279,7 +290,8 @@ static void crypto_aegis128_decrypt_chunk(struct aegis_state *state, u8 *dst, static void crypto_aegis128_process_ad(struct aegis_state *state, struct scatterlist *sg_src, - unsigned int assoclen) + unsigned int assoclen, + bool do_simd) { struct scatter_walk walk; union aegis_block buf; @@ -296,13 +308,13 @@ static void crypto_aegis128_process_ad(struct aegis_state *state, if (pos > 0) { unsigned int fill = AEGIS_BLOCK_SIZE - pos; memcpy(buf.bytes + pos, src, fill); - crypto_aegis128_update_a(state, &buf); + crypto_aegis128_update_a(state, &buf, do_simd); pos = 0; left -= fill; src += fill; } - crypto_aegis128_ad(state, src, left); + crypto_aegis128_ad(state, src, left, do_simd); src += left & ~(AEGIS_BLOCK_SIZE - 1); left &= AEGIS_BLOCK_SIZE - 1; } @@ -318,13 +330,12 @@ static void crypto_aegis128_process_ad(struct aegis_state *state, if (pos > 0) { memset(buf.bytes + pos, 0, AEGIS_BLOCK_SIZE - pos); - crypto_aegis128_update_a(state, &buf); + crypto_aegis128_update_a(state, &buf, do_simd); } } static __always_inline int crypto_aegis128_process_crypt(struct aegis_state *state, - struct aead_request *req, struct skcipher_walk *walk, void (*crypt)(struct aegis_state *state, u8 *dst, const u8 *src, @@ -361,7 +372,7 @@ static void crypto_aegis128_final(struct aegis_state *state, crypto_aegis_block_xor(&tmp, &state->blocks[3]); for (i = 0; i < 7; i++) - crypto_aegis128_update_a(state, &tmp); + crypto_aegis128_update_a(state, &tmp, false); for (i = 0; i < AEGIS128_STATE_BLOCKS; i++) crypto_aegis_block_xor(tag_xor, &state->blocks[i]); @@ -389,7 +400,7 @@ static int crypto_aegis128_setauthsize(struct crypto_aead *tfm, return 0; } -static int crypto_aegis128_encrypt(struct aead_request *req) +static int crypto_aegis128_encrypt_generic(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); union aegis_block tag = {}; @@ -400,27 +411,18 @@ static int crypto_aegis128_encrypt(struct aead_request *req) struct aegis_state state; skcipher_walk_aead_encrypt(&walk, req, false); - if (aegis128_do_simd()) { - crypto_aegis128_init_simd(&state, &ctx->key, req->iv); - crypto_aegis128_process_ad(&state, req->src, req->assoclen); - crypto_aegis128_process_crypt(&state, req, &walk, - crypto_aegis128_encrypt_chunk_simd); - crypto_aegis128_final_simd(&state, &tag, req->assoclen, - cryptlen); - } else { - crypto_aegis128_init(&state, &ctx->key, req->iv); - crypto_aegis128_process_ad(&state, req->src, req->assoclen); - crypto_aegis128_process_crypt(&state, req, &walk, - crypto_aegis128_encrypt_chunk); - crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen); - } + crypto_aegis128_init(&state, &ctx->key, req->iv); + crypto_aegis128_process_ad(&state, req->src, req->assoclen, false); + crypto_aegis128_process_crypt(&state, &walk, + crypto_aegis128_encrypt_chunk); + crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen); scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen, authsize, 1); return 0; } -static int crypto_aegis128_decrypt(struct aead_request *req) +static int crypto_aegis128_decrypt_generic(struct aead_request *req) { static const u8 zeros[AEGIS128_MAX_AUTH_SIZE] = {}; struct crypto_aead *tfm = crypto_aead_reqtfm(req); @@ -435,60 +437,152 @@ static int crypto_aegis128_decrypt(struct aead_request *req) authsize, 0); skcipher_walk_aead_decrypt(&walk, req, false); - if (aegis128_do_simd()) { - crypto_aegis128_init_simd(&state, &ctx->key, req->iv); - crypto_aegis128_process_ad(&state, req->src, req->assoclen); - crypto_aegis128_process_crypt(&state, req, &walk, - crypto_aegis128_decrypt_chunk_simd); - crypto_aegis128_final_simd(&state, &tag, req->assoclen, - cryptlen); - } else { - crypto_aegis128_init(&state, &ctx->key, req->iv); - crypto_aegis128_process_ad(&state, req->src, req->assoclen); - crypto_aegis128_process_crypt(&state, req, &walk, - crypto_aegis128_decrypt_chunk); - crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen); - } + crypto_aegis128_init(&state, &ctx->key, req->iv); + crypto_aegis128_process_ad(&state, req->src, req->assoclen, false); + crypto_aegis128_process_crypt(&state, &walk, + crypto_aegis128_decrypt_chunk); + crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen); - return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0; + if (unlikely(crypto_memneq(tag.bytes, zeros, authsize))) { + /* + * From Chapter 4. 'Security Analysis' of the AEGIS spec [0] + * + * "3. If verification fails, the decrypted plaintext and the + * wrong authentication tag should not be given as output." + * + * [0] https://competitions.cr.yp.to/round3/aegisv11.pdf + */ + skcipher_walk_aead_decrypt(&walk, req, false); + crypto_aegis128_process_crypt(NULL, &walk, + crypto_aegis128_wipe_chunk); + memzero_explicit(&tag, sizeof(tag)); + return -EBADMSG; + } + return 0; } -static struct aead_alg crypto_aegis128_alg = { - .setkey = crypto_aegis128_setkey, - .setauthsize = crypto_aegis128_setauthsize, - .encrypt = crypto_aegis128_encrypt, - .decrypt = crypto_aegis128_decrypt, +static int crypto_aegis128_encrypt_simd(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + union aegis_block tag = {}; + unsigned int authsize = crypto_aead_authsize(tfm); + struct aegis_ctx *ctx = crypto_aead_ctx(tfm); + unsigned int cryptlen = req->cryptlen; + struct skcipher_walk walk; + struct aegis_state state; - .ivsize = AEGIS128_NONCE_SIZE, - .maxauthsize = AEGIS128_MAX_AUTH_SIZE, - .chunksize = AEGIS_BLOCK_SIZE, + if (!aegis128_do_simd()) + return crypto_aegis128_encrypt_generic(req); - .base = { - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx), - .cra_alignmask = 0, + skcipher_walk_aead_encrypt(&walk, req, false); + crypto_aegis128_init_simd(&state, &ctx->key, req->iv); + crypto_aegis128_process_ad(&state, req->src, req->assoclen, true); + crypto_aegis128_process_crypt(&state, &walk, + crypto_aegis128_encrypt_chunk_simd); + crypto_aegis128_final_simd(&state, &tag, req->assoclen, cryptlen, 0); - .cra_priority = 100, + scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen, + authsize, 1); + return 0; +} - .cra_name = "aegis128", - .cra_driver_name = "aegis128-generic", +static int crypto_aegis128_decrypt_simd(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + union aegis_block tag; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen - authsize; + struct aegis_ctx *ctx = crypto_aead_ctx(tfm); + struct skcipher_walk walk; + struct aegis_state state; - .cra_module = THIS_MODULE, + if (!aegis128_do_simd()) + return crypto_aegis128_decrypt_generic(req); + + scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen, + authsize, 0); + + skcipher_walk_aead_decrypt(&walk, req, false); + crypto_aegis128_init_simd(&state, &ctx->key, req->iv); + crypto_aegis128_process_ad(&state, req->src, req->assoclen, true); + crypto_aegis128_process_crypt(&state, &walk, + crypto_aegis128_decrypt_chunk_simd); + + if (unlikely(crypto_aegis128_final_simd(&state, &tag, req->assoclen, + cryptlen, authsize))) { + skcipher_walk_aead_decrypt(&walk, req, false); + crypto_aegis128_process_crypt(NULL, &walk, + crypto_aegis128_wipe_chunk); + return -EBADMSG; } + return 0; +} + +static struct aead_alg crypto_aegis128_alg_generic = { + .setkey = crypto_aegis128_setkey, + .setauthsize = crypto_aegis128_setauthsize, + .encrypt = crypto_aegis128_encrypt_generic, + .decrypt = crypto_aegis128_decrypt_generic, + + .ivsize = AEGIS128_NONCE_SIZE, + .maxauthsize = AEGIS128_MAX_AUTH_SIZE, + .chunksize = AEGIS_BLOCK_SIZE, + + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aegis_ctx), + .base.cra_alignmask = 0, + .base.cra_priority = 100, + .base.cra_name = "aegis128", + .base.cra_driver_name = "aegis128-generic", + .base.cra_module = THIS_MODULE, +}; + +static struct aead_alg crypto_aegis128_alg_simd = { + .setkey = crypto_aegis128_setkey, + .setauthsize = crypto_aegis128_setauthsize, + .encrypt = crypto_aegis128_encrypt_simd, + .decrypt = crypto_aegis128_decrypt_simd, + + .ivsize = AEGIS128_NONCE_SIZE, + .maxauthsize = AEGIS128_MAX_AUTH_SIZE, + .chunksize = AEGIS_BLOCK_SIZE, + + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aegis_ctx), + .base.cra_alignmask = 0, + .base.cra_priority = 200, + .base.cra_name = "aegis128", + .base.cra_driver_name = "aegis128-simd", + .base.cra_module = THIS_MODULE, }; static int __init crypto_aegis128_module_init(void) { - if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) && - crypto_aegis128_have_simd()) - static_branch_enable(&have_simd); + int ret; - return crypto_register_aead(&crypto_aegis128_alg); + ret = crypto_register_aead(&crypto_aegis128_alg_generic); + if (ret) + return ret; + + if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) && + crypto_aegis128_have_simd()) { + ret = crypto_register_aead(&crypto_aegis128_alg_simd); + if (ret) { + crypto_unregister_aead(&crypto_aegis128_alg_generic); + return ret; + } + static_branch_enable(&have_simd); + } + return 0; } static void __exit crypto_aegis128_module_exit(void) { - crypto_unregister_aead(&crypto_aegis128_alg); + if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) && + crypto_aegis128_have_simd()) + crypto_unregister_aead(&crypto_aegis128_alg_simd); + + crypto_unregister_aead(&crypto_aegis128_alg_generic); } subsys_initcall(crypto_aegis128_module_init); @@ -499,3 +593,4 @@ MODULE_AUTHOR("Ondrej Mosnacek "); MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm"); MODULE_ALIAS_CRYPTO("aegis128"); MODULE_ALIAS_CRYPTO("aegis128-generic"); +MODULE_ALIAS_CRYPTO("aegis128-simd"); diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c index 2a660ac1bc3a..7de485907d81 100644 --- a/crypto/aegis128-neon-inner.c +++ b/crypto/aegis128-neon-inner.c @@ -20,7 +20,6 @@ extern int aegis128_have_aes_insn; void *memcpy(void *dest, const void *src, size_t n); -void *memset(void *s, int c, size_t n); struct aegis128_state { uint8x16_t v[5]; @@ -173,10 +172,57 @@ void crypto_aegis128_update_neon(void *state, const void *msg) aegis128_save_state_neon(st, state); } +#ifdef CONFIG_ARM +/* + * AArch32 does not provide these intrinsics natively because it does not + * implement the underlying instructions. AArch32 only provides 64-bit + * wide vtbl.8/vtbx.8 instruction, so use those instead. + */ +static uint8x16_t vqtbl1q_u8(uint8x16_t a, uint8x16_t b) +{ + union { + uint8x16_t val; + uint8x8x2_t pair; + } __a = { a }; + + return vcombine_u8(vtbl2_u8(__a.pair, vget_low_u8(b)), + vtbl2_u8(__a.pair, vget_high_u8(b))); +} + +static uint8x16_t vqtbx1q_u8(uint8x16_t v, uint8x16_t a, uint8x16_t b) +{ + union { + uint8x16_t val; + uint8x8x2_t pair; + } __a = { a }; + + return vcombine_u8(vtbx2_u8(vget_low_u8(v), __a.pair, vget_low_u8(b)), + vtbx2_u8(vget_high_u8(v), __a.pair, vget_high_u8(b))); +} + +static int8_t vminvq_s8(int8x16_t v) +{ + int8x8_t s = vpmin_s8(vget_low_s8(v), vget_high_s8(v)); + + s = vpmin_s8(s, s); + s = vpmin_s8(s, s); + s = vpmin_s8(s, s); + + return vget_lane_s8(s, 0); +} +#endif + +static const uint8_t permute[] __aligned(64) = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src, unsigned int size) { struct aegis128_state st = aegis128_load_state_neon(state); + const int short_input = size < AEGIS_BLOCK_SIZE; uint8x16_t msg; preload_sbox(); @@ -186,7 +232,8 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src, msg = vld1q_u8(src); st = aegis128_update_neon(st, msg); - vst1q_u8(dst, msg ^ s); + msg ^= s; + vst1q_u8(dst, msg); size -= AEGIS_BLOCK_SIZE; src += AEGIS_BLOCK_SIZE; @@ -195,13 +242,26 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src, if (size > 0) { uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; - uint8_t buf[AEGIS_BLOCK_SIZE] = {}; + uint8_t buf[AEGIS_BLOCK_SIZE]; + const void *in = src; + void *out = dst; + uint8x16_t m; - memcpy(buf, src, size); - msg = vld1q_u8(buf); - st = aegis128_update_neon(st, msg); - vst1q_u8(buf, msg ^ s); - memcpy(dst, buf, size); + if (__builtin_expect(short_input, 0)) + in = out = memcpy(buf + AEGIS_BLOCK_SIZE - size, src, size); + + m = vqtbl1q_u8(vld1q_u8(in + size - AEGIS_BLOCK_SIZE), + vld1q_u8(permute + 32 - size)); + + st = aegis128_update_neon(st, m); + + vst1q_u8(out + size - AEGIS_BLOCK_SIZE, + vqtbl1q_u8(m ^ s, vld1q_u8(permute + size))); + + if (__builtin_expect(short_input, 0)) + memcpy(dst, out, size); + else + vst1q_u8(out - AEGIS_BLOCK_SIZE, msg); } aegis128_save_state_neon(st, state); @@ -211,6 +271,7 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src, unsigned int size) { struct aegis128_state st = aegis128_load_state_neon(state); + const int short_input = size < AEGIS_BLOCK_SIZE; uint8x16_t msg; preload_sbox(); @@ -228,21 +289,34 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src, if (size > 0) { uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; uint8_t buf[AEGIS_BLOCK_SIZE]; + const void *in = src; + void *out = dst; + uint8x16_t m; - vst1q_u8(buf, s); - memcpy(buf, src, size); - msg = vld1q_u8(buf) ^ s; - vst1q_u8(buf, msg); - memcpy(dst, buf, size); + if (__builtin_expect(short_input, 0)) + in = out = memcpy(buf + AEGIS_BLOCK_SIZE - size, src, size); - st = aegis128_update_neon(st, msg); + m = s ^ vqtbx1q_u8(s, vld1q_u8(in + size - AEGIS_BLOCK_SIZE), + vld1q_u8(permute + 32 - size)); + + st = aegis128_update_neon(st, m); + + vst1q_u8(out + size - AEGIS_BLOCK_SIZE, + vqtbl1q_u8(m, vld1q_u8(permute + size))); + + if (__builtin_expect(short_input, 0)) + memcpy(dst, out, size); + else + vst1q_u8(out - AEGIS_BLOCK_SIZE, msg); } aegis128_save_state_neon(st, state); } -void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen, - uint64_t cryptlen) +int crypto_aegis128_final_neon(void *state, void *tag_xor, + unsigned int assoclen, + unsigned int cryptlen, + unsigned int authsize) { struct aegis128_state st = aegis128_load_state_neon(state); uint8x16_t v; @@ -250,13 +324,21 @@ void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen, preload_sbox(); - v = st.v[3] ^ (uint8x16_t)vcombine_u64(vmov_n_u64(8 * assoclen), - vmov_n_u64(8 * cryptlen)); + v = st.v[3] ^ (uint8x16_t)vcombine_u64(vmov_n_u64(8ULL * assoclen), + vmov_n_u64(8ULL * cryptlen)); for (i = 0; i < 7; i++) st = aegis128_update_neon(st, v); - v = vld1q_u8(tag_xor); - v ^= st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4]; + v = st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4]; + + if (authsize > 0) { + v = vqtbl1q_u8(~vceqq_u8(v, vld1q_u8(tag_xor)), + vld1q_u8(permute + authsize)); + + return vminvq_s8((int8x16_t)v); + } + vst1q_u8(tag_xor, v); + return 0; } diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c index 8271b1fa0fbc..94d591a002a4 100644 --- a/crypto/aegis128-neon.c +++ b/crypto/aegis128-neon.c @@ -14,8 +14,10 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src, unsigned int size); void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src, unsigned int size); -void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen, - uint64_t cryptlen); +int crypto_aegis128_final_neon(void *state, void *tag_xor, + unsigned int assoclen, + unsigned int cryptlen, + unsigned int authsize); int aegis128_have_aes_insn __ro_after_init; @@ -60,11 +62,18 @@ void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst, kernel_neon_end(); } -void crypto_aegis128_final_simd(union aegis_block *state, - union aegis_block *tag_xor, - u64 assoclen, u64 cryptlen) +int crypto_aegis128_final_simd(union aegis_block *state, + union aegis_block *tag_xor, + unsigned int assoclen, + unsigned int cryptlen, + unsigned int authsize) { + int ret; + kernel_neon_begin(); - crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen); + ret = crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen, + authsize); kernel_neon_end(); + + return ret; } diff --git a/crypto/af_alg.c b/crypto/af_alg.c index d11db80d24cd..9acb9d2c4bcf 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -147,7 +147,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY; struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); - struct sockaddr_alg *sa = (void *)uaddr; + struct sockaddr_alg_new *sa = (void *)uaddr; const struct af_alg_type *type; void *private; int err; @@ -155,7 +155,11 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (sock->state == SS_CONNECTED) return -EINVAL; - if (addr_len < sizeof(*sa)) + BUILD_BUG_ON(offsetof(struct sockaddr_alg_new, salg_name) != + offsetof(struct sockaddr_alg, salg_name)); + BUILD_BUG_ON(offsetof(struct sockaddr_alg, salg_name) != sizeof(*sa)); + + if (addr_len < sizeof(*sa) + 1) return -EINVAL; /* If caller uses non-allowed flag, return error. */ @@ -163,7 +167,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EINVAL; sa->salg_type[sizeof(sa->salg_type) - 1] = 0; - sa->salg_name[sizeof(sa->salg_name) + addr_len - sizeof(*sa) - 1] = 0; + sa->salg_name[addr_len - sizeof(*sa) - 1] = 0; type = alg_get_type(sa->salg_type); if (PTR_ERR(type) == -ENOENT) { diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c index 378b18b9bc34..511932aa94a6 100644 --- a/crypto/asymmetric_keys/asym_tpm.c +++ b/crypto/asymmetric_keys/asym_tpm.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/crypto/ecdh.c b/crypto/ecdh.c index b0232d6ab4ce..d56b8603dec9 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -53,12 +53,13 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, return ecc_gen_privkey(ctx->curve_id, ctx->ndigits, ctx->private_key); - if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits, - (const u64 *)params.key, params.key_size) < 0) - return -EINVAL; - memcpy(ctx->private_key, params.key, params.key_size); + if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits, + ctx->private_key, params.key_size) < 0) { + memzero_explicit(ctx->private_key, params.key_size); + return -EINVAL; + } return 0; } diff --git a/crypto/seed.c b/crypto/seed.c index 5e3bef3a617d..27720140820e 100644 --- a/crypto/seed.c +++ b/crypto/seed.c @@ -322,7 +322,7 @@ static const u32 KC[SEED_NUM_KCONSTANTS] = { SS2[byte(t1, 2)] ^ SS3[byte(t1, 3)]; \ t0 += t1; \ X1 ^= t0; \ - X2 ^= t1; + X2 ^= t1 static int seed_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c index 1d43472fecbd..325b57fe28dc 100644 --- a/crypto/sha1_generic.c +++ b/crypto/sha1_generic.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index 88156e3e2a33..3b377197236e 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index e34d09dd9971..c72d72ad828e 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/crypto/sm2.c b/crypto/sm2.c index 767e160333f6..b21addc3ac06 100644 --- a/crypto/sm2.c +++ b/crypto/sm2.c @@ -119,12 +119,6 @@ static void sm2_ec_ctx_deinit(struct mpi_ec_ctx *ec) memset(ec, 0, sizeof(*ec)); } -static int sm2_ec_ctx_reset(struct mpi_ec_ctx *ec) -{ - sm2_ec_ctx_deinit(ec); - return sm2_ec_ctx_init(ec); -} - /* RESULT must have been initialized and is set on success to the * point given by VALUE. */ @@ -132,55 +126,48 @@ static int sm2_ecc_os2ec(MPI_POINT result, MPI value) { int rc; size_t n; - const unsigned char *buf; - unsigned char *buf_memory; + unsigned char *buf; MPI x, y; - n = (mpi_get_nbits(value)+7)/8; - buf_memory = kmalloc(n, GFP_KERNEL); - rc = mpi_print(GCRYMPI_FMT_USG, buf_memory, n, &n, value); - if (rc) { - kfree(buf_memory); - return rc; - } - buf = buf_memory; + n = MPI_NBYTES(value); + buf = kmalloc(n, GFP_KERNEL); + if (!buf) + return -ENOMEM; - if (n < 1) { - kfree(buf_memory); - return -EINVAL; - } - if (*buf != 4) { - kfree(buf_memory); - return -EINVAL; /* No support for point compression. */ - } - if (((n-1)%2)) { - kfree(buf_memory); - return -EINVAL; - } - n = (n-1)/2; + rc = mpi_print(GCRYMPI_FMT_USG, buf, n, &n, value); + if (rc) + goto err_freebuf; + + rc = -EINVAL; + if (n < 1 || ((n - 1) % 2)) + goto err_freebuf; + /* No support for point compression */ + if (*buf != 0x4) + goto err_freebuf; + + rc = -ENOMEM; + n = (n - 1) / 2; x = mpi_read_raw_data(buf + 1, n); - if (!x) { - kfree(buf_memory); - return -ENOMEM; - } + if (!x) + goto err_freebuf; y = mpi_read_raw_data(buf + 1 + n, n); - kfree(buf_memory); - if (!y) { - mpi_free(x); - return -ENOMEM; - } + if (!y) + goto err_freex; mpi_normalize(x); mpi_normalize(y); - mpi_set(result->x, x); mpi_set(result->y, y); mpi_set_ui(result->z, 1); - mpi_free(x); - mpi_free(y); + rc = 0; - return 0; + mpi_free(y); +err_freex: + mpi_free(x); +err_freebuf: + kfree(buf); + return rc; } struct sm2_signature_ctx { @@ -399,10 +386,6 @@ static int sm2_set_pub_key(struct crypto_akcipher *tfm, MPI a; int rc; - rc = sm2_ec_ctx_reset(ec); - if (rc) - return rc; - ec->Q = mpi_point_new(0); if (!ec->Q) return -ENOMEM; diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index eea0f453cfb6..a647bb298fbc 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -77,8 +77,8 @@ static const char *check[] = { NULL }; -static u32 block_sizes[] = { 16, 64, 256, 1024, 1472, 8192, 0 }; -static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 }; +static const int block_sizes[] = { 16, 64, 256, 1024, 1420, 4096, 0 }; +static const int aead_sizes[] = { 16, 64, 256, 512, 1024, 1420, 4096, 8192, 0 }; #define XBUFSIZE 8 #define MAX_IVLEN 32 @@ -256,10 +256,10 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, struct test_mb_aead_data *data; struct crypto_aead *tfm; unsigned int i, j, iv_len; + const int *b_size; const char *key; const char *e; void *assoc; - u32 *b_size; char *iv; int ret; @@ -337,15 +337,17 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, do { b_size = aead_sizes; do { - if (*b_size + authsize > XBUFSIZE * PAGE_SIZE) { + int bs = round_up(*b_size, crypto_aead_blocksize(tfm)); + + if (bs + authsize > XBUFSIZE * PAGE_SIZE) { pr_err("template (%u) too big for buffer (%lu)\n", - authsize + *b_size, + authsize + bs, XBUFSIZE * PAGE_SIZE); goto out; } pr_info("test %u (%d bit key, %d byte blocks): ", i, - *keysize * 8, *b_size); + *keysize * 8, bs); /* Set up tfm global state, i.e. the key */ @@ -380,11 +382,11 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, memset(assoc, 0xff, aad_size); sg_init_aead(cur->sg, cur->xbuf, - *b_size + (enc ? 0 : authsize), + bs + (enc ? 0 : authsize), assoc, aad_size); sg_init_aead(cur->sgout, cur->xoutbuf, - *b_size + (enc ? authsize : 0), + bs + (enc ? authsize : 0), assoc, aad_size); aead_request_set_ad(cur->req, aad_size); @@ -394,7 +396,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, aead_request_set_crypt(cur->req, cur->sgout, cur->sg, - *b_size, iv); + bs, iv); ret = crypto_aead_encrypt(cur->req); ret = do_one_aead_op(cur->req, ret); @@ -406,18 +408,18 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, } aead_request_set_crypt(cur->req, cur->sg, - cur->sgout, *b_size + + cur->sgout, bs + (enc ? 0 : authsize), iv); } if (secs) { - ret = test_mb_aead_jiffies(data, enc, *b_size, + ret = test_mb_aead_jiffies(data, enc, bs, secs, num_mb); cond_resched(); } else { - ret = test_mb_aead_cycles(data, enc, *b_size, + ret = test_mb_aead_cycles(data, enc, bs, num_mb); } @@ -534,7 +536,7 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, char *xbuf[XBUFSIZE]; char *xoutbuf[XBUFSIZE]; char *axbuf[XBUFSIZE]; - unsigned int *b_size; + const int *b_size; unsigned int iv_len; struct crypto_wait wait; @@ -590,12 +592,14 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, do { b_size = aead_sizes; do { + u32 bs = round_up(*b_size, crypto_aead_blocksize(tfm)); + assoc = axbuf[0]; memset(assoc, 0xff, aad_size); - if ((*keysize + *b_size) > TVMEMSIZE * PAGE_SIZE) { + if ((*keysize + bs) > TVMEMSIZE * PAGE_SIZE) { pr_err("template (%u) too big for tvmem (%lu)\n", - *keysize + *b_size, + *keysize + bs, TVMEMSIZE * PAGE_SIZE); goto out; } @@ -616,7 +620,7 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, crypto_aead_clear_flags(tfm, ~0); printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ", - i, *keysize * 8, *b_size); + i, *keysize * 8, bs); memset(tvmem[0], 0xff, PAGE_SIZE); @@ -627,11 +631,11 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, goto out; } - sg_init_aead(sg, xbuf, *b_size + (enc ? 0 : authsize), + sg_init_aead(sg, xbuf, bs + (enc ? 0 : authsize), assoc, aad_size); sg_init_aead(sgout, xoutbuf, - *b_size + (enc ? authsize : 0), assoc, + bs + (enc ? authsize : 0), assoc, aad_size); aead_request_set_ad(req, aad_size); @@ -644,7 +648,7 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, * reversed (input <-> output) to calculate it */ aead_request_set_crypt(req, sgout, sg, - *b_size, iv); + bs, iv); ret = do_one_aead_op(req, crypto_aead_encrypt(req)); @@ -656,15 +660,15 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, } aead_request_set_crypt(req, sg, sgout, - *b_size + (enc ? 0 : authsize), + bs + (enc ? 0 : authsize), iv); if (secs) { - ret = test_aead_jiffies(req, enc, *b_size, + ret = test_aead_jiffies(req, enc, bs, secs); cond_resched(); } else { - ret = test_aead_cycles(req, enc, *b_size); + ret = test_aead_cycles(req, enc, bs); } if (ret) { @@ -1253,9 +1257,9 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, struct test_mb_skcipher_data *data; struct crypto_skcipher *tfm; unsigned int i, j, iv_len; + const int *b_size; const char *key; const char *e; - u32 *b_size; char iv[128]; int ret; @@ -1316,14 +1320,16 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, do { b_size = block_sizes; do { - if (*b_size > XBUFSIZE * PAGE_SIZE) { + u32 bs = round_up(*b_size, crypto_skcipher_blocksize(tfm)); + + if (bs > XBUFSIZE * PAGE_SIZE) { pr_err("template (%u) too big for buffer (%lu)\n", *b_size, XBUFSIZE * PAGE_SIZE); goto out; } pr_info("test %u (%d bit key, %d byte blocks): ", i, - *keysize * 8, *b_size); + *keysize * 8, bs); /* Set up tfm global state, i.e. the key */ @@ -1353,7 +1359,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, for (j = 0; j < num_mb; ++j) { struct test_mb_skcipher_data *cur = &data[j]; - unsigned int k = *b_size; + unsigned int k = bs; unsigned int pages = DIV_ROUND_UP(k, PAGE_SIZE); unsigned int p = 0; @@ -1377,12 +1383,12 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, if (secs) { ret = test_mb_acipher_jiffies(data, enc, - *b_size, secs, + bs, secs, num_mb); cond_resched(); } else { ret = test_mb_acipher_cycles(data, enc, - *b_size, num_mb); + bs, num_mb); } if (ret) { @@ -1497,8 +1503,8 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs, char iv[128]; struct skcipher_request *req; struct crypto_skcipher *tfm; + const int *b_size; const char *e; - u32 *b_size; if (enc == ENCRYPT) e = "encryption"; @@ -1533,17 +1539,18 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs, b_size = block_sizes; do { + u32 bs = round_up(*b_size, crypto_skcipher_blocksize(tfm)); struct scatterlist sg[TVMEMSIZE]; - if ((*keysize + *b_size) > TVMEMSIZE * PAGE_SIZE) { + if ((*keysize + bs) > TVMEMSIZE * PAGE_SIZE) { pr_err("template (%u) too big for " - "tvmem (%lu)\n", *keysize + *b_size, + "tvmem (%lu)\n", *keysize + bs, TVMEMSIZE * PAGE_SIZE); goto out_free_req; } pr_info("test %u (%d bit key, %d byte blocks): ", i, - *keysize * 8, *b_size); + *keysize * 8, bs); memset(tvmem[0], 0xff, PAGE_SIZE); @@ -1565,7 +1572,7 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs, goto out_free_req; } - k = *keysize + *b_size; + k = *keysize + bs; sg_init_table(sg, DIV_ROUND_UP(k, PAGE_SIZE)); if (k > PAGE_SIZE) { @@ -1582,22 +1589,22 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs, sg_set_buf(sg + j, tvmem[j], k); memset(tvmem[j], 0xff, k); } else { - sg_set_buf(sg, tvmem[0] + *keysize, *b_size); + sg_set_buf(sg, tvmem[0] + *keysize, bs); } iv_len = crypto_skcipher_ivsize(tfm); if (iv_len) memset(&iv, 0xff, iv_len); - skcipher_request_set_crypt(req, sg, sg, *b_size, iv); + skcipher_request_set_crypt(req, sg, sg, bs, iv); if (secs) { ret = test_acipher_jiffies(req, enc, - *b_size, secs); + bs, secs); cond_resched(); } else { ret = test_acipher_cycles(req, enc, - *b_size); + bs); } if (ret) { @@ -3066,7 +3073,7 @@ static int __init tcrypt_mod_init(void) */ static void __exit tcrypt_mod_fini(void) { } -subsys_initcall(tcrypt_mod_init); +late_initcall(tcrypt_mod_init); module_exit(tcrypt_mod_fini); module_param(alg, charp, 0); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index a64a639eddfa..321e38eef51b 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1171,8 +1171,7 @@ static inline const void *sg_data(struct scatterlist *sg) } /* Test one hash test vector in one configuration, using the shash API */ -static int test_shash_vec_cfg(const char *driver, - const struct hash_testvec *vec, +static int test_shash_vec_cfg(const struct hash_testvec *vec, const char *vec_name, const struct testvec_config *cfg, struct shash_desc *desc, @@ -1183,6 +1182,7 @@ static int test_shash_vec_cfg(const char *driver, const unsigned int alignmask = crypto_shash_alignmask(tfm); const unsigned int digestsize = crypto_shash_digestsize(tfm); const unsigned int statesize = crypto_shash_statesize(tfm); + const char *driver = crypto_shash_driver_name(tfm); const struct test_sg_division *divs[XBUFSIZE]; unsigned int i; u8 result[HASH_MAX_DIGESTSIZE + TESTMGR_POISON_LEN]; @@ -1355,8 +1355,7 @@ static int check_nonfinal_ahash_op(const char *op, int err, } /* Test one hash test vector in one configuration, using the ahash API */ -static int test_ahash_vec_cfg(const char *driver, - const struct hash_testvec *vec, +static int test_ahash_vec_cfg(const struct hash_testvec *vec, const char *vec_name, const struct testvec_config *cfg, struct ahash_request *req, @@ -1367,6 +1366,7 @@ static int test_ahash_vec_cfg(const char *driver, const unsigned int alignmask = crypto_ahash_alignmask(tfm); const unsigned int digestsize = crypto_ahash_digestsize(tfm); const unsigned int statesize = crypto_ahash_statesize(tfm); + const char *driver = crypto_ahash_driver_name(tfm); const u32 req_flags = CRYPTO_TFM_REQ_MAY_BACKLOG | cfg->req_flags; const struct test_sg_division *divs[XBUFSIZE]; DECLARE_CRYPTO_WAIT(wait); @@ -1521,8 +1521,7 @@ static int test_ahash_vec_cfg(const char *driver, driver, cfg); } -static int test_hash_vec_cfg(const char *driver, - const struct hash_testvec *vec, +static int test_hash_vec_cfg(const struct hash_testvec *vec, const char *vec_name, const struct testvec_config *cfg, struct ahash_request *req, @@ -1539,20 +1538,18 @@ static int test_hash_vec_cfg(const char *driver, */ if (desc) { - err = test_shash_vec_cfg(driver, vec, vec_name, cfg, desc, tsgl, + err = test_shash_vec_cfg(vec, vec_name, cfg, desc, tsgl, hashstate); if (err) return err; } - return test_ahash_vec_cfg(driver, vec, vec_name, cfg, req, tsgl, - hashstate); + return test_ahash_vec_cfg(vec, vec_name, cfg, req, tsgl, hashstate); } -static int test_hash_vec(const char *driver, const struct hash_testvec *vec, - unsigned int vec_num, struct ahash_request *req, - struct shash_desc *desc, struct test_sglist *tsgl, - u8 *hashstate) +static int test_hash_vec(const struct hash_testvec *vec, unsigned int vec_num, + struct ahash_request *req, struct shash_desc *desc, + struct test_sglist *tsgl, u8 *hashstate) { char vec_name[16]; unsigned int i; @@ -1561,7 +1558,7 @@ static int test_hash_vec(const char *driver, const struct hash_testvec *vec, sprintf(vec_name, "%u", vec_num); for (i = 0; i < ARRAY_SIZE(default_hash_testvec_configs); i++) { - err = test_hash_vec_cfg(driver, vec, vec_name, + err = test_hash_vec_cfg(vec, vec_name, &default_hash_testvec_configs[i], req, desc, tsgl, hashstate); if (err) @@ -1576,7 +1573,7 @@ static int test_hash_vec(const char *driver, const struct hash_testvec *vec, for (i = 0; i < fuzz_iterations; i++) { generate_random_testvec_config(&cfg, cfgname, sizeof(cfgname)); - err = test_hash_vec_cfg(driver, vec, vec_name, &cfg, + err = test_hash_vec_cfg(vec, vec_name, &cfg, req, desc, tsgl, hashstate); if (err) return err; @@ -1633,8 +1630,7 @@ static void generate_random_hash_testvec(struct shash_desc *desc, * Test the hash algorithm represented by @req against the corresponding generic * implementation, if one is available. */ -static int test_hash_vs_generic_impl(const char *driver, - const char *generic_driver, +static int test_hash_vs_generic_impl(const char *generic_driver, unsigned int maxkeysize, struct ahash_request *req, struct shash_desc *desc, @@ -1646,6 +1642,7 @@ static int test_hash_vs_generic_impl(const char *driver, const unsigned int blocksize = crypto_ahash_blocksize(tfm); const unsigned int maxdatasize = (2 * PAGE_SIZE) - TESTMGR_POISON_LEN; const char *algname = crypto_hash_alg_common(tfm)->base.cra_name; + const char *driver = crypto_ahash_driver_name(tfm); char _generic_driver[CRYPTO_MAX_ALG_NAME]; struct crypto_shash *generic_tfm = NULL; struct shash_desc *generic_desc = NULL; @@ -1732,7 +1729,7 @@ static int test_hash_vs_generic_impl(const char *driver, vec_name, sizeof(vec_name)); generate_random_testvec_config(cfg, cfgname, sizeof(cfgname)); - err = test_hash_vec_cfg(driver, &vec, vec_name, cfg, + err = test_hash_vec_cfg(&vec, vec_name, cfg, req, desc, tsgl, hashstate); if (err) goto out; @@ -1749,8 +1746,7 @@ static int test_hash_vs_generic_impl(const char *driver, return err; } #else /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */ -static int test_hash_vs_generic_impl(const char *driver, - const char *generic_driver, +static int test_hash_vs_generic_impl(const char *generic_driver, unsigned int maxkeysize, struct ahash_request *req, struct shash_desc *desc, @@ -1820,6 +1816,7 @@ static int __alg_test_hash(const struct hash_testvec *vecs, driver, PTR_ERR(atfm)); return PTR_ERR(atfm); } + driver = crypto_ahash_driver_name(atfm); req = ahash_request_alloc(atfm, GFP_KERNEL); if (!req) { @@ -1859,13 +1856,12 @@ static int __alg_test_hash(const struct hash_testvec *vecs, } for (i = 0; i < num_vecs; i++) { - err = test_hash_vec(driver, &vecs[i], i, req, desc, tsgl, - hashstate); + err = test_hash_vec(&vecs[i], i, req, desc, tsgl, hashstate); if (err) goto out; cond_resched(); } - err = test_hash_vs_generic_impl(driver, generic_driver, maxkeysize, req, + err = test_hash_vs_generic_impl(generic_driver, maxkeysize, req, desc, tsgl, hashstate); out: kfree(hashstate); @@ -1923,8 +1919,7 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver, return err; } -static int test_aead_vec_cfg(const char *driver, int enc, - const struct aead_testvec *vec, +static int test_aead_vec_cfg(int enc, const struct aead_testvec *vec, const char *vec_name, const struct testvec_config *cfg, struct aead_request *req, @@ -1934,6 +1929,7 @@ static int test_aead_vec_cfg(const char *driver, int enc, const unsigned int alignmask = crypto_aead_alignmask(tfm); const unsigned int ivsize = crypto_aead_ivsize(tfm); const unsigned int authsize = vec->clen - vec->plen; + const char *driver = crypto_aead_driver_name(tfm); const u32 req_flags = CRYPTO_TFM_REQ_MAY_BACKLOG | cfg->req_flags; const char *op = enc ? "encryption" : "decryption"; DECLARE_CRYPTO_WAIT(wait); @@ -2106,9 +2102,8 @@ static int test_aead_vec_cfg(const char *driver, int enc, return 0; } -static int test_aead_vec(const char *driver, int enc, - const struct aead_testvec *vec, unsigned int vec_num, - struct aead_request *req, +static int test_aead_vec(int enc, const struct aead_testvec *vec, + unsigned int vec_num, struct aead_request *req, struct cipher_test_sglists *tsgls) { char vec_name[16]; @@ -2121,7 +2116,7 @@ static int test_aead_vec(const char *driver, int enc, sprintf(vec_name, "%u", vec_num); for (i = 0; i < ARRAY_SIZE(default_cipher_testvec_configs); i++) { - err = test_aead_vec_cfg(driver, enc, vec, vec_name, + err = test_aead_vec_cfg(enc, vec, vec_name, &default_cipher_testvec_configs[i], req, tsgls); if (err) @@ -2136,7 +2131,7 @@ static int test_aead_vec(const char *driver, int enc, for (i = 0; i < fuzz_iterations; i++) { generate_random_testvec_config(&cfg, cfgname, sizeof(cfgname)); - err = test_aead_vec_cfg(driver, enc, vec, vec_name, + err = test_aead_vec_cfg(enc, vec, vec_name, &cfg, req, tsgls); if (err) return err; @@ -2152,7 +2147,6 @@ static int test_aead_vec(const char *driver, int enc, struct aead_extra_tests_ctx { struct aead_request *req; struct crypto_aead *tfm; - const char *driver; const struct alg_test_desc *test_desc; struct cipher_test_sglists *tsgls; unsigned int maxdatasize; @@ -2358,7 +2352,7 @@ static int test_aead_inauthentic_inputs(struct aead_extra_tests_ctx *ctx) if (ctx->vec.novrfy) { generate_random_testvec_config(&ctx->cfg, ctx->cfgname, sizeof(ctx->cfgname)); - err = test_aead_vec_cfg(ctx->driver, DECRYPT, &ctx->vec, + err = test_aead_vec_cfg(DECRYPT, &ctx->vec, ctx->vec_name, &ctx->cfg, ctx->req, ctx->tsgls); if (err) @@ -2377,7 +2371,7 @@ static int test_aead_vs_generic_impl(struct aead_extra_tests_ctx *ctx) { struct crypto_aead *tfm = ctx->tfm; const char *algname = crypto_aead_alg(tfm)->base.cra_name; - const char *driver = ctx->driver; + const char *driver = crypto_aead_driver_name(tfm); const char *generic_driver = ctx->test_desc->generic_driver; char _generic_driver[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *generic_tfm = NULL; @@ -2454,14 +2448,14 @@ static int test_aead_vs_generic_impl(struct aead_extra_tests_ctx *ctx) generate_random_testvec_config(&ctx->cfg, ctx->cfgname, sizeof(ctx->cfgname)); if (!ctx->vec.novrfy) { - err = test_aead_vec_cfg(driver, ENCRYPT, &ctx->vec, + err = test_aead_vec_cfg(ENCRYPT, &ctx->vec, ctx->vec_name, &ctx->cfg, ctx->req, ctx->tsgls); if (err) goto out; } if (ctx->vec.crypt_error == 0 || ctx->vec.novrfy) { - err = test_aead_vec_cfg(driver, DECRYPT, &ctx->vec, + err = test_aead_vec_cfg(DECRYPT, &ctx->vec, ctx->vec_name, &ctx->cfg, ctx->req, ctx->tsgls); if (err) @@ -2476,8 +2470,7 @@ static int test_aead_vs_generic_impl(struct aead_extra_tests_ctx *ctx) return err; } -static int test_aead_extra(const char *driver, - const struct alg_test_desc *test_desc, +static int test_aead_extra(const struct alg_test_desc *test_desc, struct aead_request *req, struct cipher_test_sglists *tsgls) { @@ -2493,7 +2486,6 @@ static int test_aead_extra(const char *driver, return -ENOMEM; ctx->req = req; ctx->tfm = crypto_aead_reqtfm(req); - ctx->driver = driver; ctx->test_desc = test_desc; ctx->tsgls = tsgls; ctx->maxdatasize = (2 * PAGE_SIZE) - TESTMGR_POISON_LEN; @@ -2528,8 +2520,7 @@ static int test_aead_extra(const char *driver, return err; } #else /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */ -static int test_aead_extra(const char *driver, - const struct alg_test_desc *test_desc, +static int test_aead_extra(const struct alg_test_desc *test_desc, struct aead_request *req, struct cipher_test_sglists *tsgls) { @@ -2537,8 +2528,7 @@ static int test_aead_extra(const char *driver, } #endif /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */ -static int test_aead(const char *driver, int enc, - const struct aead_test_suite *suite, +static int test_aead(int enc, const struct aead_test_suite *suite, struct aead_request *req, struct cipher_test_sglists *tsgls) { @@ -2546,8 +2536,7 @@ static int test_aead(const char *driver, int enc, int err; for (i = 0; i < suite->count; i++) { - err = test_aead_vec(driver, enc, &suite->vecs[i], i, req, - tsgls); + err = test_aead_vec(enc, &suite->vecs[i], i, req, tsgls); if (err) return err; cond_resched(); @@ -2575,6 +2564,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver, driver, PTR_ERR(tfm)); return PTR_ERR(tfm); } + driver = crypto_aead_driver_name(tfm); req = aead_request_alloc(tfm, GFP_KERNEL); if (!req) { @@ -2592,15 +2582,15 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver, goto out; } - err = test_aead(driver, ENCRYPT, suite, req, tsgls); + err = test_aead(ENCRYPT, suite, req, tsgls); if (err) goto out; - err = test_aead(driver, DECRYPT, suite, req, tsgls); + err = test_aead(DECRYPT, suite, req, tsgls); if (err) goto out; - err = test_aead_extra(driver, desc, req, tsgls); + err = test_aead_extra(desc, req, tsgls); out: free_cipher_test_sglists(tsgls); aead_request_free(req); @@ -2695,8 +2685,7 @@ static int test_cipher(struct crypto_cipher *tfm, int enc, return ret; } -static int test_skcipher_vec_cfg(const char *driver, int enc, - const struct cipher_testvec *vec, +static int test_skcipher_vec_cfg(int enc, const struct cipher_testvec *vec, const char *vec_name, const struct testvec_config *cfg, struct skcipher_request *req, @@ -2705,6 +2694,7 @@ static int test_skcipher_vec_cfg(const char *driver, int enc, struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); const unsigned int alignmask = crypto_skcipher_alignmask(tfm); const unsigned int ivsize = crypto_skcipher_ivsize(tfm); + const char *driver = crypto_skcipher_driver_name(tfm); const u32 req_flags = CRYPTO_TFM_REQ_MAY_BACKLOG | cfg->req_flags; const char *op = enc ? "encryption" : "decryption"; DECLARE_CRYPTO_WAIT(wait); @@ -2859,8 +2849,7 @@ static int test_skcipher_vec_cfg(const char *driver, int enc, return 0; } -static int test_skcipher_vec(const char *driver, int enc, - const struct cipher_testvec *vec, +static int test_skcipher_vec(int enc, const struct cipher_testvec *vec, unsigned int vec_num, struct skcipher_request *req, struct cipher_test_sglists *tsgls) @@ -2875,7 +2864,7 @@ static int test_skcipher_vec(const char *driver, int enc, sprintf(vec_name, "%u", vec_num); for (i = 0; i < ARRAY_SIZE(default_cipher_testvec_configs); i++) { - err = test_skcipher_vec_cfg(driver, enc, vec, vec_name, + err = test_skcipher_vec_cfg(enc, vec, vec_name, &default_cipher_testvec_configs[i], req, tsgls); if (err) @@ -2890,7 +2879,7 @@ static int test_skcipher_vec(const char *driver, int enc, for (i = 0; i < fuzz_iterations; i++) { generate_random_testvec_config(&cfg, cfgname, sizeof(cfgname)); - err = test_skcipher_vec_cfg(driver, enc, vec, vec_name, + err = test_skcipher_vec_cfg(enc, vec, vec_name, &cfg, req, tsgls); if (err) return err; @@ -2961,8 +2950,7 @@ static void generate_random_cipher_testvec(struct skcipher_request *req, * Test the skcipher algorithm represented by @req against the corresponding * generic implementation, if one is available. */ -static int test_skcipher_vs_generic_impl(const char *driver, - const char *generic_driver, +static int test_skcipher_vs_generic_impl(const char *generic_driver, struct skcipher_request *req, struct cipher_test_sglists *tsgls) { @@ -2972,6 +2960,7 @@ static int test_skcipher_vs_generic_impl(const char *driver, const unsigned int blocksize = crypto_skcipher_blocksize(tfm); const unsigned int maxdatasize = (2 * PAGE_SIZE) - TESTMGR_POISON_LEN; const char *algname = crypto_skcipher_alg(tfm)->base.cra_name; + const char *driver = crypto_skcipher_driver_name(tfm); char _generic_driver[CRYPTO_MAX_ALG_NAME]; struct crypto_skcipher *generic_tfm = NULL; struct skcipher_request *generic_req = NULL; @@ -3077,11 +3066,11 @@ static int test_skcipher_vs_generic_impl(const char *driver, vec_name, sizeof(vec_name)); generate_random_testvec_config(cfg, cfgname, sizeof(cfgname)); - err = test_skcipher_vec_cfg(driver, ENCRYPT, &vec, vec_name, + err = test_skcipher_vec_cfg(ENCRYPT, &vec, vec_name, cfg, req, tsgls); if (err) goto out; - err = test_skcipher_vec_cfg(driver, DECRYPT, &vec, vec_name, + err = test_skcipher_vec_cfg(DECRYPT, &vec, vec_name, cfg, req, tsgls); if (err) goto out; @@ -3099,8 +3088,7 @@ static int test_skcipher_vs_generic_impl(const char *driver, return err; } #else /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */ -static int test_skcipher_vs_generic_impl(const char *driver, - const char *generic_driver, +static int test_skcipher_vs_generic_impl(const char *generic_driver, struct skcipher_request *req, struct cipher_test_sglists *tsgls) { @@ -3108,8 +3096,7 @@ static int test_skcipher_vs_generic_impl(const char *driver, } #endif /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */ -static int test_skcipher(const char *driver, int enc, - const struct cipher_test_suite *suite, +static int test_skcipher(int enc, const struct cipher_test_suite *suite, struct skcipher_request *req, struct cipher_test_sglists *tsgls) { @@ -3117,8 +3104,7 @@ static int test_skcipher(const char *driver, int enc, int err; for (i = 0; i < suite->count; i++) { - err = test_skcipher_vec(driver, enc, &suite->vecs[i], i, req, - tsgls); + err = test_skcipher_vec(enc, &suite->vecs[i], i, req, tsgls); if (err) return err; cond_resched(); @@ -3146,6 +3132,7 @@ static int alg_test_skcipher(const struct alg_test_desc *desc, driver, PTR_ERR(tfm)); return PTR_ERR(tfm); } + driver = crypto_skcipher_driver_name(tfm); req = skcipher_request_alloc(tfm, GFP_KERNEL); if (!req) { @@ -3163,16 +3150,15 @@ static int alg_test_skcipher(const struct alg_test_desc *desc, goto out; } - err = test_skcipher(driver, ENCRYPT, suite, req, tsgls); + err = test_skcipher(ENCRYPT, suite, req, tsgls); if (err) goto out; - err = test_skcipher(driver, DECRYPT, suite, req, tsgls); + err = test_skcipher(DECRYPT, suite, req, tsgls); if (err) goto out; - err = test_skcipher_vs_generic_impl(driver, desc->generic_driver, req, - tsgls); + err = test_skcipher_vs_generic_impl(desc->generic_driver, req, tsgls); out: free_cipher_test_sglists(tsgls); skcipher_request_free(req); @@ -3602,6 +3588,7 @@ static int alg_test_crc32c(const struct alg_test_desc *desc, "%ld\n", driver, PTR_ERR(tfm)); return PTR_ERR(tfm); } + driver = crypto_shash_driver_name(tfm); do { SHASH_DESC_ON_STACK(shash, tfm); @@ -5677,15 +5664,21 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask) type, mask); test_done: - if (rc && (fips_enabled || panic_on_fail)) { - fips_fail_notify(); - panic("alg: self-tests for %s (%s) failed in %s mode!\n", - driver, alg, fips_enabled ? "fips" : "panic_on_fail"); + if (rc) { + if (fips_enabled || panic_on_fail) { + fips_fail_notify(); + panic("alg: self-tests for %s (%s) failed in %s mode!\n", + driver, alg, + fips_enabled ? "fips" : "panic_on_fail"); + } + WARN(1, "alg: self-tests for %s (%s) failed (rc=%d)", + driver, alg, rc); + } else { + if (fips_enabled) + pr_info("alg: self-tests for %s (%s) passed\n", + driver, alg); } - if (fips_enabled && !rc) - pr_info("alg: self-tests for %s (%s) passed\n", driver, alg); - return rc; notest: diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index e92c4d9469d8..17c1df8c909a 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -348,19 +348,6 @@ config HW_RANDOM_HISI If unsure, say Y. -config HW_RANDOM_HISI_V2 - tristate "HiSilicon True Random Number Generator V2 support" - depends on HW_RANDOM && ARM64 && ACPI - default HW_RANDOM - help - This driver provides kernel-side support for the True Random Number - Generator V2 hardware found on HiSilicon Hi1620 SoC. - - To compile this driver as a module, choose M here: the - module will be called hisi-trng-v2. - - If unsure, say Y. - config HW_RANDOM_ST tristate "ST Microelectronics HW Random Number Generator support" depends on HW_RANDOM && ARCH_STI @@ -508,6 +495,7 @@ config HW_RANDOM_NPCM config HW_RANDOM_KEYSTONE depends on ARCH_KEYSTONE || COMPILE_TEST + depends on HAS_IOMEM && OF default HW_RANDOM tristate "TI Keystone NETCP SA Hardware random number generator" help diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 5da344509a4d..8933fada74f2 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -30,7 +30,6 @@ obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o obj-$(CONFIG_HW_RANDOM_POWERNV) += powernv-rng.o obj-$(CONFIG_HW_RANDOM_HISI) += hisi-rng.o -obj-$(CONFIG_HW_RANDOM_HISI_V2) += hisi-trng-v2.o obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o obj-$(CONFIG_HW_RANDOM_IPROC_RNG200) += iproc-rng200.o obj-$(CONFIG_HW_RANDOM_ST) += st-rng.o diff --git a/drivers/char/hw_random/hisi-trng-v2.c b/drivers/char/hw_random/hisi-trng-v2.c deleted file mode 100644 index 6a65b8232ce0..000000000000 --- a/drivers/char/hw_random/hisi-trng-v2.c +++ /dev/null @@ -1,99 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2019 HiSilicon Limited. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define HISI_TRNG_REG 0x00F0 -#define HISI_TRNG_BYTES 4 -#define HISI_TRNG_QUALITY 512 -#define SLEEP_US 10 -#define TIMEOUT_US 10000 - -struct hisi_trng { - void __iomem *base; - struct hwrng rng; -}; - -static int hisi_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait) -{ - struct hisi_trng *trng; - int currsize = 0; - u32 val = 0; - u32 ret; - - trng = container_of(rng, struct hisi_trng, rng); - - do { - ret = readl_poll_timeout(trng->base + HISI_TRNG_REG, val, - val, SLEEP_US, TIMEOUT_US); - if (ret) - return currsize; - - if (max - currsize >= HISI_TRNG_BYTES) { - memcpy(buf + currsize, &val, HISI_TRNG_BYTES); - currsize += HISI_TRNG_BYTES; - if (currsize == max) - return currsize; - continue; - } - - /* copy remaining bytes */ - memcpy(buf + currsize, &val, max - currsize); - currsize = max; - } while (currsize < max); - - return currsize; -} - -static int hisi_trng_probe(struct platform_device *pdev) -{ - struct hisi_trng *trng; - int ret; - - trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL); - if (!trng) - return -ENOMEM; - - trng->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(trng->base)) - return PTR_ERR(trng->base); - - trng->rng.name = pdev->name; - trng->rng.read = hisi_trng_read; - trng->rng.quality = HISI_TRNG_QUALITY; - - ret = devm_hwrng_register(&pdev->dev, &trng->rng); - if (ret) - dev_err(&pdev->dev, "failed to register hwrng!\n"); - - return ret; -} - -static const struct acpi_device_id hisi_trng_acpi_match[] = { - { "HISI02B3", 0 }, - { } -}; -MODULE_DEVICE_TABLE(acpi, hisi_trng_acpi_match); - -static struct platform_driver hisi_trng_driver = { - .probe = hisi_trng_probe, - .driver = { - .name = "hisi-trng-v2", - .acpi_match_table = ACPI_PTR(hisi_trng_acpi_match), - }, -}; - -module_platform_driver(hisi_trng_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Weili Qian "); -MODULE_AUTHOR("Zaibo Xu "); -MODULE_DESCRIPTION("HiSilicon true random number generator V2 driver"); diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 61c844baf26e..b05d676ca814 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -252,10 +252,8 @@ static int imx_rngc_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (irq <= 0) { - dev_err(&pdev->dev, "Couldn't get irq %d\n", irq); + if (irq < 0) return irq; - } ret = clk_prepare_enable(rngc->clk); if (ret) diff --git a/drivers/char/random.c b/drivers/char/random.c index 2a41b21623ae..5f3b8ac9d97b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -336,7 +336,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 37da0c070a88..bbd51703e738 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -548,6 +548,7 @@ config CRYPTO_DEV_ATMEL_SHA config CRYPTO_DEV_ATMEL_I2C tristate + select BITREVERSE config CRYPTO_DEV_ATMEL_ECC tristate "Support for Microchip / Atmel ECC hw accelerator" @@ -648,7 +649,7 @@ choice default CRYPTO_DEV_QCE_ENABLE_ALL depends on CRYPTO_DEV_QCE help - This option allows to choose whether to build support for all algorihtms + This option allows to choose whether to build support for all algorithms (default), hashes-only, or skciphers-only. The QCE engine does not appear to scale as well as the CPU to handle @@ -900,4 +901,6 @@ config CRYPTO_DEV_SA2UL used for crypto offload. Select this if you want to use hardware acceleration for cryptographic algorithms on these devices. +source "drivers/crypto/keembay/Kconfig" + endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 53fc115cf459..fff9a70348e1 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -51,3 +51,4 @@ obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/ obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += xilinx/ obj-y += hisilicon/ obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/ +obj-y += keembay/ diff --git a/drivers/crypto/allwinner/Kconfig b/drivers/crypto/allwinner/Kconfig index 0cdfe0e8cc66..180c8a9db819 100644 --- a/drivers/crypto/allwinner/Kconfig +++ b/drivers/crypto/allwinner/Kconfig @@ -43,7 +43,7 @@ config CRYPTO_DEV_SUN8I_CE depends on CRYPTO_DEV_ALLWINNER depends on PM help - Select y here to have support for the crypto Engine availlable on + Select y here to have support for the crypto Engine available on Allwinner SoC H2+, H3, H5, H6, R40 and A64. The Crypto Engine handle AES/3DES ciphers in ECB/CBC mode. diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h index 163962f9e284..5c291e4a6857 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index a94bf28f858a..2f09a37306e2 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -13,7 +13,8 @@ #include #include #include -#include +#include +#include #include #include "sun8i-ce.h" @@ -262,13 +263,13 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) u32 common; u64 byte_count; __le32 *bf; - void *buf; + void *buf = NULL; int j, i, todo; int nbw = 0; u64 fill, min_fill; __be64 *bebits; __le64 *lebits; - void *result; + void *result = NULL; u64 bs; int digestsize; dma_addr_t addr_res, addr_pad; @@ -285,13 +286,17 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) /* the padding could be up to two block. */ buf = kzalloc(bs * 2, GFP_KERNEL | GFP_DMA); - if (!buf) - return -ENOMEM; + if (!buf) { + err = -ENOMEM; + goto theend; + } bf = (__le32 *)buf; result = kzalloc(digestsize, GFP_KERNEL | GFP_DMA); - if (!result) - return -ENOMEM; + if (!result) { + err = -ENOMEM; + goto theend; + } flow = rctx->flow; chan = &ce->chanlist[flow]; @@ -403,11 +408,11 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) dma_unmap_sg(ce->dev, areq->src, nr_sgs, DMA_TO_DEVICE); dma_unmap_single(ce->dev, addr_res, digestsize, DMA_FROM_DEVICE); - kfree(buf); memcpy(areq->result, result, algt->alg.hash.halg.digestsize); - kfree(result); theend: + kfree(buf); + kfree(result); crypto_finalize_hash_request(engine, breq, err); return 0; } diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 558027516aed..cec781d5063c 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -16,7 +16,8 @@ #include #include #include -#include +#include +#include /* CE Registers */ #define CE_TDQ 0x00 diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index b6ab2054f217..11cbcbc83a7b 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -13,7 +13,8 @@ #include #include #include -#include +#include +#include #include #include "sun8i-ss.h" diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index 1a66457f4a20..28188685b910 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include #define SS_START 1 diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c index 7729a637fb02..a3fa849b139a 100644 --- a/drivers/crypto/amcc/crypto4xx_alg.c +++ b/drivers/crypto/amcc/crypto4xx_alg.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include "crypto4xx_reg_def.h" diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 981de43ea5e2..8d1b918a0533 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include @@ -917,7 +917,7 @@ int crypto4xx_build_pd(struct crypto_async_request *req, } pd->pd_ctl.w = PD_CTL_HOST_READY | - ((crypto_tfm_alg_type(req->tfm) == CRYPTO_ALG_TYPE_AHASH) | + ((crypto_tfm_alg_type(req->tfm) == CRYPTO_ALG_TYPE_AHASH) || (crypto_tfm_alg_type(req->tfm) == CRYPTO_ALG_TYPE_AEAD) ? PD_CTL_HASH_FINAL : 0); pd->pd_ctl_len.w = 0x00400000 | (assoclen + datalen); diff --git a/drivers/crypto/atmel-authenc.h b/drivers/crypto/atmel-authenc.h index c6530a1c8c20..45171e89a7d2 100644 --- a/drivers/crypto/atmel-authenc.h +++ b/drivers/crypto/atmel-authenc.h @@ -16,7 +16,8 @@ #include #include -#include +#include +#include #include "atmel-sha-regs.h" struct atmel_aes_dev; diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 75ccf41a7cb9..352d80cb5ae9 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -33,7 +33,8 @@ #include #include #include -#include +#include +#include #include #include #include "atmel-sha-regs.h" @@ -459,7 +460,6 @@ static int atmel_sha_init(struct ahash_request *req) break; default: return -EINVAL; - break; } ctx->bufcnt = 0; diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c index 809c3033ca74..9ad188cffd0d 100644 --- a/drivers/crypto/axis/artpec6_crypto.c +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -28,7 +28,8 @@ #include #include #include -#include +#include +#include #include /* Max length of a line in all cache levels for Artpec SoCs. */ diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index 50d169e61b41..30390a7324b2 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -26,11 +26,12 @@ #include #include #include -#include #include #include #include #include +#include +#include #include #include "util.h" diff --git a/drivers/crypto/bcm/cipher.h b/drivers/crypto/bcm/cipher.h index 035c8389cb3d..0ad5892b445d 100644 --- a/drivers/crypto/bcm/cipher.h +++ b/drivers/crypto/bcm/cipher.h @@ -16,7 +16,8 @@ #include #include #include -#include +#include +#include #include #include "spu.h" diff --git a/drivers/crypto/bcm/spu.h b/drivers/crypto/bcm/spu.h index dd132389bcaa..1c386a2d5506 100644 --- a/drivers/crypto/bcm/spu.h +++ b/drivers/crypto/bcm/spu.h @@ -17,7 +17,8 @@ #include #include -#include +#include +#include enum spu_cipher_alg { CIPHER_ALG_NONE = 0x0, diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index cf5bd7666dfc..8697ae53b063 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -3404,8 +3404,8 @@ static int caam_cra_init(struct crypto_skcipher *tfm) fallback = crypto_alloc_skcipher(tfm_name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback)) { - dev_err(ctx->jrdev, "Failed to allocate %s fallback: %ld\n", - tfm_name, PTR_ERR(fallback)); + pr_err("Failed to allocate %s fallback: %ld\n", + tfm_name, PTR_ERR(fallback)); return PTR_ERR(fallback); } diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 66f60d78bdc8..189a7438b29c 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -852,7 +852,7 @@ static struct caam_drv_ctx *get_drv_ctx(struct caam_ctx *ctx, cpu = smp_processor_id(); drv_ctx = caam_drv_ctx_init(ctx->qidev, &cpu, desc); - if (!IS_ERR_OR_NULL(drv_ctx)) + if (!IS_ERR(drv_ctx)) drv_ctx->op_type = type; ctx->drv_ctx[type] = drv_ctx; @@ -955,7 +955,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, struct caam_drv_ctx *drv_ctx; drv_ctx = get_drv_ctx(ctx, encrypt ? ENCRYPT : DECRYPT); - if (IS_ERR_OR_NULL(drv_ctx)) + if (IS_ERR(drv_ctx)) return (struct aead_edesc *)drv_ctx; /* allocate space for base edesc and hw desc commands, link tables */ @@ -1165,7 +1165,7 @@ static inline int aead_crypt(struct aead_request *req, bool encrypt) /* allocate extended descriptor */ edesc = aead_edesc_alloc(req, encrypt); - if (IS_ERR_OR_NULL(edesc)) + if (IS_ERR(edesc)) return PTR_ERR(edesc); /* Create and submit job descriptor */ @@ -1259,7 +1259,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, struct caam_drv_ctx *drv_ctx; drv_ctx = get_drv_ctx(ctx, encrypt ? ENCRYPT : DECRYPT); - if (IS_ERR_OR_NULL(drv_ctx)) + if (IS_ERR(drv_ctx)) return (struct skcipher_edesc *)drv_ctx; src_nents = sg_nents_for_len(req->src, req->cryptlen); @@ -2502,8 +2502,8 @@ static int caam_cra_init(struct crypto_skcipher *tfm) fallback = crypto_alloc_skcipher(tfm_name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback)) { - dev_err(ctx->jrdev, "Failed to allocate %s fallback: %ld\n", - tfm_name, PTR_ERR(fallback)); + pr_err("Failed to allocate %s fallback: %ld\n", + tfm_name, PTR_ERR(fallback)); return PTR_ERR(fallback); } diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 98c1ff1744bb..a780e627838a 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -1611,7 +1611,8 @@ static int caam_cra_init_skcipher(struct crypto_skcipher *tfm) fallback = crypto_alloc_skcipher(tfm_name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback)) { - dev_err(ctx->dev, "Failed to allocate %s fallback: %ld\n", + dev_err(caam_alg->caam.dev, + "Failed to allocate %s fallback: %ld\n", tfm_name, PTR_ERR(fallback)); return PTR_ERR(fallback); } diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index c3c22a8de4c0..c4f79764172b 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -34,7 +34,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 9112279a4de0..7d45b21bd55a 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -16,6 +16,14 @@ /* Currently comes from Kconfig param as a ^2 (driver-required) */ #define JOBR_DEPTH (1 << CONFIG_CRYPTO_DEV_FSL_CAAM_RINGSIZE) +/* + * Maximum size for crypto-engine software queue based on Job Ring + * size (JOBR_DEPTH) and a THRESHOLD (reserved for the non-crypto-API + * requests that are not passed through crypto-engine) + */ +#define THRESHOLD 15 +#define CRYPTO_ENGINE_MAX_QLEN (JOBR_DEPTH - THRESHOLD) + /* Kconfig params for interrupt coalescing if selected (else zero) */ #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_INTC #define JOBR_INTC JRCFG_ICEN diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 6f669966ba2c..7f2b1101f567 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -550,7 +550,9 @@ static int caam_jr_probe(struct platform_device *pdev) } /* Initialize crypto engine */ - jrpriv->engine = crypto_engine_alloc_init(jrdev, false); + jrpriv->engine = crypto_engine_alloc_init_and_set(jrdev, true, NULL, + false, + CRYPTO_ENGINE_MAX_QLEN); if (!jrpriv->engine) { dev_err(jrdev, "Could not init crypto-engine\n"); return -ENOMEM; diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c index 781949027451..711b1acdd4e0 100644 --- a/drivers/crypto/cavium/cpt/cptpf_main.c +++ b/drivers/crypto/cavium/cpt/cptpf_main.c @@ -244,7 +244,7 @@ static int do_cpt_init(struct cpt_device *cpt, struct microcode *mcode) struct ucode_header { u8 version[CPT_UCODE_VERSION_SZ]; - u32 code_length; + __be32 code_length; u32 data_length; u64 sram_address; }; @@ -288,10 +288,10 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae) /* Byte swap 64-bit */ for (j = 0; j < (mcode->code_size / 8); j++) - ((u64 *)mcode->code)[j] = cpu_to_be64(((u64 *)mcode->code)[j]); + ((__be64 *)mcode->code)[j] = cpu_to_be64(((u64 *)mcode->code)[j]); /* MC needs 16-bit swap */ for (j = 0; j < (mcode->code_size / 2); j++) - ((u16 *)mcode->code)[j] = cpu_to_be16(((u16 *)mcode->code)[j]); + ((__be16 *)mcode->code)[j] = cpu_to_be16(((u16 *)mcode->code)[j]); dev_dbg(dev, "mcode->code_size = %u\n", mcode->code_size); dev_dbg(dev, "mcode->is_ae = %u\n", mcode->is_ae); @@ -569,15 +569,9 @@ static int cpt_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto cpt_err_disable_device; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) { - dev_err(dev, "Unable to get usable DMA configuration\n"); - goto cpt_err_release_regions; - } - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); - if (err) { - dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + dev_err(dev, "Unable to get usable 48-bit DMA configuration\n"); goto cpt_err_release_regions; } diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c index a15245992cf9..f016448e43bb 100644 --- a/drivers/crypto/cavium/cpt/cptvf_main.c +++ b/drivers/crypto/cavium/cpt/cptvf_main.c @@ -687,15 +687,9 @@ static int cptvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Mark as VF driver */ cptvf->flags |= CPT_FLAG_VF_DRIVER; - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) { - dev_err(dev, "Unable to get usable DMA configuration\n"); - goto cptvf_err_release_regions; - } - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); - if (err) { - dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + dev_err(dev, "Unable to get usable 48-bit DMA configuration\n"); goto cptvf_err_release_regions; } diff --git a/drivers/crypto/cavium/nitrox/nitrox_aead.c b/drivers/crypto/cavium/nitrox/nitrox_aead.c index 1be2571363fe..c93c4e41d267 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_aead.c +++ b/drivers/crypto/cavium/nitrox/nitrox_aead.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -45,9 +44,9 @@ static int nitrox_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, /* fill crypto context */ fctx = nctx->u.fctx; - flags.f = be64_to_cpu(fctx->flags.f); + flags.fu = be64_to_cpu(fctx->flags.f); flags.w0.aes_keylen = aes_keylen; - fctx->flags.f = cpu_to_be64(flags.f); + fctx->flags.f = cpu_to_be64(flags.fu); /* copy enc key to context */ memset(&fctx->crypto, 0, sizeof(fctx->crypto)); @@ -63,9 +62,9 @@ static int nitrox_aead_setauthsize(struct crypto_aead *aead, struct flexi_crypto_context *fctx = nctx->u.fctx; union fc_ctx_flags flags; - flags.f = be64_to_cpu(fctx->flags.f); + flags.fu = be64_to_cpu(fctx->flags.f); flags.w0.mac_len = authsize; - fctx->flags.f = cpu_to_be64(flags.f); + fctx->flags.f = cpu_to_be64(flags.fu); aead->authsize = authsize; @@ -319,7 +318,7 @@ static int nitrox_gcm_common_init(struct crypto_aead *aead) flags->w0.iv_source = IV_FROM_DPTR; /* ask microcode to calculate ipad/opad */ flags->w0.auth_input_type = 1; - flags->f = be64_to_cpu(flags->f); + flags->f = cpu_to_be64(flags->fu); return 0; } diff --git a/drivers/crypto/cavium/nitrox/nitrox_debugfs.c b/drivers/crypto/cavium/nitrox/nitrox_debugfs.c index 16f7d0bd1303..741572a01995 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_debugfs.c +++ b/drivers/crypto/cavium/nitrox/nitrox_debugfs.c @@ -3,6 +3,7 @@ #include #include "nitrox_csr.h" +#include "nitrox_debugfs.h" #include "nitrox_dev.h" static int firmware_show(struct seq_file *s, void *v) diff --git a/drivers/crypto/cavium/nitrox/nitrox_hal.c b/drivers/crypto/cavium/nitrox/nitrox_hal.c index 34a2f4f30a7e..13b137410b75 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_hal.c +++ b/drivers/crypto/cavium/nitrox/nitrox_hal.c @@ -3,6 +3,7 @@ #include "nitrox_dev.h" #include "nitrox_csr.h" +#include "nitrox_hal.h" #define PLL_REF_CLK 50 #define MAX_CSR_RETRIES 10 diff --git a/drivers/crypto/cavium/nitrox/nitrox_isr.c b/drivers/crypto/cavium/nitrox/nitrox_isr.c index 3dec570a190a..99b053094f5a 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_isr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_isr.c @@ -7,6 +7,7 @@ #include "nitrox_csr.h" #include "nitrox_common.h" #include "nitrox_hal.h" +#include "nitrox_isr.h" #include "nitrox_mbx.h" /** diff --git a/drivers/crypto/cavium/nitrox/nitrox_isr.h b/drivers/crypto/cavium/nitrox/nitrox_isr.h index 1062c9336c1f..2bb123cd2f1c 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_isr.h +++ b/drivers/crypto/cavium/nitrox/nitrox_isr.h @@ -9,4 +9,13 @@ void nitrox_unregister_interrupts(struct nitrox_device *ndev); int nitrox_sriov_register_interupts(struct nitrox_device *ndev); void nitrox_sriov_unregister_interrupts(struct nitrox_device *ndev); +#ifdef CONFIG_PCI_IOV +int nitrox_sriov_configure(struct pci_dev *pdev, int num_vfs); +#else +static inline int nitrox_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + return 0; +} +#endif + #endif /* __NITROX_ISR_H */ diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index 9d14be97e381..facc8e6bc580 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -49,15 +49,6 @@ static unsigned int qlen = DEFAULT_CMD_QLEN; module_param(qlen, uint, 0644); MODULE_PARM_DESC(qlen, "Command queue length - default 2048"); -#ifdef CONFIG_PCI_IOV -int nitrox_sriov_configure(struct pci_dev *pdev, int num_vfs); -#else -int nitrox_sriov_configure(struct pci_dev *pdev, int num_vfs) -{ - return 0; -} -#endif - /** * struct ucode - Firmware Header * @id: microcode ID @@ -555,10 +546,8 @@ static void nitrox_remove(struct pci_dev *pdev) nitrox_remove_from_devlist(ndev); -#ifdef CONFIG_PCI_IOV /* disable SR-IOV */ nitrox_sriov_configure(pdev, 0); -#endif nitrox_crypto_unregister(); nitrox_debugfs_exit(ndev); nitrox_pf_sw_cleanup(ndev); @@ -584,9 +573,7 @@ static struct pci_driver nitrox_driver = { .probe = nitrox_probe, .remove = nitrox_remove, .shutdown = nitrox_shutdown, -#ifdef CONFIG_PCI_IOV .sriov_configure = nitrox_sriov_configure, -#endif }; module_pci_driver(nitrox_driver); diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c index b51b0449b478..c1af9d4fca6e 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c +++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c @@ -4,6 +4,7 @@ #include "nitrox_csr.h" #include "nitrox_hal.h" #include "nitrox_dev.h" +#include "nitrox_mbx.h" #define RING_TO_VFNO(_x, _y) ((_x) / (_y)) @@ -112,7 +113,7 @@ static void pf2vf_resp_handler(struct work_struct *work) case MBX_MSG_TYPE_ACK: case MBX_MSG_TYPE_NACK: break; - }; + } kfree(pf2vf_resp); } diff --git a/drivers/crypto/cavium/nitrox/nitrox_req.h b/drivers/crypto/cavium/nitrox/nitrox_req.h index 12282c1b14f5..ed174883c8e3 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_req.h +++ b/drivers/crypto/cavium/nitrox/nitrox_req.h @@ -149,6 +149,7 @@ struct auth_keys { union fc_ctx_flags { __be64 f; + u64 fu; struct { #if defined(__BIG_ENDIAN_BITFIELD) u64 cipher_type : 4; @@ -280,6 +281,7 @@ struct nitrox_rfc4106_rctx { * - packet payload bytes */ union pkt_instr_hdr { + __be64 bev; u64 value; struct { #if defined(__BIG_ENDIAN_BITFIELD) @@ -324,6 +326,7 @@ union pkt_instr_hdr { * @ctxp: Context pointer. CTXP<63,2:0> must be zero in all cases. */ union pkt_hdr { + __be64 bev[2]; u64 value[2]; struct { #if defined(__BIG_ENDIAN_BITFIELD) @@ -370,6 +373,7 @@ union pkt_hdr { * sglist components at [RPTR] on the remote host. */ union slc_store_info { + __be64 bev[2]; u64 value[2]; struct { #if defined(__BIG_ENDIAN_BITFIELD) diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index 5826c2c98a50..53ef06792133 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -3,6 +3,7 @@ #include #include +#include "nitrox_common.h" #include "nitrox_dev.h" #include "nitrox_req.h" #include "nitrox_csr.h" @@ -448,7 +449,7 @@ int nitrox_process_se_request(struct nitrox_device *ndev, sr->instr.ih.s.ssz = sr->out.sgmap_cnt; sr->instr.ih.s.fsz = FDATA_SIZE + sizeof(struct gphdr); sr->instr.ih.s.tlen = sr->instr.ih.s.fsz + sr->in.total_bytes; - sr->instr.ih.value = cpu_to_be64(sr->instr.ih.value); + sr->instr.ih.bev = cpu_to_be64(sr->instr.ih.value); /* word 2 */ sr->instr.irh.value[0] = 0; @@ -460,7 +461,7 @@ int nitrox_process_se_request(struct nitrox_device *ndev, sr->instr.irh.s.ctxc = req->ctrl.s.ctxc; sr->instr.irh.s.arg = req->ctrl.s.arg; sr->instr.irh.s.opcode = req->opcode; - sr->instr.irh.value[0] = cpu_to_be64(sr->instr.irh.value[0]); + sr->instr.irh.bev[0] = cpu_to_be64(sr->instr.irh.value[0]); /* word 3 */ sr->instr.irh.s.ctxp = cpu_to_be64(ctx_handle); @@ -468,7 +469,7 @@ int nitrox_process_se_request(struct nitrox_device *ndev, /* word 4 */ sr->instr.slc.value[0] = 0; sr->instr.slc.s.ssz = sr->out.sgmap_cnt; - sr->instr.slc.value[0] = cpu_to_be64(sr->instr.slc.value[0]); + sr->instr.slc.bev[0] = cpu_to_be64(sr->instr.slc.value[0]); /* word 5 */ sr->instr.slc.s.rptr = cpu_to_be64(sr->out.sgcomp_dma); diff --git a/drivers/crypto/cavium/zip/zip_main.c b/drivers/crypto/cavium/zip/zip_main.c index d35216e2f6cd..812b4ac9afd6 100644 --- a/drivers/crypto/cavium/zip/zip_main.c +++ b/drivers/crypto/cavium/zip/zip_main.c @@ -263,15 +263,9 @@ static int zip_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_disable_device; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) { - dev_err(dev, "Unable to get usable DMA configuration\n"); - goto err_release_regions; - } - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); - if (err) { - dev_err(dev, "Unable to get 48-bit DMA for allocations\n"); + dev_err(dev, "Unable to get usable 48-bit DMA configuration\n"); goto err_release_regions; } diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 8fbfdb9e8cd3..74fa5360e722 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -17,7 +17,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index aed3d2192d01..e42450d07168 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -19,7 +19,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index dafa6577a845..cdfee501fbd9 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -97,6 +97,7 @@ static int validate_keys_sizes(struct cc_cipher_ctx *ctx_p, u32 size) case S_DIN_to_SM4: if (size == SM4_KEY_SIZE) return 0; + break; default: break; } @@ -139,9 +140,11 @@ static int validate_data_size(struct cc_cipher_ctx *ctx_p, case DRV_CIPHER_CBC: if (IS_ALIGNED(size, SM4_BLOCK_SIZE)) return 0; + break; default: break; } + break; default: break; } diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 6f519d3e896c..d0e59e942568 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -100,6 +100,57 @@ static const struct of_device_id arm_ccree_dev_of_match[] = { }; MODULE_DEVICE_TABLE(of, arm_ccree_dev_of_match); +static void init_cc_cache_params(struct cc_drvdata *drvdata) +{ + struct device *dev = drvdata_to_dev(drvdata); + u32 cache_params, ace_const, val, mask; + + /* compute CC_AXIM_CACHE_PARAMS */ + cache_params = cc_ioread(drvdata, CC_REG(AXIM_CACHE_PARAMS)); + dev_dbg(dev, "Cache params previous: 0x%08X\n", cache_params); + + /* non cached or write-back, write allocate */ + val = drvdata->coherent ? 0xb : 0x2; + + mask = CC_GENMASK(CC_AXIM_CACHE_PARAMS_AWCACHE); + cache_params &= ~mask; + cache_params |= FIELD_PREP(mask, val); + + mask = CC_GENMASK(CC_AXIM_CACHE_PARAMS_AWCACHE_LAST); + cache_params &= ~mask; + cache_params |= FIELD_PREP(mask, val); + + mask = CC_GENMASK(CC_AXIM_CACHE_PARAMS_ARCACHE); + cache_params &= ~mask; + cache_params |= FIELD_PREP(mask, val); + + drvdata->cache_params = cache_params; + + dev_dbg(dev, "Cache params current: 0x%08X\n", cache_params); + + if (drvdata->hw_rev <= CC_HW_REV_710) + return; + + /* compute CC_AXIM_ACE_CONST */ + ace_const = cc_ioread(drvdata, CC_REG(AXIM_ACE_CONST)); + dev_dbg(dev, "ACE-const previous: 0x%08X\n", ace_const); + + /* system or outer-sharable */ + val = drvdata->coherent ? 0x2 : 0x3; + + mask = CC_GENMASK(CC_AXIM_ACE_CONST_ARDOMAIN); + ace_const &= ~mask; + ace_const |= FIELD_PREP(mask, val); + + mask = CC_GENMASK(CC_AXIM_ACE_CONST_AWDOMAIN); + ace_const &= ~mask; + ace_const |= FIELD_PREP(mask, val); + + dev_dbg(dev, "ACE-const current: 0x%08X\n", ace_const); + + drvdata->ace_const = ace_const; +} + static u32 cc_read_idr(struct cc_drvdata *drvdata, const u32 *idr_offsets) { int i; @@ -218,9 +269,9 @@ bool cc_wait_for_reset_completion(struct cc_drvdata *drvdata) return false; } -int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe) +int init_cc_regs(struct cc_drvdata *drvdata) { - unsigned int val, cache_params; + unsigned int val; struct device *dev = drvdata_to_dev(drvdata); /* Unmask all AXI interrupt sources AXI_CFG1 register */ @@ -245,19 +296,9 @@ int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe) cc_iowrite(drvdata, CC_REG(HOST_IMR), ~val); - cache_params = (drvdata->coherent ? CC_COHERENT_CACHE_PARAMS : 0x0); - - val = cc_ioread(drvdata, CC_REG(AXIM_CACHE_PARAMS)); - - if (is_probe) - dev_dbg(dev, "Cache params previous: 0x%08X\n", val); - - cc_iowrite(drvdata, CC_REG(AXIM_CACHE_PARAMS), cache_params); - val = cc_ioread(drvdata, CC_REG(AXIM_CACHE_PARAMS)); - - if (is_probe) - dev_dbg(dev, "Cache params current: 0x%08X (expect: 0x%08X)\n", - val, cache_params); + cc_iowrite(drvdata, CC_REG(AXIM_CACHE_PARAMS), drvdata->cache_params); + if (drvdata->hw_rev >= CC_HW_REV_712) + cc_iowrite(drvdata, CC_REG(AXIM_ACE_CONST), drvdata->ace_const); return 0; } @@ -445,7 +486,9 @@ static int init_cc_resources(struct platform_device *plat_dev) } dev_dbg(dev, "Registered to IRQ: %d\n", irq); - rc = init_cc_regs(new_drvdata, true); + init_cc_cache_params(new_drvdata); + + rc = init_cc_regs(new_drvdata); if (rc) { dev_err(dev, "init_cc_regs failed\n"); goto post_pm_err; diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index af77b2020350..5f1d4602eb8f 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -17,7 +17,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -49,8 +50,6 @@ enum cc_std_body { CC_STD_ALL = 0x3 }; -#define CC_COHERENT_CACHE_PARAMS 0xEEE - #define CC_PINS_FULL 0x0 #define CC_PINS_SLIM 0x9F @@ -155,6 +154,8 @@ struct cc_drvdata { int std_bodies; bool sec_disabled; u32 comp_mask; + u32 cache_params; + u32 ace_const; }; struct cc_crypto_alg { @@ -205,7 +206,7 @@ static inline void dump_byte_array(const char *name, const u8 *the_array, } bool cc_wait_for_reset_completion(struct cc_drvdata *drvdata); -int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe); +int init_cc_regs(struct cc_drvdata *drvdata); void fini_cc_regs(struct cc_drvdata *drvdata); unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata); diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index 3c65bf070c90..d5421b0c6831 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -45,7 +45,7 @@ static int cc_pm_resume(struct device *dev) } cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE); - rc = init_cc_regs(drvdata, false); + rc = init_cc_regs(drvdata); if (rc) { dev_err(dev, "init_cc_regs (%x)\n", rc); return rc; diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 13b908ea4873..f5a336634daa 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -53,7 +53,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 9c3b3ca815e6..843192666dc3 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -71,3 +71,11 @@ config CRYPTO_DEV_HISI_HPRE help Support for HiSilicon HPRE(High Performance RSA Engine) accelerator, which can accelerate RSA and DH algorithms. + +config CRYPTO_DEV_HISI_TRNG + tristate "Support for HISI TRNG Driver" + depends on ARM64 && ACPI + select HW_RANDOM + select CRYPTO_RNG + help + Support for HiSilicon TRNG Driver. diff --git a/drivers/crypto/hisilicon/Makefile b/drivers/crypto/hisilicon/Makefile index 7f5f74c72baa..1e89269a2e4b 100644 --- a/drivers/crypto/hisilicon/Makefile +++ b/drivers/crypto/hisilicon/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_CRYPTO_DEV_HISI_SEC2) += sec2/ obj-$(CONFIG_CRYPTO_DEV_HISI_QM) += hisi_qm.o hisi_qm-objs = qm.o sgl.o obj-$(CONFIG_CRYPTO_DEV_HISI_ZIP) += zip/ +obj-$(CONFIG_CRYPTO_DEV_HISI_TRNG) += trng/ diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index a33394d91bbf..e5c991913f09 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -705,9 +705,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; - ret = hisi_qm_debug_init(qm); - if (ret) - goto failed_to_create; + hisi_qm_debug_init(qm); if (qm->pdev->device == HPRE_PCI_DEVICE_ID) { ret = hpre_ctrl_debug_init(qm); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 530f23116d7c..f21ccae0e8ea 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -473,7 +473,7 @@ static int qm_wait_mb_ready(struct hisi_qm *qm) return readl_relaxed_poll_timeout(qm->io_base + QM_MB_CMD_SEND_BASE, val, !((val >> QM_MB_BUSY_SHIFT) & - 0x1), 10, 1000); + 0x1), POLL_PERIOD, POLL_TIMEOUT); } /* 128 bit should be written to hardware at one time to trigger a mailbox */ @@ -583,7 +583,8 @@ static int qm_dev_mem_reset(struct hisi_qm *qm) writel(0x1, qm->io_base + QM_MEM_START_INIT); return readl_relaxed_poll_timeout(qm->io_base + QM_MEM_INIT_DONE, val, - val & BIT(0), 10, 1000); + val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT); } static u32 qm_get_irq_num_v1(struct hisi_qm *qm) @@ -804,7 +805,8 @@ static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type, int ret; ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val, - val & BIT(0), 10, 1000); + val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT); if (ret) return ret; @@ -818,7 +820,8 @@ static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type, writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE); return readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val, - val & BIT(0), 10, 1000); + val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT); } /* The config should be conducted after qm_dev_mem_reset() */ @@ -929,7 +932,8 @@ static ssize_t qm_debug_read(struct file *filp, char __user *buf, return -EINVAL; } mutex_unlock(&file->lock); - ret = sprintf(tbuf, "%u\n", val); + + ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val); return simple_read_from_buffer(buf, count, pos, tbuf, ret); } @@ -1517,7 +1521,7 @@ static const struct file_operations qm_cmd_fops = { .write = qm_cmd_write, }; -static int qm_create_debugfs_file(struct hisi_qm *qm, enum qm_debug_file index) +static void qm_create_debugfs_file(struct hisi_qm *qm, enum qm_debug_file index) { struct dentry *qm_d = qm->debug.qm_d; struct debugfs_file *file = qm->debug.files + index; @@ -1528,8 +1532,6 @@ static int qm_create_debugfs_file(struct hisi_qm *qm, enum qm_debug_file index) file->index = index; mutex_init(&file->lock); file->debug = &qm->debug; - - return 0; } static void qm_hw_error_init_v1(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe) @@ -1733,19 +1735,15 @@ void hisi_qm_release_qp(struct hisi_qp *qp) } EXPORT_SYMBOL_GPL(hisi_qm_release_qp); -static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) +static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) { struct hisi_qm *qm = qp->qm; struct device *dev = &qm->pdev->dev; enum qm_hw_ver ver = qm->ver; struct qm_sqc *sqc; - struct qm_cqc *cqc; dma_addr_t sqc_dma; - dma_addr_t cqc_dma; int ret; - qm_init_qp_status(qp); - sqc = kzalloc(sizeof(struct qm_sqc), GFP_KERNEL); if (!sqc) return -ENOMEM; @@ -1770,8 +1768,18 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) ret = qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0); dma_unmap_single(dev, sqc_dma, sizeof(struct qm_sqc), DMA_TO_DEVICE); kfree(sqc); - if (ret) - return ret; + + return ret; +} + +static int qm_cq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) +{ + struct hisi_qm *qm = qp->qm; + struct device *dev = &qm->pdev->dev; + enum qm_hw_ver ver = qm->ver; + struct qm_cqc *cqc; + dma_addr_t cqc_dma; + int ret; cqc = kzalloc(sizeof(struct qm_cqc), GFP_KERNEL); if (!cqc) @@ -1785,11 +1793,12 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) INIT_QC_COMMON(cqc, qp->cqe_dma, pasid); if (ver == QM_HW_V1) { - cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V1(0, 0, 0, 4)); + cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V1(0, 0, 0, + QM_QC_CQE_SIZE)); cqc->w8 = cpu_to_le16(QM_Q_DEPTH - 1); } else { - cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V2(4)); - cqc->w8 = 0; + cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V2(QM_QC_CQE_SIZE)); + cqc->w8 = 0; /* rand_qc */ } cqc->dw6 = cpu_to_le32(1 << QM_CQ_PHASE_SHIFT | 1 << QM_CQ_FLAG_SHIFT); @@ -1800,6 +1809,19 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) return ret; } +static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) +{ + int ret; + + qm_init_qp_status(qp); + + ret = qm_sq_ctx_cfg(qp, qp_id, pasid); + if (ret) + return ret; + + return qm_cq_ctx_cfg(qp, qp_id, pasid); +} + static int qm_start_qp_nolock(struct hisi_qp *qp, unsigned long arg) { struct hisi_qm *qm = qp->qm; @@ -1843,6 +1865,9 @@ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) EXPORT_SYMBOL_GPL(hisi_qm_start_qp); /** + * qm_drain_qp() - Drain a qp. + * @qp: The qp we want to drain. + * * Determine whether the queue is cleared by judging the tail pointers of * sq and cq. */ @@ -2008,7 +2033,8 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm) writel(0x1, qm->io_base + QM_CACHE_WB_START); if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE, - val, val & BIT(0), 10, 1000)) + val, val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT)) dev_err(&qm->pdev->dev, "QM writeback sqc cache fail!\n"); } @@ -2112,7 +2138,7 @@ static void hisi_qm_uacce_stop_queue(struct uacce_queue *q) hisi_qm_stop_qp(q->priv); } -static int qm_set_sqctype(struct uacce_queue *q, u16 type) +static void qm_set_sqctype(struct uacce_queue *q, u16 type) { struct hisi_qm *qm = q->uacce->priv; struct hisi_qp *qp = q->priv; @@ -2120,8 +2146,6 @@ static int qm_set_sqctype(struct uacce_queue *q, u16 type) down_write(&qm->qps_lock); qp->alg_type = type; up_write(&qm->qps_lock); - - return 0; } static long hisi_qm_uacce_ioctl(struct uacce_queue *q, unsigned int cmd, @@ -2418,6 +2442,16 @@ static void hisi_qm_pre_init(struct hisi_qm *qm) qm->is_frozen = false; } +static void hisi_qm_pci_uninit(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + + pci_free_irq_vectors(pdev); + iounmap(qm->io_base); + pci_release_mem_regions(pdev); + pci_disable_device(pdev); +} + /** * hisi_qm_uninit() - Uninitialize qm. * @qm: The qm needed uninit. @@ -2436,9 +2470,6 @@ void hisi_qm_uninit(struct hisi_qm *qm) return; } - uacce_remove(qm->uacce); - qm->uacce = NULL; - hisi_qp_memory_uninit(qm, qm->qp_num); idr_destroy(&qm->qp_idr); @@ -2450,10 +2481,9 @@ void hisi_qm_uninit(struct hisi_qm *qm) } qm_irq_unregister(qm); - pci_free_irq_vectors(pdev); - iounmap(qm->io_base); - pci_release_mem_regions(pdev); - pci_disable_device(pdev); + hisi_qm_pci_uninit(qm); + uacce_remove(qm->uacce); + qm->uacce = NULL; up_write(&qm->qps_lock); } @@ -2486,6 +2516,12 @@ int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number) EXPORT_SYMBOL_GPL(hisi_qm_get_vft); /** + * hisi_qm_set_vft() - Set vft to a qm. + * @qm: The qm we want to set its vft. + * @fun_num: The function number. + * @base: The base number of queue in vft. + * @number: The number of queues in vft. + * * This function is alway called in PF driver, it is used to assign queues * among PF and VFs. * @@ -2519,14 +2555,10 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; struct qm_eqc *eqc; - struct qm_aeqc *aeqc; dma_addr_t eqc_dma; - dma_addr_t aeqc_dma; int ret; - qm_init_eq_aeq_status(qm); - - eqc = kzalloc(sizeof(struct qm_eqc), GFP_KERNEL); + eqc = kzalloc(sizeof(struct qm_eqc), GFP_KERNEL); //todo if (!eqc) return -ENOMEM; eqc_dma = dma_map_single(dev, eqc, sizeof(struct qm_eqc), @@ -2541,11 +2573,20 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm) if (qm->ver == QM_HW_V1) eqc->dw3 = cpu_to_le32(QM_EQE_AEQE_SIZE); eqc->dw6 = cpu_to_le32((QM_EQ_DEPTH - 1) | (1 << QM_EQC_PHASE_SHIFT)); + ret = qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0); dma_unmap_single(dev, eqc_dma, sizeof(struct qm_eqc), DMA_TO_DEVICE); kfree(eqc); - if (ret) - return ret; + + return ret; +} + +static int qm_aeq_ctx_cfg(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + struct qm_aeqc *aeqc; + dma_addr_t aeqc_dma; + int ret; aeqc = kzalloc(sizeof(struct qm_aeqc), GFP_KERNEL); if (!aeqc) @@ -2568,6 +2609,22 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm) return ret; } +static int qm_eq_aeq_ctx_cfg(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + int ret; + + qm_init_eq_aeq_status(qm); + + ret = qm_eq_ctx_cfg(qm); + if (ret) { + dev_err(dev, "Set eqc failed!\n"); + return ret; + } + + return qm_aeq_ctx_cfg(qm); +} + static int __hisi_qm_start(struct hisi_qm *qm) { int ret; @@ -2584,7 +2641,7 @@ static int __hisi_qm_start(struct hisi_qm *qm) return ret; } - ret = qm_eq_ctx_cfg(qm); + ret = qm_eq_aeq_ctx_cfg(qm); if (ret) return ret; @@ -2690,7 +2747,11 @@ static int qm_stop_started_qp(struct hisi_qm *qm) return 0; } + /** + * qm_clear_queues() - Clear all queues memory in a qm. + * @qm: The qm in which the queues will be cleared. + * * This function clears all queues memory in a qm. Reset of accelerator can * use this to clear queues. */ @@ -2806,12 +2867,12 @@ DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get, * * Create qm related debugfs files. */ -int hisi_qm_debug_init(struct hisi_qm *qm) +void hisi_qm_debug_init(struct hisi_qm *qm) { struct qm_dfx *dfx = &qm->debug.dfx; struct dentry *qm_d; void *data; - int i, ret; + int i; qm_d = debugfs_create_dir("qm", qm->debug.debug_root); qm->debug.qm_d = qm_d; @@ -2819,10 +2880,7 @@ int hisi_qm_debug_init(struct hisi_qm *qm) /* only show this in PF */ if (qm->fun_type == QM_HW_PF) for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++) - if (qm_create_debugfs_file(qm, i)) { - ret = -ENOENT; - goto failed_to_create; - } + qm_create_debugfs_file(qm, i); debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops); @@ -2838,12 +2896,6 @@ int hisi_qm_debug_init(struct hisi_qm *qm) data, &qm_atomic64_ops); } - - return 0; - -failed_to_create: - debugfs_remove_recursive(qm_d); - return ret; } EXPORT_SYMBOL_GPL(hisi_qm_debug_init); @@ -3273,7 +3325,7 @@ pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, } EXPORT_SYMBOL_GPL(hisi_qm_dev_err_detected); -static int qm_get_hw_error_status(struct hisi_qm *qm) +static u32 qm_get_hw_error_status(struct hisi_qm *qm) { return readl(qm->io_base + QM_ABNORMAL_INT_STATUS); } @@ -3572,7 +3624,7 @@ static int qm_vf_reset_done(struct hisi_qm *qm) return ret; } -static int qm_get_dev_err_status(struct hisi_qm *qm) +static u32 qm_get_dev_err_status(struct hisi_qm *qm) { return qm->err_ini->get_dev_hw_err_status(qm); } @@ -3992,34 +4044,22 @@ void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list) } EXPORT_SYMBOL_GPL(hisi_qm_alg_unregister); -/** - * hisi_qm_init() - Initialize configures about qm. - * @qm: The qm needing init. - * - * This function init qm, then we can call hisi_qm_start to put qm into work. - */ -int hisi_qm_init(struct hisi_qm *qm) +static int hisi_qm_pci_init(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; struct device *dev = &pdev->dev; unsigned int num_vec; int ret; - hisi_qm_pre_init(qm); - - ret = qm_alloc_uacce(qm); - if (ret < 0) - dev_warn(&pdev->dev, "fail to alloc uacce (%d)\n", ret); - ret = pci_enable_device_mem(pdev); if (ret < 0) { - dev_err(&pdev->dev, "Failed to enable device mem!\n"); - goto err_remove_uacce; + dev_err(dev, "Failed to enable device mem!\n"); + return ret; } ret = pci_request_mem_regions(pdev, qm->dev_name); if (ret < 0) { - dev_err(&pdev->dev, "Failed to request mem regions!\n"); + dev_err(dev, "Failed to request mem regions!\n"); goto err_disable_pcidev; } @@ -4047,9 +4087,42 @@ int hisi_qm_init(struct hisi_qm *qm) goto err_iounmap; } + return 0; + +err_iounmap: + iounmap(qm->io_base); +err_release_mem_regions: + pci_release_mem_regions(pdev); +err_disable_pcidev: + pci_disable_device(pdev); + return ret; +} + +/** + * hisi_qm_init() - Initialize configures about qm. + * @qm: The qm needing init. + * + * This function init qm, then we can call hisi_qm_start to put qm into work. + */ +int hisi_qm_init(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + struct device *dev = &pdev->dev; + int ret; + + hisi_qm_pre_init(qm); + + ret = qm_alloc_uacce(qm); + if (ret < 0) + dev_warn(dev, "fail to alloc uacce (%d)\n", ret); + + ret = hisi_qm_pci_init(qm); + if (ret) + goto err_remove_uacce; + ret = qm_irq_register(qm); if (ret) - goto err_free_irq_vectors; + goto err_pci_uninit; if (qm->fun_type == QM_HW_VF && qm->ver != QM_HW_V1) { /* v2 starts to support get vft by mailbox */ @@ -4072,14 +4145,8 @@ int hisi_qm_init(struct hisi_qm *qm) err_irq_unregister: qm_irq_unregister(qm); -err_free_irq_vectors: - pci_free_irq_vectors(pdev); -err_iounmap: - iounmap(qm->io_base); -err_release_mem_regions: - pci_release_mem_regions(pdev); -err_disable_pcidev: - pci_disable_device(pdev); +err_pci_uninit: + hisi_qm_pci_uninit(qm); err_remove_uacce: uacce_remove(qm->uacce); qm->uacce = NULL; @@ -4087,7 +4154,6 @@ int hisi_qm_init(struct hisi_qm *qm) } EXPORT_SYMBOL_GPL(hisi_qm_init); - MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Zhou Wang "); MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver"); diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 0420f4ce7197..8624d1288afe 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -350,7 +350,7 @@ void hisi_qm_release_qp(struct hisi_qp *qp); int hisi_qp_send(struct hisi_qp *qp, const void *msg); int hisi_qm_get_free_qp_num(struct hisi_qm *qm); int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); -int hisi_qm_debug_init(struct hisi_qm *qm); +void hisi_qm_debug_init(struct hisi_qm *qm); enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs); diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 037762b531e2..08491912afd5 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -109,7 +109,6 @@ struct sec_qp_ctx { struct list_head backlog; struct hisi_acc_sgl_pool *c_in_pool; struct hisi_acc_sgl_pool *c_out_pool; - atomic_t pending_reqs; }; enum sec_alg_type { @@ -180,7 +179,6 @@ struct sec_dev { struct sec_debug debug; u32 ctx_q_num; bool iommu_used; - unsigned long status; }; void sec_destroy_qps(struct hisi_qp **qps, int qp_num); diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index bb493423668c..2eaa516b3231 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -7,7 +7,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -101,6 +102,7 @@ static int sec_alloc_req_id(struct sec_req *req, struct sec_qp_ctx *qp_ctx) req->qp_ctx = qp_ctx; qp_ctx->req_list[req_id] = req; + return req_id; } @@ -317,6 +319,7 @@ static int sec_alloc_pbuf_resource(struct device *dev, struct sec_alg_res *res) j * SEC_PBUF_PKG + pbuf_page_offset; } } + return 0; } @@ -345,12 +348,12 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx, } return 0; + alloc_pbuf_fail: if (ctx->alg_type == SEC_AEAD) sec_free_mac_resource(dev, qp_ctx->res); alloc_fail: sec_free_civ_resource(dev, res); - return ret; } @@ -419,7 +422,6 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool); err_destroy_idr: idr_destroy(&qp_ctx->req_idr); - return ret; } @@ -557,9 +559,9 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm) goto err_cipher_init; return 0; + err_cipher_init: sec_ctx_base_uninit(ctx); - return ret; } @@ -740,7 +742,6 @@ static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req, if (unlikely(pbuf_length != copy_size)) dev_err(dev, "copy pbuf data to dst error!\n"); - } static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, @@ -857,7 +858,7 @@ static int sec_aead_auth_set_key(struct sec_auth_ctx *ctx, struct crypto_authenc_keys *keys) { struct crypto_shash *hash_tfm = ctx->hash_tfm; - int blocksize, ret; + int blocksize, digestsize, ret; if (!keys->authkeylen) { pr_err("hisi_sec2: aead auth key error!\n"); @@ -865,6 +866,7 @@ static int sec_aead_auth_set_key(struct sec_auth_ctx *ctx, } blocksize = crypto_shash_blocksize(hash_tfm); + digestsize = crypto_shash_digestsize(hash_tfm); if (keys->authkeylen > blocksize) { ret = crypto_shash_tfm_digest(hash_tfm, keys->authkey, keys->authkeylen, ctx->a_key); @@ -872,7 +874,7 @@ static int sec_aead_auth_set_key(struct sec_auth_ctx *ctx, pr_err("hisi_sec2: aead auth digest error!\n"); return -EINVAL; } - ctx->a_key_len = blocksize; + ctx->a_key_len = digestsize; } else { memcpy(ctx->a_key, keys->authkey, keys->authkeylen); ctx->a_key_len = keys->authkeylen; @@ -913,9 +915,9 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key, } return 0; + bad_key: memzero_explicit(&keys, sizeof(struct crypto_authenc_keys)); - return -EINVAL; } @@ -966,7 +968,6 @@ static int sec_request_transfer(struct sec_ctx *ctx, struct sec_req *req) unmap_req_buf: ctx->req_op->buf_unmap(ctx, req); - return ret; } @@ -1107,7 +1108,6 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, atomic64_inc(&ctx->sec->debug.dfx.recv_busy_cnt); } - sk_req->base.complete(&sk_req->base, err); } @@ -1279,7 +1279,6 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req) sec_request_untransfer(ctx, req); err_uninit_req: sec_request_uninit(ctx, req); - return ret; } @@ -1349,7 +1348,6 @@ static int sec_aead_init(struct crypto_aead *tfm) sec_auth_uninit(ctx); err_auth_init: sec_ctx_base_uninit(ctx); - return ret; } @@ -1437,8 +1435,8 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq) } return 0; } - dev_err(dev, "skcipher algorithm error!\n"); + return -EINVAL; } @@ -1554,7 +1552,6 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) if (unlikely(c_alg != SEC_CALG_AES)) { dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n"); return -EINVAL; - } if (sreq->c_req.encrypt) sreq->c_req.c_len = req->cryptlen; diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 548896394c4b..b35c1c2271a3 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -652,20 +652,16 @@ static int sec_debugfs_init(struct hisi_qm *qm) sec_debugfs_root); qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; - ret = hisi_qm_debug_init(qm); - if (ret) - goto failed_to_create; + hisi_qm_debug_init(qm); ret = sec_debug_init(qm); if (ret) goto failed_to_create; - return 0; failed_to_create: debugfs_remove_recursive(sec_debugfs_root); - return ret; } @@ -683,13 +679,13 @@ static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts) while (errs->msg) { if (errs->int_msk & err_sts) { dev_err(dev, "%s [error status=0x%x] found\n", - errs->msg, errs->int_msk); + errs->msg, errs->int_msk); if (SEC_CORE_INT_STATUS_M_ECC & errs->int_msk) { err_val = readl(qm->io_base + SEC_CORE_SRAM_ECC_ERR_INFO); dev_err(dev, "multi ecc sram num=0x%x\n", - SEC_ECC_NUM(err_val)); + SEC_ECC_NUM(err_val)); } } errs++; @@ -724,13 +720,13 @@ static const struct hisi_qm_err_ini sec_err_ini = { .log_dev_hw_err = sec_log_hw_error, .open_axi_master_ooo = sec_open_axi_master_ooo, .err_info = { - .ce = QM_BASE_CE, - .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT | - QM_ACC_WB_NOT_READY_TIMEOUT, - .fe = 0, - .ecc_2bits_mask = SEC_CORE_INT_STATUS_M_ECC, - .msi_wr_port = BIT(0), - .acpi_rst = "SRST", + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT | + QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + .ecc_2bits_mask = SEC_CORE_INT_STATUS_M_ECC, + .msi_wr_port = BIT(0), + .acpi_rst = "SRST", } }; @@ -899,17 +895,13 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_alg_unregister: hisi_qm_alg_unregister(qm, &sec_devices); - err_qm_stop: sec_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); - err_probe_uninit: sec_probe_uninit(qm); - err_qm_uninit: sec_qm_uninit(qm); - return ret; } @@ -936,9 +928,9 @@ static void sec_remove(struct pci_dev *pdev) static const struct pci_error_handlers sec_err_handler = { .error_detected = hisi_qm_dev_err_detected, - .slot_reset = hisi_qm_dev_slot_reset, - .reset_prepare = hisi_qm_reset_prepare, - .reset_done = hisi_qm_reset_done, + .slot_reset = hisi_qm_dev_slot_reset, + .reset_prepare = hisi_qm_reset_prepare, + .reset_done = hisi_qm_reset_done, }; static struct pci_driver sec_pci_driver = { diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index 725a739800b0..3bff6394acaf 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -246,8 +246,6 @@ EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl); * @dev: The device which hw sgl belongs to. * @sgl: Related scatterlist. * @hw_sgl: Virtual address of hw sgl. - * @hw_sgl_dma: DMA address of hw sgl. - * @pool: Pool which hw sgl is allocated in. * * This function unmaps allocated hw sgl. */ diff --git a/drivers/crypto/hisilicon/trng/Makefile b/drivers/crypto/hisilicon/trng/Makefile new file mode 100644 index 000000000000..d909079f351c --- /dev/null +++ b/drivers/crypto/hisilicon/trng/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CRYPTO_DEV_HISI_TRNG) += hisi-trng-v2.o +hisi-trng-v2-objs = trng.o diff --git a/drivers/crypto/hisilicon/trng/trng.c b/drivers/crypto/hisilicon/trng/trng.c new file mode 100644 index 000000000000..29712685498a --- /dev/null +++ b/drivers/crypto/hisilicon/trng/trng.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 HiSilicon Limited. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HISI_TRNG_REG 0x00F0 +#define HISI_TRNG_BYTES 4 +#define HISI_TRNG_QUALITY 512 +#define SLEEP_US 10 +#define TIMEOUT_US 10000 +#define SW_DRBG_NUM_SHIFT 2 +#define SW_DRBG_KEY_BASE 0x082C +#define SW_DRBG_SEED(n) (SW_DRBG_KEY_BASE - ((n) << SW_DRBG_NUM_SHIFT)) +#define SW_DRBG_SEED_REGS_NUM 12 +#define SW_DRBG_SEED_SIZE 48 +#define SW_DRBG_BLOCKS 0x0830 +#define SW_DRBG_INIT 0x0834 +#define SW_DRBG_GEN 0x083c +#define SW_DRBG_STATUS 0x0840 +#define SW_DRBG_BLOCKS_NUM 4095 +#define SW_DRBG_DATA_BASE 0x0850 +#define SW_DRBG_DATA_NUM 4 +#define SW_DRBG_DATA(n) (SW_DRBG_DATA_BASE - ((n) << SW_DRBG_NUM_SHIFT)) +#define SW_DRBG_BYTES 16 +#define SW_DRBG_ENABLE_SHIFT 12 +#define SEED_SHIFT_24 24 +#define SEED_SHIFT_16 16 +#define SEED_SHIFT_8 8 + +struct hisi_trng_list { + struct mutex lock; + struct list_head list; + bool is_init; +}; + +struct hisi_trng { + void __iomem *base; + struct hisi_trng_list *trng_list; + struct list_head list; + struct hwrng rng; + bool is_used; + struct mutex mutex; +}; + +struct hisi_trng_ctx { + struct hisi_trng *trng; +}; + +static atomic_t trng_active_devs; +static struct hisi_trng_list trng_devices; + +static void hisi_trng_set_seed(struct hisi_trng *trng, const u8 *seed) +{ + u32 val, seed_reg, i; + + for (i = 0; i < SW_DRBG_SEED_SIZE; + i += SW_DRBG_SEED_SIZE / SW_DRBG_SEED_REGS_NUM) { + val = seed[i] << SEED_SHIFT_24; + val |= seed[i + 1UL] << SEED_SHIFT_16; + val |= seed[i + 2UL] << SEED_SHIFT_8; + val |= seed[i + 3UL]; + + seed_reg = (i >> SW_DRBG_NUM_SHIFT) % SW_DRBG_SEED_REGS_NUM; + writel(val, trng->base + SW_DRBG_SEED(seed_reg)); + } +} + +static int hisi_trng_seed(struct crypto_rng *tfm, const u8 *seed, + unsigned int slen) +{ + struct hisi_trng_ctx *ctx = crypto_rng_ctx(tfm); + struct hisi_trng *trng = ctx->trng; + u32 val = 0; + int ret = 0; + + if (slen < SW_DRBG_SEED_SIZE) { + pr_err("slen(%u) is not matched with trng(%d)\n", slen, + SW_DRBG_SEED_SIZE); + return -EINVAL; + } + + writel(0x0, trng->base + SW_DRBG_BLOCKS); + hisi_trng_set_seed(trng, seed); + + writel(SW_DRBG_BLOCKS_NUM | (0x1 << SW_DRBG_ENABLE_SHIFT), + trng->base + SW_DRBG_BLOCKS); + writel(0x1, trng->base + SW_DRBG_INIT); + + ret = readl_relaxed_poll_timeout(trng->base + SW_DRBG_STATUS, + val, val & BIT(0), SLEEP_US, TIMEOUT_US); + if (ret) + pr_err("fail to init trng(%d)\n", ret); + + return ret; +} + +static int hisi_trng_generate(struct crypto_rng *tfm, const u8 *src, + unsigned int slen, u8 *dstn, unsigned int dlen) +{ + struct hisi_trng_ctx *ctx = crypto_rng_ctx(tfm); + struct hisi_trng *trng = ctx->trng; + u32 data[SW_DRBG_DATA_NUM]; + u32 currsize = 0; + u32 val = 0; + int ret; + u32 i; + + if (dlen > SW_DRBG_BLOCKS_NUM * SW_DRBG_BYTES || dlen == 0) { + pr_err("dlen(%d) exceeds limit(%d)!\n", dlen, + SW_DRBG_BLOCKS_NUM * SW_DRBG_BYTES); + return -EINVAL; + } + + do { + ret = readl_relaxed_poll_timeout(trng->base + SW_DRBG_STATUS, + val, val & BIT(1), SLEEP_US, TIMEOUT_US); + if (ret) { + pr_err("fail to generate random number(%d)!\n", ret); + break; + } + + for (i = 0; i < SW_DRBG_DATA_NUM; i++) + data[i] = readl(trng->base + SW_DRBG_DATA(i)); + + if (dlen - currsize >= SW_DRBG_BYTES) { + memcpy(dstn + currsize, data, SW_DRBG_BYTES); + currsize += SW_DRBG_BYTES; + } else { + memcpy(dstn + currsize, data, dlen - currsize); + currsize = dlen; + } + + writel(0x1, trng->base + SW_DRBG_GEN); + } while (currsize < dlen); + + return ret; +} + +static int hisi_trng_init(struct crypto_tfm *tfm) +{ + struct hisi_trng_ctx *ctx = crypto_tfm_ctx(tfm); + struct hisi_trng *trng; + int ret = -EBUSY; + + mutex_lock(&trng_devices.lock); + list_for_each_entry(trng, &trng_devices.list, list) { + if (!trng->is_used) { + trng->is_used = true; + ctx->trng = trng; + ret = 0; + break; + } + } + mutex_unlock(&trng_devices.lock); + + return ret; +} + +static void hisi_trng_exit(struct crypto_tfm *tfm) +{ + struct hisi_trng_ctx *ctx = crypto_tfm_ctx(tfm); + + mutex_lock(&trng_devices.lock); + ctx->trng->is_used = false; + mutex_unlock(&trng_devices.lock); +} + +static int hisi_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait) +{ + struct hisi_trng *trng; + int currsize = 0; + u32 val = 0; + u32 ret; + + trng = container_of(rng, struct hisi_trng, rng); + + do { + ret = readl_poll_timeout(trng->base + HISI_TRNG_REG, val, + val, SLEEP_US, TIMEOUT_US); + if (ret) + return currsize; + + if (max - currsize >= HISI_TRNG_BYTES) { + memcpy(buf + currsize, &val, HISI_TRNG_BYTES); + currsize += HISI_TRNG_BYTES; + if (currsize == max) + return currsize; + continue; + } + + /* copy remaining bytes */ + memcpy(buf + currsize, &val, max - currsize); + currsize = max; + } while (currsize < max); + + return currsize; +} + +static struct rng_alg hisi_trng_alg = { + .generate = hisi_trng_generate, + .seed = hisi_trng_seed, + .seedsize = SW_DRBG_SEED_SIZE, + .base = { + .cra_name = "stdrng", + .cra_driver_name = "hisi_stdrng", + .cra_priority = 300, + .cra_ctxsize = sizeof(struct hisi_trng_ctx), + .cra_module = THIS_MODULE, + .cra_init = hisi_trng_init, + .cra_exit = hisi_trng_exit, + }, +}; + +static void hisi_trng_add_to_list(struct hisi_trng *trng) +{ + mutex_lock(&trng_devices.lock); + list_add_tail(&trng->list, &trng_devices.list); + mutex_unlock(&trng_devices.lock); +} + +static int hisi_trng_del_from_list(struct hisi_trng *trng) +{ + int ret = -EBUSY; + + mutex_lock(&trng_devices.lock); + if (!trng->is_used) { + list_del(&trng->list); + ret = 0; + } + mutex_unlock(&trng_devices.lock); + + return ret; +} + +static int hisi_trng_probe(struct platform_device *pdev) +{ + struct hisi_trng *trng; + int ret; + + trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL); + if (!trng) + return -ENOMEM; + + platform_set_drvdata(pdev, trng); + + trng->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(trng->base)) + return PTR_ERR(trng->base); + + trng->is_used = false; + if (!trng_devices.is_init) { + INIT_LIST_HEAD(&trng_devices.list); + mutex_init(&trng_devices.lock); + trng_devices.is_init = true; + } + + hisi_trng_add_to_list(trng); + if (atomic_inc_return(&trng_active_devs) == 1) { + ret = crypto_register_rng(&hisi_trng_alg); + if (ret) { + dev_err(&pdev->dev, + "failed to register crypto(%d)\n", ret); + atomic_dec_return(&trng_active_devs); + goto err_remove_from_list; + } + } + + trng->rng.name = pdev->name; + trng->rng.read = hisi_trng_read; + trng->rng.quality = HISI_TRNG_QUALITY; + ret = devm_hwrng_register(&pdev->dev, &trng->rng); + if (ret) { + dev_err(&pdev->dev, "failed to register hwrng: %d!\n", ret); + goto err_crypto_unregister; + } + + return ret; + +err_crypto_unregister: + if (atomic_dec_return(&trng_active_devs) == 0) + crypto_unregister_rng(&hisi_trng_alg); + +err_remove_from_list: + hisi_trng_del_from_list(trng); + return ret; +} + +static int hisi_trng_remove(struct platform_device *pdev) +{ + struct hisi_trng *trng = platform_get_drvdata(pdev); + + /* Wait until the task is finished */ + while (hisi_trng_del_from_list(trng)) + ; + + if (atomic_dec_return(&trng_active_devs) == 0) + crypto_unregister_rng(&hisi_trng_alg); + + return 0; +} + +static const struct acpi_device_id hisi_trng_acpi_match[] = { + { "HISI02B3", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, hisi_trng_acpi_match); + +static struct platform_driver hisi_trng_driver = { + .probe = hisi_trng_probe, + .remove = hisi_trng_remove, + .driver = { + .name = "hisi-trng-v2", + .acpi_match_table = ACPI_PTR(hisi_trng_acpi_match), + }, +}; + +module_platform_driver(hisi_trng_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Weili Qian "); +MODULE_AUTHOR("Zaibo Xu "); +MODULE_DESCRIPTION("HiSilicon true random number generator V2 driver"); diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 4bd2c811abba..4fb5a32bf830 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -590,9 +590,7 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; qm->debug.debug_root = dev_d; - ret = hisi_qm_debug_init(qm); - if (ret) - goto failed_to_create; + hisi_qm_debug_init(qm); if (qm->fun_type == QM_HW_PF) { ret = hisi_zip_ctrl_debug_init(qm); @@ -749,6 +747,8 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) { + int ret; + qm->pdev = pdev; qm->ver = pdev->revision; qm->algs = "zlib\ngzip"; @@ -774,7 +774,25 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM; } - return hisi_qm_init(qm); + qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM | + WQ_UNBOUND, num_online_cpus(), + pci_name(qm->pdev)); + if (!qm->wq) { + pci_err(qm->pdev, "fail to alloc workqueue\n"); + return -ENOMEM; + } + + ret = hisi_qm_init(qm); + if (ret) + destroy_workqueue(qm->wq); + + return ret; +} + +static void hisi_zip_qm_uninit(struct hisi_qm *qm) +{ + hisi_qm_uninit(qm); + destroy_workqueue(qm->wq); } static int hisi_zip_probe_init(struct hisi_zip *hisi_zip) @@ -856,7 +874,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) hisi_qm_dev_err_uninit(qm); err_qm_uninit: - hisi_qm_uninit(qm); + hisi_zip_qm_uninit(qm); return ret; } @@ -874,7 +892,7 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); hisi_qm_dev_err_uninit(qm); - hisi_qm_uninit(qm); + hisi_zip_qm_uninit(qm); } static const struct pci_error_handlers hisi_zip_err_handler = { diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index 91f555ccbb31..e813115d5432 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -19,7 +19,8 @@ #include #include -#include +#include +#include #define CR_RESET 0 #define CR_RESET_SET 1 diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index eb2418450f12..2e1562108a85 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -1639,7 +1639,7 @@ static int safexcel_probe_generic(void *pdev, priv->ring[i].rdr_req = devm_kcalloc(dev, EIP197_DEFAULT_RING_SIZE, - sizeof(priv->ring[i].rdr_req), + sizeof(*priv->ring[i].rdr_req), GFP_KERNEL); if (!priv->ring[i].rdr_req) return -ENOMEM; diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h index 9045f2d7f4c6..ce1e611a163e 100644 --- a/drivers/crypto/inside-secure/safexcel.h +++ b/drivers/crypto/inside-secure/safexcel.h @@ -11,7 +11,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index 9bcfb79a030f..d68ef16650d4 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -18,7 +18,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 56d5ccb5cc00..50fb6d90a2e0 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -8,7 +8,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 276012e7c482..8b0f17fc09fb 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/crypto/keembay/Kconfig b/drivers/crypto/keembay/Kconfig new file mode 100644 index 000000000000..3c16797b25b9 --- /dev/null +++ b/drivers/crypto/keembay/Kconfig @@ -0,0 +1,39 @@ +config CRYPTO_DEV_KEEMBAY_OCS_AES_SM4 + tristate "Support for Intel Keem Bay OCS AES/SM4 HW acceleration" + depends on OF || COMPILE_TEST + select CRYPTO_SKCIPHER + select CRYPTO_AEAD + select CRYPTO_ENGINE + help + Support for Intel Keem Bay Offload and Crypto Subsystem (OCS) AES and + SM4 cihper hardware acceleration for use with Crypto API. + + Provides HW acceleration for the following transformations: + cbc(aes), ctr(aes), ccm(aes), gcm(aes), cbc(sm4), ctr(sm4), ccm(sm4) + and gcm(sm4). + + Optionally, support for the following transformations can also be + enabled: ecb(aes), cts(cbc(aes)), ecb(sm4) and cts(cbc(sm4)). + +config CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB + bool "Support for Intel Keem Bay OCS AES/SM4 ECB HW acceleration" + depends on CRYPTO_DEV_KEEMBAY_OCS_AES_SM4 + help + Support for Intel Keem Bay Offload and Crypto Subsystem (OCS) + AES/SM4 ECB mode hardware acceleration for use with Crypto API. + + Provides OCS version of ecb(aes) and ecb(sm4) + + Intel does not recommend use of ECB mode with AES/SM4. + +config CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS + bool "Support for Intel Keem Bay OCS AES/SM4 CTS HW acceleration" + depends on CRYPTO_DEV_KEEMBAY_OCS_AES_SM4 + help + Support for Intel Keem Bay Offload and Crypto Subsystem (OCS) + AES/SM4 CBC with CTS mode hardware acceleration for use with + Crypto API. + + Provides OCS version of cts(cbc(aes)) and cts(cbc(sm4)). + + Intel does not recommend use of CTS mode with AES/SM4. diff --git a/drivers/crypto/keembay/Makefile b/drivers/crypto/keembay/Makefile new file mode 100644 index 000000000000..f21e2c4ab3b3 --- /dev/null +++ b/drivers/crypto/keembay/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for Intel Keem Bay OCS Crypto API Linux drivers +# +obj-$(CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4) += keembay-ocs-aes.o +keembay-ocs-aes-objs := keembay-ocs-aes-core.o ocs-aes.o diff --git a/drivers/crypto/keembay/keembay-ocs-aes-core.c b/drivers/crypto/keembay/keembay-ocs-aes-core.c new file mode 100644 index 000000000000..b6b25d994af3 --- /dev/null +++ b/drivers/crypto/keembay/keembay-ocs-aes-core.c @@ -0,0 +1,1713 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel Keem Bay OCS AES Crypto Driver. + * + * Copyright (C) 2018-2020 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include "ocs-aes.h" + +#define KMB_OCS_PRIORITY 350 +#define DRV_NAME "keembay-ocs-aes" + +#define OCS_AES_MIN_KEY_SIZE 16 +#define OCS_AES_MAX_KEY_SIZE 32 +#define OCS_AES_KEYSIZE_128 16 +#define OCS_AES_KEYSIZE_192 24 +#define OCS_AES_KEYSIZE_256 32 +#define OCS_SM4_KEY_SIZE 16 + +/** + * struct ocs_aes_tctx - OCS AES Transform context + * @engine_ctx: Engine context. + * @aes_dev: The OCS AES device. + * @key: AES/SM4 key. + * @key_len: The length (in bytes) of @key. + * @cipher: OCS cipher to use (either AES or SM4). + * @sw_cipher: The cipher to use as fallback. + * @use_fallback: Whether or not fallback cipher should be used. + */ +struct ocs_aes_tctx { + struct crypto_engine_ctx engine_ctx; + struct ocs_aes_dev *aes_dev; + u8 key[OCS_AES_KEYSIZE_256]; + unsigned int key_len; + enum ocs_cipher cipher; + union { + struct crypto_sync_skcipher *sk; + struct crypto_aead *aead; + } sw_cipher; + bool use_fallback; +}; + +/** + * struct ocs_aes_rctx - OCS AES Request context. + * @instruction: Instruction to be executed (encrypt / decrypt). + * @mode: Mode to use (ECB, CBC, CTR, CCm, GCM, CTS) + * @src_nents: Number of source SG entries. + * @dst_nents: Number of destination SG entries. + * @src_dma_count: The number of DMA-mapped entries of the source SG. + * @dst_dma_count: The number of DMA-mapped entries of the destination SG. + * @in_place: Whether or not this is an in place request, i.e., + * src_sg == dst_sg. + * @src_dll: OCS DMA linked list for input data. + * @dst_dll: OCS DMA linked list for output data. + * @last_ct_blk: Buffer to hold last cipher text block (only used in CBC + * mode). + * @cts_swap: Whether or not CTS swap must be performed. + * @aad_src_dll: OCS DMA linked list for input AAD data. + * @aad_dst_dll: OCS DMA linked list for output AAD data. + * @in_tag: Buffer to hold input encrypted tag (only used for + * CCM/GCM decrypt). + * @out_tag: Buffer to hold output encrypted / decrypted tag (only + * used for GCM encrypt / decrypt). + */ +struct ocs_aes_rctx { + /* Fields common across all modes. */ + enum ocs_instruction instruction; + enum ocs_mode mode; + int src_nents; + int dst_nents; + int src_dma_count; + int dst_dma_count; + bool in_place; + struct ocs_dll_desc src_dll; + struct ocs_dll_desc dst_dll; + + /* CBC specific */ + u8 last_ct_blk[AES_BLOCK_SIZE]; + + /* CTS specific */ + int cts_swap; + + /* CCM/GCM specific */ + struct ocs_dll_desc aad_src_dll; + struct ocs_dll_desc aad_dst_dll; + u8 in_tag[AES_BLOCK_SIZE]; + + /* GCM specific */ + u8 out_tag[AES_BLOCK_SIZE]; +}; + +/* Driver data. */ +struct ocs_aes_drv { + struct list_head dev_list; + spinlock_t lock; /* Protects dev_list. */ +}; + +static struct ocs_aes_drv ocs_aes = { + .dev_list = LIST_HEAD_INIT(ocs_aes.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(ocs_aes.lock), +}; + +static struct ocs_aes_dev *kmb_ocs_aes_find_dev(struct ocs_aes_tctx *tctx) +{ + struct ocs_aes_dev *aes_dev; + + spin_lock(&ocs_aes.lock); + + if (tctx->aes_dev) { + aes_dev = tctx->aes_dev; + goto exit; + } + + /* Only a single OCS device available */ + aes_dev = list_first_entry(&ocs_aes.dev_list, struct ocs_aes_dev, list); + tctx->aes_dev = aes_dev; + +exit: + spin_unlock(&ocs_aes.lock); + + return aes_dev; +} + +/* + * Ensure key is 128-bit or 256-bit for AES or 128-bit for SM4 and an actual + * key is being passed in. + * + * Return: 0 if key is valid, -EINVAL otherwise. + */ +static int check_key(const u8 *in_key, size_t key_len, enum ocs_cipher cipher) +{ + if (!in_key) + return -EINVAL; + + /* For AES, only 128-byte or 256-byte keys are supported. */ + if (cipher == OCS_AES && (key_len == OCS_AES_KEYSIZE_128 || + key_len == OCS_AES_KEYSIZE_256)) + return 0; + + /* For SM4, only 128-byte keys are supported. */ + if (cipher == OCS_SM4 && key_len == OCS_AES_KEYSIZE_128) + return 0; + + /* Everything else is unsupported. */ + return -EINVAL; +} + +/* Save key into transformation context. */ +static int save_key(struct ocs_aes_tctx *tctx, const u8 *in_key, size_t key_len, + enum ocs_cipher cipher) +{ + int ret; + + ret = check_key(in_key, key_len, cipher); + if (ret) + return ret; + + memcpy(tctx->key, in_key, key_len); + tctx->key_len = key_len; + tctx->cipher = cipher; + + return 0; +} + +/* Set key for symmetric cypher. */ +static int kmb_ocs_sk_set_key(struct crypto_skcipher *tfm, const u8 *in_key, + size_t key_len, enum ocs_cipher cipher) +{ + struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm); + + /* Fallback is used for AES with 192-bit key. */ + tctx->use_fallback = (cipher == OCS_AES && + key_len == OCS_AES_KEYSIZE_192); + + if (!tctx->use_fallback) + return save_key(tctx, in_key, key_len, cipher); + + crypto_sync_skcipher_clear_flags(tctx->sw_cipher.sk, + CRYPTO_TFM_REQ_MASK); + crypto_sync_skcipher_set_flags(tctx->sw_cipher.sk, + tfm->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + + return crypto_sync_skcipher_setkey(tctx->sw_cipher.sk, in_key, key_len); +} + +/* Set key for AEAD cipher. */ +static int kmb_ocs_aead_set_key(struct crypto_aead *tfm, const u8 *in_key, + size_t key_len, enum ocs_cipher cipher) +{ + struct ocs_aes_tctx *tctx = crypto_aead_ctx(tfm); + + /* Fallback is used for AES with 192-bit key. */ + tctx->use_fallback = (cipher == OCS_AES && + key_len == OCS_AES_KEYSIZE_192); + + if (!tctx->use_fallback) + return save_key(tctx, in_key, key_len, cipher); + + crypto_aead_clear_flags(tctx->sw_cipher.aead, CRYPTO_TFM_REQ_MASK); + crypto_aead_set_flags(tctx->sw_cipher.aead, + crypto_aead_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); + + return crypto_aead_setkey(tctx->sw_cipher.aead, in_key, key_len); +} + +/* Swap two AES blocks in SG lists. */ +static void sg_swap_blocks(struct scatterlist *sgl, unsigned int nents, + off_t blk1_offset, off_t blk2_offset) +{ + u8 tmp_buf1[AES_BLOCK_SIZE], tmp_buf2[AES_BLOCK_SIZE]; + + /* + * No easy way to copy within sg list, so copy both blocks to temporary + * buffers first. + */ + sg_pcopy_to_buffer(sgl, nents, tmp_buf1, AES_BLOCK_SIZE, blk1_offset); + sg_pcopy_to_buffer(sgl, nents, tmp_buf2, AES_BLOCK_SIZE, blk2_offset); + sg_pcopy_from_buffer(sgl, nents, tmp_buf1, AES_BLOCK_SIZE, blk2_offset); + sg_pcopy_from_buffer(sgl, nents, tmp_buf2, AES_BLOCK_SIZE, blk1_offset); +} + +/* Initialize request context to default values. */ +static void ocs_aes_init_rctx(struct ocs_aes_rctx *rctx) +{ + /* Zero everything. */ + memset(rctx, 0, sizeof(*rctx)); + + /* Set initial value for DMA addresses. */ + rctx->src_dll.dma_addr = DMA_MAPPING_ERROR; + rctx->dst_dll.dma_addr = DMA_MAPPING_ERROR; + rctx->aad_src_dll.dma_addr = DMA_MAPPING_ERROR; + rctx->aad_dst_dll.dma_addr = DMA_MAPPING_ERROR; +} + +static int kmb_ocs_sk_validate_input(struct skcipher_request *req, + enum ocs_mode mode) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + int iv_size = crypto_skcipher_ivsize(tfm); + + switch (mode) { + case OCS_MODE_ECB: + /* Ensure input length is multiple of block size */ + if (req->cryptlen % AES_BLOCK_SIZE != 0) + return -EINVAL; + + return 0; + + case OCS_MODE_CBC: + /* Ensure input length is multiple of block size */ + if (req->cryptlen % AES_BLOCK_SIZE != 0) + return -EINVAL; + + /* Ensure IV is present and block size in length */ + if (!req->iv || iv_size != AES_BLOCK_SIZE) + return -EINVAL; + /* + * NOTE: Since req->cryptlen == 0 case was already handled in + * kmb_ocs_sk_common(), the above two conditions also guarantee + * that: cryptlen >= iv_size + */ + return 0; + + case OCS_MODE_CTR: + /* Ensure IV is present and block size in length */ + if (!req->iv || iv_size != AES_BLOCK_SIZE) + return -EINVAL; + return 0; + + case OCS_MODE_CTS: + /* Ensure input length >= block size */ + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + /* Ensure IV is present and block size in length */ + if (!req->iv || iv_size != AES_BLOCK_SIZE) + return -EINVAL; + + return 0; + default: + return -EINVAL; + } +} + +/* + * Called by encrypt() / decrypt() skcipher functions. + * + * Use fallback if needed, otherwise initialize context and enqueue request + * into engine. + */ +static int kmb_ocs_sk_common(struct skcipher_request *req, + enum ocs_cipher cipher, + enum ocs_instruction instruction, + enum ocs_mode mode) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ocs_aes_rctx *rctx = skcipher_request_ctx(req); + struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm); + struct ocs_aes_dev *aes_dev; + int rc; + + if (tctx->use_fallback) { + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, tctx->sw_cipher.sk); + + skcipher_request_set_sync_tfm(subreq, tctx->sw_cipher.sk); + skcipher_request_set_callback(subreq, req->base.flags, NULL, + NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, req->iv); + + if (instruction == OCS_ENCRYPT) + rc = crypto_skcipher_encrypt(subreq); + else + rc = crypto_skcipher_decrypt(subreq); + + skcipher_request_zero(subreq); + + return rc; + } + + /* + * If cryptlen == 0, no processing needed for ECB, CBC and CTR. + * + * For CTS continue: kmb_ocs_sk_validate_input() will return -EINVAL. + */ + if (!req->cryptlen && mode != OCS_MODE_CTS) + return 0; + + rc = kmb_ocs_sk_validate_input(req, mode); + if (rc) + return rc; + + aes_dev = kmb_ocs_aes_find_dev(tctx); + if (!aes_dev) + return -ENODEV; + + if (cipher != tctx->cipher) + return -EINVAL; + + ocs_aes_init_rctx(rctx); + rctx->instruction = instruction; + rctx->mode = mode; + + return crypto_transfer_skcipher_request_to_engine(aes_dev->engine, req); +} + +static void cleanup_ocs_dma_linked_list(struct device *dev, + struct ocs_dll_desc *dll) +{ + if (dll->vaddr) + dma_free_coherent(dev, dll->size, dll->vaddr, dll->dma_addr); + dll->vaddr = NULL; + dll->size = 0; + dll->dma_addr = DMA_MAPPING_ERROR; +} + +static void kmb_ocs_sk_dma_cleanup(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ocs_aes_rctx *rctx = skcipher_request_ctx(req); + struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm); + struct device *dev = tctx->aes_dev->dev; + + if (rctx->src_dma_count) { + dma_unmap_sg(dev, req->src, rctx->src_nents, DMA_TO_DEVICE); + rctx->src_dma_count = 0; + } + + if (rctx->dst_dma_count) { + dma_unmap_sg(dev, req->dst, rctx->dst_nents, rctx->in_place ? + DMA_BIDIRECTIONAL : + DMA_FROM_DEVICE); + rctx->dst_dma_count = 0; + } + + /* Clean up OCS DMA linked lists */ + cleanup_ocs_dma_linked_list(dev, &rctx->src_dll); + cleanup_ocs_dma_linked_list(dev, &rctx->dst_dll); +} + +static int kmb_ocs_sk_prepare_inplace(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ocs_aes_rctx *rctx = skcipher_request_ctx(req); + struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm); + int iv_size = crypto_skcipher_ivsize(tfm); + int rc; + + /* + * For CBC decrypt, save last block (iv) to last_ct_blk buffer. + * + * Note: if we are here, we already checked that cryptlen >= iv_size + * and iv_size == AES_BLOCK_SIZE (i.e., the size of last_ct_blk); see + * kmb_ocs_sk_validate_input(). + */ + if (rctx->mode == OCS_MODE_CBC && rctx->instruction == OCS_DECRYPT) + scatterwalk_map_and_copy(rctx->last_ct_blk, req->src, + req->cryptlen - iv_size, iv_size, 0); + + /* For CTS decrypt, swap last two blocks, if needed. */ + if (rctx->cts_swap && rctx->instruction == OCS_DECRYPT) + sg_swap_blocks(req->dst, rctx->dst_nents, + req->cryptlen - AES_BLOCK_SIZE, + req->cryptlen - (2 * AES_BLOCK_SIZE)); + + /* src and dst buffers are the same, use bidirectional DMA mapping. */ + rctx->dst_dma_count = dma_map_sg(tctx->aes_dev->dev, req->dst, + rctx->dst_nents, DMA_BIDIRECTIONAL); + if (rctx->dst_dma_count == 0) { + dev_err(tctx->aes_dev->dev, "Failed to map destination sg\n"); + return -ENOMEM; + } + + /* Create DST linked list */ + rc = ocs_create_linked_list_from_sg(tctx->aes_dev, req->dst, + rctx->dst_dma_count, &rctx->dst_dll, + req->cryptlen, 0); + if (rc) + return rc; + /* + * If descriptor creation was successful, set the src_dll.dma_addr to + * the value of dst_dll.dma_addr, as we do in-place AES operation on + * the src. + */ + rctx->src_dll.dma_addr = rctx->dst_dll.dma_addr; + + return 0; +} + +static int kmb_ocs_sk_prepare_notinplace(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ocs_aes_rctx *rctx = skcipher_request_ctx(req); + struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm); + int rc; + + rctx->src_nents = sg_nents_for_len(req->src, req->cryptlen); + if (rctx->src_nents < 0) + return -EBADMSG; + + /* Map SRC SG. */ + rctx->src_dma_count = dma_map_sg(tctx->aes_dev->dev, req->src, + rctx->src_nents, DMA_TO_DEVICE); + if (rctx->src_dma_count == 0) { + dev_err(tctx->aes_dev->dev, "Failed to map source sg\n"); + return -ENOMEM; + } + + /* Create SRC linked list */ + rc = ocs_create_linked_list_from_sg(tctx->aes_dev, req->src, + rctx->src_dma_count, &rctx->src_dll, + req->cryptlen, 0); + if (rc) + return rc; + + /* Map DST SG. */ + rctx->dst_dma_count = dma_map_sg(tctx->aes_dev->dev, req->dst, + rctx->dst_nents, DMA_FROM_DEVICE); + if (rctx->dst_dma_count == 0) { + dev_err(tctx->aes_dev->dev, "Failed to map destination sg\n"); + return -ENOMEM; + } + + /* Create DST linked list */ + rc = ocs_create_linked_list_from_sg(tctx->aes_dev, req->dst, + rctx->dst_dma_count, &rctx->dst_dll, + req->cryptlen, 0); + if (rc) + return rc; + + /* If this is not a CTS decrypt operation with swapping, we are done. */ + if (!(rctx->cts_swap && rctx->instruction == OCS_DECRYPT)) + return 0; + + /* + * Otherwise, we have to copy src to dst (as we cannot modify src). + * Use OCS AES bypass mode to copy src to dst via DMA. + * + * NOTE: for anything other than small data sizes this is rather + * inefficient. + */ + rc = ocs_aes_bypass_op(tctx->aes_dev, rctx->dst_dll.dma_addr, + rctx->src_dll.dma_addr, req->cryptlen); + if (rc) + return rc; + + /* + * Now dst == src, so clean up what we did so far and use in_place + * logic. + */ + kmb_ocs_sk_dma_cleanup(req); + rctx->in_place = true; + + return kmb_ocs_sk_prepare_inplace(req); +} + +static int kmb_ocs_sk_run(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ocs_aes_rctx *rctx = skcipher_request_ctx(req); + struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm); + struct ocs_aes_dev *aes_dev = tctx->aes_dev; + int iv_size = crypto_skcipher_ivsize(tfm); + int rc; + + rctx->dst_nents = sg_nents_for_len(req->dst, req->cryptlen); + if (rctx->dst_nents < 0) + return -EBADMSG; + + /* + * If 2 blocks or greater, and multiple of block size swap last two + * blocks to be compatible with other crypto API CTS implementations: + * OCS mode uses CBC-CS2, whereas other crypto API implementations use + * CBC-CS3. + * CBC-CS2 and CBC-CS3 defined by: + * https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a-add.pdf + */ + rctx->cts_swap = (rctx->mode == OCS_MODE_CTS && + req->cryptlen > AES_BLOCK_SIZE && + req->cryptlen % AES_BLOCK_SIZE == 0); + + rctx->in_place = (req->src == req->dst); + + if (rctx->in_place) + rc = kmb_ocs_sk_prepare_inplace(req); + else + rc = kmb_ocs_sk_prepare_notinplace(req); + + if (rc) + goto error; + + rc = ocs_aes_op(aes_dev, rctx->mode, tctx->cipher, rctx->instruction, + rctx->dst_dll.dma_addr, rctx->src_dll.dma_addr, + req->cryptlen, req->iv, iv_size); + if (rc) + goto error; + + /* Clean-up DMA before further processing output. */ + kmb_ocs_sk_dma_cleanup(req); + + /* For CTS Encrypt, swap last 2 blocks, if needed. */ + if (rctx->cts_swap && rctx->instruction == OCS_ENCRYPT) { + sg_swap_blocks(req->dst, rctx->dst_nents, + req->cryptlen - AES_BLOCK_SIZE, + req->cryptlen - (2 * AES_BLOCK_SIZE)); + return 0; + } + + /* For CBC copy IV to req->IV. */ + if (rctx->mode == OCS_MODE_CBC) { + /* CBC encrypt case. */ + if (rctx->instruction == OCS_ENCRYPT) { + scatterwalk_map_and_copy(req->iv, req->dst, + req->cryptlen - iv_size, + iv_size, 0); + return 0; + } + /* CBC decrypt case. */ + if (rctx->in_place) + memcpy(req->iv, rctx->last_ct_blk, iv_size); + else + scatterwalk_map_and_copy(req->iv, req->src, + req->cryptlen - iv_size, + iv_size, 0); + return 0; + } + /* For all other modes there's nothing to do. */ + + return 0; + +error: + kmb_ocs_sk_dma_cleanup(req); + + return rc; +} + +static int kmb_ocs_aead_validate_input(struct aead_request *req, + enum ocs_instruction instruction, + enum ocs_mode mode) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + int tag_size = crypto_aead_authsize(tfm); + int iv_size = crypto_aead_ivsize(tfm); + + /* For decrypt crytplen == len(PT) + len(tag). */ + if (instruction == OCS_DECRYPT && req->cryptlen < tag_size) + return -EINVAL; + + /* IV is mandatory. */ + if (!req->iv) + return -EINVAL; + + switch (mode) { + case OCS_MODE_GCM: + if (iv_size != GCM_AES_IV_SIZE) + return -EINVAL; + + return 0; + + case OCS_MODE_CCM: + /* Ensure IV is present and block size in length */ + if (iv_size != AES_BLOCK_SIZE) + return -EINVAL; + + return 0; + + default: + return -EINVAL; + } +} + +/* + * Called by encrypt() / decrypt() aead functions. + * + * Use fallback if needed, otherwise initialize context and enqueue request + * into engine. + */ +static int kmb_ocs_aead_common(struct aead_request *req, + enum ocs_cipher cipher, + enum ocs_instruction instruction, + enum ocs_mode mode) +{ + struct ocs_aes_tctx *tctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct ocs_aes_rctx *rctx = aead_request_ctx(req); + struct ocs_aes_dev *dd; + int rc; + + if (tctx->use_fallback) { + struct aead_request *subreq = aead_request_ctx(req); + + aead_request_set_tfm(subreq, tctx->sw_cipher.aead); + aead_request_set_callback(subreq, req->base.flags, + req->base.complete, req->base.data); + aead_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, req->iv); + aead_request_set_ad(subreq, req->assoclen); + rc = crypto_aead_setauthsize(tctx->sw_cipher.aead, + crypto_aead_authsize(crypto_aead_reqtfm(req))); + if (rc) + return rc; + + return (instruction == OCS_ENCRYPT) ? + crypto_aead_encrypt(subreq) : + crypto_aead_decrypt(subreq); + } + + rc = kmb_ocs_aead_validate_input(req, instruction, mode); + if (rc) + return rc; + + dd = kmb_ocs_aes_find_dev(tctx); + if (!dd) + return -ENODEV; + + if (cipher != tctx->cipher) + return -EINVAL; + + ocs_aes_init_rctx(rctx); + rctx->instruction = instruction; + rctx->mode = mode; + + return crypto_transfer_aead_request_to_engine(dd->engine, req); +} + +static void kmb_ocs_aead_dma_cleanup(struct aead_request *req) +{ + struct ocs_aes_tctx *tctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct ocs_aes_rctx *rctx = aead_request_ctx(req); + struct device *dev = tctx->aes_dev->dev; + + if (rctx->src_dma_count) { + dma_unmap_sg(dev, req->src, rctx->src_nents, DMA_TO_DEVICE); + rctx->src_dma_count = 0; + } + + if (rctx->dst_dma_count) { + dma_unmap_sg(dev, req->dst, rctx->dst_nents, rctx->in_place ? + DMA_BIDIRECTIONAL : + DMA_FROM_DEVICE); + rctx->dst_dma_count = 0; + } + /* Clean up OCS DMA linked lists */ + cleanup_ocs_dma_linked_list(dev, &rctx->src_dll); + cleanup_ocs_dma_linked_list(dev, &rctx->dst_dll); + cleanup_ocs_dma_linked_list(dev, &rctx->aad_src_dll); + cleanup_ocs_dma_linked_list(dev, &rctx->aad_dst_dll); +} + +/** + * kmb_ocs_aead_dma_prepare() - Do DMA mapping for AEAD processing. + * @req: The AEAD request being processed. + * @src_dll_size: Where to store the length of the data mapped into the + * src_dll OCS DMA list. + * + * Do the following: + * - DMA map req->src and req->dst + * - Initialize the following OCS DMA linked lists: rctx->src_dll, + * rctx->dst_dll, rctx->aad_src_dll and rxtc->aad_dst_dll. + * + * Return: 0 on success, negative error code otherwise. + */ +static int kmb_ocs_aead_dma_prepare(struct aead_request *req, u32 *src_dll_size) +{ + struct ocs_aes_tctx *tctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + const int tag_size = crypto_aead_authsize(crypto_aead_reqtfm(req)); + struct ocs_aes_rctx *rctx = aead_request_ctx(req); + u32 in_size; /* The length of the data to be mapped by src_dll. */ + u32 out_size; /* The length of the data to be mapped by dst_dll. */ + u32 dst_size; /* The length of the data in dst_sg. */ + int rc; + + /* Get number of entries in input data SG list. */ + rctx->src_nents = sg_nents_for_len(req->src, + req->assoclen + req->cryptlen); + if (rctx->src_nents < 0) + return -EBADMSG; + + if (rctx->instruction == OCS_DECRYPT) { + /* + * For decrypt: + * - src sg list is: AAD|CT|tag + * - dst sg list expects: AAD|PT + * + * in_size == len(CT); out_size == len(PT) + */ + + /* req->cryptlen includes both CT and tag. */ + in_size = req->cryptlen - tag_size; + + /* out_size = PT size == CT size */ + out_size = in_size; + + /* len(dst_sg) == len(AAD) + len(PT) */ + dst_size = req->assoclen + out_size; + + /* + * Copy tag from source SG list to 'in_tag' buffer. + * + * Note: this needs to be done here, before DMA mapping src_sg. + */ + sg_pcopy_to_buffer(req->src, rctx->src_nents, rctx->in_tag, + tag_size, req->assoclen + in_size); + + } else { /* OCS_ENCRYPT */ + /* + * For encrypt: + * src sg list is: AAD|PT + * dst sg list expects: AAD|CT|tag + */ + /* in_size == len(PT) */ + in_size = req->cryptlen; + + /* + * In CCM mode the OCS engine appends the tag to the ciphertext, + * but in GCM mode the tag must be read from the tag registers + * and appended manually below + */ + out_size = (rctx->mode == OCS_MODE_CCM) ? in_size + tag_size : + in_size; + /* len(dst_sg) == len(AAD) + len(CT) + len(tag) */ + dst_size = req->assoclen + in_size + tag_size; + } + *src_dll_size = in_size; + + /* Get number of entries in output data SG list. */ + rctx->dst_nents = sg_nents_for_len(req->dst, dst_size); + if (rctx->dst_nents < 0) + return -EBADMSG; + + rctx->in_place = (req->src == req->dst) ? 1 : 0; + + /* Map destination; use bidirectional mapping for in-place case. */ + rctx->dst_dma_count = dma_map_sg(tctx->aes_dev->dev, req->dst, + rctx->dst_nents, + rctx->in_place ? DMA_BIDIRECTIONAL : + DMA_FROM_DEVICE); + if (rctx->dst_dma_count == 0 && rctx->dst_nents != 0) { + dev_err(tctx->aes_dev->dev, "Failed to map destination sg\n"); + return -ENOMEM; + } + + /* Create AAD DST list: maps dst[0:AAD_SIZE-1]. */ + rc = ocs_create_linked_list_from_sg(tctx->aes_dev, req->dst, + rctx->dst_dma_count, + &rctx->aad_dst_dll, req->assoclen, + 0); + if (rc) + return rc; + + /* Create DST list: maps dst[AAD_SIZE:out_size] */ + rc = ocs_create_linked_list_from_sg(tctx->aes_dev, req->dst, + rctx->dst_dma_count, &rctx->dst_dll, + out_size, req->assoclen); + if (rc) + return rc; + + if (rctx->in_place) { + /* If this is not CCM encrypt, we are done. */ + if (!(rctx->mode == OCS_MODE_CCM && + rctx->instruction == OCS_ENCRYPT)) { + /* + * SRC and DST are the same, so re-use the same DMA + * addresses (to avoid allocating new DMA lists + * identical to the dst ones). + */ + rctx->src_dll.dma_addr = rctx->dst_dll.dma_addr; + rctx->aad_src_dll.dma_addr = rctx->aad_dst_dll.dma_addr; + + return 0; + } + /* + * For CCM encrypt the input and output linked lists contain + * different amounts of data, so, we need to create different + * SRC and AAD SRC lists, even for the in-place case. + */ + rc = ocs_create_linked_list_from_sg(tctx->aes_dev, req->dst, + rctx->dst_dma_count, + &rctx->aad_src_dll, + req->assoclen, 0); + if (rc) + return rc; + rc = ocs_create_linked_list_from_sg(tctx->aes_dev, req->dst, + rctx->dst_dma_count, + &rctx->src_dll, in_size, + req->assoclen); + if (rc) + return rc; + + return 0; + } + /* Not in-place case. */ + + /* Map source SG. */ + rctx->src_dma_count = dma_map_sg(tctx->aes_dev->dev, req->src, + rctx->src_nents, DMA_TO_DEVICE); + if (rctx->src_dma_count == 0 && rctx->src_nents != 0) { + dev_err(tctx->aes_dev->dev, "Failed to map source sg\n"); + return -ENOMEM; + } + + /* Create AAD SRC list. */ + rc = ocs_create_linked_list_from_sg(tctx->aes_dev, req->src, + rctx->src_dma_count, + &rctx->aad_src_dll, + req->assoclen, 0); + if (rc) + return rc; + + /* Create SRC list. */ + rc = ocs_create_linked_list_from_sg(tctx->aes_dev, req->src, + rctx->src_dma_count, + &rctx->src_dll, in_size, + req->assoclen); + if (rc) + return rc; + + if (req->assoclen == 0) + return 0; + + /* Copy AAD from src sg to dst sg using OCS DMA. */ + rc = ocs_aes_bypass_op(tctx->aes_dev, rctx->aad_dst_dll.dma_addr, + rctx->aad_src_dll.dma_addr, req->cryptlen); + if (rc) + dev_err(tctx->aes_dev->dev, + "Failed to copy source AAD to destination AAD\n"); + + return rc; +} + +static int kmb_ocs_aead_run(struct aead_request *req) +{ + struct ocs_aes_tctx *tctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + const int tag_size = crypto_aead_authsize(crypto_aead_reqtfm(req)); + struct ocs_aes_rctx *rctx = aead_request_ctx(req); + u32 in_size; /* The length of the data mapped by src_dll. */ + int rc; + + rc = kmb_ocs_aead_dma_prepare(req, &in_size); + if (rc) + goto exit; + + /* For CCM, we just call the OCS processing and we are done. */ + if (rctx->mode == OCS_MODE_CCM) { + rc = ocs_aes_ccm_op(tctx->aes_dev, tctx->cipher, + rctx->instruction, rctx->dst_dll.dma_addr, + rctx->src_dll.dma_addr, in_size, + req->iv, + rctx->aad_src_dll.dma_addr, req->assoclen, + rctx->in_tag, tag_size); + goto exit; + } + /* GCM case; invoke OCS processing. */ + rc = ocs_aes_gcm_op(tctx->aes_dev, tctx->cipher, + rctx->instruction, + rctx->dst_dll.dma_addr, + rctx->src_dll.dma_addr, in_size, + req->iv, + rctx->aad_src_dll.dma_addr, req->assoclen, + rctx->out_tag, tag_size); + if (rc) + goto exit; + + /* For GCM decrypt, we have to compare in_tag with out_tag. */ + if (rctx->instruction == OCS_DECRYPT) { + rc = memcmp(rctx->in_tag, rctx->out_tag, tag_size) ? + -EBADMSG : 0; + goto exit; + } + + /* For GCM encrypt, we must manually copy out_tag to DST sg. */ + + /* Clean-up must be called before the sg_pcopy_from_buffer() below. */ + kmb_ocs_aead_dma_cleanup(req); + + /* Copy tag to destination sg after AAD and CT. */ + sg_pcopy_from_buffer(req->dst, rctx->dst_nents, rctx->out_tag, + tag_size, req->assoclen + req->cryptlen); + + /* Return directly as DMA cleanup already done. */ + return 0; + +exit: + kmb_ocs_aead_dma_cleanup(req); + + return rc; +} + +static int kmb_ocs_aes_sk_do_one_request(struct crypto_engine *engine, + void *areq) +{ + struct skcipher_request *req = + container_of(areq, struct skcipher_request, base); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm); + int err; + + if (!tctx->aes_dev) { + err = -ENODEV; + goto exit; + } + + err = ocs_aes_set_key(tctx->aes_dev, tctx->key_len, tctx->key, + tctx->cipher); + if (err) + goto exit; + + err = kmb_ocs_sk_run(req); + +exit: + crypto_finalize_skcipher_request(engine, req, err); + + return 0; +} + +static int kmb_ocs_aes_aead_do_one_request(struct crypto_engine *engine, + void *areq) +{ + struct aead_request *req = container_of(areq, + struct aead_request, base); + struct ocs_aes_tctx *tctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + int err; + + if (!tctx->aes_dev) + return -ENODEV; + + err = ocs_aes_set_key(tctx->aes_dev, tctx->key_len, tctx->key, + tctx->cipher); + if (err) + goto exit; + + err = kmb_ocs_aead_run(req); + +exit: + crypto_finalize_aead_request(tctx->aes_dev->engine, req, err); + + return 0; +} + +static int kmb_ocs_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + return kmb_ocs_sk_set_key(tfm, in_key, key_len, OCS_AES); +} + +static int kmb_ocs_aes_aead_set_key(struct crypto_aead *tfm, const u8 *in_key, + unsigned int key_len) +{ + return kmb_ocs_aead_set_key(tfm, in_key, key_len, OCS_AES); +} + +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB +static int kmb_ocs_aes_ecb_encrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_AES, OCS_ENCRYPT, OCS_MODE_ECB); +} + +static int kmb_ocs_aes_ecb_decrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_AES, OCS_DECRYPT, OCS_MODE_ECB); +} +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB */ + +static int kmb_ocs_aes_cbc_encrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_AES, OCS_ENCRYPT, OCS_MODE_CBC); +} + +static int kmb_ocs_aes_cbc_decrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_AES, OCS_DECRYPT, OCS_MODE_CBC); +} + +static int kmb_ocs_aes_ctr_encrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_AES, OCS_ENCRYPT, OCS_MODE_CTR); +} + +static int kmb_ocs_aes_ctr_decrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_AES, OCS_DECRYPT, OCS_MODE_CTR); +} + +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS +static int kmb_ocs_aes_cts_encrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_AES, OCS_ENCRYPT, OCS_MODE_CTS); +} + +static int kmb_ocs_aes_cts_decrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_AES, OCS_DECRYPT, OCS_MODE_CTS); +} +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS */ + +static int kmb_ocs_aes_gcm_encrypt(struct aead_request *req) +{ + return kmb_ocs_aead_common(req, OCS_AES, OCS_ENCRYPT, OCS_MODE_GCM); +} + +static int kmb_ocs_aes_gcm_decrypt(struct aead_request *req) +{ + return kmb_ocs_aead_common(req, OCS_AES, OCS_DECRYPT, OCS_MODE_GCM); +} + +static int kmb_ocs_aes_ccm_encrypt(struct aead_request *req) +{ + return kmb_ocs_aead_common(req, OCS_AES, OCS_ENCRYPT, OCS_MODE_CCM); +} + +static int kmb_ocs_aes_ccm_decrypt(struct aead_request *req) +{ + return kmb_ocs_aead_common(req, OCS_AES, OCS_DECRYPT, OCS_MODE_CCM); +} + +static int kmb_ocs_sm4_set_key(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + return kmb_ocs_sk_set_key(tfm, in_key, key_len, OCS_SM4); +} + +static int kmb_ocs_sm4_aead_set_key(struct crypto_aead *tfm, const u8 *in_key, + unsigned int key_len) +{ + return kmb_ocs_aead_set_key(tfm, in_key, key_len, OCS_SM4); +} + +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB +static int kmb_ocs_sm4_ecb_encrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_SM4, OCS_ENCRYPT, OCS_MODE_ECB); +} + +static int kmb_ocs_sm4_ecb_decrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_SM4, OCS_DECRYPT, OCS_MODE_ECB); +} +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB */ + +static int kmb_ocs_sm4_cbc_encrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_SM4, OCS_ENCRYPT, OCS_MODE_CBC); +} + +static int kmb_ocs_sm4_cbc_decrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_SM4, OCS_DECRYPT, OCS_MODE_CBC); +} + +static int kmb_ocs_sm4_ctr_encrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_SM4, OCS_ENCRYPT, OCS_MODE_CTR); +} + +static int kmb_ocs_sm4_ctr_decrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_SM4, OCS_DECRYPT, OCS_MODE_CTR); +} + +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS +static int kmb_ocs_sm4_cts_encrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_SM4, OCS_ENCRYPT, OCS_MODE_CTS); +} + +static int kmb_ocs_sm4_cts_decrypt(struct skcipher_request *req) +{ + return kmb_ocs_sk_common(req, OCS_SM4, OCS_DECRYPT, OCS_MODE_CTS); +} +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS */ + +static int kmb_ocs_sm4_gcm_encrypt(struct aead_request *req) +{ + return kmb_ocs_aead_common(req, OCS_SM4, OCS_ENCRYPT, OCS_MODE_GCM); +} + +static int kmb_ocs_sm4_gcm_decrypt(struct aead_request *req) +{ + return kmb_ocs_aead_common(req, OCS_SM4, OCS_DECRYPT, OCS_MODE_GCM); +} + +static int kmb_ocs_sm4_ccm_encrypt(struct aead_request *req) +{ + return kmb_ocs_aead_common(req, OCS_SM4, OCS_ENCRYPT, OCS_MODE_CCM); +} + +static int kmb_ocs_sm4_ccm_decrypt(struct aead_request *req) +{ + return kmb_ocs_aead_common(req, OCS_SM4, OCS_DECRYPT, OCS_MODE_CCM); +} + +static inline int ocs_common_init(struct ocs_aes_tctx *tctx) +{ + tctx->engine_ctx.op.prepare_request = NULL; + tctx->engine_ctx.op.do_one_request = kmb_ocs_aes_sk_do_one_request; + tctx->engine_ctx.op.unprepare_request = NULL; + + return 0; +} + +static int ocs_aes_init_tfm(struct crypto_skcipher *tfm) +{ + const char *alg_name = crypto_tfm_alg_name(&tfm->base); + struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm); + struct crypto_sync_skcipher *blk; + + /* set fallback cipher in case it will be needed */ + blk = crypto_alloc_sync_skcipher(alg_name, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(blk)) + return PTR_ERR(blk); + + tctx->sw_cipher.sk = blk; + + crypto_skcipher_set_reqsize(tfm, sizeof(struct ocs_aes_rctx)); + + return ocs_common_init(tctx); +} + +static int ocs_sm4_init_tfm(struct crypto_skcipher *tfm) +{ + struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct ocs_aes_rctx)); + + return ocs_common_init(tctx); +} + +static inline void clear_key(struct ocs_aes_tctx *tctx) +{ + memzero_explicit(tctx->key, OCS_AES_KEYSIZE_256); + + /* Zero key registers if set */ + if (tctx->aes_dev) + ocs_aes_set_key(tctx->aes_dev, OCS_AES_KEYSIZE_256, + tctx->key, OCS_AES); +} + +static void ocs_exit_tfm(struct crypto_skcipher *tfm) +{ + struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm); + + clear_key(tctx); + + if (tctx->sw_cipher.sk) { + crypto_free_sync_skcipher(tctx->sw_cipher.sk); + tctx->sw_cipher.sk = NULL; + } +} + +static inline int ocs_common_aead_init(struct ocs_aes_tctx *tctx) +{ + tctx->engine_ctx.op.prepare_request = NULL; + tctx->engine_ctx.op.do_one_request = kmb_ocs_aes_aead_do_one_request; + tctx->engine_ctx.op.unprepare_request = NULL; + + return 0; +} + +static int ocs_aes_aead_cra_init(struct crypto_aead *tfm) +{ + const char *alg_name = crypto_tfm_alg_name(&tfm->base); + struct ocs_aes_tctx *tctx = crypto_aead_ctx(tfm); + struct crypto_aead *blk; + + /* Set fallback cipher in case it will be needed */ + blk = crypto_alloc_aead(alg_name, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(blk)) + return PTR_ERR(blk); + + tctx->sw_cipher.aead = blk; + + crypto_aead_set_reqsize(tfm, + max(sizeof(struct ocs_aes_rctx), + (sizeof(struct aead_request) + + crypto_aead_reqsize(tctx->sw_cipher.aead)))); + + return ocs_common_aead_init(tctx); +} + +static int kmb_ocs_aead_ccm_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + switch (authsize) { + case 4: + case 6: + case 8: + case 10: + case 12: + case 14: + case 16: + return 0; + default: + return -EINVAL; + } +} + +static int kmb_ocs_aead_gcm_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + return crypto_gcm_check_authsize(authsize); +} + +static int ocs_sm4_aead_cra_init(struct crypto_aead *tfm) +{ + struct ocs_aes_tctx *tctx = crypto_aead_ctx(tfm); + + crypto_aead_set_reqsize(tfm, sizeof(struct ocs_aes_rctx)); + + return ocs_common_aead_init(tctx); +} + +static void ocs_aead_cra_exit(struct crypto_aead *tfm) +{ + struct ocs_aes_tctx *tctx = crypto_aead_ctx(tfm); + + clear_key(tctx); + + if (tctx->sw_cipher.aead) { + crypto_free_aead(tctx->sw_cipher.aead); + tctx->sw_cipher.aead = NULL; + } +} + +static struct skcipher_alg algs[] = { +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB + { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-aes-keembay-ocs", + .base.cra_priority = KMB_OCS_PRIORITY, + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ocs_aes_tctx), + .base.cra_module = THIS_MODULE, + .base.cra_alignmask = 0, + + .min_keysize = OCS_AES_MIN_KEY_SIZE, + .max_keysize = OCS_AES_MAX_KEY_SIZE, + .setkey = kmb_ocs_aes_set_key, + .encrypt = kmb_ocs_aes_ecb_encrypt, + .decrypt = kmb_ocs_aes_ecb_decrypt, + .init = ocs_aes_init_tfm, + .exit = ocs_exit_tfm, + }, +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB */ + { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-aes-keembay-ocs", + .base.cra_priority = KMB_OCS_PRIORITY, + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ocs_aes_tctx), + .base.cra_module = THIS_MODULE, + .base.cra_alignmask = 0, + + .min_keysize = OCS_AES_MIN_KEY_SIZE, + .max_keysize = OCS_AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = kmb_ocs_aes_set_key, + .encrypt = kmb_ocs_aes_cbc_encrypt, + .decrypt = kmb_ocs_aes_cbc_decrypt, + .init = ocs_aes_init_tfm, + .exit = ocs_exit_tfm, + }, + { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-keembay-ocs", + .base.cra_priority = KMB_OCS_PRIORITY, + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct ocs_aes_tctx), + .base.cra_module = THIS_MODULE, + .base.cra_alignmask = 0, + + .min_keysize = OCS_AES_MIN_KEY_SIZE, + .max_keysize = OCS_AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = kmb_ocs_aes_set_key, + .encrypt = kmb_ocs_aes_ctr_encrypt, + .decrypt = kmb_ocs_aes_ctr_decrypt, + .init = ocs_aes_init_tfm, + .exit = ocs_exit_tfm, + }, +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS + { + .base.cra_name = "cts(cbc(aes))", + .base.cra_driver_name = "cts-aes-keembay-ocs", + .base.cra_priority = KMB_OCS_PRIORITY, + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ocs_aes_tctx), + .base.cra_module = THIS_MODULE, + .base.cra_alignmask = 0, + + .min_keysize = OCS_AES_MIN_KEY_SIZE, + .max_keysize = OCS_AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = kmb_ocs_aes_set_key, + .encrypt = kmb_ocs_aes_cts_encrypt, + .decrypt = kmb_ocs_aes_cts_decrypt, + .init = ocs_aes_init_tfm, + .exit = ocs_exit_tfm, + }, +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS */ +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB + { + .base.cra_name = "ecb(sm4)", + .base.cra_driver_name = "ecb-sm4-keembay-ocs", + .base.cra_priority = KMB_OCS_PRIORITY, + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ocs_aes_tctx), + .base.cra_module = THIS_MODULE, + .base.cra_alignmask = 0, + + .min_keysize = OCS_SM4_KEY_SIZE, + .max_keysize = OCS_SM4_KEY_SIZE, + .setkey = kmb_ocs_sm4_set_key, + .encrypt = kmb_ocs_sm4_ecb_encrypt, + .decrypt = kmb_ocs_sm4_ecb_decrypt, + .init = ocs_sm4_init_tfm, + .exit = ocs_exit_tfm, + }, +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB */ + { + .base.cra_name = "cbc(sm4)", + .base.cra_driver_name = "cbc-sm4-keembay-ocs", + .base.cra_priority = KMB_OCS_PRIORITY, + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ocs_aes_tctx), + .base.cra_module = THIS_MODULE, + .base.cra_alignmask = 0, + + .min_keysize = OCS_SM4_KEY_SIZE, + .max_keysize = OCS_SM4_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = kmb_ocs_sm4_set_key, + .encrypt = kmb_ocs_sm4_cbc_encrypt, + .decrypt = kmb_ocs_sm4_cbc_decrypt, + .init = ocs_sm4_init_tfm, + .exit = ocs_exit_tfm, + }, + { + .base.cra_name = "ctr(sm4)", + .base.cra_driver_name = "ctr-sm4-keembay-ocs", + .base.cra_priority = KMB_OCS_PRIORITY, + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct ocs_aes_tctx), + .base.cra_module = THIS_MODULE, + .base.cra_alignmask = 0, + + .min_keysize = OCS_SM4_KEY_SIZE, + .max_keysize = OCS_SM4_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = kmb_ocs_sm4_set_key, + .encrypt = kmb_ocs_sm4_ctr_encrypt, + .decrypt = kmb_ocs_sm4_ctr_decrypt, + .init = ocs_sm4_init_tfm, + .exit = ocs_exit_tfm, + }, +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS + { + .base.cra_name = "cts(cbc(sm4))", + .base.cra_driver_name = "cts-sm4-keembay-ocs", + .base.cra_priority = KMB_OCS_PRIORITY, + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ocs_aes_tctx), + .base.cra_module = THIS_MODULE, + .base.cra_alignmask = 0, + + .min_keysize = OCS_SM4_KEY_SIZE, + .max_keysize = OCS_SM4_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = kmb_ocs_sm4_set_key, + .encrypt = kmb_ocs_sm4_cts_encrypt, + .decrypt = kmb_ocs_sm4_cts_decrypt, + .init = ocs_sm4_init_tfm, + .exit = ocs_exit_tfm, + } +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS */ +}; + +static struct aead_alg algs_aead[] = { + { + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-keembay-ocs", + .cra_priority = KMB_OCS_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct ocs_aes_tctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = ocs_aes_aead_cra_init, + .exit = ocs_aead_cra_exit, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .setauthsize = kmb_ocs_aead_gcm_setauthsize, + .setkey = kmb_ocs_aes_aead_set_key, + .encrypt = kmb_ocs_aes_gcm_encrypt, + .decrypt = kmb_ocs_aes_gcm_decrypt, + }, + { + .base = { + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-keembay-ocs", + .cra_priority = KMB_OCS_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct ocs_aes_tctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = ocs_aes_aead_cra_init, + .exit = ocs_aead_cra_exit, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .setauthsize = kmb_ocs_aead_ccm_setauthsize, + .setkey = kmb_ocs_aes_aead_set_key, + .encrypt = kmb_ocs_aes_ccm_encrypt, + .decrypt = kmb_ocs_aes_ccm_decrypt, + }, + { + .base = { + .cra_name = "gcm(sm4)", + .cra_driver_name = "gcm-sm4-keembay-ocs", + .cra_priority = KMB_OCS_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct ocs_aes_tctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = ocs_sm4_aead_cra_init, + .exit = ocs_aead_cra_exit, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .setauthsize = kmb_ocs_aead_gcm_setauthsize, + .setkey = kmb_ocs_sm4_aead_set_key, + .encrypt = kmb_ocs_sm4_gcm_encrypt, + .decrypt = kmb_ocs_sm4_gcm_decrypt, + }, + { + .base = { + .cra_name = "ccm(sm4)", + .cra_driver_name = "ccm-sm4-keembay-ocs", + .cra_priority = KMB_OCS_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct ocs_aes_tctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = ocs_sm4_aead_cra_init, + .exit = ocs_aead_cra_exit, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .setauthsize = kmb_ocs_aead_ccm_setauthsize, + .setkey = kmb_ocs_sm4_aead_set_key, + .encrypt = kmb_ocs_sm4_ccm_encrypt, + .decrypt = kmb_ocs_sm4_ccm_decrypt, + } +}; + +static void unregister_aes_algs(struct ocs_aes_dev *aes_dev) +{ + crypto_unregister_aeads(algs_aead, ARRAY_SIZE(algs_aead)); + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + +static int register_aes_algs(struct ocs_aes_dev *aes_dev) +{ + int ret; + + /* + * If any algorithm fails to register, all preceding algorithms that + * were successfully registered will be automatically unregistered. + */ + ret = crypto_register_aeads(algs_aead, ARRAY_SIZE(algs_aead)); + if (ret) + return ret; + + ret = crypto_register_skciphers(algs, ARRAY_SIZE(algs)); + if (ret) + crypto_unregister_aeads(algs_aead, ARRAY_SIZE(algs)); + + return ret; +} + +/* Device tree driver match. */ +static const struct of_device_id kmb_ocs_aes_of_match[] = { + { + .compatible = "intel,keembay-ocs-aes", + }, + {} +}; + +static int kmb_ocs_aes_remove(struct platform_device *pdev) +{ + struct ocs_aes_dev *aes_dev; + + aes_dev = platform_get_drvdata(pdev); + if (!aes_dev) + return -ENODEV; + + unregister_aes_algs(aes_dev); + + spin_lock(&ocs_aes.lock); + list_del(&aes_dev->list); + spin_unlock(&ocs_aes.lock); + + crypto_engine_exit(aes_dev->engine); + + return 0; +} + +static int kmb_ocs_aes_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ocs_aes_dev *aes_dev; + struct resource *aes_mem; + int rc; + + aes_dev = devm_kzalloc(dev, sizeof(*aes_dev), GFP_KERNEL); + if (!aes_dev) + return -ENOMEM; + + aes_dev->dev = dev; + + platform_set_drvdata(pdev, aes_dev); + + rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(dev, "Failed to set 32 bit dma mask %d\n", rc); + return rc; + } + + /* Get base register address. */ + aes_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!aes_mem) { + dev_err(dev, "Could not retrieve io mem resource\n"); + return -ENODEV; + } + + aes_dev->base_reg = devm_ioremap_resource(&pdev->dev, aes_mem); + if (IS_ERR(aes_dev->base_reg)) { + dev_err(dev, "Failed to get base address\n"); + return PTR_ERR(aes_dev->base_reg); + } + + /* Get and request IRQ */ + aes_dev->irq = platform_get_irq(pdev, 0); + if (aes_dev->irq < 0) + return aes_dev->irq; + + rc = devm_request_threaded_irq(dev, aes_dev->irq, ocs_aes_irq_handler, + NULL, 0, "keembay-ocs-aes", aes_dev); + if (rc < 0) { + dev_err(dev, "Could not request IRQ\n"); + return rc; + } + + INIT_LIST_HEAD(&aes_dev->list); + spin_lock(&ocs_aes.lock); + list_add_tail(&aes_dev->list, &ocs_aes.dev_list); + spin_unlock(&ocs_aes.lock); + + init_completion(&aes_dev->irq_completion); + + /* Initialize crypto engine */ + aes_dev->engine = crypto_engine_alloc_init(dev, true); + if (!aes_dev->engine) + goto list_del; + + rc = crypto_engine_start(aes_dev->engine); + if (rc) { + dev_err(dev, "Could not start crypto engine\n"); + goto cleanup; + } + + rc = register_aes_algs(aes_dev); + if (rc) { + dev_err(dev, + "Could not register OCS algorithms with Crypto API\n"); + goto cleanup; + } + + return 0; + +cleanup: + crypto_engine_exit(aes_dev->engine); +list_del: + spin_lock(&ocs_aes.lock); + list_del(&aes_dev->list); + spin_unlock(&ocs_aes.lock); + + return rc; +} + +/* The OCS driver is a platform device. */ +static struct platform_driver kmb_ocs_aes_driver = { + .probe = kmb_ocs_aes_probe, + .remove = kmb_ocs_aes_remove, + .driver = { + .name = DRV_NAME, + .of_match_table = kmb_ocs_aes_of_match, + }, +}; + +module_platform_driver(kmb_ocs_aes_driver); + +MODULE_DESCRIPTION("Intel Keem Bay Offload and Crypto Subsystem (OCS) AES/SM4 Driver"); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS_CRYPTO("cbc-aes-keembay-ocs"); +MODULE_ALIAS_CRYPTO("ctr-aes-keembay-ocs"); +MODULE_ALIAS_CRYPTO("gcm-aes-keembay-ocs"); +MODULE_ALIAS_CRYPTO("ccm-aes-keembay-ocs"); + +MODULE_ALIAS_CRYPTO("cbc-sm4-keembay-ocs"); +MODULE_ALIAS_CRYPTO("ctr-sm4-keembay-ocs"); +MODULE_ALIAS_CRYPTO("gcm-sm4-keembay-ocs"); +MODULE_ALIAS_CRYPTO("ccm-sm4-keembay-ocs"); + +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB +MODULE_ALIAS_CRYPTO("ecb-aes-keembay-ocs"); +MODULE_ALIAS_CRYPTO("ecb-sm4-keembay-ocs"); +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB */ + +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS +MODULE_ALIAS_CRYPTO("cts-aes-keembay-ocs"); +MODULE_ALIAS_CRYPTO("cts-sm4-keembay-ocs"); +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS */ diff --git a/drivers/crypto/keembay/ocs-aes.c b/drivers/crypto/keembay/ocs-aes.c new file mode 100644 index 000000000000..cc286adb1c4a --- /dev/null +++ b/drivers/crypto/keembay/ocs-aes.c @@ -0,0 +1,1489 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel Keem Bay OCS AES Crypto Driver. + * + * Copyright (C) 2018-2020 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "ocs-aes.h" + +#define AES_COMMAND_OFFSET 0x0000 +#define AES_KEY_0_OFFSET 0x0004 +#define AES_KEY_1_OFFSET 0x0008 +#define AES_KEY_2_OFFSET 0x000C +#define AES_KEY_3_OFFSET 0x0010 +#define AES_KEY_4_OFFSET 0x0014 +#define AES_KEY_5_OFFSET 0x0018 +#define AES_KEY_6_OFFSET 0x001C +#define AES_KEY_7_OFFSET 0x0020 +#define AES_IV_0_OFFSET 0x0024 +#define AES_IV_1_OFFSET 0x0028 +#define AES_IV_2_OFFSET 0x002C +#define AES_IV_3_OFFSET 0x0030 +#define AES_ACTIVE_OFFSET 0x0034 +#define AES_STATUS_OFFSET 0x0038 +#define AES_KEY_SIZE_OFFSET 0x0044 +#define AES_IER_OFFSET 0x0048 +#define AES_ISR_OFFSET 0x005C +#define AES_MULTIPURPOSE1_0_OFFSET 0x0200 +#define AES_MULTIPURPOSE1_1_OFFSET 0x0204 +#define AES_MULTIPURPOSE1_2_OFFSET 0x0208 +#define AES_MULTIPURPOSE1_3_OFFSET 0x020C +#define AES_MULTIPURPOSE2_0_OFFSET 0x0220 +#define AES_MULTIPURPOSE2_1_OFFSET 0x0224 +#define AES_MULTIPURPOSE2_2_OFFSET 0x0228 +#define AES_MULTIPURPOSE2_3_OFFSET 0x022C +#define AES_BYTE_ORDER_CFG_OFFSET 0x02C0 +#define AES_TLEN_OFFSET 0x0300 +#define AES_T_MAC_0_OFFSET 0x0304 +#define AES_T_MAC_1_OFFSET 0x0308 +#define AES_T_MAC_2_OFFSET 0x030C +#define AES_T_MAC_3_OFFSET 0x0310 +#define AES_PLEN_OFFSET 0x0314 +#define AES_A_DMA_SRC_ADDR_OFFSET 0x0400 +#define AES_A_DMA_DST_ADDR_OFFSET 0x0404 +#define AES_A_DMA_SRC_SIZE_OFFSET 0x0408 +#define AES_A_DMA_DST_SIZE_OFFSET 0x040C +#define AES_A_DMA_DMA_MODE_OFFSET 0x0410 +#define AES_A_DMA_NEXT_SRC_DESCR_OFFSET 0x0418 +#define AES_A_DMA_NEXT_DST_DESCR_OFFSET 0x041C +#define AES_A_DMA_WHILE_ACTIVE_MODE_OFFSET 0x0420 +#define AES_A_DMA_LOG_OFFSET 0x0424 +#define AES_A_DMA_STATUS_OFFSET 0x0428 +#define AES_A_DMA_PERF_CNTR_OFFSET 0x042C +#define AES_A_DMA_MSI_ISR_OFFSET 0x0480 +#define AES_A_DMA_MSI_IER_OFFSET 0x0484 +#define AES_A_DMA_MSI_MASK_OFFSET 0x0488 +#define AES_A_DMA_INBUFFER_WRITE_FIFO_OFFSET 0x0600 +#define AES_A_DMA_OUTBUFFER_READ_FIFO_OFFSET 0x0700 + +/* + * AES_A_DMA_DMA_MODE register. + * Default: 0x00000000. + * bit[31] ACTIVE + * This bit activates the DMA. When the DMA finishes, it resets + * this bit to zero. + * bit[30:26] Unused by this driver. + * bit[25] SRC_LINK_LIST_EN + * Source link list enable bit. When the linked list is terminated + * this bit is reset by the DMA. + * bit[24] DST_LINK_LIST_EN + * Destination link list enable bit. When the linked list is + * terminated this bit is reset by the DMA. + * bit[23:0] Unused by this driver. + */ +#define AES_A_DMA_DMA_MODE_ACTIVE BIT(31) +#define AES_A_DMA_DMA_MODE_SRC_LINK_LIST_EN BIT(25) +#define AES_A_DMA_DMA_MODE_DST_LINK_LIST_EN BIT(24) + +/* + * AES_ACTIVE register + * default 0x00000000 + * bit[31:10] Reserved + * bit[9] LAST_ADATA + * bit[8] LAST_GCX + * bit[7:2] Reserved + * bit[1] TERMINATION + * bit[0] TRIGGER + */ +#define AES_ACTIVE_LAST_ADATA BIT(9) +#define AES_ACTIVE_LAST_CCM_GCM BIT(8) +#define AES_ACTIVE_TERMINATION BIT(1) +#define AES_ACTIVE_TRIGGER BIT(0) + +#define AES_DISABLE_INT 0x00000000 +#define AES_DMA_CPD_ERR_INT BIT(8) +#define AES_DMA_OUTBUF_RD_ERR_INT BIT(7) +#define AES_DMA_OUTBUF_WR_ERR_INT BIT(6) +#define AES_DMA_INBUF_RD_ERR_INT BIT(5) +#define AES_DMA_INBUF_WR_ERR_INT BIT(4) +#define AES_DMA_BAD_COMP_INT BIT(3) +#define AES_DMA_SAI_INT BIT(2) +#define AES_DMA_SRC_DONE_INT BIT(0) +#define AES_COMPLETE_INT BIT(1) + +#define AES_DMA_MSI_MASK_CLEAR BIT(0) + +#define AES_128_BIT_KEY 0x00000000 +#define AES_256_BIT_KEY BIT(0) + +#define AES_DEACTIVATE_PERF_CNTR 0x00000000 +#define AES_ACTIVATE_PERF_CNTR BIT(0) + +#define AES_MAX_TAG_SIZE_U32 4 + +#define OCS_LL_DMA_FLAG_TERMINATE BIT(31) + +/* + * There is an inconsistency in the documentation. This is documented as a + * 11-bit value, but it is actually 10-bits. + */ +#define AES_DMA_STATUS_INPUT_BUFFER_OCCUPANCY_MASK 0x3FF + +/* + * During CCM decrypt, the OCS block needs to finish processing the ciphertext + * before the tag is written. For 128-bit mode this required delay is 28 OCS + * clock cycles. For 256-bit mode it is 36 OCS clock cycles. + */ +#define CCM_DECRYPT_DELAY_TAG_CLK_COUNT 36UL + +/* + * During CCM decrypt there must be a delay of at least 42 OCS clock cycles + * between setting the TRIGGER bit in AES_ACTIVE and setting the LAST_CCM_GCM + * bit in the same register (as stated in the OCS databook) + */ +#define CCM_DECRYPT_DELAY_LAST_GCX_CLK_COUNT 42UL + +/* See RFC3610 section 2.2 */ +#define L_PRIME_MIN (1) +#define L_PRIME_MAX (7) +/* + * CCM IV format from RFC 3610 section 2.3 + * + * Octet Number Contents + * ------------ --------- + * 0 Flags + * 1 ... 15-L Nonce N + * 16-L ... 15 Counter i + * + * Flags = L' = L - 1 + */ +#define L_PRIME_IDX 0 +#define COUNTER_START(lprime) (16 - ((lprime) + 1)) +#define COUNTER_LEN(lprime) ((lprime) + 1) + +enum aes_counter_mode { + AES_CTR_M_NO_INC = 0, + AES_CTR_M_32_INC = 1, + AES_CTR_M_64_INC = 2, + AES_CTR_M_128_INC = 3, +}; + +/** + * struct ocs_dma_linked_list - OCS DMA linked list entry. + * @src_addr: Source address of the data. + * @src_len: Length of data to be fetched. + * @next: Next dma_list to fetch. + * @ll_flags: Flags (Freeze @ terminate) for the DMA engine. + */ +struct ocs_dma_linked_list { + u32 src_addr; + u32 src_len; + u32 next; + u32 ll_flags; +} __packed; + +/* + * Set endianness of inputs and outputs + * AES_BYTE_ORDER_CFG + * default 0x00000000 + * bit [10] - KEY_HI_LO_SWAP + * bit [9] - KEY_HI_SWAP_DWORDS_IN_OCTWORD + * bit [8] - KEY_HI_SWAP_BYTES_IN_DWORD + * bit [7] - KEY_LO_SWAP_DWORDS_IN_OCTWORD + * bit [6] - KEY_LO_SWAP_BYTES_IN_DWORD + * bit [5] - IV_SWAP_DWORDS_IN_OCTWORD + * bit [4] - IV_SWAP_BYTES_IN_DWORD + * bit [3] - DOUT_SWAP_DWORDS_IN_OCTWORD + * bit [2] - DOUT_SWAP_BYTES_IN_DWORD + * bit [1] - DOUT_SWAP_DWORDS_IN_OCTWORD + * bit [0] - DOUT_SWAP_BYTES_IN_DWORD + */ +static inline void aes_a_set_endianness(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(0x7FF, aes_dev->base_reg + AES_BYTE_ORDER_CFG_OFFSET); +} + +/* Trigger AES process start. */ +static inline void aes_a_op_trigger(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(AES_ACTIVE_TRIGGER, aes_dev->base_reg + AES_ACTIVE_OFFSET); +} + +/* Indicate last bulk of data. */ +static inline void aes_a_op_termination(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(AES_ACTIVE_TERMINATION, + aes_dev->base_reg + AES_ACTIVE_OFFSET); +} + +/* + * Set LAST_CCM_GCM in AES_ACTIVE register and clear all other bits. + * + * Called when DMA is programmed to fetch the last batch of data. + * - For AES-CCM it is called for the last batch of Payload data and Ciphertext + * data. + * - For AES-GCM, it is called for the last batch of Plaintext data and + * Ciphertext data. + */ +static inline void aes_a_set_last_gcx(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(AES_ACTIVE_LAST_CCM_GCM, + aes_dev->base_reg + AES_ACTIVE_OFFSET); +} + +/* Wait for LAST_CCM_GCM bit to be unset. */ +static inline void aes_a_wait_last_gcx(const struct ocs_aes_dev *aes_dev) +{ + u32 aes_active_reg; + + do { + aes_active_reg = ioread32(aes_dev->base_reg + + AES_ACTIVE_OFFSET); + } while (aes_active_reg & AES_ACTIVE_LAST_CCM_GCM); +} + +/* Wait for 10 bits of input occupancy. */ +static void aes_a_dma_wait_input_buffer_occupancy(const struct ocs_aes_dev *aes_dev) +{ + u32 reg; + + do { + reg = ioread32(aes_dev->base_reg + AES_A_DMA_STATUS_OFFSET); + } while (reg & AES_DMA_STATUS_INPUT_BUFFER_OCCUPANCY_MASK); +} + + /* + * Set LAST_CCM_GCM and LAST_ADATA bits in AES_ACTIVE register (and clear all + * other bits). + * + * Called when DMA is programmed to fetch the last batch of Associated Data + * (CCM case) or Additional Authenticated Data (GCM case). + */ +static inline void aes_a_set_last_gcx_and_adata(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(AES_ACTIVE_LAST_ADATA | AES_ACTIVE_LAST_CCM_GCM, + aes_dev->base_reg + AES_ACTIVE_OFFSET); +} + +/* Set DMA src and dst transfer size to 0 */ +static inline void aes_a_dma_set_xfer_size_zero(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(0, aes_dev->base_reg + AES_A_DMA_SRC_SIZE_OFFSET); + iowrite32(0, aes_dev->base_reg + AES_A_DMA_DST_SIZE_OFFSET); +} + +/* Activate DMA for zero-byte transfer case. */ +static inline void aes_a_dma_active(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(AES_A_DMA_DMA_MODE_ACTIVE, + aes_dev->base_reg + AES_A_DMA_DMA_MODE_OFFSET); +} + +/* Activate DMA and enable src linked list */ +static inline void aes_a_dma_active_src_ll_en(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(AES_A_DMA_DMA_MODE_ACTIVE | + AES_A_DMA_DMA_MODE_SRC_LINK_LIST_EN, + aes_dev->base_reg + AES_A_DMA_DMA_MODE_OFFSET); +} + +/* Activate DMA and enable dst linked list */ +static inline void aes_a_dma_active_dst_ll_en(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(AES_A_DMA_DMA_MODE_ACTIVE | + AES_A_DMA_DMA_MODE_DST_LINK_LIST_EN, + aes_dev->base_reg + AES_A_DMA_DMA_MODE_OFFSET); +} + +/* Activate DMA and enable src and dst linked lists */ +static inline void aes_a_dma_active_src_dst_ll_en(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(AES_A_DMA_DMA_MODE_ACTIVE | + AES_A_DMA_DMA_MODE_SRC_LINK_LIST_EN | + AES_A_DMA_DMA_MODE_DST_LINK_LIST_EN, + aes_dev->base_reg + AES_A_DMA_DMA_MODE_OFFSET); +} + +/* Reset PERF_CNTR to 0 and activate it */ +static inline void aes_a_dma_reset_and_activate_perf_cntr(const struct ocs_aes_dev *aes_dev) +{ + iowrite32(0x00000000, aes_dev->base_reg + AES_A_DMA_PERF_CNTR_OFFSET); + iowrite32(AES_ACTIVATE_PERF_CNTR, + aes_dev->base_reg + AES_A_DMA_WHILE_ACTIVE_MODE_OFFSET); +} + +/* Wait until PERF_CNTR is > delay, then deactivate it */ +static inline void aes_a_dma_wait_and_deactivate_perf_cntr(const struct ocs_aes_dev *aes_dev, + int delay) +{ + while (ioread32(aes_dev->base_reg + AES_A_DMA_PERF_CNTR_OFFSET) < delay) + ; + iowrite32(AES_DEACTIVATE_PERF_CNTR, + aes_dev->base_reg + AES_A_DMA_WHILE_ACTIVE_MODE_OFFSET); +} + +/* Disable AES and DMA IRQ. */ +static void aes_irq_disable(struct ocs_aes_dev *aes_dev) +{ + u32 isr_val = 0; + + /* Disable interrupts */ + iowrite32(AES_DISABLE_INT, + aes_dev->base_reg + AES_A_DMA_MSI_IER_OFFSET); + iowrite32(AES_DISABLE_INT, aes_dev->base_reg + AES_IER_OFFSET); + + /* Clear any pending interrupt */ + isr_val = ioread32(aes_dev->base_reg + AES_A_DMA_MSI_ISR_OFFSET); + if (isr_val) + iowrite32(isr_val, + aes_dev->base_reg + AES_A_DMA_MSI_ISR_OFFSET); + + isr_val = ioread32(aes_dev->base_reg + AES_A_DMA_MSI_MASK_OFFSET); + if (isr_val) + iowrite32(isr_val, + aes_dev->base_reg + AES_A_DMA_MSI_MASK_OFFSET); + + isr_val = ioread32(aes_dev->base_reg + AES_ISR_OFFSET); + if (isr_val) + iowrite32(isr_val, aes_dev->base_reg + AES_ISR_OFFSET); +} + +/* Enable AES or DMA IRQ. IRQ is disabled once fired. */ +static void aes_irq_enable(struct ocs_aes_dev *aes_dev, u8 irq) +{ + if (irq == AES_COMPLETE_INT) { + /* Ensure DMA error interrupts are enabled */ + iowrite32(AES_DMA_CPD_ERR_INT | + AES_DMA_OUTBUF_RD_ERR_INT | + AES_DMA_OUTBUF_WR_ERR_INT | + AES_DMA_INBUF_RD_ERR_INT | + AES_DMA_INBUF_WR_ERR_INT | + AES_DMA_BAD_COMP_INT | + AES_DMA_SAI_INT, + aes_dev->base_reg + AES_A_DMA_MSI_IER_OFFSET); + /* + * AES_IER + * default 0x00000000 + * bits [31:3] - reserved + * bit [2] - EN_SKS_ERR + * bit [1] - EN_AES_COMPLETE + * bit [0] - reserved + */ + iowrite32(AES_COMPLETE_INT, aes_dev->base_reg + AES_IER_OFFSET); + return; + } + if (irq == AES_DMA_SRC_DONE_INT) { + /* Ensure AES interrupts are disabled */ + iowrite32(AES_DISABLE_INT, aes_dev->base_reg + AES_IER_OFFSET); + /* + * DMA_MSI_IER + * default 0x00000000 + * bits [31:9] - reserved + * bit [8] - CPD_ERR_INT_EN + * bit [7] - OUTBUF_RD_ERR_INT_EN + * bit [6] - OUTBUF_WR_ERR_INT_EN + * bit [5] - INBUF_RD_ERR_INT_EN + * bit [4] - INBUF_WR_ERR_INT_EN + * bit [3] - BAD_COMP_INT_EN + * bit [2] - SAI_INT_EN + * bit [1] - DST_DONE_INT_EN + * bit [0] - SRC_DONE_INT_EN + */ + iowrite32(AES_DMA_CPD_ERR_INT | + AES_DMA_OUTBUF_RD_ERR_INT | + AES_DMA_OUTBUF_WR_ERR_INT | + AES_DMA_INBUF_RD_ERR_INT | + AES_DMA_INBUF_WR_ERR_INT | + AES_DMA_BAD_COMP_INT | + AES_DMA_SAI_INT | + AES_DMA_SRC_DONE_INT, + aes_dev->base_reg + AES_A_DMA_MSI_IER_OFFSET); + } +} + +/* Enable and wait for IRQ (either from OCS AES engine or DMA) */ +static int ocs_aes_irq_enable_and_wait(struct ocs_aes_dev *aes_dev, u8 irq) +{ + int rc; + + reinit_completion(&aes_dev->irq_completion); + aes_irq_enable(aes_dev, irq); + rc = wait_for_completion_interruptible(&aes_dev->irq_completion); + if (rc) + return rc; + + return aes_dev->dma_err_mask ? -EIO : 0; +} + +/* Configure DMA to OCS, linked list mode */ +static inline void dma_to_ocs_aes_ll(struct ocs_aes_dev *aes_dev, + dma_addr_t dma_list) +{ + iowrite32(0, aes_dev->base_reg + AES_A_DMA_SRC_SIZE_OFFSET); + iowrite32(dma_list, + aes_dev->base_reg + AES_A_DMA_NEXT_SRC_DESCR_OFFSET); +} + +/* Configure DMA from OCS, linked list mode */ +static inline void dma_from_ocs_aes_ll(struct ocs_aes_dev *aes_dev, + dma_addr_t dma_list) +{ + iowrite32(0, aes_dev->base_reg + AES_A_DMA_DST_SIZE_OFFSET); + iowrite32(dma_list, + aes_dev->base_reg + AES_A_DMA_NEXT_DST_DESCR_OFFSET); +} + +irqreturn_t ocs_aes_irq_handler(int irq, void *dev_id) +{ + struct ocs_aes_dev *aes_dev = dev_id; + u32 aes_dma_isr; + + /* Read DMA ISR status. */ + aes_dma_isr = ioread32(aes_dev->base_reg + AES_A_DMA_MSI_ISR_OFFSET); + + /* Disable and clear interrupts. */ + aes_irq_disable(aes_dev); + + /* Save DMA error status. */ + aes_dev->dma_err_mask = aes_dma_isr & + (AES_DMA_CPD_ERR_INT | + AES_DMA_OUTBUF_RD_ERR_INT | + AES_DMA_OUTBUF_WR_ERR_INT | + AES_DMA_INBUF_RD_ERR_INT | + AES_DMA_INBUF_WR_ERR_INT | + AES_DMA_BAD_COMP_INT | + AES_DMA_SAI_INT); + + /* Signal IRQ completion. */ + complete(&aes_dev->irq_completion); + + return IRQ_HANDLED; +} + +/** + * ocs_aes_set_key() - Write key into OCS AES hardware. + * @aes_dev: The OCS AES device to write the key to. + * @key_size: The size of the key (in bytes). + * @key: The key to write. + * @cipher: The cipher the key is for. + * + * For AES @key_size must be either 16 or 32. For SM4 @key_size must be 16. + * + * Return: 0 on success, negative error code otherwise. + */ +int ocs_aes_set_key(struct ocs_aes_dev *aes_dev, u32 key_size, const u8 *key, + enum ocs_cipher cipher) +{ + const u32 *key_u32; + u32 val; + int i; + + /* OCS AES supports 128-bit and 256-bit keys only. */ + if (cipher == OCS_AES && !(key_size == 32 || key_size == 16)) { + dev_err(aes_dev->dev, + "%d-bit keys not supported by AES cipher\n", + key_size * 8); + return -EINVAL; + } + /* OCS SM4 supports 128-bit keys only. */ + if (cipher == OCS_SM4 && key_size != 16) { + dev_err(aes_dev->dev, + "%d-bit keys not supported for SM4 cipher\n", + key_size * 8); + return -EINVAL; + } + + if (!key) + return -EINVAL; + + key_u32 = (const u32 *)key; + + /* Write key to AES_KEY[0-7] registers */ + for (i = 0; i < (key_size / sizeof(u32)); i++) { + iowrite32(key_u32[i], + aes_dev->base_reg + AES_KEY_0_OFFSET + + (i * sizeof(u32))); + } + /* + * Write key size + * bits [31:1] - reserved + * bit [0] - AES_KEY_SIZE + * 0 - 128 bit key + * 1 - 256 bit key + */ + val = (key_size == 16) ? AES_128_BIT_KEY : AES_256_BIT_KEY; + iowrite32(val, aes_dev->base_reg + AES_KEY_SIZE_OFFSET); + + return 0; +} + +/* Write AES_COMMAND */ +static inline void set_ocs_aes_command(struct ocs_aes_dev *aes_dev, + enum ocs_cipher cipher, + enum ocs_mode mode, + enum ocs_instruction instruction) +{ + u32 val; + + /* AES_COMMAND + * default 0x000000CC + * bit [14] - CIPHER_SELECT + * 0 - AES + * 1 - SM4 + * bits [11:8] - OCS_AES_MODE + * 0000 - ECB + * 0001 - CBC + * 0010 - CTR + * 0110 - CCM + * 0111 - GCM + * 1001 - CTS + * bits [7:6] - AES_INSTRUCTION + * 00 - ENCRYPT + * 01 - DECRYPT + * 10 - EXPAND + * 11 - BYPASS + * bits [3:2] - CTR_M_BITS + * 00 - No increment + * 01 - Least significant 32 bits are incremented + * 10 - Least significant 64 bits are incremented + * 11 - Full 128 bits are incremented + */ + val = (cipher << 14) | (mode << 8) | (instruction << 6) | + (AES_CTR_M_128_INC << 2); + iowrite32(val, aes_dev->base_reg + AES_COMMAND_OFFSET); +} + +static void ocs_aes_init(struct ocs_aes_dev *aes_dev, + enum ocs_mode mode, + enum ocs_cipher cipher, + enum ocs_instruction instruction) +{ + /* Ensure interrupts are disabled and pending interrupts cleared. */ + aes_irq_disable(aes_dev); + + /* Set endianness recommended by data-sheet. */ + aes_a_set_endianness(aes_dev); + + /* Set AES_COMMAND register. */ + set_ocs_aes_command(aes_dev, cipher, mode, instruction); +} + +/* + * Write the byte length of the last AES/SM4 block of Payload data (without + * zero padding and without the length of the MAC) in register AES_PLEN. + */ +static inline void ocs_aes_write_last_data_blk_len(struct ocs_aes_dev *aes_dev, + u32 size) +{ + u32 val; + + if (size == 0) { + val = 0; + goto exit; + } + + val = size % AES_BLOCK_SIZE; + if (val == 0) + val = AES_BLOCK_SIZE; + +exit: + iowrite32(val, aes_dev->base_reg + AES_PLEN_OFFSET); +} + +/* + * Validate inputs according to mode. + * If OK return 0; else return -EINVAL. + */ +static int ocs_aes_validate_inputs(dma_addr_t src_dma_list, u32 src_size, + const u8 *iv, u32 iv_size, + dma_addr_t aad_dma_list, u32 aad_size, + const u8 *tag, u32 tag_size, + enum ocs_cipher cipher, enum ocs_mode mode, + enum ocs_instruction instruction, + dma_addr_t dst_dma_list) +{ + /* Ensure cipher, mode and instruction are valid. */ + if (!(cipher == OCS_AES || cipher == OCS_SM4)) + return -EINVAL; + + if (mode != OCS_MODE_ECB && mode != OCS_MODE_CBC && + mode != OCS_MODE_CTR && mode != OCS_MODE_CCM && + mode != OCS_MODE_GCM && mode != OCS_MODE_CTS) + return -EINVAL; + + if (instruction != OCS_ENCRYPT && instruction != OCS_DECRYPT && + instruction != OCS_EXPAND && instruction != OCS_BYPASS) + return -EINVAL; + + /* + * When instruction is OCS_BYPASS, OCS simply copies data from source + * to destination using DMA. + * + * AES mode is irrelevant, but both source and destination DMA + * linked-list must be defined. + */ + if (instruction == OCS_BYPASS) { + if (src_dma_list == DMA_MAPPING_ERROR || + dst_dma_list == DMA_MAPPING_ERROR) + return -EINVAL; + + return 0; + } + + /* + * For performance reasons switch based on mode to limit unnecessary + * conditionals for each mode + */ + switch (mode) { + case OCS_MODE_ECB: + /* Ensure input length is multiple of block size */ + if (src_size % AES_BLOCK_SIZE != 0) + return -EINVAL; + + /* Ensure source and destination linked lists are created */ + if (src_dma_list == DMA_MAPPING_ERROR || + dst_dma_list == DMA_MAPPING_ERROR) + return -EINVAL; + + return 0; + + case OCS_MODE_CBC: + /* Ensure input length is multiple of block size */ + if (src_size % AES_BLOCK_SIZE != 0) + return -EINVAL; + + /* Ensure source and destination linked lists are created */ + if (src_dma_list == DMA_MAPPING_ERROR || + dst_dma_list == DMA_MAPPING_ERROR) + return -EINVAL; + + /* Ensure IV is present and block size in length */ + if (!iv || iv_size != AES_BLOCK_SIZE) + return -EINVAL; + + return 0; + + case OCS_MODE_CTR: + /* Ensure input length of 1 byte or greater */ + if (src_size == 0) + return -EINVAL; + + /* Ensure source and destination linked lists are created */ + if (src_dma_list == DMA_MAPPING_ERROR || + dst_dma_list == DMA_MAPPING_ERROR) + return -EINVAL; + + /* Ensure IV is present and block size in length */ + if (!iv || iv_size != AES_BLOCK_SIZE) + return -EINVAL; + + return 0; + + case OCS_MODE_CTS: + /* Ensure input length >= block size */ + if (src_size < AES_BLOCK_SIZE) + return -EINVAL; + + /* Ensure source and destination linked lists are created */ + if (src_dma_list == DMA_MAPPING_ERROR || + dst_dma_list == DMA_MAPPING_ERROR) + return -EINVAL; + + /* Ensure IV is present and block size in length */ + if (!iv || iv_size != AES_BLOCK_SIZE) + return -EINVAL; + + return 0; + + case OCS_MODE_GCM: + /* Ensure IV is present and GCM_AES_IV_SIZE in length */ + if (!iv || iv_size != GCM_AES_IV_SIZE) + return -EINVAL; + + /* + * If input data present ensure source and destination linked + * lists are created + */ + if (src_size && (src_dma_list == DMA_MAPPING_ERROR || + dst_dma_list == DMA_MAPPING_ERROR)) + return -EINVAL; + + /* If aad present ensure aad linked list is created */ + if (aad_size && aad_dma_list == DMA_MAPPING_ERROR) + return -EINVAL; + + /* Ensure tag destination is set */ + if (!tag) + return -EINVAL; + + /* Just ensure that tag_size doesn't cause overflows. */ + if (tag_size > (AES_MAX_TAG_SIZE_U32 * sizeof(u32))) + return -EINVAL; + + return 0; + + case OCS_MODE_CCM: + /* Ensure IV is present and block size in length */ + if (!iv || iv_size != AES_BLOCK_SIZE) + return -EINVAL; + + /* 2 <= L <= 8, so 1 <= L' <= 7 */ + if (iv[L_PRIME_IDX] < L_PRIME_MIN || + iv[L_PRIME_IDX] > L_PRIME_MAX) + return -EINVAL; + + /* If aad present ensure aad linked list is created */ + if (aad_size && aad_dma_list == DMA_MAPPING_ERROR) + return -EINVAL; + + /* Just ensure that tag_size doesn't cause overflows. */ + if (tag_size > (AES_MAX_TAG_SIZE_U32 * sizeof(u32))) + return -EINVAL; + + if (instruction == OCS_DECRYPT) { + /* + * If input data present ensure source and destination + * linked lists are created + */ + if (src_size && (src_dma_list == DMA_MAPPING_ERROR || + dst_dma_list == DMA_MAPPING_ERROR)) + return -EINVAL; + + /* Ensure input tag is present */ + if (!tag) + return -EINVAL; + + return 0; + } + + /* Instruction == OCS_ENCRYPT */ + + /* + * Destination linked list always required (for tag even if no + * input data) + */ + if (dst_dma_list == DMA_MAPPING_ERROR) + return -EINVAL; + + /* If input data present ensure src linked list is created */ + if (src_size && src_dma_list == DMA_MAPPING_ERROR) + return -EINVAL; + + return 0; + + default: + return -EINVAL; + } +} + +/** + * ocs_aes_op() - Perform AES/SM4 operation. + * @aes_dev: The OCS AES device to use. + * @mode: The mode to use (ECB, CBC, CTR, or CTS). + * @cipher: The cipher to use (AES or SM4). + * @instruction: The instruction to perform (encrypt or decrypt). + * @dst_dma_list: The OCS DMA list mapping output memory. + * @src_dma_list: The OCS DMA list mapping input payload data. + * @src_size: The amount of data mapped by @src_dma_list. + * @iv: The IV vector. + * @iv_size: The size (in bytes) of @iv. + * + * Return: 0 on success, negative error code otherwise. + */ +int ocs_aes_op(struct ocs_aes_dev *aes_dev, + enum ocs_mode mode, + enum ocs_cipher cipher, + enum ocs_instruction instruction, + dma_addr_t dst_dma_list, + dma_addr_t src_dma_list, + u32 src_size, + u8 *iv, + u32 iv_size) +{ + u32 *iv32; + int rc; + + rc = ocs_aes_validate_inputs(src_dma_list, src_size, iv, iv_size, 0, 0, + NULL, 0, cipher, mode, instruction, + dst_dma_list); + if (rc) + return rc; + /* + * ocs_aes_validate_inputs() is a generic check, now ensure mode is not + * GCM or CCM. + */ + if (mode == OCS_MODE_GCM || mode == OCS_MODE_CCM) + return -EINVAL; + + /* Cast IV to u32 array. */ + iv32 = (u32 *)iv; + + ocs_aes_init(aes_dev, mode, cipher, instruction); + + if (mode == OCS_MODE_CTS) { + /* Write the byte length of the last data block to engine. */ + ocs_aes_write_last_data_blk_len(aes_dev, src_size); + } + + /* ECB is the only mode that doesn't use IV. */ + if (mode != OCS_MODE_ECB) { + iowrite32(iv32[0], aes_dev->base_reg + AES_IV_0_OFFSET); + iowrite32(iv32[1], aes_dev->base_reg + AES_IV_1_OFFSET); + iowrite32(iv32[2], aes_dev->base_reg + AES_IV_2_OFFSET); + iowrite32(iv32[3], aes_dev->base_reg + AES_IV_3_OFFSET); + } + + /* Set AES_ACTIVE.TRIGGER to start the operation. */ + aes_a_op_trigger(aes_dev); + + /* Configure and activate input / output DMA. */ + dma_to_ocs_aes_ll(aes_dev, src_dma_list); + dma_from_ocs_aes_ll(aes_dev, dst_dma_list); + aes_a_dma_active_src_dst_ll_en(aes_dev); + + if (mode == OCS_MODE_CTS) { + /* + * For CTS mode, instruct engine to activate ciphertext + * stealing if last block of data is incomplete. + */ + aes_a_set_last_gcx(aes_dev); + } else { + /* For all other modes, just write the 'termination' bit. */ + aes_a_op_termination(aes_dev); + } + + /* Wait for engine to complete processing. */ + rc = ocs_aes_irq_enable_and_wait(aes_dev, AES_COMPLETE_INT); + if (rc) + return rc; + + if (mode == OCS_MODE_CTR) { + /* Read back IV for streaming mode */ + iv32[0] = ioread32(aes_dev->base_reg + AES_IV_0_OFFSET); + iv32[1] = ioread32(aes_dev->base_reg + AES_IV_1_OFFSET); + iv32[2] = ioread32(aes_dev->base_reg + AES_IV_2_OFFSET); + iv32[3] = ioread32(aes_dev->base_reg + AES_IV_3_OFFSET); + } + + return 0; +} + +/* Compute and write J0 to engine registers. */ +static void ocs_aes_gcm_write_j0(const struct ocs_aes_dev *aes_dev, + const u8 *iv) +{ + const u32 *j0 = (u32 *)iv; + + /* + * IV must be 12 bytes; Other sizes not supported as Linux crypto API + * does only expects/allows 12 byte IV for GCM + */ + iowrite32(0x00000001, aes_dev->base_reg + AES_IV_0_OFFSET); + iowrite32(__swab32(j0[2]), aes_dev->base_reg + AES_IV_1_OFFSET); + iowrite32(__swab32(j0[1]), aes_dev->base_reg + AES_IV_2_OFFSET); + iowrite32(__swab32(j0[0]), aes_dev->base_reg + AES_IV_3_OFFSET); +} + +/* Read GCM tag from engine registers. */ +static inline void ocs_aes_gcm_read_tag(struct ocs_aes_dev *aes_dev, + u8 *tag, u32 tag_size) +{ + u32 tag_u32[AES_MAX_TAG_SIZE_U32]; + + /* + * The Authentication Tag T is stored in Little Endian order in the + * registers with the most significant bytes stored from AES_T_MAC[3] + * downward. + */ + tag_u32[0] = __swab32(ioread32(aes_dev->base_reg + AES_T_MAC_3_OFFSET)); + tag_u32[1] = __swab32(ioread32(aes_dev->base_reg + AES_T_MAC_2_OFFSET)); + tag_u32[2] = __swab32(ioread32(aes_dev->base_reg + AES_T_MAC_1_OFFSET)); + tag_u32[3] = __swab32(ioread32(aes_dev->base_reg + AES_T_MAC_0_OFFSET)); + + memcpy(tag, tag_u32, tag_size); +} + +/** + * ocs_aes_gcm_op() - Perform GCM operation. + * @aes_dev: The OCS AES device to use. + * @cipher: The Cipher to use (AES or SM4). + * @instruction: The instruction to perform (encrypt or decrypt). + * @dst_dma_list: The OCS DMA list mapping output memory. + * @src_dma_list: The OCS DMA list mapping input payload data. + * @src_size: The amount of data mapped by @src_dma_list. + * @iv: The input IV vector. + * @aad_dma_list: The OCS DMA list mapping input AAD data. + * @aad_size: The amount of data mapped by @aad_dma_list. + * @out_tag: Where to store computed tag. + * @tag_size: The size (in bytes) of @out_tag. + * + * Return: 0 on success, negative error code otherwise. + */ +int ocs_aes_gcm_op(struct ocs_aes_dev *aes_dev, + enum ocs_cipher cipher, + enum ocs_instruction instruction, + dma_addr_t dst_dma_list, + dma_addr_t src_dma_list, + u32 src_size, + const u8 *iv, + dma_addr_t aad_dma_list, + u32 aad_size, + u8 *out_tag, + u32 tag_size) +{ + u64 bit_len; + u32 val; + int rc; + + rc = ocs_aes_validate_inputs(src_dma_list, src_size, iv, + GCM_AES_IV_SIZE, aad_dma_list, + aad_size, out_tag, tag_size, cipher, + OCS_MODE_GCM, instruction, + dst_dma_list); + if (rc) + return rc; + + ocs_aes_init(aes_dev, OCS_MODE_GCM, cipher, instruction); + + /* Compute and write J0 to OCS HW. */ + ocs_aes_gcm_write_j0(aes_dev, iv); + + /* Write out_tag byte length */ + iowrite32(tag_size, aes_dev->base_reg + AES_TLEN_OFFSET); + + /* Write the byte length of the last plaintext / ciphertext block. */ + ocs_aes_write_last_data_blk_len(aes_dev, src_size); + + /* Write ciphertext bit length */ + bit_len = src_size * 8; + val = bit_len & 0xFFFFFFFF; + iowrite32(val, aes_dev->base_reg + AES_MULTIPURPOSE2_0_OFFSET); + val = bit_len >> 32; + iowrite32(val, aes_dev->base_reg + AES_MULTIPURPOSE2_1_OFFSET); + + /* Write aad bit length */ + bit_len = aad_size * 8; + val = bit_len & 0xFFFFFFFF; + iowrite32(val, aes_dev->base_reg + AES_MULTIPURPOSE2_2_OFFSET); + val = bit_len >> 32; + iowrite32(val, aes_dev->base_reg + AES_MULTIPURPOSE2_3_OFFSET); + + /* Set AES_ACTIVE.TRIGGER to start the operation. */ + aes_a_op_trigger(aes_dev); + + /* Process AAD. */ + if (aad_size) { + /* If aad present, configure DMA to feed it to the engine. */ + dma_to_ocs_aes_ll(aes_dev, aad_dma_list); + aes_a_dma_active_src_ll_en(aes_dev); + + /* Instructs engine to pad last block of aad, if needed. */ + aes_a_set_last_gcx_and_adata(aes_dev); + + /* Wait for DMA transfer to complete. */ + rc = ocs_aes_irq_enable_and_wait(aes_dev, AES_DMA_SRC_DONE_INT); + if (rc) + return rc; + } else { + aes_a_set_last_gcx_and_adata(aes_dev); + } + + /* Wait until adata (if present) has been processed. */ + aes_a_wait_last_gcx(aes_dev); + aes_a_dma_wait_input_buffer_occupancy(aes_dev); + + /* Now process payload. */ + if (src_size) { + /* Configure and activate DMA for both input and output data. */ + dma_to_ocs_aes_ll(aes_dev, src_dma_list); + dma_from_ocs_aes_ll(aes_dev, dst_dma_list); + aes_a_dma_active_src_dst_ll_en(aes_dev); + } else { + aes_a_dma_set_xfer_size_zero(aes_dev); + aes_a_dma_active(aes_dev); + } + + /* Instruct AES/SMA4 engine payload processing is over. */ + aes_a_set_last_gcx(aes_dev); + + /* Wait for OCS AES engine to complete processing. */ + rc = ocs_aes_irq_enable_and_wait(aes_dev, AES_COMPLETE_INT); + if (rc) + return rc; + + ocs_aes_gcm_read_tag(aes_dev, out_tag, tag_size); + + return 0; +} + +/* Write encrypted tag to AES/SM4 engine. */ +static void ocs_aes_ccm_write_encrypted_tag(struct ocs_aes_dev *aes_dev, + const u8 *in_tag, u32 tag_size) +{ + int i; + + /* Ensure DMA input buffer is empty */ + aes_a_dma_wait_input_buffer_occupancy(aes_dev); + + /* + * During CCM decrypt, the OCS block needs to finish processing the + * ciphertext before the tag is written. So delay needed after DMA has + * completed writing the ciphertext + */ + aes_a_dma_reset_and_activate_perf_cntr(aes_dev); + aes_a_dma_wait_and_deactivate_perf_cntr(aes_dev, + CCM_DECRYPT_DELAY_TAG_CLK_COUNT); + + /* Write encrypted tag to AES/SM4 engine. */ + for (i = 0; i < tag_size; i++) { + iowrite8(in_tag[i], aes_dev->base_reg + + AES_A_DMA_INBUFFER_WRITE_FIFO_OFFSET); + } +} + +/* + * Write B0 CCM block to OCS AES HW. + * + * Note: B0 format is documented in NIST Special Publication 800-38C + * https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38c.pdf + * (see Section A.2.1) + */ +static int ocs_aes_ccm_write_b0(const struct ocs_aes_dev *aes_dev, + const u8 *iv, u32 adata_size, u32 tag_size, + u32 cryptlen) +{ + u8 b0[16]; /* CCM B0 block is 16 bytes long. */ + int i, q; + + /* Initialize B0 to 0. */ + memset(b0, 0, sizeof(b0)); + + /* + * B0[0] is the 'Flags Octet' and has the following structure: + * bit 7: Reserved + * bit 6: Adata flag + * bit 5-3: t value encoded as (t-2)/2 + * bit 2-0: q value encoded as q - 1 + */ + /* If there is AAD data, set the Adata flag. */ + if (adata_size) + b0[0] |= BIT(6); + /* + * t denotes the octet length of T. + * t can only be an element of { 4, 6, 8, 10, 12, 14, 16} and is + * encoded as (t - 2) / 2 + */ + b0[0] |= (((tag_size - 2) / 2) & 0x7) << 3; + /* + * q is the octet length of Q. + * q can only be an element of {2, 3, 4, 5, 6, 7, 8} and is encoded as + * q - 1 == iv[0] + */ + b0[0] |= iv[0] & 0x7; + /* + * Copy the Nonce N from IV to B0; N is located in iv[1]..iv[15 - q] + * and must be copied to b0[1]..b0[15-q]. + * q == iv[0] + 1 + */ + q = iv[0] + 1; + for (i = 1; i <= 15 - q; i++) + b0[i] = iv[i]; + /* + * The rest of B0 must contain Q, i.e., the message length. + * Q is encoded in q octets, in big-endian order, so to write it, we + * start from the end of B0 and we move backward. + */ + i = sizeof(b0) - 1; + while (q) { + b0[i] = cryptlen & 0xff; + cryptlen >>= 8; + i--; + q--; + } + /* + * If cryptlen is not zero at this point, it means that its original + * value was too big. + */ + if (cryptlen) + return -EOVERFLOW; + /* Now write B0 to OCS AES input buffer. */ + for (i = 0; i < sizeof(b0); i++) + iowrite8(b0[i], aes_dev->base_reg + + AES_A_DMA_INBUFFER_WRITE_FIFO_OFFSET); + return 0; +} + +/* + * Write adata length to OCS AES HW. + * + * Note: adata len encoding is documented in NIST Special Publication 800-38C + * https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38c.pdf + * (see Section A.2.2) + */ +static void ocs_aes_ccm_write_adata_len(const struct ocs_aes_dev *aes_dev, + u64 adata_len) +{ + u8 enc_a[10]; /* Maximum encoded size: 10 octets. */ + int i, len; + + /* + * adata_len ('a') is encoded as follows: + * If 0 < a < 2^16 - 2^8 ==> 'a' encoded as [a]16, i.e., two octets + * (big endian). + * If 2^16 - 2^8 ≤ a < 2^32 ==> 'a' encoded as 0xff || 0xfe || [a]32, + * i.e., six octets (big endian). + * If 2^32 ≤ a < 2^64 ==> 'a' encoded as 0xff || 0xff || [a]64, + * i.e., ten octets (big endian). + */ + if (adata_len < 65280) { + len = 2; + *(__be16 *)enc_a = cpu_to_be16(adata_len); + } else if (adata_len <= 0xFFFFFFFF) { + len = 6; + *(__be16 *)enc_a = cpu_to_be16(0xfffe); + *(__be32 *)&enc_a[2] = cpu_to_be32(adata_len); + } else { /* adata_len >= 2^32 */ + len = 10; + *(__be16 *)enc_a = cpu_to_be16(0xffff); + *(__be64 *)&enc_a[2] = cpu_to_be64(adata_len); + } + for (i = 0; i < len; i++) + iowrite8(enc_a[i], + aes_dev->base_reg + + AES_A_DMA_INBUFFER_WRITE_FIFO_OFFSET); +} + +static int ocs_aes_ccm_do_adata(struct ocs_aes_dev *aes_dev, + dma_addr_t adata_dma_list, u32 adata_size) +{ + int rc; + + if (!adata_size) { + /* Since no aad the LAST_GCX bit can be set now */ + aes_a_set_last_gcx_and_adata(aes_dev); + goto exit; + } + + /* Adata case. */ + + /* + * Form the encoding of the Associated data length and write it + * to the AES/SM4 input buffer. + */ + ocs_aes_ccm_write_adata_len(aes_dev, adata_size); + + /* Configure the AES/SM4 DMA to fetch the Associated Data */ + dma_to_ocs_aes_ll(aes_dev, adata_dma_list); + + /* Activate DMA to fetch Associated data. */ + aes_a_dma_active_src_ll_en(aes_dev); + + /* Set LAST_GCX and LAST_ADATA in AES ACTIVE register. */ + aes_a_set_last_gcx_and_adata(aes_dev); + + /* Wait for DMA transfer to complete. */ + rc = ocs_aes_irq_enable_and_wait(aes_dev, AES_DMA_SRC_DONE_INT); + if (rc) + return rc; + +exit: + /* Wait until adata (if present) has been processed. */ + aes_a_wait_last_gcx(aes_dev); + aes_a_dma_wait_input_buffer_occupancy(aes_dev); + + return 0; +} + +static int ocs_aes_ccm_encrypt_do_payload(struct ocs_aes_dev *aes_dev, + dma_addr_t dst_dma_list, + dma_addr_t src_dma_list, + u32 src_size) +{ + if (src_size) { + /* + * Configure and activate DMA for both input and output + * data. + */ + dma_to_ocs_aes_ll(aes_dev, src_dma_list); + dma_from_ocs_aes_ll(aes_dev, dst_dma_list); + aes_a_dma_active_src_dst_ll_en(aes_dev); + } else { + /* Configure and activate DMA for output data only. */ + dma_from_ocs_aes_ll(aes_dev, dst_dma_list); + aes_a_dma_active_dst_ll_en(aes_dev); + } + + /* + * Set the LAST GCX bit in AES_ACTIVE Register to instruct + * AES/SM4 engine to pad the last block of data. + */ + aes_a_set_last_gcx(aes_dev); + + /* We are done, wait for IRQ and return. */ + return ocs_aes_irq_enable_and_wait(aes_dev, AES_COMPLETE_INT); +} + +static int ocs_aes_ccm_decrypt_do_payload(struct ocs_aes_dev *aes_dev, + dma_addr_t dst_dma_list, + dma_addr_t src_dma_list, + u32 src_size) +{ + if (!src_size) { + /* Let engine process 0-length input. */ + aes_a_dma_set_xfer_size_zero(aes_dev); + aes_a_dma_active(aes_dev); + aes_a_set_last_gcx(aes_dev); + + return 0; + } + + /* + * Configure and activate DMA for both input and output + * data. + */ + dma_to_ocs_aes_ll(aes_dev, src_dma_list); + dma_from_ocs_aes_ll(aes_dev, dst_dma_list); + aes_a_dma_active_src_dst_ll_en(aes_dev); + /* + * Set the LAST GCX bit in AES_ACTIVE Register; this allows the + * AES/SM4 engine to differentiate between encrypted data and + * encrypted MAC. + */ + aes_a_set_last_gcx(aes_dev); + /* + * Enable DMA DONE interrupt; once DMA transfer is over, + * interrupt handler will process the MAC/tag. + */ + return ocs_aes_irq_enable_and_wait(aes_dev, AES_DMA_SRC_DONE_INT); +} + +/* + * Compare Tag to Yr. + * + * Only used at the end of CCM decrypt. If tag == yr, message authentication + * has succeeded. + */ +static inline int ccm_compare_tag_to_yr(struct ocs_aes_dev *aes_dev, + u8 tag_size_bytes) +{ + u32 tag[AES_MAX_TAG_SIZE_U32]; + u32 yr[AES_MAX_TAG_SIZE_U32]; + u8 i; + + /* Read Tag and Yr from AES registers. */ + for (i = 0; i < AES_MAX_TAG_SIZE_U32; i++) { + tag[i] = ioread32(aes_dev->base_reg + + AES_T_MAC_0_OFFSET + (i * sizeof(u32))); + yr[i] = ioread32(aes_dev->base_reg + + AES_MULTIPURPOSE2_0_OFFSET + + (i * sizeof(u32))); + } + + return memcmp(tag, yr, tag_size_bytes) ? -EBADMSG : 0; +} + +/** + * ocs_aes_ccm_op() - Perform CCM operation. + * @aes_dev: The OCS AES device to use. + * @cipher: The Cipher to use (AES or SM4). + * @instruction: The instruction to perform (encrypt or decrypt). + * @dst_dma_list: The OCS DMA list mapping output memory. + * @src_dma_list: The OCS DMA list mapping input payload data. + * @src_size: The amount of data mapped by @src_dma_list. + * @iv: The input IV vector. + * @adata_dma_list: The OCS DMA list mapping input A-data. + * @adata_size: The amount of data mapped by @adata_dma_list. + * @in_tag: Input tag. + * @tag_size: The size (in bytes) of @in_tag. + * + * Note: for encrypt the tag is appended to the ciphertext (in the memory + * mapped by @dst_dma_list). + * + * Return: 0 on success, negative error code otherwise. + */ +int ocs_aes_ccm_op(struct ocs_aes_dev *aes_dev, + enum ocs_cipher cipher, + enum ocs_instruction instruction, + dma_addr_t dst_dma_list, + dma_addr_t src_dma_list, + u32 src_size, + u8 *iv, + dma_addr_t adata_dma_list, + u32 adata_size, + u8 *in_tag, + u32 tag_size) +{ + u32 *iv_32; + u8 lprime; + int rc; + + rc = ocs_aes_validate_inputs(src_dma_list, src_size, iv, + AES_BLOCK_SIZE, adata_dma_list, adata_size, + in_tag, tag_size, cipher, OCS_MODE_CCM, + instruction, dst_dma_list); + if (rc) + return rc; + + ocs_aes_init(aes_dev, OCS_MODE_CCM, cipher, instruction); + + /* + * Note: rfc 3610 and NIST 800-38C require counter of zero to encrypt + * auth tag so ensure this is the case + */ + lprime = iv[L_PRIME_IDX]; + memset(&iv[COUNTER_START(lprime)], 0, COUNTER_LEN(lprime)); + + /* + * Nonce is already converted to ctr0 before being passed into this + * function as iv. + */ + iv_32 = (u32 *)iv; + iowrite32(__swab32(iv_32[0]), + aes_dev->base_reg + AES_MULTIPURPOSE1_3_OFFSET); + iowrite32(__swab32(iv_32[1]), + aes_dev->base_reg + AES_MULTIPURPOSE1_2_OFFSET); + iowrite32(__swab32(iv_32[2]), + aes_dev->base_reg + AES_MULTIPURPOSE1_1_OFFSET); + iowrite32(__swab32(iv_32[3]), + aes_dev->base_reg + AES_MULTIPURPOSE1_0_OFFSET); + + /* Write MAC/tag length in register AES_TLEN */ + iowrite32(tag_size, aes_dev->base_reg + AES_TLEN_OFFSET); + /* + * Write the byte length of the last AES/SM4 block of Payload data + * (without zero padding and without the length of the MAC) in register + * AES_PLEN. + */ + ocs_aes_write_last_data_blk_len(aes_dev, src_size); + + /* Set AES_ACTIVE.TRIGGER to start the operation. */ + aes_a_op_trigger(aes_dev); + + aes_a_dma_reset_and_activate_perf_cntr(aes_dev); + + /* Form block B0 and write it to the AES/SM4 input buffer. */ + rc = ocs_aes_ccm_write_b0(aes_dev, iv, adata_size, tag_size, src_size); + if (rc) + return rc; + /* + * Ensure there has been at least CCM_DECRYPT_DELAY_LAST_GCX_CLK_COUNT + * clock cycles since TRIGGER bit was set + */ + aes_a_dma_wait_and_deactivate_perf_cntr(aes_dev, + CCM_DECRYPT_DELAY_LAST_GCX_CLK_COUNT); + + /* Process Adata. */ + ocs_aes_ccm_do_adata(aes_dev, adata_dma_list, adata_size); + + /* For Encrypt case we just process the payload and return. */ + if (instruction == OCS_ENCRYPT) { + return ocs_aes_ccm_encrypt_do_payload(aes_dev, dst_dma_list, + src_dma_list, src_size); + } + /* For Decypt we need to process the payload and then the tag. */ + rc = ocs_aes_ccm_decrypt_do_payload(aes_dev, dst_dma_list, + src_dma_list, src_size); + if (rc) + return rc; + + /* Process MAC/tag directly: feed tag to engine and wait for IRQ. */ + ocs_aes_ccm_write_encrypted_tag(aes_dev, in_tag, tag_size); + rc = ocs_aes_irq_enable_and_wait(aes_dev, AES_COMPLETE_INT); + if (rc) + return rc; + + return ccm_compare_tag_to_yr(aes_dev, tag_size); +} + +/** + * ocs_create_linked_list_from_sg() - Create OCS DMA linked list from SG list. + * @aes_dev: The OCS AES device the list will be created for. + * @sg: The SG list OCS DMA linked list will be created from. When + * passed to this function, @sg must have been already mapped + * with dma_map_sg(). + * @sg_dma_count: The number of DMA-mapped entries in @sg. This must be the + * value returned by dma_map_sg() when @sg was mapped. + * @dll_desc: The OCS DMA dma_list to use to store information about the + * created linked list. + * @data_size: The size of the data (from the SG list) to be mapped into the + * OCS DMA linked list. + * @data_offset: The offset (within the SG list) of the data to be mapped. + * + * Return: 0 on success, negative error code otherwise. + */ +int ocs_create_linked_list_from_sg(const struct ocs_aes_dev *aes_dev, + struct scatterlist *sg, + int sg_dma_count, + struct ocs_dll_desc *dll_desc, + size_t data_size, size_t data_offset) +{ + struct ocs_dma_linked_list *ll = NULL; + struct scatterlist *sg_tmp; + unsigned int tmp; + int dma_nents; + int i; + + if (!dll_desc || !sg || !aes_dev) + return -EINVAL; + + /* Default values for when no ddl_desc is created. */ + dll_desc->vaddr = NULL; + dll_desc->dma_addr = DMA_MAPPING_ERROR; + dll_desc->size = 0; + + if (data_size == 0) + return 0; + + /* Loop over sg_list until we reach entry at specified offset. */ + while (data_offset >= sg_dma_len(sg)) { + data_offset -= sg_dma_len(sg); + sg_dma_count--; + sg = sg_next(sg); + /* If we reach the end of the list, offset was invalid. */ + if (!sg || sg_dma_count == 0) + return -EINVAL; + } + + /* Compute number of DMA-mapped SG entries to add into OCS DMA list. */ + dma_nents = 0; + tmp = 0; + sg_tmp = sg; + while (tmp < data_offset + data_size) { + /* If we reach the end of the list, data_size was invalid. */ + if (!sg_tmp) + return -EINVAL; + tmp += sg_dma_len(sg_tmp); + dma_nents++; + sg_tmp = sg_next(sg_tmp); + } + if (dma_nents > sg_dma_count) + return -EINVAL; + + /* Allocate the DMA list, one entry for each SG entry. */ + dll_desc->size = sizeof(struct ocs_dma_linked_list) * dma_nents; + dll_desc->vaddr = dma_alloc_coherent(aes_dev->dev, dll_desc->size, + &dll_desc->dma_addr, GFP_KERNEL); + if (!dll_desc->vaddr) + return -ENOMEM; + + /* Populate DMA linked list entries. */ + ll = dll_desc->vaddr; + for (i = 0; i < dma_nents; i++, sg = sg_next(sg)) { + ll[i].src_addr = sg_dma_address(sg) + data_offset; + ll[i].src_len = (sg_dma_len(sg) - data_offset) < data_size ? + (sg_dma_len(sg) - data_offset) : data_size; + data_offset = 0; + data_size -= ll[i].src_len; + /* Current element points to the DMA address of the next one. */ + ll[i].next = dll_desc->dma_addr + (sizeof(*ll) * (i + 1)); + ll[i].ll_flags = 0; + } + /* Terminate last element. */ + ll[i - 1].next = 0; + ll[i - 1].ll_flags = OCS_LL_DMA_FLAG_TERMINATE; + + return 0; +} diff --git a/drivers/crypto/keembay/ocs-aes.h b/drivers/crypto/keembay/ocs-aes.h new file mode 100644 index 000000000000..c035fc48b7ed --- /dev/null +++ b/drivers/crypto/keembay/ocs-aes.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Intel Keem Bay OCS AES Crypto Driver. + * + * Copyright (C) 2018-2020 Intel Corporation + */ + +#ifndef _CRYPTO_OCS_AES_H +#define _CRYPTO_OCS_AES_H + +#include + +enum ocs_cipher { + OCS_AES = 0, + OCS_SM4 = 1, +}; + +enum ocs_mode { + OCS_MODE_ECB = 0, + OCS_MODE_CBC = 1, + OCS_MODE_CTR = 2, + OCS_MODE_CCM = 6, + OCS_MODE_GCM = 7, + OCS_MODE_CTS = 9, +}; + +enum ocs_instruction { + OCS_ENCRYPT = 0, + OCS_DECRYPT = 1, + OCS_EXPAND = 2, + OCS_BYPASS = 3, +}; + +/** + * struct ocs_aes_dev - AES device context. + * @list: List head for insertion into device list hold + * by driver. + * @dev: OCS AES device. + * @irq: IRQ number. + * @base_reg: IO base address of OCS AES. + * @irq_copy_completion: Completion to indicate IRQ has been triggered. + * @dma_err_mask: Error reported by OCS DMA interrupts. + * @engine: Crypto engine for the device. + */ +struct ocs_aes_dev { + struct list_head list; + struct device *dev; + int irq; + void __iomem *base_reg; + struct completion irq_completion; + u32 dma_err_mask; + struct crypto_engine *engine; +}; + +/** + * struct ocs_dll_desc - Descriptor of an OCS DMA Linked List. + * @vaddr: Virtual address of the linked list head. + * @dma_addr: DMA address of the linked list head. + * @size: Size (in bytes) of the linked list. + */ +struct ocs_dll_desc { + void *vaddr; + dma_addr_t dma_addr; + size_t size; +}; + +int ocs_aes_set_key(struct ocs_aes_dev *aes_dev, const u32 key_size, + const u8 *key, const enum ocs_cipher cipher); + +int ocs_aes_op(struct ocs_aes_dev *aes_dev, + enum ocs_mode mode, + enum ocs_cipher cipher, + enum ocs_instruction instruction, + dma_addr_t dst_dma_list, + dma_addr_t src_dma_list, + u32 src_size, + u8 *iv, + u32 iv_size); + +/** + * ocs_aes_bypass_op() - Use OCS DMA to copy data. + * @aes_dev: The OCS AES device to use. + * @dst_dma_list: The OCS DMA list mapping the memory where input data + * will be copied to. + * @src_dma_list: The OCS DMA list mapping input data. + * @src_size: The amount of data to copy. + */ +static inline int ocs_aes_bypass_op(struct ocs_aes_dev *aes_dev, + dma_addr_t dst_dma_list, + dma_addr_t src_dma_list, u32 src_size) +{ + return ocs_aes_op(aes_dev, OCS_MODE_ECB, OCS_AES, OCS_BYPASS, + dst_dma_list, src_dma_list, src_size, NULL, 0); +} + +int ocs_aes_gcm_op(struct ocs_aes_dev *aes_dev, + enum ocs_cipher cipher, + enum ocs_instruction instruction, + dma_addr_t dst_dma_list, + dma_addr_t src_dma_list, + u32 src_size, + const u8 *iv, + dma_addr_t aad_dma_list, + u32 aad_size, + u8 *out_tag, + u32 tag_size); + +int ocs_aes_ccm_op(struct ocs_aes_dev *aes_dev, + enum ocs_cipher cipher, + enum ocs_instruction instruction, + dma_addr_t dst_dma_list, + dma_addr_t src_dma_list, + u32 src_size, + u8 *iv, + dma_addr_t adata_dma_list, + u32 adata_size, + u8 *in_tag, + u32 tag_size); + +int ocs_create_linked_list_from_sg(const struct ocs_aes_dev *aes_dev, + struct scatterlist *sg, + int sg_dma_count, + struct ocs_dll_desc *dll_desc, + size_t data_size, + size_t data_offset); + +irqreturn_t ocs_aes_irq_handler(int irq, void *dev_id); + +#endif diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c index add7ea011c98..8cf9fd518d86 100644 --- a/drivers/crypto/marvell/cesa/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -11,7 +11,8 @@ #include #include -#include +#include +#include #include #include diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_main.c b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c index 34bb3063eb70..14a42559f81d 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c @@ -212,15 +212,9 @@ static int otx_cpt_probe(struct pci_dev *pdev, goto err_disable_device; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) { - dev_err(dev, "Unable to get usable DMA configuration\n"); - goto err_release_regions; - } - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); - if (err) { - dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + dev_err(dev, "Unable to get usable 48-bit DMA configuration\n"); goto err_release_regions; } diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c index 90bb31329d4b..ccbef01888d4 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -13,7 +13,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c index 228fe8e47e0e..c076d0b3ad5f 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c @@ -804,15 +804,9 @@ static int otx_cptvf_probe(struct pci_dev *pdev, dev_err(dev, "PCI request regions failed 0x%x\n", err); goto disable_device; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) { - dev_err(dev, "Unable to get usable DMA configuration\n"); - goto release_regions; - } - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); - if (err) { - dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + dev_err(dev, "Unable to get usable 48-bit DMA configuration\n"); goto release_regions; } diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index 3d5d7d68b03b..f55aacdafbef 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -10,7 +10,8 @@ */ #include -#include +#include +#include #include "mtk-platform.h" #define SHA_ALIGN_MSK (sizeof(u32) - 1) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 909a7eb748e3..d6a7784d2988 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -17,7 +17,8 @@ #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 3642bf83d809..3b0bf6fea491 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 02fb53453195..90d9a37a57f6 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index 4c7a3e3eeebf..eb8627a0f317 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 40882d6d52c1..0d2dc5be7f19 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 4fd14d90cc40..a45bdcf3026d 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -105,6 +105,7 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd) err = pm_runtime_get_sync(dd->dev); if (err < 0) { + pm_runtime_put_noidle(dd->dev); dev_err(dd->dev, "failed to get sync: %d\n", err); return err; } @@ -1137,7 +1138,7 @@ static int omap_aes_probe(struct platform_device *pdev) if (err < 0) { dev_err(dev, "%s: failed to get_sync(%d)\n", __func__, err); - goto err_res; + goto err_pm_disable; } omap_aes_dma_stop(dd); @@ -1246,6 +1247,7 @@ static int omap_aes_probe(struct platform_device *pdev) omap_aes_dma_cleanup(dd); err_irq: tasklet_kill(&dd->done_task); +err_pm_disable: pm_runtime_disable(dev); err_res: dd = NULL; diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index a3b38d2c92e7..ae0d320d3c60 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -35,7 +35,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index a697a4a3f2d0..6865c7f1fc1a 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -9,7 +9,8 @@ #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index fb34bf92861d..84f9c16d984c 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -8,7 +8,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 2006322345de..beb379b23dc3 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -46,6 +46,17 @@ config CRYPTO_DEV_QAT_C62X To compile this as a module, choose M here: the module will be called qat_c62x. +config CRYPTO_DEV_QAT_4XXX + tristate "Support for Intel(R) QAT_4XXX" + depends on X86 && PCI + select CRYPTO_DEV_QAT + help + Support for Intel(R) QuickAssist Technology QAT_4xxx + for accelerating crypto and compression workloads. + + To compile this as a module, choose M here: the module + will be called qat_4xxx. + config CRYPTO_DEV_QAT_DH895xCCVF tristate "Support for Intel(R) DH895xCC Virtual Function" depends on X86 && PCI diff --git a/drivers/crypto/qat/Makefile b/drivers/crypto/qat/Makefile index 7dd15e751d02..258c8a626ce0 100644 --- a/drivers/crypto/qat/Makefile +++ b/drivers/crypto/qat/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_CRYPTO_DEV_QAT) += qat_common/ obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCC) += qat_dh895xcc/ obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXX) += qat_c3xxx/ obj-$(CONFIG_CRYPTO_DEV_QAT_C62X) += qat_c62x/ +obj-$(CONFIG_CRYPTO_DEV_QAT_4XXX) += qat_4xxx/ obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCCVF) += qat_dh895xccvf/ obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXXVF) += qat_c3xxxvf/ obj-$(CONFIG_CRYPTO_DEV_QAT_C62XVF) += qat_c62xvf/ diff --git a/drivers/crypto/qat/qat_4xxx/Makefile b/drivers/crypto/qat/qat_4xxx/Makefile new file mode 100644 index 000000000000..ff9c8b5897ea --- /dev/null +++ b/drivers/crypto/qat/qat_4xxx/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) +ccflags-y := -I $(srctree)/$(src)/../qat_common +obj-$(CONFIG_CRYPTO_DEV_QAT_4XXX) += qat_4xxx.o +qat_4xxx-objs := adf_drv.o adf_4xxx_hw_data.o diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c new file mode 100644 index 000000000000..344bfae45bff --- /dev/null +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) +/* Copyright(c) 2020 Intel Corporation */ +#include +#include +#include +#include +#include "adf_4xxx_hw_data.h" +#include "icp_qat_hw.h" + +struct adf_fw_config { + u32 ae_mask; + char *obj_name; +}; + +static struct adf_fw_config adf_4xxx_fw_config[] = { + {0xF0, ADF_4XXX_SYM_OBJ}, + {0xF, ADF_4XXX_ASYM_OBJ}, + {0x100, ADF_4XXX_ADMIN_OBJ}, +}; + +/* Worker thread to service arbiter mappings */ +static u32 thrd_to_arb_map[] = { + 0x5555555, 0x5555555, 0x5555555, 0x5555555, + 0xAAAAAAA, 0xAAAAAAA, 0xAAAAAAA, 0xAAAAAAA, + 0x0 +}; + +static struct adf_hw_device_class adf_4xxx_class = { + .name = ADF_4XXX_DEVICE_NAME, + .type = DEV_4XXX, + .instances = 0, +}; + +static u32 get_accel_mask(struct adf_hw_device_data *self) +{ + return ADF_4XXX_ACCELERATORS_MASK; +} + +static u32 get_ae_mask(struct adf_hw_device_data *self) +{ + u32 me_disable = self->fuses; + + return ~me_disable & ADF_4XXX_ACCELENGINES_MASK; +} + +static u32 get_num_accels(struct adf_hw_device_data *self) +{ + return ADF_4XXX_MAX_ACCELERATORS; +} + +static u32 get_num_aes(struct adf_hw_device_data *self) +{ + if (!self || !self->ae_mask) + return 0; + + return hweight32(self->ae_mask); +} + +static u32 get_misc_bar_id(struct adf_hw_device_data *self) +{ + return ADF_4XXX_PMISC_BAR; +} + +static u32 get_etr_bar_id(struct adf_hw_device_data *self) +{ + return ADF_4XXX_ETR_BAR; +} + +static u32 get_sram_bar_id(struct adf_hw_device_data *self) +{ + return ADF_4XXX_SRAM_BAR; +} + +/* + * The vector routing table is used to select the MSI-X entry to use for each + * interrupt source. + * The first ADF_4XXX_ETR_MAX_BANKS entries correspond to ring interrupts. + * The final entry corresponds to VF2PF or error interrupts. + * This vector table could be used to configure one MSI-X entry to be shared + * between multiple interrupt sources. + * + * The default routing is set to have a one to one correspondence between the + * interrupt source and the MSI-X entry used. + */ +static void set_msix_default_rttable(struct adf_accel_dev *accel_dev) +{ + void __iomem *csr; + int i; + + csr = (&GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR])->virt_addr; + for (i = 0; i <= ADF_4XXX_ETR_MAX_BANKS; i++) + ADF_CSR_WR(csr, ADF_4XXX_MSIX_RTTABLE_OFFSET(i), i); +} + +static u32 get_accel_cap(struct adf_accel_dev *accel_dev) +{ + struct pci_dev *pdev = accel_dev->accel_pci_dev.pci_dev; + u32 fusectl1; + u32 capabilities = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC | + ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC | + ICP_ACCEL_CAPABILITIES_AUTHENTICATION | + ICP_ACCEL_CAPABILITIES_AES_V2; + + /* Read accelerator capabilities mask */ + pci_read_config_dword(pdev, ADF_4XXX_FUSECTL1_OFFSET, &fusectl1); + + if (fusectl1 & ICP_ACCEL_4XXX_MASK_CIPHER_SLICE) + capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC; + if (fusectl1 & ICP_ACCEL_4XXX_MASK_AUTH_SLICE) + capabilities &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION; + if (fusectl1 & ICP_ACCEL_4XXX_MASK_PKE_SLICE) + capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC; + + return capabilities; +} + +static enum dev_sku_info get_sku(struct adf_hw_device_data *self) +{ + return DEV_SKU_1; +} + +static void adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev, + u32 const **arb_map_config) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + unsigned long ae_mask = hw_device->ae_mask; + int i; + + for_each_clear_bit(i, &ae_mask, ADF_4XXX_MAX_ACCELENGINES) + thrd_to_arb_map[i] = 0; + + *arb_map_config = thrd_to_arb_map; +} + +static void get_arb_info(struct arb_info *arb_info) +{ + arb_info->arb_cfg = ADF_4XXX_ARB_CONFIG; + arb_info->arb_offset = ADF_4XXX_ARB_OFFSET; + arb_info->wt2sam_offset = ADF_4XXX_ARB_WRK_2_SER_MAP_OFFSET; +} + +static void get_admin_info(struct admin_info *admin_csrs_info) +{ + admin_csrs_info->mailbox_offset = ADF_4XXX_MAILBOX_BASE_OFFSET; + admin_csrs_info->admin_msg_ur = ADF_4XXX_ADMINMSGUR_OFFSET; + admin_csrs_info->admin_msg_lr = ADF_4XXX_ADMINMSGLR_OFFSET; +} + +static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) +{ + struct adf_bar *misc_bar = &GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR]; + void __iomem *csr = misc_bar->virt_addr; + + /* Enable all in errsou3 except VFLR notification on host */ + ADF_CSR_WR(csr, ADF_4XXX_ERRMSK3, ADF_4XXX_VFLNOTIFY); +} + +static void adf_enable_ints(struct adf_accel_dev *accel_dev) +{ + void __iomem *addr; + + addr = (&GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR])->virt_addr; + + /* Enable bundle interrupts */ + ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_RP_X0_MASK_OFFSET, 0); + ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_RP_X1_MASK_OFFSET, 0); + + /* Enable misc interrupts */ + ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_MASK_OFFSET, 0); +} + +static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) +{ + return 0; +} + +static u32 uof_get_num_objs(void) +{ + return ARRAY_SIZE(adf_4xxx_fw_config); +} + +static char *uof_get_name(u32 obj_num) +{ + return adf_4xxx_fw_config[obj_num].obj_name; +} + +static u32 uof_get_ae_mask(u32 obj_num) +{ + return adf_4xxx_fw_config[obj_num].ae_mask; +} + +void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) +{ + hw_data->dev_class = &adf_4xxx_class; + hw_data->instance_id = adf_4xxx_class.instances++; + hw_data->num_banks = ADF_4XXX_ETR_MAX_BANKS; + hw_data->num_rings_per_bank = ADF_4XXX_NUM_RINGS_PER_BANK; + hw_data->num_accel = ADF_4XXX_MAX_ACCELERATORS; + hw_data->num_engines = ADF_4XXX_MAX_ACCELENGINES; + hw_data->num_logical_accel = 1; + hw_data->tx_rx_gap = ADF_4XXX_RX_RINGS_OFFSET; + hw_data->tx_rings_mask = ADF_4XXX_TX_RINGS_MASK; + hw_data->alloc_irq = adf_isr_resource_alloc; + hw_data->free_irq = adf_isr_resource_free; + hw_data->enable_error_correction = adf_enable_error_correction; + hw_data->get_accel_mask = get_accel_mask; + hw_data->get_ae_mask = get_ae_mask; + hw_data->get_num_accels = get_num_accels; + hw_data->get_num_aes = get_num_aes; + hw_data->get_sram_bar_id = get_sram_bar_id; + hw_data->get_etr_bar_id = get_etr_bar_id; + hw_data->get_misc_bar_id = get_misc_bar_id; + hw_data->get_arb_info = get_arb_info; + hw_data->get_admin_info = get_admin_info; + hw_data->get_accel_cap = get_accel_cap; + hw_data->get_sku = get_sku; + hw_data->fw_name = ADF_4XXX_FW; + hw_data->fw_mmp_name = ADF_4XXX_MMP; + hw_data->init_admin_comms = adf_init_admin_comms; + hw_data->exit_admin_comms = adf_exit_admin_comms; + hw_data->disable_iov = adf_disable_sriov; + hw_data->send_admin_init = adf_send_admin_init; + hw_data->init_arb = adf_init_arb; + hw_data->exit_arb = adf_exit_arb; + hw_data->get_arb_mapping = adf_get_arbiter_mapping; + hw_data->enable_ints = adf_enable_ints; + hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; + hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK; + hw_data->uof_get_num_objs = uof_get_num_objs; + hw_data->uof_get_name = uof_get_name; + hw_data->uof_get_ae_mask = uof_get_ae_mask; + hw_data->set_msix_rttable = set_msix_default_rttable; + + adf_gen4_init_hw_csr_ops(&hw_data->csr_ops); +} + +void adf_clean_hw_data_4xxx(struct adf_hw_device_data *hw_data) +{ + hw_data->dev_class->instances--; +} diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h new file mode 100644 index 000000000000..4fe2a776293c --- /dev/null +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */ +/* Copyright(c) 2014 - 2020 Intel Corporation */ +#ifndef ADF_4XXX_HW_DATA_H_ +#define ADF_4XXX_HW_DATA_H_ + +#include + +/* PCIe configuration space */ +#define ADF_4XXX_SRAM_BAR 0 +#define ADF_4XXX_PMISC_BAR 1 +#define ADF_4XXX_ETR_BAR 2 +#define ADF_4XXX_RX_RINGS_OFFSET 1 +#define ADF_4XXX_TX_RINGS_MASK 0x1 +#define ADF_4XXX_MAX_ACCELERATORS 1 +#define ADF_4XXX_MAX_ACCELENGINES 9 +#define ADF_4XXX_BAR_MASK (BIT(0) | BIT(2) | BIT(4)) + +/* Physical function fuses */ +#define ADF_4XXX_FUSECTL0_OFFSET (0x2C8) +#define ADF_4XXX_FUSECTL1_OFFSET (0x2CC) +#define ADF_4XXX_FUSECTL2_OFFSET (0x2D0) +#define ADF_4XXX_FUSECTL3_OFFSET (0x2D4) +#define ADF_4XXX_FUSECTL4_OFFSET (0x2D8) +#define ADF_4XXX_FUSECTL5_OFFSET (0x2DC) + +#define ADF_4XXX_ACCELERATORS_MASK (0x1) +#define ADF_4XXX_ACCELENGINES_MASK (0x1FF) +#define ADF_4XXX_ADMIN_AE_MASK (0x100) + +#define ADF_4XXX_ETR_MAX_BANKS 64 + +/* MSIX interrupt */ +#define ADF_4XXX_SMIAPF_RP_X0_MASK_OFFSET (0x41A040) +#define ADF_4XXX_SMIAPF_RP_X1_MASK_OFFSET (0x41A044) +#define ADF_4XXX_SMIAPF_MASK_OFFSET (0x41A084) +#define ADF_4XXX_MSIX_RTTABLE_OFFSET(i) (0x409000 + ((i) * 0x04)) + +/* Bank and ring configuration */ +#define ADF_4XXX_NUM_RINGS_PER_BANK 2 + +/* Error source registers */ +#define ADF_4XXX_ERRSOU0 (0x41A200) +#define ADF_4XXX_ERRSOU1 (0x41A204) +#define ADF_4XXX_ERRSOU2 (0x41A208) +#define ADF_4XXX_ERRSOU3 (0x41A20C) + +/* Error source mask registers */ +#define ADF_4XXX_ERRMSK0 (0x41A210) +#define ADF_4XXX_ERRMSK1 (0x41A214) +#define ADF_4XXX_ERRMSK2 (0x41A218) +#define ADF_4XXX_ERRMSK3 (0x41A21C) + +#define ADF_4XXX_VFLNOTIFY BIT(7) + +/* Arbiter configuration */ +#define ADF_4XXX_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0)) +#define ADF_4XXX_ARB_OFFSET (0x0) +#define ADF_4XXX_ARB_WRK_2_SER_MAP_OFFSET (0x400) + +/* Admin Interface Reg Offset */ +#define ADF_4XXX_ADMINMSGUR_OFFSET (0x500574) +#define ADF_4XXX_ADMINMSGLR_OFFSET (0x500578) +#define ADF_4XXX_MAILBOX_BASE_OFFSET (0x600970) + +/* Firmware Binaries */ +#define ADF_4XXX_FW "qat_4xxx.bin" +#define ADF_4XXX_MMP "qat_4xxx_mmp.bin" +#define ADF_4XXX_SYM_OBJ "qat_4xxx_sym.bin" +#define ADF_4XXX_ASYM_OBJ "qat_4xxx_asym.bin" +#define ADF_4XXX_ADMIN_OBJ "qat_4xxx_admin.bin" + +/* qat_4xxx fuse bits are different from old GENs, redefine them */ +enum icp_qat_4xxx_slice_mask { + ICP_ACCEL_4XXX_MASK_CIPHER_SLICE = BIT(0), + ICP_ACCEL_4XXX_MASK_AUTH_SLICE = BIT(1), + ICP_ACCEL_4XXX_MASK_PKE_SLICE = BIT(2), + ICP_ACCEL_4XXX_MASK_COMPRESS_SLICE = BIT(3), + ICP_ACCEL_4XXX_MASK_UCS_SLICE = BIT(4), + ICP_ACCEL_4XXX_MASK_EIA3_SLICE = BIT(5), + ICP_ACCEL_4XXX_MASK_SMX_SLICE = BIT(6), +}; + +void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data); +void adf_clean_hw_data_4xxx(struct adf_hw_device_data *hw_data); + +#endif diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c new file mode 100644 index 000000000000..a8805c815d16 --- /dev/null +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -0,0 +1,323 @@ +// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) +/* Copyright(c) 2020 Intel Corporation */ +#include +#include +#include + +#include +#include +#include + +#include "adf_4xxx_hw_data.h" +#include "qat_crypto.h" +#include "adf_transport_access_macros.h" + +static const struct pci_device_id adf_pci_tbl[] = { + { PCI_VDEVICE(INTEL, ADF_4XXX_PCI_DEVICE_ID), }, + { } +}; +MODULE_DEVICE_TABLE(pci, adf_pci_tbl); + +static void adf_cleanup_accel(struct adf_accel_dev *accel_dev) +{ + if (accel_dev->hw_device) { + adf_clean_hw_data_4xxx(accel_dev->hw_device); + accel_dev->hw_device = NULL; + } + adf_cfg_dev_remove(accel_dev); + debugfs_remove(accel_dev->debugfs_dir); + adf_devmgr_rm_dev(accel_dev, NULL); +} + +static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) +{ + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + int banks = GET_MAX_BANKS(accel_dev); + int cpus = num_online_cpus(); + unsigned long bank, val; + int instances; + int ret; + int i; + + if (adf_hw_dev_has_crypto(accel_dev)) + instances = min(cpus, banks / 2); + else + instances = 0; + + ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); + if (ret) + goto err; + + ret = adf_cfg_section_add(accel_dev, "Accelerator0"); + if (ret) + goto err; + + for (i = 0; i < instances; i++) { + val = i; + bank = i * 2; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &bank, ADF_DEC); + if (ret) + goto err; + + bank += 1; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &bank, ADF_DEC); + if (ret) + goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_ETRMGR_CORE_AFFINITY, + i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i); + val = 128; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 512; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 1; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 1; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = ADF_COALESCING_DEF_TIME; + snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); + ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", + key, &val, ADF_DEC); + if (ret) + goto err; + } + + val = i; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC); + if (ret) + goto err; + + set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); + return 0; +err: + dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n"); + return ret; +} + +static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct adf_accel_dev *accel_dev; + struct adf_accel_pci *accel_pci_dev; + struct adf_hw_device_data *hw_data; + char name[ADF_DEVICE_NAME_LENGTH]; + unsigned int i, bar_nr; + unsigned long bar_mask; + struct adf_bar *bar; + int ret; + + if (num_possible_nodes() > 1 && dev_to_node(&pdev->dev) < 0) { + /* + * If the accelerator is connected to a node with no memory + * there is no point in using the accelerator since the remote + * memory transaction will be very slow. + */ + dev_err(&pdev->dev, "Invalid NUMA configuration.\n"); + return -EINVAL; + } + + accel_dev = devm_kzalloc(&pdev->dev, sizeof(*accel_dev), GFP_KERNEL); + if (!accel_dev) + return -ENOMEM; + + INIT_LIST_HEAD(&accel_dev->crypto_list); + accel_pci_dev = &accel_dev->accel_pci_dev; + accel_pci_dev->pci_dev = pdev; + + /* + * Add accel device to accel table + * This should be called before adf_cleanup_accel is called + */ + if (adf_devmgr_add_dev(accel_dev, NULL)) { + dev_err(&pdev->dev, "Failed to add new accelerator device.\n"); + return -EFAULT; + } + + accel_dev->owner = THIS_MODULE; + /* Allocate and initialise device hardware meta-data structure */ + hw_data = devm_kzalloc(&pdev->dev, sizeof(*hw_data), GFP_KERNEL); + if (!hw_data) { + ret = -ENOMEM; + goto out_err; + } + + accel_dev->hw_device = hw_data; + adf_init_hw_data_4xxx(accel_dev->hw_device); + + pci_read_config_byte(pdev, PCI_REVISION_ID, &accel_pci_dev->revid); + pci_read_config_dword(pdev, ADF_4XXX_FUSECTL4_OFFSET, &hw_data->fuses); + + /* Get Accelerators and Accelerators Engines masks */ + hw_data->accel_mask = hw_data->get_accel_mask(hw_data); + hw_data->ae_mask = hw_data->get_ae_mask(hw_data); + accel_pci_dev->sku = hw_data->get_sku(hw_data); + /* If the device has no acceleration engines then ignore it */ + if (!hw_data->accel_mask || !hw_data->ae_mask || + (~hw_data->ae_mask & 0x01)) { + dev_err(&pdev->dev, "No acceleration units found.\n"); + ret = -EFAULT; + goto out_err; + } + + /* Create dev top level debugfs entry */ + snprintf(name, sizeof(name), "%s%s_%s", ADF_DEVICE_NAME_PREFIX, + hw_data->dev_class->name, pci_name(pdev)); + + accel_dev->debugfs_dir = debugfs_create_dir(name, NULL); + + /* Create device configuration table */ + ret = adf_cfg_dev_add(accel_dev); + if (ret) + goto out_err; + + /* Enable PCI device */ + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Can't enable PCI device.\n"); + goto out_err; + } + + /* Set DMA identifier */ + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { + if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { + dev_err(&pdev->dev, "No usable DMA configuration.\n"); + ret = -EFAULT; + goto out_err; + } else { + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + } + } else { + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + } + + /* Get accelerator capabilities mask */ + hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev); + + /* Find and map all the device's BARS */ + bar_mask = pci_select_bars(pdev, IORESOURCE_MEM) & ADF_4XXX_BAR_MASK; + + ret = pcim_iomap_regions_request_all(pdev, bar_mask, pci_name(pdev)); + if (ret) { + dev_err(&pdev->dev, "Failed to map pci regions.\n"); + goto out_err; + } + + i = 0; + for_each_set_bit(bar_nr, &bar_mask, PCI_STD_NUM_BARS) { + bar = &accel_pci_dev->pci_bars[i++]; + bar->virt_addr = pcim_iomap_table(pdev)[bar_nr]; + } + + pci_set_master(pdev); + + if (adf_enable_aer(accel_dev)) { + dev_err(&pdev->dev, "Failed to enable aer.\n"); + ret = -EFAULT; + goto out_err; + } + + if (pci_save_state(pdev)) { + dev_err(&pdev->dev, "Failed to save pci state.\n"); + ret = -ENOMEM; + goto out_err_disable_aer; + } + + ret = adf_crypto_dev_config(accel_dev); + if (ret) + goto out_err_disable_aer; + + ret = adf_dev_init(accel_dev); + if (ret) + goto out_err_dev_shutdown; + + ret = adf_dev_start(accel_dev); + if (ret) + goto out_err_dev_stop; + + return ret; + +out_err_dev_stop: + adf_dev_stop(accel_dev); +out_err_dev_shutdown: + adf_dev_shutdown(accel_dev); +out_err_disable_aer: + adf_disable_aer(accel_dev); +out_err: + adf_cleanup_accel(accel_dev); + return ret; +} + +static void adf_remove(struct pci_dev *pdev) +{ + struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); + + if (!accel_dev) { + pr_err("QAT: Driver removal failed\n"); + return; + } + adf_dev_stop(accel_dev); + adf_dev_shutdown(accel_dev); + adf_disable_aer(accel_dev); + adf_cleanup_accel(accel_dev); +} + +static struct pci_driver adf_driver = { + .id_table = adf_pci_tbl, + .name = ADF_4XXX_DEVICE_NAME, + .probe = adf_probe, + .remove = adf_remove, + .sriov_configure = adf_sriov_configure, +}; + +module_pci_driver(adf_driver); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel"); +MODULE_FIRMWARE(ADF_4XXX_FW); +MODULE_FIRMWARE(ADF_4XXX_MMP); +MODULE_DESCRIPTION("Intel(R) QuickAssist Technology"); +MODULE_VERSION(ADF_DRV_VERSION); +MODULE_SOFTDEP("pre: crypto-intel_qat"); diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index aee494d3da52..eb45f1b1ae3e 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -3,7 +3,9 @@ #include #include #include +#include #include "adf_c3xxx_hw_data.h" +#include "icp_qat_hw.h" /* Worker thread to service arbiter mappings based on dev SKUs */ static const u32 thrd_to_arb_map_6_me_sku[] = { @@ -17,15 +19,33 @@ static struct adf_hw_device_class c3xxx_class = { .instances = 0 }; -static u32 get_accel_mask(u32 fuse) +static u32 get_accel_mask(struct adf_hw_device_data *self) { - return (~fuse) >> ADF_C3XXX_ACCELERATORS_REG_OFFSET & - ADF_C3XXX_ACCELERATORS_MASK; + u32 straps = self->straps; + u32 fuses = self->fuses; + u32 accel; + + accel = ~(fuses | straps) >> ADF_C3XXX_ACCELERATORS_REG_OFFSET; + accel &= ADF_C3XXX_ACCELERATORS_MASK; + + return accel; } -static u32 get_ae_mask(u32 fuse) +static u32 get_ae_mask(struct adf_hw_device_data *self) { - return (~fuse) & ADF_C3XXX_ACCELENGINES_MASK; + u32 straps = self->straps; + u32 fuses = self->fuses; + unsigned long disabled; + u32 ae_disable; + int accel; + + /* If an accel is disabled, then disable the corresponding two AEs */ + disabled = ~get_accel_mask(self) & ADF_C3XXX_ACCELERATORS_MASK; + ae_disable = BIT(1) | BIT(0); + for_each_set_bit(accel, &disabled, ADF_C3XXX_MAX_ACCELERATORS) + straps |= ae_disable << (accel << 1); + + return ~(fuses | straps) & ADF_C3XXX_ACCELENGINES_MASK; } static u32 get_num_accels(struct adf_hw_device_data *self) @@ -109,11 +129,13 @@ static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_device = accel_dev->hw_device; struct adf_bar *misc_bar = &GET_BARS(accel_dev)[ADF_C3XXX_PMISC_BAR]; + unsigned long accel_mask = hw_device->accel_mask; + unsigned long ae_mask = hw_device->ae_mask; void __iomem *csr = misc_bar->virt_addr; unsigned int val, i; /* Enable Accel Engine error detection & correction */ - for (i = 0; i < hw_device->get_num_aes(hw_device); i++) { + for_each_set_bit(i, &ae_mask, GET_MAX_ACCELENGINES(accel_dev)) { val = ADF_CSR_RD(csr, ADF_C3XXX_AE_CTX_ENABLES(i)); val |= ADF_C3XXX_ENABLE_AE_ECC_ERR; ADF_CSR_WR(csr, ADF_C3XXX_AE_CTX_ENABLES(i), val); @@ -123,7 +145,7 @@ static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) } /* Enable shared memory error detection & correction */ - for (i = 0; i < hw_device->get_num_accels(hw_device); i++) { + for_each_set_bit(i, &accel_mask, ADF_C3XXX_MAX_ACCELERATORS) { val = ADF_CSR_RD(csr, ADF_C3XXX_UERRSSMSH(i)); val |= ADF_C3XXX_ERRSSMSH_EN; ADF_CSR_WR(csr, ADF_C3XXX_UERRSSMSH(i), val); @@ -151,11 +173,19 @@ static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) return 0; } +static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable) +{ + adf_gen2_cfg_iov_thds(accel_dev, enable, + ADF_C3XXX_AE2FUNC_MAP_GRP_A_NUM_REGS, + ADF_C3XXX_AE2FUNC_MAP_GRP_B_NUM_REGS); +} + void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &c3xxx_class; hw_data->instance_id = c3xxx_class.instances++; hw_data->num_banks = ADF_C3XXX_ETR_MAX_BANKS; + hw_data->num_rings_per_bank = ADF_ETR_MAX_RINGS_PER_BANK; hw_data->num_accel = ADF_C3XXX_MAX_ACCELERATORS; hw_data->num_logical_accel = 1; hw_data->num_engines = ADF_C3XXX_MAX_ACCELENGINES; @@ -166,6 +196,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_enable_error_correction; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; + hw_data->get_accel_cap = adf_gen2_get_accel_cap; hw_data->get_num_accels = get_num_accels; hw_data->get_num_aes = get_num_aes; hw_data->get_sram_bar_id = get_sram_bar_id; @@ -173,11 +204,14 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->get_misc_bar_id = get_misc_bar_id; hw_data->get_pf2vf_offset = get_pf2vf_offset; hw_data->get_vintmsk_offset = get_vintmsk_offset; + hw_data->get_admin_info = adf_gen2_get_admin_info; + hw_data->get_arb_info = adf_gen2_get_arb_info; hw_data->get_sku = get_sku; hw_data->fw_name = ADF_C3XXX_FW; hw_data->fw_mmp_name = ADF_C3XXX_MMP; hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; + hw_data->configure_iov_threads = configure_iov_threads; hw_data->disable_iov = adf_disable_sriov; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; @@ -187,6 +221,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } void adf_clean_hw_data_c3xxx(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h index 8b5dd2c94ebf..fece8e38025a 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h @@ -18,6 +18,7 @@ #define ADF_C3XXX_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30) #define ADF_C3XXX_SMIA0_MASK 0xFFFF #define ADF_C3XXX_SMIA1_MASK 0x1 +#define ADF_C3XXX_SOFTSTRAP_CSR_OFFSET 0x2EC /* Error detection and correction */ #define ADF_C3XXX_AE_CTX_ENABLES(i) (i * 0x1000 + 0x20818) #define ADF_C3XXX_AE_MISC_CONTROL(i) (i * 0x1000 + 0x20960) @@ -30,6 +31,10 @@ #define ADF_C3XXX_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04)) #define ADF_C3XXX_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04)) +/* AE to function mapping */ +#define ADF_C3XXX_AE2FUNC_MAP_GRP_A_NUM_REGS 48 +#define ADF_C3XXX_AE2FUNC_MAP_GRP_B_NUM_REGS 6 + /* Firmware Binary */ #define ADF_C3XXX_FW "qat_c3xxx.bin" #define ADF_C3XXX_MMP "qat_c3xxx_mmp.bin" diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index ed0e8e33fe4b..7fb3343ae8b0 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -126,10 +126,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_read_config_byte(pdev, PCI_REVISION_ID, &accel_pci_dev->revid); pci_read_config_dword(pdev, ADF_DEVICE_FUSECTL_OFFSET, &hw_data->fuses); + pci_read_config_dword(pdev, ADF_C3XXX_SOFTSTRAP_CSR_OFFSET, + &hw_data->straps); /* Get Accelerators and Accelerators Engines masks */ - hw_data->accel_mask = hw_data->get_accel_mask(hw_data->fuses); - hw_data->ae_mask = hw_data->get_ae_mask(hw_data->fuses); + hw_data->accel_mask = hw_data->get_accel_mask(hw_data); + hw_data->ae_mask = hw_data->get_ae_mask(hw_data); accel_pci_dev->sku = hw_data->get_sku(hw_data); /* If the device has no acceleration engines then ignore it. */ if (!hw_data->accel_mask || !hw_data->ae_mask || @@ -175,9 +177,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_err_disable; } - /* Read accelerator capabilities mask */ - pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, - &hw_data->accel_capabilities_mask); + /* Get accelerator capabilities mask */ + hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev); /* Find and map all the device's BARS */ i = 0; diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index d2fedbd7113c..15f6b9bdfb22 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "adf_c3xxxvf_hw_data.h" static struct adf_hw_device_class c3xxxiov_class = { @@ -11,12 +12,12 @@ static struct adf_hw_device_class c3xxxiov_class = { .instances = 0 }; -static u32 get_accel_mask(u32 fuse) +static u32 get_accel_mask(struct adf_hw_device_data *self) { return ADF_C3XXXIOV_ACCELERATORS_MASK; } -static u32 get_ae_mask(u32 fuse) +static u32 get_ae_mask(struct adf_hw_device_data *self) { return ADF_C3XXXIOV_ACCELENGINES_MASK; } @@ -69,6 +70,7 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &c3xxxiov_class; hw_data->num_banks = ADF_C3XXXIOV_ETR_MAX_BANKS; + hw_data->num_rings_per_bank = ADF_ETR_MAX_RINGS_PER_BANK; hw_data->num_accel = ADF_C3XXXIOV_MAX_ACCELERATORS; hw_data->num_logical_accel = 1; hw_data->num_engines = ADF_C3XXXIOV_MAX_ACCELENGINES; @@ -97,6 +99,7 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } void adf_clean_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 456979b136a2..1d1532e8fb6d 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -119,8 +119,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adf_init_hw_data_c3xxxiov(accel_dev->hw_device); /* Get Accelerators and Accelerators Engines masks */ - hw_data->accel_mask = hw_data->get_accel_mask(hw_data->fuses); - hw_data->ae_mask = hw_data->get_ae_mask(hw_data->fuses); + hw_data->accel_mask = hw_data->get_accel_mask(hw_data); + hw_data->ae_mask = hw_data->get_ae_mask(hw_data); accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 844ad5ed33fc..babdffbcb846 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -3,7 +3,9 @@ #include #include #include +#include #include "adf_c62x_hw_data.h" +#include "icp_qat_hw.h" /* Worker thread to service arbiter mappings based on dev SKUs */ static const u32 thrd_to_arb_map_8_me_sku[] = { @@ -22,15 +24,33 @@ static struct adf_hw_device_class c62x_class = { .instances = 0 }; -static u32 get_accel_mask(u32 fuse) +static u32 get_accel_mask(struct adf_hw_device_data *self) { - return (~fuse) >> ADF_C62X_ACCELERATORS_REG_OFFSET & - ADF_C62X_ACCELERATORS_MASK; + u32 straps = self->straps; + u32 fuses = self->fuses; + u32 accel; + + accel = ~(fuses | straps) >> ADF_C62X_ACCELERATORS_REG_OFFSET; + accel &= ADF_C62X_ACCELERATORS_MASK; + + return accel; } -static u32 get_ae_mask(u32 fuse) +static u32 get_ae_mask(struct adf_hw_device_data *self) { - return (~fuse) & ADF_C62X_ACCELENGINES_MASK; + u32 straps = self->straps; + u32 fuses = self->fuses; + unsigned long disabled; + u32 ae_disable; + int accel; + + /* If an accel is disabled, then disable the corresponding two AEs */ + disabled = ~get_accel_mask(self) & ADF_C62X_ACCELERATORS_MASK; + ae_disable = BIT(1) | BIT(0); + for_each_set_bit(accel, &disabled, ADF_C62X_MAX_ACCELERATORS) + straps |= ae_disable << (accel << 1); + + return ~(fuses | straps) & ADF_C62X_ACCELENGINES_MASK; } static u32 get_num_accels(struct adf_hw_device_data *self) @@ -119,11 +139,13 @@ static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_device = accel_dev->hw_device; struct adf_bar *misc_bar = &GET_BARS(accel_dev)[ADF_C62X_PMISC_BAR]; + unsigned long accel_mask = hw_device->accel_mask; + unsigned long ae_mask = hw_device->ae_mask; void __iomem *csr = misc_bar->virt_addr; unsigned int val, i; /* Enable Accel Engine error detection & correction */ - for (i = 0; i < hw_device->get_num_aes(hw_device); i++) { + for_each_set_bit(i, &ae_mask, GET_MAX_ACCELENGINES(accel_dev)) { val = ADF_CSR_RD(csr, ADF_C62X_AE_CTX_ENABLES(i)); val |= ADF_C62X_ENABLE_AE_ECC_ERR; ADF_CSR_WR(csr, ADF_C62X_AE_CTX_ENABLES(i), val); @@ -133,7 +155,7 @@ static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) } /* Enable shared memory error detection & correction */ - for (i = 0; i < hw_device->get_num_accels(hw_device); i++) { + for_each_set_bit(i, &accel_mask, ADF_C62X_MAX_ACCELERATORS) { val = ADF_CSR_RD(csr, ADF_C62X_UERRSSMSH(i)); val |= ADF_C62X_ERRSSMSH_EN; ADF_CSR_WR(csr, ADF_C62X_UERRSSMSH(i), val); @@ -161,11 +183,19 @@ static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) return 0; } +static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable) +{ + adf_gen2_cfg_iov_thds(accel_dev, enable, + ADF_C62X_AE2FUNC_MAP_GRP_A_NUM_REGS, + ADF_C62X_AE2FUNC_MAP_GRP_B_NUM_REGS); +} + void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &c62x_class; hw_data->instance_id = c62x_class.instances++; hw_data->num_banks = ADF_C62X_ETR_MAX_BANKS; + hw_data->num_rings_per_bank = ADF_ETR_MAX_RINGS_PER_BANK; hw_data->num_accel = ADF_C62X_MAX_ACCELERATORS; hw_data->num_logical_accel = 1; hw_data->num_engines = ADF_C62X_MAX_ACCELENGINES; @@ -176,6 +206,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_enable_error_correction; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; + hw_data->get_accel_cap = adf_gen2_get_accel_cap; hw_data->get_num_accels = get_num_accels; hw_data->get_num_aes = get_num_aes; hw_data->get_sram_bar_id = get_sram_bar_id; @@ -183,11 +214,14 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->get_misc_bar_id = get_misc_bar_id; hw_data->get_pf2vf_offset = get_pf2vf_offset; hw_data->get_vintmsk_offset = get_vintmsk_offset; + hw_data->get_admin_info = adf_gen2_get_admin_info; + hw_data->get_arb_info = adf_gen2_get_arb_info; hw_data->get_sku = get_sku; hw_data->fw_name = ADF_C62X_FW; hw_data->fw_mmp_name = ADF_C62X_MMP; hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; + hw_data->configure_iov_threads = configure_iov_threads; hw_data->disable_iov = adf_disable_sriov; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; @@ -197,6 +231,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } void adf_clean_hw_data_c62x(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h index 88504d2bf30d..53d3cb577f5b 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h @@ -19,6 +19,7 @@ #define ADF_C62X_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30) #define ADF_C62X_SMIA0_MASK 0xFFFF #define ADF_C62X_SMIA1_MASK 0x1 +#define ADF_C62X_SOFTSTRAP_CSR_OFFSET 0x2EC /* Error detection and correction */ #define ADF_C62X_AE_CTX_ENABLES(i) (i * 0x1000 + 0x20818) #define ADF_C62X_AE_MISC_CONTROL(i) (i * 0x1000 + 0x20960) @@ -31,6 +32,10 @@ #define ADF_C62X_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04)) #define ADF_C62X_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04)) +/* AE to function mapping */ +#define ADF_C62X_AE2FUNC_MAP_GRP_A_NUM_REGS 80 +#define ADF_C62X_AE2FUNC_MAP_GRP_B_NUM_REGS 10 + /* Firmware Binary */ #define ADF_C62X_FW "qat_c62x.bin" #define ADF_C62X_MMP "qat_c62x_mmp.bin" diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index d8e7c9c25590..1f5de442e1e6 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -126,10 +126,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_read_config_byte(pdev, PCI_REVISION_ID, &accel_pci_dev->revid); pci_read_config_dword(pdev, ADF_DEVICE_FUSECTL_OFFSET, &hw_data->fuses); + pci_read_config_dword(pdev, ADF_C62X_SOFTSTRAP_CSR_OFFSET, + &hw_data->straps); /* Get Accelerators and Accelerators Engines masks */ - hw_data->accel_mask = hw_data->get_accel_mask(hw_data->fuses); - hw_data->ae_mask = hw_data->get_ae_mask(hw_data->fuses); + hw_data->accel_mask = hw_data->get_accel_mask(hw_data); + hw_data->ae_mask = hw_data->get_ae_mask(hw_data); accel_pci_dev->sku = hw_data->get_sku(hw_data); /* If the device has no acceleration engines then ignore it. */ if (!hw_data->accel_mask || !hw_data->ae_mask || @@ -175,9 +177,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_err_disable; } - /* Read accelerator capabilities mask */ - pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, - &hw_data->accel_capabilities_mask); + /* Get accelerator capabilities mask */ + hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev); /* Find and map all the device's BARS */ i = (hw_data->fuses & ADF_DEVICE_FUSECTL_MASK) ? 1 : 0; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index 29fd3f1091ab..d231583428c9 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "adf_c62xvf_hw_data.h" static struct adf_hw_device_class c62xiov_class = { @@ -11,12 +12,12 @@ static struct adf_hw_device_class c62xiov_class = { .instances = 0 }; -static u32 get_accel_mask(u32 fuse) +static u32 get_accel_mask(struct adf_hw_device_data *self) { return ADF_C62XIOV_ACCELERATORS_MASK; } -static u32 get_ae_mask(u32 fuse) +static u32 get_ae_mask(struct adf_hw_device_data *self) { return ADF_C62XIOV_ACCELENGINES_MASK; } @@ -69,6 +70,7 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &c62xiov_class; hw_data->num_banks = ADF_C62XIOV_ETR_MAX_BANKS; + hw_data->num_rings_per_bank = ADF_ETR_MAX_RINGS_PER_BANK; hw_data->num_accel = ADF_C62XIOV_MAX_ACCELERATORS; hw_data->num_logical_accel = 1; hw_data->num_engines = ADF_C62XIOV_MAX_ACCELENGINES; @@ -97,6 +99,7 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } void adf_clean_hw_data_c62xiov(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index b9810f79eb84..04742a6d91ca 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -119,8 +119,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adf_init_hw_data_c62xiov(accel_dev->hw_device); /* Get Accelerators and Accelerators Engines masks */ - hw_data->accel_mask = hw_data->get_accel_mask(hw_data->fuses); - hw_data->ae_mask = hw_data->get_ae_mask(hw_data->fuses); + hw_data->accel_mask = hw_data->get_accel_mask(hw_data); + hw_data->ae_mask = hw_data->get_ae_mask(hw_data); accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index 47a8e3d8b81a..9c57abdf56b7 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -10,6 +10,8 @@ intel_qat-objs := adf_cfg.o \ adf_transport.o \ adf_admin.o \ adf_hw_arbiter.o \ + adf_gen2_hw_data.o \ + adf_gen4_hw_data.o \ qat_crypto.o \ qat_algs.o \ qat_asym_algs.o \ diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 06952ece53d9..c46a5805b294 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -15,6 +15,9 @@ #define ADF_C62XVF_DEVICE_NAME "c6xxvf" #define ADF_C3XXX_DEVICE_NAME "c3xxx" #define ADF_C3XXXVF_DEVICE_NAME "c3xxxvf" +#define ADF_4XXX_DEVICE_NAME "4xxx" +#define ADF_4XXX_PCI_DEVICE_ID 0x4940 +#define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941 #define ADF_ERRSOU3 (0x3A000 + 0x0C) #define ADF_ERRSOU5 (0x3A000 + 0xD8) #define ADF_DEVICE_FUSECTL_OFFSET 0x40 @@ -97,6 +100,46 @@ struct adf_hw_device_class { u32 instances; } __packed; +struct arb_info { + u32 arb_cfg; + u32 arb_offset; + u32 wt2sam_offset; +}; + +struct admin_info { + u32 admin_msg_ur; + u32 admin_msg_lr; + u32 mailbox_offset; +}; + +struct adf_hw_csr_ops { + u64 (*build_csr_ring_base_addr)(dma_addr_t addr, u32 size); + u32 (*read_csr_ring_head)(void __iomem *csr_base_addr, u32 bank, + u32 ring); + void (*write_csr_ring_head)(void __iomem *csr_base_addr, u32 bank, + u32 ring, u32 value); + u32 (*read_csr_ring_tail)(void __iomem *csr_base_addr, u32 bank, + u32 ring); + void (*write_csr_ring_tail)(void __iomem *csr_base_addr, u32 bank, + u32 ring, u32 value); + u32 (*read_csr_e_stat)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_ring_config)(void __iomem *csr_base_addr, u32 bank, + u32 ring, u32 value); + void (*write_csr_ring_base)(void __iomem *csr_base_addr, u32 bank, + u32 ring, dma_addr_t addr); + void (*write_csr_int_flag)(void __iomem *csr_base_addr, u32 bank, + u32 value); + void (*write_csr_int_srcsel)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_int_col_en)(void __iomem *csr_base_addr, u32 bank, + u32 value); + void (*write_csr_int_col_ctl)(void __iomem *csr_base_addr, u32 bank, + u32 value); + void (*write_csr_int_flag_and_col)(void __iomem *csr_base_addr, + u32 bank, u32 value); + void (*write_csr_ring_srv_arb_en)(void __iomem *csr_base_addr, u32 bank, + u32 value); +}; + struct adf_cfg_device_data; struct adf_accel_dev; struct adf_etr_data; @@ -104,8 +147,9 @@ struct adf_etr_ring_data; struct adf_hw_device_data { struct adf_hw_device_class *dev_class; - u32 (*get_accel_mask)(u32 fuse); - u32 (*get_ae_mask)(u32 fuse); + u32 (*get_accel_mask)(struct adf_hw_device_data *self); + u32 (*get_ae_mask)(struct adf_hw_device_data *self); + u32 (*get_accel_cap)(struct adf_accel_dev *accel_dev); u32 (*get_sram_bar_id)(struct adf_hw_device_data *self); u32 (*get_misc_bar_id)(struct adf_hw_device_data *self); u32 (*get_etr_bar_id)(struct adf_hw_device_data *self); @@ -113,6 +157,8 @@ struct adf_hw_device_data { u32 (*get_num_accels)(struct adf_hw_device_data *self); u32 (*get_pf2vf_offset)(u32 i); u32 (*get_vintmsk_offset)(u32 i); + void (*get_arb_info)(struct arb_info *arb_csrs_info); + void (*get_admin_info)(struct admin_info *admin_csrs_info); enum dev_sku_info (*get_sku)(struct adf_hw_device_data *self); int (*alloc_irq)(struct adf_accel_dev *accel_dev); void (*free_irq)(struct adf_accel_dev *accel_dev); @@ -125,19 +171,29 @@ struct adf_hw_device_data { void (*get_arb_mapping)(struct adf_accel_dev *accel_dev, const u32 **cfg); void (*disable_iov)(struct adf_accel_dev *accel_dev); + void (*configure_iov_threads)(struct adf_accel_dev *accel_dev, + bool enable); void (*enable_ints)(struct adf_accel_dev *accel_dev); int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev); void (*reset_device)(struct adf_accel_dev *accel_dev); + void (*set_msix_rttable)(struct adf_accel_dev *accel_dev); + char *(*uof_get_name)(u32 obj_num); + u32 (*uof_get_num_objs)(void); + u32 (*uof_get_ae_mask)(u32 obj_num); + struct adf_hw_csr_ops csr_ops; const char *fw_name; const char *fw_mmp_name; u32 fuses; + u32 straps; u32 accel_capabilities_mask; u32 instance_id; u16 accel_mask; - u16 ae_mask; + u32 ae_mask; + u32 admin_ae_mask; u16 tx_rings_mask; u8 tx_rx_gap; u8 num_banks; + u8 num_rings_per_bank; u8 num_accel; u8 num_logical_accel; u8 num_engines; @@ -155,7 +211,10 @@ struct adf_hw_device_data { #define GET_BARS(accel_dev) ((accel_dev)->accel_pci_dev.pci_bars) #define GET_HW_DATA(accel_dev) (accel_dev->hw_device) #define GET_MAX_BANKS(accel_dev) (GET_HW_DATA(accel_dev)->num_banks) +#define GET_NUM_RINGS_PER_BANK(accel_dev) \ + GET_HW_DATA(accel_dev)->num_rings_per_bank #define GET_MAX_ACCELENGINES(accel_dev) (GET_HW_DATA(accel_dev)->num_engines) +#define GET_CSR_OPS(accel_dev) (&(accel_dev)->hw_device->csr_ops) #define accel_to_pci_dev(accel_ptr) accel_ptr->accel_pci_dev.pci_dev struct adf_admin_comms; diff --git a/drivers/crypto/qat/qat_common/adf_accel_engine.c b/drivers/crypto/qat/qat_common/adf_accel_engine.c index c8ad85b882be..ca4eae8cdd0b 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_engine.c +++ b/drivers/crypto/qat/qat_common/adf_accel_engine.c @@ -7,12 +7,55 @@ #include "adf_common_drv.h" #include "icp_qat_uclo.h" +static int adf_ae_fw_load_images(struct adf_accel_dev *accel_dev, void *fw_addr, + u32 fw_size) +{ + struct adf_fw_loader_data *loader_data = accel_dev->fw_loader; + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + struct icp_qat_fw_loader_handle *loader; + char *obj_name; + u32 num_objs; + u32 ae_mask; + int i; + + loader = loader_data->fw_loader; + num_objs = hw_device->uof_get_num_objs(); + + for (i = 0; i < num_objs; i++) { + obj_name = hw_device->uof_get_name(i); + ae_mask = hw_device->uof_get_ae_mask(i); + + if (qat_uclo_set_cfg_ae_mask(loader, ae_mask)) { + dev_err(&GET_DEV(accel_dev), + "Invalid mask for UOF image\n"); + goto out_err; + } + if (qat_uclo_map_obj(loader, fw_addr, fw_size, obj_name)) { + dev_err(&GET_DEV(accel_dev), + "Failed to map UOF firmware\n"); + goto out_err; + } + if (qat_uclo_wr_all_uimage(loader)) { + dev_err(&GET_DEV(accel_dev), + "Failed to load UOF firmware\n"); + goto out_err; + } + qat_uclo_del_obj(loader); + } + + return 0; + +out_err: + adf_ae_fw_release(accel_dev); + return -EFAULT; +} + int adf_ae_fw_load(struct adf_accel_dev *accel_dev) { struct adf_fw_loader_data *loader_data = accel_dev->fw_loader; struct adf_hw_device_data *hw_device = accel_dev->hw_device; - void *uof_addr, *mmp_addr; - u32 uof_size, mmp_size; + void *fw_addr, *mmp_addr; + u32 fw_size, mmp_size; if (!hw_device->fw_name) return 0; @@ -30,15 +73,20 @@ int adf_ae_fw_load(struct adf_accel_dev *accel_dev) goto out_err; } - uof_size = loader_data->uof_fw->size; - uof_addr = (void *)loader_data->uof_fw->data; + fw_size = loader_data->uof_fw->size; + fw_addr = (void *)loader_data->uof_fw->data; mmp_size = loader_data->mmp_fw->size; mmp_addr = (void *)loader_data->mmp_fw->data; + if (qat_uclo_wr_mimage(loader_data->fw_loader, mmp_addr, mmp_size)) { dev_err(&GET_DEV(accel_dev), "Failed to load MMP\n"); goto out_err; } - if (qat_uclo_map_obj(loader_data->fw_loader, uof_addr, uof_size)) { + + if (hw_device->uof_get_num_objs) + return adf_ae_fw_load_images(accel_dev, fw_addr, fw_size); + + if (qat_uclo_map_obj(loader_data->fw_loader, fw_addr, fw_size, NULL)) { dev_err(&GET_DEV(accel_dev), "Failed to map FW\n"); goto out_err; } @@ -61,7 +109,7 @@ void adf_ae_fw_release(struct adf_accel_dev *accel_dev) if (!hw_device->fw_name) return; - qat_uclo_del_uof_obj(loader_data->fw_loader); + qat_uclo_del_obj(loader_data->fw_loader); qat_hal_deinit(loader_data->fw_loader); release_firmware(loader_data->uof_fw); release_firmware(loader_data->mmp_fw); @@ -74,17 +122,12 @@ int adf_ae_start(struct adf_accel_dev *accel_dev) { struct adf_fw_loader_data *loader_data = accel_dev->fw_loader; struct adf_hw_device_data *hw_data = accel_dev->hw_device; - u32 ae_ctr, ae, max_aes = GET_MAX_ACCELENGINES(accel_dev); + u32 ae_ctr; if (!hw_data->fw_name) return 0; - for (ae = 0, ae_ctr = 0; ae < max_aes; ae++) { - if (hw_data->ae_mask & (1 << ae)) { - qat_hal_start(loader_data->fw_loader, ae, 0xFF); - ae_ctr++; - } - } + ae_ctr = qat_hal_start(loader_data->fw_loader); dev_info(&GET_DEV(accel_dev), "qat_dev%d started %d acceleration engines\n", accel_dev->accel_id, ae_ctr); diff --git a/drivers/crypto/qat/qat_common/adf_admin.c b/drivers/crypto/qat/qat_common/adf_admin.c index ec9b390276d6..43680e178242 100644 --- a/drivers/crypto/qat/qat_common/adf_admin.c +++ b/drivers/crypto/qat/qat_common/adf_admin.c @@ -10,11 +10,7 @@ #include "adf_common_drv.h" #include "icp_qat_fw_init_admin.h" -/* Admin Messages Registers */ -#define ADF_DH895XCC_ADMINMSGUR_OFFSET (0x3A000 + 0x574) -#define ADF_DH895XCC_ADMINMSGLR_OFFSET (0x3A000 + 0x578) -#define ADF_DH895XCC_MAILBOX_BASE_OFFSET 0x20970 -#define ADF_DH895XCC_MAILBOX_STRIDE 0x1000 +#define ADF_ADMIN_MAILBOX_STRIDE 0x1000 #define ADF_ADMINMSG_LEN 32 #define ADF_CONST_TABLE_SIZE 1024 #define ADF_ADMIN_POLL_DELAY_US 20 @@ -70,28 +66,28 @@ static const u8 const_tab[1024] __aligned(1024) = { 0xf8, 0x2b, 0xa5, 0x4f, 0xf5, 0x3a, 0x5f, 0x1d, 0x36, 0xf1, 0x51, 0x0e, 0x52, 0x7f, 0xad, 0xe6, 0x82, 0xd1, 0x9b, 0x05, 0x68, 0x8c, 0x2b, 0x3e, 0x6c, 0x1f, 0x1f, 0x83, 0xd9, 0xab, 0xfb, 0x41, 0xbd, 0x6b, 0x5b, 0xe0, 0xcd, 0x19, 0x13, -0x7e, 0x21, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x7e, 0x21, 0x79, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x01, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x15, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x14, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x02, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x24, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x25, +0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x12, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x01, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x01, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x2B, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -118,7 +114,7 @@ static int adf_put_admin_msg_sync(struct adf_accel_dev *accel_dev, u32 ae, struct adf_admin_comms *admin = accel_dev->admin; int offset = ae * ADF_ADMINMSG_LEN * 2; void __iomem *mailbox = admin->mailbox_addr; - int mb_offset = ae * ADF_DH895XCC_MAILBOX_STRIDE; + int mb_offset = ae * ADF_ADMIN_MAILBOX_STRIDE; struct icp_qat_fw_init_admin_req *request = in; mutex_lock(&admin->lock); @@ -167,7 +163,7 @@ static int adf_send_admin(struct adf_accel_dev *accel_dev, return 0; } -static int adf_init_me(struct adf_accel_dev *accel_dev) +static int adf_init_ae(struct adf_accel_dev *accel_dev) { struct icp_qat_fw_init_admin_req req; struct icp_qat_fw_init_admin_resp resp; @@ -176,7 +172,7 @@ static int adf_init_me(struct adf_accel_dev *accel_dev) memset(&req, 0, sizeof(req)); memset(&resp, 0, sizeof(resp)); - req.cmd_id = ICP_QAT_FW_INIT_ME; + req.cmd_id = ICP_QAT_FW_INIT_AE; return adf_send_admin(accel_dev, &req, &resp, ae_mask); } @@ -186,7 +182,7 @@ static int adf_set_fw_constants(struct adf_accel_dev *accel_dev) struct icp_qat_fw_init_admin_req req; struct icp_qat_fw_init_admin_resp resp; struct adf_hw_device_data *hw_device = accel_dev->hw_device; - u32 ae_mask = hw_device->ae_mask; + u32 ae_mask = hw_device->admin_ae_mask ?: hw_device->ae_mask; memset(&req, 0, sizeof(req)); memset(&resp, 0, sizeof(resp)); @@ -210,11 +206,11 @@ int adf_send_admin_init(struct adf_accel_dev *accel_dev) { int ret; - ret = adf_init_me(accel_dev); + ret = adf_set_fw_constants(accel_dev); if (ret) return ret; - return adf_set_fw_constants(accel_dev); + return adf_init_ae(accel_dev); } EXPORT_SYMBOL_GPL(adf_send_admin_init); @@ -225,8 +221,9 @@ int adf_init_admin_comms(struct adf_accel_dev *accel_dev) struct adf_bar *pmisc = &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; void __iomem *csr = pmisc->virt_addr; - void __iomem *mailbox = (void __iomem *)((uintptr_t)csr + - ADF_DH895XCC_MAILBOX_BASE_OFFSET); + struct admin_info admin_csrs_info; + u32 mailbox_offset, adminmsg_u, adminmsg_l; + void __iomem *mailbox; u64 reg_val; admin = kzalloc_node(sizeof(*accel_dev->admin), GFP_KERNEL, @@ -254,9 +251,17 @@ int adf_init_admin_comms(struct adf_accel_dev *accel_dev) } memcpy(admin->virt_tbl_addr, const_tab, sizeof(const_tab)); + hw_data->get_admin_info(&admin_csrs_info); + + mailbox_offset = admin_csrs_info.mailbox_offset; + mailbox = csr + mailbox_offset; + adminmsg_u = admin_csrs_info.admin_msg_ur; + adminmsg_l = admin_csrs_info.admin_msg_lr; + reg_val = (u64)admin->phy_addr; - ADF_CSR_WR(csr, ADF_DH895XCC_ADMINMSGUR_OFFSET, reg_val >> 32); - ADF_CSR_WR(csr, ADF_DH895XCC_ADMINMSGLR_OFFSET, reg_val); + ADF_CSR_WR(csr, adminmsg_u, upper_32_bits(reg_val)); + ADF_CSR_WR(csr, adminmsg_l, lower_32_bits(reg_val)); + mutex_init(&admin->lock); admin->mailbox_addr = mailbox; accel_dev->admin = admin; diff --git a/drivers/crypto/qat/qat_common/adf_cfg.c b/drivers/crypto/qat/qat_common/adf_cfg.c index 22ae32838113..575b6f002303 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg.c +++ b/drivers/crypto/qat/qat_common/adf_cfg.c @@ -196,7 +196,7 @@ static int adf_cfg_key_val_get(struct adf_accel_dev *accel_dev, memcpy(val, keyval->val, ADF_CFG_MAX_VAL_LEN_IN_BYTES); return 0; } - return -1; + return -ENODATA; } /** @@ -243,7 +243,7 @@ int adf_cfg_add_key_value_param(struct adf_accel_dev *accel_dev, } else { dev_err(&GET_DEV(accel_dev), "Unknown type given.\n"); kfree(key_val); - return -1; + return -EINVAL; } key_val->type = type; down_write(&cfg->lock); diff --git a/drivers/crypto/qat/qat_common/adf_cfg_common.h b/drivers/crypto/qat/qat_common/adf_cfg_common.h index 1ef46ccfba47..4fabb70b1f18 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_common.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_common.h @@ -32,7 +32,8 @@ enum adf_device_type { DEV_C62X, DEV_C62XVF, DEV_C3XXX, - DEV_C3XXXVF + DEV_C3XXXVF, + DEV_4XXX, }; struct adf_dev_status_info { diff --git a/drivers/crypto/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/qat/qat_common/adf_cfg_strings.h index 314790f5b0af..09651e1f937a 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_strings.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_strings.h @@ -18,7 +18,8 @@ #define ADF_RING_DC_TX "RingTx" #define ADF_RING_DC_RX "RingRx" #define ADF_ETRMGR_BANK "Bank" -#define ADF_RING_BANK_NUM "BankNumber" +#define ADF_RING_SYM_BANK_NUM "BankSymNumber" +#define ADF_RING_ASYM_BANK_NUM "BankAsymNumber" #define ADF_CY "Cy" #define ADF_DC "Dc" #define ADF_ETRMGR_COALESCING_ENABLED "InterruptCoalescingEnabled" diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index f22342f612c1..c61476553728 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -133,8 +133,7 @@ void adf_vf_isr_resource_free(struct adf_accel_dev *accel_dev); int qat_hal_init(struct adf_accel_dev *accel_dev); void qat_hal_deinit(struct icp_qat_fw_loader_handle *handle); -void qat_hal_start(struct icp_qat_fw_loader_handle *handle, unsigned char ae, - unsigned int ctx_mask); +int qat_hal_start(struct icp_qat_fw_loader_handle *handle); void qat_hal_stop(struct icp_qat_fw_loader_handle *handle, unsigned char ae, unsigned int ctx_mask); void qat_hal_reset(struct icp_qat_fw_loader_handle *handle); @@ -163,28 +162,32 @@ int qat_hal_batch_wr_lm(struct icp_qat_fw_loader_handle *handle, unsigned char ae, struct icp_qat_uof_batch_init *lm_init_header); int qat_hal_init_gpr(struct icp_qat_fw_loader_handle *handle, - unsigned char ae, unsigned char ctx_mask, + unsigned char ae, unsigned long ctx_mask, enum icp_qat_uof_regtype reg_type, unsigned short reg_num, unsigned int regdata); int qat_hal_init_wr_xfer(struct icp_qat_fw_loader_handle *handle, - unsigned char ae, unsigned char ctx_mask, + unsigned char ae, unsigned long ctx_mask, enum icp_qat_uof_regtype reg_type, unsigned short reg_num, unsigned int regdata); int qat_hal_init_rd_xfer(struct icp_qat_fw_loader_handle *handle, - unsigned char ae, unsigned char ctx_mask, + unsigned char ae, unsigned long ctx_mask, enum icp_qat_uof_regtype reg_type, unsigned short reg_num, unsigned int regdata); int qat_hal_init_nn(struct icp_qat_fw_loader_handle *handle, - unsigned char ae, unsigned char ctx_mask, + unsigned char ae, unsigned long ctx_mask, unsigned short reg_num, unsigned int regdata); int qat_hal_wr_lm(struct icp_qat_fw_loader_handle *handle, unsigned char ae, unsigned short lm_addr, unsigned int value); +void qat_hal_set_ae_tindex_mode(struct icp_qat_fw_loader_handle *handle, + unsigned char ae, unsigned char mode); int qat_uclo_wr_all_uimage(struct icp_qat_fw_loader_handle *handle); -void qat_uclo_del_uof_obj(struct icp_qat_fw_loader_handle *handle); +void qat_uclo_del_obj(struct icp_qat_fw_loader_handle *handle); int qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle, void *addr_ptr, int mem_size); int qat_uclo_map_obj(struct icp_qat_fw_loader_handle *handle, - void *addr_ptr, int mem_size); + void *addr_ptr, u32 mem_size, char *obj_name); +int qat_uclo_set_cfg_ae_mask(struct icp_qat_fw_loader_handle *handle, + unsigned int cfg_ae_mask); #if defined(CONFIG_PCI_IOV) int adf_sriov_configure(struct pci_dev *pdev, int numvfs); void adf_disable_sriov(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/qat/qat_common/adf_dev_mgr.c index 92ec035576df..4c752eed10fe 100644 --- a/drivers/crypto/qat/qat_common/adf_dev_mgr.c +++ b/drivers/crypto/qat/qat_common/adf_dev_mgr.c @@ -151,8 +151,8 @@ int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev, mutex_lock(&table_lock); atomic_set(&accel_dev->ref_count, 0); - /* PF on host or VF on guest */ - if (!accel_dev->is_vf || (accel_dev->is_vf && !pf)) { + /* PF on host or VF on guest - optimized to remove redundant is_vf */ + if (!accel_dev->is_vf || !pf) { struct vf_id_map *map; list_for_each(itr, &accel_table) { @@ -248,7 +248,8 @@ void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev, struct adf_accel_dev *pf) { mutex_lock(&table_lock); - if (!accel_dev->is_vf || (accel_dev->is_vf && !pf)) { + /* PF on host or VF on guest - optimized to remove redundant is_vf */ + if (!accel_dev->is_vf || !pf) { id_map[accel_dev->accel_id] = 0; num_devices--; } else if (accel_dev->is_vf && pf) { @@ -285,9 +286,9 @@ struct adf_accel_dev *adf_devmgr_get_first(void) /** * adf_devmgr_pci_to_accel_dev() - Get accel_dev associated with the pci_dev. - * @pci_dev: Pointer to pci device. + * @pci_dev: Pointer to PCI device. * - * Function returns acceleration device associated with the given pci device. + * Function returns acceleration device associated with the given PCI device. * To be used by QAT device specific drivers. * * Return: pointer to accel_dev or NULL if not found. diff --git a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c new file mode 100644 index 000000000000..1aa17303838d --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) +/* Copyright(c) 2020 Intel Corporation */ +#include "adf_gen2_hw_data.h" +#include "icp_qat_hw.h" +#include + +void adf_gen2_cfg_iov_thds(struct adf_accel_dev *accel_dev, bool enable, + int num_a_regs, int num_b_regs) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + void __iomem *pmisc_addr; + struct adf_bar *pmisc; + int pmisc_id, i; + u32 reg; + + pmisc_id = hw_data->get_misc_bar_id(hw_data); + pmisc = &GET_BARS(accel_dev)[pmisc_id]; + pmisc_addr = pmisc->virt_addr; + + /* Set/Unset Valid bit in AE Thread to PCIe Function Mapping Group A */ + for (i = 0; i < num_a_regs; i++) { + reg = READ_CSR_AE2FUNCTION_MAP_A(pmisc_addr, i); + if (enable) + reg |= AE2FUNCTION_MAP_VALID; + else + reg &= ~AE2FUNCTION_MAP_VALID; + WRITE_CSR_AE2FUNCTION_MAP_A(pmisc_addr, i, reg); + } + + /* Set/Unset Valid bit in AE Thread to PCIe Function Mapping Group B */ + for (i = 0; i < num_b_regs; i++) { + reg = READ_CSR_AE2FUNCTION_MAP_B(pmisc_addr, i); + if (enable) + reg |= AE2FUNCTION_MAP_VALID; + else + reg &= ~AE2FUNCTION_MAP_VALID; + WRITE_CSR_AE2FUNCTION_MAP_B(pmisc_addr, i, reg); + } +} +EXPORT_SYMBOL_GPL(adf_gen2_cfg_iov_thds); + +void adf_gen2_get_admin_info(struct admin_info *admin_csrs_info) +{ + admin_csrs_info->mailbox_offset = ADF_MAILBOX_BASE_OFFSET; + admin_csrs_info->admin_msg_ur = ADF_ADMINMSGUR_OFFSET; + admin_csrs_info->admin_msg_lr = ADF_ADMINMSGLR_OFFSET; +} +EXPORT_SYMBOL_GPL(adf_gen2_get_admin_info); + +void adf_gen2_get_arb_info(struct arb_info *arb_info) +{ + arb_info->arb_cfg = ADF_ARB_CONFIG; + arb_info->arb_offset = ADF_ARB_OFFSET; + arb_info->wt2sam_offset = ADF_ARB_WRK_2_SER_MAP_OFFSET; +} +EXPORT_SYMBOL_GPL(adf_gen2_get_arb_info); + +static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) +{ + return BUILD_RING_BASE_ADDR(addr, size); +} + +static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); +} + +static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); +} + +static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_E_STAT(csr_base_addr, bank); +} + +static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, + u32 ring, u32 value) +{ + WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); +} + +static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, + dma_addr_t addr) +{ + WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); +} + +static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); +} + +static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); +} + +static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); +} + +static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); +} + +static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); +} + +static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); +} + +void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) +{ + csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; + csr_ops->read_csr_ring_head = read_csr_ring_head; + csr_ops->write_csr_ring_head = write_csr_ring_head; + csr_ops->read_csr_ring_tail = read_csr_ring_tail; + csr_ops->write_csr_ring_tail = write_csr_ring_tail; + csr_ops->read_csr_e_stat = read_csr_e_stat; + csr_ops->write_csr_ring_config = write_csr_ring_config; + csr_ops->write_csr_ring_base = write_csr_ring_base; + csr_ops->write_csr_int_flag = write_csr_int_flag; + csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; + csr_ops->write_csr_int_col_en = write_csr_int_col_en; + csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; + csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; + csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; +} +EXPORT_SYMBOL_GPL(adf_gen2_init_hw_csr_ops); + +u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct pci_dev *pdev = accel_dev->accel_pci_dev.pci_dev; + u32 straps = hw_data->straps; + u32 fuses = hw_data->fuses; + u32 legfuses; + u32 capabilities = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC | + ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC | + ICP_ACCEL_CAPABILITIES_AUTHENTICATION; + + /* Read accelerator capabilities mask */ + pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, &legfuses); + + if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE) + capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC; + if (legfuses & ICP_ACCEL_MASK_PKE_SLICE) + capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC; + if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE) + capabilities &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION; + + if ((straps | fuses) & ADF_POWERGATE_PKE) + capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC; + + return capabilities; +} +EXPORT_SYMBOL_GPL(adf_gen2_get_accel_cap); diff --git a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h new file mode 100644 index 000000000000..3816e6500352 --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */ +/* Copyright(c) 2020 Intel Corporation */ +#ifndef ADF_GEN2_HW_DATA_H_ +#define ADF_GEN2_HW_DATA_H_ + +#include "adf_accel_devices.h" + +/* Transport access */ +#define ADF_BANK_INT_SRC_SEL_MASK_0 0x4444444CUL +#define ADF_BANK_INT_SRC_SEL_MASK_X 0x44444444UL +#define ADF_RING_CSR_RING_CONFIG 0x000 +#define ADF_RING_CSR_RING_LBASE 0x040 +#define ADF_RING_CSR_RING_UBASE 0x080 +#define ADF_RING_CSR_RING_HEAD 0x0C0 +#define ADF_RING_CSR_RING_TAIL 0x100 +#define ADF_RING_CSR_E_STAT 0x14C +#define ADF_RING_CSR_INT_FLAG 0x170 +#define ADF_RING_CSR_INT_SRCSEL 0x174 +#define ADF_RING_CSR_INT_SRCSEL_2 0x178 +#define ADF_RING_CSR_INT_COL_EN 0x17C +#define ADF_RING_CSR_INT_COL_CTL 0x180 +#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 +#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 +#define ADF_RING_BUNDLE_SIZE 0x1000 + +#define BUILD_RING_BASE_ADDR(addr, size) \ + (((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) +#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2)) +#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2)) +#define READ_CSR_E_STAT(csr_base_addr, bank) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_E_STAT) +#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) +#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ +do { \ + u32 l_base = 0, u_base = 0; \ + l_base = (u32)((value) & 0xFFFFFFFF); \ + u_base = (u32)(((value) & 0xFFFFFFFF00000000ULL) >> 32); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_LBASE + ((ring) << 2), l_base); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_UBASE + ((ring) << 2), u_base); \ +} while (0) + +#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) +#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) +#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_FLAG, value) +#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ +do { \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK_0); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_SRCSEL_2, ADF_BANK_INT_SRC_SEL_MASK_X); \ +} while (0) +#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_COL_EN, value) +#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_COL_CTL, \ + ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) +#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_FLAG_AND_COL, value) + +/* AE to function map */ +#define AE2FUNCTION_MAP_A_OFFSET (0x3A400 + 0x190) +#define AE2FUNCTION_MAP_B_OFFSET (0x3A400 + 0x310) +#define AE2FUNCTION_MAP_REG_SIZE 4 +#define AE2FUNCTION_MAP_VALID BIT(7) + +#define READ_CSR_AE2FUNCTION_MAP_A(pmisc_bar_addr, index) \ + ADF_CSR_RD(pmisc_bar_addr, AE2FUNCTION_MAP_A_OFFSET + \ + AE2FUNCTION_MAP_REG_SIZE * (index)) +#define WRITE_CSR_AE2FUNCTION_MAP_A(pmisc_bar_addr, index, value) \ + ADF_CSR_WR(pmisc_bar_addr, AE2FUNCTION_MAP_A_OFFSET + \ + AE2FUNCTION_MAP_REG_SIZE * (index), value) +#define READ_CSR_AE2FUNCTION_MAP_B(pmisc_bar_addr, index) \ + ADF_CSR_RD(pmisc_bar_addr, AE2FUNCTION_MAP_B_OFFSET + \ + AE2FUNCTION_MAP_REG_SIZE * (index)) +#define WRITE_CSR_AE2FUNCTION_MAP_B(pmisc_bar_addr, index, value) \ + ADF_CSR_WR(pmisc_bar_addr, AE2FUNCTION_MAP_B_OFFSET + \ + AE2FUNCTION_MAP_REG_SIZE * (index), value) + +/* Admin Interface Offsets */ +#define ADF_ADMINMSGUR_OFFSET (0x3A000 + 0x574) +#define ADF_ADMINMSGLR_OFFSET (0x3A000 + 0x578) +#define ADF_MAILBOX_BASE_OFFSET 0x20970 + +/* Arbiter configuration */ +#define ADF_ARB_OFFSET 0x30000 +#define ADF_ARB_WRK_2_SER_MAP_OFFSET 0x180 +#define ADF_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0)) +#define ADF_ARB_REG_SLOT 0x1000 +#define ADF_ARB_RINGSRVARBEN_OFFSET 0x19C + +#define WRITE_CSR_RING_SRV_ARB_EN(csr_addr, index, value) \ + ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \ + (ADF_ARB_REG_SLOT * (index)), value) + +/* Power gating */ +#define ADF_POWERGATE_PKE BIT(24) + +void adf_gen2_cfg_iov_thds(struct adf_accel_dev *accel_dev, bool enable, + int num_a_regs, int num_b_regs); +void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); +void adf_gen2_get_admin_info(struct admin_info *admin_csrs_info); +void adf_gen2_get_arb_info(struct arb_info *arb_info); +u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev); + +#endif diff --git a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.c new file mode 100644 index 000000000000..b72ff58e0bc7 --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) +/* Copyright(c) 2020 Intel Corporation */ +#include "adf_accel_devices.h" +#include "adf_gen4_hw_data.h" + +static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) +{ + return BUILD_RING_BASE_ADDR(addr, size); +} + +static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); +} + +static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); +} + +static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_E_STAT(csr_base_addr, bank); +} + +static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); +} + +static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, + dma_addr_t addr) +{ + WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); +} + +static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); +} + +static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); +} + +static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); +} + +static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); +} + +static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); +} + +static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); +} + +void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) +{ + csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; + csr_ops->read_csr_ring_head = read_csr_ring_head; + csr_ops->write_csr_ring_head = write_csr_ring_head; + csr_ops->read_csr_ring_tail = read_csr_ring_tail; + csr_ops->write_csr_ring_tail = write_csr_ring_tail; + csr_ops->read_csr_e_stat = read_csr_e_stat; + csr_ops->write_csr_ring_config = write_csr_ring_config; + csr_ops->write_csr_ring_base = write_csr_ring_base; + csr_ops->write_csr_int_flag = write_csr_int_flag; + csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; + csr_ops->write_csr_int_col_en = write_csr_int_col_en; + csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; + csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; + csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops); diff --git a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h new file mode 100644 index 000000000000..8ab62b2ac311 --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */ +/* Copyright(c) 2020 Intel Corporation */ +#ifndef ADF_GEN4_HW_CSR_DATA_H_ +#define ADF_GEN4_HW_CSR_DATA_H_ + +#include "adf_accel_devices.h" + +/* Transport access */ +#define ADF_BANK_INT_SRC_SEL_MASK 0x44UL +#define ADF_RING_CSR_RING_CONFIG 0x1000 +#define ADF_RING_CSR_RING_LBASE 0x1040 +#define ADF_RING_CSR_RING_UBASE 0x1080 +#define ADF_RING_CSR_RING_HEAD 0x0C0 +#define ADF_RING_CSR_RING_TAIL 0x100 +#define ADF_RING_CSR_E_STAT 0x14C +#define ADF_RING_CSR_INT_FLAG 0x170 +#define ADF_RING_CSR_INT_SRCSEL 0x174 +#define ADF_RING_CSR_INT_COL_CTL 0x180 +#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 +#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 +#define ADF_RING_CSR_INT_COL_EN 0x17C +#define ADF_RING_CSR_ADDR_OFFSET 0x100000 +#define ADF_RING_BUNDLE_SIZE 0x2000 + +#define BUILD_RING_BASE_ADDR(addr, size) \ + ((((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) << 6) +#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2)) +#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2)) +#define READ_CSR_E_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_E_STAT) +#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) +#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ +do { \ + void __iomem *_csr_base_addr = csr_base_addr; \ + u32 _bank = bank; \ + u32 _ring = ring; \ + dma_addr_t _value = value; \ + u32 l_base = 0, u_base = 0; \ + l_base = lower_32_bits(_value); \ + u_base = upper_32_bits(_value); \ + ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (_bank) + \ + ADF_RING_CSR_RING_LBASE + ((_ring) << 2), l_base); \ + ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (_bank) + \ + ADF_RING_CSR_RING_UBASE + ((_ring) << 2), u_base); \ +} while (0) + +#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) +#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) +#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG, (value)) +#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK) +#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_COL_EN, (value)) +#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_COL_CTL, \ + ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) +#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG_AND_COL, (value)) + +/* Arbiter configuration */ +#define ADF_RING_CSR_RING_SRV_ARB_EN 0x19C + +#define WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_SRV_ARB_EN, (value)) + +void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); + +#endif diff --git a/drivers/crypto/qat/qat_common/adf_hw_arbiter.c b/drivers/crypto/qat/qat_common/adf_hw_arbiter.c index d4162783f970..9f5240d9488b 100644 --- a/drivers/crypto/qat/qat_common/adf_hw_arbiter.c +++ b/drivers/crypto/qat/qat_common/adf_hw_arbiter.c @@ -6,48 +6,33 @@ #define ADF_ARB_NUM 4 #define ADF_ARB_REG_SIZE 0x4 -#define ADF_ARB_WTR_SIZE 0x20 -#define ADF_ARB_OFFSET 0x30000 -#define ADF_ARB_REG_SLOT 0x1000 -#define ADF_ARB_WTR_OFFSET 0x010 -#define ADF_ARB_RO_EN_OFFSET 0x090 -#define ADF_ARB_WQCFG_OFFSET 0x100 -#define ADF_ARB_WRK_2_SER_MAP_OFFSET 0x180 -#define ADF_ARB_RINGSRVARBEN_OFFSET 0x19C -#define WRITE_CSR_ARB_RINGSRVARBEN(csr_addr, index, value) \ - ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \ - (ADF_ARB_REG_SLOT * index), value) +#define WRITE_CSR_ARB_SARCONFIG(csr_addr, arb_offset, index, value) \ + ADF_CSR_WR(csr_addr, (arb_offset) + \ + (ADF_ARB_REG_SIZE * (index)), value) -#define WRITE_CSR_ARB_SARCONFIG(csr_addr, index, value) \ - ADF_CSR_WR(csr_addr, ADF_ARB_OFFSET + \ - (ADF_ARB_REG_SIZE * index), value) - -#define WRITE_CSR_ARB_WRK_2_SER_MAP(csr_addr, index, value) \ - ADF_CSR_WR(csr_addr, (ADF_ARB_OFFSET + \ - ADF_ARB_WRK_2_SER_MAP_OFFSET) + \ - (ADF_ARB_REG_SIZE * index), value) - -#define WRITE_CSR_ARB_WQCFG(csr_addr, index, value) \ - ADF_CSR_WR(csr_addr, (ADF_ARB_OFFSET + \ - ADF_ARB_WQCFG_OFFSET) + (ADF_ARB_REG_SIZE * index), value) +#define WRITE_CSR_ARB_WT2SAM(csr_addr, arb_offset, wt_offset, index, value) \ + ADF_CSR_WR(csr_addr, ((arb_offset) + (wt_offset)) + \ + (ADF_ARB_REG_SIZE * (index)), value) int adf_init_arb(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; void __iomem *csr = accel_dev->transport->banks[0].csr_addr; - u32 arb_cfg = 0x1 << 31 | 0x4 << 4 | 0x1; - u32 arb, i; + u32 arb_off, wt_off, arb_cfg; const u32 *thd_2_arb_cfg; + struct arb_info info; + int arb, i; + + hw_data->get_arb_info(&info); + arb_cfg = info.arb_cfg; + arb_off = info.arb_offset; + wt_off = info.wt2sam_offset; /* Service arb configured for 32 bytes responses and * ring flow control check enabled. */ for (arb = 0; arb < ADF_ARB_NUM; arb++) - WRITE_CSR_ARB_SARCONFIG(csr, arb, arb_cfg); - - /* Setup worker queue registers */ - for (i = 0; i < hw_data->num_engines; i++) - WRITE_CSR_ARB_WQCFG(csr, i, i); + WRITE_CSR_ARB_SARCONFIG(csr, arb_off, arb, arb_cfg); /* Map worker threads to service arbiters */ hw_data->get_arb_mapping(accel_dev, &thd_2_arb_cfg); @@ -56,7 +41,7 @@ int adf_init_arb(struct adf_accel_dev *accel_dev) return -EFAULT; for (i = 0; i < hw_data->num_engines; i++) - WRITE_CSR_ARB_WRK_2_SER_MAP(csr, i, *(thd_2_arb_cfg + i)); + WRITE_CSR_ARB_WT2SAM(csr, arb_off, wt_off, i, thd_2_arb_cfg[i]); return 0; } @@ -64,36 +49,59 @@ EXPORT_SYMBOL_GPL(adf_init_arb); void adf_update_ring_arb(struct adf_etr_ring_data *ring) { - WRITE_CSR_ARB_RINGSRVARBEN(ring->bank->csr_addr, - ring->bank->bank_number, - ring->bank->ring_mask & 0xFF); + struct adf_accel_dev *accel_dev = ring->bank->accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + u32 tx_ring_mask = hw_data->tx_rings_mask; + u32 shift = hw_data->tx_rx_gap; + u32 arben, arben_tx, arben_rx; + u32 rx_ring_mask; + + /* + * Enable arbitration on a ring only if the TX half of the ring mask + * matches the RX part. This results in writes to CSR on both TX and + * RX update - only one is necessary, but both are done for + * simplicity. + */ + rx_ring_mask = tx_ring_mask << shift; + arben_tx = (ring->bank->ring_mask & tx_ring_mask) >> 0; + arben_rx = (ring->bank->ring_mask & rx_ring_mask) >> shift; + arben = arben_tx & arben_rx; + + csr_ops->write_csr_ring_srv_arb_en(ring->bank->csr_addr, + ring->bank->bank_number, arben); } void adf_exit_arb(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + u32 arb_off, wt_off; + struct arb_info info; void __iomem *csr; unsigned int i; + hw_data->get_arb_info(&info); + arb_off = info.arb_offset; + wt_off = info.wt2sam_offset; + if (!accel_dev->transport) return; csr = accel_dev->transport->banks[0].csr_addr; + hw_data->get_arb_info(&info); + /* Reset arbiter configuration */ for (i = 0; i < ADF_ARB_NUM; i++) - WRITE_CSR_ARB_SARCONFIG(csr, i, 0); - - /* Shutdown work queue */ - for (i = 0; i < hw_data->num_engines; i++) - WRITE_CSR_ARB_WQCFG(csr, i, 0); + WRITE_CSR_ARB_SARCONFIG(csr, arb_off, i, 0); /* Unmap worker threads to service arbiters */ for (i = 0; i < hw_data->num_engines; i++) - WRITE_CSR_ARB_WRK_2_SER_MAP(csr, i, 0); + WRITE_CSR_ARB_WT2SAM(csr, arb_off, wt_off, i, 0); /* Disable arbitration on all rings */ for (i = 0; i < GET_MAX_BANKS(accel_dev); i++) - WRITE_CSR_ARB_RINGSRVARBEN(csr, i, 0); + csr_ops->write_csr_ring_srv_arb_en(csr, i, 0); } EXPORT_SYMBOL_GPL(adf_exit_arb); diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index 36136f7db509..c45853463530 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -21,6 +21,9 @@ static int adf_enable_msix(struct adf_accel_dev *accel_dev) struct adf_hw_device_data *hw_data = accel_dev->hw_device; u32 msix_num_entries = 1; + if (hw_data->set_msix_rttable) + hw_data->set_msix_rttable(accel_dev); + /* If SR-IOV is disabled, add entries for each bank */ if (!accel_dev->pf.vf_info) { int i; @@ -50,8 +53,10 @@ static void adf_disable_msix(struct adf_accel_pci *pci_dev_info) static irqreturn_t adf_msix_isr_bundle(int irq, void *bank_ptr) { struct adf_etr_bank_data *bank = bank_ptr; + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(bank->accel_dev); - WRITE_CSR_INT_FLAG_AND_COL(bank->csr_addr, bank->bank_number, 0); + csr_ops->write_csr_int_flag_and_col(bank->csr_addr, bank->bank_number, + 0); tasklet_hi_schedule(&bank->resp_handler); return IRQ_HANDLED; } diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 963b2bea78f2..8c822c2861c2 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -10,31 +10,6 @@ static struct workqueue_struct *pf2vf_resp_wq; -#define ME2FUNCTION_MAP_A_OFFSET (0x3A400 + 0x190) -#define ME2FUNCTION_MAP_A_NUM_REGS 96 - -#define ME2FUNCTION_MAP_B_OFFSET (0x3A400 + 0x310) -#define ME2FUNCTION_MAP_B_NUM_REGS 12 - -#define ME2FUNCTION_MAP_REG_SIZE 4 -#define ME2FUNCTION_MAP_VALID BIT(7) - -#define READ_CSR_ME2FUNCTION_MAP_A(pmisc_bar_addr, index) \ - ADF_CSR_RD(pmisc_bar_addr, ME2FUNCTION_MAP_A_OFFSET + \ - ME2FUNCTION_MAP_REG_SIZE * index) - -#define WRITE_CSR_ME2FUNCTION_MAP_A(pmisc_bar_addr, index, value) \ - ADF_CSR_WR(pmisc_bar_addr, ME2FUNCTION_MAP_A_OFFSET + \ - ME2FUNCTION_MAP_REG_SIZE * index, value) - -#define READ_CSR_ME2FUNCTION_MAP_B(pmisc_bar_addr, index) \ - ADF_CSR_RD(pmisc_bar_addr, ME2FUNCTION_MAP_B_OFFSET + \ - ME2FUNCTION_MAP_REG_SIZE * index) - -#define WRITE_CSR_ME2FUNCTION_MAP_B(pmisc_bar_addr, index, value) \ - ADF_CSR_WR(pmisc_bar_addr, ME2FUNCTION_MAP_B_OFFSET + \ - ME2FUNCTION_MAP_REG_SIZE * index, value) - struct adf_pf2vf_resp { struct work_struct pf2vf_resp_work; struct adf_accel_vf_info *vf_info; @@ -68,12 +43,8 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) struct pci_dev *pdev = accel_to_pci_dev(accel_dev); int totalvfs = pci_sriov_get_totalvfs(pdev); struct adf_hw_device_data *hw_data = accel_dev->hw_device; - struct adf_bar *pmisc = - &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; - void __iomem *pmisc_addr = pmisc->virt_addr; struct adf_accel_vf_info *vf_info; int i; - u32 reg; for (i = 0, vf_info = accel_dev->pf.vf_info; i < totalvfs; i++, vf_info++) { @@ -90,22 +61,13 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) DEFAULT_RATELIMIT_BURST); } - /* Set Valid bits in ME Thread to PCIe Function Mapping Group A */ - for (i = 0; i < ME2FUNCTION_MAP_A_NUM_REGS; i++) { - reg = READ_CSR_ME2FUNCTION_MAP_A(pmisc_addr, i); - reg |= ME2FUNCTION_MAP_VALID; - WRITE_CSR_ME2FUNCTION_MAP_A(pmisc_addr, i, reg); - } - - /* Set Valid bits in ME Thread to PCIe Function Mapping Group B */ - for (i = 0; i < ME2FUNCTION_MAP_B_NUM_REGS; i++) { - reg = READ_CSR_ME2FUNCTION_MAP_B(pmisc_addr, i); - reg |= ME2FUNCTION_MAP_VALID; - WRITE_CSR_ME2FUNCTION_MAP_B(pmisc_addr, i, reg); - } + /* Set Valid bits in AE Thread to PCIe Function Mapping */ + if (hw_data->configure_iov_threads) + hw_data->configure_iov_threads(accel_dev, true); /* Enable VF to PF interrupts for all VFs */ - adf_enable_vf2pf_interrupts(accel_dev, GENMASK_ULL(totalvfs - 1, 0)); + if (hw_data->get_pf2vf_offset) + adf_enable_vf2pf_interrupts(accel_dev, BIT_ULL(totalvfs) - 1); /* * Due to the hardware design, when SR-IOV and the ring arbiter @@ -127,37 +89,25 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) void adf_disable_sriov(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; - struct adf_bar *pmisc = - &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; - void __iomem *pmisc_addr = pmisc->virt_addr; int totalvfs = pci_sriov_get_totalvfs(accel_to_pci_dev(accel_dev)); struct adf_accel_vf_info *vf; - u32 reg; int i; if (!accel_dev->pf.vf_info) return; - adf_pf2vf_notify_restarting(accel_dev); + if (hw_data->get_pf2vf_offset) + adf_pf2vf_notify_restarting(accel_dev); pci_disable_sriov(accel_to_pci_dev(accel_dev)); /* Disable VF to PF interrupts */ - adf_disable_vf2pf_interrupts(accel_dev, 0xFFFFFFFF); + if (hw_data->get_pf2vf_offset) + adf_disable_vf2pf_interrupts(accel_dev, GENMASK(31, 0)); - /* Clear Valid bits in ME Thread to PCIe Function Mapping Group A */ - for (i = 0; i < ME2FUNCTION_MAP_A_NUM_REGS; i++) { - reg = READ_CSR_ME2FUNCTION_MAP_A(pmisc_addr, i); - reg &= ~ME2FUNCTION_MAP_VALID; - WRITE_CSR_ME2FUNCTION_MAP_A(pmisc_addr, i, reg); - } - - /* Clear Valid bits in ME Thread to PCIe Function Mapping Group B */ - for (i = 0; i < ME2FUNCTION_MAP_B_NUM_REGS; i++) { - reg = READ_CSR_ME2FUNCTION_MAP_B(pmisc_addr, i); - reg &= ~ME2FUNCTION_MAP_VALID; - WRITE_CSR_ME2FUNCTION_MAP_B(pmisc_addr, i, reg); - } + /* Clear Valid bits in AE Thread to PCIe Function Mapping */ + if (hw_data->configure_iov_threads) + hw_data->configure_iov_threads(accel_dev, false); for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) { tasklet_disable(&vf->vf2pf_bh_tasklet); @@ -172,13 +122,13 @@ EXPORT_SYMBOL_GPL(adf_disable_sriov); /** * adf_sriov_configure() - Enable SRIOV for the device - * @pdev: Pointer to pci device. + * @pdev: Pointer to PCI device. * @numvfs: Number of virtual functions (VFs) to enable. * * Note that the @numvfs parameter is ignored and all VFs supported by the * device are enabled due to the design of the hardware. * - * Function enables SRIOV for the pci device. + * Function enables SRIOV for the PCI device. * * Return: number of VFs enabled on success, error code otherwise. */ diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c index 2ad774017200..5a7030acdc33 100644 --- a/drivers/crypto/qat/qat_common/adf_transport.c +++ b/drivers/crypto/qat/qat_common/adf_transport.c @@ -54,24 +54,32 @@ static void adf_unreserve_ring(struct adf_etr_bank_data *bank, u32 ring) static void adf_enable_ring_irq(struct adf_etr_bank_data *bank, u32 ring) { + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(bank->accel_dev); + spin_lock_bh(&bank->lock); bank->irq_mask |= (1 << ring); spin_unlock_bh(&bank->lock); - WRITE_CSR_INT_COL_EN(bank->csr_addr, bank->bank_number, bank->irq_mask); - WRITE_CSR_INT_COL_CTL(bank->csr_addr, bank->bank_number, - bank->irq_coalesc_timer); + csr_ops->write_csr_int_col_en(bank->csr_addr, bank->bank_number, + bank->irq_mask); + csr_ops->write_csr_int_col_ctl(bank->csr_addr, bank->bank_number, + bank->irq_coalesc_timer); } static void adf_disable_ring_irq(struct adf_etr_bank_data *bank, u32 ring) { + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(bank->accel_dev); + spin_lock_bh(&bank->lock); bank->irq_mask &= ~(1 << ring); spin_unlock_bh(&bank->lock); - WRITE_CSR_INT_COL_EN(bank->csr_addr, bank->bank_number, bank->irq_mask); + csr_ops->write_csr_int_col_en(bank->csr_addr, bank->bank_number, + bank->irq_mask); } int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg) { + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(ring->bank->accel_dev); + if (atomic_add_return(1, ring->inflights) > ADF_MAX_INFLIGHTS(ring->ring_size, ring->msg_size)) { atomic_dec(ring->inflights); @@ -84,14 +92,17 @@ int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg) ring->tail = adf_modulo(ring->tail + ADF_MSG_SIZE_TO_BYTES(ring->msg_size), ADF_RING_SIZE_MODULO(ring->ring_size)); - WRITE_CSR_RING_TAIL(ring->bank->csr_addr, ring->bank->bank_number, - ring->ring_number, ring->tail); + csr_ops->write_csr_ring_tail(ring->bank->csr_addr, + ring->bank->bank_number, ring->ring_number, + ring->tail); spin_unlock_bh(&ring->lock); + return 0; } static int adf_handle_response(struct adf_etr_ring_data *ring) { + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(ring->bank->accel_dev); u32 msg_counter = 0; u32 *msg = (u32 *)((uintptr_t)ring->base_addr + ring->head); @@ -105,30 +116,36 @@ static int adf_handle_response(struct adf_etr_ring_data *ring) msg_counter++; msg = (u32 *)((uintptr_t)ring->base_addr + ring->head); } - if (msg_counter > 0) - WRITE_CSR_RING_HEAD(ring->bank->csr_addr, - ring->bank->bank_number, - ring->ring_number, ring->head); + if (msg_counter > 0) { + csr_ops->write_csr_ring_head(ring->bank->csr_addr, + ring->bank->bank_number, + ring->ring_number, ring->head); + } return 0; } static void adf_configure_tx_ring(struct adf_etr_ring_data *ring) { + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(ring->bank->accel_dev); u32 ring_config = BUILD_RING_CONFIG(ring->ring_size); - WRITE_CSR_RING_CONFIG(ring->bank->csr_addr, ring->bank->bank_number, - ring->ring_number, ring_config); + csr_ops->write_csr_ring_config(ring->bank->csr_addr, + ring->bank->bank_number, + ring->ring_number, ring_config); + } static void adf_configure_rx_ring(struct adf_etr_ring_data *ring) { + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(ring->bank->accel_dev); u32 ring_config = BUILD_RESP_RING_CONFIG(ring->ring_size, ADF_RING_NEAR_WATERMARK_512, ADF_RING_NEAR_WATERMARK_0); - WRITE_CSR_RING_CONFIG(ring->bank->csr_addr, ring->bank->bank_number, - ring->ring_number, ring_config); + csr_ops->write_csr_ring_config(ring->bank->csr_addr, + ring->bank->bank_number, + ring->ring_number, ring_config); } static int adf_init_ring(struct adf_etr_ring_data *ring) @@ -136,6 +153,7 @@ static int adf_init_ring(struct adf_etr_ring_data *ring) struct adf_etr_bank_data *bank = ring->bank; struct adf_accel_dev *accel_dev = bank->accel_dev; struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); u64 ring_base; u32 ring_size_bytes = ADF_SIZE_TO_RING_SIZE_IN_BYTES(ring->ring_size); @@ -162,9 +180,12 @@ static int adf_init_ring(struct adf_etr_ring_data *ring) else adf_configure_rx_ring(ring); - ring_base = BUILD_RING_BASE_ADDR(ring->dma_addr, ring->ring_size); - WRITE_CSR_RING_BASE(ring->bank->csr_addr, ring->bank->bank_number, - ring->ring_number, ring_base); + ring_base = csr_ops->build_csr_ring_base_addr(ring->dma_addr, + ring->ring_size); + + csr_ops->write_csr_ring_base(ring->bank->csr_addr, + ring->bank->bank_number, ring->ring_number, + ring_base); spin_lock_init(&ring->lock); return 0; } @@ -190,6 +211,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, struct adf_etr_ring_data **ring_ptr) { struct adf_etr_data *transport_data = accel_dev->transport; + u8 num_rings_per_bank = GET_NUM_RINGS_PER_BANK(accel_dev); struct adf_etr_bank_data *bank; struct adf_etr_ring_data *ring; char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; @@ -219,7 +241,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, dev_err(&GET_DEV(accel_dev), "Can't get ring number\n"); return -EFAULT; } - if (ring_num >= ADF_ETR_MAX_RINGS_PER_BANK) { + if (ring_num >= num_rings_per_bank) { dev_err(&GET_DEV(accel_dev), "Invalid ring number\n"); return -EFAULT; } @@ -268,15 +290,17 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, void adf_remove_ring(struct adf_etr_ring_data *ring) { struct adf_etr_bank_data *bank = ring->bank; + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(bank->accel_dev); /* Disable interrupts for the given ring */ adf_disable_ring_irq(bank, ring->ring_number); /* Clear PCI config space */ - WRITE_CSR_RING_CONFIG(bank->csr_addr, bank->bank_number, - ring->ring_number, 0); - WRITE_CSR_RING_BASE(bank->csr_addr, bank->bank_number, - ring->ring_number, 0); + + csr_ops->write_csr_ring_config(bank->csr_addr, bank->bank_number, + ring->ring_number, 0); + csr_ops->write_csr_ring_base(bank->csr_addr, bank->bank_number, + ring->ring_number, 0); adf_ring_debugfs_rm(ring); adf_unreserve_ring(bank, ring->ring_number); /* Disable HW arbitration for the given ring */ @@ -286,25 +310,30 @@ void adf_remove_ring(struct adf_etr_ring_data *ring) static void adf_ring_response_handler(struct adf_etr_bank_data *bank) { - u32 empty_rings, i; + struct adf_accel_dev *accel_dev = bank->accel_dev; + u8 num_rings_per_bank = GET_NUM_RINGS_PER_BANK(accel_dev); + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + unsigned long empty_rings; + int i; - empty_rings = READ_CSR_E_STAT(bank->csr_addr, bank->bank_number); + empty_rings = csr_ops->read_csr_e_stat(bank->csr_addr, + bank->bank_number); empty_rings = ~empty_rings & bank->irq_mask; - for (i = 0; i < ADF_ETR_MAX_RINGS_PER_BANK; ++i) { - if (empty_rings & (1 << i)) - adf_handle_response(&bank->rings[i]); - } + for_each_set_bit(i, &empty_rings, num_rings_per_bank) + adf_handle_response(&bank->rings[i]); } void adf_response_handler(uintptr_t bank_addr) { struct adf_etr_bank_data *bank = (void *)bank_addr; + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(bank->accel_dev); /* Handle all the responses and reenable IRQs */ adf_ring_response_handler(bank); - WRITE_CSR_INT_FLAG_AND_COL(bank->csr_addr, bank->bank_number, - bank->irq_mask); + + csr_ops->write_csr_int_flag_and_col(bank->csr_addr, bank->bank_number, + bank->irq_mask); } static inline int adf_get_cfg_int(struct adf_accel_dev *accel_dev, @@ -343,9 +372,14 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev, u32 bank_num, void __iomem *csr_addr) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; + u8 num_rings_per_bank = hw_data->num_rings_per_bank; + struct adf_hw_csr_ops *csr_ops = &hw_data->csr_ops; + u32 irq_mask = BIT(num_rings_per_bank) - 1; struct adf_etr_ring_data *ring; struct adf_etr_ring_data *tx_ring; u32 i, coalesc_enabled = 0; + unsigned long ring_mask; + int size; memset(bank, 0, sizeof(*bank)); bank->bank_number = bank_num; @@ -353,6 +387,13 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev, bank->accel_dev = accel_dev; spin_lock_init(&bank->lock); + /* Allocate the rings in the bank */ + size = num_rings_per_bank * sizeof(struct adf_etr_ring_data); + bank->rings = kzalloc_node(size, GFP_KERNEL, + dev_to_node(&GET_DEV(accel_dev))); + if (!bank->rings) + return -ENOMEM; + /* Enable IRQ coalescing always. This will allow to use * the optimised flag and coalesc register. * If it is disabled in the config file just use min time value */ @@ -363,9 +404,10 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev, else bank->irq_coalesc_timer = ADF_COALESCING_MIN_TIME; - for (i = 0; i < ADF_ETR_MAX_RINGS_PER_BANK; i++) { - WRITE_CSR_RING_CONFIG(csr_addr, bank_num, i, 0); - WRITE_CSR_RING_BASE(csr_addr, bank_num, i, 0); + for (i = 0; i < num_rings_per_bank; i++) { + csr_ops->write_csr_ring_config(csr_addr, bank_num, i, 0); + csr_ops->write_csr_ring_base(csr_addr, bank_num, i, 0); + ring = &bank->rings[i]; if (hw_data->tx_rings_mask & (1 << i)) { ring->inflights = @@ -390,15 +432,18 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev, goto err; } - WRITE_CSR_INT_FLAG(csr_addr, bank_num, ADF_BANK_INT_FLAG_CLEAR_MASK); - WRITE_CSR_INT_SRCSEL(csr_addr, bank_num); + csr_ops->write_csr_int_flag(csr_addr, bank_num, irq_mask); + csr_ops->write_csr_int_srcsel(csr_addr, bank_num); + return 0; err: - for (i = 0; i < ADF_ETR_MAX_RINGS_PER_BANK; i++) { + ring_mask = hw_data->tx_rings_mask; + for_each_set_bit(i, &ring_mask, num_rings_per_bank) { ring = &bank->rings[i]; - if (hw_data->tx_rings_mask & (1 << i)) - kfree(ring->inflights); + kfree(ring->inflights); + ring->inflights = NULL; } + kfree(bank->rings); return -ENOMEM; } @@ -464,11 +509,12 @@ EXPORT_SYMBOL_GPL(adf_init_etr_data); static void cleanup_bank(struct adf_etr_bank_data *bank) { + struct adf_accel_dev *accel_dev = bank->accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + u8 num_rings_per_bank = hw_data->num_rings_per_bank; u32 i; - for (i = 0; i < ADF_ETR_MAX_RINGS_PER_BANK; i++) { - struct adf_accel_dev *accel_dev = bank->accel_dev; - struct adf_hw_device_data *hw_data = accel_dev->hw_device; + for (i = 0; i < num_rings_per_bank; i++) { struct adf_etr_ring_data *ring = &bank->rings[i]; if (bank->ring_mask & (1 << i)) @@ -477,6 +523,7 @@ static void cleanup_bank(struct adf_etr_bank_data *bank) if (hw_data->tx_rings_mask & (1 << i)) kfree(ring->inflights); } + kfree(bank->rings); adf_bank_debugfs_rm(bank); memset(bank, 0, sizeof(*bank)); } @@ -507,6 +554,7 @@ void adf_cleanup_etr_data(struct adf_accel_dev *accel_dev) if (etr_data) { adf_cleanup_etr_handles(accel_dev); debugfs_remove(etr_data->debug); + kfree(etr_data->banks->rings); kfree(etr_data->banks); kfree(etr_data); accel_dev->transport = NULL; diff --git a/drivers/crypto/qat/qat_common/adf_transport_access_macros.h b/drivers/crypto/qat/qat_common/adf_transport_access_macros.h index 950d1988556c..3b6b0267bbec 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_access_macros.h +++ b/drivers/crypto/qat/qat_common/adf_transport_access_macros.h @@ -4,23 +4,6 @@ #define ADF_TRANSPORT_ACCESS_MACROS_H #include "adf_accel_devices.h" -#define ADF_BANK_INT_SRC_SEL_MASK_0 0x4444444CUL -#define ADF_BANK_INT_SRC_SEL_MASK_X 0x44444444UL -#define ADF_BANK_INT_FLAG_CLEAR_MASK 0xFFFF -#define ADF_RING_CSR_RING_CONFIG 0x000 -#define ADF_RING_CSR_RING_LBASE 0x040 -#define ADF_RING_CSR_RING_UBASE 0x080 -#define ADF_RING_CSR_RING_HEAD 0x0C0 -#define ADF_RING_CSR_RING_TAIL 0x100 -#define ADF_RING_CSR_E_STAT 0x14C -#define ADF_RING_CSR_INT_FLAG 0x170 -#define ADF_RING_CSR_INT_SRCSEL 0x174 -#define ADF_RING_CSR_INT_SRCSEL_2 0x178 -#define ADF_RING_CSR_INT_COL_EN 0x17C -#define ADF_RING_CSR_INT_COL_CTL 0x180 -#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 -#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 -#define ADF_RING_BUNDLE_SIZE 0x1000 #define ADF_RING_CONFIG_NEAR_FULL_WM 0x0A #define ADF_RING_CONFIG_NEAR_EMPTY_WM 0x05 #define ADF_COALESCING_MIN_TIME 0x1FF @@ -72,54 +55,4 @@ ((watermark_nf << ADF_RING_CONFIG_NEAR_FULL_WM) \ | (watermark_ne << ADF_RING_CONFIG_NEAR_EMPTY_WM) \ | size) -#define BUILD_RING_BASE_ADDR(addr, size) \ - ((addr >> 6) & (0xFFFFFFFFFFFFFFFFULL << size)) -#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_RING_HEAD + (ring << 2)) -#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_RING_TAIL + (ring << 2)) -#define READ_CSR_E_STAT(csr_base_addr, bank) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_E_STAT) -#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_RING_CONFIG + (ring << 2), value) -#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ -do { \ - u32 l_base = 0, u_base = 0; \ - l_base = (u32)(value & 0xFFFFFFFF); \ - u_base = (u32)((value & 0xFFFFFFFF00000000ULL) >> 32); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_RING_LBASE + (ring << 2), l_base); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_RING_UBASE + (ring << 2), u_base); \ -} while (0) -#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_RING_HEAD + (ring << 2), value) -#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_RING_TAIL + (ring << 2), value) -#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_FLAG, value) -#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ -do { \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK_0); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_INT_SRCSEL_2, ADF_BANK_INT_SRC_SEL_MASK_X); \ -} while (0) -#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_INT_COL_EN, value) -#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_INT_COL_CTL, \ - ADF_RING_CSR_INT_COL_CTL_ENABLE | value) -#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \ - ADF_RING_CSR_INT_FLAG_AND_COL, value) #endif diff --git a/drivers/crypto/qat/qat_common/adf_transport_debug.c b/drivers/crypto/qat/qat_common/adf_transport_debug.c index dac25ba47260..1205186ad51e 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_debug.c +++ b/drivers/crypto/qat/qat_common/adf_transport_debug.c @@ -42,16 +42,17 @@ static int adf_ring_show(struct seq_file *sfile, void *v) { struct adf_etr_ring_data *ring = sfile->private; struct adf_etr_bank_data *bank = ring->bank; + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(bank->accel_dev); void __iomem *csr = ring->bank->csr_addr; if (v == SEQ_START_TOKEN) { int head, tail, empty; - head = READ_CSR_RING_HEAD(csr, bank->bank_number, - ring->ring_number); - tail = READ_CSR_RING_TAIL(csr, bank->bank_number, - ring->ring_number); - empty = READ_CSR_E_STAT(csr, bank->bank_number); + head = csr_ops->read_csr_ring_head(csr, bank->bank_number, + ring->ring_number); + tail = csr_ops->read_csr_ring_tail(csr, bank->bank_number, + ring->ring_number); + empty = csr_ops->read_csr_e_stat(csr, bank->bank_number); seq_puts(sfile, "------- Ring configuration -------\n"); seq_printf(sfile, "ring name: %s\n", @@ -117,11 +118,14 @@ void adf_ring_debugfs_rm(struct adf_etr_ring_data *ring) static void *adf_bank_start(struct seq_file *sfile, loff_t *pos) { + struct adf_etr_bank_data *bank = sfile->private; + u8 num_rings_per_bank = GET_NUM_RINGS_PER_BANK(bank->accel_dev); + mutex_lock(&bank_read_lock); if (*pos == 0) return SEQ_START_TOKEN; - if (*pos >= ADF_ETR_MAX_RINGS_PER_BANK) + if (*pos >= num_rings_per_bank) return NULL; return pos; @@ -129,7 +133,10 @@ static void *adf_bank_start(struct seq_file *sfile, loff_t *pos) static void *adf_bank_next(struct seq_file *sfile, void *v, loff_t *pos) { - if (++(*pos) >= ADF_ETR_MAX_RINGS_PER_BANK) + struct adf_etr_bank_data *bank = sfile->private; + u8 num_rings_per_bank = GET_NUM_RINGS_PER_BANK(bank->accel_dev); + + if (++(*pos) >= num_rings_per_bank) return NULL; return pos; @@ -138,6 +145,7 @@ static void *adf_bank_next(struct seq_file *sfile, void *v, loff_t *pos) static int adf_bank_show(struct seq_file *sfile, void *v) { struct adf_etr_bank_data *bank = sfile->private; + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(bank->accel_dev); if (v == SEQ_START_TOKEN) { seq_printf(sfile, "------- Bank %d configuration -------\n", @@ -151,11 +159,11 @@ static int adf_bank_show(struct seq_file *sfile, void *v) if (!(bank->ring_mask & 1 << ring_id)) return 0; - head = READ_CSR_RING_HEAD(csr, bank->bank_number, - ring->ring_number); - tail = READ_CSR_RING_TAIL(csr, bank->bank_number, - ring->ring_number); - empty = READ_CSR_E_STAT(csr, bank->bank_number); + head = csr_ops->read_csr_ring_head(csr, bank->bank_number, + ring->ring_number); + tail = csr_ops->read_csr_ring_tail(csr, bank->bank_number, + ring->ring_number); + empty = csr_ops->read_csr_e_stat(csr, bank->bank_number); seq_printf(sfile, "ring num %02d, head %04x, tail %04x, empty: %d\n", diff --git a/drivers/crypto/qat/qat_common/adf_transport_internal.h b/drivers/crypto/qat/qat_common/adf_transport_internal.h index c7faf4e2d302..501bcf0f1809 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_internal.h +++ b/drivers/crypto/qat/qat_common/adf_transport_internal.h @@ -28,7 +28,7 @@ struct adf_etr_ring_data { }; struct adf_etr_bank_data { - struct adf_etr_ring_data rings[ADF_ETR_MAX_RINGS_PER_BANK]; + struct adf_etr_ring_data *rings; struct tasklet_struct resp_handler; void __iomem *csr_addr; u32 irq_coalesc_timer; diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index c4a44dc6af3e..38d316a42ba6 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -156,6 +156,7 @@ static irqreturn_t adf_isr(int irq, void *privdata) { struct adf_accel_dev *accel_dev = privdata; struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_hw_csr_ops *csr_ops = &hw_data->csr_ops; struct adf_bar *pmisc = &GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)]; void __iomem *pmisc_bar_addr = pmisc->virt_addr; @@ -180,8 +181,8 @@ static irqreturn_t adf_isr(int irq, void *privdata) struct adf_etr_bank_data *bank = &etr_data->banks[0]; /* Disable Flag and Coalesce Ring Interrupts */ - WRITE_CSR_INT_FLAG_AND_COL(bank->csr_addr, bank->bank_number, - 0); + csr_ops->write_csr_int_flag_and_col(bank->csr_addr, + bank->bank_number, 0); tasklet_hi_schedule(&bank->resp_handler); return IRQ_HANDLED; } diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h b/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h index d4d188cd7ed0..f05ad17fbdd6 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h +++ b/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h @@ -6,7 +6,7 @@ #include "icp_qat_fw.h" enum icp_qat_fw_init_admin_cmd_id { - ICP_QAT_FW_INIT_ME = 0, + ICP_QAT_FW_INIT_AE = 0, ICP_QAT_FW_TRNG_ENABLE = 1, ICP_QAT_FW_TRNG_DISABLE = 2, ICP_QAT_FW_CONSTANTS_CFG = 3, @@ -39,7 +39,7 @@ struct icp_qat_fw_init_admin_req { }; __u32 resrvd4; -}; +} __packed; struct icp_qat_fw_init_admin_resp { __u8 flags; @@ -92,7 +92,7 @@ struct icp_qat_fw_init_admin_resp { __u64 resrvd8; }; }; -}; +} __packed; #define ICP_QAT_FW_COMN_HEARTBEAT_OK 0 #define ICP_QAT_FW_COMN_HEARTBEAT_BLOCKED 1 diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_la.h b/drivers/crypto/qat/qat_common/icp_qat_fw_la.h index 6757ec09d81f..28fa17f14be4 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_fw_la.h +++ b/drivers/crypto/qat/qat_common/icp_qat_fw_la.h @@ -33,6 +33,9 @@ struct icp_qat_fw_la_bulk_req { struct icp_qat_fw_comn_req_cd_ctrl cd_ctrl; }; +#define ICP_QAT_FW_LA_USE_UCS_SLICE_TYPE 1 +#define QAT_LA_SLICE_TYPE_BITPOS 14 +#define QAT_LA_SLICE_TYPE_MASK 0x3 #define ICP_QAT_FW_LA_GCM_IV_LEN_12_OCTETS 1 #define ICP_QAT_FW_LA_GCM_IV_LEN_NOT_12_OCTETS 0 #define QAT_FW_LA_ZUC_3G_PROTO_FLAG_BITPOS 12 @@ -179,6 +182,10 @@ struct icp_qat_fw_la_bulk_req { QAT_FIELD_SET(flags, val, QAT_LA_PARTIAL_BITPOS, \ QAT_LA_PARTIAL_MASK) +#define ICP_QAT_FW_LA_SLICE_TYPE_SET(flags, val) \ + QAT_FIELD_SET(flags, val, QAT_LA_SLICE_TYPE_BITPOS, \ + QAT_LA_SLICE_TYPE_MASK) + struct icp_qat_fw_cipher_req_hdr_cd_pars { union { struct { diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h b/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h index 3e8e291cd122..b8f3463be6ef 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h +++ b/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h @@ -15,6 +15,7 @@ struct icp_qat_fw_loader_ae_data { struct icp_qat_fw_loader_hal_handle { struct icp_qat_fw_loader_ae_data aes[ICP_QAT_UCLO_MAX_AE]; unsigned int ae_mask; + unsigned int admin_ae_mask; unsigned int slice_mask; unsigned int revision_id; unsigned int ae_max_num; @@ -22,12 +23,35 @@ struct icp_qat_fw_loader_hal_handle { unsigned int max_ustore; }; +struct icp_qat_fw_loader_chip_info { + bool sram_visible; + bool nn; + bool lm2lm3; + u32 lm_size; + u32 icp_rst_csr; + u32 icp_rst_mask; + u32 glb_clk_enable_csr; + u32 misc_ctl_csr; + u32 wakeup_event_val; + bool fw_auth; + bool css_3k; + bool tgroup_share_ustore; + u32 fcu_ctl_csr; + u32 fcu_sts_csr; + u32 fcu_dram_addr_hi; + u32 fcu_dram_addr_lo; + u32 fcu_loaded_ae_csr; + u8 fcu_loaded_ae_pos; +}; + struct icp_qat_fw_loader_handle { struct icp_qat_fw_loader_hal_handle *hal_handle; + struct icp_qat_fw_loader_chip_info *chip_info; struct pci_dev *pci_dev; void *obj_handle; void *sobj_handle; - bool fw_auth; + void *mobj_handle; + unsigned int cfg_ae_mask; void __iomem *hal_sram_addr_v; void __iomem *hal_cap_g_ctl_csr_addr_v; void __iomem *hal_cap_ae_xfer_csr_addr_v; diff --git a/drivers/crypto/qat/qat_common/icp_qat_hal.h b/drivers/crypto/qat/qat_common/icp_qat_hal.h index c0e9fc0c93dd..20b2ee1fc65a 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_hal.h +++ b/drivers/crypto/qat/qat_common/icp_qat_hal.h @@ -5,9 +5,17 @@ #include "icp_qat_fw_loader_handle.h" enum hal_global_csr { - MISC_CONTROL = 0x04, - ICP_RESET = 0x0c, - ICP_GLOBAL_CLK_ENABLE = 0x50 + MISC_CONTROL = 0xA04, + ICP_RESET = 0xA0c, + ICP_GLOBAL_CLK_ENABLE = 0xA50 +}; + +enum { + MISC_CONTROL_C4XXX = 0xAA0, + ICP_RESET_CPP0 = 0x938, + ICP_RESET_CPP1 = 0x93c, + ICP_GLOBAL_CLK_ENABLE_CPP0 = 0x964, + ICP_GLOBAL_CLK_ENABLE_CPP1 = 0x968 }; enum hal_ae_csr { @@ -26,8 +34,14 @@ enum hal_ae_csr { CTX_WAKEUP_EVENTS_INDIRECT = 0x050, LM_ADDR_0_INDIRECT = 0x060, LM_ADDR_1_INDIRECT = 0x068, + LM_ADDR_2_INDIRECT = 0x0cc, + LM_ADDR_3_INDIRECT = 0x0d4, INDIRECT_LM_ADDR_0_BYTE_INDEX = 0x0e0, INDIRECT_LM_ADDR_1_BYTE_INDEX = 0x0e8, + INDIRECT_LM_ADDR_2_BYTE_INDEX = 0x10c, + INDIRECT_LM_ADDR_3_BYTE_INDEX = 0x114, + INDIRECT_T_INDEX = 0x0f8, + INDIRECT_T_INDEX_BYTE_INDEX = 0x0fc, FUTURE_COUNT_SIGNAL_INDIRECT = 0x078, TIMESTAMP_LOW = 0x0c0, TIMESTAMP_HIGH = 0x0c4, @@ -47,6 +61,15 @@ enum fcu_csr { FCU_RAMBASE_ADDR_LO = 0x8d8 }; +enum fcu_csr_4xxx { + FCU_CONTROL_4XXX = 0x1000, + FCU_STATUS_4XXX = 0x1004, + FCU_ME_BROADCAST_MASK_TYPE = 0x1008, + FCU_AE_LOADED_4XXX = 0x1010, + FCU_DRAM_ADDR_LO_4XXX = 0x1014, + FCU_DRAM_ADDR_HI_4XXX = 0x1018, +}; + enum fcu_cmd { FCU_CTRL_CMD_NOOP = 0, FCU_CTRL_CMD_AUTH = 1, @@ -62,12 +85,17 @@ enum fcu_sts { FCU_STS_LOAD_FAIL = 4, FCU_STS_BUSY = 5 }; + +#define ALL_AE_MASK 0xFFFFFFFF #define UA_ECS (0x1 << 31) #define ACS_ABO_BITPOS 31 #define ACS_ACNO 0x7 #define CE_ENABLE_BITPOS 0x8 #define CE_LMADDR_0_GLOBAL_BITPOS 16 #define CE_LMADDR_1_GLOBAL_BITPOS 17 +#define CE_LMADDR_2_GLOBAL_BITPOS 22 +#define CE_LMADDR_3_GLOBAL_BITPOS 23 +#define CE_T_INDEX_GLOBAL_BITPOS 21 #define CE_NN_MODE_BITPOS 20 #define CE_REG_PAR_ERR_BITPOS 25 #define CE_BREAKPOINT_BITPOS 27 @@ -78,7 +106,8 @@ enum fcu_sts { #define XCWE_VOLUNTARY (0x1) #define LCS_STATUS (0x1) #define MMC_SHARE_CS_BITPOS 2 -#define GLOBAL_CSR 0xA00 +#define WAKEUP_EVENT 0x10000 +#define FCU_CTRL_BROADCAST_POS 0x4 #define FCU_CTRL_AE_POS 0x8 #define FCU_AUTH_STS_MASK 0x7 #define FCU_STS_DONE_POS 0x9 @@ -86,27 +115,29 @@ enum fcu_sts { #define FCU_LOADED_AE_POS 0x16 #define FW_AUTH_WAIT_PERIOD 10 #define FW_AUTH_MAX_RETRY 300 - +#define ICP_QAT_AE_OFFSET 0x20000 +#define ICP_QAT_CAP_OFFSET (ICP_QAT_AE_OFFSET + 0x10000) +#define LOCAL_TO_XFER_REG_OFFSET 0x800 +#define ICP_QAT_EP_OFFSET 0x3a000 +#define ICP_QAT_EP_OFFSET_4XXX 0x200000 /* HI MMIO CSRs */ +#define ICP_QAT_AE_OFFSET_4XXX 0x600000 +#define ICP_QAT_CAP_OFFSET_4XXX 0x640000 #define SET_CAP_CSR(handle, csr, val) \ - ADF_CSR_WR(handle->hal_cap_g_ctl_csr_addr_v, csr, val) + ADF_CSR_WR((handle)->hal_cap_g_ctl_csr_addr_v, csr, val) #define GET_CAP_CSR(handle, csr) \ - ADF_CSR_RD(handle->hal_cap_g_ctl_csr_addr_v, csr) -#define SET_GLB_CSR(handle, csr, val) SET_CAP_CSR(handle, csr + GLOBAL_CSR, val) -#define GET_GLB_CSR(handle, csr) GET_CAP_CSR(handle, GLOBAL_CSR + csr) + ADF_CSR_RD((handle)->hal_cap_g_ctl_csr_addr_v, csr) #define AE_CSR(handle, ae) \ - ((char __iomem *)handle->hal_cap_ae_local_csr_addr_v + \ - ((ae & handle->hal_handle->ae_mask) << 12)) -#define AE_CSR_ADDR(handle, ae, csr) (AE_CSR(handle, ae) + (0x3ff & csr)) + ((char __iomem *)(handle)->hal_cap_ae_local_csr_addr_v + ((ae) << 12)) +#define AE_CSR_ADDR(handle, ae, csr) (AE_CSR(handle, ae) + (0x3ff & (csr))) #define SET_AE_CSR(handle, ae, csr, val) \ ADF_CSR_WR(AE_CSR_ADDR(handle, ae, csr), 0, val) #define GET_AE_CSR(handle, ae, csr) ADF_CSR_RD(AE_CSR_ADDR(handle, ae, csr), 0) #define AE_XFER(handle, ae) \ - ((char __iomem *)handle->hal_cap_ae_xfer_csr_addr_v + \ - ((ae & handle->hal_handle->ae_mask) << 12)) + ((char __iomem *)(handle)->hal_cap_ae_xfer_csr_addr_v + ((ae) << 12)) #define AE_XFER_ADDR(handle, ae, reg) (AE_XFER(handle, ae) + \ - ((reg & 0xff) << 2)) + (((reg) & 0xff) << 2)) #define SET_AE_XFER(handle, ae, reg, val) \ ADF_CSR_WR(AE_XFER_ADDR(handle, ae, reg), 0, val) #define SRAM_WRITE(handle, addr, val) \ - ADF_CSR_WR(handle->hal_sram_addr_v, addr, val) + ADF_CSR_WR((handle)->hal_sram_addr_v, addr, val) #endif diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw.h b/drivers/crypto/qat/qat_common/icp_qat_hw.h index c4b6ef1506ab..e39e8a2d51a7 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_hw.h +++ b/drivers/crypto/qat/qat_common/icp_qat_hw.h @@ -65,6 +65,36 @@ struct icp_qat_hw_auth_config { __u32 reserved; }; +struct icp_qat_hw_ucs_cipher_config { + __u32 val; + __u32 reserved[3]; +}; + +enum icp_qat_slice_mask { + ICP_ACCEL_MASK_CIPHER_SLICE = BIT(0), + ICP_ACCEL_MASK_AUTH_SLICE = BIT(1), + ICP_ACCEL_MASK_PKE_SLICE = BIT(2), + ICP_ACCEL_MASK_COMPRESS_SLICE = BIT(3), + ICP_ACCEL_MASK_LZS_SLICE = BIT(4), + ICP_ACCEL_MASK_EIA3_SLICE = BIT(5), + ICP_ACCEL_MASK_SHA3_SLICE = BIT(6), +}; + +enum icp_qat_capabilities_mask { + ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC = BIT(0), + ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC = BIT(1), + ICP_ACCEL_CAPABILITIES_CIPHER = BIT(2), + ICP_ACCEL_CAPABILITIES_AUTHENTICATION = BIT(3), + ICP_ACCEL_CAPABILITIES_RESERVED_1 = BIT(4), + ICP_ACCEL_CAPABILITIES_COMPRESSION = BIT(5), + ICP_ACCEL_CAPABILITIES_LZS_COMPRESSION = BIT(6), + ICP_ACCEL_CAPABILITIES_RAND = BIT(7), + ICP_ACCEL_CAPABILITIES_ZUC = BIT(8), + ICP_ACCEL_CAPABILITIES_SHA3 = BIT(9), + /* Bits 10-25 are currently reserved */ + ICP_ACCEL_CAPABILITIES_AES_V2 = BIT(26) +}; + #define QAT_AUTH_MODE_BITPOS 4 #define QAT_AUTH_MODE_MASK 0xF #define QAT_AUTH_ALGO_BITPOS 0 @@ -255,7 +285,15 @@ struct icp_qat_hw_cipher_aes256_f8 { __u8 key[ICP_QAT_HW_AES_256_F8_KEY_SZ]; }; +struct icp_qat_hw_ucs_cipher_aes256_f8 { + struct icp_qat_hw_ucs_cipher_config cipher_config; + __u8 key[ICP_QAT_HW_AES_256_F8_KEY_SZ]; +}; + struct icp_qat_hw_cipher_algo_blk { - struct icp_qat_hw_cipher_aes256_f8 aes; + union { + struct icp_qat_hw_cipher_aes256_f8 aes; + struct icp_qat_hw_ucs_cipher_aes256_f8 ucs_aes; + }; } __aligned(64); #endif diff --git a/drivers/crypto/qat/qat_common/icp_qat_uclo.h b/drivers/crypto/qat/qat_common/icp_qat_uclo.h index 8fe1ec344fa2..4b36869bf460 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_uclo.h +++ b/drivers/crypto/qat/qat_common/icp_qat_uclo.h @@ -6,6 +6,7 @@ #define ICP_QAT_AC_895XCC_DEV_TYPE 0x00400000 #define ICP_QAT_AC_C62X_DEV_TYPE 0x01000000 #define ICP_QAT_AC_C3XXX_DEV_TYPE 0x02000000 +#define ICP_QAT_AC_4XXX_A_DEV_TYPE 0x08000000 #define ICP_QAT_UCLO_MAX_AE 12 #define ICP_QAT_UCLO_MAX_CTX 8 #define ICP_QAT_UCLO_MAX_UIMAGE (ICP_QAT_UCLO_MAX_AE * ICP_QAT_UCLO_MAX_CTX) @@ -13,6 +14,7 @@ #define ICP_QAT_UCLO_MAX_XFER_REG 128 #define ICP_QAT_UCLO_MAX_GPR_REG 128 #define ICP_QAT_UCLO_MAX_LMEM_REG 1024 +#define ICP_QAT_UCLO_MAX_LMEM_REG_2X 1280 #define ICP_QAT_UCLO_AE_ALL_CTX 0xff #define ICP_QAT_UOF_OBJID_LEN 8 #define ICP_QAT_UOF_FID 0xc6c2 @@ -31,26 +33,59 @@ #define ICP_QAT_SUOF_FID 0x53554f46 #define ICP_QAT_SUOF_MAJVER 0x0 #define ICP_QAT_SUOF_MINVER 0x1 +#define ICP_QAT_SUOF_OBJ_NAME_LEN 128 +#define ICP_QAT_MOF_OBJ_ID_LEN 8 +#define ICP_QAT_MOF_OBJ_CHUNKID_LEN 8 +#define ICP_QAT_MOF_FID 0x00666f6d +#define ICP_QAT_MOF_MAJVER 0x0 +#define ICP_QAT_MOF_MINVER 0x1 +#define ICP_QAT_MOF_SYM_OBJS "SYM_OBJS" +#define ICP_QAT_SUOF_OBJS "SUF_OBJS" +#define ICP_QAT_SUOF_IMAG "SUF_IMAG" #define ICP_QAT_SIMG_AE_INIT_SEQ_LEN (50 * sizeof(unsigned long long)) #define ICP_QAT_SIMG_AE_INSTS_LEN (0x4000 * sizeof(unsigned long long)) -#define ICP_QAT_CSS_FWSK_MODULUS_LEN 256 -#define ICP_QAT_CSS_FWSK_EXPONENT_LEN 4 -#define ICP_QAT_CSS_FWSK_PAD_LEN 252 -#define ICP_QAT_CSS_FWSK_PUB_LEN (ICP_QAT_CSS_FWSK_MODULUS_LEN + \ - ICP_QAT_CSS_FWSK_EXPONENT_LEN + \ - ICP_QAT_CSS_FWSK_PAD_LEN) -#define ICP_QAT_CSS_SIGNATURE_LEN 256 + +#define DSS_FWSK_MODULUS_LEN 384 /* RSA3K */ +#define DSS_FWSK_EXPONENT_LEN 4 +#define DSS_FWSK_PADDING_LEN 380 +#define DSS_SIGNATURE_LEN 384 /* RSA3K */ + +#define CSS_FWSK_MODULUS_LEN 256 /* RSA2K */ +#define CSS_FWSK_EXPONENT_LEN 4 +#define CSS_FWSK_PADDING_LEN 252 +#define CSS_SIGNATURE_LEN 256 /* RSA2K */ + +#define ICP_QAT_CSS_FWSK_MODULUS_LEN(handle) ((handle)->chip_info->css_3k ? \ + DSS_FWSK_MODULUS_LEN : \ + CSS_FWSK_MODULUS_LEN) + +#define ICP_QAT_CSS_FWSK_EXPONENT_LEN(handle) ((handle)->chip_info->css_3k ? \ + DSS_FWSK_EXPONENT_LEN : \ + CSS_FWSK_EXPONENT_LEN) + +#define ICP_QAT_CSS_FWSK_PAD_LEN(handle) ((handle)->chip_info->css_3k ? \ + DSS_FWSK_PADDING_LEN : \ + CSS_FWSK_PADDING_LEN) + +#define ICP_QAT_CSS_FWSK_PUB_LEN(handle) (ICP_QAT_CSS_FWSK_MODULUS_LEN(handle) + \ + ICP_QAT_CSS_FWSK_EXPONENT_LEN(handle) + \ + ICP_QAT_CSS_FWSK_PAD_LEN(handle)) + +#define ICP_QAT_CSS_SIGNATURE_LEN(handle) ((handle)->chip_info->css_3k ? \ + DSS_SIGNATURE_LEN : \ + CSS_SIGNATURE_LEN) + #define ICP_QAT_CSS_AE_IMG_LEN (sizeof(struct icp_qat_simg_ae_mode) + \ ICP_QAT_SIMG_AE_INIT_SEQ_LEN + \ ICP_QAT_SIMG_AE_INSTS_LEN) -#define ICP_QAT_CSS_AE_SIMG_LEN (sizeof(struct icp_qat_css_hdr) + \ - ICP_QAT_CSS_FWSK_PUB_LEN + \ - ICP_QAT_CSS_SIGNATURE_LEN + \ - ICP_QAT_CSS_AE_IMG_LEN) -#define ICP_QAT_AE_IMG_OFFSET (sizeof(struct icp_qat_css_hdr) + \ - ICP_QAT_CSS_FWSK_MODULUS_LEN + \ - ICP_QAT_CSS_FWSK_EXPONENT_LEN + \ - ICP_QAT_CSS_SIGNATURE_LEN) +#define ICP_QAT_CSS_AE_SIMG_LEN(handle) (sizeof(struct icp_qat_css_hdr) + \ + ICP_QAT_CSS_FWSK_PUB_LEN(handle) + \ + ICP_QAT_CSS_SIGNATURE_LEN(handle) + \ + ICP_QAT_CSS_AE_IMG_LEN) +#define ICP_QAT_AE_IMG_OFFSET(handle) (sizeof(struct icp_qat_css_hdr) + \ + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle) + \ + ICP_QAT_CSS_FWSK_EXPONENT_LEN(handle) + \ + ICP_QAT_CSS_SIGNATURE_LEN(handle)) #define ICP_QAT_CSS_MAX_IMAGE_LEN 0x40000 #define ICP_QAT_CTX_MODE(ae_mode) ((ae_mode) & 0xf) @@ -60,6 +95,9 @@ #define ICP_QAT_LOC_MEM0_MODE(ae_mode) (((ae_mode) >> 0x8) & 0x1) #define ICP_QAT_LOC_MEM1_MODE(ae_mode) (((ae_mode) >> 0x9) & 0x1) +#define ICP_QAT_LOC_MEM2_MODE(ae_mode) (((ae_mode) >> 0x6) & 0x1) +#define ICP_QAT_LOC_MEM3_MODE(ae_mode) (((ae_mode) >> 0x7) & 0x1) +#define ICP_QAT_LOC_TINDEX_MODE(ae_mode) (((ae_mode) >> 0xe) & 0x1) enum icp_qat_uof_mem_region { ICP_QAT_UOF_SRAM_REGION = 0x0, @@ -89,6 +127,8 @@ enum icp_qat_uof_regtype { ICP_LMEM0 = 27, ICP_LMEM1 = 28, ICP_NEIGH_REL = 31, + ICP_LMEM2 = 61, + ICP_LMEM3 = 62, }; enum icp_qat_css_fwtype { @@ -394,7 +434,7 @@ struct icp_qat_suof_handle { struct icp_qat_fw_auth_desc { unsigned int img_len; - unsigned int reserved; + unsigned int ae_mask; unsigned int css_hdr_high; unsigned int css_hdr_low; unsigned int img_high; @@ -481,4 +521,64 @@ struct icp_qat_suof_objhdr { unsigned int img_length; unsigned int reserved; }; + +struct icp_qat_mof_file_hdr { + unsigned int file_id; + unsigned int checksum; + char min_ver; + char maj_ver; + unsigned short reserved; + unsigned short max_chunks; + unsigned short num_chunks; +}; + +struct icp_qat_mof_chunkhdr { + char chunk_id[ICP_QAT_MOF_OBJ_ID_LEN]; + u64 offset; + u64 size; +}; + +struct icp_qat_mof_str_table { + unsigned int tab_len; + unsigned int strings; +}; + +struct icp_qat_mof_obj_hdr { + unsigned short max_chunks; + unsigned short num_chunks; + unsigned int reserved; +}; + +struct icp_qat_mof_obj_chunkhdr { + char chunk_id[ICP_QAT_MOF_OBJ_CHUNKID_LEN]; + u64 offset; + u64 size; + unsigned int name; + unsigned int reserved; +}; + +struct icp_qat_mof_objhdr { + char *obj_name; + char *obj_buf; + unsigned int obj_size; +}; + +struct icp_qat_mof_table { + unsigned int num_objs; + struct icp_qat_mof_objhdr *obj_hdr; +}; + +struct icp_qat_mof_handle { + unsigned int file_id; + unsigned int checksum; + char min_ver; + char maj_ver; + char *mof_buf; + u32 mof_size; + char *sym_str; + unsigned int sym_size; + char *uobjs_hdr; + char *sobjs_hdr; + struct icp_qat_mof_table obj_table; +}; #endif diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index d552dbcfe0a0..31c7a206a629 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -6,11 +6,13 @@ #include #include #include -#include +#include +#include #include #include #include #include +#include #include #include #include "adf_accel_devices.h" @@ -31,6 +33,15 @@ ICP_QAT_HW_CIPHER_KEY_CONVERT, \ ICP_QAT_HW_CIPHER_DECRYPT) +#define QAT_AES_HW_CONFIG_DEC_NO_CONV(alg, mode) \ + ICP_QAT_HW_CIPHER_CONFIG_BUILD(mode, alg, \ + ICP_QAT_HW_CIPHER_NO_CONVERT, \ + ICP_QAT_HW_CIPHER_DECRYPT) + +#define HW_CAP_AES_V2(accel_dev) \ + (GET_HW_DATA(accel_dev)->accel_capabilities_mask & \ + ICP_ACCEL_CAPABILITIES_AES_V2) + static DEFINE_MUTEX(algs_lock); static unsigned int active_devs; @@ -89,7 +100,9 @@ struct qat_alg_skcipher_ctx { struct icp_qat_fw_la_bulk_req dec_fw_req; struct qat_crypto_instance *inst; struct crypto_skcipher *ftfm; + struct crypto_cipher *tweak; bool fallback; + int mode; }; static int qat_get_inter_state_size(enum icp_qat_hw_auth_algo qat_hash_alg) @@ -103,7 +116,7 @@ static int qat_get_inter_state_size(enum icp_qat_hw_auth_algo qat_hash_alg) return ICP_QAT_HW_SHA512_STATE1_SZ; default: return -EFAULT; - }; + } return -EFAULT; } @@ -214,24 +227,7 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, return 0; } -static void qat_alg_init_hdr_iv_updt(struct icp_qat_fw_comn_req_hdr *header) -{ - ICP_QAT_FW_LA_CIPH_IV_FLD_FLAG_SET(header->serv_specif_flags, - ICP_QAT_FW_CIPH_IV_64BIT_PTR); - ICP_QAT_FW_LA_UPDATE_STATE_SET(header->serv_specif_flags, - ICP_QAT_FW_LA_UPDATE_STATE); -} - -static void qat_alg_init_hdr_no_iv_updt(struct icp_qat_fw_comn_req_hdr *header) -{ - ICP_QAT_FW_LA_CIPH_IV_FLD_FLAG_SET(header->serv_specif_flags, - ICP_QAT_FW_CIPH_IV_16BYTE_DATA); - ICP_QAT_FW_LA_UPDATE_STATE_SET(header->serv_specif_flags, - ICP_QAT_FW_LA_NO_UPDATE_STATE); -} - -static void qat_alg_init_common_hdr(struct icp_qat_fw_comn_req_hdr *header, - int aead) +static void qat_alg_init_common_hdr(struct icp_qat_fw_comn_req_hdr *header) { header->hdr_flags = ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET); @@ -241,12 +237,12 @@ static void qat_alg_init_common_hdr(struct icp_qat_fw_comn_req_hdr *header, QAT_COMN_PTR_TYPE_SGL); ICP_QAT_FW_LA_PARTIAL_SET(header->serv_specif_flags, ICP_QAT_FW_LA_PARTIAL_NONE); - if (aead) - qat_alg_init_hdr_no_iv_updt(header); - else - qat_alg_init_hdr_iv_updt(header); + ICP_QAT_FW_LA_CIPH_IV_FLD_FLAG_SET(header->serv_specif_flags, + ICP_QAT_FW_CIPH_IV_16BYTE_DATA); ICP_QAT_FW_LA_PROTO_SET(header->serv_specif_flags, ICP_QAT_FW_LA_NO_PROTO); + ICP_QAT_FW_LA_UPDATE_STATE_SET(header->serv_specif_flags, + ICP_QAT_FW_LA_NO_UPDATE_STATE); } static int qat_alg_aead_init_enc_session(struct crypto_aead *aead_tfm, @@ -281,7 +277,7 @@ static int qat_alg_aead_init_enc_session(struct crypto_aead *aead_tfm, return -EFAULT; /* Request setup */ - qat_alg_init_common_hdr(header, 1); + qat_alg_init_common_hdr(header); header->service_cmd_id = ICP_QAT_FW_LA_CMD_CIPHER_HASH; ICP_QAT_FW_LA_DIGEST_IN_BUFFER_SET(header->serv_specif_flags, ICP_QAT_FW_LA_DIGEST_IN_BUFFER); @@ -368,7 +364,7 @@ static int qat_alg_aead_init_dec_session(struct crypto_aead *aead_tfm, return -EFAULT; /* Request setup */ - qat_alg_init_common_hdr(header, 1); + qat_alg_init_common_hdr(header); header->service_cmd_id = ICP_QAT_FW_LA_CMD_HASH_CIPHER; ICP_QAT_FW_LA_DIGEST_IN_BUFFER_SET(header->serv_specif_flags, ICP_QAT_FW_LA_DIGEST_IN_BUFFER); @@ -430,12 +426,32 @@ static void qat_alg_skcipher_init_com(struct qat_alg_skcipher_ctx *ctx, struct icp_qat_fw_comn_req_hdr_cd_pars *cd_pars = &req->cd_pars; struct icp_qat_fw_comn_req_hdr *header = &req->comn_hdr; struct icp_qat_fw_cipher_cd_ctrl_hdr *cd_ctrl = (void *)&req->cd_ctrl; + bool aes_v2_capable = HW_CAP_AES_V2(ctx->inst->accel_dev); + int mode = ctx->mode; - memcpy(cd->aes.key, key, keylen); - qat_alg_init_common_hdr(header, 0); + qat_alg_init_common_hdr(header); header->service_cmd_id = ICP_QAT_FW_LA_CMD_CIPHER; cd_pars->u.s.content_desc_params_sz = sizeof(struct icp_qat_hw_cipher_algo_blk) >> 3; + + if (aes_v2_capable && mode == ICP_QAT_HW_CIPHER_XTS_MODE) { + ICP_QAT_FW_LA_SLICE_TYPE_SET(header->serv_specif_flags, + ICP_QAT_FW_LA_USE_UCS_SLICE_TYPE); + + /* Store both XTS keys in CD, only the first key is sent + * to the HW, the second key is used for tweak calculation + */ + memcpy(cd->ucs_aes.key, key, keylen); + keylen = keylen / 2; + } else if (aes_v2_capable && mode == ICP_QAT_HW_CIPHER_CTR_MODE) { + ICP_QAT_FW_LA_SLICE_TYPE_SET(header->serv_specif_flags, + ICP_QAT_FW_LA_USE_UCS_SLICE_TYPE); + keylen = round_up(keylen, 16); + memcpy(cd->ucs_aes.key, key, keylen); + } else { + memcpy(cd->aes.key, key, keylen); + } + /* Cipher CD config setup */ cd_ctrl->cipher_key_sz = keylen >> 3; cd_ctrl->cipher_state_sz = AES_BLOCK_SIZE >> 3; @@ -457,6 +473,28 @@ static void qat_alg_skcipher_init_enc(struct qat_alg_skcipher_ctx *ctx, enc_cd->aes.cipher_config.val = QAT_AES_HW_CONFIG_ENC(alg, mode); } +static void qat_alg_xts_reverse_key(const u8 *key_forward, unsigned int keylen, + u8 *key_reverse) +{ + struct crypto_aes_ctx aes_expanded; + int nrounds; + u8 *key; + + aes_expandkey(&aes_expanded, key_forward, keylen); + if (keylen == AES_KEYSIZE_128) { + nrounds = 10; + key = (u8 *)aes_expanded.key_enc + (AES_BLOCK_SIZE * nrounds); + memcpy(key_reverse, key, AES_BLOCK_SIZE); + } else { + /* AES_KEYSIZE_256 */ + nrounds = 14; + key = (u8 *)aes_expanded.key_enc + (AES_BLOCK_SIZE * nrounds); + memcpy(key_reverse, key, AES_BLOCK_SIZE); + memcpy(key_reverse + AES_BLOCK_SIZE, key - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + } +} + static void qat_alg_skcipher_init_dec(struct qat_alg_skcipher_ctx *ctx, int alg, const u8 *key, unsigned int keylen, int mode) @@ -464,16 +502,26 @@ static void qat_alg_skcipher_init_dec(struct qat_alg_skcipher_ctx *ctx, struct icp_qat_hw_cipher_algo_blk *dec_cd = ctx->dec_cd; struct icp_qat_fw_la_bulk_req *req = &ctx->dec_fw_req; struct icp_qat_fw_comn_req_hdr_cd_pars *cd_pars = &req->cd_pars; + bool aes_v2_capable = HW_CAP_AES_V2(ctx->inst->accel_dev); qat_alg_skcipher_init_com(ctx, req, dec_cd, key, keylen); cd_pars->u.s.content_desc_addr = ctx->dec_cd_paddr; - if (mode != ICP_QAT_HW_CIPHER_CTR_MODE) + if (aes_v2_capable && mode == ICP_QAT_HW_CIPHER_XTS_MODE) { + /* Key reversing not supported, set no convert */ + dec_cd->aes.cipher_config.val = + QAT_AES_HW_CONFIG_DEC_NO_CONV(alg, mode); + + /* In-place key reversal */ + qat_alg_xts_reverse_key(dec_cd->ucs_aes.key, keylen / 2, + dec_cd->ucs_aes.key); + } else if (mode != ICP_QAT_HW_CIPHER_CTR_MODE) { dec_cd->aes.cipher_config.val = QAT_AES_HW_CONFIG_DEC(alg, mode); - else + } else { dec_cd->aes.cipher_config.val = QAT_AES_HW_CONFIG_ENC(alg, mode); + } } static int qat_alg_validate_key(int key_len, int *alg, int mode) @@ -787,6 +835,61 @@ static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp, areq->base.complete(&areq->base, res); } +static void qat_alg_update_iv_ctr_mode(struct qat_crypto_request *qat_req) +{ + struct skcipher_request *sreq = qat_req->skcipher_req; + u64 iv_lo_prev; + u64 iv_lo; + u64 iv_hi; + + memcpy(qat_req->iv, sreq->iv, AES_BLOCK_SIZE); + + iv_lo = be64_to_cpu(qat_req->iv_lo); + iv_hi = be64_to_cpu(qat_req->iv_hi); + + iv_lo_prev = iv_lo; + iv_lo += DIV_ROUND_UP(sreq->cryptlen, AES_BLOCK_SIZE); + if (iv_lo < iv_lo_prev) + iv_hi++; + + qat_req->iv_lo = cpu_to_be64(iv_lo); + qat_req->iv_hi = cpu_to_be64(iv_hi); +} + +static void qat_alg_update_iv_cbc_mode(struct qat_crypto_request *qat_req) +{ + struct skcipher_request *sreq = qat_req->skcipher_req; + int offset = sreq->cryptlen - AES_BLOCK_SIZE; + struct scatterlist *sgl; + + if (qat_req->encryption) + sgl = sreq->dst; + else + sgl = sreq->src; + + scatterwalk_map_and_copy(qat_req->iv, sgl, offset, AES_BLOCK_SIZE, 0); +} + +static void qat_alg_update_iv(struct qat_crypto_request *qat_req) +{ + struct qat_alg_skcipher_ctx *ctx = qat_req->skcipher_ctx; + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + + switch (ctx->mode) { + case ICP_QAT_HW_CIPHER_CTR_MODE: + qat_alg_update_iv_ctr_mode(qat_req); + break; + case ICP_QAT_HW_CIPHER_CBC_MODE: + qat_alg_update_iv_cbc_mode(qat_req); + break; + case ICP_QAT_HW_CIPHER_XTS_MODE: + break; + default: + dev_warn(dev, "Unsupported IV update for cipher mode %d\n", + ctx->mode); + } +} + static void qat_skcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp, struct qat_crypto_request *qat_req) { @@ -794,16 +897,16 @@ static void qat_skcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp, struct qat_crypto_instance *inst = ctx->inst; struct skcipher_request *sreq = qat_req->skcipher_req; u8 stat_filed = qat_resp->comn_resp.comn_status; - struct device *dev = &GET_DEV(ctx->inst->accel_dev); int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed); qat_alg_free_bufl(inst, qat_req); if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) res = -EINVAL; + if (qat_req->encryption) + qat_alg_update_iv(qat_req); + memcpy(sreq->iv, qat_req->iv, AES_BLOCK_SIZE); - dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv, - qat_req->iv_paddr); sreq->base.complete(&sreq->base, res); } @@ -981,6 +1084,8 @@ static int qat_alg_skcipher_setkey(struct crypto_skcipher *tfm, { struct qat_alg_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + ctx->mode = mode; + if (ctx->enc_cd) return qat_alg_skcipher_rekey(ctx, key, keylen, mode); else @@ -1023,8 +1128,33 @@ static int qat_alg_skcipher_xts_setkey(struct crypto_skcipher *tfm, ctx->fallback = false; - return qat_alg_skcipher_setkey(tfm, key, keylen, - ICP_QAT_HW_CIPHER_XTS_MODE); + ret = qat_alg_skcipher_setkey(tfm, key, keylen, + ICP_QAT_HW_CIPHER_XTS_MODE); + if (ret) + return ret; + + if (HW_CAP_AES_V2(ctx->inst->accel_dev)) + ret = crypto_cipher_setkey(ctx->tweak, key + (keylen / 2), + keylen / 2); + + return ret; +} + +static void qat_alg_set_req_iv(struct qat_crypto_request *qat_req) +{ + struct icp_qat_fw_la_cipher_req_params *cipher_param; + struct qat_alg_skcipher_ctx *ctx = qat_req->skcipher_ctx; + bool aes_v2_capable = HW_CAP_AES_V2(ctx->inst->accel_dev); + u8 *iv = qat_req->skcipher_req->iv; + + cipher_param = (void *)&qat_req->req.serv_specif_rqpars; + + if (aes_v2_capable && ctx->mode == ICP_QAT_HW_CIPHER_XTS_MODE) + crypto_cipher_encrypt_one(ctx->tweak, + (u8 *)cipher_param->u.cipher_IV_array, + iv); + else + memcpy(cipher_param->u.cipher_IV_array, iv, AES_BLOCK_SIZE); } static int qat_alg_skcipher_encrypt(struct skcipher_request *req) @@ -1035,23 +1165,14 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) struct qat_crypto_request *qat_req = skcipher_request_ctx(req); struct icp_qat_fw_la_cipher_req_params *cipher_param; struct icp_qat_fw_la_bulk_req *msg; - struct device *dev = &GET_DEV(ctx->inst->accel_dev); int ret, ctr = 0; if (req->cryptlen == 0) return 0; - qat_req->iv = dma_alloc_coherent(dev, AES_BLOCK_SIZE, - &qat_req->iv_paddr, GFP_ATOMIC); - if (!qat_req->iv) - return -ENOMEM; - ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req); - if (unlikely(ret)) { - dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv, - qat_req->iv_paddr); + if (unlikely(ret)) return ret; - } msg = &qat_req->req; *msg = ctx->enc_fw_req; @@ -1061,19 +1182,19 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) qat_req->req.comn_mid.opaque_data = (u64)(__force long)qat_req; qat_req->req.comn_mid.src_data_addr = qat_req->buf.blp; qat_req->req.comn_mid.dest_data_addr = qat_req->buf.bloutp; + qat_req->encryption = true; cipher_param = (void *)&qat_req->req.serv_specif_rqpars; cipher_param->cipher_length = req->cryptlen; cipher_param->cipher_offset = 0; - cipher_param->u.s.cipher_IV_ptr = qat_req->iv_paddr; - memcpy(qat_req->iv, req->iv, AES_BLOCK_SIZE); + + qat_alg_set_req_iv(qat_req); + do { ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg); } while (ret == -EAGAIN && ctr++ < 10); if (ret == -EAGAIN) { qat_alg_free_bufl(ctx->inst, qat_req); - dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv, - qat_req->iv_paddr); return -EBUSY; } return -EINPROGRESS; @@ -1113,23 +1234,14 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) struct qat_crypto_request *qat_req = skcipher_request_ctx(req); struct icp_qat_fw_la_cipher_req_params *cipher_param; struct icp_qat_fw_la_bulk_req *msg; - struct device *dev = &GET_DEV(ctx->inst->accel_dev); int ret, ctr = 0; if (req->cryptlen == 0) return 0; - qat_req->iv = dma_alloc_coherent(dev, AES_BLOCK_SIZE, - &qat_req->iv_paddr, GFP_ATOMIC); - if (!qat_req->iv) - return -ENOMEM; - ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req); - if (unlikely(ret)) { - dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv, - qat_req->iv_paddr); + if (unlikely(ret)) return ret; - } msg = &qat_req->req; *msg = ctx->dec_fw_req; @@ -1139,19 +1251,20 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) qat_req->req.comn_mid.opaque_data = (u64)(__force long)qat_req; qat_req->req.comn_mid.src_data_addr = qat_req->buf.blp; qat_req->req.comn_mid.dest_data_addr = qat_req->buf.bloutp; + qat_req->encryption = false; cipher_param = (void *)&qat_req->req.serv_specif_rqpars; cipher_param->cipher_length = req->cryptlen; cipher_param->cipher_offset = 0; - cipher_param->u.s.cipher_IV_ptr = qat_req->iv_paddr; - memcpy(qat_req->iv, req->iv, AES_BLOCK_SIZE); + + qat_alg_set_req_iv(qat_req); + qat_alg_update_iv(qat_req); + do { ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg); } while (ret == -EAGAIN && ctr++ < 10); if (ret == -EAGAIN) { qat_alg_free_bufl(ctx->inst, qat_req); - dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv, - qat_req->iv_paddr); return -EBUSY; } return -EINPROGRESS; @@ -1253,6 +1366,12 @@ static int qat_alg_skcipher_init_xts_tfm(struct crypto_skcipher *tfm) if (IS_ERR(ctx->ftfm)) return PTR_ERR(ctx->ftfm); + ctx->tweak = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(ctx->tweak)) { + crypto_free_skcipher(ctx->ftfm); + return PTR_ERR(ctx->tweak); + } + reqsize = max(sizeof(struct qat_crypto_request), sizeof(struct skcipher_request) + crypto_skcipher_reqsize(ctx->ftfm)); @@ -1295,6 +1414,9 @@ static void qat_alg_skcipher_exit_xts_tfm(struct crypto_skcipher *tfm) if (ctx->ftfm) crypto_free_skcipher(ctx->ftfm); + if (ctx->tweak) + crypto_free_cipher(ctx->tweak); + qat_alg_skcipher_exit_tfm(tfm); } diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 846569ec9066..2c863d25327a 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -201,12 +201,7 @@ static unsigned long qat_dh_fn_id(unsigned int len, bool g2) return g2 ? PKE_DH_G2_4096 : PKE_DH_4096; default: return 0; - }; -} - -static inline struct qat_dh_ctx *qat_dh_get_params(struct crypto_kpp *tfm) -{ - return kpp_tfm_ctx(tfm); + } } static int qat_dh_compute_value(struct kpp_request *req) @@ -577,7 +572,7 @@ static unsigned long qat_rsa_enc_fn_id(unsigned int len) return PKE_RSA_EP_4096; default: return 0; - }; + } } #define PKE_RSA_DP1_512 0x1c161b3c @@ -606,7 +601,7 @@ static unsigned long qat_rsa_dec_fn_id(unsigned int len) return PKE_RSA_DP1_4096; default: return 0; - }; + } } #define PKE_RSA_DP2_512 0x1c131b57 @@ -635,7 +630,7 @@ static unsigned long qat_rsa_dec_fn_id_crt(unsigned int len) return PKE_RSA_DP2_4096; default: return 0; - }; + } } static int qat_rsa_enc(struct akcipher_request *req) diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index ab621b7dbd20..ece6776fbd53 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -115,165 +115,217 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node) */ int qat_crypto_dev_config(struct adf_accel_dev *accel_dev) { - int cpus = num_online_cpus(); - int banks = GET_MAX_BANKS(accel_dev); - int instances = min(cpus, banks); char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; - int i; + int banks = GET_MAX_BANKS(accel_dev); + int cpus = num_online_cpus(); unsigned long val; + int instances; + int ret; + int i; - if (adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC)) + if (adf_hw_dev_has_crypto(accel_dev)) + instances = min(cpus, banks); + else + instances = 0; + + ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); + if (ret) goto err; - if (adf_cfg_section_add(accel_dev, "Accelerator0")) + + ret = adf_cfg_section_add(accel_dev, "Accelerator0"); + if (ret) goto err; + for (i = 0; i < instances; i++) { val = i; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_BANK_NUM, i); - if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, (void *)&val, ADF_DEC)) + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) goto err; snprintf(key, sizeof(key), ADF_CY "%d" ADF_ETRMGR_CORE_AFFINITY, i); - if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, (void *)&val, ADF_DEC)) + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) goto err; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i); val = 128; - if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, (void *)&val, ADF_DEC)) + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) goto err; val = 512; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i); - if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, (void *)&val, ADF_DEC)) + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) goto err; val = 0; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i); - if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, (void *)&val, ADF_DEC)) + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) goto err; val = 2; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i); - if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, (void *)&val, ADF_DEC)) + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) goto err; val = 8; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i); - if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, (void *)&val, ADF_DEC)) + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) goto err; val = 10; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i); - if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, (void *)&val, ADF_DEC)) + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) goto err; val = ADF_COALESCING_DEF_TIME; snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); - if (adf_cfg_add_key_value_param(accel_dev, "Accelerator0", - key, (void *)&val, ADF_DEC)) + ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", + key, &val, ADF_DEC); + if (ret) goto err; } val = i; - if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - ADF_NUM_CY, (void *)&val, ADF_DEC)) + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC); + if (ret) goto err; set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); return 0; err: dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n"); - return -EINVAL; + return ret; } EXPORT_SYMBOL_GPL(qat_crypto_dev_config); static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev) { - int i; - unsigned long bank; unsigned long num_inst, num_msg_sym, num_msg_asym; - int msg_size; - struct qat_crypto_instance *inst; char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; + unsigned long sym_bank, asym_bank; + struct qat_crypto_instance *inst; + int msg_size; + int ret; + int i; INIT_LIST_HEAD(&accel_dev->crypto_list); - if (adf_cfg_get_param_value(accel_dev, SEC, ADF_NUM_CY, val)) - return -EFAULT; + ret = adf_cfg_get_param_value(accel_dev, SEC, ADF_NUM_CY, val); + if (ret) + return ret; - if (kstrtoul(val, 0, &num_inst)) - return -EFAULT; + ret = kstrtoul(val, 0, &num_inst); + if (ret) + return ret; for (i = 0; i < num_inst; i++) { inst = kzalloc_node(sizeof(*inst), GFP_KERNEL, dev_to_node(&GET_DEV(accel_dev))); - if (!inst) + if (!inst) { + ret = -ENOMEM; goto err; + } list_add_tail(&inst->list, &accel_dev->crypto_list); inst->id = i; atomic_set(&inst->refctr, 0); inst->accel_dev = accel_dev; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_BANK_NUM, i); - if (adf_cfg_get_param_value(accel_dev, SEC, key, val)) + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i); + ret = adf_cfg_get_param_value(accel_dev, SEC, key, val); + if (ret) goto err; - if (kstrtoul(val, 10, &bank)) + ret = kstrtoul(val, 10, &sym_bank); + if (ret) goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); + ret = adf_cfg_get_param_value(accel_dev, SEC, key, val); + if (ret) + goto err; + + ret = kstrtoul(val, 10, &asym_bank); + if (ret) + goto err; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i); - if (adf_cfg_get_param_value(accel_dev, SEC, key, val)) + ret = adf_cfg_get_param_value(accel_dev, SEC, key, val); + if (ret) goto err; - if (kstrtoul(val, 10, &num_msg_sym)) + ret = kstrtoul(val, 10, &num_msg_sym); + if (ret) goto err; num_msg_sym = num_msg_sym >> 1; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i); - if (adf_cfg_get_param_value(accel_dev, SEC, key, val)) + ret = adf_cfg_get_param_value(accel_dev, SEC, key, val); + if (ret) goto err; - if (kstrtoul(val, 10, &num_msg_asym)) + ret = kstrtoul(val, 10, &num_msg_asym); + if (ret) goto err; num_msg_asym = num_msg_asym >> 1; msg_size = ICP_QAT_FW_REQ_DEFAULT_SZ; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i); - if (adf_create_ring(accel_dev, SEC, bank, num_msg_sym, - msg_size, key, NULL, 0, &inst->sym_tx)) + ret = adf_create_ring(accel_dev, SEC, sym_bank, num_msg_sym, + msg_size, key, NULL, 0, &inst->sym_tx); + if (ret) goto err; msg_size = msg_size >> 1; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i); - if (adf_create_ring(accel_dev, SEC, bank, num_msg_asym, - msg_size, key, NULL, 0, &inst->pke_tx)) + ret = adf_create_ring(accel_dev, SEC, asym_bank, num_msg_asym, + msg_size, key, NULL, 0, &inst->pke_tx); + if (ret) goto err; msg_size = ICP_QAT_FW_RESP_DEFAULT_SZ; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i); - if (adf_create_ring(accel_dev, SEC, bank, num_msg_sym, - msg_size, key, qat_alg_callback, 0, - &inst->sym_rx)) + ret = adf_create_ring(accel_dev, SEC, sym_bank, num_msg_sym, + msg_size, key, qat_alg_callback, 0, + &inst->sym_rx); + if (ret) goto err; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i); - if (adf_create_ring(accel_dev, SEC, bank, num_msg_asym, - msg_size, key, qat_alg_asym_callback, 0, - &inst->pke_rx)) + ret = adf_create_ring(accel_dev, SEC, asym_bank, num_msg_asym, + msg_size, key, qat_alg_asym_callback, 0, + &inst->pke_rx); + if (ret) goto err; } return 0; err: qat_crypto_free_instances(accel_dev); - return -ENOMEM; + return ret; } static int qat_crypto_init(struct adf_accel_dev *accel_dev) diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index 12682d1e9f5f..b6a4c95ae003 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -3,6 +3,7 @@ #ifndef _QAT_CRYPTO_INSTANCE_H_ #define _QAT_CRYPTO_INSTANCE_H_ +#include #include #include #include "adf_accel_devices.h" @@ -44,8 +45,29 @@ struct qat_crypto_request { struct qat_crypto_request_buffs buf; void (*cb)(struct icp_qat_fw_la_resp *resp, struct qat_crypto_request *req); - void *iv; - dma_addr_t iv_paddr; + union { + struct { + __be64 iv_hi; + __be64 iv_lo; + }; + u8 iv[AES_BLOCK_SIZE]; + }; + bool encryption; }; +static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + u32 mask = ~hw_device->accel_capabilities_mask; + + if (mask & ADF_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC) + return false; + if (mask & ADF_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC) + return false; + if (mask & ADF_ACCEL_CAPABILITIES_AUTHENTICATION) + return false; + + return true; +} + #endif diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index 6b9d47682d04..bd3028126cbe 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c @@ -33,7 +33,7 @@ ((((const_val) << 12) & 0x0FF00000ull) | \ (((const_val) << 0) & 0x000000FFull)))) -#define AE(handle, ae) handle->hal_handle->aes[ae] +#define AE(handle, ae) ((handle)->hal_handle->aes[ae]) static const u64 inst_4b[] = { 0x0F0400C0000ull, 0x0F4400C0000ull, 0x0F040000300ull, 0x0F440000300ull, @@ -150,15 +150,15 @@ static int qat_hal_wait_cycles(struct icp_qat_fw_loader_handle *handle, return 0; } -#define CLR_BIT(wrd, bit) (wrd & ~(1 << bit)) -#define SET_BIT(wrd, bit) (wrd | 1 << bit) +#define CLR_BIT(wrd, bit) ((wrd) & ~(1 << (bit))) +#define SET_BIT(wrd, bit) ((wrd) | 1 << (bit)) int qat_hal_set_ae_ctx_mode(struct icp_qat_fw_loader_handle *handle, unsigned char ae, unsigned char mode) { unsigned int csr, new_csr; - if ((mode != 4) && (mode != 8)) { + if (mode != 4 && mode != 8) { pr_err("QAT: bad ctx mode=%d\n", mode); return -EINVAL; } @@ -210,6 +210,16 @@ int qat_hal_set_ae_lm_mode(struct icp_qat_fw_loader_handle *handle, SET_BIT(csr, CE_LMADDR_1_GLOBAL_BITPOS) : CLR_BIT(csr, CE_LMADDR_1_GLOBAL_BITPOS); break; + case ICP_LMEM2: + new_csr = (mode) ? + SET_BIT(csr, CE_LMADDR_2_GLOBAL_BITPOS) : + CLR_BIT(csr, CE_LMADDR_2_GLOBAL_BITPOS); + break; + case ICP_LMEM3: + new_csr = (mode) ? + SET_BIT(csr, CE_LMADDR_3_GLOBAL_BITPOS) : + CLR_BIT(csr, CE_LMADDR_3_GLOBAL_BITPOS); + break; default: pr_err("QAT: lmType = 0x%x\n", lm_type); return -EINVAL; @@ -220,6 +230,20 @@ int qat_hal_set_ae_lm_mode(struct icp_qat_fw_loader_handle *handle, return 0; } +void qat_hal_set_ae_tindex_mode(struct icp_qat_fw_loader_handle *handle, + unsigned char ae, unsigned char mode) +{ + unsigned int csr, new_csr; + + csr = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES); + csr &= IGNORE_W1C_MASK; + new_csr = (mode) ? + SET_BIT(csr, CE_T_INDEX_GLOBAL_BITPOS) : + CLR_BIT(csr, CE_T_INDEX_GLOBAL_BITPOS); + if (new_csr != csr) + qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, new_csr); +} + static unsigned short qat_hal_get_reg_addr(unsigned int type, unsigned short reg_num) { @@ -259,6 +283,12 @@ static unsigned short qat_hal_get_reg_addr(unsigned int type, case ICP_LMEM1: reg_addr = 0x220; break; + case ICP_LMEM2: + reg_addr = 0x2c0; + break; + case ICP_LMEM3: + reg_addr = 0x2e0; + break; case ICP_NO_DEST: reg_addr = 0x300 | (reg_num & 0xff); break; @@ -271,12 +301,13 @@ static unsigned short qat_hal_get_reg_addr(unsigned int type, void qat_hal_reset(struct icp_qat_fw_loader_handle *handle) { - unsigned int ae_reset_csr; + unsigned int reset_mask = handle->chip_info->icp_rst_mask; + unsigned int reset_csr = handle->chip_info->icp_rst_csr; + unsigned int csr_val; - ae_reset_csr = GET_GLB_CSR(handle, ICP_RESET); - ae_reset_csr |= handle->hal_handle->ae_mask << RST_CSR_AE_LSB; - ae_reset_csr |= handle->hal_handle->slice_mask << RST_CSR_QAT_LSB; - SET_GLB_CSR(handle, ICP_RESET, ae_reset_csr); + csr_val = GET_CAP_CSR(handle, reset_csr); + csr_val |= reset_mask; + SET_CAP_CSR(handle, reset_csr, csr_val); } static void qat_hal_wr_indr_csr(struct icp_qat_fw_loader_handle *handle, @@ -346,11 +377,12 @@ static void qat_hal_put_wakeup_event(struct icp_qat_fw_loader_handle *handle, static int qat_hal_check_ae_alive(struct icp_qat_fw_loader_handle *handle) { + unsigned long ae_mask = handle->hal_handle->ae_mask; unsigned int base_cnt, cur_cnt; unsigned char ae; int times = MAX_RETRY_TIMES; - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { base_cnt = qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT); base_cnt &= 0xffff; @@ -384,21 +416,23 @@ int qat_hal_check_ae_active(struct icp_qat_fw_loader_handle *handle, static void qat_hal_reset_timestamp(struct icp_qat_fw_loader_handle *handle) { - unsigned int misc_ctl; + unsigned long ae_mask = handle->hal_handle->ae_mask; + unsigned int misc_ctl_csr, misc_ctl; unsigned char ae; + misc_ctl_csr = handle->chip_info->misc_ctl_csr; /* stop the timestamp timers */ - misc_ctl = GET_GLB_CSR(handle, MISC_CONTROL); + misc_ctl = GET_CAP_CSR(handle, misc_ctl_csr); if (misc_ctl & MC_TIMESTAMP_ENABLE) - SET_GLB_CSR(handle, MISC_CONTROL, misc_ctl & + SET_CAP_CSR(handle, misc_ctl_csr, misc_ctl & (~MC_TIMESTAMP_ENABLE)); - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { qat_hal_wr_ae_csr(handle, ae, TIMESTAMP_LOW, 0); qat_hal_wr_ae_csr(handle, ae, TIMESTAMP_HIGH, 0); } /* start timestamp timers */ - SET_GLB_CSR(handle, MISC_CONTROL, misc_ctl | MC_TIMESTAMP_ENABLE); + SET_CAP_CSR(handle, misc_ctl_csr, misc_ctl | MC_TIMESTAMP_ENABLE); } #define ESRAM_AUTO_TINIT BIT(2) @@ -428,7 +462,7 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle) qat_hal_wait_cycles(handle, 0, ESRAM_AUTO_INIT_USED_CYCLES, 0); csr_val = ADF_CSR_RD(csr_addr, 0); } while (!(csr_val & ESRAM_AUTO_TINIT_DONE) && times--); - if ((times < 0)) { + if (times < 0) { pr_err("QAT: Fail to init eSram!\n"); return -EFAULT; } @@ -438,33 +472,33 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle) #define SHRAM_INIT_CYCLES 2060 int qat_hal_clr_reset(struct icp_qat_fw_loader_handle *handle) { - unsigned int ae_reset_csr; - unsigned char ae; - unsigned int clk_csr; + unsigned int clk_csr = handle->chip_info->glb_clk_enable_csr; + unsigned int reset_mask = handle->chip_info->icp_rst_mask; + unsigned int reset_csr = handle->chip_info->icp_rst_csr; + unsigned long ae_mask = handle->hal_handle->ae_mask; + unsigned char ae = 0; unsigned int times = 100; - unsigned int csr; + unsigned int csr_val; /* write to the reset csr */ - ae_reset_csr = GET_GLB_CSR(handle, ICP_RESET); - ae_reset_csr &= ~(handle->hal_handle->ae_mask << RST_CSR_AE_LSB); - ae_reset_csr &= ~(handle->hal_handle->slice_mask << RST_CSR_QAT_LSB); + csr_val = GET_CAP_CSR(handle, reset_csr); + csr_val &= ~reset_mask; do { - SET_GLB_CSR(handle, ICP_RESET, ae_reset_csr); + SET_CAP_CSR(handle, reset_csr, csr_val); if (!(times--)) goto out_err; - csr = GET_GLB_CSR(handle, ICP_RESET); - } while ((handle->hal_handle->ae_mask | - (handle->hal_handle->slice_mask << RST_CSR_QAT_LSB)) & csr); + csr_val = GET_CAP_CSR(handle, reset_csr); + csr_val &= reset_mask; + } while (csr_val); /* enable clock */ - clk_csr = GET_GLB_CSR(handle, ICP_GLOBAL_CLK_ENABLE); - clk_csr |= handle->hal_handle->ae_mask << 0; - clk_csr |= handle->hal_handle->slice_mask << 20; - SET_GLB_CSR(handle, ICP_GLOBAL_CLK_ENABLE, clk_csr); + csr_val = GET_CAP_CSR(handle, clk_csr); + csr_val |= reset_mask; + SET_CAP_CSR(handle, clk_csr, csr_val); if (qat_hal_check_ae_alive(handle)) goto out_err; /* Set undefined power-up/reset states to reasonable default values */ - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, INIT_CTX_ENABLE_VALUE); qat_hal_wr_indr_csr(handle, ae, ICP_QAT_UCLO_AE_ALL_CTX, @@ -570,10 +604,11 @@ static void qat_hal_enable_ctx(struct icp_qat_fw_loader_handle *handle, static void qat_hal_clear_xfer(struct icp_qat_fw_loader_handle *handle) { + unsigned long ae_mask = handle->hal_handle->ae_mask; unsigned char ae; unsigned short reg; - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { for (reg = 0; reg < ICP_QAT_UCLO_MAX_GPR_REG; reg++) { qat_hal_init_rd_xfer(handle, ae, 0, ICP_SR_RD_ABS, reg, 0); @@ -585,6 +620,7 @@ static void qat_hal_clear_xfer(struct icp_qat_fw_loader_handle *handle) static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle) { + unsigned long ae_mask = handle->hal_handle->ae_mask; unsigned char ae; unsigned int ctx_mask = ICP_QAT_UCLO_AE_ALL_CTX; int times = MAX_RETRY_TIMES; @@ -592,13 +628,15 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle) unsigned int savctx = 0; int ret = 0; - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { csr_val = qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL); csr_val &= ~(1 << MMC_SHARE_CS_BITPOS); qat_hal_wr_ae_csr(handle, ae, AE_MISC_CONTROL, csr_val); csr_val = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES); csr_val &= IGNORE_W1C_MASK; - csr_val |= CE_NN_MODE; + if (handle->chip_info->nn) + csr_val |= CE_NN_MODE; + qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, csr_val); qat_hal_wr_uwords(handle, ae, 0, ARRAY_SIZE(inst), (u64 *)inst); @@ -613,7 +651,7 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle) qat_hal_wr_ae_csr(handle, ae, CTX_SIG_EVENTS_ACTIVE, 0); qat_hal_enable_ctx(handle, ae, ctx_mask); } - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { /* wait for AE to finish */ do { ret = qat_hal_wait_cycles(handle, ae, 20, 1); @@ -641,57 +679,143 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle) return 0; } -#define ICP_QAT_AE_OFFSET 0x20000 -#define ICP_QAT_CAP_OFFSET (ICP_QAT_AE_OFFSET + 0x10000) -#define LOCAL_TO_XFER_REG_OFFSET 0x800 -#define ICP_QAT_EP_OFFSET 0x3a000 -int qat_hal_init(struct adf_accel_dev *accel_dev) +static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle, + struct adf_accel_dev *accel_dev) { - unsigned char ae; - unsigned int max_en_ae_id = 0; - struct icp_qat_fw_loader_handle *handle; struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev; struct adf_hw_device_data *hw_data = accel_dev->hw_device; struct adf_bar *misc_bar = &pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)]; + unsigned int max_en_ae_id = 0; struct adf_bar *sram_bar; + unsigned int csr_val = 0; + unsigned long ae_mask; + unsigned char ae = 0; + int ret = 0; - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - handle->hal_cap_g_ctl_csr_addr_v = - (void __iomem *)((uintptr_t)misc_bar->virt_addr + - ICP_QAT_CAP_OFFSET); - handle->hal_cap_ae_xfer_csr_addr_v = - (void __iomem *)((uintptr_t)misc_bar->virt_addr + - ICP_QAT_AE_OFFSET); - handle->hal_ep_csr_addr_v = - (void __iomem *)((uintptr_t)misc_bar->virt_addr + - ICP_QAT_EP_OFFSET); - handle->hal_cap_ae_local_csr_addr_v = - (void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v + - LOCAL_TO_XFER_REG_OFFSET); handle->pci_dev = pci_info->pci_dev; - if (handle->pci_dev->device == PCI_DEVICE_ID_INTEL_QAT_DH895XCC) { + switch (handle->pci_dev->device) { + case ADF_4XXX_PCI_DEVICE_ID: + handle->chip_info->sram_visible = false; + handle->chip_info->nn = false; + handle->chip_info->lm2lm3 = true; + handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG_2X; + handle->chip_info->icp_rst_csr = ICP_RESET_CPP0; + handle->chip_info->icp_rst_mask = 0x100015; + handle->chip_info->glb_clk_enable_csr = ICP_GLOBAL_CLK_ENABLE_CPP0; + handle->chip_info->misc_ctl_csr = MISC_CONTROL_C4XXX; + handle->chip_info->wakeup_event_val = 0x80000000; + handle->chip_info->fw_auth = true; + handle->chip_info->css_3k = true; + handle->chip_info->tgroup_share_ustore = true; + handle->chip_info->fcu_ctl_csr = FCU_CONTROL_4XXX; + handle->chip_info->fcu_sts_csr = FCU_STATUS_4XXX; + handle->chip_info->fcu_dram_addr_hi = FCU_DRAM_ADDR_HI_4XXX; + handle->chip_info->fcu_dram_addr_lo = FCU_DRAM_ADDR_LO_4XXX; + handle->chip_info->fcu_loaded_ae_csr = FCU_AE_LOADED_4XXX; + handle->chip_info->fcu_loaded_ae_pos = 0; + + handle->hal_cap_g_ctl_csr_addr_v = + (void __iomem *)((uintptr_t)misc_bar->virt_addr + + ICP_QAT_CAP_OFFSET_4XXX); + handle->hal_cap_ae_xfer_csr_addr_v = + (void __iomem *)((uintptr_t)misc_bar->virt_addr + + ICP_QAT_AE_OFFSET_4XXX); + handle->hal_ep_csr_addr_v = + (void __iomem *)((uintptr_t)misc_bar->virt_addr + + ICP_QAT_EP_OFFSET_4XXX); + handle->hal_cap_ae_local_csr_addr_v = + (void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v + + LOCAL_TO_XFER_REG_OFFSET); + break; + case PCI_DEVICE_ID_INTEL_QAT_C62X: + case PCI_DEVICE_ID_INTEL_QAT_C3XXX: + handle->chip_info->sram_visible = false; + handle->chip_info->nn = true; + handle->chip_info->lm2lm3 = false; + handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG; + handle->chip_info->icp_rst_csr = ICP_RESET; + handle->chip_info->icp_rst_mask = (hw_data->ae_mask << RST_CSR_AE_LSB) | + (hw_data->accel_mask << RST_CSR_QAT_LSB); + handle->chip_info->glb_clk_enable_csr = ICP_GLOBAL_CLK_ENABLE; + handle->chip_info->misc_ctl_csr = MISC_CONTROL; + handle->chip_info->wakeup_event_val = WAKEUP_EVENT; + handle->chip_info->fw_auth = true; + handle->chip_info->css_3k = false; + handle->chip_info->tgroup_share_ustore = false; + handle->chip_info->fcu_ctl_csr = FCU_CONTROL; + handle->chip_info->fcu_sts_csr = FCU_STATUS; + handle->chip_info->fcu_dram_addr_hi = FCU_DRAM_ADDR_HI; + handle->chip_info->fcu_dram_addr_lo = FCU_DRAM_ADDR_LO; + handle->chip_info->fcu_loaded_ae_csr = FCU_STATUS; + handle->chip_info->fcu_loaded_ae_pos = FCU_LOADED_AE_POS; + handle->hal_cap_g_ctl_csr_addr_v = + (void __iomem *)((uintptr_t)misc_bar->virt_addr + + ICP_QAT_CAP_OFFSET); + handle->hal_cap_ae_xfer_csr_addr_v = + (void __iomem *)((uintptr_t)misc_bar->virt_addr + + ICP_QAT_AE_OFFSET); + handle->hal_ep_csr_addr_v = + (void __iomem *)((uintptr_t)misc_bar->virt_addr + + ICP_QAT_EP_OFFSET); + handle->hal_cap_ae_local_csr_addr_v = + (void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v + + LOCAL_TO_XFER_REG_OFFSET); + break; + case PCI_DEVICE_ID_INTEL_QAT_DH895XCC: + handle->chip_info->sram_visible = true; + handle->chip_info->nn = true; + handle->chip_info->lm2lm3 = false; + handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG; + handle->chip_info->icp_rst_csr = ICP_RESET; + handle->chip_info->icp_rst_mask = (hw_data->ae_mask << RST_CSR_AE_LSB) | + (hw_data->accel_mask << RST_CSR_QAT_LSB); + handle->chip_info->glb_clk_enable_csr = ICP_GLOBAL_CLK_ENABLE; + handle->chip_info->misc_ctl_csr = MISC_CONTROL; + handle->chip_info->wakeup_event_val = WAKEUP_EVENT; + handle->chip_info->fw_auth = false; + handle->chip_info->css_3k = false; + handle->chip_info->tgroup_share_ustore = false; + handle->chip_info->fcu_ctl_csr = 0; + handle->chip_info->fcu_sts_csr = 0; + handle->chip_info->fcu_dram_addr_hi = 0; + handle->chip_info->fcu_dram_addr_lo = 0; + handle->chip_info->fcu_loaded_ae_csr = 0; + handle->chip_info->fcu_loaded_ae_pos = 0; + handle->hal_cap_g_ctl_csr_addr_v = + (void __iomem *)((uintptr_t)misc_bar->virt_addr + + ICP_QAT_CAP_OFFSET); + handle->hal_cap_ae_xfer_csr_addr_v = + (void __iomem *)((uintptr_t)misc_bar->virt_addr + + ICP_QAT_AE_OFFSET); + handle->hal_ep_csr_addr_v = + (void __iomem *)((uintptr_t)misc_bar->virt_addr + + ICP_QAT_EP_OFFSET); + handle->hal_cap_ae_local_csr_addr_v = + (void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v + + LOCAL_TO_XFER_REG_OFFSET); + break; + default: + ret = -EINVAL; + goto out_err; + } + + if (handle->chip_info->sram_visible) { sram_bar = &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)]; handle->hal_sram_addr_v = sram_bar->virt_addr; } - handle->fw_auth = (handle->pci_dev->device == - PCI_DEVICE_ID_INTEL_QAT_DH895XCC) ? false : true; - handle->hal_handle = kzalloc(sizeof(*handle->hal_handle), GFP_KERNEL); - if (!handle->hal_handle) - goto out_hal_handle; handle->hal_handle->revision_id = accel_dev->accel_pci_dev.revid; handle->hal_handle->ae_mask = hw_data->ae_mask; + handle->hal_handle->admin_ae_mask = hw_data->admin_ae_mask; handle->hal_handle->slice_mask = hw_data->accel_mask; + handle->cfg_ae_mask = ALL_AE_MASK; /* create AE objects */ handle->hal_handle->upc_mask = 0x1ffff; handle->hal_handle->max_ustore = 0x4000; - for (ae = 0; ae < ICP_QAT_UCLO_MAX_AE; ae++) { - if (!(hw_data->ae_mask & (1 << ae))) - continue; + + ae_mask = handle->hal_handle->ae_mask; + for_each_set_bit(ae, &ae_mask, ICP_QAT_UCLO_MAX_AE) { handle->hal_handle->aes[ae].free_addr = 0; handle->hal_handle->aes[ae].free_size = handle->hal_handle->max_ustore; @@ -702,70 +826,116 @@ int qat_hal_init(struct adf_accel_dev *accel_dev) max_en_ae_id = ae; } handle->hal_handle->ae_max_num = max_en_ae_id + 1; - /* take all AEs out of reset */ - if (qat_hal_clr_reset(handle)) { - dev_err(&GET_DEV(accel_dev), "qat_hal_clr_reset error\n"); - goto out_err; - } - qat_hal_clear_xfer(handle); - if (!handle->fw_auth) { - if (qat_hal_clear_gpr(handle)) - goto out_err; - } /* Set SIGNATURE_ENABLE[0] to 0x1 in order to enable ALU_OUT csr */ - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { - unsigned int csr_val = 0; - + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { csr_val = qat_hal_rd_ae_csr(handle, ae, SIGNATURE_ENABLE); csr_val |= 0x1; qat_hal_wr_ae_csr(handle, ae, SIGNATURE_ENABLE, csr_val); } +out_err: + return ret; +} + +int qat_hal_init(struct adf_accel_dev *accel_dev) +{ + struct icp_qat_fw_loader_handle *handle; + int ret = 0; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->hal_handle = kzalloc(sizeof(*handle->hal_handle), GFP_KERNEL); + if (!handle->hal_handle) { + ret = -ENOMEM; + goto out_hal_handle; + } + + handle->chip_info = kzalloc(sizeof(*handle->chip_info), GFP_KERNEL); + if (!handle->chip_info) { + ret = -ENOMEM; + goto out_chip_info; + } + + ret = qat_hal_chip_init(handle, accel_dev); + if (ret) { + dev_err(&GET_DEV(accel_dev), "qat_hal_chip_init error\n"); + goto out_err; + } + + /* take all AEs out of reset */ + ret = qat_hal_clr_reset(handle); + if (ret) { + dev_err(&GET_DEV(accel_dev), "qat_hal_clr_reset error\n"); + goto out_err; + } + + qat_hal_clear_xfer(handle); + if (!handle->chip_info->fw_auth) { + ret = qat_hal_clear_gpr(handle); + if (ret) + goto out_err; + } + accel_dev->fw_loader->fw_loader = handle; return 0; out_err: + kfree(handle->chip_info); +out_chip_info: kfree(handle->hal_handle); out_hal_handle: kfree(handle); - return -EFAULT; + return ret; } void qat_hal_deinit(struct icp_qat_fw_loader_handle *handle) { if (!handle) return; + kfree(handle->chip_info); kfree(handle->hal_handle); kfree(handle); } -void qat_hal_start(struct icp_qat_fw_loader_handle *handle, unsigned char ae, - unsigned int ctx_mask) +int qat_hal_start(struct icp_qat_fw_loader_handle *handle) { + unsigned long ae_mask = handle->hal_handle->ae_mask; + u32 wakeup_val = handle->chip_info->wakeup_event_val; + u32 fcu_ctl_csr, fcu_sts_csr; + unsigned int fcu_sts; + unsigned char ae; + u32 ae_ctr = 0; int retry = 0; - unsigned int fcu_sts = 0; - if (handle->fw_auth) { - SET_CAP_CSR(handle, FCU_CONTROL, FCU_CTRL_CMD_START); + if (handle->chip_info->fw_auth) { + fcu_ctl_csr = handle->chip_info->fcu_ctl_csr; + fcu_sts_csr = handle->chip_info->fcu_sts_csr; + ae_ctr = hweight32(ae_mask); + SET_CAP_CSR(handle, fcu_ctl_csr, FCU_CTRL_CMD_START); do { msleep(FW_AUTH_WAIT_PERIOD); - fcu_sts = GET_CAP_CSR(handle, FCU_STATUS); + fcu_sts = GET_CAP_CSR(handle, fcu_sts_csr); if (((fcu_sts >> FCU_STS_DONE_POS) & 0x1)) - return; + return ae_ctr; } while (retry++ < FW_AUTH_MAX_RETRY); - pr_err("QAT: start error (AE 0x%x FCU_STS = 0x%x)\n", ae, - fcu_sts); + pr_err("QAT: start error (FCU_STS = 0x%x)\n", fcu_sts); + return 0; } else { - qat_hal_put_wakeup_event(handle, ae, (~ctx_mask) & - ICP_QAT_UCLO_AE_ALL_CTX, 0x10000); - qat_hal_enable_ctx(handle, ae, ctx_mask); + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { + qat_hal_put_wakeup_event(handle, ae, 0, wakeup_val); + qat_hal_enable_ctx(handle, ae, ICP_QAT_UCLO_AE_ALL_CTX); + ae_ctr++; + } + return ae_ctr; } } void qat_hal_stop(struct icp_qat_fw_loader_handle *handle, unsigned char ae, unsigned int ctx_mask) { - if (!handle->fw_auth) + if (!handle->chip_info->fw_auth) qat_hal_disable_ctx(handle, ae, ctx_mask); } @@ -832,9 +1002,12 @@ static int qat_hal_exec_micro_inst(struct icp_qat_fw_loader_handle *handle, int code_off, unsigned int max_cycle, unsigned int *endpc) { + unsigned int ind_lm_addr_byte0 = 0, ind_lm_addr_byte1 = 0; + unsigned int ind_lm_addr_byte2 = 0, ind_lm_addr_byte3 = 0; + unsigned int ind_t_index = 0, ind_t_index_byte = 0; + unsigned int ind_lm_addr0 = 0, ind_lm_addr1 = 0; + unsigned int ind_lm_addr2 = 0, ind_lm_addr3 = 0; u64 savuwords[MAX_EXEC_INST]; - unsigned int ind_lm_addr0, ind_lm_addr1; - unsigned int ind_lm_addr_byte0, ind_lm_addr_byte1; unsigned int ind_cnt_sig; unsigned int ind_sig, act_sig; unsigned int csr_val = 0, newcsr_val; @@ -853,6 +1026,20 @@ static int qat_hal_exec_micro_inst(struct icp_qat_fw_loader_handle *handle, INDIRECT_LM_ADDR_0_BYTE_INDEX); ind_lm_addr_byte1 = qat_hal_rd_indr_csr(handle, ae, ctx, INDIRECT_LM_ADDR_1_BYTE_INDEX); + if (handle->chip_info->lm2lm3) { + ind_lm_addr2 = qat_hal_rd_indr_csr(handle, ae, ctx, + LM_ADDR_2_INDIRECT); + ind_lm_addr3 = qat_hal_rd_indr_csr(handle, ae, ctx, + LM_ADDR_3_INDIRECT); + ind_lm_addr_byte2 = qat_hal_rd_indr_csr(handle, ae, ctx, + INDIRECT_LM_ADDR_2_BYTE_INDEX); + ind_lm_addr_byte3 = qat_hal_rd_indr_csr(handle, ae, ctx, + INDIRECT_LM_ADDR_3_BYTE_INDEX); + ind_t_index = qat_hal_rd_indr_csr(handle, ae, ctx, + INDIRECT_T_INDEX); + ind_t_index_byte = qat_hal_rd_indr_csr(handle, ae, ctx, + INDIRECT_T_INDEX_BYTE_INDEX); + } if (inst_num <= MAX_EXEC_INST) qat_hal_get_uwords(handle, ae, 0, inst_num, savuwords); qat_hal_get_wakeup_event(handle, ae, ctx, &wakeup_events); @@ -910,6 +1097,23 @@ static int qat_hal_exec_micro_inst(struct icp_qat_fw_loader_handle *handle, INDIRECT_LM_ADDR_0_BYTE_INDEX, ind_lm_addr_byte0); qat_hal_wr_indr_csr(handle, ae, (1 << ctx), INDIRECT_LM_ADDR_1_BYTE_INDEX, ind_lm_addr_byte1); + if (handle->chip_info->lm2lm3) { + qat_hal_wr_indr_csr(handle, ae, BIT(ctx), LM_ADDR_2_INDIRECT, + ind_lm_addr2); + qat_hal_wr_indr_csr(handle, ae, BIT(ctx), LM_ADDR_3_INDIRECT, + ind_lm_addr3); + qat_hal_wr_indr_csr(handle, ae, BIT(ctx), + INDIRECT_LM_ADDR_2_BYTE_INDEX, + ind_lm_addr_byte2); + qat_hal_wr_indr_csr(handle, ae, BIT(ctx), + INDIRECT_LM_ADDR_3_BYTE_INDEX, + ind_lm_addr_byte3); + qat_hal_wr_indr_csr(handle, ae, BIT(ctx), + INDIRECT_T_INDEX, ind_t_index); + qat_hal_wr_indr_csr(handle, ae, BIT(ctx), + INDIRECT_T_INDEX_BYTE_INDEX, + ind_t_index_byte); + } qat_hal_wr_indr_csr(handle, ae, (1 << ctx), FUTURE_COUNT_SIGNAL_INDIRECT, ind_cnt_sig); qat_hal_wr_indr_csr(handle, ae, (1 << ctx), @@ -1125,7 +1329,7 @@ int qat_hal_batch_wr_lm(struct icp_qat_fw_loader_handle *handle, plm_init = plm_init->next; } /* exec micro codes */ - if (micro_inst_arry && (micro_inst_num > 0)) { + if (micro_inst_arry && micro_inst_num > 0) { micro_inst_arry[micro_inst_num++] = 0x0E000010000ull; stat = qat_hal_exec_micro_init_lm(handle, ae, 0, &first_exec, micro_inst_arry, @@ -1146,7 +1350,7 @@ static int qat_hal_put_rel_rd_xfer(struct icp_qat_fw_loader_handle *handle, unsigned short mask; unsigned short dr_offset = 0x10; - status = ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES); + ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES); if (CE_INUSE_CONTEXTS & ctx_enables) { if (ctx & 0x1) { pr_err("QAT: bad 4-ctx mode,ctx=0x%x\n", ctx); @@ -1271,7 +1475,7 @@ static int qat_hal_convert_abs_to_rel(struct icp_qat_fw_loader_handle } int qat_hal_init_gpr(struct icp_qat_fw_loader_handle *handle, - unsigned char ae, unsigned char ctx_mask, + unsigned char ae, unsigned long ctx_mask, enum icp_qat_uof_regtype reg_type, unsigned short reg_num, unsigned int regdata) { @@ -1291,7 +1495,7 @@ int qat_hal_init_gpr(struct icp_qat_fw_loader_handle *handle, } else { reg = reg_num; type = reg_type; - if (!test_bit(ctx, (unsigned long *)&ctx_mask)) + if (!test_bit(ctx, &ctx_mask)) continue; } stat = qat_hal_wr_rel_reg(handle, ae, ctx, type, reg, regdata); @@ -1305,7 +1509,7 @@ int qat_hal_init_gpr(struct icp_qat_fw_loader_handle *handle, } int qat_hal_init_wr_xfer(struct icp_qat_fw_loader_handle *handle, - unsigned char ae, unsigned char ctx_mask, + unsigned char ae, unsigned long ctx_mask, enum icp_qat_uof_regtype reg_type, unsigned short reg_num, unsigned int regdata) { @@ -1325,7 +1529,7 @@ int qat_hal_init_wr_xfer(struct icp_qat_fw_loader_handle *handle, } else { reg = reg_num; type = reg_type; - if (!test_bit(ctx, (unsigned long *)&ctx_mask)) + if (!test_bit(ctx, &ctx_mask)) continue; } stat = qat_hal_put_rel_wr_xfer(handle, ae, ctx, type, reg, @@ -1340,7 +1544,7 @@ int qat_hal_init_wr_xfer(struct icp_qat_fw_loader_handle *handle, } int qat_hal_init_rd_xfer(struct icp_qat_fw_loader_handle *handle, - unsigned char ae, unsigned char ctx_mask, + unsigned char ae, unsigned long ctx_mask, enum icp_qat_uof_regtype reg_type, unsigned short reg_num, unsigned int regdata) { @@ -1360,7 +1564,7 @@ int qat_hal_init_rd_xfer(struct icp_qat_fw_loader_handle *handle, } else { reg = reg_num; type = reg_type; - if (!test_bit(ctx, (unsigned long *)&ctx_mask)) + if (!test_bit(ctx, &ctx_mask)) continue; } stat = qat_hal_put_rel_rd_xfer(handle, ae, ctx, type, reg, @@ -1375,17 +1579,22 @@ int qat_hal_init_rd_xfer(struct icp_qat_fw_loader_handle *handle, } int qat_hal_init_nn(struct icp_qat_fw_loader_handle *handle, - unsigned char ae, unsigned char ctx_mask, + unsigned char ae, unsigned long ctx_mask, unsigned short reg_num, unsigned int regdata) { int stat = 0; unsigned char ctx; + if (!handle->chip_info->nn) { + dev_err(&handle->pci_dev->dev, "QAT: No next neigh in 0x%x\n", + handle->pci_dev->device); + return -EINVAL; + } if (ctx_mask == 0) return -EINVAL; for (ctx = 0; ctx < ICP_QAT_UCLO_MAX_CTX; ctx++) { - if (!test_bit(ctx, (unsigned long *)&ctx_mask)) + if (!test_bit(ctx, &ctx_mask)) continue; stat = qat_hal_put_rel_nn(handle, ae, ctx, reg_num, regdata); if (stat) { diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c index 5d1f28cd6680..1fb5fc852f6b 100644 --- a/drivers/crypto/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/qat/qat_common/qat_uclo.c @@ -74,7 +74,7 @@ static int qat_uclo_free_ae_data(struct icp_qat_uclo_aedata *ae_data) static char *qat_uclo_get_string(struct icp_qat_uof_strtable *str_table, unsigned int str_offset) { - if ((!str_table->table_len) || (str_offset > str_table->table_len)) + if (!str_table->table_len || str_offset > str_table->table_len) return NULL; return (char *)(((uintptr_t)(str_table->strings)) + str_offset); } @@ -311,7 +311,7 @@ static int qat_uclo_init_lmem_seg(struct icp_qat_fw_loader_handle *handle, unsigned int ae; if (qat_uclo_fetch_initmem_ae(handle, init_mem, - ICP_QAT_UCLO_MAX_LMEM_REG, &ae)) + handle->chip_info->lm_size, &ae)) return -EINVAL; if (qat_uclo_create_batch_init_list(handle, init_mem, ae, &obj_handle->lm_init_tab[ae])) @@ -324,6 +324,7 @@ static int qat_uclo_init_umem_seg(struct icp_qat_fw_loader_handle *handle, { struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle; unsigned int ae, ustore_size, uaddr, i; + struct icp_qat_uclo_aedata *aed; ustore_size = obj_handle->ustore_phy_size; if (qat_uclo_fetch_initmem_ae(handle, init_mem, ustore_size, &ae)) @@ -333,11 +334,10 @@ static int qat_uclo_init_umem_seg(struct icp_qat_fw_loader_handle *handle, return -EINVAL; /* set the highest ustore address referenced */ uaddr = (init_mem->addr + init_mem->num_in_bytes) >> 0x2; - for (i = 0; i < obj_handle->ae_data[ae].slice_num; i++) { - if (obj_handle->ae_data[ae].ae_slices[i]. - encap_image->uwords_num < uaddr) - obj_handle->ae_data[ae].ae_slices[i]. - encap_image->uwords_num = uaddr; + aed = &obj_handle->ae_data[ae]; + for (i = 0; i < aed->slice_num; i++) { + if (aed->ae_slices[i].encap_image->uwords_num < uaddr) + aed->ae_slices[i].encap_image->uwords_num = uaddr; } return 0; } @@ -373,6 +373,8 @@ static int qat_uclo_init_ustore(struct icp_qat_fw_loader_handle *handle, unsigned int ustore_size; unsigned int patt_pos; struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle; + unsigned long ae_mask = handle->hal_handle->ae_mask; + unsigned long cfg_ae_mask = handle->cfg_ae_mask; u64 *fill_data; uof_image = image->img_ptr; @@ -385,9 +387,13 @@ static int qat_uclo_init_ustore(struct icp_qat_fw_loader_handle *handle, sizeof(u64)); page = image->page; - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { if (!test_bit(ae, (unsigned long *)&uof_image->ae_assigned)) continue; + + if (!test_bit(ae, &cfg_ae_mask)) + continue; + ustore_size = obj_handle->ae_data[ae].eff_ustore_size; patt_pos = page->beg_addr_p + page->micro_words_num; @@ -406,6 +412,7 @@ static int qat_uclo_init_memory(struct icp_qat_fw_loader_handle *handle) int i, ae; struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle; struct icp_qat_uof_initmem *initmem = obj_handle->init_mem_tab.init_mem; + unsigned long ae_mask = handle->hal_handle->ae_mask; for (i = 0; i < obj_handle->init_mem_tab.entry_num; i++) { if (initmem->num_in_bytes) { @@ -418,7 +425,8 @@ static int qat_uclo_init_memory(struct icp_qat_fw_loader_handle *handle) (sizeof(struct icp_qat_uof_memvar_attr) * initmem->val_attr_num)); } - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { + + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { if (qat_hal_batch_wr_lm(handle, ae, obj_handle->lm_init_tab[ae])) { pr_err("QAT: fail to batch init lmem for AE %d\n", ae); @@ -536,7 +544,7 @@ qat_uclo_check_image_compat(struct icp_qat_uof_encap_obj *encap_uof_obj, (encap_uof_obj->beg_uof + code_page->neigh_reg_tab_offset); if (neigh_reg_tab->entry_num) { - pr_err("QAT: UOF can't contain shared control store feature\n"); + pr_err("QAT: UOF can't contain neighbor register table\n"); return -EINVAL; } if (image->numpages > 1) { @@ -649,11 +657,13 @@ static int qat_uclo_map_ae(struct icp_qat_fw_loader_handle *handle, int max_ae) int i, ae; int mflag = 0; struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle; + unsigned long ae_mask = handle->hal_handle->ae_mask; + unsigned long cfg_ae_mask = handle->cfg_ae_mask; - for (ae = 0; ae < max_ae; ae++) { - if (!test_bit(ae, - (unsigned long *)&handle->hal_handle->ae_mask)) + for_each_set_bit(ae, &ae_mask, max_ae) { + if (!test_bit(ae, &cfg_ae_mask)) continue; + for (i = 0; i < obj_handle->uimage_num; i++) { if (!test_bit(ae, (unsigned long *) &obj_handle->ae_uimage[i].img_ptr->ae_assigned)) @@ -718,6 +728,8 @@ qat_uclo_get_dev_type(struct icp_qat_fw_loader_handle *handle) return ICP_QAT_AC_C62X_DEV_TYPE; case PCI_DEVICE_ID_INTEL_QAT_C3XXX: return ICP_QAT_AC_C3XXX_DEV_TYPE; + case ADF_4XXX_PCI_DEVICE_ID: + return ICP_QAT_AC_4XXX_A_DEV_TYPE; default: pr_err("QAT: unsupported device 0x%x\n", handle->pci_dev->device); @@ -736,8 +748,8 @@ static int qat_uclo_check_uof_compat(struct icp_qat_uclo_objhandle *obj_handle) return -EINVAL; } maj_ver = obj_handle->prod_rev & 0xff; - if ((obj_handle->encap_uof_obj.obj_hdr->max_cpu_ver < maj_ver) || - (obj_handle->encap_uof_obj.obj_hdr->min_cpu_ver > maj_ver)) { + if (obj_handle->encap_uof_obj.obj_hdr->max_cpu_ver < maj_ver || + obj_handle->encap_uof_obj.obj_hdr->min_cpu_ver > maj_ver) { pr_err("QAT: UOF majVer 0x%x out of range\n", maj_ver); return -EINVAL; } @@ -845,6 +857,8 @@ static int qat_uclo_init_reg_sym(struct icp_qat_fw_loader_handle *handle, static int qat_uclo_init_globals(struct icp_qat_fw_loader_handle *handle) { struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle; + unsigned long ae_mask = handle->hal_handle->ae_mask; + struct icp_qat_uclo_aedata *aed; unsigned int s, ae; if (obj_handle->global_inited) @@ -855,13 +869,13 @@ static int qat_uclo_init_globals(struct icp_qat_fw_loader_handle *handle) return -EINVAL; } } - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { - for (s = 0; s < obj_handle->ae_data[ae].slice_num; s++) { - if (!obj_handle->ae_data[ae].ae_slices[s].encap_image) + + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { + aed = &obj_handle->ae_data[ae]; + for (s = 0; s < aed->slice_num; s++) { + if (!aed->ae_slices[s].encap_image) continue; - if (qat_uclo_init_reg_sym(handle, ae, - obj_handle->ae_data[ae]. - ae_slices[s].encap_image)) + if (qat_uclo_init_reg_sym(handle, ae, aed->ae_slices[s].encap_image)) return -EINVAL; } } @@ -869,46 +883,83 @@ static int qat_uclo_init_globals(struct icp_qat_fw_loader_handle *handle) return 0; } +static int qat_hal_set_modes(struct icp_qat_fw_loader_handle *handle, + struct icp_qat_uclo_objhandle *obj_handle, + unsigned char ae, + struct icp_qat_uof_image *uof_image) +{ + unsigned char mode; + int ret; + + mode = ICP_QAT_CTX_MODE(uof_image->ae_mode); + ret = qat_hal_set_ae_ctx_mode(handle, ae, mode); + if (ret) { + pr_err("QAT: qat_hal_set_ae_ctx_mode error\n"); + return ret; + } + if (handle->chip_info->nn) { + mode = ICP_QAT_NN_MODE(uof_image->ae_mode); + ret = qat_hal_set_ae_nn_mode(handle, ae, mode); + if (ret) { + pr_err("QAT: qat_hal_set_ae_nn_mode error\n"); + return ret; + } + } + mode = ICP_QAT_LOC_MEM0_MODE(uof_image->ae_mode); + ret = qat_hal_set_ae_lm_mode(handle, ae, ICP_LMEM0, mode); + if (ret) { + pr_err("QAT: qat_hal_set_ae_lm_mode LMEM0 error\n"); + return ret; + } + mode = ICP_QAT_LOC_MEM1_MODE(uof_image->ae_mode); + ret = qat_hal_set_ae_lm_mode(handle, ae, ICP_LMEM1, mode); + if (ret) { + pr_err("QAT: qat_hal_set_ae_lm_mode LMEM1 error\n"); + return ret; + } + if (handle->chip_info->lm2lm3) { + mode = ICP_QAT_LOC_MEM2_MODE(uof_image->ae_mode); + ret = qat_hal_set_ae_lm_mode(handle, ae, ICP_LMEM2, mode); + if (ret) { + pr_err("QAT: qat_hal_set_ae_lm_mode LMEM2 error\n"); + return ret; + } + mode = ICP_QAT_LOC_MEM3_MODE(uof_image->ae_mode); + ret = qat_hal_set_ae_lm_mode(handle, ae, ICP_LMEM3, mode); + if (ret) { + pr_err("QAT: qat_hal_set_ae_lm_mode LMEM3 error\n"); + return ret; + } + mode = ICP_QAT_LOC_TINDEX_MODE(uof_image->ae_mode); + qat_hal_set_ae_tindex_mode(handle, ae, mode); + } + return 0; +} + static int qat_uclo_set_ae_mode(struct icp_qat_fw_loader_handle *handle) { - unsigned char ae, nn_mode, s; struct icp_qat_uof_image *uof_image; struct icp_qat_uclo_aedata *ae_data; struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle; + unsigned long ae_mask = handle->hal_handle->ae_mask; + unsigned long cfg_ae_mask = handle->cfg_ae_mask; + unsigned char ae, s; + int error; - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { - if (!test_bit(ae, - (unsigned long *)&handle->hal_handle->ae_mask)) + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { + if (!test_bit(ae, &cfg_ae_mask)) continue; + ae_data = &obj_handle->ae_data[ae]; for (s = 0; s < min_t(unsigned int, ae_data->slice_num, ICP_QAT_UCLO_MAX_CTX); s++) { if (!obj_handle->ae_data[ae].ae_slices[s].encap_image) continue; uof_image = ae_data->ae_slices[s].encap_image->img_ptr; - if (qat_hal_set_ae_ctx_mode(handle, ae, - (char)ICP_QAT_CTX_MODE - (uof_image->ae_mode))) { - pr_err("QAT: qat_hal_set_ae_ctx_mode error\n"); - return -EFAULT; - } - nn_mode = ICP_QAT_NN_MODE(uof_image->ae_mode); - if (qat_hal_set_ae_nn_mode(handle, ae, nn_mode)) { - pr_err("QAT: qat_hal_set_ae_nn_mode error\n"); - return -EFAULT; - } - if (qat_hal_set_ae_lm_mode(handle, ae, ICP_LMEM0, - (char)ICP_QAT_LOC_MEM0_MODE - (uof_image->ae_mode))) { - pr_err("QAT: qat_hal_set_ae_lm_mode LMEM0 error\n"); - return -EFAULT; - } - if (qat_hal_set_ae_lm_mode(handle, ae, ICP_LMEM1, - (char)ICP_QAT_LOC_MEM1_MODE - (uof_image->ae_mode))) { - pr_err("QAT: qat_hal_set_ae_lm_mode LMEM1 error\n"); - return -EFAULT; - } + error = qat_hal_set_modes(handle, obj_handle, ae, + uof_image); + if (error) + return error; } } return 0; @@ -1003,10 +1054,11 @@ static int qat_uclo_map_suof_file_hdr(struct icp_qat_fw_loader_handle *handle, return 0; } -static void qat_uclo_map_simg(struct icp_qat_suof_handle *suof_handle, +static void qat_uclo_map_simg(struct icp_qat_fw_loader_handle *handle, struct icp_qat_suof_img_hdr *suof_img_hdr, struct icp_qat_suof_chunk_hdr *suof_chunk_hdr) { + struct icp_qat_suof_handle *suof_handle = handle->sobj_handle; struct icp_qat_simg_ae_mode *ae_mode; struct icp_qat_suof_objhdr *suof_objhdr; @@ -1021,10 +1073,10 @@ static void qat_uclo_map_simg(struct icp_qat_suof_handle *suof_handle, suof_img_hdr->css_key = (suof_img_hdr->css_header + sizeof(struct icp_qat_css_hdr)); suof_img_hdr->css_signature = suof_img_hdr->css_key + - ICP_QAT_CSS_FWSK_MODULUS_LEN + - ICP_QAT_CSS_FWSK_EXPONENT_LEN; + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle) + + ICP_QAT_CSS_FWSK_EXPONENT_LEN(handle); suof_img_hdr->css_simg = suof_img_hdr->css_signature + - ICP_QAT_CSS_SIGNATURE_LEN; + ICP_QAT_CSS_SIGNATURE_LEN(handle); ae_mode = (struct icp_qat_simg_ae_mode *)(suof_img_hdr->css_simg); suof_img_hdr->ae_mask = ae_mode->ae_mask; @@ -1064,8 +1116,8 @@ static int qat_uclo_check_simg_compat(struct icp_qat_fw_loader_handle *handle, return -EINVAL; } maj_ver = prod_rev & 0xff; - if ((maj_ver > img_ae_mode->devmax_ver) || - (maj_ver < img_ae_mode->devmin_ver)) { + if (maj_ver > img_ae_mode->devmax_ver || + maj_ver < img_ae_mode->devmin_ver) { pr_err("QAT: incompatible device majver 0x%x\n", maj_ver); return -EINVAL; } @@ -1108,7 +1160,7 @@ static int qat_uclo_map_suof(struct icp_qat_fw_loader_handle *handle, unsigned int i = 0; struct icp_qat_suof_img_hdr img_header; - if (!suof_ptr || (suof_size == 0)) { + if (!suof_ptr || suof_size == 0) { pr_err("QAT: input parameter SUOF pointer/size is NULL\n"); return -EINVAL; } @@ -1130,20 +1182,24 @@ static int qat_uclo_map_suof(struct icp_qat_fw_loader_handle *handle, if (!suof_img_hdr) return -ENOMEM; suof_handle->img_table.simg_hdr = suof_img_hdr; - } - for (i = 0; i < suof_handle->img_table.num_simgs; i++) { - qat_uclo_map_simg(handle->sobj_handle, &suof_img_hdr[i], - &suof_chunk_hdr[1 + i]); - ret = qat_uclo_check_simg_compat(handle, - &suof_img_hdr[i]); - if (ret) - return ret; - if ((suof_img_hdr[i].ae_mask & 0x1) != 0) - ae0_img = i; + for (i = 0; i < suof_handle->img_table.num_simgs; i++) { + qat_uclo_map_simg(handle, &suof_img_hdr[i], + &suof_chunk_hdr[1 + i]); + ret = qat_uclo_check_simg_compat(handle, + &suof_img_hdr[i]); + if (ret) + return ret; + suof_img_hdr[i].ae_mask &= handle->cfg_ae_mask; + if ((suof_img_hdr[i].ae_mask & 0x1) != 0) + ae0_img = i; + } + + if (!handle->chip_info->tgroup_share_ustore) { + qat_uclo_tail_img(suof_img_hdr, ae0_img, + suof_handle->img_table.num_simgs); + } } - qat_uclo_tail_img(suof_img_hdr, ae0_img, - suof_handle->img_table.num_simgs); return 0; } @@ -1153,18 +1209,26 @@ static int qat_uclo_map_suof(struct icp_qat_fw_loader_handle *handle, static int qat_uclo_auth_fw(struct icp_qat_fw_loader_handle *handle, struct icp_qat_fw_auth_desc *desc) { - unsigned int fcu_sts, retry = 0; + u32 fcu_sts, retry = 0; + u32 fcu_ctl_csr, fcu_sts_csr; + u32 fcu_dram_hi_csr, fcu_dram_lo_csr; u64 bus_addr; bus_addr = ADD_ADDR(desc->css_hdr_high, desc->css_hdr_low) - sizeof(struct icp_qat_auth_chunk); - SET_CAP_CSR(handle, FCU_DRAM_ADDR_HI, (bus_addr >> BITS_IN_DWORD)); - SET_CAP_CSR(handle, FCU_DRAM_ADDR_LO, bus_addr); - SET_CAP_CSR(handle, FCU_CONTROL, FCU_CTRL_CMD_AUTH); + + fcu_ctl_csr = handle->chip_info->fcu_ctl_csr; + fcu_sts_csr = handle->chip_info->fcu_sts_csr; + fcu_dram_hi_csr = handle->chip_info->fcu_dram_addr_hi; + fcu_dram_lo_csr = handle->chip_info->fcu_dram_addr_lo; + + SET_CAP_CSR(handle, fcu_dram_hi_csr, (bus_addr >> BITS_IN_DWORD)); + SET_CAP_CSR(handle, fcu_dram_lo_csr, bus_addr); + SET_CAP_CSR(handle, fcu_ctl_csr, FCU_CTRL_CMD_AUTH); do { msleep(FW_AUTH_WAIT_PERIOD); - fcu_sts = GET_CAP_CSR(handle, FCU_STATUS); + fcu_sts = GET_CAP_CSR(handle, fcu_sts_csr); if ((fcu_sts & FCU_AUTH_STS_MASK) == FCU_STS_VERI_FAIL) goto auth_fail; if (((fcu_sts >> FCU_STS_AUTHFWLD_POS) & 0x1)) @@ -1177,6 +1241,83 @@ static int qat_uclo_auth_fw(struct icp_qat_fw_loader_handle *handle, return -EINVAL; } +static bool qat_uclo_is_broadcast(struct icp_qat_fw_loader_handle *handle, + int imgid) +{ + struct icp_qat_suof_handle *sobj_handle; + + if (!handle->chip_info->tgroup_share_ustore) + return false; + + sobj_handle = (struct icp_qat_suof_handle *)handle->sobj_handle; + if (handle->hal_handle->admin_ae_mask & + sobj_handle->img_table.simg_hdr[imgid].ae_mask) + return false; + + return true; +} + +static int qat_uclo_broadcast_load_fw(struct icp_qat_fw_loader_handle *handle, + struct icp_qat_fw_auth_desc *desc) +{ + unsigned long ae_mask = handle->hal_handle->ae_mask; + unsigned long desc_ae_mask = desc->ae_mask; + u32 fcu_sts, ae_broadcast_mask = 0; + u32 fcu_loaded_csr, ae_loaded; + u32 fcu_sts_csr, fcu_ctl_csr; + unsigned int ae, retry = 0; + + if (handle->chip_info->tgroup_share_ustore) { + fcu_ctl_csr = handle->chip_info->fcu_ctl_csr; + fcu_sts_csr = handle->chip_info->fcu_sts_csr; + fcu_loaded_csr = handle->chip_info->fcu_loaded_ae_csr; + } else { + pr_err("Chip 0x%x doesn't support broadcast load\n", + handle->pci_dev->device); + return -EINVAL; + } + + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { + if (qat_hal_check_ae_active(handle, (unsigned char)ae)) { + pr_err("QAT: Broadcast load failed. AE is not enabled or active.\n"); + return -EINVAL; + } + + if (test_bit(ae, &desc_ae_mask)) + ae_broadcast_mask |= 1 << ae; + } + + if (ae_broadcast_mask) { + SET_CAP_CSR(handle, FCU_ME_BROADCAST_MASK_TYPE, + ae_broadcast_mask); + + SET_CAP_CSR(handle, fcu_ctl_csr, FCU_CTRL_CMD_LOAD); + + do { + msleep(FW_AUTH_WAIT_PERIOD); + fcu_sts = GET_CAP_CSR(handle, fcu_sts_csr); + fcu_sts &= FCU_AUTH_STS_MASK; + + if (fcu_sts == FCU_STS_LOAD_FAIL) { + pr_err("Broadcast load failed: 0x%x)\n", fcu_sts); + return -EINVAL; + } else if (fcu_sts == FCU_STS_LOAD_DONE) { + ae_loaded = GET_CAP_CSR(handle, fcu_loaded_csr); + ae_loaded >>= handle->chip_info->fcu_loaded_ae_pos; + + if ((ae_loaded & ae_broadcast_mask) == ae_broadcast_mask) + break; + } + } while (retry++ < FW_AUTH_MAX_RETRY); + + if (retry > FW_AUTH_MAX_RETRY) { + pr_err("QAT: broadcast load failed timeout %d\n", retry); + return -EINVAL; + } + } + return 0; +} + static int qat_uclo_simg_alloc(struct icp_qat_fw_loader_handle *handle, struct icp_firml_dram_desc *dram_desc, unsigned int size) @@ -1197,11 +1338,15 @@ static int qat_uclo_simg_alloc(struct icp_qat_fw_loader_handle *handle, static void qat_uclo_simg_free(struct icp_qat_fw_loader_handle *handle, struct icp_firml_dram_desc *dram_desc) { - dma_free_coherent(&handle->pci_dev->dev, - (size_t)(dram_desc->dram_size), - (dram_desc->dram_base_addr_v), - dram_desc->dram_bus_addr); - memset(dram_desc, 0, sizeof(*dram_desc)); + if (handle && dram_desc && dram_desc->dram_base_addr_v) { + dma_free_coherent(&handle->pci_dev->dev, + (size_t)(dram_desc->dram_size), + dram_desc->dram_base_addr_v, + dram_desc->dram_bus_addr); + } + + if (dram_desc) + memset(dram_desc, 0, sizeof(*dram_desc)); } static void qat_uclo_ummap_auth_fw(struct icp_qat_fw_loader_handle *handle, @@ -1209,12 +1354,14 @@ static void qat_uclo_ummap_auth_fw(struct icp_qat_fw_loader_handle *handle, { struct icp_firml_dram_desc dram_desc; - dram_desc.dram_base_addr_v = *desc; - dram_desc.dram_bus_addr = ((struct icp_qat_auth_chunk *) - (*desc))->chunk_bus_addr; - dram_desc.dram_size = ((struct icp_qat_auth_chunk *) - (*desc))->chunk_size; - qat_uclo_simg_free(handle, &dram_desc); + if (*desc) { + dram_desc.dram_base_addr_v = *desc; + dram_desc.dram_bus_addr = ((struct icp_qat_auth_chunk *) + (*desc))->chunk_bus_addr; + dram_desc.dram_size = ((struct icp_qat_auth_chunk *) + (*desc))->chunk_size; + qat_uclo_simg_free(handle, &dram_desc); + } } static int qat_uclo_map_auth_fw(struct icp_qat_fw_loader_handle *handle, @@ -1226,15 +1373,16 @@ static int qat_uclo_map_auth_fw(struct icp_qat_fw_loader_handle *handle, struct icp_qat_auth_chunk *auth_chunk; u64 virt_addr, bus_addr, virt_base; unsigned int length, simg_offset = sizeof(*auth_chunk); + struct icp_qat_simg_ae_mode *simg_ae_mode; struct icp_firml_dram_desc img_desc; - if (size > (ICP_QAT_AE_IMG_OFFSET + ICP_QAT_CSS_MAX_IMAGE_LEN)) { + if (size > (ICP_QAT_AE_IMG_OFFSET(handle) + ICP_QAT_CSS_MAX_IMAGE_LEN)) { pr_err("QAT: error, input image size overflow %d\n", size); return -EINVAL; } length = (css_hdr->fw_type == CSS_AE_FIRMWARE) ? - ICP_QAT_CSS_AE_SIMG_LEN + simg_offset : - size + ICP_QAT_CSS_FWSK_PAD_LEN + simg_offset; + ICP_QAT_CSS_AE_SIMG_LEN(handle) + simg_offset : + size + ICP_QAT_CSS_FWSK_PAD_LEN(handle) + simg_offset; if (qat_uclo_simg_alloc(handle, &img_desc, length)) { pr_err("QAT: error, allocate continuous dram fail\n"); return -ENOMEM; @@ -1261,42 +1409,42 @@ static int qat_uclo_map_auth_fw(struct icp_qat_fw_loader_handle *handle, memcpy((void *)(uintptr_t)virt_addr, (void *)(image + sizeof(*css_hdr)), - ICP_QAT_CSS_FWSK_MODULUS_LEN); + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle)); /* padding */ - memset((void *)(uintptr_t)(virt_addr + ICP_QAT_CSS_FWSK_MODULUS_LEN), - 0, ICP_QAT_CSS_FWSK_PAD_LEN); + memset((void *)(uintptr_t)(virt_addr + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle)), + 0, ICP_QAT_CSS_FWSK_PAD_LEN(handle)); /* exponent */ - memcpy((void *)(uintptr_t)(virt_addr + ICP_QAT_CSS_FWSK_MODULUS_LEN + - ICP_QAT_CSS_FWSK_PAD_LEN), + memcpy((void *)(uintptr_t)(virt_addr + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle) + + ICP_QAT_CSS_FWSK_PAD_LEN(handle)), (void *)(image + sizeof(*css_hdr) + - ICP_QAT_CSS_FWSK_MODULUS_LEN), + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle)), sizeof(unsigned int)); /* signature */ bus_addr = ADD_ADDR(auth_desc->fwsk_pub_high, auth_desc->fwsk_pub_low) + - ICP_QAT_CSS_FWSK_PUB_LEN; - virt_addr = virt_addr + ICP_QAT_CSS_FWSK_PUB_LEN; + ICP_QAT_CSS_FWSK_PUB_LEN(handle); + virt_addr = virt_addr + ICP_QAT_CSS_FWSK_PUB_LEN(handle); auth_desc->signature_high = (unsigned int)(bus_addr >> BITS_IN_DWORD); auth_desc->signature_low = (unsigned int)bus_addr; memcpy((void *)(uintptr_t)virt_addr, (void *)(image + sizeof(*css_hdr) + - ICP_QAT_CSS_FWSK_MODULUS_LEN + - ICP_QAT_CSS_FWSK_EXPONENT_LEN), - ICP_QAT_CSS_SIGNATURE_LEN); + ICP_QAT_CSS_FWSK_MODULUS_LEN(handle) + + ICP_QAT_CSS_FWSK_EXPONENT_LEN(handle)), + ICP_QAT_CSS_SIGNATURE_LEN(handle)); bus_addr = ADD_ADDR(auth_desc->signature_high, auth_desc->signature_low) + - ICP_QAT_CSS_SIGNATURE_LEN; - virt_addr += ICP_QAT_CSS_SIGNATURE_LEN; + ICP_QAT_CSS_SIGNATURE_LEN(handle); + virt_addr += ICP_QAT_CSS_SIGNATURE_LEN(handle); auth_desc->img_high = (unsigned int)(bus_addr >> BITS_IN_DWORD); auth_desc->img_low = (unsigned int)bus_addr; - auth_desc->img_len = size - ICP_QAT_AE_IMG_OFFSET; + auth_desc->img_len = size - ICP_QAT_AE_IMG_OFFSET(handle); memcpy((void *)(uintptr_t)virt_addr, - (void *)(image + ICP_QAT_AE_IMG_OFFSET), + (void *)(image + ICP_QAT_AE_IMG_OFFSET(handle)), auth_desc->img_len); virt_addr = virt_base; /* AE firmware */ @@ -1315,6 +1463,11 @@ static int qat_uclo_map_auth_fw(struct icp_qat_fw_loader_handle *handle, auth_desc->img_ae_insts_high = (unsigned int) (bus_addr >> BITS_IN_DWORD); auth_desc->img_ae_insts_low = (unsigned int)bus_addr; + virt_addr += sizeof(struct icp_qat_css_hdr); + virt_addr += ICP_QAT_CSS_FWSK_PUB_LEN(handle); + virt_addr += ICP_QAT_CSS_SIGNATURE_LEN(handle); + simg_ae_mode = (struct icp_qat_simg_ae_mode *)(uintptr_t)virt_addr; + auth_desc->ae_mask = simg_ae_mode->ae_mask & handle->cfg_ae_mask; } else { auth_desc->img_ae_insts_high = auth_desc->img_high; auth_desc->img_ae_insts_low = auth_desc->img_low; @@ -1326,35 +1479,40 @@ static int qat_uclo_map_auth_fw(struct icp_qat_fw_loader_handle *handle, static int qat_uclo_load_fw(struct icp_qat_fw_loader_handle *handle, struct icp_qat_fw_auth_desc *desc) { + unsigned long ae_mask = handle->hal_handle->ae_mask; + u32 fcu_sts_csr, fcu_ctl_csr; + u32 loaded_aes, loaded_csr; unsigned int i; - unsigned int fcu_sts; - struct icp_qat_simg_ae_mode *virt_addr; - unsigned int fcu_loaded_ae_pos = FCU_LOADED_AE_POS; + u32 fcu_sts; - virt_addr = (void *)((uintptr_t)desc + - sizeof(struct icp_qat_auth_chunk) + - sizeof(struct icp_qat_css_hdr) + - ICP_QAT_CSS_FWSK_PUB_LEN + - ICP_QAT_CSS_SIGNATURE_LEN); - for (i = 0; i < handle->hal_handle->ae_max_num; i++) { + fcu_ctl_csr = handle->chip_info->fcu_ctl_csr; + fcu_sts_csr = handle->chip_info->fcu_sts_csr; + loaded_csr = handle->chip_info->fcu_loaded_ae_csr; + + for_each_set_bit(i, &ae_mask, handle->hal_handle->ae_max_num) { int retry = 0; - if (!((virt_addr->ae_mask >> i) & 0x1)) + if (!((desc->ae_mask >> i) & 0x1)) continue; if (qat_hal_check_ae_active(handle, i)) { pr_err("QAT: AE %d is active\n", i); return -EINVAL; } - SET_CAP_CSR(handle, FCU_CONTROL, - (FCU_CTRL_CMD_LOAD | (i << FCU_CTRL_AE_POS))); + SET_CAP_CSR(handle, fcu_ctl_csr, + (FCU_CTRL_CMD_LOAD | + (1 << FCU_CTRL_BROADCAST_POS) | + (i << FCU_CTRL_AE_POS))); do { msleep(FW_AUTH_WAIT_PERIOD); - fcu_sts = GET_CAP_CSR(handle, FCU_STATUS); - if (((fcu_sts & FCU_AUTH_STS_MASK) == - FCU_STS_LOAD_DONE) && - ((fcu_sts >> fcu_loaded_ae_pos) & (1 << i))) - break; + fcu_sts = GET_CAP_CSR(handle, fcu_sts_csr); + if ((fcu_sts & FCU_AUTH_STS_MASK) == + FCU_STS_LOAD_DONE) { + loaded_aes = GET_CAP_CSR(handle, loaded_csr); + loaded_aes >>= handle->chip_info->fcu_loaded_ae_pos; + if (loaded_aes & (1 << i)) + break; + } } while (retry++ < FW_AUTH_MAX_RETRY); if (retry > FW_AUTH_MAX_RETRY) { pr_err("QAT: firmware load failed timeout %x\n", retry); @@ -1387,14 +1545,16 @@ int qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle, struct icp_qat_fw_auth_desc *desc = NULL; int status = 0; - if (handle->fw_auth) { + if (handle->chip_info->fw_auth) { if (!qat_uclo_map_auth_fw(handle, addr_ptr, mem_size, &desc)) status = qat_uclo_auth_fw(handle, desc); qat_uclo_ummap_auth_fw(handle, &desc); } else { - if (handle->pci_dev->device == PCI_DEVICE_ID_INTEL_QAT_C3XXX) { - pr_err("QAT: C3XXX doesn't support unsigned MMP\n"); - return -EINVAL; + if (!handle->chip_info->sram_visible) { + dev_dbg(&handle->pci_dev->dev, + "QAT MMP fw not loaded for device 0x%x", + handle->pci_dev->device); + return status; } qat_uclo_wr_sram_by_words(handle, 0, addr_ptr, mem_size); } @@ -1437,25 +1597,281 @@ static int qat_uclo_map_uof_obj(struct icp_qat_fw_loader_handle *handle, return -ENOMEM; } -int qat_uclo_map_obj(struct icp_qat_fw_loader_handle *handle, - void *addr_ptr, int mem_size) +static int qat_uclo_map_mof_file_hdr(struct icp_qat_fw_loader_handle *handle, + struct icp_qat_mof_file_hdr *mof_ptr, + u32 mof_size) { + struct icp_qat_mof_handle *mobj_handle = handle->mobj_handle; + unsigned int min_ver_offset; + unsigned int checksum; + + mobj_handle->file_id = ICP_QAT_MOF_FID; + mobj_handle->mof_buf = (char *)mof_ptr; + mobj_handle->mof_size = mof_size; + + min_ver_offset = mof_size - offsetof(struct icp_qat_mof_file_hdr, + min_ver); + checksum = qat_uclo_calc_str_checksum(&mof_ptr->min_ver, + min_ver_offset); + if (checksum != mof_ptr->checksum) { + pr_err("QAT: incorrect MOF checksum\n"); + return -EINVAL; + } + + mobj_handle->checksum = mof_ptr->checksum; + mobj_handle->min_ver = mof_ptr->min_ver; + mobj_handle->maj_ver = mof_ptr->maj_ver; + return 0; +} + +static void qat_uclo_del_mof(struct icp_qat_fw_loader_handle *handle) +{ + struct icp_qat_mof_handle *mobj_handle = handle->mobj_handle; + + kfree(mobj_handle->obj_table.obj_hdr); + mobj_handle->obj_table.obj_hdr = NULL; + kfree(handle->mobj_handle); + handle->mobj_handle = NULL; +} + +static int qat_uclo_seek_obj_inside_mof(struct icp_qat_mof_handle *mobj_handle, + char *obj_name, char **obj_ptr, + unsigned int *obj_size) +{ + struct icp_qat_mof_objhdr *obj_hdr = mobj_handle->obj_table.obj_hdr; + unsigned int i; + + for (i = 0; i < mobj_handle->obj_table.num_objs; i++) { + if (!strncmp(obj_hdr[i].obj_name, obj_name, + ICP_QAT_SUOF_OBJ_NAME_LEN)) { + *obj_ptr = obj_hdr[i].obj_buf; + *obj_size = obj_hdr[i].obj_size; + return 0; + } + } + + pr_err("QAT: object %s is not found inside MOF\n", obj_name); + return -EINVAL; +} + +static int qat_uclo_map_obj_from_mof(struct icp_qat_mof_handle *mobj_handle, + struct icp_qat_mof_objhdr *mobj_hdr, + struct icp_qat_mof_obj_chunkhdr *obj_chunkhdr) +{ + u8 *obj; + + if (!strncmp(obj_chunkhdr->chunk_id, ICP_QAT_UOF_IMAG, + ICP_QAT_MOF_OBJ_CHUNKID_LEN)) { + obj = mobj_handle->uobjs_hdr + obj_chunkhdr->offset; + } else if (!strncmp(obj_chunkhdr->chunk_id, ICP_QAT_SUOF_IMAG, + ICP_QAT_MOF_OBJ_CHUNKID_LEN)) { + obj = mobj_handle->sobjs_hdr + obj_chunkhdr->offset; + } else { + pr_err("QAT: unsupported chunk id\n"); + return -EINVAL; + } + mobj_hdr->obj_buf = obj; + mobj_hdr->obj_size = (unsigned int)obj_chunkhdr->size; + mobj_hdr->obj_name = obj_chunkhdr->name + mobj_handle->sym_str; + return 0; +} + +static int qat_uclo_map_objs_from_mof(struct icp_qat_mof_handle *mobj_handle) +{ + struct icp_qat_mof_obj_chunkhdr *uobj_chunkhdr; + struct icp_qat_mof_obj_chunkhdr *sobj_chunkhdr; + struct icp_qat_mof_obj_hdr *uobj_hdr; + struct icp_qat_mof_obj_hdr *sobj_hdr; + struct icp_qat_mof_objhdr *mobj_hdr; + unsigned int uobj_chunk_num = 0; + unsigned int sobj_chunk_num = 0; + unsigned int *valid_chunk; + int ret, i; + + uobj_hdr = (struct icp_qat_mof_obj_hdr *)mobj_handle->uobjs_hdr; + sobj_hdr = (struct icp_qat_mof_obj_hdr *)mobj_handle->sobjs_hdr; + if (uobj_hdr) + uobj_chunk_num = uobj_hdr->num_chunks; + if (sobj_hdr) + sobj_chunk_num = sobj_hdr->num_chunks; + + mobj_hdr = kzalloc((uobj_chunk_num + sobj_chunk_num) * + sizeof(*mobj_hdr), GFP_KERNEL); + if (!mobj_hdr) + return -ENOMEM; + + mobj_handle->obj_table.obj_hdr = mobj_hdr; + valid_chunk = &mobj_handle->obj_table.num_objs; + uobj_chunkhdr = (struct icp_qat_mof_obj_chunkhdr *) + ((uintptr_t)uobj_hdr + sizeof(*uobj_hdr)); + sobj_chunkhdr = (struct icp_qat_mof_obj_chunkhdr *) + ((uintptr_t)sobj_hdr + sizeof(*sobj_hdr)); + + /* map uof objects */ + for (i = 0; i < uobj_chunk_num; i++) { + ret = qat_uclo_map_obj_from_mof(mobj_handle, + &mobj_hdr[*valid_chunk], + &uobj_chunkhdr[i]); + if (ret) + return ret; + (*valid_chunk)++; + } + + /* map suof objects */ + for (i = 0; i < sobj_chunk_num; i++) { + ret = qat_uclo_map_obj_from_mof(mobj_handle, + &mobj_hdr[*valid_chunk], + &sobj_chunkhdr[i]); + if (ret) + return ret; + (*valid_chunk)++; + } + + if ((uobj_chunk_num + sobj_chunk_num) != *valid_chunk) { + pr_err("QAT: inconsistent UOF/SUOF chunk amount\n"); + return -EINVAL; + } + return 0; +} + +static void qat_uclo_map_mof_symobjs(struct icp_qat_mof_handle *mobj_handle, + struct icp_qat_mof_chunkhdr *mof_chunkhdr) +{ + char **sym_str = (char **)&mobj_handle->sym_str; + unsigned int *sym_size = &mobj_handle->sym_size; + struct icp_qat_mof_str_table *str_table_obj; + + *sym_size = *(unsigned int *)(uintptr_t) + (mof_chunkhdr->offset + mobj_handle->mof_buf); + *sym_str = (char *)(uintptr_t) + (mobj_handle->mof_buf + mof_chunkhdr->offset + + sizeof(str_table_obj->tab_len)); +} + +static void qat_uclo_map_mof_chunk(struct icp_qat_mof_handle *mobj_handle, + struct icp_qat_mof_chunkhdr *mof_chunkhdr) +{ + char *chunk_id = mof_chunkhdr->chunk_id; + + if (!strncmp(chunk_id, ICP_QAT_MOF_SYM_OBJS, ICP_QAT_MOF_OBJ_ID_LEN)) + qat_uclo_map_mof_symobjs(mobj_handle, mof_chunkhdr); + else if (!strncmp(chunk_id, ICP_QAT_UOF_OBJS, ICP_QAT_MOF_OBJ_ID_LEN)) + mobj_handle->uobjs_hdr = mobj_handle->mof_buf + + mof_chunkhdr->offset; + else if (!strncmp(chunk_id, ICP_QAT_SUOF_OBJS, ICP_QAT_MOF_OBJ_ID_LEN)) + mobj_handle->sobjs_hdr = mobj_handle->mof_buf + + mof_chunkhdr->offset; +} + +static int qat_uclo_check_mof_format(struct icp_qat_mof_file_hdr *mof_hdr) +{ + int maj = mof_hdr->maj_ver & 0xff; + int min = mof_hdr->min_ver & 0xff; + + if (mof_hdr->file_id != ICP_QAT_MOF_FID) { + pr_err("QAT: invalid header 0x%x\n", mof_hdr->file_id); + return -EINVAL; + } + + if (mof_hdr->num_chunks <= 0x1) { + pr_err("QAT: MOF chunk amount is incorrect\n"); + return -EINVAL; + } + if (maj != ICP_QAT_MOF_MAJVER || min != ICP_QAT_MOF_MINVER) { + pr_err("QAT: bad MOF version, major 0x%x, minor 0x%x\n", + maj, min); + return -EINVAL; + } + return 0; +} + +static int qat_uclo_map_mof_obj(struct icp_qat_fw_loader_handle *handle, + struct icp_qat_mof_file_hdr *mof_ptr, + u32 mof_size, char *obj_name, char **obj_ptr, + unsigned int *obj_size) +{ + struct icp_qat_mof_chunkhdr *mof_chunkhdr; + unsigned int file_id = mof_ptr->file_id; + struct icp_qat_mof_handle *mobj_handle; + unsigned short chunks_num; + unsigned int i; + int ret; + + if (file_id == ICP_QAT_UOF_FID || file_id == ICP_QAT_SUOF_FID) { + if (obj_ptr) + *obj_ptr = (char *)mof_ptr; + if (obj_size) + *obj_size = mof_size; + return 0; + } + if (qat_uclo_check_mof_format(mof_ptr)) + return -EINVAL; + + mobj_handle = kzalloc(sizeof(*mobj_handle), GFP_KERNEL); + if (!mobj_handle) + return -ENOMEM; + + handle->mobj_handle = mobj_handle; + ret = qat_uclo_map_mof_file_hdr(handle, mof_ptr, mof_size); + if (ret) + return ret; + + mof_chunkhdr = (void *)mof_ptr + sizeof(*mof_ptr); + chunks_num = mof_ptr->num_chunks; + + /* Parse MOF file chunks */ + for (i = 0; i < chunks_num; i++) + qat_uclo_map_mof_chunk(mobj_handle, &mof_chunkhdr[i]); + + /* All sym_objs uobjs and sobjs should be available */ + if (!mobj_handle->sym_str || + (!mobj_handle->uobjs_hdr && !mobj_handle->sobjs_hdr)) + return -EINVAL; + + ret = qat_uclo_map_objs_from_mof(mobj_handle); + if (ret) + return ret; + + /* Seek specified uof object in MOF */ + return qat_uclo_seek_obj_inside_mof(mobj_handle, obj_name, + obj_ptr, obj_size); +} + +int qat_uclo_map_obj(struct icp_qat_fw_loader_handle *handle, + void *addr_ptr, u32 mem_size, char *obj_name) +{ + char *obj_addr; + u32 obj_size; + int ret; + BUILD_BUG_ON(ICP_QAT_UCLO_MAX_AE >= (sizeof(handle->hal_handle->ae_mask) * 8)); if (!handle || !addr_ptr || mem_size < 24) return -EINVAL; - return (handle->fw_auth) ? - qat_uclo_map_suof_obj(handle, addr_ptr, mem_size) : - qat_uclo_map_uof_obj(handle, addr_ptr, mem_size); + if (obj_name) { + ret = qat_uclo_map_mof_obj(handle, addr_ptr, mem_size, obj_name, + &obj_addr, &obj_size); + if (ret) + return ret; + } else { + obj_addr = addr_ptr; + obj_size = mem_size; + } + + return (handle->chip_info->fw_auth) ? + qat_uclo_map_suof_obj(handle, obj_addr, obj_size) : + qat_uclo_map_uof_obj(handle, obj_addr, obj_size); } -void qat_uclo_del_uof_obj(struct icp_qat_fw_loader_handle *handle) +void qat_uclo_del_obj(struct icp_qat_fw_loader_handle *handle) { struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle; unsigned int a; + if (handle->mobj_handle) + qat_uclo_del_mof(handle); if (handle->sobj_handle) qat_uclo_del_suof(handle); if (!obj_handle) @@ -1479,23 +1895,24 @@ static void qat_uclo_fill_uwords(struct icp_qat_uclo_objhandle *obj_handle, u64 *uword, unsigned int addr_p, unsigned int raddr, u64 fill) { + unsigned int i, addr; u64 uwrd = 0; - unsigned int i; if (!encap_page) { *uword = fill; return; } + addr = (encap_page->page_region) ? raddr : addr_p; for (i = 0; i < encap_page->uwblock_num; i++) { - if (raddr >= encap_page->uwblock[i].start_addr && - raddr <= encap_page->uwblock[i].start_addr + + if (addr >= encap_page->uwblock[i].start_addr && + addr <= encap_page->uwblock[i].start_addr + encap_page->uwblock[i].words_num - 1) { - raddr -= encap_page->uwblock[i].start_addr; - raddr *= obj_handle->uword_in_bytes; + addr -= encap_page->uwblock[i].start_addr; + addr *= obj_handle->uword_in_bytes; memcpy(&uwrd, (void *)(((uintptr_t) - encap_page->uwblock[i].micro_words) + raddr), + encap_page->uwblock[i].micro_words) + addr), obj_handle->uword_in_bytes); - uwrd = uwrd & 0xbffffffffffull; + uwrd = uwrd & GENMASK_ULL(43, 0); } } *uword = uwrd; @@ -1546,6 +1963,10 @@ static void qat_uclo_wr_uimage_page(struct icp_qat_fw_loader_handle *handle, struct icp_qat_uof_image *image) { struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle; + unsigned long ae_mask = handle->hal_handle->ae_mask; + unsigned long cfg_ae_mask = handle->cfg_ae_mask; + unsigned long ae_assigned = image->ae_assigned; + struct icp_qat_uclo_aedata *aed; unsigned int ctx_mask, s; struct icp_qat_uclo_page *page; unsigned char ae; @@ -1557,25 +1978,30 @@ static void qat_uclo_wr_uimage_page(struct icp_qat_fw_loader_handle *handle, ctx_mask = 0x55; /* load the default page and set assigned CTX PC * to the entrypoint address */ - for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) { - if (!test_bit(ae, (unsigned long *)&image->ae_assigned)) + for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) { + if (!test_bit(ae, &cfg_ae_mask)) continue; + + if (!test_bit(ae, &ae_assigned)) + continue; + + aed = &obj_handle->ae_data[ae]; /* find the slice to which this image is assigned */ - for (s = 0; s < obj_handle->ae_data[ae].slice_num; s++) { - if (image->ctx_assigned & obj_handle->ae_data[ae]. - ae_slices[s].ctx_mask_assigned) + for (s = 0; s < aed->slice_num; s++) { + if (image->ctx_assigned & + aed->ae_slices[s].ctx_mask_assigned) break; } - if (s >= obj_handle->ae_data[ae].slice_num) + if (s >= aed->slice_num) continue; - page = obj_handle->ae_data[ae].ae_slices[s].page; + page = aed->ae_slices[s].page; if (!page->encap_page->def_page) continue; qat_uclo_wr_uimage_raw_page(handle, page->encap_page, ae); - page = obj_handle->ae_data[ae].ae_slices[s].page; + page = aed->ae_slices[s].page; for (ctx = 0; ctx < ICP_QAT_UCLO_MAX_CTX; ctx++) - obj_handle->ae_data[ae].ae_slices[s].cur_page[ctx] = + aed->ae_slices[s].cur_page[ctx] = (ctx_mask & (1 << ctx)) ? page : NULL; qat_hal_set_live_ctx(handle, (unsigned char)ae, image->ctx_assigned); @@ -1595,13 +2021,18 @@ static int qat_uclo_wr_suof_img(struct icp_qat_fw_loader_handle *handle) if (qat_uclo_map_auth_fw(handle, (char *)simg_hdr[i].simg_buf, (unsigned int) - (simg_hdr[i].simg_len), + simg_hdr[i].simg_len, &desc)) goto wr_err; if (qat_uclo_auth_fw(handle, desc)) goto wr_err; - if (qat_uclo_load_fw(handle, desc)) - goto wr_err; + if (qat_uclo_is_broadcast(handle, i)) { + if (qat_uclo_broadcast_load_fw(handle, desc)) + goto wr_err; + } else { + if (qat_uclo_load_fw(handle, desc)) + goto wr_err; + } qat_uclo_ummap_auth_fw(handle, &desc); } return 0; @@ -1630,6 +2061,16 @@ static int qat_uclo_wr_uof_img(struct icp_qat_fw_loader_handle *handle) int qat_uclo_wr_all_uimage(struct icp_qat_fw_loader_handle *handle) { - return (handle->fw_auth) ? qat_uclo_wr_suof_img(handle) : + return (handle->chip_info->fw_auth) ? qat_uclo_wr_suof_img(handle) : qat_uclo_wr_uof_img(handle); } + +int qat_uclo_set_cfg_ae_mask(struct icp_qat_fw_loader_handle *handle, + unsigned int cfg_ae_mask) +{ + if (!cfg_ae_mask) + return -EINVAL; + + handle->cfg_ae_mask = cfg_ae_mask; + return 0; +} diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index b975c263446d..1e83d9397b11 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -3,7 +3,9 @@ #include #include #include +#include #include "adf_dh895xcc_hw_data.h" +#include "icp_qat_hw.h" /* Worker thread to service arbiter mappings based on dev SKUs */ static const u32 thrd_to_arb_map_sku4[] = { @@ -24,15 +26,19 @@ static struct adf_hw_device_class dh895xcc_class = { .instances = 0 }; -static u32 get_accel_mask(u32 fuse) +static u32 get_accel_mask(struct adf_hw_device_data *self) { - return (~fuse) >> ADF_DH895XCC_ACCELERATORS_REG_OFFSET & - ADF_DH895XCC_ACCELERATORS_MASK; + u32 fuses = self->fuses; + + return ~fuses >> ADF_DH895XCC_ACCELERATORS_REG_OFFSET & + ADF_DH895XCC_ACCELERATORS_MASK; } -static u32 get_ae_mask(u32 fuse) +static u32 get_ae_mask(struct adf_hw_device_data *self) { - return (~fuse) & ADF_DH895XCC_ACCELENGINES_MASK; + u32 fuses = self->fuses; + + return ~fuses & ADF_DH895XCC_ACCELENGINES_MASK; } static u32 get_num_accels(struct adf_hw_device_data *self) @@ -78,6 +84,29 @@ static u32 get_sram_bar_id(struct adf_hw_device_data *self) return ADF_DH895XCC_SRAM_BAR; } +static u32 get_accel_cap(struct adf_accel_dev *accel_dev) +{ + struct pci_dev *pdev = accel_dev->accel_pci_dev.pci_dev; + u32 capabilities; + u32 legfuses; + + capabilities = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC | + ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC | + ICP_ACCEL_CAPABILITIES_AUTHENTICATION; + + /* Read accelerator capabilities mask */ + pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, &legfuses); + + if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE) + capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC; + if (legfuses & ICP_ACCEL_MASK_PKE_SLICE) + capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC; + if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE) + capabilities &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION; + + return capabilities; +} + static enum dev_sku_info get_sku(struct adf_hw_device_data *self) { int sku = (self->fuses & ADF_DH895XCC_FUSECTL_SKU_MASK) @@ -131,11 +160,13 @@ static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_device = accel_dev->hw_device; struct adf_bar *misc_bar = &GET_BARS(accel_dev)[ADF_DH895XCC_PMISC_BAR]; + unsigned long accel_mask = hw_device->accel_mask; + unsigned long ae_mask = hw_device->ae_mask; void __iomem *csr = misc_bar->virt_addr; unsigned int val, i; /* Enable Accel Engine error detection & correction */ - for (i = 0; i < hw_device->get_num_aes(hw_device); i++) { + for_each_set_bit(i, &ae_mask, GET_MAX_ACCELENGINES(accel_dev)) { val = ADF_CSR_RD(csr, ADF_DH895XCC_AE_CTX_ENABLES(i)); val |= ADF_DH895XCC_ENABLE_AE_ECC_ERR; ADF_CSR_WR(csr, ADF_DH895XCC_AE_CTX_ENABLES(i), val); @@ -145,7 +176,7 @@ static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) } /* Enable shared memory error detection & correction */ - for (i = 0; i < hw_device->get_num_accels(hw_device); i++) { + for_each_set_bit(i, &accel_mask, ADF_DH895XCC_MAX_ACCELERATORS) { val = ADF_CSR_RD(csr, ADF_DH895XCC_UERRSSMSH(i)); val |= ADF_DH895XCC_ERRSSMSH_EN; ADF_CSR_WR(csr, ADF_DH895XCC_UERRSSMSH(i), val); @@ -164,7 +195,7 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) /* Enable bundle and misc interrupts */ ADF_CSR_WR(addr, ADF_DH895XCC_SMIAPF0_MASK_OFFSET, accel_dev->pf.vf_info ? 0 : - GENMASK_ULL(GET_MAX_BANKS(accel_dev) - 1, 0)); + BIT_ULL(GET_MAX_BANKS(accel_dev)) - 1); ADF_CSR_WR(addr, ADF_DH895XCC_SMIAPF1_MASK_OFFSET, ADF_DH895XCC_SMIA1_MASK); } @@ -174,11 +205,19 @@ static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev) return 0; } +static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable) +{ + adf_gen2_cfg_iov_thds(accel_dev, enable, + ADF_DH895XCC_AE2FUNC_MAP_GRP_A_NUM_REGS, + ADF_DH895XCC_AE2FUNC_MAP_GRP_B_NUM_REGS); +} + void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &dh895xcc_class; hw_data->instance_id = dh895xcc_class.instances++; hw_data->num_banks = ADF_DH895XCC_ETR_MAX_BANKS; + hw_data->num_rings_per_bank = ADF_ETR_MAX_RINGS_PER_BANK; hw_data->num_accel = ADF_DH895XCC_MAX_ACCELERATORS; hw_data->num_logical_accel = 1; hw_data->num_engines = ADF_DH895XCC_MAX_ACCELENGINES; @@ -189,18 +228,22 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->enable_error_correction = adf_enable_error_correction; hw_data->get_accel_mask = get_accel_mask; hw_data->get_ae_mask = get_ae_mask; + hw_data->get_accel_cap = get_accel_cap; hw_data->get_num_accels = get_num_accels; hw_data->get_num_aes = get_num_aes; hw_data->get_etr_bar_id = get_etr_bar_id; hw_data->get_misc_bar_id = get_misc_bar_id; hw_data->get_pf2vf_offset = get_pf2vf_offset; hw_data->get_vintmsk_offset = get_vintmsk_offset; + hw_data->get_admin_info = adf_gen2_get_admin_info; + hw_data->get_arb_info = adf_gen2_get_arb_info; hw_data->get_sram_bar_id = get_sram_bar_id; hw_data->get_sku = get_sku; hw_data->fw_name = ADF_DH895XCC_FW; hw_data->fw_mmp_name = ADF_DH895XCC_MMP; hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; + hw_data->configure_iov_threads = configure_iov_threads; hw_data->disable_iov = adf_disable_sriov; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; @@ -210,6 +253,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; hw_data->reset_device = adf_reset_sbr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } void adf_clean_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h index 082a04466dca..4d613923d155 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h @@ -36,6 +36,11 @@ #define ADF_DH895XCC_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04)) #define ADF_DH895XCC_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04)) + +/* AE to function mapping */ +#define ADF_DH895XCC_AE2FUNC_MAP_GRP_A_NUM_REGS 96 +#define ADF_DH895XCC_AE2FUNC_MAP_GRP_B_NUM_REGS 12 + /* FW names */ #define ADF_DH895XCC_FW "qat_895xcc.bin" #define ADF_DH895XCC_MMP "qat_895xcc_mmp.bin" diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index ecb4f6f20e22..a9ec4357144c 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -128,8 +128,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) &hw_data->fuses); /* Get Accelerators and Accelerators Engines masks */ - hw_data->accel_mask = hw_data->get_accel_mask(hw_data->fuses); - hw_data->ae_mask = hw_data->get_ae_mask(hw_data->fuses); + hw_data->accel_mask = hw_data->get_accel_mask(hw_data); + hw_data->ae_mask = hw_data->get_ae_mask(hw_data); accel_pci_dev->sku = hw_data->get_sku(hw_data); /* If the device has no acceleration engines then ignore it. */ if (!hw_data->accel_mask || !hw_data->ae_mask || @@ -177,9 +177,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_err_disable; } - /* Read accelerator capabilities mask */ - pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, - &hw_data->accel_capabilities_mask); + /* Get accelerator capabilities mask */ + hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev); /* Find and map all the device's BARS */ i = 0; diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index 5246f0524ca3..f14fb82ed6df 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "adf_dh895xccvf_hw_data.h" static struct adf_hw_device_class dh895xcciov_class = { @@ -11,12 +12,12 @@ static struct adf_hw_device_class dh895xcciov_class = { .instances = 0 }; -static u32 get_accel_mask(u32 fuse) +static u32 get_accel_mask(struct adf_hw_device_data *self) { return ADF_DH895XCCIOV_ACCELERATORS_MASK; } -static u32 get_ae_mask(u32 fuse) +static u32 get_ae_mask(struct adf_hw_device_data *self) { return ADF_DH895XCCIOV_ACCELENGINES_MASK; } @@ -69,6 +70,7 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &dh895xcciov_class; hw_data->num_banks = ADF_DH895XCCIOV_ETR_MAX_BANKS; + hw_data->num_rings_per_bank = ADF_ETR_MAX_RINGS_PER_BANK; hw_data->num_accel = ADF_DH895XCCIOV_MAX_ACCELERATORS; hw_data->num_logical_accel = 1; hw_data->num_engines = ADF_DH895XCCIOV_MAX_ACCELENGINES; @@ -97,6 +99,7 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; hw_data->dev_class->instances++; adf_devmgr_update_class_index(hw_data); + adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } void adf_clean_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 404cf9df6922..c972554a755e 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -119,8 +119,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adf_init_hw_data_dh895xcciov(accel_dev->hw_device); /* Get Accelerators and Accelerators Engines masks */ - hw_data->accel_mask = hw_data->get_accel_mask(hw_data->fuses); - hw_data->ae_mask = hw_data->get_ae_mask(hw_data->fuses); + hw_data->accel_mask = hw_data->get_accel_mask(hw_data); + hw_data->ae_mask = hw_data->get_ae_mask(hw_data); accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index 5006e74c40cd..a73db2a5637f 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -7,7 +7,8 @@ #include #include #include -#include +#include +#include #include "cipher.h" #include "common.h" diff --git a/drivers/crypto/qce/core.c b/drivers/crypto/qce/core.c index ea616b7259ae..80b75085c265 100644 --- a/drivers/crypto/qce/core.c +++ b/drivers/crypto/qce/core.c @@ -13,7 +13,6 @@ #include #include #include -#include #include "core.h" #include "cipher.h" @@ -160,7 +159,21 @@ static int qce_check_version(struct qce_device *qce) return -ENODEV; qce->burst_size = QCE_BAM_BURST_SIZE; - qce->pipe_pair_id = 1; + + /* + * Rx and tx pipes are treated as a pair inside CE. + * Pipe pair number depends on the actual BAM dma pipe + * that is used for transfers. The BAM dma pipes are passed + * from the device tree and used to derive the pipe pair + * id in the CE driver as follows. + * BAM dma pipes(rx, tx) CE pipe pair id + * 0,1 0 + * 2,3 1 + * 4,5 2 + * 6,7 3 + * ... + */ + qce->pipe_pair_id = qce->dma.rxchan->chan_id >> 1; dev_dbg(qce->dev, "Crypto device found, version %d.%d.%d\n", major, minor, step); @@ -261,6 +274,7 @@ static int qce_crypto_remove(struct platform_device *pdev) static const struct of_device_id qce_crypto_of_match[] = { { .compatible = "qcom,crypto-v5.1", }, + { .compatible = "qcom,crypto-v5.4", }, {} }; MODULE_DEVICE_TABLE(of, qce_crypto_of_match); diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c index 87be96a0b0bb..61c418c12345 100644 --- a/drivers/crypto/qce/sha.c +++ b/drivers/crypto/qce/sha.c @@ -48,7 +48,7 @@ static void qce_ahash_done(void *data) dma_unmap_sg(qce->dev, &rctx->result_sg, 1, DMA_FROM_DEVICE); memcpy(rctx->digest, result->auth_iv, digestsize); - if (req->result) + if (req->result && rctx->last_blk) memcpy(req->result, result->auth_iv, digestsize); rctx->byte_count[0] = cpu_to_be32(result->auth_byte_count[0]); diff --git a/drivers/crypto/qce/sha.h b/drivers/crypto/qce/sha.h index d63526e3804d..a22695361f16 100644 --- a/drivers/crypto/qce/sha.h +++ b/drivers/crypto/qce/sha.h @@ -7,7 +7,8 @@ #define _SHA_H_ #include -#include +#include +#include #include "common.h" #include "core.h" diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 3db595570c9c..97278c2574ff 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -12,7 +12,8 @@ #include #include -#include +#include +#include #define _SBF(v, f) ((v) << (f)) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 88a6c853ffd7..682c8a450a57 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -30,7 +30,8 @@ #include #include -#include +#include +#include #include #define _SBF(s, v) ((v) << (s)) diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c index eda93fab95fe..f300b0a5958a 100644 --- a/drivers/crypto/sa2ul.c +++ b/drivers/crypto/sa2ul.c @@ -9,8 +9,10 @@ * Tero Kristo */ #include +#include #include #include +#include #include #include #include @@ -23,7 +25,8 @@ #include #include #include -#include +#include +#include #include "sa2ul.h" @@ -361,42 +364,45 @@ static void sa_swiz_128(u8 *in, u16 len) } /* Prepare the ipad and opad from key as per SHA algorithm step 1*/ -static void prepare_kiopad(u8 *k_ipad, u8 *k_opad, const u8 *key, u16 key_sz) +static void prepare_kipad(u8 *k_ipad, const u8 *key, u16 key_sz) { int i; - for (i = 0; i < key_sz; i++) { + for (i = 0; i < key_sz; i++) k_ipad[i] = key[i] ^ 0x36; - k_opad[i] = key[i] ^ 0x5c; - } /* Instead of XOR with 0 */ - for (; i < SHA1_BLOCK_SIZE; i++) { + for (; i < SHA1_BLOCK_SIZE; i++) k_ipad[i] = 0x36; - k_opad[i] = 0x5c; - } } -static void sa_export_shash(struct shash_desc *hash, int block_size, +static void prepare_kopad(u8 *k_opad, const u8 *key, u16 key_sz) +{ + int i; + + for (i = 0; i < key_sz; i++) + k_opad[i] = key[i] ^ 0x5c; + + /* Instead of XOR with 0 */ + for (; i < SHA1_BLOCK_SIZE; i++) + k_opad[i] = 0x5c; +} + +static void sa_export_shash(void *state, struct shash_desc *hash, int digest_size, __be32 *out) { - union { - struct sha1_state sha1; - struct sha256_state sha256; - struct sha512_state sha512; - } sha; - void *state; + struct sha1_state *sha1; + struct sha256_state *sha256; u32 *result; - int i; switch (digest_size) { case SHA1_DIGEST_SIZE: - state = &sha.sha1; - result = sha.sha1.state; + sha1 = state; + result = sha1->state; break; case SHA256_DIGEST_SIZE: - state = &sha.sha256; - result = sha.sha256.state; + sha256 = state; + result = sha256->state; break; default: dev_err(sa_k3_dev, "%s: bad digest_size=%d\n", __func__, @@ -406,8 +412,7 @@ static void sa_export_shash(struct shash_desc *hash, int block_size, crypto_shash_export(hash, state); - for (i = 0; i < digest_size >> 2; i++) - out[i] = cpu_to_be32(result[i]); + cpu_to_be32_array(out, result, digest_size / 4); } static void sa_prepare_iopads(struct algo_data *data, const u8 *key, @@ -416,24 +421,28 @@ static void sa_prepare_iopads(struct algo_data *data, const u8 *key, SHASH_DESC_ON_STACK(shash, data->ctx->shash); int block_size = crypto_shash_blocksize(data->ctx->shash); int digest_size = crypto_shash_digestsize(data->ctx->shash); - u8 k_ipad[SHA1_BLOCK_SIZE]; - u8 k_opad[SHA1_BLOCK_SIZE]; + union { + struct sha1_state sha1; + struct sha256_state sha256; + u8 k_pad[SHA1_BLOCK_SIZE]; + } sha; shash->tfm = data->ctx->shash; - prepare_kiopad(k_ipad, k_opad, key, key_sz); - - memzero_explicit(ipad, block_size); - memzero_explicit(opad, block_size); + prepare_kipad(sha.k_pad, key, key_sz); crypto_shash_init(shash); - crypto_shash_update(shash, k_ipad, block_size); - sa_export_shash(shash, block_size, digest_size, ipad); + crypto_shash_update(shash, sha.k_pad, block_size); + sa_export_shash(&sha, shash, digest_size, ipad); + + prepare_kopad(sha.k_pad, key, key_sz); crypto_shash_init(shash); - crypto_shash_update(shash, k_opad, block_size); + crypto_shash_update(shash, sha.k_pad, block_size); - sa_export_shash(shash, block_size, digest_size, opad); + sa_export_shash(&sha, shash, digest_size, opad); + + memzero_explicit(&sha, sizeof(sha)); } /* Derive the inverse key used in AES-CBC decryption operation */ @@ -506,7 +515,8 @@ static int sa_set_sc_enc(struct algo_data *ad, const u8 *key, u16 key_sz, static void sa_set_sc_auth(struct algo_data *ad, const u8 *key, u16 key_sz, u8 *sc_buf) { - __be32 ipad[64], opad[64]; + __be32 *ipad = (void *)(sc_buf + 32); + __be32 *opad = (void *)(sc_buf + 64); /* Set Authentication mode selector to hash processing */ sc_buf[0] = SA_HASH_PROCESSING; @@ -515,14 +525,9 @@ static void sa_set_sc_auth(struct algo_data *ad, const u8 *key, u16 key_sz, sc_buf[1] |= ad->auth_ctrl; /* Copy the keys or ipad/opad */ - if (ad->keyed_mac) { + if (ad->keyed_mac) ad->prep_iopad(ad, key, key_sz, ipad, opad); - - /* Copy ipad to AuthKey */ - memcpy(&sc_buf[32], ipad, ad->hash_size); - /* Copy opad to Aux-1 */ - memcpy(&sc_buf[64], opad, ad->hash_size); - } else { + else { /* basic hash */ sc_buf[1] |= SA_BASIC_HASH; } @@ -819,7 +824,7 @@ static void sa_cipher_cra_exit(struct crypto_skcipher *tfm) sa_free_ctx_info(&ctx->enc, data); sa_free_ctx_info(&ctx->dec, data); - crypto_free_sync_skcipher(ctx->fallback.skcipher); + crypto_free_skcipher(ctx->fallback.skcipher); } static int sa_cipher_cra_init(struct crypto_skcipher *tfm) @@ -827,6 +832,7 @@ static int sa_cipher_cra_init(struct crypto_skcipher *tfm) struct sa_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); struct sa_crypto_data *data = dev_get_drvdata(sa_k3_dev); const char *name = crypto_tfm_alg_name(&tfm->base); + struct crypto_skcipher *child; int ret; memzero_explicit(ctx, sizeof(*ctx)); @@ -841,14 +847,17 @@ static int sa_cipher_cra_init(struct crypto_skcipher *tfm) return ret; } - ctx->fallback.skcipher = - crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); + child = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->fallback.skcipher)) { + if (IS_ERR(child)) { dev_err(sa_k3_dev, "Error allocating fallback algo %s\n", name); - return PTR_ERR(ctx->fallback.skcipher); + return PTR_ERR(child); } + ctx->fallback.skcipher = child; + crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) + + sizeof(struct skcipher_request)); + dev_dbg(sa_k3_dev, "%s(0x%p) sc-ids(0x%x(0x%pad), 0x%x(0x%pad))\n", __func__, tfm, ctx->enc.sc_id, &ctx->enc.sc_phys, ctx->dec.sc_id, &ctx->dec.sc_phys); @@ -859,6 +868,7 @@ static int sa_cipher_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen, struct algo_data *ad) { struct sa_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = ctx->fallback.skcipher; int cmdl_len; struct sa_cmdl_cfg cfg; int ret; @@ -874,12 +884,10 @@ static int sa_cipher_setkey(struct crypto_skcipher *tfm, const u8 *key, cfg.enc_eng_id = ad->enc_eng.eng_id; cfg.iv_size = crypto_skcipher_ivsize(tfm); - crypto_sync_skcipher_clear_flags(ctx->fallback.skcipher, + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); - crypto_sync_skcipher_set_flags(ctx->fallback.skcipher, - tfm->base.crt_flags & - CRYPTO_TFM_REQ_MASK); - ret = crypto_sync_skcipher_setkey(ctx->fallback.skcipher, key, keylen); + ret = crypto_skcipher_setkey(child, key, keylen); if (ret) return ret; @@ -1270,7 +1278,6 @@ static int sa_cipher_run(struct skcipher_request *req, u8 *iv, int enc) crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); struct crypto_alg *alg = req->base.tfm->__crt_alg; struct sa_req sa_req = { 0 }; - int ret; if (!req->cryptlen) return 0; @@ -1282,20 +1289,18 @@ static int sa_cipher_run(struct skcipher_request *req, u8 *iv, int enc) if (req->cryptlen > SA_MAX_DATA_SZ || (req->cryptlen >= SA_UNSAFE_DATA_SZ_MIN && req->cryptlen <= SA_UNSAFE_DATA_SZ_MAX)) { - SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback.skcipher); + struct skcipher_request *subreq = skcipher_request_ctx(req); - skcipher_request_set_sync_tfm(subreq, ctx->fallback.skcipher); + skcipher_request_set_tfm(subreq, ctx->fallback.skcipher); skcipher_request_set_callback(subreq, req->base.flags, - NULL, NULL); + req->base.complete, + req->base.data); skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, req->iv); if (enc) - ret = crypto_skcipher_encrypt(subreq); + return crypto_skcipher_encrypt(subreq); else - ret = crypto_skcipher_decrypt(subreq); - - skcipher_request_zero(subreq); - return ret; + return crypto_skcipher_decrypt(subreq); } sa_req.size = req->cryptlen; diff --git a/drivers/crypto/sa2ul.h b/drivers/crypto/sa2ul.h index 7f7e3fe60d11..f597ddecde34 100644 --- a/drivers/crypto/sa2ul.h +++ b/drivers/crypto/sa2ul.h @@ -12,10 +12,9 @@ #ifndef _K3_SA2UL_ #define _K3_SA2UL_ -#include -#include -#include #include +#include +#include #define SA_ENGINE_ENABLE_CONTROL 0x1000 @@ -311,7 +310,7 @@ struct sa_tfm_ctx { struct crypto_shash *shash; /* for fallback */ union { - struct crypto_sync_skcipher *skcipher; + struct crypto_skcipher *skcipher; struct crypto_ahash *ahash; struct crypto_aead *aead; } fallback; diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index d60679c79822..8b5be29cb4dc 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index e3e25278a970..7ac0573ef663 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -25,7 +25,8 @@ #include #include #include -#include +#include +#include #include #define HASH_CR 0x00 diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 66773892f665..4fd85f31630a 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -31,7 +31,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -460,7 +461,7 @@ DEF_TALITOS2_DONE(ch1_3, TALITOS2_ISR_CH_1_3_DONE) /* * locate current (offending) descriptor */ -static u32 current_desc_hdr(struct device *dev, int ch) +static __be32 current_desc_hdr(struct device *dev, int ch) { struct talitos_private *priv = dev_get_drvdata(dev); int tail, iter; @@ -478,7 +479,7 @@ static u32 current_desc_hdr(struct device *dev, int ch) iter = tail; while (priv->chan[ch].fifo[iter].dma_desc != cur_desc && - priv->chan[ch].fifo[iter].desc->next_desc != cur_desc) { + priv->chan[ch].fifo[iter].desc->next_desc != cpu_to_be32(cur_desc)) { iter = (iter + 1) & (priv->fifo_len - 1); if (iter == tail) { dev_err(dev, "couldn't locate current descriptor\n"); @@ -486,7 +487,7 @@ static u32 current_desc_hdr(struct device *dev, int ch) } } - if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc) { + if (priv->chan[ch].fifo[iter].desc->next_desc == cpu_to_be32(cur_desc)) { struct talitos_edesc *edesc; edesc = container_of(priv->chan[ch].fifo[iter].desc, @@ -501,13 +502,13 @@ static u32 current_desc_hdr(struct device *dev, int ch) /* * user diagnostics; report root cause of error based on execution unit status */ -static void report_eu_error(struct device *dev, int ch, u32 desc_hdr) +static void report_eu_error(struct device *dev, int ch, __be32 desc_hdr) { struct talitos_private *priv = dev_get_drvdata(dev); int i; if (!desc_hdr) - desc_hdr = in_be32(priv->chan[ch].reg + TALITOS_DESCBUF); + desc_hdr = cpu_to_be32(in_be32(priv->chan[ch].reg + TALITOS_DESCBUF)); switch (desc_hdr & DESC_HDR_SEL0_MASK) { case DESC_HDR_SEL0_AFEU: diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 3d407eebb2ba..da284b0ea1b2 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -31,7 +31,8 @@ #include #include -#include +#include +#include #include #include diff --git a/drivers/firmware/efi/embedded-firmware.c b/drivers/firmware/efi/embedded-firmware.c index 21ae0c48232a..f5be8e22305b 100644 --- a/drivers/firmware/efi/embedded-firmware.c +++ b/drivers/firmware/efi/embedded-firmware.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include /* Exported for use by lib/test_firmware.c only */ LIST_HEAD(efi_embedded_fw_list); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c index 072299b14b8d..47d9268a7e3c 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c @@ -51,7 +51,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index 2d3dfdd2a716..65617752c630 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -9,7 +9,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index ec930ee2c847..5d5ad8307211 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include "s3fwrn5.h" #include "firmware.h" diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index 6ade4a5c4840..480d294a23ab 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "tee_private.h" #define TEE_NUM_DEVICES 32 diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 988dadf7a94d..3414d35ccbb6 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include "fscrypt_private.h" diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c index 0cba7928446d..e0ec21055505 100644 --- a/fs/crypto/hkdf.c +++ b/fs/crypto/hkdf.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include "fscrypt_private.h" diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c index b93b3cd10bfd..0886d835f597 100644 --- a/fs/ubifs/auth.c +++ b/fs/ubifs/auth.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index 96f7b332f54f..6413d28664d6 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -14,7 +14,7 @@ #define pr_fmt(fmt) "fs-verity: " fmt -#include +#include #include #include diff --git a/include/crypto/aead.h b/include/crypto/aead.h index c32a6f5664e9..fcc12c593ef8 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -191,6 +191,11 @@ static inline void crypto_free_aead(struct crypto_aead *tfm) crypto_destroy_tfm(tfm, crypto_aead_tfm(tfm)); } +static inline const char *crypto_aead_driver_name(struct crypto_aead *tfm) +{ + return crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)); +} + static inline struct aead_alg *crypto_aead_alg(struct crypto_aead *tfm) { return container_of(crypto_aead_tfm(tfm)->__crt_alg, diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h index 4e6dc840b159..ece6a9b5fafc 100644 --- a/include/crypto/curve25519.h +++ b/include/crypto/curve25519.h @@ -28,6 +28,8 @@ void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]); +bool curve25519_selftest(void); + static inline bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], diff --git a/include/crypto/hash_info.h b/include/crypto/hash_info.h index eb9d2e368969..dd4f06785049 100644 --- a/include/crypto/hash_info.h +++ b/include/crypto/hash_info.h @@ -8,7 +8,8 @@ #ifndef _CRYPTO_HASH_INFO_H #define _CRYPTO_HASH_INFO_H -#include +#include +#include #include #include diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 74ff77032e52..6e376ae6b6b5 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -16,6 +16,8 @@ void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, size_t nblocks, const u32 inc); +bool blake2s_selftest(void); + static inline void blake2s_set_lastblock(struct blake2s_state *state) { state->f[0] = -1; diff --git a/include/crypto/sha1.h b/include/crypto/sha1.h new file mode 100644 index 000000000000..044ecea60ac8 --- /dev/null +++ b/include/crypto/sha1.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common values for SHA-1 algorithms + */ + +#ifndef _CRYPTO_SHA1_H +#define _CRYPTO_SHA1_H + +#include + +#define SHA1_DIGEST_SIZE 20 +#define SHA1_BLOCK_SIZE 64 + +#define SHA1_H0 0x67452301UL +#define SHA1_H1 0xefcdab89UL +#define SHA1_H2 0x98badcfeUL +#define SHA1_H3 0x10325476UL +#define SHA1_H4 0xc3d2e1f0UL + +extern const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE]; + +struct sha1_state { + u32 state[SHA1_DIGEST_SIZE / 4]; + u64 count; + u8 buffer[SHA1_BLOCK_SIZE]; +}; + +struct shash_desc; + +extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash); + +/* + * An implementation of SHA-1's compression function. Don't use in new code! + * You shouldn't be using SHA-1, and even if you *have* to use SHA-1, this isn't + * the correct way to hash something with SHA-1 (use crypto_shash instead). + */ +#define SHA1_DIGEST_WORDS (SHA1_DIGEST_SIZE / 4) +#define SHA1_WORKSPACE_WORDS 16 +void sha1_init(__u32 *buf); +void sha1_transform(__u32 *digest, const char *data, __u32 *W); + +#endif /* _CRYPTO_SHA1_H */ diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h index 20fd1f7468af..2e0e7c3827d1 100644 --- a/include/crypto/sha1_base.h +++ b/include/crypto/sha1_base.h @@ -9,9 +9,10 @@ #define _CRYPTO_SHA1_BASE_H #include -#include +#include #include #include +#include #include @@ -101,7 +102,7 @@ static inline int sha1_base_finish(struct shash_desc *desc, u8 *out) for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) put_unaligned_be32(sctx->state[i], digest++); - *sctx = (struct sha1_state){}; + memzero_explicit(sctx, sizeof(*sctx)); return 0; } diff --git a/include/crypto/sha.h b/include/crypto/sha2.h similarity index 77% rename from include/crypto/sha.h rename to include/crypto/sha2.h index 4ff3da816630..2838f529f31e 100644 --- a/include/crypto/sha.h +++ b/include/crypto/sha2.h @@ -1,16 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Common values for SHA algorithms + * Common values for SHA-2 algorithms */ -#ifndef _CRYPTO_SHA_H -#define _CRYPTO_SHA_H +#ifndef _CRYPTO_SHA2_H +#define _CRYPTO_SHA2_H #include -#define SHA1_DIGEST_SIZE 20 -#define SHA1_BLOCK_SIZE 64 - #define SHA224_DIGEST_SIZE 28 #define SHA224_BLOCK_SIZE 64 @@ -23,12 +20,6 @@ #define SHA512_DIGEST_SIZE 64 #define SHA512_BLOCK_SIZE 128 -#define SHA1_H0 0x67452301UL -#define SHA1_H1 0xefcdab89UL -#define SHA1_H2 0x98badcfeUL -#define SHA1_H3 0x10325476UL -#define SHA1_H4 0xc3d2e1f0UL - #define SHA224_H0 0xc1059ed8UL #define SHA224_H1 0x367cd507UL #define SHA224_H2 0x3070dd17UL @@ -65,8 +56,6 @@ #define SHA512_H6 0x1f83d9abfb41bd6bULL #define SHA512_H7 0x5be0cd19137e2179ULL -extern const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE]; - extern const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE]; extern const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE]; @@ -75,12 +64,6 @@ extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE]; extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE]; -struct sha1_state { - u32 state[SHA1_DIGEST_SIZE / 4]; - u64 count; - u8 buffer[SHA1_BLOCK_SIZE]; -}; - struct sha256_state { u32 state[SHA256_DIGEST_SIZE / 4]; u64 count; @@ -95,12 +78,6 @@ struct sha512_state { struct shash_desc; -extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len); - -extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash); - extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len); @@ -113,16 +90,6 @@ extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data, extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *hash); -/* - * An implementation of SHA-1's compression function. Don't use in new code! - * You shouldn't be using SHA-1, and even if you *have* to use SHA-1, this isn't - * the correct way to hash something with SHA-1 (use crypto_shash instead). - */ -#define SHA1_DIGEST_WORDS (SHA1_DIGEST_SIZE / 4) -#define SHA1_WORKSPACE_WORDS 16 -void sha1_init(__u32 *buf); -void sha1_transform(__u32 *digest, const char *data, __u32 *W); - /* * Stand-alone implementation of the SHA256 algorithm. It is designed to * have as little dependencies as possible so it can be used in the @@ -164,4 +131,4 @@ static inline void sha224_init(struct sha256_state *sctx) void sha224_update(struct sha256_state *sctx, const u8 *data, unsigned int len); void sha224_final(struct sha256_state *sctx, u8 *out); -#endif +#endif /* _CRYPTO_SHA2_H */ diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h index 6ded110783ae..76173c613058 100644 --- a/include/crypto/sha256_base.h +++ b/include/crypto/sha256_base.h @@ -9,9 +9,10 @@ #define _CRYPTO_SHA256_BASE_H #include -#include +#include #include #include +#include #include @@ -105,7 +106,7 @@ static inline int sha256_base_finish(struct shash_desc *desc, u8 *out) for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be32)) put_unaligned_be32(sctx->state[i], digest++); - *sctx = (struct sha256_state){}; + memzero_explicit(sctx, sizeof(*sctx)); return 0; } diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h index fb19c77494dc..b370b3340b16 100644 --- a/include/crypto/sha512_base.h +++ b/include/crypto/sha512_base.h @@ -9,9 +9,10 @@ #define _CRYPTO_SHA512_BASE_H #include -#include +#include #include #include +#include #include @@ -126,7 +127,7 @@ static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64)) put_unaligned_be64(sctx->state[i], digest++); - *sctx = (struct sha512_state){}; + memzero_explicit(sctx, sizeof(*sctx)); return 0; } diff --git a/include/crypto/sm3_base.h b/include/crypto/sm3_base.h index 1cbf9aa1fe52..2f3a32ab97bb 100644 --- a/include/crypto/sm3_base.h +++ b/include/crypto/sm3_base.h @@ -13,6 +13,7 @@ #include #include #include +#include #include typedef void (sm3_block_fn)(struct sm3_state *sst, u8 const *src, int blocks); @@ -104,7 +105,7 @@ static inline int sm3_base_finish(struct shash_desc *desc, u8 *out) for (i = 0; i < SM3_DIGEST_SIZE / sizeof(__be32); i++) put_unaligned_be32(sctx->state[i], digest++); - *sctx = (struct sm3_state){}; + memzero_explicit(sctx, sizeof(*sctx)); return 0; } diff --git a/include/linux/ccp.h b/include/linux/ccp.h index a5dfbaf2470d..868924dec5a1 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include struct ccp_device; struct ccp_cmd; diff --git a/include/linux/filter.h b/include/linux/filter.h index 1b62397bd124..29c27656165b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h index b950e961cfa8..d7dc1559427f 100644 --- a/include/linux/purgatory.h +++ b/include/linux/purgatory.h @@ -3,7 +3,7 @@ #define _LINUX_PURGATORY_H #include -#include +#include #include struct kexec_sha_region { diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h index 60b7c2efd921..dc52a11ba6d1 100644 --- a/include/uapi/linux/if_alg.h +++ b/include/uapi/linux/if_alg.h @@ -24,6 +24,22 @@ struct sockaddr_alg { __u8 salg_name[64]; }; +/* + * Linux v4.12 and later removed the 64-byte limit on salg_name[]; it's now an + * arbitrary-length field. We had to keep the original struct above for source + * compatibility with existing userspace programs, though. Use the new struct + * below if support for very long algorithm names is needed. To do this, + * allocate 'sizeof(struct sockaddr_alg_new) + strlen(algname) + 1' bytes, and + * copy algname (including the null terminator) into salg_name. + */ +struct sockaddr_alg_new { + __u16 salg_family; + __u8 salg_type[14]; + __u32 salg_feat; + __u32 salg_mask; + __u8 salg_name[]; +}; + struct af_alg_iv { __u32 ivlen; __u8 iv[0]; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 106e4500fd53..4fcfe0b70c4e 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -11,7 +11,7 @@ #include #include -#include +#include /* vmcoreinfo stuff */ unsigned char *vmcoreinfo_data; diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 8798a8183974..4f8efc278aa7 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -42,7 +42,6 @@ #include #include -#include #include "kexec_internal.h" DEFINE_MUTEX(kexec_mutex); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index e21f6b9234f7..b02086d70492 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 79ef404a990d..5d9ea53be973 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -3,7 +3,7 @@ * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. */ -#include +#include #include /* diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 41025a30c524..6a4b6b78d630 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -17,8 +17,6 @@ #include #include -bool blake2s_selftest(void); - void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) { const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c index 288a62cd29b2..fb29739e8c29 100644 --- a/lib/crypto/curve25519.c +++ b/lib/crypto/curve25519.c @@ -13,8 +13,6 @@ #include #include -bool curve25519_selftest(void); - static int __init mod_init(void) { if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c index 2321f6cb322f..72a4b0b1df28 100644 --- a/lib/crypto/sha256.c +++ b/lib/crypto/sha256.c @@ -15,9 +15,28 @@ #include #include #include -#include +#include #include +static const u32 SHA256_K[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +}; + static inline u32 Ch(u32 x, u32 y, u32 z) { return z ^ (x & (y ^ z)); @@ -43,173 +62,68 @@ static inline void BLEND_OP(int I, u32 *W) W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; } -static void sha256_transform(u32 *state, const u8 *input) +#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) do { \ + u32 t1, t2; \ + t1 = h + e1(e) + Ch(e, f, g) + SHA256_K[i] + W[i]; \ + t2 = e0(a) + Maj(a, b, c); \ + d += t1; \ + h = t1 + t2; \ +} while (0) + +static void sha256_transform(u32 *state, const u8 *input, u32 *W) { - u32 a, b, c, d, e, f, g, h, t1, t2; - u32 W[64]; + u32 a, b, c, d, e, f, g, h; int i; /* load the input */ - for (i = 0; i < 16; i++) - LOAD_OP(i, W, input); + for (i = 0; i < 16; i += 8) { + LOAD_OP(i + 0, W, input); + LOAD_OP(i + 1, W, input); + LOAD_OP(i + 2, W, input); + LOAD_OP(i + 3, W, input); + LOAD_OP(i + 4, W, input); + LOAD_OP(i + 5, W, input); + LOAD_OP(i + 6, W, input); + LOAD_OP(i + 7, W, input); + } /* now blend */ - for (i = 16; i < 64; i++) - BLEND_OP(i, W); + for (i = 16; i < 64; i += 8) { + BLEND_OP(i + 0, W); + BLEND_OP(i + 1, W); + BLEND_OP(i + 2, W); + BLEND_OP(i + 3, W); + BLEND_OP(i + 4, W); + BLEND_OP(i + 5, W); + BLEND_OP(i + 6, W); + BLEND_OP(i + 7, W); + } /* load the state into our registers */ a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4]; f = state[5]; g = state[6]; h = state[7]; /* now iterate */ - t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; + for (i = 0; i < 64; i += 8) { + SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); + SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); + SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); + SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); + SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); + SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); + SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); + SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); + } state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; state[5] += f; state[6] += g; state[7] += h; - - /* clear any sensitive info... */ - a = b = c = d = e = f = g = h = t1 = t2 = 0; - memzero_explicit(W, 64 * sizeof(u32)); } void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len) { unsigned int partial, done; const u8 *src; + u32 W[64]; partial = sctx->count & 0x3f; sctx->count += len; @@ -224,11 +138,13 @@ void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len) } do { - sha256_transform(sctx->state, src); + sha256_transform(sctx->state, src, W); done += 64; src = data + done; } while (done + 63 < len); + memzero_explicit(W, sizeof(W)); + partial = 0; } memcpy(sctx->buf + partial, src, len - done); @@ -265,7 +181,7 @@ static void __sha256_final(struct sha256_state *sctx, u8 *out, int digest_words) put_unaligned_be32(sctx->state[i], &dst[i]); /* Zeroize sensitive information. */ - memset(sctx, 0, sizeof(*sctx)); + memzero_explicit(sctx, sizeof(*sctx)); } void sha256_final(struct sha256_state *sctx, u8 *out) diff --git a/lib/digsig.c b/lib/digsig.c index e0627c3e53b2..04b5e55ed95f 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c index c21470122dfc..40f5908e57a4 100644 --- a/lib/mpi/ec.c +++ b/lib/mpi/ec.c @@ -1252,7 +1252,6 @@ void mpi_ec_mul_point(MPI_POINT result, MPI_POINT q1, q2, prd, sum; unsigned long sw; mpi_size_t rsize; - int scalar_copied = 0; /* Compute scalar point multiplication with Montgomery Ladder. * Note that we don't use Y-coordinate in the points at all. @@ -1314,8 +1313,6 @@ void mpi_ec_mul_point(MPI_POINT result, point_free(&p2); point_free(&p1_); point_free(&p2_); - if (scalar_copied) - mpi_free(scalar); return; } diff --git a/lib/sha1.c b/lib/sha1.c index 49257a915bb6..9bd1935a1472 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include /* diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 85dddfe3a2c6..687d95dce085 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -35,7 +35,6 @@ #include #include -#include #include #include #include diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c index 05d398d3fde4..b472dc149856 100644 --- a/net/mptcp/crypto.c +++ b/net/mptcp/crypto.c @@ -21,7 +21,7 @@ */ #include -#include +#include #include #include "protocol.h" diff --git a/net/mptcp/options.c b/net/mptcp/options.c index a044dd43411d..90cd52df99a6 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -7,7 +7,7 @@ #define pr_fmt(fmt) "MPTCP: " fmt #include -#include +#include #include #include #include "protocol.h" diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 953906e40742..c21a852a6ffa 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index 413c803c5208..547425c20e11 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 192e531c146f..87432b35d771 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include "encrypted.h" diff --git a/security/keys/trusted-keys/trusted_tpm1.c b/security/keys/trusted-keys/trusted_tpm1.c index b9fe02e5f84f..74d82093cbaa 100644 --- a/security/keys/trusted-keys/trusted_tpm1.c +++ b/security/keys/trusted-keys/trusted_tpm1.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/soc/codecs/cros_ec_codec.c b/sound/soc/codecs/cros_ec_codec.c index 28f039adfa13..58894bf47514 100644 --- a/sound/soc/codecs/cros_ec_codec.c +++ b/sound/soc/codecs/cros_ec_codec.c @@ -8,7 +8,7 @@ * EC for audio function. */ -#include +#include #include #include #include