From 1894cb1de656cfde345c3b2690e379be1eb9db96 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Wed, 6 Mar 2024 21:58:46 +0800 Subject: [PATCH 001/123] crypto: qat - adf_get_etr_base() helper Add and use the new helper function adf_get_etr_base() which retrieves the virtual address of the ring bar. This will be used extensively when adding support for Live Migration. Signed-off-by: Giovanni Cabiddu Signed-off-by: Xin Zeng Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_common_drv.h | 10 ++++++++++ drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c | 4 +--- drivers/crypto/intel/qat/qat_common/adf_transport.c | 4 +--- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h index 57328249c89e..3bec9e20bad0 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h @@ -248,6 +248,16 @@ static inline void __iomem *adf_get_pmisc_base(struct adf_accel_dev *accel_dev) return pmisc->virt_addr; } +static inline void __iomem *adf_get_etr_base(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_bar *etr; + + etr = &GET_BARS(accel_dev)[hw_data->get_etr_bar_id(hw_data)]; + + return etr->virt_addr; +} + static inline void __iomem *adf_get_aram_base(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index d28e1921940a..b8a6d24f791f 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -321,8 +321,7 @@ static int reset_ring_pair(void __iomem *csr, u32 bank_number) int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; - u32 etr_bar_id = hw_data->get_etr_bar_id(hw_data); - void __iomem *csr; + void __iomem *csr = adf_get_etr_base(accel_dev); int ret; if (bank_number >= hw_data->num_banks) @@ -331,7 +330,6 @@ int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number) dev_dbg(&GET_DEV(accel_dev), "ring pair reset for bank:%d\n", bank_number); - csr = (&GET_BARS(accel_dev)[etr_bar_id])->virt_addr; ret = reset_ring_pair(csr, bank_number); if (ret) dev_err(&GET_DEV(accel_dev), diff --git a/drivers/crypto/intel/qat/qat_common/adf_transport.c b/drivers/crypto/intel/qat/qat_common/adf_transport.c index 630d0483c4e0..1efdf46490f1 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_transport.c +++ b/drivers/crypto/intel/qat/qat_common/adf_transport.c @@ -474,7 +474,6 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev, int adf_init_etr_data(struct adf_accel_dev *accel_dev) { struct adf_etr_data *etr_data; - struct adf_hw_device_data *hw_data = accel_dev->hw_device; void __iomem *csr_addr; u32 size; u32 num_banks = 0; @@ -495,8 +494,7 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev) } accel_dev->transport = etr_data; - i = hw_data->get_etr_bar_id(hw_data); - csr_addr = accel_dev->accel_pci_dev.pci_bars[i].virt_addr; + csr_addr = adf_get_etr_base(accel_dev); /* accel_dev->debugfs_dir should always be non-NULL here */ etr_data->debug = debugfs_create_dir("transport", From 1f8d6a163c20751629801c737a8cfd06f2002b4c Mon Sep 17 00:00:00 2001 From: Xin Zeng Date: Wed, 6 Mar 2024 21:58:47 +0800 Subject: [PATCH 002/123] crypto: qat - relocate and rename 4xxx PF2VM definitions Move and rename ADF_4XXX_PF2VM_OFFSET and ADF_4XXX_VM2PF_OFFSET to ADF_GEN4_PF2VM_OFFSET and ADF_GEN4_VM2PF_OFFSET respectively. These definitions are moved from adf_gen4_pfvf.c to adf_gen4_hw_data.h as they are specific to GEN4 and not just to qat_4xxx. This change is made in anticipation of their use in live migration. This does not introduce any functional change. Signed-off-by: Xin Zeng Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h | 4 ++++ drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c | 8 +++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index c6e80df5a85a..c153f41162ec 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -197,6 +197,10 @@ do { \ /* Arbiter threads mask with error value */ #define ADF_GEN4_ENA_THD_MASK_ERROR GENMASK(ADF_NUM_THREADS_PER_AE, 0) +/* PF2VM communication channel */ +#define ADF_GEN4_PF2VM_OFFSET(i) (0x40B010 + (i) * 0x20) +#define ADF_GEN4_VM2PF_OFFSET(i) (0x40B014 + (i) * 0x20) + void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); enum icp_qat_gen4_slice_mask { diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c index 8e8efe93f3ee..21474d402d09 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c @@ -6,12 +6,10 @@ #include "adf_accel_devices.h" #include "adf_common_drv.h" #include "adf_gen4_pfvf.h" +#include "adf_gen4_hw_data.h" #include "adf_pfvf_pf_proto.h" #include "adf_pfvf_utils.h" -#define ADF_4XXX_PF2VM_OFFSET(i) (0x40B010 + ((i) * 0x20)) -#define ADF_4XXX_VM2PF_OFFSET(i) (0x40B014 + ((i) * 0x20)) - /* VF2PF interrupt source registers */ #define ADF_4XXX_VM2PF_SOU 0x41A180 #define ADF_4XXX_VM2PF_MSK 0x41A1C0 @@ -29,12 +27,12 @@ static const struct pfvf_csr_format csr_gen4_fmt = { static u32 adf_gen4_pf_get_pf2vf_offset(u32 i) { - return ADF_4XXX_PF2VM_OFFSET(i); + return ADF_GEN4_PF2VM_OFFSET(i); } static u32 adf_gen4_pf_get_vf2pf_offset(u32 i) { - return ADF_4XXX_VM2PF_OFFSET(i); + return ADF_GEN4_VM2PF_OFFSET(i); } static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) From 867e801005e9e76f7ae2d143fed0da440150c64d Mon Sep 17 00:00:00 2001 From: Xin Zeng Date: Wed, 6 Mar 2024 21:58:48 +0800 Subject: [PATCH 003/123] crypto: qat - move PFVF compat checker to a function Move the code that implements VF version compatibility on the PF side to a separate function so that it can be reused when doing VM live migration. This does not introduce any functional change. Signed-off-by: Xin Zeng Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c | 8 +------- drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c index 9ab93fbfefde..b9b5e744a3f1 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c @@ -242,13 +242,7 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr, "VersionRequest received from VF%d (vers %d) to PF (vers %d)\n", vf_nr, vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION); - if (vf_compat_ver == 0) - compat = ADF_PF2VF_VF_INCOMPATIBLE; - else if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION) - compat = ADF_PF2VF_VF_COMPATIBLE; - else - compat = ADF_PF2VF_VF_COMPAT_UNKNOWN; - + compat = adf_vf_compat_checker(vf_compat_ver); vf_info->vf_compat_ver = vf_compat_ver; resp->type = ADF_PF2VF_MSGTYPE_VERSION_RESP; diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h index 2be048e2287b..1a044297d873 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h @@ -28,4 +28,15 @@ u32 adf_pfvf_csr_msg_of(struct adf_accel_dev *accel_dev, struct pfvf_message msg struct pfvf_message adf_pfvf_message_of(struct adf_accel_dev *accel_dev, u32 raw_msg, const struct pfvf_csr_format *fmt); +static inline u8 adf_vf_compat_checker(u8 vf_compat_ver) +{ + if (vf_compat_ver == 0) + return ADF_PF2VF_VF_INCOMPATIBLE; + + if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION) + return ADF_PF2VF_VF_COMPATIBLE; + + return ADF_PF2VF_VF_COMPAT_UNKNOWN; +} + #endif /* ADF_PFVF_UTILS_H */ From 680302d191b043cf3abe4076794de10171a4ca93 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Wed, 6 Mar 2024 21:58:49 +0800 Subject: [PATCH 004/123] crypto: qat - relocate CSR access code As the common hw_data files are growing and the adf_hw_csr_ops is going to be extended with new operations, move all logic related to ring CSRs to the newly created adf_gen[2|4]_hw_csr_data.[c|h] files. This does not introduce any functional change. Signed-off-by: Giovanni Cabiddu Signed-off-by: Xin Zeng Signed-off-by: Herbert Xu --- .../intel/qat/qat_420xx/adf_420xx_hw_data.c | 1 + .../intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 1 + .../intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 1 + .../qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c | 1 + .../intel/qat/qat_c62x/adf_c62x_hw_data.c | 1 + .../intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c | 1 + drivers/crypto/intel/qat/qat_common/Makefile | 2 + .../qat/qat_common/adf_gen2_hw_csr_data.c | 101 ++++++++++++++++++ .../qat/qat_common/adf_gen2_hw_csr_data.h | 86 +++++++++++++++ .../intel/qat/qat_common/adf_gen2_hw_data.c | 97 ----------------- .../intel/qat/qat_common/adf_gen2_hw_data.h | 76 ------------- .../qat/qat_common/adf_gen4_hw_csr_data.c | 101 ++++++++++++++++++ .../qat/qat_common/adf_gen4_hw_csr_data.h | 97 +++++++++++++++++ .../intel/qat/qat_common/adf_gen4_hw_data.c | 97 ----------------- .../intel/qat/qat_common/adf_gen4_hw_data.h | 94 +--------------- .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 1 + .../qat_dh895xccvf/adf_dh895xccvf_hw_data.c | 1 + 17 files changed, 397 insertions(+), 362 deletions(-) create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c index 1102c47f8293..9ccbf5998d5c 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 927506cf271d..ef4b0aa36603 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c index a882e0ea2279..201f9412c582 100644 --- a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include "adf_c3xxx_hw_data.h" diff --git a/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index 84d9486e04de..a512ca4efd3f 100644 --- a/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c index 48cf3eb7c734..6b5b0cf9c7c7 100644 --- a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include "adf_c62x_hw_data.h" diff --git a/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c index 751d7aa57fc7..4aaaaf921734 100644 --- a/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 5915cde8a7aa..ceaa685352ed 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -14,9 +14,11 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ adf_sysfs.o \ adf_sysfs_ras_counters.o \ + adf_gen2_hw_csr_data.o \ adf_gen2_hw_data.o \ adf_gen2_config.o \ adf_gen4_config.o \ + adf_gen4_hw_csr_data.o \ adf_gen4_hw_data.o \ adf_gen4_pm.o \ adf_gen2_dc.o \ diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c new file mode 100644 index 000000000000..650c9edd8a66 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include +#include "adf_gen2_hw_csr_data.h" + +static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) +{ + return BUILD_RING_BASE_ADDR(addr, size); +} + +static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); +} + +static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); +} + +static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_E_STAT(csr_base_addr, bank); +} + +static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, + u32 ring, u32 value) +{ + WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); +} + +static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, + dma_addr_t addr) +{ + WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); +} + +static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); +} + +static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); +} + +static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); +} + +static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); +} + +static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); +} + +static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); +} + +void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) +{ + csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; + csr_ops->read_csr_ring_head = read_csr_ring_head; + csr_ops->write_csr_ring_head = write_csr_ring_head; + csr_ops->read_csr_ring_tail = read_csr_ring_tail; + csr_ops->write_csr_ring_tail = write_csr_ring_tail; + csr_ops->read_csr_e_stat = read_csr_e_stat; + csr_ops->write_csr_ring_config = write_csr_ring_config; + csr_ops->write_csr_ring_base = write_csr_ring_base; + csr_ops->write_csr_int_flag = write_csr_int_flag; + csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; + csr_ops->write_csr_int_col_en = write_csr_int_col_en; + csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; + csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; + csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; +} +EXPORT_SYMBOL_GPL(adf_gen2_init_hw_csr_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h new file mode 100644 index 000000000000..55058b0f9e52 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef ADF_GEN2_HW_CSR_DATA_H_ +#define ADF_GEN2_HW_CSR_DATA_H_ + +#include +#include "adf_accel_devices.h" + +#define ADF_BANK_INT_SRC_SEL_MASK_0 0x4444444CUL +#define ADF_BANK_INT_SRC_SEL_MASK_X 0x44444444UL +#define ADF_RING_CSR_RING_CONFIG 0x000 +#define ADF_RING_CSR_RING_LBASE 0x040 +#define ADF_RING_CSR_RING_UBASE 0x080 +#define ADF_RING_CSR_RING_HEAD 0x0C0 +#define ADF_RING_CSR_RING_TAIL 0x100 +#define ADF_RING_CSR_E_STAT 0x14C +#define ADF_RING_CSR_INT_FLAG 0x170 +#define ADF_RING_CSR_INT_SRCSEL 0x174 +#define ADF_RING_CSR_INT_SRCSEL_2 0x178 +#define ADF_RING_CSR_INT_COL_EN 0x17C +#define ADF_RING_CSR_INT_COL_CTL 0x180 +#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 +#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 +#define ADF_RING_BUNDLE_SIZE 0x1000 +#define ADF_ARB_REG_SLOT 0x1000 +#define ADF_ARB_RINGSRVARBEN_OFFSET 0x19C + +#define BUILD_RING_BASE_ADDR(addr, size) \ + (((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) +#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2)) +#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2)) +#define READ_CSR_E_STAT(csr_base_addr, bank) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_E_STAT) +#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) +#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ +do { \ + u32 l_base = 0, u_base = 0; \ + l_base = (u32)((value) & 0xFFFFFFFF); \ + u_base = (u32)(((value) & 0xFFFFFFFF00000000ULL) >> 32); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_LBASE + ((ring) << 2), l_base); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_UBASE + ((ring) << 2), u_base); \ +} while (0) + +#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) +#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) +#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_FLAG, value) +#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ +do { \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK_0); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_SRCSEL_2, ADF_BANK_INT_SRC_SEL_MASK_X); \ +} while (0) +#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_COL_EN, value) +#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_COL_CTL, \ + ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) +#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_FLAG_AND_COL, value) + +#define WRITE_CSR_RING_SRV_ARB_EN(csr_addr, index, value) \ + ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \ + (ADF_ARB_REG_SLOT * (index)), value) + +void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c index d1884547b5a1..1f64bf49b221 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c @@ -111,103 +111,6 @@ void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev) } EXPORT_SYMBOL_GPL(adf_gen2_enable_ints); -static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) -{ - return BUILD_RING_BASE_ADDR(addr, size); -} - -static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); -} - -static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); -} - -static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) -{ - return READ_CSR_E_STAT(csr_base_addr, bank); -} - -static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, - u32 ring, u32 value) -{ - WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); -} - -static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, - dma_addr_t addr) -{ - WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); -} - -static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, u32 value) -{ - WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); -} - -static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) -{ - WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); -} - -static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); -} - -static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); -} - -static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); -} - -static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); -} - -void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) -{ - csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; - csr_ops->read_csr_ring_head = read_csr_ring_head; - csr_ops->write_csr_ring_head = write_csr_ring_head; - csr_ops->read_csr_ring_tail = read_csr_ring_tail; - csr_ops->write_csr_ring_tail = write_csr_ring_tail; - csr_ops->read_csr_e_stat = read_csr_e_stat; - csr_ops->write_csr_ring_config = write_csr_ring_config; - csr_ops->write_csr_ring_base = write_csr_ring_base; - csr_ops->write_csr_int_flag = write_csr_int_flag; - csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; - csr_ops->write_csr_int_col_en = write_csr_int_col_en; - csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; - csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; - csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; -} -EXPORT_SYMBOL_GPL(adf_gen2_init_hw_csr_ops); - u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h index 6bd341061de4..708e9186127b 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h @@ -6,78 +6,9 @@ #include "adf_accel_devices.h" #include "adf_cfg_common.h" -/* Transport access */ -#define ADF_BANK_INT_SRC_SEL_MASK_0 0x4444444CUL -#define ADF_BANK_INT_SRC_SEL_MASK_X 0x44444444UL -#define ADF_RING_CSR_RING_CONFIG 0x000 -#define ADF_RING_CSR_RING_LBASE 0x040 -#define ADF_RING_CSR_RING_UBASE 0x080 -#define ADF_RING_CSR_RING_HEAD 0x0C0 -#define ADF_RING_CSR_RING_TAIL 0x100 -#define ADF_RING_CSR_E_STAT 0x14C -#define ADF_RING_CSR_INT_FLAG 0x170 -#define ADF_RING_CSR_INT_SRCSEL 0x174 -#define ADF_RING_CSR_INT_SRCSEL_2 0x178 -#define ADF_RING_CSR_INT_COL_EN 0x17C -#define ADF_RING_CSR_INT_COL_CTL 0x180 -#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 -#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 -#define ADF_RING_BUNDLE_SIZE 0x1000 #define ADF_GEN2_RX_RINGS_OFFSET 8 #define ADF_GEN2_TX_RINGS_MASK 0xFF -#define BUILD_RING_BASE_ADDR(addr, size) \ - (((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) -#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2)) -#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2)) -#define READ_CSR_E_STAT(csr_base_addr, bank) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_E_STAT) -#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) -#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ -do { \ - u32 l_base = 0, u_base = 0; \ - l_base = (u32)((value) & 0xFFFFFFFF); \ - u_base = (u32)(((value) & 0xFFFFFFFF00000000ULL) >> 32); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_LBASE + ((ring) << 2), l_base); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_UBASE + ((ring) << 2), u_base); \ -} while (0) - -#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) -#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) -#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_FLAG, value) -#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ -do { \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK_0); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_SRCSEL_2, ADF_BANK_INT_SRC_SEL_MASK_X); \ -} while (0) -#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_COL_EN, value) -#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_COL_CTL, \ - ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) -#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_FLAG_AND_COL, value) - /* AE to function map */ #define AE2FUNCTION_MAP_A_OFFSET (0x3A400 + 0x190) #define AE2FUNCTION_MAP_B_OFFSET (0x3A400 + 0x310) @@ -106,12 +37,6 @@ do { \ #define ADF_ARB_OFFSET 0x30000 #define ADF_ARB_WRK_2_SER_MAP_OFFSET 0x180 #define ADF_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0)) -#define ADF_ARB_REG_SLOT 0x1000 -#define ADF_ARB_RINGSRVARBEN_OFFSET 0x19C - -#define WRITE_CSR_RING_SRV_ARB_EN(csr_addr, index, value) \ - ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \ - (ADF_ARB_REG_SLOT * (index)), value) /* Power gating */ #define ADF_POWERGATE_DC BIT(23) @@ -158,7 +83,6 @@ u32 adf_gen2_get_num_aes(struct adf_hw_device_data *self); void adf_gen2_enable_error_correction(struct adf_accel_dev *accel_dev); void adf_gen2_cfg_iov_thds(struct adf_accel_dev *accel_dev, bool enable, int num_a_regs, int num_b_regs); -void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); void adf_gen2_get_admin_info(struct admin_info *admin_csrs_info); void adf_gen2_get_arb_info(struct arb_info *arb_info); void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c new file mode 100644 index 000000000000..652ef4598930 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include +#include "adf_gen4_hw_csr_data.h" + +static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) +{ + return BUILD_RING_BASE_ADDR(addr, size); +} + +static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); +} + +static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); +} + +static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_E_STAT(csr_base_addr, bank); +} + +static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); +} + +static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, + dma_addr_t addr) +{ + WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); +} + +static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); +} + +static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); +} + +static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); +} + +static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); +} + +static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); +} + +static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); +} + +void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) +{ + csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; + csr_ops->read_csr_ring_head = read_csr_ring_head; + csr_ops->write_csr_ring_head = write_csr_ring_head; + csr_ops->read_csr_ring_tail = read_csr_ring_tail; + csr_ops->write_csr_ring_tail = write_csr_ring_tail; + csr_ops->read_csr_e_stat = read_csr_e_stat; + csr_ops->write_csr_ring_config = write_csr_ring_config; + csr_ops->write_csr_ring_base = write_csr_ring_base; + csr_ops->write_csr_int_flag = write_csr_int_flag; + csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; + csr_ops->write_csr_int_col_en = write_csr_int_col_en; + csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; + csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; + csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h new file mode 100644 index 000000000000..08d803432d9f --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef ADF_GEN4_HW_CSR_DATA_H_ +#define ADF_GEN4_HW_CSR_DATA_H_ + +#include +#include "adf_accel_devices.h" + +#define ADF_BANK_INT_SRC_SEL_MASK 0x44UL +#define ADF_RING_CSR_RING_CONFIG 0x1000 +#define ADF_RING_CSR_RING_LBASE 0x1040 +#define ADF_RING_CSR_RING_UBASE 0x1080 +#define ADF_RING_CSR_RING_HEAD 0x0C0 +#define ADF_RING_CSR_RING_TAIL 0x100 +#define ADF_RING_CSR_E_STAT 0x14C +#define ADF_RING_CSR_INT_FLAG 0x170 +#define ADF_RING_CSR_INT_SRCSEL 0x174 +#define ADF_RING_CSR_INT_COL_CTL 0x180 +#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 +#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 +#define ADF_RING_CSR_INT_COL_EN 0x17C +#define ADF_RING_CSR_ADDR_OFFSET 0x100000 +#define ADF_RING_BUNDLE_SIZE 0x2000 +#define ADF_RING_CSR_RING_SRV_ARB_EN 0x19C + +#define BUILD_RING_BASE_ADDR(addr, size) \ + ((((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) << 6) +#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2)) +#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2)) +#define READ_CSR_E_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_E_STAT) +#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) +#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ +do { \ + void __iomem *_csr_base_addr = csr_base_addr; \ + u32 _bank = bank; \ + u32 _ring = ring; \ + dma_addr_t _value = value; \ + u32 l_base = 0, u_base = 0; \ + l_base = lower_32_bits(_value); \ + u_base = upper_32_bits(_value); \ + ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (_bank) + \ + ADF_RING_CSR_RING_LBASE + ((_ring) << 2), l_base); \ + ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (_bank) + \ + ADF_RING_CSR_RING_UBASE + ((_ring) << 2), u_base); \ +} while (0) + +#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) +#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) +#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG, (value)) +#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK) +#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_COL_EN, (value)) +#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_COL_CTL, \ + ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) +#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG_AND_COL, (value)) + +#define WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_SRV_ARB_EN, (value)) + +void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index b8a6d24f791f..12269e309fbf 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -8,103 +8,6 @@ #include "adf_gen4_hw_data.h" #include "adf_gen4_pm.h" -static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) -{ - return BUILD_RING_BASE_ADDR(addr, size); -} - -static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); -} - -static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); -} - -static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) -{ - return READ_CSR_E_STAT(csr_base_addr, bank); -} - -static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); -} - -static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, - dma_addr_t addr) -{ - WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); -} - -static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); -} - -static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) -{ - WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); -} - -static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, u32 value) -{ - WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); -} - -static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); -} - -static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); -} - -static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); -} - -void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) -{ - csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; - csr_ops->read_csr_ring_head = read_csr_ring_head; - csr_ops->write_csr_ring_head = write_csr_ring_head; - csr_ops->read_csr_ring_tail = read_csr_ring_tail; - csr_ops->write_csr_ring_tail = write_csr_ring_tail; - csr_ops->read_csr_e_stat = read_csr_e_stat; - csr_ops->write_csr_ring_config = write_csr_ring_config; - csr_ops->write_csr_ring_base = write_csr_ring_base; - csr_ops->write_csr_int_flag = write_csr_int_flag; - csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; - csr_ops->write_csr_int_col_en = write_csr_int_col_en; - csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; - csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; - csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; -} -EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops); - u32 adf_gen4_get_accel_mask(struct adf_hw_device_data *self) { return ADF_GEN4_ACCELERATORS_MASK; diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index c153f41162ec..719f7757e587 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */ /* Copyright(c) 2020 Intel Corporation */ -#ifndef ADF_GEN4_HW_CSR_DATA_H_ -#define ADF_GEN4_HW_CSR_DATA_H_ +#ifndef ADF_GEN4_HW_DATA_H_ +#define ADF_GEN4_HW_DATA_H_ #include @@ -54,95 +54,6 @@ #define ADF_GEN4_ADMINMSGLR_OFFSET 0x500578 #define ADF_GEN4_MAILBOX_BASE_OFFSET 0x600970 -/* Transport access */ -#define ADF_BANK_INT_SRC_SEL_MASK 0x44UL -#define ADF_RING_CSR_RING_CONFIG 0x1000 -#define ADF_RING_CSR_RING_LBASE 0x1040 -#define ADF_RING_CSR_RING_UBASE 0x1080 -#define ADF_RING_CSR_RING_HEAD 0x0C0 -#define ADF_RING_CSR_RING_TAIL 0x100 -#define ADF_RING_CSR_E_STAT 0x14C -#define ADF_RING_CSR_INT_FLAG 0x170 -#define ADF_RING_CSR_INT_SRCSEL 0x174 -#define ADF_RING_CSR_INT_COL_CTL 0x180 -#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 -#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 -#define ADF_RING_CSR_INT_COL_EN 0x17C -#define ADF_RING_CSR_ADDR_OFFSET 0x100000 -#define ADF_RING_BUNDLE_SIZE 0x2000 - -#define BUILD_RING_BASE_ADDR(addr, size) \ - ((((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) << 6) -#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ - ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2)) -#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ - ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2)) -#define READ_CSR_E_STAT(csr_base_addr, bank) \ - ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_E_STAT) -#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) -#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ -do { \ - void __iomem *_csr_base_addr = csr_base_addr; \ - u32 _bank = bank; \ - u32 _ring = ring; \ - dma_addr_t _value = value; \ - u32 l_base = 0, u_base = 0; \ - l_base = lower_32_bits(_value); \ - u_base = upper_32_bits(_value); \ - ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (_bank) + \ - ADF_RING_CSR_RING_LBASE + ((_ring) << 2), l_base); \ - ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (_bank) + \ - ADF_RING_CSR_RING_UBASE + ((_ring) << 2), u_base); \ -} while (0) - -#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) -#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) -#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_FLAG, (value)) -#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK) -#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_COL_EN, (value)) -#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_COL_CTL, \ - ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) -#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_FLAG_AND_COL, (value)) - -/* Arbiter configuration */ -#define ADF_RING_CSR_RING_SRV_ARB_EN 0x19C - -#define WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_SRV_ARB_EN, (value)) - /* Default ring mapping */ #define ADF_GEN4_DEFAULT_RING_TO_SRV_MAP \ (ASYM << ADF_CFG_SERV_RING_PAIR_0_SHIFT | \ @@ -234,7 +145,6 @@ u32 adf_gen4_get_num_aes(struct adf_hw_device_data *self); enum dev_sku_info adf_gen4_get_sku(struct adf_hw_device_data *self); u32 adf_gen4_get_sram_bar_id(struct adf_hw_device_data *self); int adf_gen4_init_device(struct adf_accel_dev *accel_dev); -void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number); void adf_gen4_set_msix_default_rttable(struct adf_accel_dev *accel_dev); void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index af14090cc4be..6e24d57e6b98 100644 --- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include "adf_dh895xcc_hw_data.h" diff --git a/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index 70e56cc16ece..f4ee4c2e00da 100644 --- a/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include From 84058ffb919bf6a6aac24d2baf7fce442d24f390 Mon Sep 17 00:00:00 2001 From: Siming Wan Date: Wed, 6 Mar 2024 21:58:50 +0800 Subject: [PATCH 005/123] crypto: qat - rename get_sla_arr_of_type() The function get_sla_arr_of_type() returns a pointer to an SLA type specific array. Rename it and expose it as it will be used externally to this module. This does not introduce any functional change. Signed-off-by: Siming Wan Reviewed-by: Giovanni Cabiddu Reviewed-by: Damian Muszynski Signed-off-by: Xin Zeng Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_rl.c | 10 +++++----- drivers/crypto/intel/qat/qat_common/adf_rl.h | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index d4f2db3c53d8..65f752f4792a 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -183,14 +183,14 @@ static enum adf_cfg_service_type srv_to_cfg_svc_type(enum adf_base_services rl_s } /** - * get_sla_arr_of_type() - Returns a pointer to SLA type specific array + * adf_rl_get_sla_arr_of_type() - Returns a pointer to SLA type specific array * @rl_data: pointer to ratelimiting data * @type: SLA type * @sla_arr: pointer to variable where requested pointer will be stored * * Return: Max number of elements allowed for the returned array */ -static u32 get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, +u32 adf_rl_get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, struct rl_sla ***sla_arr) { switch (type) { @@ -778,7 +778,7 @@ static void clear_sla(struct adf_rl *rl_data, struct rl_sla *sla) rp_in_use[sla->ring_pairs_ids[i]] = false; update_budget(sla, old_cir, true); - get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); + adf_rl_get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); assign_node_to_parent(rl_data->accel_dev, sla, true); adf_rl_send_admin_delete_msg(rl_data->accel_dev, node_id, sla->type); mark_rps_usage(sla, rl_data->rp_in_use, false); @@ -875,7 +875,7 @@ static int add_update_sla(struct adf_accel_dev *accel_dev, if (!is_update) { mark_rps_usage(sla, rl_data->rp_in_use, true); - get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); + adf_rl_get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); sla_type_arr[sla->node_id] = sla; rl_data->sla[sla->sla_id] = sla; } @@ -1065,7 +1065,7 @@ void adf_rl_remove_sla_all(struct adf_accel_dev *accel_dev, bool incl_default) /* Unregister and remove all SLAs */ for (j = RL_LEAF; j >= end_type; j--) { - max_id = get_sla_arr_of_type(rl_data, j, &sla_type_arr); + max_id = adf_rl_get_sla_arr_of_type(rl_data, j, &sla_type_arr); for (i = 0; i < max_id; i++) { if (!sla_type_arr[i]) diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.h b/drivers/crypto/intel/qat/qat_common/adf_rl.h index 269c6656fb90..bfe750ea0e83 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.h +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.h @@ -151,6 +151,8 @@ struct rl_sla { u16 ring_pairs_cnt; }; +u32 adf_rl_get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, + struct rl_sla ***sla_arr); int adf_rl_add_sla(struct adf_accel_dev *accel_dev, struct adf_rl_sla_input_data *sla_in); int adf_rl_update_sla(struct adf_accel_dev *accel_dev, From 3fa1057e35474c715608635a0bf7452397580bfd Mon Sep 17 00:00:00 2001 From: Siming Wan Date: Wed, 6 Mar 2024 21:58:51 +0800 Subject: [PATCH 006/123] crypto: qat - expand CSR operations for QAT GEN4 devices Extend the CSR operations for QAT GEN4 devices to allow saving and restoring the rings state. The new operations will be used as a building block for implementing the state save and restore of Virtual Functions necessary for VM live migration. This adds the following operations: - read ring status register - read ring underflow/overflow status register - read ring nearly empty status register - read ring nearly full status register - read ring full status register - read ring complete status register - read ring exception status register - read/write ring exception interrupt mask register - read ring configuration register - read ring base register - read/write ring interrupt enable register - read ring interrupt flag register - read/write ring interrupt source select register - read ring coalesced interrupt enable register - read ring coalesced interrupt control register - read ring flag and coalesced interrupt enable register - read ring service arbiter enable register - get ring coalesced interrupt control enable mask Signed-off-by: Siming Wan Reviewed-by: Giovanni Cabiddu Signed-off-by: Xin Zeng Signed-off-by: Herbert Xu --- .../intel/qat/qat_common/adf_accel_devices.h | 27 ++++ .../qat/qat_common/adf_gen4_hw_csr_data.c | 130 ++++++++++++++++++ .../qat/qat_common/adf_gen4_hw_csr_data.h | 93 ++++++++++++- 3 files changed, 249 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index 08658c3a01e9..d1f3f5a822ff 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -150,22 +150,49 @@ struct adf_hw_csr_ops { u32 ring); void (*write_csr_ring_tail)(void __iomem *csr_base_addr, u32 bank, u32 ring, u32 value); + u32 (*read_csr_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_uo_stat)(void __iomem *csr_base_addr, u32 bank); u32 (*read_csr_e_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_ne_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_nf_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_f_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_c_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_exp_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_exp_int_en)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_exp_int_en)(void __iomem *csr_base_addr, u32 bank, + u32 value); + u32 (*read_csr_ring_config)(void __iomem *csr_base_addr, u32 bank, + u32 ring); void (*write_csr_ring_config)(void __iomem *csr_base_addr, u32 bank, u32 ring, u32 value); + dma_addr_t (*read_csr_ring_base)(void __iomem *csr_base_addr, u32 bank, + u32 ring); void (*write_csr_ring_base)(void __iomem *csr_base_addr, u32 bank, u32 ring, dma_addr_t addr); + u32 (*read_csr_int_en)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_int_en)(void __iomem *csr_base_addr, u32 bank, + u32 value); + u32 (*read_csr_int_flag)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_flag)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_int_srcsel)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_srcsel)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_int_srcsel_w_val)(void __iomem *csr_base_addr, + u32 bank, u32 value); + u32 (*read_csr_int_col_en)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_col_en)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_int_col_ctl)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_col_ctl)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_int_flag_and_col)(void __iomem *csr_base_addr, + u32 bank); void (*write_csr_int_flag_and_col)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_ring_srv_arb_en)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_ring_srv_arb_en)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*get_int_col_ctl_enable_mask)(void); }; struct adf_cfg_device_data; diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c index 652ef4598930..6609c248aaba 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c @@ -30,57 +30,166 @@ static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); } +static u32 read_csr_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_STAT(csr_base_addr, bank); +} + +static u32 read_csr_uo_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_UO_STAT(csr_base_addr, bank); +} + static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) { return READ_CSR_E_STAT(csr_base_addr, bank); } +static u32 read_csr_ne_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_NE_STAT(csr_base_addr, bank); +} + +static u32 read_csr_nf_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_NF_STAT(csr_base_addr, bank); +} + +static u32 read_csr_f_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_F_STAT(csr_base_addr, bank); +} + +static u32 read_csr_c_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_C_STAT(csr_base_addr, bank); +} + +static u32 read_csr_exp_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_EXP_STAT(csr_base_addr, bank); +} + +static u32 read_csr_exp_int_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_EXP_INT_EN(csr_base_addr, bank); +} + +static void write_csr_exp_int_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_EXP_INT_EN(csr_base_addr, bank, value); +} + +static u32 read_csr_ring_config(void __iomem *csr_base_addr, u32 bank, + u32 ring) +{ + return READ_CSR_RING_CONFIG(csr_base_addr, bank, ring); +} + static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, u32 ring, u32 value) { WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); } +static dma_addr_t read_csr_ring_base(void __iomem *csr_base_addr, u32 bank, + u32 ring) +{ + return READ_CSR_RING_BASE(csr_base_addr, bank, ring); +} + static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, dma_addr_t addr) { WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); } +static u32 read_csr_int_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_EN(csr_base_addr, bank); +} + +static void write_csr_int_en(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + WRITE_CSR_INT_EN(csr_base_addr, bank, value); +} + +static u32 read_csr_int_flag(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_FLAG(csr_base_addr, bank); +} + static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, u32 value) { WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); } +static u32 read_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_SRCSEL(csr_base_addr, bank); +} + static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) { WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); } +static void write_csr_int_srcsel_w_val(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_SRCSEL_W_VAL(csr_base_addr, bank, value); +} + +static u32 read_csr_int_col_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_COL_EN(csr_base_addr, bank); +} + static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, u32 value) { WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); } +static u32 read_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_COL_CTL(csr_base_addr, bank); +} + static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, u32 value) { WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); } +static u32 read_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_FLAG_AND_COL(csr_base_addr, bank); +} + static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, u32 value) { WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); } +static u32 read_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_RING_SRV_ARB_EN(csr_base_addr, bank); +} + static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, u32 value) { WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); } +static u32 get_int_col_ctl_enable_mask(void) +{ + return ADF_RING_CSR_INT_COL_CTL_ENABLE; +} + void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) { csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; @@ -88,14 +197,35 @@ void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) csr_ops->write_csr_ring_head = write_csr_ring_head; csr_ops->read_csr_ring_tail = read_csr_ring_tail; csr_ops->write_csr_ring_tail = write_csr_ring_tail; + csr_ops->read_csr_stat = read_csr_stat; + csr_ops->read_csr_uo_stat = read_csr_uo_stat; csr_ops->read_csr_e_stat = read_csr_e_stat; + csr_ops->read_csr_ne_stat = read_csr_ne_stat; + csr_ops->read_csr_nf_stat = read_csr_nf_stat; + csr_ops->read_csr_f_stat = read_csr_f_stat; + csr_ops->read_csr_c_stat = read_csr_c_stat; + csr_ops->read_csr_exp_stat = read_csr_exp_stat; + csr_ops->read_csr_exp_int_en = read_csr_exp_int_en; + csr_ops->write_csr_exp_int_en = write_csr_exp_int_en; + csr_ops->read_csr_ring_config = read_csr_ring_config; csr_ops->write_csr_ring_config = write_csr_ring_config; + csr_ops->read_csr_ring_base = read_csr_ring_base; csr_ops->write_csr_ring_base = write_csr_ring_base; + csr_ops->read_csr_int_en = read_csr_int_en; + csr_ops->write_csr_int_en = write_csr_int_en; + csr_ops->read_csr_int_flag = read_csr_int_flag; csr_ops->write_csr_int_flag = write_csr_int_flag; + csr_ops->read_csr_int_srcsel = read_csr_int_srcsel; csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; + csr_ops->write_csr_int_srcsel_w_val = write_csr_int_srcsel_w_val; + csr_ops->read_csr_int_col_en = read_csr_int_col_en; csr_ops->write_csr_int_col_en = write_csr_int_col_en; + csr_ops->read_csr_int_col_ctl = read_csr_int_col_ctl; csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; + csr_ops->read_csr_int_flag_and_col = read_csr_int_flag_and_col; csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; + csr_ops->read_csr_ring_srv_arb_en = read_csr_ring_srv_arb_en; csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; + csr_ops->get_int_col_ctl_enable_mask = get_int_col_ctl_enable_mask; } EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h index 08d803432d9f..6f33e7c87c2c 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h @@ -12,13 +12,22 @@ #define ADF_RING_CSR_RING_UBASE 0x1080 #define ADF_RING_CSR_RING_HEAD 0x0C0 #define ADF_RING_CSR_RING_TAIL 0x100 +#define ADF_RING_CSR_STAT 0x140 +#define ADF_RING_CSR_UO_STAT 0x148 #define ADF_RING_CSR_E_STAT 0x14C +#define ADF_RING_CSR_NE_STAT 0x150 +#define ADF_RING_CSR_NF_STAT 0x154 +#define ADF_RING_CSR_F_STAT 0x158 +#define ADF_RING_CSR_C_STAT 0x15C +#define ADF_RING_CSR_INT_FLAG_EN 0x16C #define ADF_RING_CSR_INT_FLAG 0x170 #define ADF_RING_CSR_INT_SRCSEL 0x174 +#define ADF_RING_CSR_INT_COL_EN 0x17C #define ADF_RING_CSR_INT_COL_CTL 0x180 #define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 +#define ADF_RING_CSR_EXP_STAT 0x188 +#define ADF_RING_CSR_EXP_INT_EN 0x18C #define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 -#define ADF_RING_CSR_INT_COL_EN 0x17C #define ADF_RING_CSR_ADDR_OFFSET 0x100000 #define ADF_RING_BUNDLE_SIZE 0x2000 #define ADF_RING_CSR_RING_SRV_ARB_EN 0x19C @@ -33,9 +42,41 @@ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + \ ADF_RING_CSR_RING_TAIL + ((ring) << 2)) +#define READ_CSR_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_STAT) +#define READ_CSR_UO_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_UO_STAT) #define READ_CSR_E_STAT(csr_base_addr, bank) \ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_E_STAT) +#define READ_CSR_NE_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_NE_STAT) +#define READ_CSR_NF_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_NF_STAT) +#define READ_CSR_F_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_F_STAT) +#define READ_CSR_C_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_C_STAT) +#define READ_CSR_EXP_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_EXP_STAT) +#define READ_CSR_EXP_INT_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_EXP_INT_EN) +#define WRITE_CSR_EXP_INT_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_EXP_INT_EN, value) +#define READ_CSR_RING_CONFIG(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2)) #define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + \ @@ -57,6 +98,25 @@ do { \ ADF_RING_CSR_RING_UBASE + ((_ring) << 2), u_base); \ } while (0) +static inline u64 read_base(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + u32 l_base, u_base; + + /* + * Use special IO wrapper for ring base as LBASE and UBASE are + * not physically contigious + */ + l_base = ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + + ADF_RING_CSR_RING_LBASE + (ring << 2)); + u_base = ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + + ADF_RING_CSR_RING_UBASE + (ring << 2)); + + return (u64)u_base << 32 | (u64)l_base; +} + +#define READ_CSR_RING_BASE(csr_base_addr, bank, ring) \ + read_base((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, (bank), (ring)) + #define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + \ @@ -65,28 +125,59 @@ do { \ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + \ ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) +#define READ_CSR_INT_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_FLAG_EN) +#define WRITE_CSR_INT_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG_EN, (value)) +#define READ_CSR_INT_FLAG(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_FLAG) #define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + \ ADF_RING_CSR_INT_FLAG, (value)) +#define READ_CSR_INT_SRCSEL(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_SRCSEL) #define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + \ ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK) +#define WRITE_CSR_INT_SRCSEL_W_VAL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_SRCSEL, (value)) +#define READ_CSR_INT_COL_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_COL_EN) #define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + \ ADF_RING_CSR_INT_COL_EN, (value)) +#define READ_CSR_INT_COL_CTL(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_COL_CTL) #define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + \ ADF_RING_CSR_INT_COL_CTL, \ ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) +#define READ_CSR_INT_FLAG_AND_COL(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG_AND_COL) #define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + \ ADF_RING_CSR_INT_FLAG_AND_COL, (value)) +#define READ_CSR_RING_SRV_ARB_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_SRV_ARB_EN) #define WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value) \ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ ADF_RING_BUNDLE_SIZE * (bank) + \ From bbfdde7d195ffc9c10598055c449b24c50a0cd25 Mon Sep 17 00:00:00 2001 From: Siming Wan Date: Wed, 6 Mar 2024 21:58:52 +0800 Subject: [PATCH 007/123] crypto: qat - add bank save and restore flows Add logic to save, restore, quiesce and drain a ring bank for QAT GEN4 devices. This allows to save and restore the state of a Virtual Function (VF) and will be used to implement VM live migration. Signed-off-by: Siming Wan Reviewed-by: Giovanni Cabiddu Signed-off-by: Xin Zeng Signed-off-by: Herbert Xu --- .../intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 2 + .../intel/qat/qat_common/adf_accel_devices.h | 38 +++ .../intel/qat/qat_common/adf_gen4_hw_data.c | 279 ++++++++++++++++++ .../intel/qat/qat_common/adf_gen4_hw_data.h | 19 ++ 4 files changed, 338 insertions(+) diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index ef4b0aa36603..eaf055e6f938 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -455,6 +455,8 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map; hw_data->disable_iov = adf_disable_sriov; hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; + hw_data->bank_state_save = adf_gen4_bank_state_save; + hw_data->bank_state_restore = adf_gen4_bank_state_restore; hw_data->enable_pm = adf_gen4_enable_pm; hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt; hw_data->dev_config = adf_gen4_dev_config; diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index d1f3f5a822ff..986e63ec702d 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -140,6 +140,40 @@ struct admin_info { u32 mailbox_offset; }; +struct ring_config { + u64 base; + u32 config; + u32 head; + u32 tail; + u32 reserved0; +}; + +struct bank_state { + u32 ringstat0; + u32 ringstat1; + u32 ringuostat; + u32 ringestat; + u32 ringnestat; + u32 ringnfstat; + u32 ringfstat; + u32 ringcstat0; + u32 ringcstat1; + u32 ringcstat2; + u32 ringcstat3; + u32 iaintflagen; + u32 iaintflagreg; + u32 iaintflagsrcsel0; + u32 iaintflagsrcsel1; + u32 iaintcolen; + u32 iaintcolctl; + u32 iaintflagandcolen; + u32 ringexpstat; + u32 ringexpintenable; + u32 ringsrvarben; + u32 reserved0; + struct ring_config rings[ADF_ETR_MAX_RINGS_PER_BANK]; +}; + struct adf_hw_csr_ops { u64 (*build_csr_ring_base_addr)(dma_addr_t addr, u32 size); u32 (*read_csr_ring_head)(void __iomem *csr_base_addr, u32 bank, @@ -271,6 +305,10 @@ struct adf_hw_device_data { void (*enable_ints)(struct adf_accel_dev *accel_dev); void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev); int (*ring_pair_reset)(struct adf_accel_dev *accel_dev, u32 bank_nr); + int (*bank_state_save)(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state); + int (*bank_state_restore)(struct adf_accel_dev *accel_dev, + u32 bank_number, struct bank_state *state); void (*reset_device)(struct adf_accel_dev *accel_dev); void (*set_msix_rttable)(struct adf_accel_dev *accel_dev); const char *(*uof_get_name)(struct adf_accel_dev *accel_dev, u32 obj_num); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index 12269e309fbf..41a0979e68c1 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2020 Intel Corporation */ #include +#include #include "adf_accel_devices.h" #include "adf_cfg_services.h" #include "adf_common_drv.h" @@ -390,3 +391,281 @@ u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev) return ring_to_svc_map; } EXPORT_SYMBOL_GPL(adf_gen4_get_ring_to_svc_map); + +/* + * adf_gen4_bank_quiesce_coal_timer() - quiesce bank coalesced interrupt timer + * @accel_dev: Pointer to the device structure + * @bank_idx: Offset to the bank within this device + * @timeout_ms: Timeout in milliseconds for the operation + * + * This function tries to quiesce the coalesced interrupt timer of a bank if + * it has been enabled and triggered. + * + * Returns 0 on success, error code otherwise + * + */ +int adf_gen4_bank_quiesce_coal_timer(struct adf_accel_dev *accel_dev, + u32 bank_idx, int timeout_ms) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + void __iomem *csr_misc = adf_get_pmisc_base(accel_dev); + void __iomem *csr_etr = adf_get_etr_base(accel_dev); + u32 int_col_ctl, int_col_mask, int_col_en; + u32 e_stat, intsrc; + u64 wait_us; + int ret; + + if (timeout_ms < 0) + return -EINVAL; + + int_col_ctl = csr_ops->read_csr_int_col_ctl(csr_etr, bank_idx); + int_col_mask = csr_ops->get_int_col_ctl_enable_mask(); + if (!(int_col_ctl & int_col_mask)) + return 0; + + int_col_en = csr_ops->read_csr_int_col_en(csr_etr, bank_idx); + int_col_en &= BIT(ADF_WQM_CSR_RP_IDX_RX); + + e_stat = csr_ops->read_csr_e_stat(csr_etr, bank_idx); + if (!(~e_stat & int_col_en)) + return 0; + + wait_us = 2 * ((int_col_ctl & ~int_col_mask) << 8) * USEC_PER_SEC; + do_div(wait_us, hw_data->clock_frequency); + wait_us = min(wait_us, (u64)timeout_ms * USEC_PER_MSEC); + dev_dbg(&GET_DEV(accel_dev), + "wait for bank %d - coalesced timer expires in %llu us (max=%u ms estat=0x%x intcolen=0x%x)\n", + bank_idx, wait_us, timeout_ms, e_stat, int_col_en); + + ret = read_poll_timeout(ADF_CSR_RD, intsrc, intsrc, + ADF_COALESCED_POLL_DELAY_US, wait_us, true, + csr_misc, ADF_WQM_CSR_RPINTSOU(bank_idx)); + if (ret) + dev_warn(&GET_DEV(accel_dev), + "coalesced timer for bank %d expired (%llu us)\n", + bank_idx, wait_us); + + return ret; +} +EXPORT_SYMBOL_GPL(adf_gen4_bank_quiesce_coal_timer); + +static int drain_bank(void __iomem *csr, u32 bank_number, int timeout_us) +{ + u32 status; + + ADF_CSR_WR(csr, ADF_WQM_CSR_RPRESETCTL(bank_number), + ADF_WQM_CSR_RPRESETCTL_DRAIN); + + return read_poll_timeout(ADF_CSR_RD, status, + status & ADF_WQM_CSR_RPRESETSTS_STATUS, + ADF_RPRESET_POLL_DELAY_US, timeout_us, true, + csr, ADF_WQM_CSR_RPRESETSTS(bank_number)); +} + +void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev, + u32 bank_number) +{ + void __iomem *csr = adf_get_etr_base(accel_dev); + + ADF_CSR_WR(csr, ADF_WQM_CSR_RPRESETSTS(bank_number), + ADF_WQM_CSR_RPRESETSTS_STATUS); +} + +int adf_gen4_bank_drain_start(struct adf_accel_dev *accel_dev, + u32 bank_number, int timeout_us) +{ + void __iomem *csr = adf_get_etr_base(accel_dev); + int ret; + + dev_dbg(&GET_DEV(accel_dev), "Drain bank %d\n", bank_number); + + ret = drain_bank(csr, bank_number, timeout_us); + if (ret) + dev_err(&GET_DEV(accel_dev), "Bank drain failed (timeout)\n"); + else + dev_dbg(&GET_DEV(accel_dev), "Bank drain successful\n"); + + return ret; +} + +static void bank_state_save(struct adf_hw_csr_ops *ops, void __iomem *base, + u32 bank, struct bank_state *state, u32 num_rings) +{ + u32 i; + + state->ringstat0 = ops->read_csr_stat(base, bank); + state->ringuostat = ops->read_csr_uo_stat(base, bank); + state->ringestat = ops->read_csr_e_stat(base, bank); + state->ringnestat = ops->read_csr_ne_stat(base, bank); + state->ringnfstat = ops->read_csr_nf_stat(base, bank); + state->ringfstat = ops->read_csr_f_stat(base, bank); + state->ringcstat0 = ops->read_csr_c_stat(base, bank); + state->iaintflagen = ops->read_csr_int_en(base, bank); + state->iaintflagreg = ops->read_csr_int_flag(base, bank); + state->iaintflagsrcsel0 = ops->read_csr_int_srcsel(base, bank); + state->iaintcolen = ops->read_csr_int_col_en(base, bank); + state->iaintcolctl = ops->read_csr_int_col_ctl(base, bank); + state->iaintflagandcolen = ops->read_csr_int_flag_and_col(base, bank); + state->ringexpstat = ops->read_csr_exp_stat(base, bank); + state->ringexpintenable = ops->read_csr_exp_int_en(base, bank); + state->ringsrvarben = ops->read_csr_ring_srv_arb_en(base, bank); + + for (i = 0; i < num_rings; i++) { + state->rings[i].head = ops->read_csr_ring_head(base, bank, i); + state->rings[i].tail = ops->read_csr_ring_tail(base, bank, i); + state->rings[i].config = ops->read_csr_ring_config(base, bank, i); + state->rings[i].base = ops->read_csr_ring_base(base, bank, i); + } +} + +#define CHECK_STAT(op, expect_val, name, args...) \ +({ \ + u32 __expect_val = (expect_val); \ + u32 actual_val = op(args); \ + (__expect_val == actual_val) ? 0 : \ + (pr_err("QAT: Fail to restore %s register. Expected 0x%x, actual 0x%x\n", \ + name, __expect_val, actual_val), -EINVAL); \ +}) + +static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, + u32 bank, struct bank_state *state, u32 num_rings, + int tx_rx_gap) +{ + u32 val, tmp_val, i; + int ret; + + for (i = 0; i < num_rings; i++) + ops->write_csr_ring_base(base, bank, i, state->rings[i].base); + + for (i = 0; i < num_rings; i++) + ops->write_csr_ring_config(base, bank, i, state->rings[i].config); + + for (i = 0; i < num_rings / 2; i++) { + int tx = i * (tx_rx_gap + 1); + int rx = tx + tx_rx_gap; + + ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head); + ops->write_csr_ring_tail(base, bank, tx, state->rings[tx].tail); + + /* + * The TX ring head needs to be updated again to make sure that + * the HW will not consider the ring as full when it is empty + * and the correct state flags are set to match the recovered state. + */ + if (state->ringestat & BIT(tx)) { + val = ops->read_csr_int_srcsel(base, bank); + val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK; + ops->write_csr_int_srcsel_w_val(base, bank, val); + ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head); + } + + ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail); + val = ops->read_csr_int_srcsel(base, bank); + val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH; + ops->write_csr_int_srcsel_w_val(base, bank, val); + + ops->write_csr_ring_head(base, bank, rx, state->rings[rx].head); + val = ops->read_csr_int_srcsel(base, bank); + val |= ADF_RP_INT_SRC_SEL_F_FALL_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH; + ops->write_csr_int_srcsel_w_val(base, bank, val); + + /* + * The RX ring tail needs to be updated again to make sure that + * the HW will not consider the ring as empty when it is full + * and the correct state flags are set to match the recovered state. + */ + if (state->ringfstat & BIT(rx)) + ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail); + } + + ops->write_csr_int_flag_and_col(base, bank, state->iaintflagandcolen); + ops->write_csr_int_en(base, bank, state->iaintflagen); + ops->write_csr_int_col_en(base, bank, state->iaintcolen); + ops->write_csr_int_srcsel_w_val(base, bank, state->iaintflagsrcsel0); + ops->write_csr_exp_int_en(base, bank, state->ringexpintenable); + ops->write_csr_int_col_ctl(base, bank, state->iaintcolctl); + ops->write_csr_ring_srv_arb_en(base, bank, state->ringsrvarben); + + /* Check that all ring statuses match the saved state. */ + ret = CHECK_STAT(ops->read_csr_stat, state->ringstat0, "ringstat", + base, bank); + if (ret) + return ret; + + ret = CHECK_STAT(ops->read_csr_e_stat, state->ringestat, "ringestat", + base, bank); + if (ret) + return ret; + + ret = CHECK_STAT(ops->read_csr_ne_stat, state->ringnestat, "ringnestat", + base, bank); + if (ret) + return ret; + + ret = CHECK_STAT(ops->read_csr_nf_stat, state->ringnfstat, "ringnfstat", + base, bank); + if (ret) + return ret; + + ret = CHECK_STAT(ops->read_csr_f_stat, state->ringfstat, "ringfstat", + base, bank); + if (ret) + return ret; + + ret = CHECK_STAT(ops->read_csr_c_stat, state->ringcstat0, "ringcstat", + base, bank); + if (ret) + return ret; + + tmp_val = ops->read_csr_exp_stat(base, bank); + val = state->ringexpstat; + if (tmp_val && !val) { + pr_err("QAT: Bank was restored with exception: 0x%x\n", val); + return -EINVAL; + } + + return 0; +} + +int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + void __iomem *csr_base = adf_get_etr_base(accel_dev); + + if (bank_number >= hw_data->num_banks || !state) + return -EINVAL; + + dev_dbg(&GET_DEV(accel_dev), "Saving state of bank %d\n", bank_number); + + bank_state_save(csr_ops, csr_base, bank_number, state, + hw_data->num_rings_per_bank); + + return 0; +} +EXPORT_SYMBOL_GPL(adf_gen4_bank_state_save); + +int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + void __iomem *csr_base = adf_get_etr_base(accel_dev); + int ret; + + if (bank_number >= hw_data->num_banks || !state) + return -EINVAL; + + dev_dbg(&GET_DEV(accel_dev), "Restoring state of bank %d\n", bank_number); + + ret = bank_state_restore(csr_ops, csr_base, bank_number, state, + hw_data->num_rings_per_bank, hw_data->tx_rx_gap); + if (ret) + dev_err(&GET_DEV(accel_dev), + "Unable to restore state of bank %d\n", bank_number); + + return ret; +} +EXPORT_SYMBOL_GPL(adf_gen4_bank_state_restore); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index 719f7757e587..e8cb930e80c9 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -77,10 +77,19 @@ #define ADF_RPRESET_POLL_TIMEOUT_US (5 * USEC_PER_SEC) #define ADF_RPRESET_POLL_DELAY_US 20 #define ADF_WQM_CSR_RPRESETCTL_RESET BIT(0) +#define ADF_WQM_CSR_RPRESETCTL_DRAIN BIT(2) #define ADF_WQM_CSR_RPRESETCTL(bank) (0x6000 + ((bank) << 3)) #define ADF_WQM_CSR_RPRESETSTS_STATUS BIT(0) #define ADF_WQM_CSR_RPRESETSTS(bank) (ADF_WQM_CSR_RPRESETCTL(bank) + 4) +/* Ring interrupt */ +#define ADF_RP_INT_SRC_SEL_F_RISE_MASK BIT(2) +#define ADF_RP_INT_SRC_SEL_F_FALL_MASK GENMASK(2, 0) +#define ADF_RP_INT_SRC_SEL_RANGE_WIDTH 4 +#define ADF_COALESCED_POLL_DELAY_US 1000 +#define ADF_WQM_CSR_RPINTSOU(bank) (0x200000 + ((bank) << 12)) +#define ADF_WQM_CSR_RP_IDX_RX 1 + /* Error source registers */ #define ADF_GEN4_ERRSOU0 (0x41A200) #define ADF_GEN4_ERRSOU1 (0x41A204) @@ -150,5 +159,15 @@ void adf_gen4_set_msix_default_rttable(struct adf_accel_dev *accel_dev); void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev); u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev); +int adf_gen4_bank_quiesce_coal_timer(struct adf_accel_dev *accel_dev, + u32 bank_idx, int timeout_ms); +int adf_gen4_bank_drain_start(struct adf_accel_dev *accel_dev, + u32 bank_number, int timeout_us); +void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev, + u32 bank_number); +int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state); +int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev, + u32 bank_number, struct bank_state *state); #endif From 0fce55e5334d380d8a09f80ba9c9b68eeea6971d Mon Sep 17 00:00:00 2001 From: Xin Zeng Date: Wed, 6 Mar 2024 21:58:53 +0800 Subject: [PATCH 008/123] crypto: qat - add interface for live migration Extend the driver with a new interface to be used for VF live migration. This allows to create and destroy a qat_mig_dev object that contains a set of methods to allow to save and restore the state of QAT VF. This interface will be used by the qat-vfio-pci module. Signed-off-by: Xin Zeng Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/Makefile | 2 +- .../intel/qat/qat_common/adf_accel_devices.h | 17 +++ .../intel/qat/qat_common/adf_gen4_vf_mig.h | 10 ++ .../crypto/intel/qat/qat_common/qat_mig_dev.c | 130 ++++++++++++++++++ include/linux/qat/qat_mig_dev.h | 31 +++++ 5 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h create mode 100644 drivers/crypto/intel/qat/qat_common/qat_mig_dev.c create mode 100644 include/linux/qat/qat_mig_dev.h diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index ceaa685352ed..9fba31d4ac7f 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -54,6 +54,6 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \ intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \ adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \ adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \ - adf_gen2_pfvf.o adf_gen4_pfvf.o + adf_gen2_pfvf.o adf_gen4_pfvf.o qat_mig_dev.o intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index 986e63ec702d..b08fea10121e 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "adf_cfg_common.h" #include "adf_rl.h" #include "adf_telemetry.h" @@ -258,6 +259,20 @@ struct adf_dc_ops { void (*build_deflate_ctx)(void *ctx); }; +struct qat_migdev_ops { + int (*init)(struct qat_mig_dev *mdev); + void (*cleanup)(struct qat_mig_dev *mdev); + void (*reset)(struct qat_mig_dev *mdev); + int (*open)(struct qat_mig_dev *mdev); + void (*close)(struct qat_mig_dev *mdev); + int (*suspend)(struct qat_mig_dev *mdev); + int (*resume)(struct qat_mig_dev *mdev); + int (*save_state)(struct qat_mig_dev *mdev); + int (*save_setup)(struct qat_mig_dev *mdev); + int (*load_state)(struct qat_mig_dev *mdev); + int (*load_setup)(struct qat_mig_dev *mdev, int size); +}; + struct adf_dev_err_mask { u32 cppagentcmdpar_mask; u32 parerr_ath_cph_mask; @@ -325,6 +340,7 @@ struct adf_hw_device_data { struct adf_dev_err_mask dev_err_mask; struct adf_rl_hw_data rl_data; struct adf_tl_hw_data tl_data; + struct qat_migdev_ops vfmig_ops; const char *fw_name; const char *fw_mmp_name; u32 fuses; @@ -381,6 +397,7 @@ struct adf_hw_device_data { #define GET_CSR_OPS(accel_dev) (&(accel_dev)->hw_device->csr_ops) #define GET_PFVF_OPS(accel_dev) (&(accel_dev)->hw_device->pfvf_ops) #define GET_DC_OPS(accel_dev) (&(accel_dev)->hw_device->dc_ops) +#define GET_VFMIG_OPS(accel_dev) (&(accel_dev)->hw_device->vfmig_ops) #define GET_TL_DATA(accel_dev) GET_HW_DATA(accel_dev)->tl_data #define accel_to_pci_dev(accel_ptr) accel_ptr->accel_pci_dev.pci_dev diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h new file mode 100644 index 000000000000..72216d078ee1 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef ADF_GEN4_VF_MIG_H_ +#define ADF_GEN4_VF_MIG_H_ + +#include "adf_accel_devices.h" + +void adf_gen4_init_vf_mig_ops(struct qat_migdev_ops *vfmig_ops); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c b/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c new file mode 100644 index 000000000000..892c2283a50e --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include +#include +#include +#include +#include +#include "adf_accel_devices.h" +#include "adf_common_drv.h" + +struct qat_mig_dev *qat_vfmig_create(struct pci_dev *pdev, int vf_id) +{ + struct adf_accel_dev *accel_dev; + struct qat_migdev_ops *ops; + struct qat_mig_dev *mdev; + + accel_dev = adf_devmgr_pci_to_accel_dev(pdev); + if (!accel_dev) + return ERR_PTR(-ENODEV); + + ops = GET_VFMIG_OPS(accel_dev); + if (!ops || !ops->init || !ops->cleanup || !ops->reset || !ops->open || + !ops->close || !ops->suspend || !ops->resume || !ops->save_state || + !ops->load_state || !ops->save_setup || !ops->load_setup) + return ERR_PTR(-EINVAL); + + mdev = kmalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return ERR_PTR(-ENOMEM); + + mdev->vf_id = vf_id; + mdev->parent_accel_dev = accel_dev; + + return mdev; +} +EXPORT_SYMBOL_GPL(qat_vfmig_create); + +int qat_vfmig_init(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->init(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_init); + +void qat_vfmig_cleanup(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->cleanup(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_cleanup); + +void qat_vfmig_reset(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->reset(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_reset); + +int qat_vfmig_open(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->open(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_open); + +void qat_vfmig_close(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + GET_VFMIG_OPS(accel_dev)->close(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_close); + +int qat_vfmig_suspend(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->suspend(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_suspend); + +int qat_vfmig_resume(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->resume(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_resume); + +int qat_vfmig_save_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->save_state(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_save_state); + +int qat_vfmig_save_setup(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->save_setup(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_save_setup); + +int qat_vfmig_load_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->load_state(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_load_state); + +int qat_vfmig_load_setup(struct qat_mig_dev *mdev, int size) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->load_setup(mdev, size); +} +EXPORT_SYMBOL_GPL(qat_vfmig_load_setup); + +void qat_vfmig_destroy(struct qat_mig_dev *mdev) +{ + kfree(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_destroy); diff --git a/include/linux/qat/qat_mig_dev.h b/include/linux/qat/qat_mig_dev.h new file mode 100644 index 000000000000..dbbb6a063dd2 --- /dev/null +++ b/include/linux/qat/qat_mig_dev.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef QAT_MIG_DEV_H_ +#define QAT_MIG_DEV_H_ + +struct pci_dev; + +struct qat_mig_dev { + void *parent_accel_dev; + u8 *state; + u32 setup_size; + u32 remote_setup_size; + u32 state_size; + s32 vf_id; +}; + +struct qat_mig_dev *qat_vfmig_create(struct pci_dev *pdev, int vf_id); +int qat_vfmig_init(struct qat_mig_dev *mdev); +void qat_vfmig_cleanup(struct qat_mig_dev *mdev); +void qat_vfmig_reset(struct qat_mig_dev *mdev); +int qat_vfmig_open(struct qat_mig_dev *mdev); +void qat_vfmig_close(struct qat_mig_dev *mdev); +int qat_vfmig_suspend(struct qat_mig_dev *mdev); +int qat_vfmig_resume(struct qat_mig_dev *mdev); +int qat_vfmig_save_state(struct qat_mig_dev *mdev); +int qat_vfmig_save_setup(struct qat_mig_dev *mdev); +int qat_vfmig_load_state(struct qat_mig_dev *mdev); +int qat_vfmig_load_setup(struct qat_mig_dev *mdev, int size); +void qat_vfmig_destroy(struct qat_mig_dev *mdev); + +#endif /*QAT_MIG_DEV_H_*/ From f0bbfc391aa7eaa796f09ee40dd1cd78c6c81960 Mon Sep 17 00:00:00 2001 From: Xin Zeng Date: Wed, 6 Mar 2024 21:58:54 +0800 Subject: [PATCH 009/123] crypto: qat - implement interface for live migration Add logic to implement the interface for live migration defined in qat/qat_mig_dev.h. This is specific for QAT GEN4 Virtual Functions (VFs). This introduces a migration data manager which is used to handle the device state during migration. The manager ensures that the device state is stored in a format that can be restored in the destination node. The VF state is organized into a hierarchical structure that includes a preamble, a general state section, a MISC bar section and an ETR bar section. The latter contains the state of the 4 ring pairs contained on a VF. Here is a graphical representation of the state: preamble | general state section | leaf state | MISC bar state section| leaf state | ETR bar state section | bank0 state section | leaf state | bank1 state section | leaf state | bank2 state section | leaf state | bank3 state section | leaf state In addition to the implementation of the qat_migdev_ops interface and the state manager framework, add a mutex in pfvf to avoid pf2vf messages during migration. Signed-off-by: Xin Zeng Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../intel/qat/qat_420xx/adf_420xx_hw_data.c | 2 + .../intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 2 + drivers/crypto/intel/qat/qat_common/Makefile | 2 + .../intel/qat/qat_common/adf_accel_devices.h | 6 + .../intel/qat/qat_common/adf_gen4_hw_data.h | 10 + .../intel/qat/qat_common/adf_gen4_vf_mig.c | 1010 +++++++++++++++++ .../intel/qat/qat_common/adf_mstate_mgr.c | 318 ++++++ .../intel/qat/qat_common/adf_mstate_mgr.h | 89 ++ .../crypto/intel/qat/qat_common/adf_sriov.c | 7 +- 9 files changed, 1445 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c create mode 100644 drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c create mode 100644 drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c index 9ccbf5998d5c..d255cb3ebd9c 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "adf_420xx_hw_data.h" #include "icp_qat_hw.h" @@ -488,6 +489,7 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id) adf_gen4_init_dc_ops(&hw_data->dc_ops); adf_gen4_init_ras_ops(&hw_data->ras_ops); adf_gen4_init_tl_data(&hw_data->tl_data); + adf_gen4_init_vf_mig_ops(&hw_data->vfmig_ops); adf_init_rl_data(&hw_data->rl_data); } diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index eaf055e6f938..1e77e189a938 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -17,6 +17,7 @@ #include "adf_gen4_ras.h" #include #include +#include #include "adf_4xxx_hw_data.h" #include "icp_qat_hw.h" @@ -472,6 +473,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) adf_gen4_init_dc_ops(&hw_data->dc_ops); adf_gen4_init_ras_ops(&hw_data->ras_ops); adf_gen4_init_tl_data(&hw_data->tl_data); + adf_gen4_init_vf_mig_ops(&hw_data->vfmig_ops); adf_init_rl_data(&hw_data->rl_data); } diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 9fba31d4ac7f..6f9266edc9f1 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -20,12 +20,14 @@ intel_qat-objs := adf_cfg.o \ adf_gen4_config.o \ adf_gen4_hw_csr_data.o \ adf_gen4_hw_data.o \ + adf_gen4_vf_mig.o \ adf_gen4_pm.o \ adf_gen2_dc.o \ adf_gen4_dc.o \ adf_gen4_ras.o \ adf_gen4_timer.o \ adf_clock.o \ + adf_mstate_mgr.o \ qat_crypto.o \ qat_compression.o \ qat_comp_algs.o \ diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index b08fea10121e..7830ecb1a1f1 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -412,11 +412,17 @@ struct adf_fw_loader_data { struct adf_accel_vf_info { struct adf_accel_dev *accel_dev; struct mutex pf2vf_lock; /* protect CSR access for PF2VF messages */ + struct mutex pfvf_mig_lock; /* protects PFVF state for migration */ struct ratelimit_state vf2pf_ratelimit; u32 vf_nr; bool init; bool restarting; u8 vf_compat_ver; + /* + * Private area used for device migration. + * Memory allocation and free is managed by migration driver. + */ + void *mig_priv; }; struct adf_dc_data { diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index e8cb930e80c9..8b10926cedba 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -86,6 +86,7 @@ #define ADF_RP_INT_SRC_SEL_F_RISE_MASK BIT(2) #define ADF_RP_INT_SRC_SEL_F_FALL_MASK GENMASK(2, 0) #define ADF_RP_INT_SRC_SEL_RANGE_WIDTH 4 +#define ADF_COALESCED_POLL_TIMEOUT_US (1 * USEC_PER_SEC) #define ADF_COALESCED_POLL_DELAY_US 1000 #define ADF_WQM_CSR_RPINTSOU(bank) (0x200000 + ((bank) << 12)) #define ADF_WQM_CSR_RP_IDX_RX 1 @@ -120,6 +121,15 @@ /* PF2VM communication channel */ #define ADF_GEN4_PF2VM_OFFSET(i) (0x40B010 + (i) * 0x20) #define ADF_GEN4_VM2PF_OFFSET(i) (0x40B014 + (i) * 0x20) +#define ADF_GEN4_VINTMSKPF2VM_OFFSET(i) (0x40B00C + (i) * 0x20) +#define ADF_GEN4_VINTSOUPF2VM_OFFSET(i) (0x40B008 + (i) * 0x20) +#define ADF_GEN4_VINTMSK_OFFSET(i) (0x40B004 + (i) * 0x20) +#define ADF_GEN4_VINTSOU_OFFSET(i) (0x40B000 + (i) * 0x20) + +struct adf_gen4_vfmig { + struct adf_mstate_mgr *mstate_mgr; + bool bank_stopped[ADF_GEN4_NUM_BANKS_PER_VF]; +}; void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c new file mode 100644 index 000000000000..78a39cfe196f --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c @@ -0,0 +1,1010 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include +#include +#include +#include +#include +#include +#include + +#include "adf_accel_devices.h" +#include "adf_common_drv.h" +#include "adf_gen4_hw_data.h" +#include "adf_gen4_pfvf.h" +#include "adf_pfvf_utils.h" +#include "adf_mstate_mgr.h" +#include "adf_gen4_vf_mig.h" + +#define ADF_GEN4_VF_MSTATE_SIZE 4096 +#define ADF_GEN4_PFVF_RSP_TIMEOUT_US 5000 + +static int adf_gen4_vfmig_save_setup(struct qat_mig_dev *mdev); +static int adf_gen4_vfmig_load_setup(struct qat_mig_dev *mdev, int len); + +static int adf_gen4_vfmig_init_device(struct qat_mig_dev *mdev) +{ + u8 *state; + + state = kmalloc(ADF_GEN4_VF_MSTATE_SIZE, GFP_KERNEL); + if (!state) + return -ENOMEM; + + mdev->state = state; + mdev->state_size = ADF_GEN4_VF_MSTATE_SIZE; + mdev->setup_size = 0; + mdev->remote_setup_size = 0; + + return 0; +} + +static void adf_gen4_vfmig_cleanup_device(struct qat_mig_dev *mdev) +{ + kfree(mdev->state); + mdev->state = NULL; +} + +static void adf_gen4_vfmig_reset_device(struct qat_mig_dev *mdev) +{ + mdev->setup_size = 0; + mdev->remote_setup_size = 0; +} + +static int adf_gen4_vfmig_open_device(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + + vf_info = &accel_dev->pf.vf_info[mdev->vf_id]; + + vfmig = kzalloc(sizeof(*vfmig), GFP_KERNEL); + if (!vfmig) + return -ENOMEM; + + vfmig->mstate_mgr = adf_mstate_mgr_new(mdev->state, mdev->state_size); + if (!vfmig->mstate_mgr) { + kfree(vfmig); + return -ENOMEM; + } + vf_info->mig_priv = vfmig; + mdev->setup_size = 0; + mdev->remote_setup_size = 0; + + return 0; +} + +static void adf_gen4_vfmig_close_device(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + + vf_info = &accel_dev->pf.vf_info[mdev->vf_id]; + if (vf_info->mig_priv) { + vfmig = vf_info->mig_priv; + adf_mstate_mgr_destroy(vfmig->mstate_mgr); + kfree(vfmig); + vf_info->mig_priv = NULL; + } +} + +static int adf_gen4_vfmig_suspend_device(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vf_mig; + u32 vf_nr = mdev->vf_id; + int ret, i; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vf_mig = vf_info->mig_priv; + + /* Stop all inflight jobs */ + for (i = 0; i < hw_data->num_banks_per_vf; i++) { + u32 pf_bank_nr = i + vf_nr * hw_data->num_banks_per_vf; + + ret = adf_gen4_bank_drain_start(accel_dev, pf_bank_nr, + ADF_RPRESET_POLL_TIMEOUT_US); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to drain bank %d for vf_nr %d\n", i, + vf_nr); + return ret; + } + vf_mig->bank_stopped[i] = true; + + adf_gen4_bank_quiesce_coal_timer(accel_dev, pf_bank_nr, + ADF_COALESCED_POLL_TIMEOUT_US); + } + + return 0; +} + +static int adf_gen4_vfmig_resume_device(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vf_mig; + u32 vf_nr = mdev->vf_id; + int i; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vf_mig = vf_info->mig_priv; + + for (i = 0; i < hw_data->num_banks_per_vf; i++) { + u32 pf_bank_nr = i + vf_nr * hw_data->num_banks_per_vf; + + if (vf_mig->bank_stopped[i]) { + adf_gen4_bank_drain_finish(accel_dev, pf_bank_nr); + vf_mig->bank_stopped[i] = false; + } + } + + return 0; +} + +struct adf_vf_bank_info { + struct adf_accel_dev *accel_dev; + u32 vf_nr; + u32 bank_nr; +}; + +struct mig_user_sla { + enum adf_base_services srv; + u64 rp_mask; + u32 cir; + u32 pir; +}; + +static int adf_mstate_sla_check(struct adf_mstate_mgr *sub_mgr, u8 *src_buf, + u32 src_size, void *opaque) +{ + struct adf_mstate_vreginfo _sinfo = { src_buf, src_size }; + struct adf_mstate_vreginfo *sinfo = &_sinfo, *dinfo = opaque; + u32 src_sla_cnt = sinfo->size / sizeof(struct mig_user_sla); + u32 dst_sla_cnt = dinfo->size / sizeof(struct mig_user_sla); + struct mig_user_sla *src_slas = sinfo->addr; + struct mig_user_sla *dst_slas = dinfo->addr; + int i, j; + + for (i = 0; i < src_sla_cnt; i++) { + for (j = 0; j < dst_sla_cnt; j++) { + if (src_slas[i].srv != dst_slas[j].srv || + src_slas[i].rp_mask != dst_slas[j].rp_mask) + continue; + + if (src_slas[i].cir > dst_slas[j].cir || + src_slas[i].pir > dst_slas[j].pir) { + pr_err("QAT: DST VF rate limiting mismatch.\n"); + return -EINVAL; + } + break; + } + + if (j == dst_sla_cnt) { + pr_err("QAT: SRC VF rate limiting mismatch - SRC srv %d and rp_mask 0x%llx.\n", + src_slas[i].srv, src_slas[i].rp_mask); + return -EINVAL; + } + } + + return 0; +} + +static inline int adf_mstate_check_cap_size(u32 src_sz, u32 dst_sz, u32 max_sz) +{ + if (src_sz > max_sz || dst_sz > max_sz) + return -EINVAL; + else + return 0; +} + +static int adf_mstate_compatver_check(struct adf_mstate_mgr *sub_mgr, + u8 *src_buf, u32 src_sz, void *opaque) +{ + struct adf_mstate_vreginfo *info = opaque; + u8 compat = 0; + u8 *pcompat; + + if (src_sz != info->size) { + pr_debug("QAT: State mismatch (compat version size), current %u, expected %u\n", + src_sz, info->size); + return -EINVAL; + } + + memcpy(info->addr, src_buf, info->size); + pcompat = info->addr; + if (*pcompat == 0) { + pr_warn("QAT: Unable to determine the version of VF\n"); + return 0; + } + + compat = adf_vf_compat_checker(*pcompat); + if (compat == ADF_PF2VF_VF_INCOMPATIBLE) { + pr_debug("QAT: SRC VF driver (ver=%u) is incompatible with DST PF driver (ver=%u)\n", + *pcompat, ADF_PFVF_COMPAT_THIS_VERSION); + return -EINVAL; + } + + if (compat == ADF_PF2VF_VF_COMPAT_UNKNOWN) + pr_debug("QAT: SRC VF driver (ver=%u) is newer than DST PF driver (ver=%u)\n", + *pcompat, ADF_PFVF_COMPAT_THIS_VERSION); + + return 0; +} + +/* + * adf_mstate_capmask_compare() - compare QAT device capability mask + * @sinfo: Pointer to source capability info + * @dinfo: Pointer to target capability info + * + * This function compares the capability mask between source VF and target VF + * + * Returns: 0 if target capability mask is identical to source capability mask, + * 1 if target mask can represent all the capabilities represented by source mask, + * -1 if target mask can't represent all the capabilities represented by source + * mask. + */ +static int adf_mstate_capmask_compare(struct adf_mstate_vreginfo *sinfo, + struct adf_mstate_vreginfo *dinfo) +{ + u64 src = 0, dst = 0; + + if (adf_mstate_check_cap_size(sinfo->size, dinfo->size, sizeof(u64))) { + pr_debug("QAT: Unexpected capability size %u %u %zu\n", + sinfo->size, dinfo->size, sizeof(u64)); + return -1; + } + + memcpy(&src, sinfo->addr, sinfo->size); + memcpy(&dst, dinfo->addr, dinfo->size); + + pr_debug("QAT: Check cap compatibility of cap %llu %llu\n", src, dst); + + if (src == dst) + return 0; + + if ((src | dst) == dst) + return 1; + + return -1; +} + +static int adf_mstate_capmask_superset(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa) +{ + struct adf_mstate_vreginfo sinfo = { buf, size }; + + if (adf_mstate_capmask_compare(&sinfo, opa) >= 0) + return 0; + + return -EINVAL; +} + +static int adf_mstate_capmask_equal(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa) +{ + struct adf_mstate_vreginfo sinfo = { buf, size }; + + if (adf_mstate_capmask_compare(&sinfo, opa) == 0) + return 0; + + return -EINVAL; +} + +static int adf_mstate_set_vreg(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa) +{ + struct adf_mstate_vreginfo *info = opa; + + if (size != info->size) { + pr_debug("QAT: Unexpected cap size %u %u\n", size, info->size); + return -EINVAL; + } + memcpy(info->addr, buf, info->size); + + return 0; +} + +static u32 adf_gen4_vfmig_get_slas(struct adf_accel_dev *accel_dev, u32 vf_nr, + struct mig_user_sla *pmig_slas) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_rl *rl_data = accel_dev->rate_limiting; + struct rl_sla **sla_type_arr = NULL; + u64 rp_mask, rp_index; + u32 max_num_sla; + u32 sla_cnt = 0; + int i, j; + + if (!accel_dev->rate_limiting) + return 0; + + rp_index = vf_nr * hw_data->num_banks_per_vf; + max_num_sla = adf_rl_get_sla_arr_of_type(rl_data, RL_LEAF, &sla_type_arr); + + for (i = 0; i < max_num_sla; i++) { + if (!sla_type_arr[i]) + continue; + + rp_mask = 0; + for (j = 0; j < sla_type_arr[i]->ring_pairs_cnt; j++) + rp_mask |= BIT(sla_type_arr[i]->ring_pairs_ids[j]); + + if (rp_mask & GENMASK_ULL(rp_index + 3, rp_index)) { + pmig_slas->rp_mask = rp_mask; + pmig_slas->cir = sla_type_arr[i]->cir; + pmig_slas->pir = sla_type_arr[i]->pir; + pmig_slas->srv = sla_type_arr[i]->srv; + pmig_slas++; + sla_cnt++; + } + } + + return sla_cnt; +} + +static int adf_gen4_vfmig_load_etr_regs(struct adf_mstate_mgr *sub_mgr, + u8 *state, u32 size, void *opa) +{ + struct adf_vf_bank_info *vf_bank_info = opa; + struct adf_accel_dev *accel_dev = vf_bank_info->accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + u32 pf_bank_nr; + int ret; + + pf_bank_nr = vf_bank_info->bank_nr + vf_bank_info->vf_nr * hw_data->num_banks_per_vf; + ret = hw_data->bank_state_restore(accel_dev, pf_bank_nr, + (struct bank_state *)state); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load regs for vf%d bank%d\n", + vf_bank_info->vf_nr, vf_bank_info->bank_nr); + return ret; + } + + return 0; +} + +static int adf_gen4_vfmig_load_etr_bank(struct adf_accel_dev *accel_dev, + u32 vf_nr, u32 bank_nr, + struct adf_mstate_mgr *mstate_mgr) +{ + struct adf_vf_bank_info vf_bank_info = {accel_dev, vf_nr, bank_nr}; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + char bank_ids[ADF_MSTATE_ID_LEN]; + + snprintf(bank_ids, sizeof(bank_ids), ADF_MSTATE_BANK_IDX_IDS "%x", bank_nr); + subsec = adf_mstate_sect_lookup(mstate_mgr, bank_ids, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to lookup sec %s for vf%d bank%d\n", + ADF_MSTATE_BANK_IDX_IDS, vf_nr, bank_nr); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, ADF_MSTATE_ETR_REGS_IDS, + adf_gen4_vfmig_load_etr_regs, + &vf_bank_info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to add sec %s for vf%d bank%d\n", + ADF_MSTATE_ETR_REGS_IDS, vf_nr, bank_nr); + return -EINVAL; + } + + return 0; +} + +static int adf_gen4_vfmig_load_etr(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec; + int ret, i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_ETRB_IDS, NULL, + NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_ETRB_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < hw_data->num_banks_per_vf; i++) { + ret = adf_gen4_vfmig_load_etr_bank(accel_dev, vf_nr, i, + &sub_sects_mgr); + if (ret) + return ret; + } + + return 0; +} + +static int adf_gen4_vfmig_load_misc(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + void __iomem *csr = adf_get_pmisc_base(accel_dev); + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + struct { + char *id; + u64 ofs; + } misc_states[] = { + {ADF_MSTATE_VINTMSK_IDS, ADF_GEN4_VINTMSK_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTMSK_PF2VM_IDS, ADF_GEN4_VINTMSKPF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_PF2VM_IDS, ADF_GEN4_PF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_VM2PF_IDS, ADF_GEN4_VM2PF_OFFSET(vf_nr)}, + }; + int i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_MISCB_IDS, NULL, + NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_MISCB_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < ARRAY_SIZE(misc_states); i++) { + struct adf_mstate_vreginfo info; + u32 regv; + + info.addr = ®v; + info.size = sizeof(regv); + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, + misc_states[i].id, + adf_mstate_set_vreg, + &info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to load sec %s\n", misc_states[i].id); + return -EINVAL; + } + ADF_CSR_WR(csr, misc_states[i].ofs, regv); + } + + return 0; +} + +static int adf_gen4_vfmig_load_generic(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct mig_user_sla dst_slas[RL_RP_CNT_PER_LEAF_MAX] = { }; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + u32 dst_sla_cnt; + struct { + char *id; + int (*action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, void *opa); + struct adf_mstate_vreginfo info; + } gen_states[] = { + {ADF_MSTATE_IOV_INIT_IDS, adf_mstate_set_vreg, + {&vf_info->init, sizeof(vf_info->init)}}, + {ADF_MSTATE_COMPAT_VER_IDS, adf_mstate_compatver_check, + {&vf_info->vf_compat_ver, sizeof(vf_info->vf_compat_ver)}}, + {ADF_MSTATE_SLA_IDS, adf_mstate_sla_check, {dst_slas, 0}}, + }; + int i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_GEN_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_GEN_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < ARRAY_SIZE(gen_states); i++) { + if (gen_states[i].info.addr == dst_slas) { + dst_sla_cnt = adf_gen4_vfmig_get_slas(accel_dev, vf_nr, dst_slas); + gen_states[i].info.size = dst_sla_cnt * sizeof(struct mig_user_sla); + } + + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, + gen_states[i].id, + gen_states[i].action, + &gen_states[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + gen_states[i].id); + return -EINVAL; + } + } + + return 0; +} + +static int adf_gen4_vfmig_load_config(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + struct { + char *id; + int (*action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, void *opa); + struct adf_mstate_vreginfo info; + } setups[] = { + {ADF_MSTATE_GEN_CAP_IDS, adf_mstate_capmask_superset, + {&hw_data->accel_capabilities_mask, sizeof(hw_data->accel_capabilities_mask)}}, + {ADF_MSTATE_GEN_SVCMAP_IDS, adf_mstate_capmask_equal, + {&hw_data->ring_to_svc_map, sizeof(hw_data->ring_to_svc_map)}}, + {ADF_MSTATE_GEN_EXTDC_IDS, adf_mstate_capmask_superset, + {&hw_data->extended_dc_capabilities, sizeof(hw_data->extended_dc_capabilities)}}, + }; + int i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_CONFIG_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_CONFIG_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < ARRAY_SIZE(setups); i++) { + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, setups[i].id, + setups[i].action, &setups[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + setups[i].id); + return -EINVAL; + } + } + + return 0; +} + +static int adf_gen4_vfmig_save_etr_regs(struct adf_mstate_mgr *subs, u8 *state, + u32 size, void *opa) +{ + struct adf_vf_bank_info *vf_bank_info = opa; + struct adf_accel_dev *accel_dev = vf_bank_info->accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + u32 pf_bank_nr; + int ret; + + pf_bank_nr = vf_bank_info->bank_nr; + pf_bank_nr += vf_bank_info->vf_nr * hw_data->num_banks_per_vf; + + ret = hw_data->bank_state_save(accel_dev, pf_bank_nr, + (struct bank_state *)state); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save regs for vf%d bank%d\n", + vf_bank_info->vf_nr, vf_bank_info->bank_nr); + return ret; + } + + return sizeof(struct bank_state); +} + +static int adf_gen4_vfmig_save_etr_bank(struct adf_accel_dev *accel_dev, + u32 vf_nr, u32 bank_nr, + struct adf_mstate_mgr *mstate_mgr) +{ + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_vf_bank_info vf_bank_info; + struct adf_mstate_mgr sub_sects_mgr; + char bank_ids[ADF_MSTATE_ID_LEN]; + + snprintf(bank_ids, sizeof(bank_ids), ADF_MSTATE_BANK_IDX_IDS "%x", bank_nr); + + subsec = adf_mstate_sect_add(mstate_mgr, bank_ids, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to add sec %s for vf%d bank%d\n", + ADF_MSTATE_BANK_IDX_IDS, vf_nr, bank_nr); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + vf_bank_info.accel_dev = accel_dev; + vf_bank_info.vf_nr = vf_nr; + vf_bank_info.bank_nr = bank_nr; + l2_subsec = adf_mstate_sect_add(&sub_sects_mgr, ADF_MSTATE_ETR_REGS_IDS, + adf_gen4_vfmig_save_etr_regs, + &vf_bank_info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to add sec %s for vf%d bank%d\n", + ADF_MSTATE_ETR_REGS_IDS, vf_nr, bank_nr); + return -EINVAL; + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_etr(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec; + int ret, i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_ETRB_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_ETRB_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < hw_data->num_banks_per_vf; i++) { + ret = adf_gen4_vfmig_save_etr_bank(accel_dev, vf_nr, i, + &sub_sects_mgr); + if (ret) + return ret; + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_misc(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + void __iomem *csr = adf_get_pmisc_base(accel_dev); + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + struct { + char *id; + u64 offset; + } misc_states[] = { + {ADF_MSTATE_VINTSRC_IDS, ADF_GEN4_VINTSOU_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTMSK_IDS, ADF_GEN4_VINTMSK_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTSRC_PF2VM_IDS, ADF_GEN4_VINTSOUPF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTMSK_PF2VM_IDS, ADF_GEN4_VINTMSKPF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_PF2VM_IDS, ADF_GEN4_PF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_VM2PF_IDS, ADF_GEN4_VM2PF_OFFSET(vf_nr)}, + }; + ktime_t time_exp; + int i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_MISCB_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_MISCB_IDS); + return -EINVAL; + } + + time_exp = ktime_add_us(ktime_get(), ADF_GEN4_PFVF_RSP_TIMEOUT_US); + while (!mutex_trylock(&vf_info->pfvf_mig_lock)) { + if (ktime_after(ktime_get(), time_exp)) { + dev_err(&GET_DEV(accel_dev), "Failed to get pfvf mig lock\n"); + return -ETIMEDOUT; + } + usleep_range(500, 1000); + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < ARRAY_SIZE(misc_states); i++) { + struct adf_mstate_vreginfo info; + u32 regv; + + info.addr = ®v; + info.size = sizeof(regv); + regv = ADF_CSR_RD(csr, misc_states[i].offset); + + l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr, + misc_states[i].id, + &info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + misc_states[i].id); + mutex_unlock(&vf_info->pfvf_mig_lock); + return -EINVAL; + } + } + + mutex_unlock(&vf_info->pfvf_mig_lock); + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_generic(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct mig_user_sla src_slas[RL_RP_CNT_PER_LEAF_MAX] = { }; + u32 src_sla_cnt; + struct { + char *id; + struct adf_mstate_vreginfo info; + } gen_states[] = { + {ADF_MSTATE_IOV_INIT_IDS, + {&vf_info->init, sizeof(vf_info->init)}}, + {ADF_MSTATE_COMPAT_VER_IDS, + {&vf_info->vf_compat_ver, sizeof(vf_info->vf_compat_ver)}}, + {ADF_MSTATE_SLA_IDS, {src_slas, 0}}, + }; + int i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_GEN_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_GEN_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < ARRAY_SIZE(gen_states); i++) { + if (gen_states[i].info.addr == src_slas) { + src_sla_cnt = adf_gen4_vfmig_get_slas(accel_dev, vf_nr, src_slas); + gen_states[i].info.size = src_sla_cnt * sizeof(struct mig_user_sla); + } + + l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr, + gen_states[i].id, + &gen_states[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + gen_states[i].id); + return -EINVAL; + } + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_config(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct { + char *id; + struct adf_mstate_vreginfo info; + } setups[] = { + {ADF_MSTATE_GEN_CAP_IDS, + {&hw_data->accel_capabilities_mask, sizeof(hw_data->accel_capabilities_mask)}}, + {ADF_MSTATE_GEN_SVCMAP_IDS, + {&hw_data->ring_to_svc_map, sizeof(hw_data->ring_to_svc_map)}}, + {ADF_MSTATE_GEN_EXTDC_IDS, + {&hw_data->extended_dc_capabilities, sizeof(hw_data->extended_dc_capabilities)}}, + }; + int i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_CONFIG_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_CONFIG_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < ARRAY_SIZE(setups); i++) { + l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr, setups[i].id, + &setups[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + setups[i].id); + return -EINVAL; + } + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + ret = adf_gen4_vfmig_save_setup(mdev); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save setup for vf_nr %d\n", vf_nr); + return ret; + } + + adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state + mdev->setup_size, + mdev->state_size - mdev->setup_size); + if (!adf_mstate_preamble_add(vfmig->mstate_mgr)) + return -EINVAL; + + ret = adf_gen4_vfmig_save_generic(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save generic state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_save_misc(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save misc bar state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_save_etr(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save etr bar state for vf_nr %d\n", vf_nr); + return ret; + } + + adf_mstate_preamble_update(vfmig->mstate_mgr); + + return 0; +} + +static int adf_gen4_vfmig_load_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + ret = adf_gen4_vfmig_load_setup(mdev, mdev->state_size); + if (ret) { + dev_err(&GET_DEV(accel_dev), "Failed to load setup for vf_nr %d\n", + vf_nr); + return ret; + } + + ret = adf_mstate_mgr_init_from_remote(vfmig->mstate_mgr, + mdev->state + mdev->remote_setup_size, + mdev->state_size - mdev->remote_setup_size, + NULL, NULL); + if (ret) { + dev_err(&GET_DEV(accel_dev), "Invalid state for vf_nr %d\n", + vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_load_generic(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load general state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_load_misc(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load misc bar state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_load_etr(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load etr bar state for vf_nr %d\n", vf_nr); + return ret; + } + + return 0; +} + +static int adf_gen4_vfmig_save_setup(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + if (mdev->setup_size) + return 0; + + adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state, mdev->state_size); + if (!adf_mstate_preamble_add(vfmig->mstate_mgr)) + return -EINVAL; + + ret = adf_gen4_vfmig_save_config(accel_dev, mdev->vf_id); + if (ret) + return ret; + + adf_mstate_preamble_update(vfmig->mstate_mgr); + mdev->setup_size = adf_mstate_state_size(vfmig->mstate_mgr); + + return 0; +} + +static int adf_gen4_vfmig_load_setup(struct qat_mig_dev *mdev, int len) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + u32 setup_size; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + if (mdev->remote_setup_size) + return 0; + + if (len < sizeof(struct adf_mstate_preh)) + return -EAGAIN; + + adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state, mdev->state_size); + setup_size = adf_mstate_state_size_from_remote(vfmig->mstate_mgr); + if (setup_size > mdev->state_size) + return -EINVAL; + + if (len < setup_size) + return -EAGAIN; + + ret = adf_mstate_mgr_init_from_remote(vfmig->mstate_mgr, mdev->state, + setup_size, NULL, NULL); + if (ret) { + dev_err(&GET_DEV(accel_dev), "Invalide setup for vf_nr %d\n", + vf_nr); + return ret; + } + + mdev->remote_setup_size = setup_size; + + ret = adf_gen4_vfmig_load_config(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load config for vf_nr %d\n", vf_nr); + return ret; + } + + return 0; +} + +void adf_gen4_init_vf_mig_ops(struct qat_migdev_ops *vfmig_ops) +{ + vfmig_ops->init = adf_gen4_vfmig_init_device; + vfmig_ops->cleanup = adf_gen4_vfmig_cleanup_device; + vfmig_ops->reset = adf_gen4_vfmig_reset_device; + vfmig_ops->open = adf_gen4_vfmig_open_device; + vfmig_ops->close = adf_gen4_vfmig_close_device; + vfmig_ops->suspend = adf_gen4_vfmig_suspend_device; + vfmig_ops->resume = adf_gen4_vfmig_resume_device; + vfmig_ops->save_state = adf_gen4_vfmig_save_state; + vfmig_ops->load_state = adf_gen4_vfmig_load_state; + vfmig_ops->load_setup = adf_gen4_vfmig_load_setup; + vfmig_ops->save_setup = adf_gen4_vfmig_save_setup; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_vf_mig_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c new file mode 100644 index 000000000000..41cc763a74aa --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ + +#include +#include +#include "adf_mstate_mgr.h" + +#define ADF_MSTATE_MAGIC 0xADF5CAEA +#define ADF_MSTATE_VERSION 0x1 + +struct adf_mstate_sect_h { + u8 id[ADF_MSTATE_ID_LEN]; + u32 size; + u32 sub_sects; + u8 state[]; +}; + +u32 adf_mstate_state_size(struct adf_mstate_mgr *mgr) +{ + return mgr->state - mgr->buf; +} + +static inline u32 adf_mstate_avail_room(struct adf_mstate_mgr *mgr) +{ + return mgr->buf + mgr->size - mgr->state; +} + +void adf_mstate_mgr_init(struct adf_mstate_mgr *mgr, u8 *buf, u32 size) +{ + mgr->buf = buf; + mgr->state = buf; + mgr->size = size; + mgr->n_sects = 0; +}; + +struct adf_mstate_mgr *adf_mstate_mgr_new(u8 *buf, u32 size) +{ + struct adf_mstate_mgr *mgr; + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return NULL; + + adf_mstate_mgr_init(mgr, buf, size); + + return mgr; +} + +void adf_mstate_mgr_destroy(struct adf_mstate_mgr *mgr) +{ + kfree(mgr); +} + +void adf_mstate_mgr_init_from_parent(struct adf_mstate_mgr *mgr, + struct adf_mstate_mgr *p_mgr) +{ + adf_mstate_mgr_init(mgr, p_mgr->state, + p_mgr->size - adf_mstate_state_size(p_mgr)); +} + +void adf_mstate_mgr_init_from_psect(struct adf_mstate_mgr *mgr, + struct adf_mstate_sect_h *p_sect) +{ + adf_mstate_mgr_init(mgr, p_sect->state, p_sect->size); + mgr->n_sects = p_sect->sub_sects; +} + +static void adf_mstate_preamble_init(struct adf_mstate_preh *preamble) +{ + preamble->magic = ADF_MSTATE_MAGIC; + preamble->version = ADF_MSTATE_VERSION; + preamble->preh_len = sizeof(*preamble); + preamble->size = 0; + preamble->n_sects = 0; +} + +/* default preambles checker */ +static int adf_mstate_preamble_def_checker(struct adf_mstate_preh *preamble, + void *opaque) +{ + struct adf_mstate_mgr *mgr = opaque; + + if (preamble->magic != ADF_MSTATE_MAGIC || + preamble->version > ADF_MSTATE_VERSION || + preamble->preh_len > mgr->size) { + pr_debug("QAT: LM - Invalid state (magic=%#x, version=%#x, hlen=%u), state_size=%u\n", + preamble->magic, preamble->version, preamble->preh_len, + mgr->size); + return -EINVAL; + } + + return 0; +} + +struct adf_mstate_preh *adf_mstate_preamble_add(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_preh *pre = (struct adf_mstate_preh *)mgr->buf; + + if (adf_mstate_avail_room(mgr) < sizeof(*pre)) { + pr_err("QAT: LM - Not enough space for preamble\n"); + return NULL; + } + + adf_mstate_preamble_init(pre); + mgr->state += pre->preh_len; + + return pre; +} + +int adf_mstate_preamble_update(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_preh *preamble = (struct adf_mstate_preh *)mgr->buf; + + preamble->size = adf_mstate_state_size(mgr) - preamble->preh_len; + preamble->n_sects = mgr->n_sects; + + return 0; +} + +static void adf_mstate_dump_sect(struct adf_mstate_sect_h *sect, + const char *prefix) +{ + pr_debug("QAT: LM - %s QAT state section %s\n", prefix, sect->id); + print_hex_dump_debug("h-", DUMP_PREFIX_OFFSET, 16, 2, sect, + sizeof(*sect), true); + print_hex_dump_debug("s-", DUMP_PREFIX_OFFSET, 16, 2, sect->state, + sect->size, true); +} + +static inline void __adf_mstate_sect_update(struct adf_mstate_mgr *mgr, + struct adf_mstate_sect_h *sect, + u32 size, + u32 n_subsects) +{ + sect->size += size; + sect->sub_sects += n_subsects; + mgr->n_sects++; + mgr->state += sect->size; + + adf_mstate_dump_sect(sect, "Add"); +} + +void adf_mstate_sect_update(struct adf_mstate_mgr *p_mgr, + struct adf_mstate_mgr *curr_mgr, + struct adf_mstate_sect_h *sect) +{ + __adf_mstate_sect_update(p_mgr, sect, adf_mstate_state_size(curr_mgr), + curr_mgr->n_sects); +} + +static struct adf_mstate_sect_h *adf_mstate_sect_add_header(struct adf_mstate_mgr *mgr, + const char *id) +{ + struct adf_mstate_sect_h *sect = (struct adf_mstate_sect_h *)(mgr->state); + + if (adf_mstate_avail_room(mgr) < sizeof(*sect)) { + pr_debug("QAT: LM - Not enough space for header of QAT state sect %s\n", id); + return NULL; + } + + strscpy(sect->id, id, sizeof(sect->id)); + sect->size = 0; + sect->sub_sects = 0; + mgr->state += sizeof(*sect); + + return sect; +} + +struct adf_mstate_sect_h *adf_mstate_sect_add_vreg(struct adf_mstate_mgr *mgr, + const char *id, + struct adf_mstate_vreginfo *info) +{ + struct adf_mstate_sect_h *sect; + + sect = adf_mstate_sect_add_header(mgr, id); + if (!sect) + return NULL; + + if (adf_mstate_avail_room(mgr) < info->size) { + pr_debug("QAT: LM - Not enough space for QAT state sect %s, requires %u\n", + id, info->size); + return NULL; + } + + memcpy(sect->state, info->addr, info->size); + __adf_mstate_sect_update(mgr, sect, info->size, 0); + + return sect; +} + +struct adf_mstate_sect_h *adf_mstate_sect_add(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_populate populate, + void *opaque) +{ + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *sect; + int avail_room, size; + + sect = adf_mstate_sect_add_header(mgr, id); + if (!sect) + return NULL; + + if (!populate) + return sect; + + avail_room = adf_mstate_avail_room(mgr); + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mgr); + + size = (*populate)(&sub_sects_mgr, sect->state, avail_room, opaque); + if (size < 0) + return NULL; + + size += adf_mstate_state_size(&sub_sects_mgr); + if (avail_room < size) { + pr_debug("QAT: LM - Not enough space for QAT state sect %s, requires %u\n", + id, size); + return NULL; + } + __adf_mstate_sect_update(mgr, sect, size, sub_sects_mgr.n_sects); + + return sect; +} + +static int adf_mstate_sect_validate(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_sect_h *start = (struct adf_mstate_sect_h *)mgr->state; + struct adf_mstate_sect_h *sect = start; + u64 end; + int i; + + end = (uintptr_t)mgr->buf + mgr->size; + for (i = 0; i < mgr->n_sects; i++) { + uintptr_t s_start = (uintptr_t)sect->state; + uintptr_t s_end = s_start + sect->size; + + if (s_end < s_start || s_end > end) { + pr_debug("QAT: LM - Corrupted state section (index=%u, size=%u) in state_mgr (size=%u, secs=%u)\n", + i, sect->size, mgr->size, mgr->n_sects); + return -EINVAL; + } + sect = (struct adf_mstate_sect_h *)s_end; + } + + pr_debug("QAT: LM - Scanned section (last child=%s, size=%lu) in state_mgr (size=%u, secs=%u)\n", + start->id, sizeof(struct adf_mstate_sect_h) * (ulong)(sect - start), + mgr->size, mgr->n_sects); + + return 0; +} + +u32 adf_mstate_state_size_from_remote(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_preh *preh = (struct adf_mstate_preh *)mgr->buf; + + return preh->preh_len + preh->size; +} + +int adf_mstate_mgr_init_from_remote(struct adf_mstate_mgr *mgr, u8 *buf, u32 size, + adf_mstate_preamble_checker pre_checker, + void *opaque) +{ + struct adf_mstate_preh *pre; + int ret; + + adf_mstate_mgr_init(mgr, buf, size); + pre = (struct adf_mstate_preh *)(mgr->buf); + + pr_debug("QAT: LM - Dump state preambles\n"); + print_hex_dump_debug("", DUMP_PREFIX_OFFSET, 16, 2, pre, pre->preh_len, 0); + + if (pre_checker) + ret = (*pre_checker)(pre, opaque); + else + ret = adf_mstate_preamble_def_checker(pre, mgr); + if (ret) + return ret; + + mgr->state = mgr->buf + pre->preh_len; + mgr->n_sects = pre->n_sects; + + return adf_mstate_sect_validate(mgr); +} + +struct adf_mstate_sect_h *adf_mstate_sect_lookup(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_action action, + void *opaque) +{ + struct adf_mstate_sect_h *sect = (struct adf_mstate_sect_h *)mgr->state; + struct adf_mstate_mgr sub_sects_mgr; + int i, ret; + + for (i = 0; i < mgr->n_sects; i++) { + if (!strncmp(sect->id, id, sizeof(sect->id))) + goto found; + + sect = (struct adf_mstate_sect_h *)(sect->state + sect->size); + } + + return NULL; + +found: + adf_mstate_dump_sect(sect, "Found"); + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, sect); + if (sect->sub_sects && adf_mstate_sect_validate(&sub_sects_mgr)) + return NULL; + + if (!action) + return sect; + + ret = (*action)(&sub_sects_mgr, sect->state, sect->size, opaque); + if (ret) + return NULL; + + return sect; +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h new file mode 100644 index 000000000000..81d263a596c5 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ + +#ifndef ADF_MSTATE_MGR_H +#define ADF_MSTATE_MGR_H + +#define ADF_MSTATE_ID_LEN 8 + +#define ADF_MSTATE_ETRB_IDS "ETRBAR" +#define ADF_MSTATE_MISCB_IDS "MISCBAR" +#define ADF_MSTATE_EXTB_IDS "EXTBAR" +#define ADF_MSTATE_GEN_IDS "GENER" +#define ADF_MSTATE_CONFIG_IDS "CONFIG" +#define ADF_MSTATE_SECTION_NUM 5 + +#define ADF_MSTATE_BANK_IDX_IDS "bnk" + +#define ADF_MSTATE_ETR_REGS_IDS "mregs" +#define ADF_MSTATE_VINTSRC_IDS "visrc" +#define ADF_MSTATE_VINTMSK_IDS "vimsk" +#define ADF_MSTATE_SLA_IDS "sla" +#define ADF_MSTATE_IOV_INIT_IDS "iovinit" +#define ADF_MSTATE_COMPAT_VER_IDS "compver" +#define ADF_MSTATE_GEN_CAP_IDS "gencap" +#define ADF_MSTATE_GEN_SVCMAP_IDS "svcmap" +#define ADF_MSTATE_GEN_EXTDC_IDS "extdc" +#define ADF_MSTATE_VINTSRC_PF2VM_IDS "vispv" +#define ADF_MSTATE_VINTMSK_PF2VM_IDS "vimpv" +#define ADF_MSTATE_VM2PF_IDS "vm2pf" +#define ADF_MSTATE_PF2VM_IDS "pf2vm" + +struct adf_mstate_mgr { + u8 *buf; + u8 *state; + u32 size; + u32 n_sects; +}; + +struct adf_mstate_preh { + u32 magic; + u32 version; + u16 preh_len; + u16 n_sects; + u32 size; +}; + +struct adf_mstate_vreginfo { + void *addr; + u32 size; +}; + +struct adf_mstate_sect_h; + +typedef int (*adf_mstate_preamble_checker)(struct adf_mstate_preh *preamble, void *opa); +typedef int (*adf_mstate_populate)(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa); +typedef int (*adf_mstate_action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, + void *opa); + +struct adf_mstate_mgr *adf_mstate_mgr_new(u8 *buf, u32 size); +void adf_mstate_mgr_destroy(struct adf_mstate_mgr *mgr); +void adf_mstate_mgr_init(struct adf_mstate_mgr *mgr, u8 *buf, u32 size); +void adf_mstate_mgr_init_from_parent(struct adf_mstate_mgr *mgr, + struct adf_mstate_mgr *p_mgr); +void adf_mstate_mgr_init_from_psect(struct adf_mstate_mgr *mgr, + struct adf_mstate_sect_h *p_sect); +int adf_mstate_mgr_init_from_remote(struct adf_mstate_mgr *mgr, + u8 *buf, u32 size, + adf_mstate_preamble_checker checker, + void *opaque); +struct adf_mstate_preh *adf_mstate_preamble_add(struct adf_mstate_mgr *mgr); +int adf_mstate_preamble_update(struct adf_mstate_mgr *mgr); +u32 adf_mstate_state_size(struct adf_mstate_mgr *mgr); +u32 adf_mstate_state_size_from_remote(struct adf_mstate_mgr *mgr); +void adf_mstate_sect_update(struct adf_mstate_mgr *p_mgr, + struct adf_mstate_mgr *curr_mgr, + struct adf_mstate_sect_h *sect); +struct adf_mstate_sect_h *adf_mstate_sect_add_vreg(struct adf_mstate_mgr *mgr, + const char *id, + struct adf_mstate_vreginfo *info); +struct adf_mstate_sect_h *adf_mstate_sect_add(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_populate populate, + void *opaque); +struct adf_mstate_sect_h *adf_mstate_sect_lookup(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_action action, + void *opaque); +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_sriov.c b/drivers/crypto/intel/qat/qat_common/adf_sriov.c index 87a70c00c41e..8d645e7e04aa 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sriov.c @@ -26,10 +26,12 @@ static void adf_iov_send_resp(struct work_struct *work) u32 vf_nr = vf_info->vf_nr; bool ret; + mutex_lock(&vf_info->pfvf_mig_lock); ret = adf_recv_and_handle_vf2pf_msg(accel_dev, vf_nr); if (ret) /* re-enable interrupt on PF from this VF */ adf_enable_vf2pf_interrupts(accel_dev, 1 << vf_nr); + mutex_unlock(&vf_info->pfvf_mig_lock); kfree(pf2vf_resp); } @@ -62,6 +64,7 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) vf_info->vf_nr = i; mutex_init(&vf_info->pf2vf_lock); + mutex_init(&vf_info->pfvf_mig_lock); ratelimit_state_init(&vf_info->vf2pf_ratelimit, ADF_VF2PF_RATELIMIT_INTERVAL, ADF_VF2PF_RATELIMIT_BURST); @@ -138,8 +141,10 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) if (hw_data->configure_iov_threads) hw_data->configure_iov_threads(accel_dev, false); - for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) + for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) { mutex_destroy(&vf->pf2vf_lock); + mutex_destroy(&vf->pfvf_mig_lock); + } if (!test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) { kfree(accel_dev->pf.vf_info); From 19b0ed5ddc8b554d1dfc34f870dc56e706ed205a Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 4 Mar 2024 15:20:08 -0600 Subject: [PATCH 010/123] crypto: iaa - fix decomp_bytes_in stats Decomp stats should use slen, not dlen. Change both the global and per-wq stats to use the correct value. Signed-off-by: Tom Zanussi Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index b2191ade9011..cc4200579e54 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1079,8 +1079,8 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc, update_total_comp_bytes_out(ctx->req->dlen); update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen); } else { - update_total_decomp_bytes_in(ctx->req->dlen); - update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen); + update_total_decomp_bytes_in(ctx->req->slen); + update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen); } if (ctx->compress && compression_ctx->verify_compress) { From 956cb8a37039306379a1a926ccb1b55e08ffae80 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 4 Mar 2024 15:20:09 -0600 Subject: [PATCH 011/123] crypto: iaa - Remove comp/decomp delay statistics As part of the simplification/cleanup of the iaa statistics, remove the comp/decomp delay statistics. They're actually not really useful and can be/are being more flexibly generated using standard kernel tracing infrastructure. Signed-off-by: Tom Zanussi Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 9 ------- drivers/crypto/intel/iaa/iaa_crypto_stats.c | 28 --------------------- drivers/crypto/intel/iaa/iaa_crypto_stats.h | 8 ------ 3 files changed, 45 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index cc4200579e54..6229b24b0d35 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1498,7 +1498,6 @@ static int iaa_comp_acompress(struct acomp_req *req) u32 compression_crc; struct idxd_wq *wq; struct device *dev; - u64 start_time_ns; int order = -1; compression_ctx = crypto_tfm_ctx(tfm); @@ -1572,10 +1571,8 @@ static int iaa_comp_acompress(struct acomp_req *req) " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); - start_time_ns = iaa_get_ts(); ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, &compression_crc, disable_async); - update_max_comp_delay_ns(start_time_ns); if (ret == -EINPROGRESS) return ret; @@ -1622,7 +1619,6 @@ static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req) struct iaa_wq *iaa_wq; struct device *dev; struct idxd_wq *wq; - u64 start_time_ns; int order = -1; cpu = get_cpu(); @@ -1679,10 +1675,8 @@ static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req) dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); - start_time_ns = iaa_get_ts(); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, true); - update_max_decomp_delay_ns(start_time_ns); if (ret == -EOVERFLOW) { dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); req->dlen *= 2; @@ -1713,7 +1707,6 @@ static int iaa_comp_adecompress(struct acomp_req *req) int nr_sgs, cpu, ret = 0; struct iaa_wq *iaa_wq; struct device *dev; - u64 start_time_ns; struct idxd_wq *wq; if (!iaa_crypto_enabled) { @@ -1773,10 +1766,8 @@ static int iaa_comp_adecompress(struct acomp_req *req) " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); - start_time_ns = iaa_get_ts(); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, false); - update_max_decomp_delay_ns(start_time_ns); if (ret == -EINPROGRESS) return ret; diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c index c9f83af4b307..7820062a91e5 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c @@ -20,8 +20,6 @@ static u64 total_comp_calls; static u64 total_decomp_calls; static u64 total_sw_decomp_calls; -static u64 max_comp_delay_ns; -static u64 max_decomp_delay_ns; static u64 total_comp_bytes_out; static u64 total_decomp_bytes_in; static u64 total_completion_einval_errors; @@ -70,26 +68,6 @@ void update_completion_comp_buf_overflow_errs(void) total_completion_comp_buf_overflow_errors++; } -void update_max_comp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_comp_delay_ns) - max_comp_delay_ns = time_diff; -} - -void update_max_decomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_decomp_delay_ns) - max_decomp_delay_ns = time_diff; -} - void update_wq_comp_calls(struct idxd_wq *idxd_wq) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); @@ -127,8 +105,6 @@ static void reset_iaa_crypto_stats(void) total_comp_calls = 0; total_decomp_calls = 0; total_sw_decomp_calls = 0; - max_comp_delay_ns = 0; - max_decomp_delay_ns = 0; total_comp_bytes_out = 0; total_decomp_bytes_in = 0; total_completion_einval_errors = 0; @@ -252,10 +228,6 @@ int __init iaa_crypto_debugfs_init(void) iaa_crypto_debugfs_root = debugfs_create_dir("iaa_crypto", NULL); - debugfs_create_u64("max_comp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_comp_delay_ns); - debugfs_create_u64("max_decomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_decomp_delay_ns); debugfs_create_u64("total_comp_calls", 0644, iaa_crypto_debugfs_root, &total_comp_calls); debugfs_create_u64("total_decomp_calls", 0644, diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.h b/drivers/crypto/intel/iaa/iaa_crypto_stats.h index c916ca83f070..3787a5f507eb 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.h +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.h @@ -13,8 +13,6 @@ void update_total_comp_bytes_out(int n); void update_total_decomp_calls(void); void update_total_sw_decomp_calls(void); void update_total_decomp_bytes_in(int n); -void update_max_comp_delay_ns(u64 start_time_ns); -void update_max_decomp_delay_ns(u64 start_time_ns); void update_completion_einval_errs(void); void update_completion_timeout_errs(void); void update_completion_comp_buf_overflow_errs(void); @@ -24,8 +22,6 @@ void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n); void update_wq_decomp_calls(struct idxd_wq *idxd_wq); void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n); -static inline u64 iaa_get_ts(void) { return ktime_get_ns(); } - #else static inline int iaa_crypto_debugfs_init(void) { return 0; } static inline void iaa_crypto_debugfs_cleanup(void) {} @@ -35,8 +31,6 @@ static inline void update_total_comp_bytes_out(int n) {} static inline void update_total_decomp_calls(void) {} static inline void update_total_sw_decomp_calls(void) {} static inline void update_total_decomp_bytes_in(int n) {} -static inline void update_max_comp_delay_ns(u64 start_time_ns) {} -static inline void update_max_decomp_delay_ns(u64 start_time_ns) {} static inline void update_completion_einval_errs(void) {} static inline void update_completion_timeout_errs(void) {} static inline void update_completion_comp_buf_overflow_errs(void) {} @@ -46,8 +40,6 @@ static inline void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) {} static inline void update_wq_decomp_calls(struct idxd_wq *idxd_wq) {} static inline void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) {} -static inline u64 iaa_get_ts(void) { return 0; } - #endif // CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS #endif From c21fb22df63d51bb26d34023b0d9651a15442eb6 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 4 Mar 2024 15:20:10 -0600 Subject: [PATCH 012/123] crypto: iaa - Add global_stats file and remove individual stat files Currently, the wq_stats output also includes the global stats, while the individual global stats are also available as separate debugfs files. Since these are all read-only, there's really no reason to have them as separate files, especially since we already display them as global stats in the wq_stats. It makes more sense to just add a separate global_stats file to display those, and remove them from the wq_stats, as well as removing the individual stats files. Signed-off-by: Tom Zanussi Signed-off-by: Herbert Xu --- .../driver-api/crypto/iaa/iaa-crypto.rst | 74 +++++++++++-------- drivers/crypto/intel/iaa/iaa_crypto_stats.c | 30 ++++---- 2 files changed, 60 insertions(+), 44 deletions(-) diff --git a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst index de587cf9cbed..7b28aef39ba0 100644 --- a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst +++ b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst @@ -321,33 +321,30 @@ driver will generate statistics which can be accessed in debugfs at:: # ls -al /sys/kernel/debug/iaa-crypto/ total 0 - drwxr-xr-x 2 root root 0 Mar 3 09:35 . - drwx------ 47 root root 0 Mar 3 09:35 .. - -rw-r--r-- 1 root root 0 Mar 3 09:35 max_acomp_delay_ns - -rw-r--r-- 1 root root 0 Mar 3 09:35 max_adecomp_delay_ns - -rw-r--r-- 1 root root 0 Mar 3 09:35 max_comp_delay_ns - -rw-r--r-- 1 root root 0 Mar 3 09:35 max_decomp_delay_ns - -rw-r--r-- 1 root root 0 Mar 3 09:35 stats_reset - -rw-r--r-- 1 root root 0 Mar 3 09:35 total_comp_bytes_out - -rw-r--r-- 1 root root 0 Mar 3 09:35 total_comp_calls - -rw-r--r-- 1 root root 0 Mar 3 09:35 total_decomp_bytes_in - -rw-r--r-- 1 root root 0 Mar 3 09:35 total_decomp_calls - -rw-r--r-- 1 root root 0 Mar 3 09:35 wq_stats + drwxr-xr-x 2 root root 0 Mar 3 07:55 . + drwx------ 53 root root 0 Mar 3 07:55 .. + -rw-r--r-- 1 root root 0 Mar 3 07:55 global_stats + -rw-r--r-- 1 root root 0 Mar 3 07:55 stats_reset + -rw-r--r-- 1 root root 0 Mar 3 07:55 wq_stats -Most of the above statisticss are self-explanatory. The wq_stats file -shows per-wq stats, a set for each iaa device and wq in addition to -some global stats:: +The global_stats file shows a set of global statistics collected since +the driver has been loaded or reset:: - # cat wq_stats + # cat global_stats global stats: - total_comp_calls: 100 - total_decomp_calls: 100 - total_comp_bytes_out: 22800 - total_decomp_bytes_in: 22800 + total_comp_calls: 4300 + total_decomp_calls: 4164 + total_sw_decomp_calls: 0 + total_comp_bytes_out: 5993989 + total_decomp_bytes_in: 5993989 total_completion_einval_errors: 0 total_completion_timeout_errors: 0 - total_completion_comp_buf_overflow_errors: 0 + total_completion_comp_buf_overflow_errors: 136 +The wq_stats file shows per-wq stats, a set for each iaa device and wq +in addition to some global stats:: + + # cat wq_stats iaa device: id: 1 n_wqs: 1 @@ -379,21 +376,36 @@ some global stats:: iaa device: id: 5 n_wqs: 1 - comp_calls: 100 - comp_bytes: 22800 - decomp_calls: 100 - decomp_bytes: 22800 + comp_calls: 1360 + comp_bytes: 1999776 + decomp_calls: 0 + decomp_bytes: 0 wqs: name: iaa_crypto - comp_calls: 100 - comp_bytes: 22800 - decomp_calls: 100 - decomp_bytes: 22800 + comp_calls: 1360 + comp_bytes: 1999776 + decomp_calls: 0 + decomp_bytes: 0 -Writing 0 to 'stats_reset' resets all the stats, including the + iaa device: + id: 7 + n_wqs: 1 + comp_calls: 2940 + comp_bytes: 3994213 + decomp_calls: 4164 + decomp_bytes: 5993989 + wqs: + name: iaa_crypto + comp_calls: 2940 + comp_bytes: 3994213 + decomp_calls: 4164 + decomp_bytes: 5993989 + ... + +Writing to 'stats_reset' resets all the stats, including the per-device and per-wq stats:: - # echo 0 > stats_reset + # echo 1 > stats_reset # cat wq_stats global stats: total_comp_calls: 0 diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c index 7820062a91e5..0f225bdf2279 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c @@ -159,7 +159,7 @@ static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) wq_show(m, iaa_wq); } -static void global_stats_show(struct seq_file *m) +static int global_stats_show(struct seq_file *m, void *v) { seq_puts(m, "global stats:\n"); seq_printf(m, " total_comp_calls: %llu\n", total_comp_calls); @@ -173,6 +173,8 @@ static void global_stats_show(struct seq_file *m) total_completion_timeout_errors); seq_printf(m, " total_completion_comp_buf_overflow_errors: %llu\n\n", total_completion_comp_buf_overflow_errors); + + return 0; } static int wq_stats_show(struct seq_file *m, void *v) @@ -181,8 +183,6 @@ static int wq_stats_show(struct seq_file *m, void *v) mutex_lock(&iaa_devices_lock); - global_stats_show(m); - list_for_each_entry(iaa_device, &iaa_devices, list) device_stats_show(m, iaa_device); @@ -219,6 +219,18 @@ static const struct file_operations wq_stats_fops = { .release = single_release, }; +static int global_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, global_stats_show, file); +} + +static const struct file_operations global_stats_fops = { + .open = global_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + DEFINE_DEBUGFS_ATTRIBUTE(wq_stats_reset_fops, NULL, iaa_crypto_stats_reset, "%llu\n"); int __init iaa_crypto_debugfs_init(void) @@ -228,16 +240,8 @@ int __init iaa_crypto_debugfs_init(void) iaa_crypto_debugfs_root = debugfs_create_dir("iaa_crypto", NULL); - debugfs_create_u64("total_comp_calls", 0644, - iaa_crypto_debugfs_root, &total_comp_calls); - debugfs_create_u64("total_decomp_calls", 0644, - iaa_crypto_debugfs_root, &total_decomp_calls); - debugfs_create_u64("total_sw_decomp_calls", 0644, - iaa_crypto_debugfs_root, &total_sw_decomp_calls); - debugfs_create_u64("total_comp_bytes_out", 0644, - iaa_crypto_debugfs_root, &total_comp_bytes_out); - debugfs_create_u64("total_decomp_bytes_in", 0644, - iaa_crypto_debugfs_root, &total_decomp_bytes_in); + debugfs_create_file("global_stats", 0644, iaa_crypto_debugfs_root, NULL, + &global_stats_fops); debugfs_create_file("wq_stats", 0644, iaa_crypto_debugfs_root, NULL, &wq_stats_fops); debugfs_create_file("stats_reset", 0644, iaa_crypto_debugfs_root, NULL, From 43698cd6c02ddcf3b4ffb34e5da0be3e801027fe Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 4 Mar 2024 15:20:11 -0600 Subject: [PATCH 013/123] crypto: iaa - Change iaa statistics to atomic64_t Change all the iaa statistics to use atomic64_t instead of the current u64, to avoid potentially inconsistent counts. Signed-off-by: Tom Zanussi Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto.h | 16 +-- drivers/crypto/intel/iaa/iaa_crypto_stats.c | 125 +++++++++++--------- 2 files changed, 77 insertions(+), 64 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h index 2524091a5f70..56985e395263 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto.h +++ b/drivers/crypto/intel/iaa/iaa_crypto.h @@ -49,10 +49,10 @@ struct iaa_wq { struct iaa_device *iaa_device; - u64 comp_calls; - u64 comp_bytes; - u64 decomp_calls; - u64 decomp_bytes; + atomic64_t comp_calls; + atomic64_t comp_bytes; + atomic64_t decomp_calls; + atomic64_t decomp_bytes; }; struct iaa_device_compression_mode { @@ -73,10 +73,10 @@ struct iaa_device { int n_wq; struct list_head wqs; - u64 comp_calls; - u64 comp_bytes; - u64 decomp_calls; - u64 decomp_bytes; + atomic64_t comp_calls; + atomic64_t comp_bytes; + atomic64_t decomp_calls; + atomic64_t decomp_bytes; }; struct wq_table_entry { diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c index 0f225bdf2279..f5cc3d29ca19 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c @@ -17,117 +17,117 @@ #include "iaa_crypto.h" #include "iaa_crypto_stats.h" -static u64 total_comp_calls; -static u64 total_decomp_calls; -static u64 total_sw_decomp_calls; -static u64 total_comp_bytes_out; -static u64 total_decomp_bytes_in; -static u64 total_completion_einval_errors; -static u64 total_completion_timeout_errors; -static u64 total_completion_comp_buf_overflow_errors; +static atomic64_t total_comp_calls; +static atomic64_t total_decomp_calls; +static atomic64_t total_sw_decomp_calls; +static atomic64_t total_comp_bytes_out; +static atomic64_t total_decomp_bytes_in; +static atomic64_t total_completion_einval_errors; +static atomic64_t total_completion_timeout_errors; +static atomic64_t total_completion_comp_buf_overflow_errors; static struct dentry *iaa_crypto_debugfs_root; void update_total_comp_calls(void) { - total_comp_calls++; + atomic64_inc(&total_comp_calls); } void update_total_comp_bytes_out(int n) { - total_comp_bytes_out += n; + atomic64_add(n, &total_comp_bytes_out); } void update_total_decomp_calls(void) { - total_decomp_calls++; + atomic64_inc(&total_decomp_calls); } void update_total_sw_decomp_calls(void) { - total_sw_decomp_calls++; + atomic64_inc(&total_sw_decomp_calls); } void update_total_decomp_bytes_in(int n) { - total_decomp_bytes_in += n; + atomic64_add(n, &total_decomp_bytes_in); } void update_completion_einval_errs(void) { - total_completion_einval_errors++; + atomic64_inc(&total_completion_einval_errors); } void update_completion_timeout_errs(void) { - total_completion_timeout_errors++; + atomic64_inc(&total_completion_timeout_errors); } void update_completion_comp_buf_overflow_errs(void) { - total_completion_comp_buf_overflow_errors++; + atomic64_inc(&total_completion_comp_buf_overflow_errors); } void update_wq_comp_calls(struct idxd_wq *idxd_wq) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->comp_calls++; - wq->iaa_device->comp_calls++; + atomic64_inc(&wq->comp_calls); + atomic64_inc(&wq->iaa_device->comp_calls); } void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->comp_bytes += n; - wq->iaa_device->comp_bytes += n; + atomic64_add(n, &wq->comp_bytes); + atomic64_add(n, &wq->iaa_device->comp_bytes); } void update_wq_decomp_calls(struct idxd_wq *idxd_wq) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->decomp_calls++; - wq->iaa_device->decomp_calls++; + atomic64_inc(&wq->decomp_calls); + atomic64_inc(&wq->iaa_device->decomp_calls); } void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->decomp_bytes += n; - wq->iaa_device->decomp_bytes += n; + atomic64_add(n, &wq->decomp_bytes); + atomic64_add(n, &wq->iaa_device->decomp_bytes); } static void reset_iaa_crypto_stats(void) { - total_comp_calls = 0; - total_decomp_calls = 0; - total_sw_decomp_calls = 0; - total_comp_bytes_out = 0; - total_decomp_bytes_in = 0; - total_completion_einval_errors = 0; - total_completion_timeout_errors = 0; - total_completion_comp_buf_overflow_errors = 0; + atomic64_set(&total_comp_calls, 0); + atomic64_set(&total_decomp_calls, 0); + atomic64_set(&total_sw_decomp_calls, 0); + atomic64_set(&total_comp_bytes_out, 0); + atomic64_set(&total_decomp_bytes_in, 0); + atomic64_set(&total_completion_einval_errors, 0); + atomic64_set(&total_completion_timeout_errors, 0); + atomic64_set(&total_completion_comp_buf_overflow_errors, 0); } static void reset_wq_stats(struct iaa_wq *wq) { - wq->comp_calls = 0; - wq->comp_bytes = 0; - wq->decomp_calls = 0; - wq->decomp_bytes = 0; + atomic64_set(&wq->comp_calls, 0); + atomic64_set(&wq->comp_bytes, 0); + atomic64_set(&wq->decomp_calls, 0); + atomic64_set(&wq->decomp_bytes, 0); } static void reset_device_stats(struct iaa_device *iaa_device) { struct iaa_wq *iaa_wq; - iaa_device->comp_calls = 0; - iaa_device->comp_bytes = 0; - iaa_device->decomp_calls = 0; - iaa_device->decomp_bytes = 0; + atomic64_set(&iaa_device->comp_calls, 0); + atomic64_set(&iaa_device->comp_bytes, 0); + atomic64_set(&iaa_device->decomp_calls, 0); + atomic64_set(&iaa_device->decomp_bytes, 0); list_for_each_entry(iaa_wq, &iaa_device->wqs, list) reset_wq_stats(iaa_wq); @@ -136,10 +136,14 @@ static void reset_device_stats(struct iaa_device *iaa_device) static void wq_show(struct seq_file *m, struct iaa_wq *iaa_wq) { seq_printf(m, " name: %s\n", iaa_wq->wq->name); - seq_printf(m, " comp_calls: %llu\n", iaa_wq->comp_calls); - seq_printf(m, " comp_bytes: %llu\n", iaa_wq->comp_bytes); - seq_printf(m, " decomp_calls: %llu\n", iaa_wq->decomp_calls); - seq_printf(m, " decomp_bytes: %llu\n\n", iaa_wq->decomp_bytes); + seq_printf(m, " comp_calls: %llu\n", + atomic64_read(&iaa_wq->comp_calls)); + seq_printf(m, " comp_bytes: %llu\n", + atomic64_read(&iaa_wq->comp_bytes)); + seq_printf(m, " decomp_calls: %llu\n", + atomic64_read(&iaa_wq->decomp_calls)); + seq_printf(m, " decomp_bytes: %llu\n\n", + atomic64_read(&iaa_wq->decomp_bytes)); } static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) @@ -149,10 +153,14 @@ static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) seq_puts(m, "iaa device:\n"); seq_printf(m, " id: %d\n", iaa_device->idxd->id); seq_printf(m, " n_wqs: %d\n", iaa_device->n_wq); - seq_printf(m, " comp_calls: %llu\n", iaa_device->comp_calls); - seq_printf(m, " comp_bytes: %llu\n", iaa_device->comp_bytes); - seq_printf(m, " decomp_calls: %llu\n", iaa_device->decomp_calls); - seq_printf(m, " decomp_bytes: %llu\n", iaa_device->decomp_bytes); + seq_printf(m, " comp_calls: %llu\n", + atomic64_read(&iaa_device->comp_calls)); + seq_printf(m, " comp_bytes: %llu\n", + atomic64_read(&iaa_device->comp_bytes)); + seq_printf(m, " decomp_calls: %llu\n", + atomic64_read(&iaa_device->decomp_calls)); + seq_printf(m, " decomp_bytes: %llu\n", + atomic64_read(&iaa_device->decomp_bytes)); seq_puts(m, " wqs:\n"); list_for_each_entry(iaa_wq, &iaa_device->wqs, list) @@ -162,17 +170,22 @@ static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) static int global_stats_show(struct seq_file *m, void *v) { seq_puts(m, "global stats:\n"); - seq_printf(m, " total_comp_calls: %llu\n", total_comp_calls); - seq_printf(m, " total_decomp_calls: %llu\n", total_decomp_calls); - seq_printf(m, " total_sw_decomp_calls: %llu\n", total_sw_decomp_calls); - seq_printf(m, " total_comp_bytes_out: %llu\n", total_comp_bytes_out); - seq_printf(m, " total_decomp_bytes_in: %llu\n", total_decomp_bytes_in); + seq_printf(m, " total_comp_calls: %llu\n", + atomic64_read(&total_comp_calls)); + seq_printf(m, " total_decomp_calls: %llu\n", + atomic64_read(&total_decomp_calls)); + seq_printf(m, " total_sw_decomp_calls: %llu\n", + atomic64_read(&total_sw_decomp_calls)); + seq_printf(m, " total_comp_bytes_out: %llu\n", + atomic64_read(&total_comp_bytes_out)); + seq_printf(m, " total_decomp_bytes_in: %llu\n", + atomic64_read(&total_decomp_bytes_in)); seq_printf(m, " total_completion_einval_errors: %llu\n", - total_completion_einval_errors); + atomic64_read(&total_completion_einval_errors)); seq_printf(m, " total_completion_timeout_errors: %llu\n", - total_completion_timeout_errors); + atomic64_read(&total_completion_timeout_errors)); seq_printf(m, " total_completion_comp_buf_overflow_errors: %llu\n\n", - total_completion_comp_buf_overflow_errors); + atomic64_read(&total_completion_comp_buf_overflow_errors)); return 0; } From 2ccf7a5d9c50f3eaa21ae560332d5f986b8f25e6 Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Tue, 5 Mar 2024 15:10:00 +0800 Subject: [PATCH 014/123] dt-bindings: crypto: starfive: Add jh8100 support Add compatible string and additional interrupt for StarFive JH8100 crypto engine. Signed-off-by: Jia Jie Ho Acked-by: Conor Dooley Signed-off-by: Herbert Xu --- .../crypto/starfive,jh7110-crypto.yaml | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml b/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml index 71a2876bd6e4..446764bc2ccc 100644 --- a/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml +++ b/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml @@ -12,7 +12,9 @@ maintainers: properties: compatible: - const: starfive,jh7110-crypto + enum: + - starfive,jh8100-crypto + - starfive,jh7110-crypto reg: maxItems: 1 @@ -28,7 +30,10 @@ properties: - const: ahb interrupts: - maxItems: 1 + minItems: 1 + items: + - description: SHA2 module irq + - description: SM3 module irq resets: maxItems: 1 @@ -54,6 +59,27 @@ required: additionalProperties: false +allOf: + - if: + properties: + compatible: + const: starfive,jh7110-crypto + + then: + properties: + interrupts: + maxItems: 1 + + - if: + properties: + compatible: + const: starfive,jh8100-crypto + + then: + properties: + interrupts: + minItems: 2 + examples: - | crypto: crypto@16000000 { From b6e9eb69a19562cd8d8b09f76593884e4c69ffc7 Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Tue, 5 Mar 2024 15:10:01 +0800 Subject: [PATCH 015/123] crypto: starfive - Update hash dma usage Current hash uses sw fallback for non-word aligned input scatterlists. Add support for unaligned cases utilizing the data valid mask for dma. Signed-off-by: Jia Jie Ho Signed-off-by: Herbert Xu --- drivers/crypto/starfive/jh7110-cryp.c | 9 - drivers/crypto/starfive/jh7110-cryp.h | 4 +- drivers/crypto/starfive/jh7110-hash.c | 285 +++++++++++--------------- 3 files changed, 117 insertions(+), 181 deletions(-) diff --git a/drivers/crypto/starfive/jh7110-cryp.c b/drivers/crypto/starfive/jh7110-cryp.c index 425fddf3a8ab..2685f5483639 100644 --- a/drivers/crypto/starfive/jh7110-cryp.c +++ b/drivers/crypto/starfive/jh7110-cryp.c @@ -103,12 +103,6 @@ static irqreturn_t starfive_cryp_irq(int irq, void *priv) tasklet_schedule(&cryp->aes_done); } - if (status & STARFIVE_IE_FLAG_HASH_DONE) { - mask |= STARFIVE_IE_MASK_HASH_DONE; - writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET); - tasklet_schedule(&cryp->hash_done); - } - return IRQ_HANDLED; } @@ -132,7 +126,6 @@ static int starfive_cryp_probe(struct platform_device *pdev) "Error remapping memory for platform device\n"); tasklet_init(&cryp->aes_done, starfive_aes_done_task, (unsigned long)cryp); - tasklet_init(&cryp->hash_done, starfive_hash_done_task, (unsigned long)cryp); cryp->phys_base = res->start; cryp->dma_maxburst = 32; @@ -220,7 +213,6 @@ static int starfive_cryp_probe(struct platform_device *pdev) reset_control_assert(cryp->rst); tasklet_kill(&cryp->aes_done); - tasklet_kill(&cryp->hash_done); return ret; } @@ -234,7 +226,6 @@ static void starfive_cryp_remove(struct platform_device *pdev) starfive_rsa_unregister_algs(); tasklet_kill(&cryp->aes_done); - tasklet_kill(&cryp->hash_done); crypto_engine_stop(cryp->engine); crypto_engine_exit(cryp->engine); diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h index 6cdf6db5d904..82327e21d340 100644 --- a/drivers/crypto/starfive/jh7110-cryp.h +++ b/drivers/crypto/starfive/jh7110-cryp.h @@ -91,6 +91,7 @@ union starfive_hash_csr { #define STARFIVE_HASH_KEY_DONE BIT(13) u32 key_done :1; u32 key_flag :1; +#define STARFIVE_HASH_HMAC_DONE BIT(15) u32 hmac_done :1; #define STARFIVE_HASH_BUSY BIT(16) u32 busy :1; @@ -189,7 +190,7 @@ struct starfive_cryp_dev { struct scatter_walk out_walk; struct crypto_engine *engine; struct tasklet_struct aes_done; - struct tasklet_struct hash_done; + struct completion dma_done; size_t assoclen; size_t total_in; size_t total_out; @@ -237,6 +238,5 @@ void starfive_rsa_unregister_algs(void); int starfive_aes_register_algs(void); void starfive_aes_unregister_algs(void); -void starfive_hash_done_task(unsigned long param); void starfive_aes_done_task(unsigned long param); #endif diff --git a/drivers/crypto/starfive/jh7110-hash.c b/drivers/crypto/starfive/jh7110-hash.c index b6d1808012ca..2c60a1047bc3 100644 --- a/drivers/crypto/starfive/jh7110-hash.c +++ b/drivers/crypto/starfive/jh7110-hash.c @@ -36,15 +36,22 @@ #define STARFIVE_HASH_BUFLEN SHA512_BLOCK_SIZE #define STARFIVE_HASH_RESET 0x2 -static inline int starfive_hash_wait_busy(struct starfive_cryp_ctx *ctx) +static inline int starfive_hash_wait_busy(struct starfive_cryp_dev *cryp) { - struct starfive_cryp_dev *cryp = ctx->cryp; u32 status; return readl_relaxed_poll_timeout(cryp->base + STARFIVE_HASH_SHACSR, status, !(status & STARFIVE_HASH_BUSY), 10, 100000); } +static inline int starfive_hash_wait_hmac_done(struct starfive_cryp_dev *cryp) +{ + u32 status; + + return readl_relaxed_poll_timeout(cryp->base + STARFIVE_HASH_SHACSR, status, + (status & STARFIVE_HASH_HMAC_DONE), 10, 100000); +} + static inline int starfive_hash_wait_key_done(struct starfive_cryp_ctx *ctx) { struct starfive_cryp_dev *cryp = ctx->cryp; @@ -84,64 +91,26 @@ static int starfive_hash_hmac_key(struct starfive_cryp_ctx *ctx) return 0; } -static void starfive_hash_start(void *param) +static void starfive_hash_start(struct starfive_cryp_dev *cryp) { - struct starfive_cryp_ctx *ctx = param; - struct starfive_cryp_request_ctx *rctx = ctx->rctx; - struct starfive_cryp_dev *cryp = ctx->cryp; - union starfive_alg_cr alg_cr; union starfive_hash_csr csr; - u32 stat; - - dma_unmap_sg(cryp->dev, rctx->in_sg, rctx->in_sg_len, DMA_TO_DEVICE); - - alg_cr.v = 0; - alg_cr.clear = 1; - - writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); csr.v = readl(cryp->base + STARFIVE_HASH_SHACSR); csr.firstb = 0; csr.final = 1; - - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_HASH_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); writel(csr.v, cryp->base + STARFIVE_HASH_SHACSR); } -static int starfive_hash_xmit_dma(struct starfive_cryp_ctx *ctx) +static void starfive_hash_dma_callback(void *param) { - struct starfive_cryp_request_ctx *rctx = ctx->rctx; - struct starfive_cryp_dev *cryp = ctx->cryp; - struct dma_async_tx_descriptor *in_desc; - union starfive_alg_cr alg_cr; - int total_len; - int ret; + struct starfive_cryp_dev *cryp = param; - if (!rctx->total) { - starfive_hash_start(ctx); - return 0; - } + complete(&cryp->dma_done); +} - writel(rctx->total, cryp->base + STARFIVE_DMA_IN_LEN_OFFSET); - - total_len = rctx->total; - total_len = (total_len & 0x3) ? (((total_len >> 2) + 1) << 2) : total_len; - sg_dma_len(rctx->in_sg) = total_len; - - alg_cr.v = 0; - alg_cr.start = 1; - alg_cr.hash_dma_en = 1; - - writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); - - ret = dma_map_sg(cryp->dev, rctx->in_sg, rctx->in_sg_len, DMA_TO_DEVICE); - if (!ret) - return dev_err_probe(cryp->dev, -EINVAL, "dma_map_sg() error\n"); - - cryp->cfg_in.direction = DMA_MEM_TO_DEV; - cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; +static void starfive_hash_dma_init(struct starfive_cryp_dev *cryp) +{ + cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES; cryp->cfg_in.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; cryp->cfg_in.src_maxburst = cryp->dma_maxburst; cryp->cfg_in.dst_maxburst = cryp->dma_maxburst; @@ -149,50 +118,48 @@ static int starfive_hash_xmit_dma(struct starfive_cryp_ctx *ctx) dmaengine_slave_config(cryp->tx, &cryp->cfg_in); - in_desc = dmaengine_prep_slave_sg(cryp->tx, rctx->in_sg, - ret, DMA_MEM_TO_DEV, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + init_completion(&cryp->dma_done); +} - if (!in_desc) - return -EINVAL; +static int starfive_hash_dma_xfer(struct starfive_cryp_dev *cryp, + struct scatterlist *sg) +{ + struct dma_async_tx_descriptor *in_desc; + union starfive_alg_cr alg_cr; + int ret = 0; - in_desc->callback = starfive_hash_start; - in_desc->callback_param = ctx; + alg_cr.v = 0; + alg_cr.start = 1; + alg_cr.hash_dma_en = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); + + writel(sg_dma_len(sg), cryp->base + STARFIVE_DMA_IN_LEN_OFFSET); + sg_dma_len(sg) = ALIGN(sg_dma_len(sg), sizeof(u32)); + + in_desc = dmaengine_prep_slave_sg(cryp->tx, sg, 1, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!in_desc) { + ret = -EINVAL; + goto end; + } + + reinit_completion(&cryp->dma_done); + in_desc->callback = starfive_hash_dma_callback; + in_desc->callback_param = cryp; dmaengine_submit(in_desc); dma_async_issue_pending(cryp->tx); - return 0; -} + if (!wait_for_completion_timeout(&cryp->dma_done, + msecs_to_jiffies(1000))) + ret = -ETIMEDOUT; -static int starfive_hash_xmit(struct starfive_cryp_ctx *ctx) -{ - struct starfive_cryp_request_ctx *rctx = ctx->rctx; - struct starfive_cryp_dev *cryp = ctx->cryp; - int ret = 0; +end: + alg_cr.v = 0; + alg_cr.clear = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); - rctx->csr.hash.v = 0; - rctx->csr.hash.reset = 1; - writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR); - - if (starfive_hash_wait_busy(ctx)) - return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error resetting engine.\n"); - - rctx->csr.hash.v = 0; - rctx->csr.hash.mode = ctx->hash_mode; - rctx->csr.hash.ie = 1; - - if (ctx->is_hmac) { - ret = starfive_hash_hmac_key(ctx); - if (ret) - return ret; - } else { - rctx->csr.hash.start = 1; - rctx->csr.hash.firstb = 1; - writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR); - } - - return starfive_hash_xmit_dma(ctx); + return ret; } static int starfive_hash_copy_hash(struct ahash_request *req) @@ -215,58 +182,74 @@ static int starfive_hash_copy_hash(struct ahash_request *req) return 0; } -void starfive_hash_done_task(unsigned long param) +static void starfive_hash_done_task(struct starfive_cryp_dev *cryp) { - struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)param; int err = cryp->err; if (!err) err = starfive_hash_copy_hash(cryp->req.hreq); - /* Reset to clear hash_done in irq register*/ - writel(STARFIVE_HASH_RESET, cryp->base + STARFIVE_HASH_SHACSR); - crypto_finalize_hash_request(cryp->engine, cryp->req.hreq, err); } -static int starfive_hash_check_aligned(struct scatterlist *sg, size_t total, size_t align) -{ - int len = 0; - - if (!total) - return 0; - - if (!IS_ALIGNED(total, align)) - return -EINVAL; - - while (sg) { - if (!IS_ALIGNED(sg->offset, sizeof(u32))) - return -EINVAL; - - if (!IS_ALIGNED(sg->length, align)) - return -EINVAL; - - len += sg->length; - sg = sg_next(sg); - } - - if (len != total) - return -EINVAL; - - return 0; -} - static int starfive_hash_one_request(struct crypto_engine *engine, void *areq) { struct ahash_request *req = container_of(areq, struct ahash_request, base); struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct starfive_cryp_request_ctx *rctx = ctx->rctx; struct starfive_cryp_dev *cryp = ctx->cryp; + struct scatterlist *tsg; + int ret, src_nents, i; - if (!cryp) - return -ENODEV; + writel(STARFIVE_HASH_RESET, cryp->base + STARFIVE_HASH_SHACSR); - return starfive_hash_xmit(ctx); + if (starfive_hash_wait_busy(cryp)) + return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error resetting hardware\n"); + + rctx->csr.hash.v = 0; + rctx->csr.hash.mode = ctx->hash_mode; + + if (ctx->is_hmac) { + ret = starfive_hash_hmac_key(ctx); + if (ret) + return ret; + } else { + rctx->csr.hash.start = 1; + rctx->csr.hash.firstb = 1; + writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR); + } + + /* No input message, get digest and end. */ + if (!rctx->total) + goto hash_start; + + starfive_hash_dma_init(cryp); + + for_each_sg(rctx->in_sg, tsg, rctx->in_sg_len, i) { + src_nents = dma_map_sg(cryp->dev, tsg, 1, DMA_TO_DEVICE); + if (src_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg error\n"); + + ret = starfive_hash_dma_xfer(cryp, tsg); + dma_unmap_sg(cryp->dev, tsg, 1, DMA_TO_DEVICE); + if (ret) + return ret; + } + +hash_start: + starfive_hash_start(cryp); + + if (starfive_hash_wait_busy(cryp)) + return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error generating digest\n"); + + if (ctx->is_hmac) + cryp->err = starfive_hash_wait_hmac_done(cryp); + + starfive_hash_done_task(cryp); + + return 0; } static int starfive_hash_init(struct ahash_request *req) @@ -337,22 +320,6 @@ static int starfive_hash_finup(struct ahash_request *req) return crypto_ahash_finup(&rctx->ahash_fbk_req); } -static int starfive_hash_digest_fb(struct ahash_request *req) -{ - struct starfive_cryp_request_ctx *rctx = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(tfm); - - ahash_request_set_tfm(&rctx->ahash_fbk_req, ctx->ahash_fbk); - ahash_request_set_callback(&rctx->ahash_fbk_req, req->base.flags, - req->base.complete, req->base.data); - - ahash_request_set_crypt(&rctx->ahash_fbk_req, req->src, - req->result, req->nbytes); - - return crypto_ahash_digest(&rctx->ahash_fbk_req); -} - static int starfive_hash_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -370,9 +337,6 @@ static int starfive_hash_digest(struct ahash_request *req) rctx->in_sg_len = sg_nents_for_len(rctx->in_sg, rctx->total); ctx->rctx = rctx; - if (starfive_hash_check_aligned(rctx->in_sg, rctx->total, rctx->blksize)) - return starfive_hash_digest_fb(req); - return crypto_transfer_hash_request_to_engine(cryp->engine, req); } @@ -406,7 +370,8 @@ static int starfive_hash_import(struct ahash_request *req, const void *in) static int starfive_hash_init_tfm(struct crypto_ahash *hash, const char *alg_name, - unsigned int mode) + unsigned int mode, + bool is_hmac) { struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); @@ -426,7 +391,7 @@ static int starfive_hash_init_tfm(struct crypto_ahash *hash, crypto_ahash_set_reqsize(hash, sizeof(struct starfive_cryp_request_ctx) + crypto_ahash_reqsize(ctx->ahash_fbk)); - ctx->keylen = 0; + ctx->is_hmac = is_hmac; ctx->hash_mode = mode; return 0; @@ -529,81 +494,61 @@ static int starfive_hash_setkey(struct crypto_ahash *hash, static int starfive_sha224_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha224-generic", - STARFIVE_HASH_SHA224); + STARFIVE_HASH_SHA224, 0); } static int starfive_sha256_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha256-generic", - STARFIVE_HASH_SHA256); + STARFIVE_HASH_SHA256, 0); } static int starfive_sha384_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha384-generic", - STARFIVE_HASH_SHA384); + STARFIVE_HASH_SHA384, 0); } static int starfive_sha512_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha512-generic", - STARFIVE_HASH_SHA512); + STARFIVE_HASH_SHA512, 0); } static int starfive_sm3_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sm3-generic", - STARFIVE_HASH_SM3); + STARFIVE_HASH_SM3, 0); } static int starfive_hmac_sha224_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha224-generic)", - STARFIVE_HASH_SHA224); + STARFIVE_HASH_SHA224, 1); } static int starfive_hmac_sha256_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha256-generic)", - STARFIVE_HASH_SHA256); + STARFIVE_HASH_SHA256, 1); } static int starfive_hmac_sha384_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha384-generic)", - STARFIVE_HASH_SHA384); + STARFIVE_HASH_SHA384, 1); } static int starfive_hmac_sha512_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha512-generic)", - STARFIVE_HASH_SHA512); + STARFIVE_HASH_SHA512, 1); } static int starfive_hmac_sm3_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sm3-generic)", - STARFIVE_HASH_SM3); + STARFIVE_HASH_SM3, 1); } static struct ahash_engine_alg algs_sha2_sm3[] = { From a05c821e42e6b6fd265270a6b6b9413fee1d4221 Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Tue, 5 Mar 2024 15:10:02 +0800 Subject: [PATCH 016/123] crypto: starfive - Skip unneeded key free Skip unneeded kfree_sensitive if RSA module is using falback algo. Signed-off-by: Jia Jie Ho Signed-off-by: Herbert Xu --- drivers/crypto/starfive/jh7110-rsa.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/starfive/jh7110-rsa.c b/drivers/crypto/starfive/jh7110-rsa.c index cf8bda7f0855..e642e948d747 100644 --- a/drivers/crypto/starfive/jh7110-rsa.c +++ b/drivers/crypto/starfive/jh7110-rsa.c @@ -45,6 +45,9 @@ static inline int starfive_pka_wait_done(struct starfive_cryp_ctx *ctx) static void starfive_rsa_free_key(struct starfive_rsa_key *key) { + if (!key->key_sz) + return; + kfree_sensitive(key->d); kfree_sensitive(key->e); kfree_sensitive(key->n); From 7467147ef9bf42d1ea5b3314c7a05cd542b3518e Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Tue, 5 Mar 2024 15:10:03 +0800 Subject: [PATCH 017/123] crypto: starfive - Use dma for aes requests Convert AES module to use dma for data transfers to reduce cpu load and compatible with future variants. Signed-off-by: Jia Jie Ho Signed-off-by: Herbert Xu --- drivers/crypto/starfive/Kconfig | 4 + drivers/crypto/starfive/jh7110-aes.c | 595 +++++++++++++++++--------- drivers/crypto/starfive/jh7110-cryp.c | 34 -- drivers/crypto/starfive/jh7110-cryp.h | 6 +- 4 files changed, 398 insertions(+), 241 deletions(-) diff --git a/drivers/crypto/starfive/Kconfig b/drivers/crypto/starfive/Kconfig index cb59357b58b2..0fe389e9f932 100644 --- a/drivers/crypto/starfive/Kconfig +++ b/drivers/crypto/starfive/Kconfig @@ -14,6 +14,10 @@ config CRYPTO_DEV_JH7110 select CRYPTO_RSA select CRYPTO_AES select CRYPTO_CCM + select CRYPTO_GCM + select CRYPTO_ECB + select CRYPTO_CBC + select CRYPTO_CTR help Support for StarFive JH7110 crypto hardware acceleration engine. This module provides acceleration for public key algo, diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c index 1ac15cc4ef3c..72b7d46150d5 100644 --- a/drivers/crypto/starfive/jh7110-aes.c +++ b/drivers/crypto/starfive/jh7110-aes.c @@ -78,7 +78,7 @@ static inline int is_gcm(struct starfive_cryp_dev *cryp) return (cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_GCM; } -static inline int is_encrypt(struct starfive_cryp_dev *cryp) +static inline bool is_encrypt(struct starfive_cryp_dev *cryp) { return cryp->flags & FLG_ENCRYPT; } @@ -103,16 +103,6 @@ static void starfive_aes_aead_hw_start(struct starfive_cryp_ctx *ctx, u32 hw_mod } } -static inline void starfive_aes_set_ivlen(struct starfive_cryp_ctx *ctx) -{ - struct starfive_cryp_dev *cryp = ctx->cryp; - - if (is_gcm(cryp)) - writel(GCM_AES_IV_SIZE, cryp->base + STARFIVE_AES_IVLEN); - else - writel(AES_BLOCK_SIZE, cryp->base + STARFIVE_AES_IVLEN); -} - static inline void starfive_aes_set_alen(struct starfive_cryp_ctx *ctx) { struct starfive_cryp_dev *cryp = ctx->cryp; @@ -261,7 +251,6 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx) rctx->csr.aes.mode = hw_mode; rctx->csr.aes.cmode = !is_encrypt(cryp); - rctx->csr.aes.ie = 1; rctx->csr.aes.stmode = STARFIVE_AES_MODE_XFB_1; if (cryp->side_chan) { @@ -279,7 +268,7 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx) case STARFIVE_AES_MODE_GCM: starfive_aes_set_alen(ctx); starfive_aes_set_mlen(ctx); - starfive_aes_set_ivlen(ctx); + writel(GCM_AES_IV_SIZE, cryp->base + STARFIVE_AES_IVLEN); starfive_aes_aead_hw_start(ctx, hw_mode); starfive_aes_write_iv(ctx, (void *)cryp->req.areq->iv); break; @@ -300,28 +289,30 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx) return cryp->err; } -static int starfive_aes_read_authtag(struct starfive_cryp_dev *cryp) +static int starfive_aes_read_authtag(struct starfive_cryp_ctx *ctx) { - int i, start_addr; + struct starfive_cryp_dev *cryp = ctx->cryp; + struct starfive_cryp_request_ctx *rctx = ctx->rctx; + int i; if (starfive_aes_wait_busy(cryp)) return dev_err_probe(cryp->dev, -ETIMEDOUT, "Timeout waiting for tag generation."); - start_addr = STARFIVE_AES_NONCE0; - - if (is_gcm(cryp)) - for (i = 0; i < AES_BLOCK_32; i++, start_addr += 4) - cryp->tag_out[i] = readl(cryp->base + start_addr); - else + if ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_GCM) { + cryp->tag_out[0] = readl(cryp->base + STARFIVE_AES_NONCE0); + cryp->tag_out[1] = readl(cryp->base + STARFIVE_AES_NONCE1); + cryp->tag_out[2] = readl(cryp->base + STARFIVE_AES_NONCE2); + cryp->tag_out[3] = readl(cryp->base + STARFIVE_AES_NONCE3); + } else { for (i = 0; i < AES_BLOCK_32; i++) cryp->tag_out[i] = readl(cryp->base + STARFIVE_AES_AESDIO0R); + } if (is_encrypt(cryp)) { - scatterwalk_copychunks(cryp->tag_out, &cryp->out_walk, cryp->authsize, 1); + scatterwalk_map_and_copy(cryp->tag_out, rctx->out_sg, + cryp->total_in, cryp->authsize, 1); } else { - scatterwalk_copychunks(cryp->tag_in, &cryp->in_walk, cryp->authsize, 0); - if (crypto_memneq(cryp->tag_in, cryp->tag_out, cryp->authsize)) return dev_err_probe(cryp->dev, -EBADMSG, "Failed tag verification\n"); } @@ -329,23 +320,18 @@ static int starfive_aes_read_authtag(struct starfive_cryp_dev *cryp) return 0; } -static void starfive_aes_finish_req(struct starfive_cryp_dev *cryp) +static void starfive_aes_finish_req(struct starfive_cryp_ctx *ctx) { - union starfive_aes_csr csr; + struct starfive_cryp_dev *cryp = ctx->cryp; int err = cryp->err; if (!err && cryp->authsize) - err = starfive_aes_read_authtag(cryp); + err = starfive_aes_read_authtag(ctx); if (!err && ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CBC || (cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CTR)) starfive_aes_get_iv(cryp, (void *)cryp->req.sreq->iv); - /* reset irq flags*/ - csr.v = 0; - csr.aesrst = 1; - writel(csr.v, cryp->base + STARFIVE_AES_CSR); - if (cryp->authsize) crypto_finalize_aead_request(cryp->engine, cryp->req.areq, err); else @@ -353,39 +339,6 @@ static void starfive_aes_finish_req(struct starfive_cryp_dev *cryp) err); } -void starfive_aes_done_task(unsigned long param) -{ - struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)param; - u32 block[AES_BLOCK_32]; - u32 stat; - int i; - - for (i = 0; i < AES_BLOCK_32; i++) - block[i] = readl(cryp->base + STARFIVE_AES_AESDIO0R); - - scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_out), 1); - - cryp->total_out -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_out); - - if (!cryp->total_out) { - starfive_aes_finish_req(cryp); - return; - } - - memset(block, 0, AES_BLOCK_SIZE); - scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_in), 0); - cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in); - - for (i = 0; i < AES_BLOCK_32; i++) - writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R); - - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_AES_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); -} - static int starfive_aes_gcm_write_adata(struct starfive_cryp_ctx *ctx) { struct starfive_cryp_dev *cryp = ctx->cryp; @@ -451,60 +404,165 @@ static int starfive_aes_ccm_write_adata(struct starfive_cryp_ctx *ctx) return 0; } -static int starfive_aes_prepare_req(struct skcipher_request *req, - struct aead_request *areq) +static void starfive_aes_dma_done(void *param) { - struct starfive_cryp_ctx *ctx; - struct starfive_cryp_request_ctx *rctx; - struct starfive_cryp_dev *cryp; + struct starfive_cryp_dev *cryp = param; - if (!req && !areq) - return -EINVAL; + complete(&cryp->dma_done); +} - ctx = req ? crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)) : - crypto_aead_ctx(crypto_aead_reqtfm(areq)); +static void starfive_aes_dma_init(struct starfive_cryp_dev *cryp) +{ + cryp->cfg_in.direction = DMA_MEM_TO_DEV; + cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES; + cryp->cfg_in.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cryp->cfg_in.src_maxburst = cryp->dma_maxburst; + cryp->cfg_in.dst_maxburst = cryp->dma_maxburst; + cryp->cfg_in.dst_addr = cryp->phys_base + STARFIVE_ALG_FIFO_OFFSET; - cryp = ctx->cryp; - rctx = req ? skcipher_request_ctx(req) : aead_request_ctx(areq); + dmaengine_slave_config(cryp->tx, &cryp->cfg_in); - if (req) { - cryp->req.sreq = req; - cryp->total_in = req->cryptlen; - cryp->total_out = req->cryptlen; - cryp->assoclen = 0; - cryp->authsize = 0; + cryp->cfg_out.direction = DMA_DEV_TO_MEM; + cryp->cfg_out.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cryp->cfg_out.dst_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES; + cryp->cfg_out.src_maxburst = 4; + cryp->cfg_out.dst_maxburst = 4; + cryp->cfg_out.src_addr = cryp->phys_base + STARFIVE_ALG_FIFO_OFFSET; + + dmaengine_slave_config(cryp->rx, &cryp->cfg_out); + + init_completion(&cryp->dma_done); +} + +static int starfive_aes_dma_xfer(struct starfive_cryp_dev *cryp, + struct scatterlist *src, + struct scatterlist *dst, + int len) +{ + struct dma_async_tx_descriptor *in_desc, *out_desc; + union starfive_alg_cr alg_cr; + int ret = 0, in_save, out_save; + + alg_cr.v = 0; + alg_cr.start = 1; + alg_cr.aes_dma_en = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); + + in_save = sg_dma_len(src); + out_save = sg_dma_len(dst); + + writel(ALIGN(len, AES_BLOCK_SIZE), cryp->base + STARFIVE_DMA_IN_LEN_OFFSET); + writel(ALIGN(len, AES_BLOCK_SIZE), cryp->base + STARFIVE_DMA_OUT_LEN_OFFSET); + + sg_dma_len(src) = ALIGN(len, AES_BLOCK_SIZE); + sg_dma_len(dst) = ALIGN(len, AES_BLOCK_SIZE); + + out_desc = dmaengine_prep_slave_sg(cryp->rx, dst, 1, DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!out_desc) { + ret = -EINVAL; + goto dma_err; + } + + out_desc->callback = starfive_aes_dma_done; + out_desc->callback_param = cryp; + + reinit_completion(&cryp->dma_done); + dmaengine_submit(out_desc); + dma_async_issue_pending(cryp->rx); + + in_desc = dmaengine_prep_slave_sg(cryp->tx, src, 1, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!in_desc) { + ret = -EINVAL; + goto dma_err; + } + + dmaengine_submit(in_desc); + dma_async_issue_pending(cryp->tx); + + if (!wait_for_completion_timeout(&cryp->dma_done, + msecs_to_jiffies(1000))) + ret = -ETIMEDOUT; + +dma_err: + sg_dma_len(src) = in_save; + sg_dma_len(dst) = out_save; + + alg_cr.v = 0; + alg_cr.clear = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); + + return ret; +} + +static int starfive_aes_map_sg(struct starfive_cryp_dev *cryp, + struct scatterlist *src, + struct scatterlist *dst) +{ + struct scatterlist *stsg, *dtsg; + struct scatterlist _src[2], _dst[2]; + unsigned int remain = cryp->total_in; + unsigned int len, src_nents, dst_nents; + int ret; + + if (src == dst) { + for (stsg = src, dtsg = dst; remain > 0; + stsg = sg_next(stsg), dtsg = sg_next(dtsg)) { + src_nents = dma_map_sg(cryp->dev, stsg, 1, DMA_BIDIRECTIONAL); + if (src_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg error\n"); + + dst_nents = src_nents; + len = min(sg_dma_len(stsg), remain); + + ret = starfive_aes_dma_xfer(cryp, stsg, dtsg, len); + dma_unmap_sg(cryp->dev, stsg, 1, DMA_BIDIRECTIONAL); + if (ret) + return ret; + + remain -= len; + } } else { - cryp->req.areq = areq; - cryp->assoclen = areq->assoclen; - cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(areq)); - if (is_encrypt(cryp)) { - cryp->total_in = areq->cryptlen; - cryp->total_out = areq->cryptlen; - } else { - cryp->total_in = areq->cryptlen - cryp->authsize; - cryp->total_out = cryp->total_in; + for (stsg = src, dtsg = dst;;) { + src_nents = dma_map_sg(cryp->dev, stsg, 1, DMA_TO_DEVICE); + if (src_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg src error\n"); + + dst_nents = dma_map_sg(cryp->dev, dtsg, 1, DMA_FROM_DEVICE); + if (dst_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg dst error\n"); + + len = min(sg_dma_len(stsg), sg_dma_len(dtsg)); + len = min(len, remain); + + ret = starfive_aes_dma_xfer(cryp, stsg, dtsg, len); + dma_unmap_sg(cryp->dev, stsg, 1, DMA_TO_DEVICE); + dma_unmap_sg(cryp->dev, dtsg, 1, DMA_FROM_DEVICE); + if (ret) + return ret; + + remain -= len; + if (remain == 0) + break; + + if (sg_dma_len(stsg) - len) { + stsg = scatterwalk_ffwd(_src, stsg, len); + dtsg = sg_next(dtsg); + } else if (sg_dma_len(dtsg) - len) { + dtsg = scatterwalk_ffwd(_dst, dtsg, len); + stsg = sg_next(stsg); + } else { + stsg = sg_next(stsg); + dtsg = sg_next(dtsg); + } } } - rctx->in_sg = req ? req->src : areq->src; - scatterwalk_start(&cryp->in_walk, rctx->in_sg); - - rctx->out_sg = req ? req->dst : areq->dst; - scatterwalk_start(&cryp->out_walk, rctx->out_sg); - - if (cryp->assoclen) { - rctx->adata = kzalloc(cryp->assoclen + AES_BLOCK_SIZE, GFP_KERNEL); - if (!rctx->adata) - return dev_err_probe(cryp->dev, -ENOMEM, - "Failed to alloc memory for adata"); - - scatterwalk_copychunks(rctx->adata, &cryp->in_walk, cryp->assoclen, 0); - scatterwalk_copychunks(NULL, &cryp->out_walk, cryp->assoclen, 2); - } - - ctx->rctx = rctx; - - return starfive_aes_hw_init(ctx); + return 0; } static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq) @@ -513,35 +571,38 @@ static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq) container_of(areq, struct skcipher_request, base); struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct starfive_cryp_request_ctx *rctx = skcipher_request_ctx(req); struct starfive_cryp_dev *cryp = ctx->cryp; - u32 block[AES_BLOCK_32]; - u32 stat; - int err; - int i; + int ret; - err = starfive_aes_prepare_req(req, NULL); - if (err) - return err; + cryp->req.sreq = req; + cryp->total_in = req->cryptlen; + cryp->total_out = req->cryptlen; + cryp->assoclen = 0; + cryp->authsize = 0; - /* - * Write first plain/ciphertext block to start the module - * then let irq tasklet handle the rest of the data blocks. - */ - scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_in), 0); - cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in); + rctx->in_sg = req->src; + rctx->out_sg = req->dst; - for (i = 0; i < AES_BLOCK_32; i++) - writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R); + ctx->rctx = rctx; - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_AES_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); + ret = starfive_aes_hw_init(ctx); + if (ret) + return ret; + + starfive_aes_dma_init(cryp); + + ret = starfive_aes_map_sg(cryp, rctx->in_sg, rctx->out_sg); + if (ret) + return ret; + + starfive_aes_finish_req(ctx); return 0; } -static int starfive_aes_init_tfm(struct crypto_skcipher *tfm) +static int starfive_aes_init_tfm(struct crypto_skcipher *tfm, + const char *alg_name) { struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -549,12 +610,26 @@ static int starfive_aes_init_tfm(struct crypto_skcipher *tfm) if (!ctx->cryp) return -ENODEV; + ctx->skcipher_fbk = crypto_alloc_skcipher(alg_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->skcipher_fbk)) + return dev_err_probe(ctx->cryp->dev, PTR_ERR(ctx->skcipher_fbk), + "%s() failed to allocate fallback for %s\n", + __func__, alg_name); + crypto_skcipher_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) + - sizeof(struct skcipher_request)); + crypto_skcipher_reqsize(ctx->skcipher_fbk)); return 0; } +static void starfive_aes_exit_tfm(struct crypto_skcipher *tfm) +{ + struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->skcipher_fbk); +} + static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq) { struct aead_request *req = @@ -562,79 +637,99 @@ static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq struct starfive_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); struct starfive_cryp_dev *cryp = ctx->cryp; - struct starfive_cryp_request_ctx *rctx; - u32 block[AES_BLOCK_32]; - u32 stat; - int err; - int i; + struct starfive_cryp_request_ctx *rctx = aead_request_ctx(req); + struct scatterlist _src[2], _dst[2]; + int ret; - err = starfive_aes_prepare_req(NULL, req); - if (err) - return err; + cryp->req.areq = req; + cryp->assoclen = req->assoclen; + cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(req)); - rctx = ctx->rctx; + rctx->in_sg = scatterwalk_ffwd(_src, req->src, cryp->assoclen); + if (req->src == req->dst) + rctx->out_sg = rctx->in_sg; + else + rctx->out_sg = scatterwalk_ffwd(_dst, req->dst, cryp->assoclen); + + if (is_encrypt(cryp)) { + cryp->total_in = req->cryptlen; + cryp->total_out = req->cryptlen; + } else { + cryp->total_in = req->cryptlen - cryp->authsize; + cryp->total_out = cryp->total_in; + scatterwalk_map_and_copy(cryp->tag_in, req->src, + cryp->total_in + cryp->assoclen, + cryp->authsize, 0); + } + + if (cryp->assoclen) { + rctx->adata = kzalloc(cryp->assoclen + AES_BLOCK_SIZE, GFP_KERNEL); + if (!rctx->adata) + return dev_err_probe(cryp->dev, -ENOMEM, + "Failed to alloc memory for adata"); + + if (sg_copy_to_buffer(req->src, sg_nents_for_len(req->src, cryp->assoclen), + rctx->adata, cryp->assoclen) != cryp->assoclen) + return -EINVAL; + } + + if (cryp->total_in) + sg_zero_buffer(rctx->in_sg, sg_nents(rctx->in_sg), + sg_dma_len(rctx->in_sg) - cryp->total_in, + cryp->total_in); + + ctx->rctx = rctx; + + ret = starfive_aes_hw_init(ctx); + if (ret) + return ret; if (!cryp->assoclen) goto write_text; if ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CCM) - cryp->err = starfive_aes_ccm_write_adata(ctx); + ret = starfive_aes_ccm_write_adata(ctx); else - cryp->err = starfive_aes_gcm_write_adata(ctx); + ret = starfive_aes_gcm_write_adata(ctx); kfree(rctx->adata); - if (cryp->err) - return cryp->err; + if (ret) + return ret; write_text: if (!cryp->total_in) goto finish_req; - /* - * Write first plain/ciphertext block to start the module - * then let irq tasklet handle the rest of the data blocks. - */ - scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_in), 0); - cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in); + starfive_aes_dma_init(cryp); - for (i = 0; i < AES_BLOCK_32; i++) - writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R); - - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_AES_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); - - return 0; + ret = starfive_aes_map_sg(cryp, rctx->in_sg, rctx->out_sg); + if (ret) + return ret; finish_req: - starfive_aes_finish_req(cryp); + starfive_aes_finish_req(ctx); return 0; } -static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm) +static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm, + const char *alg_name) { struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm); - struct starfive_cryp_dev *cryp = ctx->cryp; - struct crypto_tfm *aead = crypto_aead_tfm(tfm); - struct crypto_alg *alg = aead->__crt_alg; ctx->cryp = starfive_cryp_find_dev(ctx); if (!ctx->cryp) return -ENODEV; - if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - ctx->aead_fbk = crypto_alloc_aead(alg->cra_name, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->aead_fbk)) - return dev_err_probe(cryp->dev, PTR_ERR(ctx->aead_fbk), - "%s() failed to allocate fallback for %s\n", - __func__, alg->cra_name); - } + ctx->aead_fbk = crypto_alloc_aead(alg_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->aead_fbk)) + return dev_err_probe(ctx->cryp->dev, PTR_ERR(ctx->aead_fbk), + "%s() failed to allocate fallback for %s\n", + __func__, alg_name); - crypto_aead_set_reqsize(tfm, sizeof(struct starfive_cryp_ctx) + - sizeof(struct aead_request)); + crypto_aead_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) + + crypto_aead_reqsize(ctx->aead_fbk)); return 0; } @@ -646,6 +741,44 @@ static void starfive_aes_aead_exit_tfm(struct crypto_aead *tfm) crypto_free_aead(ctx->aead_fbk); } +static bool starfive_aes_check_unaligned(struct starfive_cryp_dev *cryp, + struct scatterlist *src, + struct scatterlist *dst) +{ + struct scatterlist *tsg; + int i; + + for_each_sg(src, tsg, sg_nents(src), i) + if (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) && + !sg_is_last(tsg)) + return true; + + if (src != dst) + for_each_sg(dst, tsg, sg_nents(dst), i) + if (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) && + !sg_is_last(tsg)) + return true; + + return false; +} + +static int starfive_aes_do_fallback(struct skcipher_request *req, bool enc) +{ + struct starfive_cryp_ctx *ctx = + crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct skcipher_request *subreq = skcipher_request_ctx(req); + + skcipher_request_set_tfm(subreq, ctx->skcipher_fbk); + skcipher_request_set_callback(subreq, req->base.flags, + req->base.complete, + req->base.data); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, req->iv); + + return enc ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); +} + static int starfive_aes_crypt(struct skcipher_request *req, unsigned long flags) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -660,32 +793,54 @@ static int starfive_aes_crypt(struct skcipher_request *req, unsigned long flags) if (req->cryptlen & blocksize_align) return -EINVAL; + if (starfive_aes_check_unaligned(cryp, req->src, req->dst)) + return starfive_aes_do_fallback(req, is_encrypt(cryp)); + return crypto_transfer_skcipher_request_to_engine(cryp->engine, req); } +static int starfive_aes_aead_do_fallback(struct aead_request *req, bool enc) +{ + struct starfive_cryp_ctx *ctx = + crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct aead_request *subreq = aead_request_ctx(req); + + aead_request_set_tfm(subreq, ctx->aead_fbk); + aead_request_set_callback(subreq, req->base.flags, + req->base.complete, + req->base.data); + aead_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, req->iv); + aead_request_set_ad(subreq, req->assoclen); + + return enc ? crypto_aead_encrypt(subreq) : + crypto_aead_decrypt(subreq); +} + static int starfive_aes_aead_crypt(struct aead_request *req, unsigned long flags) { struct starfive_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); struct starfive_cryp_dev *cryp = ctx->cryp; + struct scatterlist *src, *dst, _src[2], _dst[2]; cryp->flags = flags; - /* - * HW engine could not perform CCM tag verification on - * non-blocksize aligned text, use fallback algo instead + /* aes-ccm does not support tag verification for non-aligned text, + * use fallback for ccm decryption instead. */ - if (ctx->aead_fbk && !is_encrypt(cryp)) { - struct aead_request *subreq = aead_request_ctx(req); + if (((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CCM) && + !is_encrypt(cryp)) + return starfive_aes_aead_do_fallback(req, 0); - aead_request_set_tfm(subreq, ctx->aead_fbk); - aead_request_set_callback(subreq, req->base.flags, - req->base.complete, req->base.data); - aead_request_set_crypt(subreq, req->src, - req->dst, req->cryptlen, req->iv); - aead_request_set_ad(subreq, req->assoclen); + src = scatterwalk_ffwd(_src, req->src, req->assoclen); - return crypto_aead_decrypt(subreq); - } + if (req->src == req->dst) + dst = src; + else + dst = scatterwalk_ffwd(_dst, req->dst, req->assoclen); + + if (starfive_aes_check_unaligned(cryp, src, dst)) + return starfive_aes_aead_do_fallback(req, is_encrypt(cryp)); return crypto_transfer_aead_request_to_engine(cryp->engine, req); } @@ -706,7 +861,7 @@ static int starfive_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, memcpy(ctx->key, key, keylen); ctx->keylen = keylen; - return 0; + return crypto_skcipher_setkey(ctx->skcipher_fbk, key, keylen); } static int starfive_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key, @@ -725,16 +880,20 @@ static int starfive_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key, memcpy(ctx->key, key, keylen); ctx->keylen = keylen; - if (ctx->aead_fbk) - return crypto_aead_setkey(ctx->aead_fbk, key, keylen); - - return 0; + return crypto_aead_setkey(ctx->aead_fbk, key, keylen); } static int starfive_aes_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { - return crypto_gcm_check_authsize(authsize); + struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm); + int ret; + + ret = crypto_gcm_check_authsize(authsize); + if (ret) + return ret; + + return crypto_aead_setauthsize(ctx->aead_fbk, authsize); } static int starfive_aes_ccm_setauthsize(struct crypto_aead *tfm, @@ -820,9 +979,35 @@ static int starfive_aes_ccm_decrypt(struct aead_request *req) return starfive_aes_aead_crypt(req, STARFIVE_AES_MODE_CCM); } +static int starfive_aes_ecb_init_tfm(struct crypto_skcipher *tfm) +{ + return starfive_aes_init_tfm(tfm, "ecb(aes-generic)"); +} + +static int starfive_aes_cbc_init_tfm(struct crypto_skcipher *tfm) +{ + return starfive_aes_init_tfm(tfm, "cbc(aes-generic)"); +} + +static int starfive_aes_ctr_init_tfm(struct crypto_skcipher *tfm) +{ + return starfive_aes_init_tfm(tfm, "ctr(aes-generic)"); +} + +static int starfive_aes_ccm_init_tfm(struct crypto_aead *tfm) +{ + return starfive_aes_aead_init_tfm(tfm, "ccm_base(ctr(aes-generic),cbcmac(aes-generic))"); +} + +static int starfive_aes_gcm_init_tfm(struct crypto_aead *tfm) +{ + return starfive_aes_aead_init_tfm(tfm, "gcm_base(ctr(aes-generic),ghash-generic)"); +} + static struct skcipher_engine_alg skcipher_algs[] = { { - .base.init = starfive_aes_init_tfm, + .base.init = starfive_aes_ecb_init_tfm, + .base.exit = starfive_aes_exit_tfm, .base.setkey = starfive_aes_setkey, .base.encrypt = starfive_aes_ecb_encrypt, .base.decrypt = starfive_aes_ecb_decrypt, @@ -832,7 +1017,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .cra_name = "ecb(aes)", .cra_driver_name = "starfive-ecb-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -842,7 +1028,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .do_one_request = starfive_aes_do_one_req, }, }, { - .base.init = starfive_aes_init_tfm, + .base.init = starfive_aes_cbc_init_tfm, + .base.exit = starfive_aes_exit_tfm, .base.setkey = starfive_aes_setkey, .base.encrypt = starfive_aes_cbc_encrypt, .base.decrypt = starfive_aes_cbc_decrypt, @@ -853,7 +1040,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .cra_name = "cbc(aes)", .cra_driver_name = "starfive-cbc-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -863,7 +1051,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .do_one_request = starfive_aes_do_one_req, }, }, { - .base.init = starfive_aes_init_tfm, + .base.init = starfive_aes_ctr_init_tfm, + .base.exit = starfive_aes_exit_tfm, .base.setkey = starfive_aes_setkey, .base.encrypt = starfive_aes_ctr_encrypt, .base.decrypt = starfive_aes_ctr_decrypt, @@ -874,7 +1063,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .cra_name = "ctr(aes)", .cra_driver_name = "starfive-ctr-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -892,7 +1082,7 @@ static struct aead_engine_alg aead_algs[] = { .base.setauthsize = starfive_aes_gcm_setauthsize, .base.encrypt = starfive_aes_gcm_encrypt, .base.decrypt = starfive_aes_gcm_decrypt, - .base.init = starfive_aes_aead_init_tfm, + .base.init = starfive_aes_gcm_init_tfm, .base.exit = starfive_aes_aead_exit_tfm, .base.ivsize = GCM_AES_IV_SIZE, .base.maxauthsize = AES_BLOCK_SIZE, @@ -900,7 +1090,8 @@ static struct aead_engine_alg aead_algs[] = { .cra_name = "gcm(aes)", .cra_driver_name = "starfive-gcm-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -914,7 +1105,7 @@ static struct aead_engine_alg aead_algs[] = { .base.setauthsize = starfive_aes_ccm_setauthsize, .base.encrypt = starfive_aes_ccm_encrypt, .base.decrypt = starfive_aes_ccm_decrypt, - .base.init = starfive_aes_aead_init_tfm, + .base.init = starfive_aes_ccm_init_tfm, .base.exit = starfive_aes_aead_exit_tfm, .base.ivsize = AES_BLOCK_SIZE, .base.maxauthsize = AES_BLOCK_SIZE, diff --git a/drivers/crypto/starfive/jh7110-cryp.c b/drivers/crypto/starfive/jh7110-cryp.c index 2685f5483639..e4dfed7ee0b0 100644 --- a/drivers/crypto/starfive/jh7110-cryp.c +++ b/drivers/crypto/starfive/jh7110-cryp.c @@ -89,28 +89,10 @@ static void starfive_dma_cleanup(struct starfive_cryp_dev *cryp) dma_release_channel(cryp->rx); } -static irqreturn_t starfive_cryp_irq(int irq, void *priv) -{ - u32 status; - u32 mask; - struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)priv; - - mask = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - status = readl(cryp->base + STARFIVE_IE_FLAG_OFFSET); - if (status & STARFIVE_IE_FLAG_AES_DONE) { - mask |= STARFIVE_IE_MASK_AES_DONE; - writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET); - tasklet_schedule(&cryp->aes_done); - } - - return IRQ_HANDLED; -} - static int starfive_cryp_probe(struct platform_device *pdev) { struct starfive_cryp_dev *cryp; struct resource *res; - int irq; int ret; cryp = devm_kzalloc(&pdev->dev, sizeof(*cryp), GFP_KERNEL); @@ -125,8 +107,6 @@ static int starfive_cryp_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(cryp->base), "Error remapping memory for platform device\n"); - tasklet_init(&cryp->aes_done, starfive_aes_done_task, (unsigned long)cryp); - cryp->phys_base = res->start; cryp->dma_maxburst = 32; cryp->side_chan = side_chan; @@ -146,16 +126,6 @@ static int starfive_cryp_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(cryp->rst), "Error getting hardware reset line\n"); - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, starfive_cryp_irq, 0, pdev->name, - (void *)cryp); - if (ret) - return dev_err_probe(&pdev->dev, ret, - "Failed to register interrupt handler\n"); - clk_prepare_enable(cryp->hclk); clk_prepare_enable(cryp->ahb); reset_control_deassert(cryp->rst); @@ -212,8 +182,6 @@ static int starfive_cryp_probe(struct platform_device *pdev) clk_disable_unprepare(cryp->ahb); reset_control_assert(cryp->rst); - tasklet_kill(&cryp->aes_done); - return ret; } @@ -225,8 +193,6 @@ static void starfive_cryp_remove(struct platform_device *pdev) starfive_hash_unregister_algs(); starfive_rsa_unregister_algs(); - tasklet_kill(&cryp->aes_done); - crypto_engine_stop(cryp->engine); crypto_engine_exit(cryp->engine); diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h index 82327e21d340..494a74f52706 100644 --- a/drivers/crypto/starfive/jh7110-cryp.h +++ b/drivers/crypto/starfive/jh7110-cryp.h @@ -169,6 +169,7 @@ struct starfive_cryp_ctx { struct crypto_akcipher *akcipher_fbk; struct crypto_ahash *ahash_fbk; struct crypto_aead *aead_fbk; + struct crypto_skcipher *skcipher_fbk; }; struct starfive_cryp_dev { @@ -186,10 +187,7 @@ struct starfive_cryp_dev { struct dma_chan *rx; struct dma_slave_config cfg_in; struct dma_slave_config cfg_out; - struct scatter_walk in_walk; - struct scatter_walk out_walk; struct crypto_engine *engine; - struct tasklet_struct aes_done; struct completion dma_done; size_t assoclen; size_t total_in; @@ -237,6 +235,4 @@ void starfive_rsa_unregister_algs(void); int starfive_aes_register_algs(void); void starfive_aes_unregister_algs(void); - -void starfive_aes_done_task(unsigned long param); #endif From 1e6b251ce1759392666856908113dd5d7cea044d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 5 Mar 2024 13:57:56 -0600 Subject: [PATCH 018/123] crypto: nx - Avoid -Wflex-array-member-not-at-end warning -Wflex-array-member-not-at-end is coming in GCC-14, and we are getting ready to enable it globally. So, we are deprecating flexible-array members in the middle of another structure. There is currently an object (`header`) in `struct nx842_crypto_ctx` that contains a flexible structure (`struct nx842_crypto_header`): struct nx842_crypto_ctx { ... struct nx842_crypto_header header; struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX]; ... }; So, in order to avoid ending up with a flexible-array member in the middle of another struct, we use the `struct_group_tagged()` helper to separate the flexible array from the rest of the members in the flexible structure: struct nx842_crypto_header { struct_group_tagged(nx842_crypto_header_hdr, hdr, ... the rest of the members ); struct nx842_crypto_header_group group[]; } __packed; With the change described above, we can now declare an object of the type of the tagged struct, without embedding the flexible array in the middle of another struct: struct nx842_crypto_ctx { ... struct nx842_crypto_header_hdr header; struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX]; ... } __packed; We also use `container_of()` whenever we need to retrieve a pointer to the flexible structure, through which we can access the flexible array if needed. So, with these changes, fix the following warning: In file included from drivers/crypto/nx/nx-842.c:55: drivers/crypto/nx/nx-842.h:174:36: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] 174 | struct nx842_crypto_header header; | ^~~~~~ Signed-off-by: Gustavo A. R. Silva Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx-842.c | 6 ++++-- drivers/crypto/nx/nx-842.h | 10 ++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 2ab90ec10e61..82214cde2bcd 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -251,7 +251,9 @@ int nx842_crypto_compress(struct crypto_tfm *tfm, u8 *dst, unsigned int *dlen) { struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); - struct nx842_crypto_header *hdr = &ctx->header; + struct nx842_crypto_header *hdr = + container_of(&ctx->header, + struct nx842_crypto_header, hdr); struct nx842_crypto_param p; struct nx842_constraints c = *ctx->driver->constraints; unsigned int groups, hdrsize, h; @@ -490,7 +492,7 @@ int nx842_crypto_decompress(struct crypto_tfm *tfm, } memcpy(&ctx->header, src, hdr_len); - hdr = &ctx->header; + hdr = container_of(&ctx->header, struct nx842_crypto_header, hdr); for (n = 0; n < hdr->groups; n++) { /* ignore applies to last group */ diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h index 7590bfb24d79..25fa70b2112c 100644 --- a/drivers/crypto/nx/nx-842.h +++ b/drivers/crypto/nx/nx-842.h @@ -157,9 +157,11 @@ struct nx842_crypto_header_group { } __packed; struct nx842_crypto_header { - __be16 magic; /* NX842_CRYPTO_MAGIC */ - __be16 ignore; /* decompressed end bytes to ignore */ - u8 groups; /* total groups in this header */ + struct_group_tagged(nx842_crypto_header_hdr, hdr, + __be16 magic; /* NX842_CRYPTO_MAGIC */ + __be16 ignore; /* decompressed end bytes to ignore */ + u8 groups; /* total groups in this header */ + ); struct nx842_crypto_header_group group[]; } __packed; @@ -171,7 +173,7 @@ struct nx842_crypto_ctx { u8 *wmem; u8 *sbounce, *dbounce; - struct nx842_crypto_header header; + struct nx842_crypto_header_hdr header; struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX]; struct nx842_driver *driver; From 29ce50e078b857977202205394f200a25889636e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Mar 2024 20:48:21 -0700 Subject: [PATCH 019/123] crypto: remove CONFIG_CRYPTO_STATS Remove support for the "Crypto usage statistics" feature (CONFIG_CRYPTO_STATS). This feature does not appear to have ever been used, and it is harmful because it significantly reduces performance and is a large maintenance burden. Covering each of these points in detail: 1. Feature is not being used Since these generic crypto statistics are only readable using netlink, it's fairly straightforward to look for programs that use them. I'm unable to find any evidence that any such programs exist. For example, Debian Code Search returns no hits except the kernel header and kernel code itself and translations of the kernel header: https://codesearch.debian.net/search?q=CRYPTOCFGA_STAT&literal=1&perpkg=1 The patch series that added this feature in 2018 (https://lore.kernel.org/linux-crypto/1537351855-16618-1-git-send-email-clabbe@baylibre.com/) said "The goal is to have an ifconfig for crypto device." This doesn't appear to have happened. It's not clear that there is real demand for crypto statistics. Just because the kernel provides other types of statistics such as I/O and networking statistics and some people find those useful does not mean that crypto statistics are useful too. Further evidence that programs are not using CONFIG_CRYPTO_STATS is that it was able to be disabled in RHEL and Fedora as a bug fix (https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/2947). Even further evidence comes from the fact that there are and have been bugs in how the stats work, but they were never reported. For example, before Linux v6.7 hash stats were double-counted in most cases. There has also never been any documentation for this feature, so it might be hard to use even if someone wanted to. 2. CONFIG_CRYPTO_STATS significantly reduces performance Enabling CONFIG_CRYPTO_STATS significantly reduces the performance of the crypto API, even if no program ever retrieves the statistics. This primarily affects systems with a large number of CPUs. For example, https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2039576 reported that Lustre client encryption performance improved from 21.7GB/s to 48.2GB/s by disabling CONFIG_CRYPTO_STATS. It can be argued that this means that CONFIG_CRYPTO_STATS should be optimized with per-cpu counters similar to many of the networking counters. But no one has done this in 5+ years. This is consistent with the fact that the feature appears to be unused, so there seems to be little interest in improving it as opposed to just disabling it. It can be argued that because CONFIG_CRYPTO_STATS is off by default, performance doesn't matter. But Linux distros tend to error on the side of enabling options. The option is enabled in Ubuntu and Arch Linux, and until recently was enabled in RHEL and Fedora (see above). So, even just having the option available is harmful to users. 3. CONFIG_CRYPTO_STATS is a large maintenance burden There are over 1000 lines of code associated with CONFIG_CRYPTO_STATS, spread among 32 files. It significantly complicates much of the implementation of the crypto API. After the initial submission, many fixes and refactorings have consumed effort of multiple people to keep this feature "working". We should be spending this effort elsewhere. Acked-by: Ard Biesheuvel Acked-by: Corentin Labbe Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - crypto/Kconfig | 20 --- crypto/Makefile | 2 - crypto/acompress.c | 33 ---- crypto/aead.c | 84 +-------- crypto/ahash.c | 63 +------ crypto/akcipher.c | 31 ---- crypto/compress.h | 3 - crypto/{crypto_user_base.c => crypto_user.c} | 10 +- crypto/crypto_user_stat.c | 176 ------------------- crypto/hash.h | 30 ---- crypto/kpp.c | 30 ---- crypto/lskcipher.c | 73 +------- crypto/rng.c | 44 +---- crypto/scompress.c | 3 - crypto/shash.c | 75 +------- crypto/sig.c | 13 -- crypto/skcipher.c | 86 +-------- crypto/skcipher.h | 10 -- include/crypto/acompress.h | 73 +------- include/crypto/aead.h | 21 --- include/crypto/akcipher.h | 78 +------- include/crypto/algapi.h | 3 - include/crypto/hash.h | 22 --- include/crypto/internal/acompress.h | 1 - include/crypto/internal/cryptouser.h | 16 -- include/crypto/internal/scompress.h | 1 - include/crypto/kpp.h | 58 +----- include/crypto/rng.h | 51 +----- include/crypto/skcipher.h | 25 --- include/uapi/linux/cryptouser.h | 30 ++-- 32 files changed, 71 insertions(+), 1096 deletions(-) rename crypto/{crypto_user_base.c => crypto_user.c} (98%) delete mode 100644 crypto/crypto_user_stat.c delete mode 100644 include/crypto/internal/cryptouser.h diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index f1fb01cd5a25..145342e46ea8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -760,7 +760,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_STATS=y CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_SHA1_S390=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index d33f814f78b2..dc237896f99d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -745,7 +745,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_STATS=y CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_SHA1_S390=m diff --git a/crypto/Kconfig b/crypto/Kconfig index 2903ce19f15c..5688d42a59c2 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1456,26 +1456,6 @@ config CRYPTO_USER_API_ENABLE_OBSOLETE already been phased out from internal use by the kernel, and are only useful for userspace clients that still rely on them. -config CRYPTO_STATS - bool "Crypto usage statistics" - depends on CRYPTO_USER - help - Enable the gathering of crypto stats. - - Enabling this option reduces the performance of the crypto API. It - should only be enabled when there is actually a use case for it. - - This collects data sizes, numbers of requests, and numbers - of errors processed by: - - AEAD ciphers (encrypt, decrypt) - - asymmetric key ciphers (encrypt, decrypt, verify, sign) - - symmetric key ciphers (encrypt, decrypt) - - compression algorithms (compress, decompress) - - hash algorithms (hash) - - key-agreement protocol primitives (setsecret, generate - public key, compute shared secret) - - RNG (generate, seed) - endmenu config CRYPTO_HASH_INFO diff --git a/crypto/Makefile b/crypto/Makefile index 408f0a1f9ab9..de9a3312a2c8 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -69,8 +69,6 @@ cryptomgr-y := algboss.o testmgr.o obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o obj-$(CONFIG_CRYPTO_USER) += crypto_user.o -crypto_user-y := crypto_user_base.o -crypto_user-$(CONFIG_CRYPTO_STATS) += crypto_user_stat.o obj-$(CONFIG_CRYPTO_CMAC) += cmac.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_VMAC) += vmac.o diff --git a/crypto/acompress.c b/crypto/acompress.c index 1c682810a484..6fdf0ff9f3c0 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -93,32 +93,6 @@ static unsigned int crypto_acomp_extsize(struct crypto_alg *alg) return extsize; } -static inline int __crypto_acomp_report_stat(struct sk_buff *skb, - struct crypto_alg *alg) -{ - struct comp_alg_common *calg = __crypto_comp_alg_common(alg); - struct crypto_istat_compress *istat = comp_get_stat(calg); - struct crypto_stat_compress racomp; - - memset(&racomp, 0, sizeof(racomp)); - - strscpy(racomp.type, "acomp", sizeof(racomp.type)); - racomp.stat_compress_cnt = atomic64_read(&istat->compress_cnt); - racomp.stat_compress_tlen = atomic64_read(&istat->compress_tlen); - racomp.stat_decompress_cnt = atomic64_read(&istat->decompress_cnt); - racomp.stat_decompress_tlen = atomic64_read(&istat->decompress_tlen); - racomp.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_ACOMP, sizeof(racomp), &racomp); -} - -#ifdef CONFIG_CRYPTO_STATS -int crypto_acomp_report_stat(struct sk_buff *skb, struct crypto_alg *alg) -{ - return __crypto_acomp_report_stat(skb, alg); -} -#endif - static const struct crypto_type crypto_acomp_type = { .extsize = crypto_acomp_extsize, .init_tfm = crypto_acomp_init_tfm, @@ -127,9 +101,6 @@ static const struct crypto_type crypto_acomp_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_acomp_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_acomp_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_ACOMPRESS_MASK, @@ -184,13 +155,9 @@ EXPORT_SYMBOL_GPL(acomp_request_free); void comp_prepare_alg(struct comp_alg_common *alg) { - struct crypto_istat_compress *istat = comp_get_stat(alg); struct crypto_alg *base = &alg->base; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); } int crypto_register_acomp(struct acomp_alg *alg) diff --git a/crypto/aead.c b/crypto/aead.c index 54906633566a..0e75a69189df 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -20,15 +20,6 @@ #include "internal.h" -static inline struct crypto_istat_aead *aead_get_stat(struct aead_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key, unsigned int keylen) { @@ -90,62 +81,28 @@ int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) } EXPORT_SYMBOL_GPL(crypto_aead_setauthsize); -static inline int crypto_aead_errstat(struct crypto_istat_aead *istat, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&istat->err_cnt); - - return err; -} - int crypto_aead_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct aead_alg *alg = crypto_aead_alg(aead); - struct crypto_istat_aead *istat; - int ret; - - istat = aead_get_stat(alg); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - atomic64_inc(&istat->encrypt_cnt); - atomic64_add(req->cryptlen, &istat->encrypt_tlen); - } if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY) - ret = -ENOKEY; - else - ret = alg->encrypt(req); + return -ENOKEY; - return crypto_aead_errstat(istat, ret); + return crypto_aead_alg(aead)->encrypt(req); } EXPORT_SYMBOL_GPL(crypto_aead_encrypt); int crypto_aead_decrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct aead_alg *alg = crypto_aead_alg(aead); - struct crypto_istat_aead *istat; - int ret; - - istat = aead_get_stat(alg); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - atomic64_inc(&istat->encrypt_cnt); - atomic64_add(req->cryptlen, &istat->encrypt_tlen); - } if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY) - ret = -ENOKEY; - else if (req->cryptlen < crypto_aead_authsize(aead)) - ret = -EINVAL; - else - ret = alg->decrypt(req); + return -ENOKEY; - return crypto_aead_errstat(istat, ret); + if (req->cryptlen < crypto_aead_authsize(aead)) + return -EINVAL; + + return crypto_aead_alg(aead)->decrypt(req); } EXPORT_SYMBOL_GPL(crypto_aead_decrypt); @@ -215,26 +172,6 @@ static void crypto_aead_free_instance(struct crypto_instance *inst) aead->free(aead); } -static int __maybe_unused crypto_aead_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct aead_alg *aead = container_of(alg, struct aead_alg, base); - struct crypto_istat_aead *istat = aead_get_stat(aead); - struct crypto_stat_aead raead; - - memset(&raead, 0, sizeof(raead)); - - strscpy(raead.type, "aead", sizeof(raead.type)); - - raead.stat_encrypt_cnt = atomic64_read(&istat->encrypt_cnt); - raead.stat_encrypt_tlen = atomic64_read(&istat->encrypt_tlen); - raead.stat_decrypt_cnt = atomic64_read(&istat->decrypt_cnt); - raead.stat_decrypt_tlen = atomic64_read(&istat->decrypt_tlen); - raead.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_AEAD, sizeof(raead), &raead); -} - static const struct crypto_type crypto_aead_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_aead_init_tfm, @@ -244,9 +181,6 @@ static const struct crypto_type crypto_aead_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_aead_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_aead_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, @@ -277,7 +211,6 @@ EXPORT_SYMBOL_GPL(crypto_has_aead); static int aead_prepare_alg(struct aead_alg *alg) { - struct crypto_istat_aead *istat = aead_get_stat(alg); struct crypto_alg *base = &alg->base; if (max3(alg->maxauthsize, alg->ivsize, alg->chunksize) > @@ -291,9 +224,6 @@ static int aead_prepare_alg(struct aead_alg *alg) base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_AEAD; - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); - return 0; } diff --git a/crypto/ahash.c b/crypto/ahash.c index 0ac83f7f701d..bcd9de009a91 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -27,22 +27,6 @@ #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e -static inline struct crypto_istat_hash *ahash_get_stat(struct ahash_alg *alg) -{ - return hash_get_stat(&alg->halg); -} - -static inline int crypto_ahash_errstat(struct ahash_alg *alg, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&ahash_get_stat(alg)->err_cnt); - - return err; -} - /* * For an ahash tfm that is using an shash algorithm (instead of an ahash * algorithm), this returns the underlying shash tfm. @@ -344,75 +328,47 @@ static void ahash_restore_req(struct ahash_request *req, int err) int crypto_ahash_update(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_alg *alg; if (likely(tfm->using_shash)) return shash_ahash_update(req, ahash_request_ctx(req)); - alg = crypto_ahash_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_add(req->nbytes, &ahash_get_stat(alg)->hash_tlen); - return crypto_ahash_errstat(alg, alg->update(req)); + return crypto_ahash_alg(tfm)->update(req); } EXPORT_SYMBOL_GPL(crypto_ahash_update); int crypto_ahash_final(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_alg *alg; if (likely(tfm->using_shash)) return crypto_shash_final(ahash_request_ctx(req), req->result); - alg = crypto_ahash_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&ahash_get_stat(alg)->hash_cnt); - return crypto_ahash_errstat(alg, alg->final(req)); + return crypto_ahash_alg(tfm)->final(req); } EXPORT_SYMBOL_GPL(crypto_ahash_final); int crypto_ahash_finup(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_alg *alg; if (likely(tfm->using_shash)) return shash_ahash_finup(req, ahash_request_ctx(req)); - alg = crypto_ahash_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_hash *istat = ahash_get_stat(alg); - - atomic64_inc(&istat->hash_cnt); - atomic64_add(req->nbytes, &istat->hash_tlen); - } - return crypto_ahash_errstat(alg, alg->finup(req)); + return crypto_ahash_alg(tfm)->finup(req); } EXPORT_SYMBOL_GPL(crypto_ahash_finup); int crypto_ahash_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_alg *alg; - int err; if (likely(tfm->using_shash)) return shash_ahash_digest(req, prepare_shash_desc(req, tfm)); - alg = crypto_ahash_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_hash *istat = ahash_get_stat(alg); - - atomic64_inc(&istat->hash_cnt); - atomic64_add(req->nbytes, &istat->hash_tlen); - } - if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) - err = -ENOKEY; - else - err = alg->digest(req); + return -ENOKEY; - return crypto_ahash_errstat(alg, err); + return crypto_ahash_alg(tfm)->digest(req); } EXPORT_SYMBOL_GPL(crypto_ahash_digest); @@ -571,12 +527,6 @@ static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) __crypto_hash_alg_common(alg)->digestsize); } -static int __maybe_unused crypto_ahash_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - return crypto_hash_report_stat(skb, alg, "ahash"); -} - static const struct crypto_type crypto_ahash_type = { .extsize = crypto_ahash_extsize, .init_tfm = crypto_ahash_init_tfm, @@ -586,9 +536,6 @@ static const struct crypto_type crypto_ahash_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_ahash_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_ahash_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_AHASH_MASK, diff --git a/crypto/akcipher.c b/crypto/akcipher.c index 52813f0b19e4..e0ff5f4dda6d 100644 --- a/crypto/akcipher.c +++ b/crypto/akcipher.c @@ -70,30 +70,6 @@ static void crypto_akcipher_free_instance(struct crypto_instance *inst) akcipher->free(akcipher); } -static int __maybe_unused crypto_akcipher_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct akcipher_alg *akcipher = __crypto_akcipher_alg(alg); - struct crypto_istat_akcipher *istat; - struct crypto_stat_akcipher rakcipher; - - istat = akcipher_get_stat(akcipher); - - memset(&rakcipher, 0, sizeof(rakcipher)); - - strscpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); - rakcipher.stat_encrypt_cnt = atomic64_read(&istat->encrypt_cnt); - rakcipher.stat_encrypt_tlen = atomic64_read(&istat->encrypt_tlen); - rakcipher.stat_decrypt_cnt = atomic64_read(&istat->decrypt_cnt); - rakcipher.stat_decrypt_tlen = atomic64_read(&istat->decrypt_tlen); - rakcipher.stat_sign_cnt = atomic64_read(&istat->sign_cnt); - rakcipher.stat_verify_cnt = atomic64_read(&istat->verify_cnt); - rakcipher.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER, - sizeof(rakcipher), &rakcipher); -} - static const struct crypto_type crypto_akcipher_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_akcipher_init_tfm, @@ -103,9 +79,6 @@ static const struct crypto_type crypto_akcipher_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_akcipher_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_akcipher_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_AHASH_MASK, @@ -131,15 +104,11 @@ EXPORT_SYMBOL_GPL(crypto_alloc_akcipher); static void akcipher_prepare_alg(struct akcipher_alg *alg) { - struct crypto_istat_akcipher *istat = akcipher_get_stat(alg); struct crypto_alg *base = &alg->base; base->cra_type = &crypto_akcipher_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_AKCIPHER; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); } static int akcipher_default_op(struct akcipher_request *req) diff --git a/crypto/compress.h b/crypto/compress.h index 19f65516d699..c3cedfb5e606 100644 --- a/crypto/compress.h +++ b/crypto/compress.h @@ -13,14 +13,11 @@ struct acomp_req; struct comp_alg_common; -struct sk_buff; int crypto_init_scomp_ops_async(struct crypto_tfm *tfm); struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req); void crypto_acomp_scomp_free_ctx(struct acomp_req *req); -int crypto_acomp_report_stat(struct sk_buff *skb, struct crypto_alg *alg); - void comp_prepare_alg(struct comp_alg_common *alg); #endif /* _LOCAL_CRYPTO_COMPRESS_H */ diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user.c similarity index 98% rename from crypto/crypto_user_base.c rename to crypto/crypto_user.c index 3fa20f12989f..6c571834e86a 100644 --- a/crypto/crypto_user_base.c +++ b/crypto/crypto_user.c @@ -18,7 +18,6 @@ #include #include #include -#include #include "internal.h" @@ -33,7 +32,7 @@ struct crypto_dump_info { u16 nlmsg_flags; }; -struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact) +static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact) { struct crypto_alg *q, *alg = NULL; @@ -387,6 +386,13 @@ static int crypto_del_rng(struct sk_buff *skb, struct nlmsghdr *nlh, return crypto_del_default_rng(); } +static int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, + struct nlattr **attrs) +{ + /* No longer supported */ + return -ENOTSUPP; +} + #define MSGSIZE(type) sizeof(struct type) static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c deleted file mode 100644 index d4f3d39b5137..000000000000 --- a/crypto/crypto_user_stat.c +++ /dev/null @@ -1,176 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Crypto user configuration API. - * - * Copyright (C) 2017-2018 Corentin Labbe - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define null_terminated(x) (strnlen(x, sizeof(x)) < sizeof(x)) - -struct crypto_dump_info { - struct sk_buff *in_skb; - struct sk_buff *out_skb; - u32 nlmsg_seq; - u16 nlmsg_flags; -}; - -static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) -{ - struct crypto_stat_cipher rcipher; - - memset(&rcipher, 0, sizeof(rcipher)); - - strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); - - return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); -} - -static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) -{ - struct crypto_stat_compress rcomp; - - memset(&rcomp, 0, sizeof(rcomp)); - - strscpy(rcomp.type, "compression", sizeof(rcomp.type)); - - return nla_put(skb, CRYPTOCFGA_STAT_COMPRESS, sizeof(rcomp), &rcomp); -} - -static int crypto_reportstat_one(struct crypto_alg *alg, - struct crypto_user_alg *ualg, - struct sk_buff *skb) -{ - memset(ualg, 0, sizeof(*ualg)); - - strscpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); - strscpy(ualg->cru_driver_name, alg->cra_driver_name, - sizeof(ualg->cru_driver_name)); - strscpy(ualg->cru_module_name, module_name(alg->cra_module), - sizeof(ualg->cru_module_name)); - - ualg->cru_type = 0; - ualg->cru_mask = 0; - ualg->cru_flags = alg->cra_flags; - ualg->cru_refcnt = refcount_read(&alg->cra_refcnt); - - if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority)) - goto nla_put_failure; - if (alg->cra_flags & CRYPTO_ALG_LARVAL) { - struct crypto_stat_larval rl; - - memset(&rl, 0, sizeof(rl)); - strscpy(rl.type, "larval", sizeof(rl.type)); - if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL, sizeof(rl), &rl)) - goto nla_put_failure; - goto out; - } - - if (alg->cra_type && alg->cra_type->report_stat) { - if (alg->cra_type->report_stat(skb, alg)) - goto nla_put_failure; - goto out; - } - - switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) { - case CRYPTO_ALG_TYPE_CIPHER: - if (crypto_report_cipher(skb, alg)) - goto nla_put_failure; - break; - case CRYPTO_ALG_TYPE_COMPRESS: - if (crypto_report_comp(skb, alg)) - goto nla_put_failure; - break; - default: - pr_err("ERROR: Unhandled alg %d in %s\n", - alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL), - __func__); - } - -out: - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -static int crypto_reportstat_alg(struct crypto_alg *alg, - struct crypto_dump_info *info) -{ - struct sk_buff *in_skb = info->in_skb; - struct sk_buff *skb = info->out_skb; - struct nlmsghdr *nlh; - struct crypto_user_alg *ualg; - int err = 0; - - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq, - CRYPTO_MSG_GETSTAT, sizeof(*ualg), info->nlmsg_flags); - if (!nlh) { - err = -EMSGSIZE; - goto out; - } - - ualg = nlmsg_data(nlh); - - err = crypto_reportstat_one(alg, ualg, skb); - if (err) { - nlmsg_cancel(skb, nlh); - goto out; - } - - nlmsg_end(skb, nlh); - -out: - return err; -} - -int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, - struct nlattr **attrs) -{ - struct net *net = sock_net(in_skb->sk); - struct crypto_user_alg *p = nlmsg_data(in_nlh); - struct crypto_alg *alg; - struct sk_buff *skb; - struct crypto_dump_info info; - int err; - - if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name)) - return -EINVAL; - - alg = crypto_alg_match(p, 0); - if (!alg) - return -ENOENT; - - err = -ENOMEM; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); - if (!skb) - goto drop_alg; - - info.in_skb = in_skb; - info.out_skb = skb; - info.nlmsg_seq = in_nlh->nlmsg_seq; - info.nlmsg_flags = 0; - - err = crypto_reportstat_alg(alg, &info); - -drop_alg: - crypto_mod_put(alg); - - if (err) { - kfree_skb(skb); - return err; - } - - return nlmsg_unicast(net->crypto_nlsk, skb, NETLINK_CB(in_skb).portid); -} - -MODULE_LICENSE("GPL"); diff --git a/crypto/hash.h b/crypto/hash.h index 93f6ba0df263..cf9aee07f77d 100644 --- a/crypto/hash.h +++ b/crypto/hash.h @@ -8,39 +8,9 @@ #define _LOCAL_CRYPTO_HASH_H #include -#include #include "internal.h" -static inline struct crypto_istat_hash *hash_get_stat( - struct hash_alg_common *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - -static inline int crypto_hash_report_stat(struct sk_buff *skb, - struct crypto_alg *alg, - const char *type) -{ - struct hash_alg_common *halg = __crypto_hash_alg_common(alg); - struct crypto_istat_hash *istat = hash_get_stat(halg); - struct crypto_stat_hash rhash; - - memset(&rhash, 0, sizeof(rhash)); - - strscpy(rhash.type, type, sizeof(rhash.type)); - - rhash.stat_hash_cnt = atomic64_read(&istat->hash_cnt); - rhash.stat_hash_tlen = atomic64_read(&istat->hash_tlen); - rhash.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash); -} - extern const struct crypto_type crypto_shash_type; int hash_prepare_alg(struct hash_alg_common *alg); diff --git a/crypto/kpp.c b/crypto/kpp.c index 33d44e59387f..ecc63a1a948d 100644 --- a/crypto/kpp.c +++ b/crypto/kpp.c @@ -66,29 +66,6 @@ static void crypto_kpp_free_instance(struct crypto_instance *inst) kpp->free(kpp); } -static int __maybe_unused crypto_kpp_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct kpp_alg *kpp = __crypto_kpp_alg(alg); - struct crypto_istat_kpp *istat; - struct crypto_stat_kpp rkpp; - - istat = kpp_get_stat(kpp); - - memset(&rkpp, 0, sizeof(rkpp)); - - strscpy(rkpp.type, "kpp", sizeof(rkpp.type)); - - rkpp.stat_setsecret_cnt = atomic64_read(&istat->setsecret_cnt); - rkpp.stat_generate_public_key_cnt = - atomic64_read(&istat->generate_public_key_cnt); - rkpp.stat_compute_shared_secret_cnt = - atomic64_read(&istat->compute_shared_secret_cnt); - rkpp.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_KPP, sizeof(rkpp), &rkpp); -} - static const struct crypto_type crypto_kpp_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_kpp_init_tfm, @@ -98,9 +75,6 @@ static const struct crypto_type crypto_kpp_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_kpp_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_kpp_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, @@ -131,15 +105,11 @@ EXPORT_SYMBOL_GPL(crypto_has_kpp); static void kpp_prepare_alg(struct kpp_alg *alg) { - struct crypto_istat_kpp *istat = kpp_get_stat(alg); struct crypto_alg *base = &alg->base; base->cra_type = &crypto_kpp_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_KPP; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); } int crypto_register_kpp(struct kpp_alg *alg) diff --git a/crypto/lskcipher.c b/crypto/lskcipher.c index 0f1bd7dcde24..cdb4897c63e6 100644 --- a/crypto/lskcipher.c +++ b/crypto/lskcipher.c @@ -29,25 +29,6 @@ static inline struct lskcipher_alg *__crypto_lskcipher_alg( return container_of(alg, struct lskcipher_alg, co.base); } -static inline struct crypto_istat_cipher *lskcipher_get_stat( - struct lskcipher_alg *alg) -{ - return skcipher_get_stat_common(&alg->co); -} - -static inline int crypto_lskcipher_errstat(struct lskcipher_alg *alg, int err) -{ - struct crypto_istat_cipher *istat = lskcipher_get_stat(alg); - - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err) - atomic64_inc(&istat->err_cnt); - - return err; -} - static int lskcipher_setkey_unaligned(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen) { @@ -147,20 +128,13 @@ static int crypto_lskcipher_crypt(struct crypto_lskcipher *tfm, const u8 *src, u32 flags)) { unsigned long alignmask = crypto_lskcipher_alignmask(tfm); - struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); - int ret; if (((unsigned long)src | (unsigned long)dst | (unsigned long)iv) & - alignmask) { - ret = crypto_lskcipher_crypt_unaligned(tfm, src, dst, len, iv, - crypt); - goto out; - } + alignmask) + return crypto_lskcipher_crypt_unaligned(tfm, src, dst, len, iv, + crypt); - ret = crypt(tfm, src, dst, len, iv, CRYPTO_LSKCIPHER_FLAG_FINAL); - -out: - return crypto_lskcipher_errstat(alg, ret); + return crypt(tfm, src, dst, len, iv, CRYPTO_LSKCIPHER_FLAG_FINAL); } int crypto_lskcipher_encrypt(struct crypto_lskcipher *tfm, const u8 *src, @@ -168,13 +142,6 @@ int crypto_lskcipher_encrypt(struct crypto_lskcipher *tfm, const u8 *src, { struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_cipher *istat = lskcipher_get_stat(alg); - - atomic64_inc(&istat->encrypt_cnt); - atomic64_add(len, &istat->encrypt_tlen); - } - return crypto_lskcipher_crypt(tfm, src, dst, len, iv, alg->encrypt); } EXPORT_SYMBOL_GPL(crypto_lskcipher_encrypt); @@ -184,13 +151,6 @@ int crypto_lskcipher_decrypt(struct crypto_lskcipher *tfm, const u8 *src, { struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_cipher *istat = lskcipher_get_stat(alg); - - atomic64_inc(&istat->decrypt_cnt); - atomic64_add(len, &istat->decrypt_tlen); - } - return crypto_lskcipher_crypt(tfm, src, dst, len, iv, alg->decrypt); } EXPORT_SYMBOL_GPL(crypto_lskcipher_decrypt); @@ -320,28 +280,6 @@ static int __maybe_unused crypto_lskcipher_report( sizeof(rblkcipher), &rblkcipher); } -static int __maybe_unused crypto_lskcipher_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct lskcipher_alg *skcipher = __crypto_lskcipher_alg(alg); - struct crypto_istat_cipher *istat; - struct crypto_stat_cipher rcipher; - - istat = lskcipher_get_stat(skcipher); - - memset(&rcipher, 0, sizeof(rcipher)); - - strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); - - rcipher.stat_encrypt_cnt = atomic64_read(&istat->encrypt_cnt); - rcipher.stat_encrypt_tlen = atomic64_read(&istat->encrypt_tlen); - rcipher.stat_decrypt_cnt = atomic64_read(&istat->decrypt_cnt); - rcipher.stat_decrypt_tlen = atomic64_read(&istat->decrypt_tlen); - rcipher.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); -} - static const struct crypto_type crypto_lskcipher_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_lskcipher_init_tfm, @@ -351,9 +289,6 @@ static const struct crypto_type crypto_lskcipher_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_lskcipher_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_lskcipher_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, diff --git a/crypto/rng.c b/crypto/rng.c index 279dffdebf59..9d8804e46422 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -30,30 +30,24 @@ static int crypto_default_rng_refcnt; int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { - struct rng_alg *alg = crypto_rng_alg(tfm); u8 *buf = NULL; int err; - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&rng_get_stat(alg)->seed_cnt); - if (!seed && slen) { buf = kmalloc(slen, GFP_KERNEL); - err = -ENOMEM; if (!buf) - goto out; + return -ENOMEM; err = get_random_bytes_wait(buf, slen); if (err) - goto free_buf; + goto out; seed = buf; } - err = alg->seed(tfm, seed, slen); -free_buf: - kfree_sensitive(buf); + err = crypto_rng_alg(tfm)->seed(tfm, seed, slen); out: - return crypto_rng_errstat(alg, err); + kfree_sensitive(buf); + return err; } EXPORT_SYMBOL_GPL(crypto_rng_reset); @@ -91,27 +85,6 @@ static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "seedsize : %u\n", seedsize(alg)); } -static int __maybe_unused crypto_rng_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct rng_alg *rng = __crypto_rng_alg(alg); - struct crypto_istat_rng *istat; - struct crypto_stat_rng rrng; - - istat = rng_get_stat(rng); - - memset(&rrng, 0, sizeof(rrng)); - - strscpy(rrng.type, "rng", sizeof(rrng.type)); - - rrng.stat_generate_cnt = atomic64_read(&istat->generate_cnt); - rrng.stat_generate_tlen = atomic64_read(&istat->generate_tlen); - rrng.stat_seed_cnt = atomic64_read(&istat->seed_cnt); - rrng.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_RNG, sizeof(rrng), &rrng); -} - static const struct crypto_type crypto_rng_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_rng_init_tfm, @@ -120,9 +93,6 @@ static const struct crypto_type crypto_rng_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_rng_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_rng_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, @@ -199,7 +169,6 @@ EXPORT_SYMBOL_GPL(crypto_del_default_rng); int crypto_register_rng(struct rng_alg *alg) { - struct crypto_istat_rng *istat = rng_get_stat(alg); struct crypto_alg *base = &alg->base; if (alg->seedsize > PAGE_SIZE / 8) @@ -209,9 +178,6 @@ int crypto_register_rng(struct rng_alg *alg) base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_RNG; - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); - return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_rng); diff --git a/crypto/scompress.c b/crypto/scompress.c index 60bbb7ea4060..1cef6bb06a81 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -270,9 +270,6 @@ static const struct crypto_type crypto_scomp_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_scomp_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_acomp_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, diff --git a/crypto/shash.c b/crypto/shash.c index c3f7f6a25280..301ab42bf849 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -16,18 +16,6 @@ #include "hash.h" -static inline struct crypto_istat_hash *shash_get_stat(struct shash_alg *alg) -{ - return hash_get_stat(&alg->halg); -} - -static inline int crypto_shash_errstat(struct shash_alg *alg, int err) -{ - if (IS_ENABLED(CONFIG_CRYPTO_STATS) && err) - atomic64_inc(&shash_get_stat(alg)->err_cnt); - return err; -} - int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { @@ -61,29 +49,13 @@ EXPORT_SYMBOL_GPL(crypto_shash_setkey); int crypto_shash_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct shash_alg *shash = crypto_shash_alg(desc->tfm); - int err; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_add(len, &shash_get_stat(shash)->hash_tlen); - - err = shash->update(desc, data, len); - - return crypto_shash_errstat(shash, err); + return crypto_shash_alg(desc->tfm)->update(desc, data, len); } EXPORT_SYMBOL_GPL(crypto_shash_update); int crypto_shash_final(struct shash_desc *desc, u8 *out) { - struct shash_alg *shash = crypto_shash_alg(desc->tfm); - int err; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&shash_get_stat(shash)->hash_cnt); - - err = shash->final(desc, out); - - return crypto_shash_errstat(shash, err); + return crypto_shash_alg(desc->tfm)->final(desc, out); } EXPORT_SYMBOL_GPL(crypto_shash_final); @@ -99,20 +71,7 @@ static int shash_default_finup(struct shash_desc *desc, const u8 *data, int crypto_shash_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - struct crypto_shash *tfm = desc->tfm; - struct shash_alg *shash = crypto_shash_alg(tfm); - int err; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_hash *istat = shash_get_stat(shash); - - atomic64_inc(&istat->hash_cnt); - atomic64_add(len, &istat->hash_tlen); - } - - err = shash->finup(desc, data, len, out); - - return crypto_shash_errstat(shash, err); + return crypto_shash_alg(desc->tfm)->finup(desc, data, len, out); } EXPORT_SYMBOL_GPL(crypto_shash_finup); @@ -129,22 +88,11 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct crypto_shash *tfm = desc->tfm; - struct shash_alg *shash = crypto_shash_alg(tfm); - int err; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_hash *istat = shash_get_stat(shash); - - atomic64_inc(&istat->hash_cnt); - atomic64_add(len, &istat->hash_tlen); - } if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) - err = -ENOKEY; - else - err = shash->digest(desc, data, len, out); + return -ENOKEY; - return crypto_shash_errstat(shash, err); + return crypto_shash_alg(tfm)->digest(desc, data, len, out); } EXPORT_SYMBOL_GPL(crypto_shash_digest); @@ -265,12 +213,6 @@ static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "digestsize : %u\n", salg->digestsize); } -static int __maybe_unused crypto_shash_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - return crypto_hash_report_stat(skb, alg, "shash"); -} - const struct crypto_type crypto_shash_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_shash_init_tfm, @@ -280,9 +222,6 @@ const struct crypto_type crypto_shash_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_shash_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_shash_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, @@ -350,7 +289,6 @@ EXPORT_SYMBOL_GPL(crypto_clone_shash); int hash_prepare_alg(struct hash_alg_common *alg) { - struct crypto_istat_hash *istat = hash_get_stat(alg); struct crypto_alg *base = &alg->base; if (alg->digestsize > HASH_MAX_DIGESTSIZE) @@ -362,9 +300,6 @@ int hash_prepare_alg(struct hash_alg_common *alg) base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); - return 0; } diff --git a/crypto/sig.c b/crypto/sig.c index 224c47019297..7645bedf3a1f 100644 --- a/crypto/sig.c +++ b/crypto/sig.c @@ -45,16 +45,6 @@ static int __maybe_unused crypto_sig_report(struct sk_buff *skb, return nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, sizeof(rsig), &rsig); } -static int __maybe_unused crypto_sig_report_stat(struct sk_buff *skb, - struct crypto_alg *alg) -{ - struct crypto_stat_akcipher rsig = {}; - - strscpy(rsig.type, "sig", sizeof(rsig.type)); - - return nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER, sizeof(rsig), &rsig); -} - static const struct crypto_type crypto_sig_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_sig_init_tfm, @@ -63,9 +53,6 @@ static const struct crypto_type crypto_sig_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_sig_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_sig_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_SIG_MASK, diff --git a/crypto/skcipher.c b/crypto/skcipher.c index bc70e159d27d..ceed7f33a67b 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -89,25 +89,6 @@ static inline struct skcipher_alg *__crypto_skcipher_alg( return container_of(alg, struct skcipher_alg, base); } -static inline struct crypto_istat_cipher *skcipher_get_stat( - struct skcipher_alg *alg) -{ - return skcipher_get_stat_common(&alg->co); -} - -static inline int crypto_skcipher_errstat(struct skcipher_alg *alg, int err) -{ - struct crypto_istat_cipher *istat = skcipher_get_stat(alg); - - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&istat->err_cnt); - - return err; -} - static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) { u8 *addr; @@ -654,23 +635,12 @@ int crypto_skcipher_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); - int ret; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_cipher *istat = skcipher_get_stat(alg); - - atomic64_inc(&istat->encrypt_cnt); - atomic64_add(req->cryptlen, &istat->encrypt_tlen); - } if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) - ret = -ENOKEY; - else if (alg->co.base.cra_type != &crypto_skcipher_type) - ret = crypto_lskcipher_encrypt_sg(req); - else - ret = alg->encrypt(req); - - return crypto_skcipher_errstat(alg, ret); + return -ENOKEY; + if (alg->co.base.cra_type != &crypto_skcipher_type) + return crypto_lskcipher_encrypt_sg(req); + return alg->encrypt(req); } EXPORT_SYMBOL_GPL(crypto_skcipher_encrypt); @@ -678,23 +648,12 @@ int crypto_skcipher_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); - int ret; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_cipher *istat = skcipher_get_stat(alg); - - atomic64_inc(&istat->decrypt_cnt); - atomic64_add(req->cryptlen, &istat->decrypt_tlen); - } if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) - ret = -ENOKEY; - else if (alg->co.base.cra_type != &crypto_skcipher_type) - ret = crypto_lskcipher_decrypt_sg(req); - else - ret = alg->decrypt(req); - - return crypto_skcipher_errstat(alg, ret); + return -ENOKEY; + if (alg->co.base.cra_type != &crypto_skcipher_type) + return crypto_lskcipher_decrypt_sg(req); + return alg->decrypt(req); } EXPORT_SYMBOL_GPL(crypto_skcipher_decrypt); @@ -846,28 +805,6 @@ static int __maybe_unused crypto_skcipher_report( sizeof(rblkcipher), &rblkcipher); } -static int __maybe_unused crypto_skcipher_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct skcipher_alg *skcipher = __crypto_skcipher_alg(alg); - struct crypto_istat_cipher *istat; - struct crypto_stat_cipher rcipher; - - istat = skcipher_get_stat(skcipher); - - memset(&rcipher, 0, sizeof(rcipher)); - - strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); - - rcipher.stat_encrypt_cnt = atomic64_read(&istat->encrypt_cnt); - rcipher.stat_encrypt_tlen = atomic64_read(&istat->encrypt_tlen); - rcipher.stat_decrypt_cnt = atomic64_read(&istat->decrypt_cnt); - rcipher.stat_decrypt_tlen = atomic64_read(&istat->decrypt_tlen); - rcipher.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); -} - static const struct crypto_type crypto_skcipher_type = { .extsize = crypto_skcipher_extsize, .init_tfm = crypto_skcipher_init_tfm, @@ -877,9 +814,6 @@ static const struct crypto_type crypto_skcipher_type = { #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_skcipher_report, -#endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_skcipher_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_SKCIPHER_MASK, @@ -935,7 +869,6 @@ EXPORT_SYMBOL_GPL(crypto_has_skcipher); int skcipher_prepare_alg_common(struct skcipher_alg_common *alg) { - struct crypto_istat_cipher *istat = skcipher_get_stat_common(alg); struct crypto_alg *base = &alg->base; if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8 || @@ -948,9 +881,6 @@ int skcipher_prepare_alg_common(struct skcipher_alg_common *alg) base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); - return 0; } diff --git a/crypto/skcipher.h b/crypto/skcipher.h index 16c9484360da..703651367dd8 100644 --- a/crypto/skcipher.h +++ b/crypto/skcipher.h @@ -10,16 +10,6 @@ #include #include "internal.h" -static inline struct crypto_istat_cipher *skcipher_get_stat_common( - struct skcipher_alg_common *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - int crypto_lskcipher_encrypt_sg(struct skcipher_request *req); int crypto_lskcipher_decrypt_sg(struct skcipher_request *req); int crypto_init_lskcipher_ops_sg(struct crypto_tfm *tfm); diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 80e243611fe2..54937b615239 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -56,31 +56,7 @@ struct crypto_acomp { struct crypto_tfm base; }; -/* - * struct crypto_istat_compress - statistics for compress algorithm - * @compress_cnt: number of compress requests - * @compress_tlen: total data size handled by compress requests - * @decompress_cnt: number of decompress requests - * @decompress_tlen: total data size handled by decompress requests - * @err_cnt: number of error for compress requests - */ -struct crypto_istat_compress { - atomic64_t compress_cnt; - atomic64_t compress_tlen; - atomic64_t decompress_cnt; - atomic64_t decompress_tlen; - atomic64_t err_cnt; -}; - -#ifdef CONFIG_CRYPTO_STATS -#define COMP_ALG_COMMON_STATS struct crypto_istat_compress stat; -#else -#define COMP_ALG_COMMON_STATS -#endif - #define COMP_ALG_COMMON { \ - COMP_ALG_COMMON_STATS \ - \ struct crypto_alg base; \ } struct comp_alg_common COMP_ALG_COMMON; @@ -261,27 +237,6 @@ static inline void acomp_request_set_params(struct acomp_req *req, req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT; } -static inline struct crypto_istat_compress *comp_get_stat( - struct comp_alg_common *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - -static inline int crypto_comp_errstat(struct comp_alg_common *alg, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&comp_get_stat(alg)->err_cnt); - - return err; -} - /** * crypto_acomp_compress() -- Invoke asynchronous compress operation * @@ -293,19 +248,7 @@ static inline int crypto_comp_errstat(struct comp_alg_common *alg, int err) */ static inline int crypto_acomp_compress(struct acomp_req *req) { - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - struct comp_alg_common *alg; - - alg = crypto_comp_alg_common(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_compress *istat = comp_get_stat(alg); - - atomic64_inc(&istat->compress_cnt); - atomic64_add(req->slen, &istat->compress_tlen); - } - - return crypto_comp_errstat(alg, tfm->compress(req)); + return crypto_acomp_reqtfm(req)->compress(req); } /** @@ -319,19 +262,7 @@ static inline int crypto_acomp_compress(struct acomp_req *req) */ static inline int crypto_acomp_decompress(struct acomp_req *req) { - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - struct comp_alg_common *alg; - - alg = crypto_comp_alg_common(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_compress *istat = comp_get_stat(alg); - - atomic64_inc(&istat->decompress_cnt); - atomic64_add(req->slen, &istat->decompress_tlen); - } - - return crypto_comp_errstat(alg, tfm->decompress(req)); + return crypto_acomp_reqtfm(req)->decompress(req); } #endif diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 51382befbe37..0e8a41638678 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -101,22 +101,6 @@ struct aead_request { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; -/* - * struct crypto_istat_aead - statistics for AEAD algorithm - * @encrypt_cnt: number of encrypt requests - * @encrypt_tlen: total data size handled by encrypt requests - * @decrypt_cnt: number of decrypt requests - * @decrypt_tlen: total data size handled by decrypt requests - * @err_cnt: number of error for AEAD requests - */ -struct crypto_istat_aead { - atomic64_t encrypt_cnt; - atomic64_t encrypt_tlen; - atomic64_t decrypt_cnt; - atomic64_t decrypt_tlen; - atomic64_t err_cnt; -}; - /** * struct aead_alg - AEAD cipher definition * @maxauthsize: Set the maximum authentication tag size supported by the @@ -135,7 +119,6 @@ struct crypto_istat_aead { * @setkey: see struct skcipher_alg * @encrypt: see struct skcipher_alg * @decrypt: see struct skcipher_alg - * @stat: statistics for AEAD algorithm * @ivsize: see struct skcipher_alg * @chunksize: see struct skcipher_alg * @init: Initialize the cryptographic transformation object. This function @@ -162,10 +145,6 @@ struct aead_alg { int (*init)(struct crypto_aead *tfm); void (*exit)(struct crypto_aead *tfm); -#ifdef CONFIG_CRYPTO_STATS - struct crypto_istat_aead stat; -#endif - unsigned int ivsize; unsigned int maxauthsize; unsigned int chunksize; diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h index 31c111bebb68..18a10cad07aa 100644 --- a/include/crypto/akcipher.h +++ b/include/crypto/akcipher.h @@ -54,26 +54,6 @@ struct crypto_akcipher { struct crypto_tfm base; }; -/* - * struct crypto_istat_akcipher - statistics for akcipher algorithm - * @encrypt_cnt: number of encrypt requests - * @encrypt_tlen: total data size handled by encrypt requests - * @decrypt_cnt: number of decrypt requests - * @decrypt_tlen: total data size handled by decrypt requests - * @verify_cnt: number of verify operation - * @sign_cnt: number of sign requests - * @err_cnt: number of error for akcipher requests - */ -struct crypto_istat_akcipher { - atomic64_t encrypt_cnt; - atomic64_t encrypt_tlen; - atomic64_t decrypt_cnt; - atomic64_t decrypt_tlen; - atomic64_t verify_cnt; - atomic64_t sign_cnt; - atomic64_t err_cnt; -}; - /** * struct akcipher_alg - generic public key algorithm * @@ -110,7 +90,6 @@ struct crypto_istat_akcipher { * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. - * @stat: Statistics for akcipher algorithm * * @base: Common crypto API algorithm data structure */ @@ -127,10 +106,6 @@ struct akcipher_alg { int (*init)(struct crypto_akcipher *tfm); void (*exit)(struct crypto_akcipher *tfm); -#ifdef CONFIG_CRYPTO_STATS - struct crypto_istat_akcipher stat; -#endif - struct crypto_alg base; }; @@ -302,27 +277,6 @@ static inline unsigned int crypto_akcipher_maxsize(struct crypto_akcipher *tfm) return alg->max_size(tfm); } -static inline struct crypto_istat_akcipher *akcipher_get_stat( - struct akcipher_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - -static inline int crypto_akcipher_errstat(struct akcipher_alg *alg, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&akcipher_get_stat(alg)->err_cnt); - - return err; -} - /** * crypto_akcipher_encrypt() - Invoke public key encrypt operation * @@ -336,16 +290,8 @@ static inline int crypto_akcipher_errstat(struct akcipher_alg *alg, int err) static inline int crypto_akcipher_encrypt(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct akcipher_alg *alg = crypto_akcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_akcipher *istat = akcipher_get_stat(alg); - - atomic64_inc(&istat->encrypt_cnt); - atomic64_add(req->src_len, &istat->encrypt_tlen); - } - - return crypto_akcipher_errstat(alg, alg->encrypt(req)); + return crypto_akcipher_alg(tfm)->encrypt(req); } /** @@ -361,16 +307,8 @@ static inline int crypto_akcipher_encrypt(struct akcipher_request *req) static inline int crypto_akcipher_decrypt(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct akcipher_alg *alg = crypto_akcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_akcipher *istat = akcipher_get_stat(alg); - - atomic64_inc(&istat->decrypt_cnt); - atomic64_add(req->src_len, &istat->decrypt_tlen); - } - - return crypto_akcipher_errstat(alg, alg->decrypt(req)); + return crypto_akcipher_alg(tfm)->decrypt(req); } /** @@ -422,12 +360,8 @@ int crypto_akcipher_sync_decrypt(struct crypto_akcipher *tfm, static inline int crypto_akcipher_sign(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct akcipher_alg *alg = crypto_akcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&akcipher_get_stat(alg)->sign_cnt); - - return crypto_akcipher_errstat(alg, alg->sign(req)); + return crypto_akcipher_alg(tfm)->sign(req); } /** @@ -447,12 +381,8 @@ static inline int crypto_akcipher_sign(struct akcipher_request *req) static inline int crypto_akcipher_verify(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct akcipher_alg *alg = crypto_akcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&akcipher_get_stat(alg)->verify_cnt); - - return crypto_akcipher_errstat(alg, alg->verify(req)); + return crypto_akcipher_alg(tfm)->verify(req); } /** diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 7a4a71af653f..156de41ca760 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -61,9 +61,6 @@ struct crypto_type { void (*show)(struct seq_file *m, struct crypto_alg *alg); int (*report)(struct sk_buff *skb, struct crypto_alg *alg); void (*free)(struct crypto_instance *inst); -#ifdef CONFIG_CRYPTO_STATS - int (*report_stat)(struct sk_buff *skb, struct crypto_alg *alg); -#endif unsigned int type; unsigned int maskclear; diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 5d61f576cfc8..0014bdd81ab7 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -23,27 +23,8 @@ struct crypto_ahash; * crypto_unregister_shash(). */ -/* - * struct crypto_istat_hash - statistics for has algorithm - * @hash_cnt: number of hash requests - * @hash_tlen: total data size hashed - * @err_cnt: number of error for hash requests - */ -struct crypto_istat_hash { - atomic64_t hash_cnt; - atomic64_t hash_tlen; - atomic64_t err_cnt; -}; - -#ifdef CONFIG_CRYPTO_STATS -#define HASH_ALG_COMMON_STAT struct crypto_istat_hash stat; -#else -#define HASH_ALG_COMMON_STAT -#endif - /* * struct hash_alg_common - define properties of message digest - * @stat: Statistics for hash algorithm. * @digestsize: Size of the result of the transformation. A buffer of this size * must be available to the @final and @finup calls, so they can * store the resulting hash into it. For various predefined sizes, @@ -60,8 +41,6 @@ struct crypto_istat_hash { * information. */ #define HASH_ALG_COMMON { \ - HASH_ALG_COMMON_STAT \ - \ unsigned int digestsize; \ unsigned int statesize; \ \ @@ -243,7 +222,6 @@ struct shash_alg { }; }; #undef HASH_ALG_COMMON -#undef HASH_ALG_COMMON_STAT struct crypto_ahash { bool using_shash; /* Underlying algorithm is shash, not ahash */ diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 4ac46bafba9d..d00392d1988e 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -31,7 +31,6 @@ * @init. * * @reqsize: Context size for (de)compression requests - * @stat: Statistics for compress algorithm * @base: Common crypto API algorithm data structure * @calg: Cmonn algorithm data structure shared with scomp */ diff --git a/include/crypto/internal/cryptouser.h b/include/crypto/internal/cryptouser.h deleted file mode 100644 index fd54074332f5..000000000000 --- a/include/crypto/internal/cryptouser.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact); - -#ifdef CONFIG_CRYPTO_STATS -int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs); -#else -static inline int crypto_reportstat(struct sk_buff *in_skb, - struct nlmsghdr *in_nlh, - struct nlattr **attrs) -{ - return -ENOTSUPP; -} -#endif diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h index 858fe3965ae3..07a10fd2d321 100644 --- a/include/crypto/internal/scompress.h +++ b/include/crypto/internal/scompress.h @@ -27,7 +27,6 @@ struct crypto_scomp { * @free_ctx: Function frees context allocated with alloc_ctx * @compress: Function performs a compress operation * @decompress: Function performs a de-compress operation - * @stat: Statistics for compress algorithm * @base: Common crypto API algorithm data structure * @calg: Cmonn algorithm data structure shared with acomp */ diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index 1988e24a0d1d..2d9c4de57b69 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -51,20 +51,6 @@ struct crypto_kpp { struct crypto_tfm base; }; -/* - * struct crypto_istat_kpp - statistics for KPP algorithm - * @setsecret_cnt: number of setsecrey operation - * @generate_public_key_cnt: number of generate_public_key operation - * @compute_shared_secret_cnt: number of compute_shared_secret operation - * @err_cnt: number of error for KPP requests - */ -struct crypto_istat_kpp { - atomic64_t setsecret_cnt; - atomic64_t generate_public_key_cnt; - atomic64_t compute_shared_secret_cnt; - atomic64_t err_cnt; -}; - /** * struct kpp_alg - generic key-agreement protocol primitives * @@ -87,7 +73,6 @@ struct crypto_istat_kpp { * @exit: Undo everything @init did. * * @base: Common crypto API algorithm data structure - * @stat: Statistics for KPP algorithm */ struct kpp_alg { int (*set_secret)(struct crypto_kpp *tfm, const void *buffer, @@ -100,10 +85,6 @@ struct kpp_alg { int (*init)(struct crypto_kpp *tfm); void (*exit)(struct crypto_kpp *tfm); -#ifdef CONFIG_CRYPTO_STATS - struct crypto_istat_kpp stat; -#endif - struct crypto_alg base; }; @@ -291,26 +272,6 @@ struct kpp_secret { unsigned short len; }; -static inline struct crypto_istat_kpp *kpp_get_stat(struct kpp_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - -static inline int crypto_kpp_errstat(struct kpp_alg *alg, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&kpp_get_stat(alg)->err_cnt); - - return err; -} - /** * crypto_kpp_set_secret() - Invoke kpp operation * @@ -329,12 +290,7 @@ static inline int crypto_kpp_errstat(struct kpp_alg *alg, int err) static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, const void *buffer, unsigned int len) { - struct kpp_alg *alg = crypto_kpp_alg(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&kpp_get_stat(alg)->setsecret_cnt); - - return crypto_kpp_errstat(alg, alg->set_secret(tfm, buffer, len)); + return crypto_kpp_alg(tfm)->set_secret(tfm, buffer, len); } /** @@ -353,12 +309,8 @@ static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, static inline int crypto_kpp_generate_public_key(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - struct kpp_alg *alg = crypto_kpp_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&kpp_get_stat(alg)->generate_public_key_cnt); - - return crypto_kpp_errstat(alg, alg->generate_public_key(req)); + return crypto_kpp_alg(tfm)->generate_public_key(req); } /** @@ -374,12 +326,8 @@ static inline int crypto_kpp_generate_public_key(struct kpp_request *req) static inline int crypto_kpp_compute_shared_secret(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - struct kpp_alg *alg = crypto_kpp_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&kpp_get_stat(alg)->compute_shared_secret_cnt); - - return crypto_kpp_errstat(alg, alg->compute_shared_secret(req)); + return crypto_kpp_alg(tfm)->compute_shared_secret(req); } /** diff --git a/include/crypto/rng.h b/include/crypto/rng.h index 6abe5102e5fb..5ac4388f50e1 100644 --- a/include/crypto/rng.h +++ b/include/crypto/rng.h @@ -15,20 +15,6 @@ struct crypto_rng; -/* - * struct crypto_istat_rng: statistics for RNG algorithm - * @generate_cnt: number of RNG generate requests - * @generate_tlen: total data size of generated data by the RNG - * @seed_cnt: number of times the RNG was seeded - * @err_cnt: number of error for RNG requests - */ -struct crypto_istat_rng { - atomic64_t generate_cnt; - atomic64_t generate_tlen; - atomic64_t seed_cnt; - atomic64_t err_cnt; -}; - /** * struct rng_alg - random number generator definition * @@ -46,7 +32,6 @@ struct crypto_istat_rng { * size of the seed is defined with @seedsize . * @set_ent: Set entropy that would otherwise be obtained from * entropy source. Internal use only. - * @stat: Statistics for rng algorithm * @seedsize: The seed size required for a random number generator * initialization defined with this variable. Some * random number generators does not require a seed @@ -63,10 +48,6 @@ struct rng_alg { void (*set_ent)(struct crypto_rng *tfm, const u8 *data, unsigned int len); -#ifdef CONFIG_CRYPTO_STATS - struct crypto_istat_rng stat; -#endif - unsigned int seedsize; struct crypto_alg base; @@ -144,26 +125,6 @@ static inline void crypto_free_rng(struct crypto_rng *tfm) crypto_destroy_tfm(tfm, crypto_rng_tfm(tfm)); } -static inline struct crypto_istat_rng *rng_get_stat(struct rng_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - -static inline int crypto_rng_errstat(struct rng_alg *alg, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&rng_get_stat(alg)->err_cnt); - - return err; -} - /** * crypto_rng_generate() - get random number * @tfm: cipher handle @@ -182,17 +143,7 @@ static inline int crypto_rng_generate(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen) { - struct rng_alg *alg = crypto_rng_alg(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_rng *istat = rng_get_stat(alg); - - atomic64_inc(&istat->generate_cnt); - atomic64_add(dlen, &istat->generate_tlen); - } - - return crypto_rng_errstat(alg, - alg->generate(tfm, src, slen, dst, dlen)); + return crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen); } /** diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index c8857d7bdb37..74d47e23374e 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -64,28 +64,6 @@ struct crypto_lskcipher { struct crypto_tfm base; }; -/* - * struct crypto_istat_cipher - statistics for cipher algorithm - * @encrypt_cnt: number of encrypt requests - * @encrypt_tlen: total data size handled by encrypt requests - * @decrypt_cnt: number of decrypt requests - * @decrypt_tlen: total data size handled by decrypt requests - * @err_cnt: number of error for cipher requests - */ -struct crypto_istat_cipher { - atomic64_t encrypt_cnt; - atomic64_t encrypt_tlen; - atomic64_t decrypt_cnt; - atomic64_t decrypt_tlen; - atomic64_t err_cnt; -}; - -#ifdef CONFIG_CRYPTO_STATS -#define SKCIPHER_ALG_COMMON_STAT struct crypto_istat_cipher stat; -#else -#define SKCIPHER_ALG_COMMON_STAT -#endif - /* * struct skcipher_alg_common - common properties of skcipher_alg * @min_keysize: Minimum key size supported by the transformation. This is the @@ -103,7 +81,6 @@ struct crypto_istat_cipher { * @chunksize: Equal to the block size except for stream ciphers such as * CTR where it is set to the underlying block size. * @statesize: Size of the internal state for the algorithm. - * @stat: Statistics for cipher algorithm * @base: Definition of a generic crypto algorithm. */ #define SKCIPHER_ALG_COMMON { \ @@ -113,8 +90,6 @@ struct crypto_istat_cipher { unsigned int chunksize; \ unsigned int statesize; \ \ - SKCIPHER_ALG_COMMON_STAT \ - \ struct crypto_alg base; \ } struct skcipher_alg_common SKCIPHER_ALG_COMMON; diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 5730c67f0617..20a6c0fc149e 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -32,7 +32,7 @@ enum { CRYPTO_MSG_UPDATEALG, CRYPTO_MSG_GETALG, CRYPTO_MSG_DELRNG, - CRYPTO_MSG_GETSTAT, + CRYPTO_MSG_GETSTAT, /* No longer supported, do not use. */ __CRYPTO_MSG_MAX }; #define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1) @@ -54,16 +54,16 @@ enum crypto_attr_type_t { CRYPTOCFGA_REPORT_AKCIPHER, /* struct crypto_report_akcipher */ CRYPTOCFGA_REPORT_KPP, /* struct crypto_report_kpp */ CRYPTOCFGA_REPORT_ACOMP, /* struct crypto_report_acomp */ - CRYPTOCFGA_STAT_LARVAL, /* struct crypto_stat */ - CRYPTOCFGA_STAT_HASH, /* struct crypto_stat */ - CRYPTOCFGA_STAT_BLKCIPHER, /* struct crypto_stat */ - CRYPTOCFGA_STAT_AEAD, /* struct crypto_stat */ - CRYPTOCFGA_STAT_COMPRESS, /* struct crypto_stat */ - CRYPTOCFGA_STAT_RNG, /* struct crypto_stat */ - CRYPTOCFGA_STAT_CIPHER, /* struct crypto_stat */ - CRYPTOCFGA_STAT_AKCIPHER, /* struct crypto_stat */ - CRYPTOCFGA_STAT_KPP, /* struct crypto_stat */ - CRYPTOCFGA_STAT_ACOMP, /* struct crypto_stat */ + CRYPTOCFGA_STAT_LARVAL, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_HASH, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_BLKCIPHER, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_AEAD, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_COMPRESS, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_RNG, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_CIPHER, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_AKCIPHER, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_KPP, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_ACOMP, /* No longer supported, do not use. */ __CRYPTOCFGA_MAX #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) @@ -79,6 +79,7 @@ struct crypto_user_alg { __u32 cru_flags; }; +/* No longer supported, do not use. */ struct crypto_stat_aead { char type[CRYPTO_MAX_NAME]; __u64 stat_encrypt_cnt; @@ -88,6 +89,7 @@ struct crypto_stat_aead { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_akcipher { char type[CRYPTO_MAX_NAME]; __u64 stat_encrypt_cnt; @@ -99,6 +101,7 @@ struct crypto_stat_akcipher { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_cipher { char type[CRYPTO_MAX_NAME]; __u64 stat_encrypt_cnt; @@ -108,6 +111,7 @@ struct crypto_stat_cipher { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_compress { char type[CRYPTO_MAX_NAME]; __u64 stat_compress_cnt; @@ -117,6 +121,7 @@ struct crypto_stat_compress { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_hash { char type[CRYPTO_MAX_NAME]; __u64 stat_hash_cnt; @@ -124,6 +129,7 @@ struct crypto_stat_hash { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_kpp { char type[CRYPTO_MAX_NAME]; __u64 stat_setsecret_cnt; @@ -132,6 +138,7 @@ struct crypto_stat_kpp { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_rng { char type[CRYPTO_MAX_NAME]; __u64 stat_generate_cnt; @@ -140,6 +147,7 @@ struct crypto_stat_rng { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_larval { char type[CRYPTO_MAX_NAME]; }; From 355577ef84e1e24a32e995d1c9b37b28cbf2cbbe Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 13 Mar 2024 13:53:14 +0100 Subject: [PATCH 020/123] dt-bindings: crypto: ice: Document sc7280 inline crypto engine Document the compatible used for the inline crypto engine found on SC7280. Signed-off-by: Luca Weiss Acked-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- .../devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml index e91bc7dc6ad3..0304f074cf08 100644 --- a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml +++ b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml @@ -15,6 +15,7 @@ properties: - enum: - qcom,sa8775p-inline-crypto-engine - qcom,sc7180-inline-crypto-engine + - qcom,sc7280-inline-crypto-engine - qcom,sm8450-inline-crypto-engine - qcom,sm8550-inline-crypto-engine - qcom,sm8650-inline-crypto-engine From 90d012fbbf14af6d05795f3b44b571b2a4afe583 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 14 Mar 2024 16:45:59 +0800 Subject: [PATCH 021/123] hwrng: core - Convert sprintf/snprintf to sysfs_emit Per filesystems/sysfs.rst, show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. coccinelle complains that there are still a couple of functions that use snprintf(). Convert them to sysfs_emit(). sprintf() will be converted as weel if they have. Generally, this patch is generated by make coccicheck M= MODE=patch \ COCCI=scripts/coccinelle/api/device_attr_show.cocci No functional change intended CC: Olivia Mackall CC: Herbert Xu CC: linux-crypto@vger.kernel.org Signed-off-by: Li Zhijian Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index a3bbdd6e60fc..f5c71a617a99 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -382,7 +382,7 @@ static ssize_t rng_current_show(struct device *dev, if (IS_ERR(rng)) return PTR_ERR(rng); - ret = snprintf(buf, PAGE_SIZE, "%s\n", rng ? rng->name : "none"); + ret = sysfs_emit(buf, "%s\n", rng ? rng->name : "none"); put_rng(rng); return ret; From eb5739a1efbc9ff216271aeea0ebe1c92e5383e5 Mon Sep 17 00:00:00 2001 From: Vitaly Chikunov Date: Mon, 18 Mar 2024 03:42:40 +0300 Subject: [PATCH 022/123] crypto: ecrdsa - Fix module auto-load on add_key Add module alias with the algorithm cra_name similar to what we have for RSA-related and other algorithms. The kernel attempts to modprobe asymmetric algorithms using the names "crypto-$cra_name" and "crypto-$cra_name-all." However, since these aliases are currently missing, the modules are not loaded. For instance, when using the `add_key` function, the hash algorithm is typically loaded automatically, but the asymmetric algorithm is not. Steps to test: 1. Cert is generated usings ima-evm-utils test suite with `gen-keys.sh`, example cert is provided below: $ base64 -d >test-gost2012_512-A.cer < Cc: stable@vger.kernel.org Signed-off-by: Vitaly Chikunov Tested-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecrdsa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index f3c6b5e15e75..3811f3805b5d 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -294,4 +294,5 @@ module_exit(ecrdsa_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vitaly Chikunov "); MODULE_DESCRIPTION("EC-RDSA generic algorithm"); +MODULE_ALIAS_CRYPTO("ecrdsa"); MODULE_ALIAS_CRYPTO("ecrdsa-generic"); From dbad7b6969c10b746a3d8b53c6cf6b4ec62ae5e1 Mon Sep 17 00:00:00 2001 From: Joachim Vandersmissen Date: Wed, 20 Mar 2024 00:13:38 -0500 Subject: [PATCH 023/123] crypto: ecc - update ecc_gen_privkey for FIPS 186-5 FIPS 186-5 [1] was released approximately 1 year ago. The most interesting change for ecc_gen_privkey is the removal of curves with order < 224 bits. This is minimum is now checked in step 1. It is unlikely that there is still any benefit in generating private keys for curves with n < 224, as those curves provide less than 112 bits of security strength and are therefore unsafe for any modern usage. This patch also updates the documentation for __ecc_is_key_valid and ecc_gen_privkey to clarify which FIPS 186-5 method is being used to generate private keys. Previous documentation mentioned that "extra random bits" was used. However, this did not match the code. Instead, the code currently uses (and always has used) the "rejection sampling" ("testing candidates" in FIPS 186-4) method. [1]: https://doi.org/10.6028/NIST.FIPS.186-5 Signed-off-by: Joachim Vandersmissen Signed-off-by: Herbert Xu --- crypto/ecc.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index f53fb4d6af99..3581027e9f92 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -1416,6 +1416,12 @@ void ecc_point_mult_shamir(const struct ecc_point *result, } EXPORT_SYMBOL(ecc_point_mult_shamir); +/* + * This function performs checks equivalent to Appendix A.4.2 of FIPS 186-5. + * Whereas A.4.2 results in an integer in the interval [1, n-1], this function + * ensures that the integer is in the range of [2, n-3]. We are slightly + * stricter because of the currently used scalar multiplication algorithm. + */ static int __ecc_is_key_valid(const struct ecc_curve *curve, const u64 *private_key, unsigned int ndigits) { @@ -1455,16 +1461,11 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, EXPORT_SYMBOL(ecc_is_key_valid); /* - * ECC private keys are generated using the method of extra random bits, - * equivalent to that described in FIPS 186-4, Appendix B.4.1. - * - * d = (c mod(n–1)) + 1 where c is a string of random bits, 64 bits longer - * than requested - * 0 <= c mod(n-1) <= n-2 and implies that - * 1 <= d <= n-1 + * ECC private keys are generated using the method of rejection sampling, + * equivalent to that described in FIPS 186-5, Appendix A.2.2. * * This method generates a private key uniformly distributed in the range - * [1, n-1]. + * [2, n-3]. */ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) { @@ -1474,12 +1475,15 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) unsigned int nbits = vli_num_bits(curve->n, ndigits); int err; - /* Check that N is included in Table 1 of FIPS 186-4, section 6.1.1 */ - if (nbits < 160 || ndigits > ARRAY_SIZE(priv)) + /* + * Step 1 & 2: check that N is included in Table 1 of FIPS 186-5, + * section 6.1.1. + */ + if (nbits < 224 || ndigits > ARRAY_SIZE(priv)) return -EINVAL; /* - * FIPS 186-4 recommends that the private key should be obtained from a + * FIPS 186-5 recommends that the private key should be obtained from a * RBG with a security strength equal to or greater than the security * strength associated with N. * @@ -1492,12 +1496,13 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) if (crypto_get_default_rng()) return -EFAULT; + /* Step 3: obtain N returned_bits from the DRBG. */ err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)priv, nbytes); crypto_put_default_rng(); if (err) return err; - /* Make sure the private key is in the valid range. */ + /* Step 4: make sure the private key is in the valid range. */ if (__ecc_is_key_valid(curve, priv, ndigits)) return -EINVAL; From 48e4fd6d54f54d0ceab5a952d73e47a9454a6ccb Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 21 Mar 2024 10:44:33 -0400 Subject: [PATCH 024/123] crypto: ecdsa - Fix module auto-load on add-key Add module alias with the algorithm cra_name similar to what we have for RSA-related and other algorithms. The kernel attempts to modprobe asymmetric algorithms using the names "crypto-$cra_name" and "crypto-$cra_name-all." However, since these aliases are currently missing, the modules are not loaded. For instance, when using the `add_key` function, the hash algorithm is typically loaded automatically, but the asymmetric algorithm is not. Steps to test: 1. Create certificate openssl req -x509 -sha256 -newkey ec \ -pkeyopt "ec_paramgen_curve:secp384r1" -keyout key.pem -days 365 \ -subj '/CN=test' -nodes -outform der -out nist-p384.der 2. Optionally, trace module requests with: trace-cmd stream -e module & 3. Trigger add_key call for the cert: # keyctl padd asymmetric "" @u < nist-p384.der 641069229 # lsmod | head -2 Module Size Used by ecdsa_generic 16384 0 Fixes: c12d448ba939 ("crypto: ecdsa - Register NIST P384 and extend test suite") Cc: stable@vger.kernel.org Signed-off-by: Stefan Berger Reviewed-by: Vitaly Chikunov Signed-off-by: Herbert Xu --- crypto/ecdsa.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index fbd76498aba8..3f9ec273a121 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -373,4 +373,7 @@ module_exit(ecdsa_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefan Berger "); MODULE_DESCRIPTION("ECDSA generic algorithm"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p192"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p256"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p384"); MODULE_ALIAS_CRYPTO("ecdsa-generic"); From 616ce45c150fa65683c8c1b1f2e2ac930462868d Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Thu, 21 Mar 2024 16:08:46 -0500 Subject: [PATCH 025/123] crypto: iaa - Fix some errors in IAA documentation This cleans up the following issues I ran into when trying to use the scripts and commands in the iaa-crypto.rst document. - Fix incorrect arguments being passed to accel-config config-wq. - Replace --device_name with --driver-name. - Replace --driver_name with --driver-name. - Replace --size with --wq-size. - Add missing --priority argument. - Add missing accel-config config-engine command after the config-wq commands. - Fix wq name passed to accel-config config-wq. - Add rmmod/modprobe of iaa_crypto to script that disables, then enables all devices and workqueues to avoid enable-wq failing with -EEXIST when trying to register to compression algorithm. - Fix device name in cases where iaa was used instead of iax. Cc: Jonathan Corbet Cc: linux-crypto@vger.kernel.org Cc: linux-doc@vger.kernel.org Signed-off-by: Jerry Snitselaar Reviewed-by: Tom Zanussi Signed-off-by: Herbert Xu --- .../driver-api/crypto/iaa/iaa-crypto.rst | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst index 7b28aef39ba0..f4fba897d931 100644 --- a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst +++ b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst @@ -179,7 +179,9 @@ has the old 'iax' device naming in place) :: # configure wq1.0 - accel-config config-wq --group-id=0 --mode=dedicated --type=kernel --name="iaa_crypto" --device_name="crypto" iax1/wq1.0 + accel-config config-wq --group-id=0 --mode=dedicated --type=kernel --priority=10 --name="iaa_crypto" --driver-name="crypto" iax1/wq1.0 + + accel-config config-engine iax1/engine1.0 --group-id=0 # enable IAA device iax1 @@ -548,12 +550,20 @@ The below script automatically does that:: echo "End Disable IAA" + echo "Reload iaa_crypto module" + + rmmod iaa_crypto + modprobe iaa_crypto + + echo "End Reload iaa_crypto module" + # # configure iaa wqs and devices # echo "Configure IAA" for ((i = 1; i < ${num_iaa} * 2; i += 2)); do - accel-config config-wq --group-id=0 --mode=dedicated --size=128 --priority=10 --type=kernel --name="iaa_crypto" --driver_name="crypto" iax${i}/wq${i} + accel-config config-wq --group-id=0 --mode=dedicated --wq-size=128 --priority=10 --type=kernel --name="iaa_crypto" --driver-name="crypto" iax${i}/wq${i}.0 + accel-config config-engine iax${i}/engine${i}.0 --group-id=0 done echo "End Configure IAA" @@ -564,10 +574,10 @@ The below script automatically does that:: echo "Enable IAA" for ((i = 1; i < ${num_iaa} * 2; i += 2)); do - echo enable iaa iaa${i} - accel-config enable-device iaa${i} - echo enable wq iaa${i}/wq${i}.0 - accel-config enable-wq iaa${i}/wq${i}.0 + echo enable iaa iax${i} + accel-config enable-device iax${i} + echo enable wq iax${i}/wq${i}.0 + accel-config enable-wq iax${i}/wq${i}.0 done echo "End Enable IAA" From 2b3460cbf454c6b03d7429e9ffc4fe09322eb1a9 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Fri, 22 Mar 2024 23:59:15 +0300 Subject: [PATCH 026/123] crypto: bcm - Fix pointer arithmetic In spu2_dump_omd() value of ptr is increased by ciph_key_len instead of hash_iv_len which could lead to going beyond the buffer boundaries. Fix this bug by changing ciph_key_len to hash_iv_len. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 9d12ba86f818 ("crypto: brcm - Add Broadcom SPU driver") Signed-off-by: Aleksandr Mishin Signed-off-by: Herbert Xu --- drivers/crypto/bcm/spu2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/bcm/spu2.c b/drivers/crypto/bcm/spu2.c index 07989bb8c220..3fdc64b5a65e 100644 --- a/drivers/crypto/bcm/spu2.c +++ b/drivers/crypto/bcm/spu2.c @@ -495,7 +495,7 @@ static void spu2_dump_omd(u8 *omd, u16 hash_key_len, u16 ciph_key_len, if (hash_iv_len) { packet_log(" Hash IV Length %u bytes\n", hash_iv_len); packet_dump(" hash IV: ", ptr, hash_iv_len); - ptr += ciph_key_len; + ptr += hash_iv_len; } if (ciph_iv_len) { From d50b35f0c4424185a5f4658b27188c459c9372a8 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Fri, 22 Mar 2024 16:04:58 -0700 Subject: [PATCH 027/123] crypto: x86/aesni - Rearrange AES key size check aes_expandkey() already includes an AES key size check. If AES-NI is unusable, invoke the function without the size check. Also, use aes_check_keylen() instead of open code. Signed-off-by: Chang S. Bae Cc: Eric Biggers Cc: Ard Biesheuvel Cc: linux-crypto@vger.kernel.org Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index b1d90c25975a..8b3b17b065ad 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -233,18 +233,16 @@ static int aes_set_key_common(struct crypto_aes_ctx *ctx, { int err; - if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && - key_len != AES_KEYSIZE_256) - return -EINVAL; - if (!crypto_simd_usable()) - err = aes_expandkey(ctx, in_key, key_len); - else { - kernel_fpu_begin(); - err = aesni_set_key(ctx, in_key, key_len); - kernel_fpu_end(); - } + return aes_expandkey(ctx, in_key, key_len); + err = aes_check_keylen(key_len); + if (err) + return err; + + kernel_fpu_begin(); + err = aesni_set_key(ctx, in_key, key_len); + kernel_fpu_end(); return err; } From e3299a4c1c4265535625df4d947ffd119f146bfc Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Fri, 22 Mar 2024 16:04:59 -0700 Subject: [PATCH 028/123] crypto: x86/aesni - Update aesni_set_key() to return void The aesni_set_key() implementation has no error case, yet its prototype specifies to return an error code. Modify the function prototype to return void and adjust the related code. Signed-off-by: Chang S. Bae Reviewed-by: Eric Biggers Cc: Eric Biggers Cc: Ard Biesheuvel Cc: linux-crypto@vger.kernel.org Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 5 ++--- arch/x86/crypto/aesni-intel_glue.c | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 411d8c83e88a..7ecb55cae3d6 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1820,8 +1820,8 @@ SYM_FUNC_START_LOCAL(_key_expansion_256b) SYM_FUNC_END(_key_expansion_256b) /* - * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, - * unsigned int key_len) + * void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + * unsigned int key_len) */ SYM_FUNC_START(aesni_set_key) FRAME_BEGIN @@ -1926,7 +1926,6 @@ SYM_FUNC_START(aesni_set_key) sub $0x10, UKEYP cmp TKEYP, KEYP jb .Ldec_key_loop - xor AREG, AREG #ifndef __x86_64__ popl KEYP #endif diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 8b3b17b065ad..0ea3abaaa645 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -87,8 +87,8 @@ static inline void *aes_align_addr(void *addr) return PTR_ALIGN(addr, AESNI_ALIGN); } -asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, - unsigned int key_len); +asmlinkage void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len); asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in); asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in); asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, @@ -241,9 +241,9 @@ static int aes_set_key_common(struct crypto_aes_ctx *ctx, return err; kernel_fpu_begin(); - err = aesni_set_key(ctx, in_key, key_len); + aesni_set_key(ctx, in_key, key_len); kernel_fpu_end(); - return err; + return 0; } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, From a9a72140536fe02d98bce72a608ccf3ba9008a71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 24 Mar 2024 17:12:26 +0100 Subject: [PATCH 029/123] hwrng: mxc-rnga - Drop usage of platform_driver_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are considerations to drop platform_driver_probe() as a concept that isn't relevant any more today. It comes with an added complexity that makes many users hold it wrong. (E.g. this driver should have mark the driver struct with __refdata.) Convert the driver to the more usual module_platform_driver(). This fixes a W=1 build warning: WARNING: modpost: drivers/char/hw_random/mxc-rnga: section mismatch in reference: mxc_rnga_driver+0x10 (section: .data) -> mxc_rnga_remove (section: .exit.text) with CONFIG_HW_RANDOM_MXC_RNGA=m. Signed-off-by: Uwe Kleine-König Signed-off-by: Herbert Xu --- drivers/char/hw_random/mxc-rnga.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c index 07ec000e4cd7..94ee18a1120a 100644 --- a/drivers/char/hw_random/mxc-rnga.c +++ b/drivers/char/hw_random/mxc-rnga.c @@ -131,7 +131,7 @@ static void mxc_rnga_cleanup(struct hwrng *rng) __raw_writel(ctrl & ~RNGA_CONTROL_GO, mxc_rng->mem + RNGA_CONTROL); } -static int __init mxc_rnga_probe(struct platform_device *pdev) +static int mxc_rnga_probe(struct platform_device *pdev) { int err; struct mxc_rng *mxc_rng; @@ -176,7 +176,7 @@ static int __init mxc_rnga_probe(struct platform_device *pdev) return err; } -static void __exit mxc_rnga_remove(struct platform_device *pdev) +static void mxc_rnga_remove(struct platform_device *pdev) { struct mxc_rng *mxc_rng = platform_get_drvdata(pdev); @@ -197,10 +197,11 @@ static struct platform_driver mxc_rnga_driver = { .name = "mxc_rnga", .of_match_table = mxc_rnga_of_match, }, - .remove_new = __exit_p(mxc_rnga_remove), + .probe = mxc_rnga_probe, + .remove_new = mxc_rnga_remove, }; -module_platform_driver_probe(mxc_rnga_driver, mxc_rnga_probe); +module_platform_driver(mxc_rnga_driver); MODULE_AUTHOR("Freescale Semiconductor, Inc."); MODULE_DESCRIPTION("H/W RNGA driver for i.MX"); From 140e4c85d54045ecd67f1d50fdad0fe2ecc088eb Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 25 Mar 2024 14:44:55 -0600 Subject: [PATCH 030/123] crypto: qat - Avoid -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end is coming in GCC-14, and we are getting ready to enable it globally. Use the `__struct_group()` helper to separate the flexible array from the rest of the members in flexible `struct qat_alg_buf_list`, through tagged `struct qat_alg_buf_list_hdr`, and avoid embedding the flexible-array member in the middle of `struct qat_alg_fixed_buf_list`. Also, use `container_of()` whenever we need to retrieve a pointer to the flexible structure. So, with these changes, fix the following warnings: drivers/crypto/intel/qat/qat_common/qat_bl.h:25:33: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/crypto/intel/qat/qat_common/qat_bl.h:25:33: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/crypto/intel/qat/qat_common/qat_bl.h:25:33: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/crypto/intel/qat/qat_common/qat_bl.h:25:33: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/crypto/intel/qat/qat_common/qat_bl.h:25:33: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/crypto/intel/qat/qat_common/qat_bl.h:25:33: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/crypto/intel/qat/qat_common/qat_bl.h:25:33: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/crypto/intel/qat/qat_common/qat_bl.h:25:33: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Link: https://github.com/KSPP/linux/issues/202 Signed-off-by: Gustavo A. R. Silva Acked-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/qat_bl.c | 6 ++++-- drivers/crypto/intel/qat/qat_common/qat_bl.h | 11 +++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/qat_bl.c b/drivers/crypto/intel/qat/qat_common/qat_bl.c index 76baed0a76c0..338acf29c487 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_bl.c +++ b/drivers/crypto/intel/qat/qat_common/qat_bl.c @@ -81,7 +81,8 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, if (unlikely(!bufl)) return -ENOMEM; } else { - bufl = &buf->sgl_src.sgl_hdr; + bufl = container_of(&buf->sgl_src.sgl_hdr, + struct qat_alg_buf_list, hdr); memset(bufl, 0, sizeof(struct qat_alg_buf_list)); buf->sgl_src_valid = true; } @@ -139,7 +140,8 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, if (unlikely(!buflout)) goto err_in; } else { - buflout = &buf->sgl_dst.sgl_hdr; + buflout = container_of(&buf->sgl_dst.sgl_hdr, + struct qat_alg_buf_list, hdr); memset(buflout, 0, sizeof(struct qat_alg_buf_list)); buf->sgl_dst_valid = true; } diff --git a/drivers/crypto/intel/qat/qat_common/qat_bl.h b/drivers/crypto/intel/qat/qat_common/qat_bl.h index d87e4f35ac39..85bc32a9ec0e 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_bl.h +++ b/drivers/crypto/intel/qat/qat_common/qat_bl.h @@ -15,14 +15,17 @@ struct qat_alg_buf { } __packed; struct qat_alg_buf_list { - u64 resrvd; - u32 num_bufs; - u32 num_mapped_bufs; + /* New members must be added within the __struct_group() macro below. */ + __struct_group(qat_alg_buf_list_hdr, hdr, __packed, + u64 resrvd; + u32 num_bufs; + u32 num_mapped_bufs; + ); struct qat_alg_buf buffers[]; } __packed; struct qat_alg_fixed_buf_list { - struct qat_alg_buf_list sgl_hdr; + struct qat_alg_buf_list_hdr sgl_hdr; struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC]; } __packed __aligned(64); From a00dce05ba31fbea58ca64158e0601cdc99d8929 Mon Sep 17 00:00:00 2001 From: Animesh Agarwal Date: Wed, 27 Mar 2024 11:19:06 +0530 Subject: [PATCH 031/123] dt-bindings: crypto: ti,omap-sham: Convert to dtschema Convert the OMAP SoC SHA crypto Module bindings to DT Schema. Signed-off-by: Animesh Agarwal Reviewed-by: Conor Dooley Signed-off-by: Herbert Xu --- .../devicetree/bindings/crypto/omap-sham.txt | 28 ---------- .../bindings/crypto/ti,omap-sham.yaml | 56 +++++++++++++++++++ 2 files changed, 56 insertions(+), 28 deletions(-) delete mode 100644 Documentation/devicetree/bindings/crypto/omap-sham.txt create mode 100644 Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml diff --git a/Documentation/devicetree/bindings/crypto/omap-sham.txt b/Documentation/devicetree/bindings/crypto/omap-sham.txt deleted file mode 100644 index ad9115569611..000000000000 --- a/Documentation/devicetree/bindings/crypto/omap-sham.txt +++ /dev/null @@ -1,28 +0,0 @@ -OMAP SoC SHA crypto Module - -Required properties: - -- compatible : Should contain entries for this and backward compatible - SHAM versions: - - "ti,omap2-sham" for OMAP2 & OMAP3. - - "ti,omap4-sham" for OMAP4 and AM33XX. - - "ti,omap5-sham" for OMAP5, DRA7 and AM43XX. -- ti,hwmods: Name of the hwmod associated with the SHAM module -- reg : Offset and length of the register set for the module -- interrupts : the interrupt-specifier for the SHAM module. - -Optional properties: -- dmas: DMA specifiers for the rx dma. See the DMA client binding, - Documentation/devicetree/bindings/dma/dma.txt -- dma-names: DMA request name. Should be "rx" if a dma is present. - -Example: - /* AM335x */ - sham: sham@53100000 { - compatible = "ti,omap4-sham"; - ti,hwmods = "sham"; - reg = <0x53100000 0x200>; - interrupts = <109>; - dmas = <&edma 36>; - dma-names = "rx"; - }; diff --git a/Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml b/Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml new file mode 100644 index 000000000000..d69b50228009 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/ti,omap-sham.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: OMAP SoC SHA crypto Module + +maintainers: + - Animesh Agarwal + +properties: + compatible: + enum: + - ti,omap2-sham + - ti,omap4-sham + - ti,omap5-sham + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + dmas: + maxItems: 1 + + dma-names: + const: rx + + ti,hwmods: + description: Name of the hwmod associated with the SHAM module + $ref: /schemas/types.yaml#/definitions/string + enum: [sham] + +dependencies: + dmas: [dma-names] + +additionalProperties: false + +required: + - compatible + - ti,hwmods + - reg + - interrupts + +examples: + - | + sham@53100000 { + compatible = "ti,omap4-sham"; + ti,hwmods = "sham"; + reg = <0x53100000 0x200>; + interrupts = <109>; + dmas = <&edma 36>; + dma-names = "rx"; + }; From 6e61ee1ca551292d8714c35c92a019c41db79e4e Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 27 Mar 2024 23:25:09 +0100 Subject: [PATCH 032/123] crypto: jitter - Use kvfree_sensitive() to fix Coccinelle warning Replace memzero_explicit() and kvfree() with kvfree_sensitive() to fix the following Coccinelle/coccicheck warning reported by kfree_sensitive.cocci: WARNING opportunity for kfree_sensitive/kvfree_sensitive Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu --- crypto/jitterentropy-kcapi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index 76edbf8af0ac..c24d4ff2b4a8 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -61,8 +61,7 @@ void *jent_kvzalloc(unsigned int len) void jent_kvzfree(void *ptr, unsigned int len) { - memzero_explicit(ptr, len); - kvfree(ptr); + kvfree_sensitive(ptr, len); } void *jent_zalloc(unsigned int len) From 5adf213cf2d60bfa61eb1872b099a649239343d3 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Thu, 28 Mar 2024 16:57:50 +0100 Subject: [PATCH 033/123] crypto: fips - Remove the now superfluous sentinel element from ctl_table array This commit comes at the tail end of a greater effort to remove the empty elements at the end of the ctl_table arrays (sentinels) which will reduce the overall build time size of the kernel and run time memory bloat by ~64 bytes per sentinel (further information Link : https://lore.kernel.org/all/ZO5Yx5JFogGi%2FcBo@bombadil.infradead.org/) Remove sentinel from crypto_sysctl_table Signed-off-by: Joel Granados Signed-off-by: Herbert Xu --- crypto/fips.c | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/fips.c b/crypto/fips.c index 92fd506abb21..8a784018ebfc 100644 --- a/crypto/fips.c +++ b/crypto/fips.c @@ -63,7 +63,6 @@ static struct ctl_table crypto_sysctl_table[] = { .mode = 0444, .proc_handler = proc_dostring }, - {} }; static struct ctl_table_header *crypto_sysctls; From 73e5984e540a76a2ee1868b91590c922da8c24c9 Mon Sep 17 00:00:00 2001 From: Joachim Vandersmissen Date: Thu, 28 Mar 2024 11:24:30 -0500 Subject: [PATCH 034/123] crypto: ecdh - explicitly zeroize private_key private_key is overwritten with the key parameter passed in by the caller (if present), or alternatively a newly generated private key. However, it is possible that the caller provides a key (or the newly generated key) which is shorter than the previous key. In that scenario, some key material from the previous key would not be overwritten. The easiest solution is to explicitly zeroize the entire private_key array first. Note that this patch slightly changes the behavior of this function: previously, if the ecc_gen_privkey failed, the old private_key would remain. Now, the private_key is always zeroized. This behavior is consistent with the case where params.key is set and ecc_is_key_valid fails. Signed-off-by: Joachim Vandersmissen Signed-off-by: Herbert Xu --- crypto/ecdh.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/ecdh.c b/crypto/ecdh.c index 80afee3234fb..3049f147e011 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -33,6 +33,8 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, params.key_size > sizeof(u64) * ctx->ndigits) return -EINVAL; + memset(ctx->private_key, 0, sizeof(ctx->private_key)); + if (!params.key || !params.key_size) return ecc_gen_privkey(ctx->curve_id, ctx->ndigits, ctx->private_key); From 7d4700d16186b9295d1419625c822cfa1d27fb7b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Mar 2024 01:03:49 -0700 Subject: [PATCH 035/123] x86: add kconfig symbols for assembler VAES and VPCLMULQDQ support Add config symbols AS_VAES and AS_VPCLMULQDQ that expose whether the assembler supports the vector AES and carryless multiplication cryptographic extensions. Reviewed-by: Ingo Molnar Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/Kconfig.assembler | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 8ad41da301e5..59aedf32c4ea 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -25,6 +25,16 @@ config AS_GFNI help Supported by binutils >= 2.30 and LLVM integrated assembler +config AS_VAES + def_bool $(as-instr,vaesenc %ymm0$(comma)%ymm1$(comma)%ymm2) + help + Supported by binutils >= 2.30 and LLVM integrated assembler + +config AS_VPCLMULQDQ + def_bool $(as-instr,vpclmulqdq \$0x10$(comma)%ymm0$(comma)%ymm1$(comma)%ymm2) + help + Supported by binutils >= 2.30 and LLVM integrated assembler + config AS_WRUSS def_bool $(as-instr,wrussq %rax$(comma)(%rbx)) help From d6371688101223a355860d2eff51907e1aea984e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Mar 2024 01:03:50 -0700 Subject: [PATCH 036/123] crypto: x86/aes-xts - add AES-XTS assembly macro for modern CPUs Add an assembly file aes-xts-avx-x86_64.S which contains a macro that expands into AES-XTS implementations for x86_64 CPUs that support at least AES-NI and AVX, optionally also taking advantage of VAES, VPCLMULQDQ, and AVX512 or AVX10. This patch doesn't expand the macro at all. Later patches will do so, adding each implementation individually so that the motivation and use case for each individual implementation can be fully presented. The file also provides a function aes_xts_encrypt_iv() which handles the encryption of the IV (tweak), using AES-NI and AVX. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 3 +- arch/x86/crypto/aes-xts-avx-x86_64.S | 800 +++++++++++++++++++++++++++ 2 files changed, 802 insertions(+), 1 deletion(-) create mode 100644 arch/x86/crypto/aes-xts-avx-x86_64.S diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 9aa46093c91b..9c5ce5613738 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -48,7 +48,8 @@ chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o -aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o +aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o \ + aes_ctrby8_avx-x86_64.o aes-xts-avx-x86_64.o obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S new file mode 100644 index 000000000000..a5e2783c46ec --- /dev/null +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -0,0 +1,800 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * AES-XTS for modern x86_64 CPUs + * + * Copyright 2024 Google LLC + * + * Author: Eric Biggers + */ + +/* + * This file implements AES-XTS for modern x86_64 CPUs. To handle the + * complexities of coding for x86 SIMD, e.g. where every vector length needs + * different code, it uses a macro to generate several implementations that + * share similar source code but are targeted at different CPUs, listed below: + * + * AES-NI + AVX + * - 128-bit vectors (1 AES block per vector) + * - VEX-coded instructions + * - xmm0-xmm15 + * - This is for older CPUs that lack VAES but do have AVX. + * + * VAES + VPCLMULQDQ + AVX2 + * - 256-bit vectors (2 AES blocks per vector) + * - VEX-coded instructions + * - ymm0-ymm15 + * - This is for CPUs that have VAES but lack AVX512 or AVX10, + * e.g. Intel's Alder Lake and AMD's Zen 3. + * + * VAES + VPCLMULQDQ + AVX10/256 + BMI2 + * - 256-bit vectors (2 AES blocks per vector) + * - EVEX-coded instructions + * - ymm0-ymm31 + * - This is for CPUs that have AVX512 but where using zmm registers causes + * downclocking, and for CPUs that have AVX10/256 but not AVX10/512. + * - By "AVX10/256" we really mean (AVX512BW + AVX512VL) || AVX10/256. + * To avoid confusion with 512-bit, we just write AVX10/256. + * + * VAES + VPCLMULQDQ + AVX10/512 + BMI2 + * - Same as the previous one, but upgrades to 512-bit vectors + * (4 AES blocks per vector) in zmm0-zmm31. + * - This is for CPUs that have good AVX512 or AVX10/512 support. + * + * This file doesn't have an implementation for AES-NI alone (without AVX), as + * the lack of VEX would make all the assembly code different. + * + * When we use VAES, we also use VPCLMULQDQ to parallelize the computation of + * the XTS tweaks. This avoids a bottleneck. Currently there don't seem to be + * any CPUs that support VAES but not VPCLMULQDQ. If that changes, we might + * need to start also providing an implementation using VAES alone. + * + * The AES-XTS implementations in this file support everything required by the + * crypto API, including support for arbitrary input lengths and multi-part + * processing. However, they are most heavily optimized for the common case of + * power-of-2 length inputs that are processed in a single part (disk sectors). + */ + +#include +#include + +.section .rodata +.p2align 4 +.Lgf_poly: + // The low 64 bits of this value represent the polynomial x^7 + x^2 + x + // + 1. It is the value that must be XOR'd into the low 64 bits of the + // tweak each time a 1 is carried out of the high 64 bits. + // + // The high 64 bits of this value is just the internal carry bit that + // exists when there's a carry out of the low 64 bits of the tweak. + .quad 0x87, 1 + + // This table contains constants for vpshufb and vpblendvb, used to + // handle variable byte shifts and blending during ciphertext stealing + // on CPUs that don't support AVX10-style masking. +.Lcts_permute_table: + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 +.text + +// Function parameters +.set KEY, %rdi // Initially points to crypto_aes_ctx, then is + // advanced to point directly to the round keys +.set SRC, %rsi // Pointer to next source data +.set DST, %rdx // Pointer to next destination data +.set LEN, %rcx // Remaining length in bytes +.set TWEAK, %r8 // Pointer to next tweak + +// %r9d holds the AES key length in bytes. +.set KEYLEN, %r9d + +// %rax and %r10-r11 are available as temporaries. + +.macro _define_Vi i +.if VL == 16 + .set V\i, %xmm\i +.elseif VL == 32 + .set V\i, %ymm\i +.elseif VL == 64 + .set V\i, %zmm\i +.else + .error "Unsupported Vector Length (VL)" +.endif +.endm + +.macro _define_aliases + // Define register aliases V0-V15, or V0-V31 if all 32 SIMD registers + // are available, that map to the xmm, ymm, or zmm registers according + // to the selected Vector Length (VL). + _define_Vi 0 + _define_Vi 1 + _define_Vi 2 + _define_Vi 3 + _define_Vi 4 + _define_Vi 5 + _define_Vi 6 + _define_Vi 7 + _define_Vi 8 + _define_Vi 9 + _define_Vi 10 + _define_Vi 11 + _define_Vi 12 + _define_Vi 13 + _define_Vi 14 + _define_Vi 15 +.if USE_AVX10 + _define_Vi 16 + _define_Vi 17 + _define_Vi 18 + _define_Vi 19 + _define_Vi 20 + _define_Vi 21 + _define_Vi 22 + _define_Vi 23 + _define_Vi 24 + _define_Vi 25 + _define_Vi 26 + _define_Vi 27 + _define_Vi 28 + _define_Vi 29 + _define_Vi 30 + _define_Vi 31 +.endif + + // V0-V3 hold the data blocks during the main loop, or temporary values + // otherwise. V4-V5 hold temporary values. + + // V6-V9 hold XTS tweaks. Each 128-bit lane holds one tweak. + .set TWEAK0_XMM, %xmm6 + .set TWEAK0, V6 + .set TWEAK1_XMM, %xmm7 + .set TWEAK1, V7 + .set TWEAK2, V8 + .set TWEAK3, V9 + + // V10-V13 are used for computing the next values of TWEAK[0-3]. + .set NEXT_TWEAK0, V10 + .set NEXT_TWEAK1, V11 + .set NEXT_TWEAK2, V12 + .set NEXT_TWEAK3, V13 + + // V14 holds the constant from .Lgf_poly, copied to all 128-bit lanes. + .set GF_POLY_XMM, %xmm14 + .set GF_POLY, V14 + + // V15 holds the first AES round key, copied to all 128-bit lanes. + .set KEY0_XMM, %xmm15 + .set KEY0, V15 + + // If 32 SIMD registers are available, then V16-V29 hold the remaining + // AES round keys, copied to all 128-bit lanes. +.if USE_AVX10 + .set KEY1_XMM, %xmm16 + .set KEY1, V16 + .set KEY2_XMM, %xmm17 + .set KEY2, V17 + .set KEY3_XMM, %xmm18 + .set KEY3, V18 + .set KEY4_XMM, %xmm19 + .set KEY4, V19 + .set KEY5_XMM, %xmm20 + .set KEY5, V20 + .set KEY6_XMM, %xmm21 + .set KEY6, V21 + .set KEY7_XMM, %xmm22 + .set KEY7, V22 + .set KEY8_XMM, %xmm23 + .set KEY8, V23 + .set KEY9_XMM, %xmm24 + .set KEY9, V24 + .set KEY10_XMM, %xmm25 + .set KEY10, V25 + .set KEY11_XMM, %xmm26 + .set KEY11, V26 + .set KEY12_XMM, %xmm27 + .set KEY12, V27 + .set KEY13_XMM, %xmm28 + .set KEY13, V28 + .set KEY14_XMM, %xmm29 + .set KEY14, V29 +.endif + // V30-V31 are currently unused. +.endm + +// Move a vector between memory and a register. +.macro _vmovdqu src, dst +.if VL < 64 + vmovdqu \src, \dst +.else + vmovdqu8 \src, \dst +.endif +.endm + +// Broadcast a 128-bit value into a vector. +.macro _vbroadcast128 src, dst +.if VL == 16 && !USE_AVX10 + vmovdqu \src, \dst +.elseif VL == 32 && !USE_AVX10 + vbroadcasti128 \src, \dst +.else + vbroadcasti32x4 \src, \dst +.endif +.endm + +// XOR two vectors together. +.macro _vpxor src1, src2, dst +.if USE_AVX10 + vpxord \src1, \src2, \dst +.else + vpxor \src1, \src2, \dst +.endif +.endm + +// XOR three vectors together. +.macro _xor3 src1, src2, src3_and_dst +.if USE_AVX10 + // vpternlogd with immediate 0x96 is a three-argument XOR. + vpternlogd $0x96, \src1, \src2, \src3_and_dst +.else + vpxor \src1, \src3_and_dst, \src3_and_dst + vpxor \src2, \src3_and_dst, \src3_and_dst +.endif +.endm + +// Given a 128-bit XTS tweak in the xmm register \src, compute the next tweak +// (by multiplying by the polynomial 'x') and write it to \dst. +.macro _next_tweak src, tmp, dst + vpshufd $0x13, \src, \tmp + vpaddq \src, \src, \dst + vpsrad $31, \tmp, \tmp + vpand GF_POLY_XMM, \tmp, \tmp + vpxor \tmp, \dst, \dst +.endm + +// Given the XTS tweak(s) in the vector \src, compute the next vector of +// tweak(s) (by multiplying by the polynomial 'x^(VL/16)') and write it to \dst. +// +// If VL > 16, then there are multiple tweaks, and we use vpclmulqdq to compute +// all tweaks in the vector in parallel. If VL=16, we just do the regular +// computation without vpclmulqdq, as it's the faster method for a single tweak. +.macro _next_tweakvec src, tmp1, tmp2, dst +.if VL == 16 + _next_tweak \src, \tmp1, \dst +.else + vpsrlq $64 - VL/16, \src, \tmp1 + vpclmulqdq $0x01, GF_POLY, \tmp1, \tmp2 + vpslldq $8, \tmp1, \tmp1 + vpsllq $VL/16, \src, \dst + _xor3 \tmp1, \tmp2, \dst +.endif +.endm + +// Given the first XTS tweak at (TWEAK), compute the first set of tweaks and +// store them in the vector registers TWEAK0-TWEAK3. Clobbers V0-V5. +.macro _compute_first_set_of_tweaks + vmovdqu (TWEAK), TWEAK0_XMM + _vbroadcast128 .Lgf_poly(%rip), GF_POLY +.if VL == 16 + // With VL=16, multiplying by x serially is fastest. + _next_tweak TWEAK0, %xmm0, TWEAK1 + _next_tweak TWEAK1, %xmm0, TWEAK2 + _next_tweak TWEAK2, %xmm0, TWEAK3 +.else +.if VL == 32 + // Compute the second block of TWEAK0. + _next_tweak TWEAK0_XMM, %xmm0, %xmm1 + vinserti128 $1, %xmm1, TWEAK0, TWEAK0 +.elseif VL == 64 + // Compute the remaining blocks of TWEAK0. + _next_tweak TWEAK0_XMM, %xmm0, %xmm1 + _next_tweak %xmm1, %xmm0, %xmm2 + _next_tweak %xmm2, %xmm0, %xmm3 + vinserti32x4 $1, %xmm1, TWEAK0, TWEAK0 + vinserti32x4 $2, %xmm2, TWEAK0, TWEAK0 + vinserti32x4 $3, %xmm3, TWEAK0, TWEAK0 +.endif + // Compute TWEAK[1-3] from TWEAK0. + vpsrlq $64 - 1*VL/16, TWEAK0, V0 + vpsrlq $64 - 2*VL/16, TWEAK0, V2 + vpsrlq $64 - 3*VL/16, TWEAK0, V4 + vpclmulqdq $0x01, GF_POLY, V0, V1 + vpclmulqdq $0x01, GF_POLY, V2, V3 + vpclmulqdq $0x01, GF_POLY, V4, V5 + vpslldq $8, V0, V0 + vpslldq $8, V2, V2 + vpslldq $8, V4, V4 + vpsllq $1*VL/16, TWEAK0, TWEAK1 + vpsllq $2*VL/16, TWEAK0, TWEAK2 + vpsllq $3*VL/16, TWEAK0, TWEAK3 +.if USE_AVX10 + vpternlogd $0x96, V0, V1, TWEAK1 + vpternlogd $0x96, V2, V3, TWEAK2 + vpternlogd $0x96, V4, V5, TWEAK3 +.else + vpxor V0, TWEAK1, TWEAK1 + vpxor V2, TWEAK2, TWEAK2 + vpxor V4, TWEAK3, TWEAK3 + vpxor V1, TWEAK1, TWEAK1 + vpxor V3, TWEAK2, TWEAK2 + vpxor V5, TWEAK3, TWEAK3 +.endif +.endif +.endm + +// Do one step in computing the next set of tweaks using the method of just +// multiplying by x repeatedly (the same method _next_tweak uses). +.macro _tweak_step_mulx i +.if \i == 0 + .set PREV_TWEAK, TWEAK3 + .set NEXT_TWEAK, NEXT_TWEAK0 +.elseif \i == 5 + .set PREV_TWEAK, NEXT_TWEAK0 + .set NEXT_TWEAK, NEXT_TWEAK1 +.elseif \i == 10 + .set PREV_TWEAK, NEXT_TWEAK1 + .set NEXT_TWEAK, NEXT_TWEAK2 +.elseif \i == 15 + .set PREV_TWEAK, NEXT_TWEAK2 + .set NEXT_TWEAK, NEXT_TWEAK3 +.endif +.if \i < 20 && \i % 5 == 0 + vpshufd $0x13, PREV_TWEAK, V5 +.elseif \i < 20 && \i % 5 == 1 + vpaddq PREV_TWEAK, PREV_TWEAK, NEXT_TWEAK +.elseif \i < 20 && \i % 5 == 2 + vpsrad $31, V5, V5 +.elseif \i < 20 && \i % 5 == 3 + vpand GF_POLY, V5, V5 +.elseif \i < 20 && \i % 5 == 4 + vpxor V5, NEXT_TWEAK, NEXT_TWEAK +.elseif \i == 1000 + vmovdqa NEXT_TWEAK0, TWEAK0 + vmovdqa NEXT_TWEAK1, TWEAK1 + vmovdqa NEXT_TWEAK2, TWEAK2 + vmovdqa NEXT_TWEAK3, TWEAK3 +.endif +.endm + +// Do one step in computing the next set of tweaks using the VPCLMULQDQ method +// (the same method _next_tweakvec uses for VL > 16). This means multiplying +// each tweak by x^(4*VL/16) independently. Since 4*VL/16 is a multiple of 8 +// when VL > 16 (which it is here), the needed shift amounts are byte-aligned, +// which allows the use of vpsrldq and vpslldq to do 128-bit wide shifts. +.macro _tweak_step_pclmul i +.if \i == 2 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK0, NEXT_TWEAK0 +.elseif \i == 4 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK1, NEXT_TWEAK1 +.elseif \i == 6 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK2, NEXT_TWEAK2 +.elseif \i == 8 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK3, NEXT_TWEAK3 +.elseif \i == 10 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK0, NEXT_TWEAK0 +.elseif \i == 12 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK1, NEXT_TWEAK1 +.elseif \i == 14 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK2, NEXT_TWEAK2 +.elseif \i == 16 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK3, NEXT_TWEAK3 +.elseif \i == 1000 + vpslldq $(4*VL/16) / 8, TWEAK0, TWEAK0 + vpslldq $(4*VL/16) / 8, TWEAK1, TWEAK1 + vpslldq $(4*VL/16) / 8, TWEAK2, TWEAK2 + vpslldq $(4*VL/16) / 8, TWEAK3, TWEAK3 + _vpxor NEXT_TWEAK0, TWEAK0, TWEAK0 + _vpxor NEXT_TWEAK1, TWEAK1, TWEAK1 + _vpxor NEXT_TWEAK2, TWEAK2, TWEAK2 + _vpxor NEXT_TWEAK3, TWEAK3, TWEAK3 +.endif +.endm + +// _tweak_step does one step of the computation of the next set of tweaks from +// TWEAK[0-3]. To complete all steps, this must be invoked with \i values 0 +// through at least 19, then 1000 which signals the last step. +// +// This is used to interleave the computation of the next set of tweaks with the +// AES en/decryptions, which increases performance in some cases. +.macro _tweak_step i +.if VL == 16 + _tweak_step_mulx \i +.else + _tweak_step_pclmul \i +.endif +.endm + +// Load the round keys: just the first one if !USE_AVX10, otherwise all of them. +.macro _load_round_keys + _vbroadcast128 0*16(KEY), KEY0 +.if USE_AVX10 + _vbroadcast128 1*16(KEY), KEY1 + _vbroadcast128 2*16(KEY), KEY2 + _vbroadcast128 3*16(KEY), KEY3 + _vbroadcast128 4*16(KEY), KEY4 + _vbroadcast128 5*16(KEY), KEY5 + _vbroadcast128 6*16(KEY), KEY6 + _vbroadcast128 7*16(KEY), KEY7 + _vbroadcast128 8*16(KEY), KEY8 + _vbroadcast128 9*16(KEY), KEY9 + _vbroadcast128 10*16(KEY), KEY10 + // Note: if it's AES-128 or AES-192, the last several round keys won't + // be used. We do the loads anyway to save a conditional jump. + _vbroadcast128 11*16(KEY), KEY11 + _vbroadcast128 12*16(KEY), KEY12 + _vbroadcast128 13*16(KEY), KEY13 + _vbroadcast128 14*16(KEY), KEY14 +.endif +.endm + +// Do a single round of AES encryption (if \enc==1) or decryption (if \enc==0) +// on the block(s) in \data using the round key(s) in \key. The register length +// determines the number of AES blocks en/decrypted. +.macro _vaes enc, last, key, data +.if \enc +.if \last + vaesenclast \key, \data, \data +.else + vaesenc \key, \data, \data +.endif +.else +.if \last + vaesdeclast \key, \data, \data +.else + vaesdec \key, \data, \data +.endif +.endif +.endm + +// Do a single round of AES en/decryption on the block(s) in \data, using the +// same key for all block(s). The round key is loaded from the appropriate +// register or memory location for round \i. May clobber V4. +.macro _vaes_1x enc, last, i, xmm_suffix, data +.if USE_AVX10 + _vaes \enc, \last, KEY\i\xmm_suffix, \data +.else +.ifnb \xmm_suffix + _vaes \enc, \last, \i*16(KEY), \data +.else + _vbroadcast128 \i*16(KEY), V4 + _vaes \enc, \last, V4, \data +.endif +.endif +.endm + +// Do a single round of AES en/decryption on the blocks in registers V0-V3, +// using the same key for all blocks. The round key is loaded from the +// appropriate register or memory location for round \i. In addition, does step +// \i of the computation of the next set of tweaks. May clobber V4. +.macro _vaes_4x enc, last, i +.if USE_AVX10 + _tweak_step (2*(\i-1)) + _vaes \enc, \last, KEY\i, V0 + _vaes \enc, \last, KEY\i, V1 + _tweak_step (2*(\i-1) + 1) + _vaes \enc, \last, KEY\i, V2 + _vaes \enc, \last, KEY\i, V3 +.else + _vbroadcast128 \i*16(KEY), V4 + _tweak_step (2*(\i-1)) + _vaes \enc, \last, V4, V0 + _vaes \enc, \last, V4, V1 + _tweak_step (2*(\i-1) + 1) + _vaes \enc, \last, V4, V2 + _vaes \enc, \last, V4, V3 +.endif +.endm + +// Do tweaked AES en/decryption (i.e., XOR with \tweak, then AES en/decrypt, +// then XOR with \tweak again) of the block(s) in \data. To process a single +// block, use xmm registers and set \xmm_suffix=_XMM. To process a vector of +// length VL, use V* registers and leave \xmm_suffix empty. May clobber V4. +.macro _aes_crypt enc, xmm_suffix, tweak, data + _xor3 KEY0\xmm_suffix, \tweak, \data + _vaes_1x \enc, 0, 1, \xmm_suffix, \data + _vaes_1x \enc, 0, 2, \xmm_suffix, \data + _vaes_1x \enc, 0, 3, \xmm_suffix, \data + _vaes_1x \enc, 0, 4, \xmm_suffix, \data + _vaes_1x \enc, 0, 5, \xmm_suffix, \data + _vaes_1x \enc, 0, 6, \xmm_suffix, \data + _vaes_1x \enc, 0, 7, \xmm_suffix, \data + _vaes_1x \enc, 0, 8, \xmm_suffix, \data + _vaes_1x \enc, 0, 9, \xmm_suffix, \data + cmp $24, KEYLEN + jle .Laes_128_or_192\@ + _vaes_1x \enc, 0, 10, \xmm_suffix, \data + _vaes_1x \enc, 0, 11, \xmm_suffix, \data + _vaes_1x \enc, 0, 12, \xmm_suffix, \data + _vaes_1x \enc, 0, 13, \xmm_suffix, \data + _vaes_1x \enc, 1, 14, \xmm_suffix, \data + jmp .Laes_done\@ +.Laes_128_or_192\@: + je .Laes_192\@ + _vaes_1x \enc, 1, 10, \xmm_suffix, \data + jmp .Laes_done\@ +.Laes_192\@: + _vaes_1x \enc, 0, 10, \xmm_suffix, \data + _vaes_1x \enc, 0, 11, \xmm_suffix, \data + _vaes_1x \enc, 1, 12, \xmm_suffix, \data +.Laes_done\@: + _vpxor \tweak, \data, \data +.endm + +.macro _aes_xts_crypt enc + _define_aliases + + // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). + movl 480(KEY), KEYLEN + + // If decrypting, advance KEY to the decryption round keys. +.if !\enc + add $240, KEY +.endif + + // Check whether the data length is a multiple of the AES block length. + test $15, LEN + jnz .Lneed_cts\@ +.Lxts_init\@: + + // Cache as many round keys as possible. + _load_round_keys + + // Compute the first set of tweaks TWEAK[0-3]. + _compute_first_set_of_tweaks + + sub $4*VL, LEN + jl .Lhandle_remainder\@ + +.Lmain_loop\@: + // This is the main loop, en/decrypting 4*VL bytes per iteration. + + // XOR each source block with its tweak and the first round key. +.if USE_AVX10 + vmovdqu8 0*VL(SRC), V0 + vmovdqu8 1*VL(SRC), V1 + vmovdqu8 2*VL(SRC), V2 + vmovdqu8 3*VL(SRC), V3 + vpternlogd $0x96, TWEAK0, KEY0, V0 + vpternlogd $0x96, TWEAK1, KEY0, V1 + vpternlogd $0x96, TWEAK2, KEY0, V2 + vpternlogd $0x96, TWEAK3, KEY0, V3 +.else + vpxor 0*VL(SRC), KEY0, V0 + vpxor 1*VL(SRC), KEY0, V1 + vpxor 2*VL(SRC), KEY0, V2 + vpxor 3*VL(SRC), KEY0, V3 + vpxor TWEAK0, V0, V0 + vpxor TWEAK1, V1, V1 + vpxor TWEAK2, V2, V2 + vpxor TWEAK3, V3, V3 +.endif + // Do all the AES rounds on the data blocks, interleaved with + // the computation of the next set of tweaks. + _vaes_4x \enc, 0, 1 + _vaes_4x \enc, 0, 2 + _vaes_4x \enc, 0, 3 + _vaes_4x \enc, 0, 4 + _vaes_4x \enc, 0, 5 + _vaes_4x \enc, 0, 6 + _vaes_4x \enc, 0, 7 + _vaes_4x \enc, 0, 8 + _vaes_4x \enc, 0, 9 + // Try to optimize for AES-256 by keeping the code for AES-128 and + // AES-192 out-of-line. + cmp $24, KEYLEN + jle .Lencrypt_4x_aes_128_or_192\@ + _vaes_4x \enc, 0, 10 + _vaes_4x \enc, 0, 11 + _vaes_4x \enc, 0, 12 + _vaes_4x \enc, 0, 13 + _vaes_4x \enc, 1, 14 +.Lencrypt_4x_done\@: + + // XOR in the tweaks again. + _vpxor TWEAK0, V0, V0 + _vpxor TWEAK1, V1, V1 + _vpxor TWEAK2, V2, V2 + _vpxor TWEAK3, V3, V3 + + // Store the destination blocks. + _vmovdqu V0, 0*VL(DST) + _vmovdqu V1, 1*VL(DST) + _vmovdqu V2, 2*VL(DST) + _vmovdqu V3, 3*VL(DST) + + // Finish computing the next set of tweaks. + _tweak_step 1000 + + add $4*VL, SRC + add $4*VL, DST + sub $4*VL, LEN + jge .Lmain_loop\@ + + // Check for the uncommon case where the data length isn't a multiple of + // 4*VL. Handle it out-of-line in order to optimize for the common + // case. In the common case, just fall through to the ret. + test $4*VL-1, LEN + jnz .Lhandle_remainder\@ +.Ldone\@: + // Store the next tweak back to *TWEAK to support continuation calls. + vmovdqu TWEAK0_XMM, (TWEAK) +.if VL > 16 + vzeroupper +.endif + RET + +.Lhandle_remainder\@: + add $4*VL, LEN // Undo the extra sub from earlier. + + // En/decrypt any remaining full blocks, one vector at a time. +.if VL > 16 + sub $VL, LEN + jl .Lvec_at_a_time_done\@ +.Lvec_at_a_time\@: + _vmovdqu (SRC), V0 + _aes_crypt \enc, , TWEAK0, V0 + _vmovdqu V0, (DST) + _next_tweakvec TWEAK0, V0, V1, TWEAK0 + add $VL, SRC + add $VL, DST + sub $VL, LEN + jge .Lvec_at_a_time\@ +.Lvec_at_a_time_done\@: + add $VL-16, LEN // Undo the extra sub from earlier. +.else + sub $16, LEN +.endif + + // En/decrypt any remaining full blocks, one at a time. + jl .Lblock_at_a_time_done\@ +.Lblock_at_a_time\@: + vmovdqu (SRC), %xmm0 + _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0 + vmovdqu %xmm0, (DST) + _next_tweak TWEAK0_XMM, %xmm0, TWEAK0_XMM + add $16, SRC + add $16, DST + sub $16, LEN + jge .Lblock_at_a_time\@ +.Lblock_at_a_time_done\@: + add $16, LEN // Undo the extra sub from earlier. + +.Lfull_blocks_done\@: + // Now 0 <= LEN <= 15. If LEN is nonzero, do ciphertext stealing to + // process the last 16 + LEN bytes. If LEN is zero, we're done. + test LEN, LEN + jnz .Lcts\@ + jmp .Ldone\@ + + // Out-of-line handling of AES-128 and AES-192 +.Lencrypt_4x_aes_128_or_192\@: + jz .Lencrypt_4x_aes_192\@ + _vaes_4x \enc, 1, 10 + jmp .Lencrypt_4x_done\@ +.Lencrypt_4x_aes_192\@: + _vaes_4x \enc, 0, 10 + _vaes_4x \enc, 0, 11 + _vaes_4x \enc, 1, 12 + jmp .Lencrypt_4x_done\@ + +.Lneed_cts\@: + // The data length isn't a multiple of the AES block length, so + // ciphertext stealing (CTS) will be needed. Subtract one block from + // LEN so that the main loop doesn't process the last full block. The + // CTS step will process it specially along with the partial block. + sub $16, LEN + jmp .Lxts_init\@ + +.Lcts\@: + // Do ciphertext stealing (CTS) to en/decrypt the last full block and + // the partial block. CTS needs two tweaks. TWEAK0_XMM contains the + // next tweak; compute the one after that. Decryption uses these two + // tweaks in reverse order, so also define aliases to handle that. + _next_tweak TWEAK0_XMM, %xmm0, TWEAK1_XMM +.if \enc + .set CTS_TWEAK0, TWEAK0_XMM + .set CTS_TWEAK1, TWEAK1_XMM +.else + .set CTS_TWEAK0, TWEAK1_XMM + .set CTS_TWEAK1, TWEAK0_XMM +.endif + + // En/decrypt the last full block. + vmovdqu (SRC), %xmm0 + _aes_crypt \enc, _XMM, CTS_TWEAK0, %xmm0 + +.if USE_AVX10 + // Create a mask that has the first LEN bits set. + mov $-1, %rax + bzhi LEN, %rax, %rax + kmovq %rax, %k1 + + // Swap the first LEN bytes of the above result with the partial block. + // Note that to support in-place en/decryption, the load from the src + // partial block must happen before the store to the dst partial block. + vmovdqa %xmm0, %xmm1 + vmovdqu8 16(SRC), %xmm0{%k1} + vmovdqu8 %xmm1, 16(DST){%k1} +.else + lea .Lcts_permute_table(%rip), %rax + + // Load the src partial block, left-aligned. Note that to support + // in-place en/decryption, this must happen before the store to the dst + // partial block. + vmovdqu (SRC, LEN, 1), %xmm1 + + // Shift the first LEN bytes of the en/decryption of the last full block + // to the end of a register, then store it to DST+LEN. This stores the + // dst partial block. It also writes to the second part of the dst last + // full block, but that part is overwritten later. + vpshufb (%rax, LEN, 1), %xmm0, %xmm2 + vmovdqu %xmm2, (DST, LEN, 1) + + // Make xmm3 contain [16-LEN,16-LEN+1,...,14,15,0x80,0x80,...]. + sub LEN, %rax + vmovdqu 32(%rax), %xmm3 + + // Shift the src partial block to the beginning of its register. + vpshufb %xmm3, %xmm1, %xmm1 + + // Do a blend to generate the src partial block followed by the second + // part of the en/decryption of the last full block. + vpblendvb %xmm3, %xmm0, %xmm1, %xmm0 +.endif + // En/decrypt again and store the last full block. + _aes_crypt \enc, _XMM, CTS_TWEAK1, %xmm0 + vmovdqu %xmm0, (DST) + jmp .Ldone\@ +.endm + +// void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, +// u8 iv[AES_BLOCK_SIZE]); +SYM_FUNC_START(aes_xts_encrypt_iv) + vmovdqu (%rsi), %xmm0 + vpxor 0*16(%rdi), %xmm0, %xmm0 + vaesenc 1*16(%rdi), %xmm0, %xmm0 + vaesenc 2*16(%rdi), %xmm0, %xmm0 + vaesenc 3*16(%rdi), %xmm0, %xmm0 + vaesenc 4*16(%rdi), %xmm0, %xmm0 + vaesenc 5*16(%rdi), %xmm0, %xmm0 + vaesenc 6*16(%rdi), %xmm0, %xmm0 + vaesenc 7*16(%rdi), %xmm0, %xmm0 + vaesenc 8*16(%rdi), %xmm0, %xmm0 + vaesenc 9*16(%rdi), %xmm0, %xmm0 + cmpl $24, 480(%rdi) + jle .Lencrypt_iv_aes_128_or_192 + vaesenc 10*16(%rdi), %xmm0, %xmm0 + vaesenc 11*16(%rdi), %xmm0, %xmm0 + vaesenc 12*16(%rdi), %xmm0, %xmm0 + vaesenc 13*16(%rdi), %xmm0, %xmm0 + vaesenclast 14*16(%rdi), %xmm0, %xmm0 +.Lencrypt_iv_done: + vmovdqu %xmm0, (%rsi) + RET + + // Out-of-line handling of AES-128 and AES-192 +.Lencrypt_iv_aes_128_or_192: + jz .Lencrypt_iv_aes_192 + vaesenclast 10*16(%rdi), %xmm0, %xmm0 + jmp .Lencrypt_iv_done +.Lencrypt_iv_aes_192: + vaesenc 10*16(%rdi), %xmm0, %xmm0 + vaesenc 11*16(%rdi), %xmm0, %xmm0 + vaesenclast 12*16(%rdi), %xmm0, %xmm0 + jmp .Lencrypt_iv_done +SYM_FUNC_END(aes_xts_encrypt_iv) + +// Below are the actual AES-XTS encryption and decryption functions, +// instantiated from the above macro. They all have the following prototype: +// +// void (*xts_asm_func)(const struct crypto_aes_ctx *key, +// const u8 *src, u8 *dst, size_t len, +// u8 tweak[AES_BLOCK_SIZE]); +// +// |key| is the data key. |tweak| contains the next tweak; the encryption of +// the original IV with the tweak key was already done. This function supports +// incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and +// |len| must be a multiple of 16 except on the last call. If |len| is a +// multiple of 16, then this function updates |tweak| to contain the next tweak. From 996f4dcbd231ec022f38a3c27e7fc45727e4e875 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Mar 2024 01:03:51 -0700 Subject: [PATCH 037/123] crypto: x86/aes-xts - wire up AESNI + AVX implementation Add an AES-XTS implementation "xts-aes-aesni-avx" for x86_64 CPUs that have the AES-NI and AVX extensions but not VAES. It's similar to the existing xts-aes-aesni in that uses xmm registers to operate on one AES block at a time. It differs from xts-aes-aesni in the following ways: - It uses the VEX-coded (non-destructive) instructions from AVX. This improves performance slightly. - It incorporates some additional optimizations such as interleaving the tweak computation with AES en/decryption, handling single-page messages more efficiently, and caching the first round key. - It supports only 64-bit (x86_64). - It's generated by an assembly macro that will also be used to generate VAES-based implementations. The performance improvement over xts-aes-aesni varies from small to large, depending on the CPU and other factors such as the size of the messages en/decrypted. For example, the following increases in AES-256-XTS decryption throughput are seen on the following CPUs: | 4096-byte messages | 512-byte messages | ----------------------+--------------------+-------------------+ Intel Skylake | 6% | 31% | Intel Cascade Lake | 4% | 26% | AMD Zen 1 | 61% | 73% | AMD Zen 2 | 36% | 59% | (The above CPUs don't support VAES, so they can't use VAES instead.) While this isn't as large an improvement as what VAES provides, this still seems worthwhile. This implementation is fairly easy to provide based on the assembly macro that's needed for VAES anyway, and it will be the best implementation on a large number of CPUs (very roughly, the CPUs launched by Intel and AMD from 2011 to 2018). This makes the existing xts-aes-aesni *mostly* obsolete. For now, leave it in place to support 32-bit kernels and also CPUs like Intel Westmere that support AES-NI but not AVX. (We could potentially remove it anyway and just rely on the indirect acceleration via ecb-aes-aesni in those cases, but that change will need to be considered separately.) Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 9 ++ arch/x86/crypto/aesni-intel_glue.c | 202 ++++++++++++++++++++++++++- 2 files changed, 209 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index a5e2783c46ec..32e26f562cf0 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -798,3 +798,12 @@ SYM_FUNC_END(aes_xts_encrypt_iv) // incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and // |len| must be a multiple of 16 except on the last call. If |len| is a // multiple of 16, then this function updates |tweak| to contain the next tweak. + +.set VL, 16 +.set USE_AVX10, 0 +SYM_TYPED_FUNC_START(aes_xts_encrypt_aesni_avx) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_aesni_avx) +SYM_TYPED_FUNC_START(aes_xts_decrypt_aesni_avx) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_aesni_avx) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 0ea3abaaa645..cb5e8578131f 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1135,7 +1135,196 @@ static struct skcipher_alg aesni_xctr = { }; static struct simd_skcipher_alg *aesni_simd_xctr; -#endif /* CONFIG_X86_64 */ + +asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, + u8 iv[AES_BLOCK_SIZE]); + +typedef void (*xts_asm_func)(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, size_t len, + u8 tweak[AES_BLOCK_SIZE]); + +/* This handles cases where the source and/or destination span pages. */ +static noinline int +xts_crypt_slowpath(struct skcipher_request *req, xts_asm_func asm_func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct skcipher_walk walk; + struct scatterlist *src, *dst; + int err; + + /* + * If the message length isn't divisible by the AES block size, then + * separate off the last full block and the partial block. This ensures + * that they are processed in the same call to the assembly function, + * which is required for ciphertext stealing. + */ + if (tail) { + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + req->cryptlen - tail - AES_BLOCK_SIZE, + req->iv); + req = &subreq; + } + + err = skcipher_walk_virt(&walk, req, false); + + while (walk.nbytes) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, AES_BLOCK_SIZE); + + kernel_fpu_begin(); + (*asm_func)(&ctx->crypt_ctx, walk.src.virt.addr, + walk.dst.virt.addr, nbytes, req->iv); + kernel_fpu_end(); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + if (err || !tail) + return err; + + /* Do ciphertext stealing with the last full block and partial block. */ + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + kernel_fpu_begin(); + (*asm_func)(&ctx->crypt_ctx, walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, req->iv); + kernel_fpu_end(); + + return skcipher_walk_done(&walk, 0); +} + +/* __always_inline to avoid indirect call in fastpath */ +static __always_inline int +xts_crypt2(struct skcipher_request *req, xts_asm_func asm_func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); + const unsigned int cryptlen = req->cryptlen; + struct scatterlist *src = req->src; + struct scatterlist *dst = req->dst; + + if (unlikely(cryptlen < AES_BLOCK_SIZE)) + return -EINVAL; + + kernel_fpu_begin(); + aes_xts_encrypt_iv(&ctx->tweak_ctx, req->iv); + + /* + * In practice, virtually all XTS plaintexts and ciphertexts are either + * 512 or 4096 bytes, aligned such that they don't span page boundaries. + * To optimize the performance of these cases, and also any other case + * where no page boundary is spanned, the below fast-path handles + * single-page sources and destinations as efficiently as possible. + */ + if (likely(src->length >= cryptlen && dst->length >= cryptlen && + src->offset + cryptlen <= PAGE_SIZE && + dst->offset + cryptlen <= PAGE_SIZE)) { + struct page *src_page = sg_page(src); + struct page *dst_page = sg_page(dst); + void *src_virt = kmap_local_page(src_page) + src->offset; + void *dst_virt = kmap_local_page(dst_page) + dst->offset; + + (*asm_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen, + req->iv); + kunmap_local(dst_virt); + kunmap_local(src_virt); + kernel_fpu_end(); + return 0; + } + kernel_fpu_end(); + return xts_crypt_slowpath(req, asm_func); +} + +#define DEFINE_XTS_ALG(suffix, driver_name, priority) \ + \ +asmlinkage void aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, \ + const u8 *src, u8 *dst, size_t len, \ + u8 tweak[AES_BLOCK_SIZE]); \ +asmlinkage void aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, \ + const u8 *src, u8 *dst, size_t len, \ + u8 tweak[AES_BLOCK_SIZE]); \ + \ +static int xts_encrypt_##suffix(struct skcipher_request *req) \ +{ \ + return xts_crypt2(req, aes_xts_encrypt_##suffix); \ +} \ + \ +static int xts_decrypt_##suffix(struct skcipher_request *req) \ +{ \ + return xts_crypt2(req, aes_xts_decrypt_##suffix); \ +} \ + \ +static struct skcipher_alg aes_xts_alg_##suffix = { \ + .base = { \ + .cra_name = "__xts(aes)", \ + .cra_driver_name = "__" driver_name, \ + .cra_priority = priority, \ + .cra_flags = CRYPTO_ALG_INTERNAL, \ + .cra_blocksize = AES_BLOCK_SIZE, \ + .cra_ctxsize = XTS_AES_CTX_SIZE, \ + .cra_module = THIS_MODULE, \ + }, \ + .min_keysize = 2 * AES_MIN_KEY_SIZE, \ + .max_keysize = 2 * AES_MAX_KEY_SIZE, \ + .ivsize = AES_BLOCK_SIZE, \ + .walksize = 2 * AES_BLOCK_SIZE, \ + .setkey = xts_aesni_setkey, \ + .encrypt = xts_encrypt_##suffix, \ + .decrypt = xts_decrypt_##suffix, \ +}; \ + \ +static struct simd_skcipher_alg *aes_xts_simdalg_##suffix + +DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500); + +static int __init register_xts_algs(void) +{ + int err; + + if (!boot_cpu_has(X86_FEATURE_AVX)) + return 0; + err = simd_register_skciphers_compat(&aes_xts_alg_aesni_avx, 1, + &aes_xts_simdalg_aesni_avx); + if (err) + return err; + return 0; +} + +static void unregister_xts_algs(void) +{ + if (aes_xts_simdalg_aesni_avx) + simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1, + &aes_xts_simdalg_aesni_avx); +} +#else /* CONFIG_X86_64 */ +static int __init register_xts_algs(void) +{ + return 0; +} + +static void unregister_xts_algs(void) +{ +} +#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_X86_64 static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key, @@ -1274,13 +1463,21 @@ static int __init aesni_init(void) goto unregister_aeads; #endif /* CONFIG_X86_64 */ + err = register_xts_algs(); + if (err) + goto unregister_xts; + return 0; +unregister_xts: + unregister_xts_algs(); #ifdef CONFIG_X86_64 + if (aesni_simd_xctr) + simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); unregister_aeads: +#endif /* CONFIG_X86_64 */ simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads), aesni_simd_aeads); -#endif /* CONFIG_X86_64 */ unregister_skciphers: simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), @@ -1301,6 +1498,7 @@ static void __exit aesni_exit(void) if (boot_cpu_has(X86_FEATURE_AVX)) simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); #endif /* CONFIG_X86_64 */ + unregister_xts_algs(); } late_initcall(aesni_init); From e787060bdfa35f8b40ef4d277a345ee35b41039f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Mar 2024 01:03:52 -0700 Subject: [PATCH 038/123] crypto: x86/aes-xts - wire up VAES + AVX2 implementation Add an AES-XTS implementation "xts-aes-vaes-avx2" for x86_64 CPUs with the VAES, VPCLMULQDQ, and AVX2 extensions, but not AVX512 or AVX10. This implementation uses ymm registers to operate on two AES blocks at a time. The assembly code is instantiated using a macro so that most of the source code is shared with other implementations. This is the optimal implementation on AMD Zen 3. It should also be the optimal implementation on Intel Alder Lake, which similarly supports VAES but not AVX512. Comparing to xts-aes-aesni-avx on Zen 3, xts-aes-vaes-avx2 provides 70% higher AES-256-XTS decryption throughput with 4096-byte messages, or 23% higher with 512-byte messages. A large improvement is also seen with CPUs that do support AVX512 (e.g., 98% higher AES-256-XTS decryption throughput on Ice Lake with 4096-byte messages), though the following patches add AVX512 optimized implementations to get a bit more performance on those CPUs. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 11 +++++++++++ arch/x86/crypto/aesni-intel_glue.c | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index 32e26f562cf0..43706213dfca 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -807,3 +807,14 @@ SYM_FUNC_END(aes_xts_encrypt_aesni_avx) SYM_TYPED_FUNC_START(aes_xts_decrypt_aesni_avx) _aes_xts_crypt 0 SYM_FUNC_END(aes_xts_decrypt_aesni_avx) + +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) +.set VL, 32 +.set USE_AVX10, 0 +SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_vaes_avx2) +SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx2) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_vaes_avx2) +#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index cb5e8578131f..c95ed01a0d60 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1295,6 +1295,9 @@ static struct skcipher_alg aes_xts_alg_##suffix = { \ static struct simd_skcipher_alg *aes_xts_simdalg_##suffix DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500); +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) +DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600); +#endif static int __init register_xts_algs(void) { @@ -1306,6 +1309,18 @@ static int __init register_xts_algs(void) &aes_xts_simdalg_aesni_avx); if (err) return err; +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_VAES) || + !boot_cpu_has(X86_FEATURE_VPCLMULQDQ) || + !boot_cpu_has(X86_FEATURE_PCLMULQDQ) || + !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + return 0; + err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx2, 1, + &aes_xts_simdalg_vaes_avx2); + if (err) + return err; +#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ return 0; } @@ -1314,6 +1329,11 @@ static void unregister_xts_algs(void) if (aes_xts_simdalg_aesni_avx) simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1, &aes_xts_simdalg_aesni_avx); +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) + if (aes_xts_simdalg_vaes_avx2) + simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1, + &aes_xts_simdalg_vaes_avx2); +#endif } #else /* CONFIG_X86_64 */ static int __init register_xts_algs(void) From ee63fea005be4a84a50603269ac9ca2c9bf9c6ca Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Mar 2024 01:03:53 -0700 Subject: [PATCH 039/123] crypto: x86/aes-xts - wire up VAES + AVX10/256 implementation Add an AES-XTS implementation "xts-aes-vaes-avx10_256" for x86_64 CPUs with the VAES, VPCLMULQDQ, and either AVX10/256 or AVX512BW + AVX512VL extensions. This implementation avoids using zmm registers, instead using ymm registers to operate on two AES blocks at a time. The assembly code is instantiated using a macro so that most of the source code is shared with other implementations. This is the optimal implementation on CPUs that support VAES and AVX512 but where the zmm registers should not be used due to downclocking effects, for example Intel's Ice Lake. It should also be the optimal implementation on future CPUs that support AVX10/256 but not AVX10/512. The performance is slightly better than that of xts-aes-vaes-avx2, which uses the same 256-bit vector length, due to factors such as being able to use ymm16-ymm31 to cache the AES round keys, and being able to use the vpternlogd instruction to do XORs more efficiently. For example, on Ice Lake, the throughput of decrypting 4096-byte messages with AES-256-XTS is 6.6% higher with xts-aes-vaes-avx10_256 than with xts-aes-vaes-avx2. While this is a small improvement, it is straightforward to provide this implementation (xts-aes-vaes-avx10_256) as long as we are providing xts-aes-vaes-avx2 and xts-aes-vaes-avx10_512 anyway, due to the way the _aes_xts_crypt macro is structured. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 9 +++++++++ arch/x86/crypto/aesni-intel_glue.c | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index 43706213dfca..71be474b22da 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -817,4 +817,13 @@ SYM_FUNC_END(aes_xts_encrypt_vaes_avx2) SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx2) _aes_xts_crypt 0 SYM_FUNC_END(aes_xts_decrypt_vaes_avx2) + +.set VL, 32 +.set USE_AVX10, 1 +SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_256) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256) +SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256) #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index c95ed01a0d60..fce794758937 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1297,6 +1297,7 @@ static struct simd_skcipher_alg *aes_xts_simdalg_##suffix DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500); #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600); +DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700); #endif static int __init register_xts_algs(void) @@ -1320,6 +1321,18 @@ static int __init register_xts_algs(void) &aes_xts_simdalg_vaes_avx2); if (err) return err; + + if (!boot_cpu_has(X86_FEATURE_AVX512BW) || + !boot_cpu_has(X86_FEATURE_AVX512VL) || + !boot_cpu_has(X86_FEATURE_BMI2) || + !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL)) + return 0; + + err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_256, 1, + &aes_xts_simdalg_vaes_avx10_256); + if (err) + return err; #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ return 0; } @@ -1333,6 +1346,9 @@ static void unregister_xts_algs(void) if (aes_xts_simdalg_vaes_avx2) simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1, &aes_xts_simdalg_vaes_avx2); + if (aes_xts_simdalg_vaes_avx10_256) + simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1, + &aes_xts_simdalg_vaes_avx10_256); #endif } #else /* CONFIG_X86_64 */ From aa2197f566479a204fcadb3205160837c3b82f66 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Mar 2024 01:03:54 -0700 Subject: [PATCH 040/123] crypto: x86/aes-xts - wire up VAES + AVX10/512 implementation Add an AES-XTS implementation "xts-aes-vaes-avx10_512" for x86_64 CPUs with the VAES, VPCLMULQDQ, and either AVX10/512 or AVX512BW + AVX512VL extensions. This implementation uses zmm registers to operate on four AES blocks at a time. The assembly code is instantiated using a macro so that most of the source code is shared with other implementations. To avoid downclocking on older Intel CPU models, an exclusion list is used to prevent this 512-bit implementation from being used by default on some CPU models. They will use xts-aes-vaes-avx10_256 instead. For now, this exclusion list is simply coded into aesni-intel_glue.c. It may make sense to eventually move it into a more central location. xts-aes-vaes-avx10_512 is slightly faster than xts-aes-vaes-avx10_256 on some current CPUs. E.g., on AMD Zen 4, AES-256-XTS decryption throughput increases by 13% with 4096-byte inputs, or 14% with 512-byte inputs. On Intel Sapphire Rapids, AES-256-XTS decryption throughput increases by 2% with 4096-byte inputs, or 3% with 512-byte inputs. Future CPUs may provide stronger 512-bit support, in which case a larger benefit should be seen. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 9 ++++++++ arch/x86/crypto/aesni-intel_glue.c | 32 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index 71be474b22da..b8005d0205f8 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -826,4 +826,13 @@ SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256) SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256) _aes_xts_crypt 0 SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256) + +.set VL, 64 +.set USE_AVX10, 1 +SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_512) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_512) +SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_512) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_512) #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index fce794758937..0b37a470325b 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1298,8 +1298,29 @@ DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500); #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600); DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700); +DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800); #endif +/* + * This is a list of CPU models that are known to suffer from downclocking when + * zmm registers (512-bit vectors) are used. On these CPUs, the AES-XTS + * implementation with zmm registers won't be used by default. An + * implementation with ymm registers (256-bit vectors) will be used instead. + */ +static const struct x86_cpu_id zmm_exclusion_list[] = { + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_SKYLAKE_X }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_X }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_D }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_L }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_NNPI }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE_L }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE }, + /* Allow Rocket Lake and later, and Sapphire Rapids and later. */ + /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */ + {}, +}; + static int __init register_xts_algs(void) { int err; @@ -1333,6 +1354,14 @@ static int __init register_xts_algs(void) &aes_xts_simdalg_vaes_avx10_256); if (err) return err; + + if (x86_match_cpu(zmm_exclusion_list)) + aes_xts_alg_vaes_avx10_512.base.cra_priority = 1; + + err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1, + &aes_xts_simdalg_vaes_avx10_512); + if (err) + return err; #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ return 0; } @@ -1349,6 +1378,9 @@ static void unregister_xts_algs(void) if (aes_xts_simdalg_vaes_avx10_256) simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1, &aes_xts_simdalg_vaes_avx10_256); + if (aes_xts_simdalg_vaes_avx10_512) + simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1, + &aes_xts_simdalg_vaes_avx10_512); #endif } #else /* CONFIG_X86_64 */ From 8fa5f4f01c9fb4d4e2af1ebf6537c7b814c9eb2e Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 29 Mar 2024 16:44:54 +0100 Subject: [PATCH 041/123] crypto: jitter - Remove duplicate word in comment s/in// Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu --- crypto/jitterentropy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index 26a9048bc893..ee2aa0b7aa9e 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -158,7 +158,7 @@ struct rand_data { * See the SP 800-90B comment #10b for the corrected cutoff for the SP 800-90B * APT. * http://www.untruth.org/~josh/sp80090b/UL%20SP800-90B-final%20comments%20v1.9%2020191212.pdf - * In in the syntax of R, this is C = 2 + qbinom(1 − 2^(−30), 511, 2^(-1/osr)). + * In the syntax of R, this is C = 2 + qbinom(1 − 2^(−30), 511, 2^(-1/osr)). * (The original formula wasn't correct because the first symbol must * necessarily have been observed, so there is no chance of observing 0 of these * symbols.) From 4ad27a8be9dbefd4820da0f60da879d512b2f659 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 29 Mar 2024 16:53:45 +0100 Subject: [PATCH 042/123] crypto: jitter - Replace http with https The PDF is also available via https. Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu --- crypto/jitterentropy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index ee2aa0b7aa9e..d7056de8c0d7 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -157,7 +157,7 @@ struct rand_data { /* * See the SP 800-90B comment #10b for the corrected cutoff for the SP 800-90B * APT. - * http://www.untruth.org/~josh/sp80090b/UL%20SP800-90B-final%20comments%20v1.9%2020191212.pdf + * https://www.untruth.org/~josh/sp80090b/UL%20SP800-90B-final%20comments%20v1.9%2020191212.pdf * In the syntax of R, this is C = 2 + qbinom(1 − 2^(−30), 511, 2^(-1/osr)). * (The original formula wasn't correct because the first symbol must * necessarily have been observed, so there is no chance of observing 0 of these From 233e750592921c26d48bc8440bfe8b2334852eb0 Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Mon, 1 Apr 2024 15:16:23 +0300 Subject: [PATCH 043/123] crypto: ecc - remove checks in crypto_ecdh_shared_secret() and ecc_make_pub_key() With the current state of the code, ecc_get_curve() cannot return NULL in crypto_ecdh_shared_secret() and ecc_make_pub_key(). This is conditioned by the fact that they are only called from ecdh_compute_value(), which implements the kpp_alg::{generate_public_key,compute_shared_secret}() methods. Also ecdh implements the kpp_alg::init() method, which is called before the other methods and sets ecdh_ctx::curve_id to a valid value. Signed-off-by: Roman Smirnov Reviewed-by: Sergey Shtylyov Signed-off-by: Herbert Xu --- crypto/ecc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index 3581027e9f92..7c960e0d8e43 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -1520,7 +1520,7 @@ int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits, u64 priv[ECC_MAX_DIGITS]; const struct ecc_curve *curve = ecc_get_curve(curve_id); - if (!private_key || !curve || ndigits > ARRAY_SIZE(priv)) { + if (!private_key || ndigits > ARRAY_SIZE(priv)) { ret = -EINVAL; goto out; } @@ -1622,7 +1622,7 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, unsigned int nbytes; const struct ecc_curve *curve = ecc_get_curve(curve_id); - if (!private_key || !public_key || !curve || + if (!private_key || !public_key || ndigits > ARRAY_SIZE(priv) || ndigits > ARRAY_SIZE(rand_z)) { ret = -EINVAL; goto out; From ea32d5474be1fc4e92e19b0e1a827ffff772eb22 Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Mon, 1 Apr 2024 15:22:58 +0300 Subject: [PATCH 044/123] crypto: algboss - remove NULL check in cryptomgr_schedule_probe() The for loop will be executed at least once, so i > 0. If the loop is interrupted before i is incremented (e.g., when checking len for NULL), i will not be checked. Found by Linux Verification Center (linuxtesting.org) with Svace. Signed-off-by: Roman Smirnov Reviewed-by: Sergey Shtylyov Signed-off-by: Herbert Xu --- crypto/algboss.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/crypto/algboss.c b/crypto/algboss.c index 0de1e6697949..1aa5f306998a 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -138,9 +138,6 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval) goto err_free_param; } - if (!i) - goto err_free_param; - param->tb[i + 1] = NULL; param->type.attr.rta_len = sizeof(param->type); From f5c2cf9d14be283e5240c04d03ad96577d55f9f4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 2 Apr 2024 09:13:55 +0100 Subject: [PATCH 045/123] crypto: qat - Fix spelling mistake "Invalide" -> "Invalid" There is a spelling mistake in a dev_err message. Fix it. Signed-off-by: Colin Ian King Acked-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c index 78a39cfe196f..a62eb5e8dbe6 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c @@ -976,7 +976,7 @@ static int adf_gen4_vfmig_load_setup(struct qat_mig_dev *mdev, int len) ret = adf_mstate_mgr_init_from_remote(vfmig->mstate_mgr, mdev->state, setup_size, NULL, NULL); if (ret) { - dev_err(&GET_DEV(accel_dev), "Invalide setup for vf_nr %d\n", + dev_err(&GET_DEV(accel_dev), "Invalid setup for vf_nr %d\n", vf_nr); return ret; } From 42c2d7d02977ef09d434b1f5b354f5bc6c1027ab Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:42 +0200 Subject: [PATCH 046/123] crypto: ccp - drop platform ifdef checks When both ACPI and OF are disabled, the dev_vdata variable is unused: drivers/crypto/ccp/sp-platform.c:33:34: error: unused variable 'dev_vdata' [-Werror,-Wunused-const-variable] This is not a useful configuration, and there is not much point in saving a few bytes when only one of the two is enabled, so just remove all these ifdef checks and rely on of_match_node() and acpi_match_device() returning NULL when these subsystems are disabled. Fixes: 6c5063434098 ("crypto: ccp - Add ACPI support") Signed-off-by: Arnd Bergmann Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sp-platform.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c index 473301237760..ff6ceb4feee0 100644 --- a/drivers/crypto/ccp/sp-platform.c +++ b/drivers/crypto/ccp/sp-platform.c @@ -39,44 +39,38 @@ static const struct sp_dev_vdata dev_vdata[] = { }, }; -#ifdef CONFIG_ACPI static const struct acpi_device_id sp_acpi_match[] = { { "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] }, { }, }; MODULE_DEVICE_TABLE(acpi, sp_acpi_match); -#endif -#ifdef CONFIG_OF static const struct of_device_id sp_of_match[] = { { .compatible = "amd,ccp-seattle-v1a", .data = (const void *)&dev_vdata[0] }, { }, }; MODULE_DEVICE_TABLE(of, sp_of_match); -#endif static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev) { -#ifdef CONFIG_OF const struct of_device_id *match; match = of_match_node(sp_of_match, pdev->dev.of_node); if (match && match->data) return (struct sp_dev_vdata *)match->data; -#endif + return NULL; } static struct sp_dev_vdata *sp_get_acpi_version(struct platform_device *pdev) { -#ifdef CONFIG_ACPI const struct acpi_device_id *match; match = acpi_match_device(sp_acpi_match, &pdev->dev); if (match && match->driver_data) return (struct sp_dev_vdata *)match->driver_data; -#endif + return NULL; } @@ -212,12 +206,8 @@ static int sp_platform_resume(struct platform_device *pdev) static struct platform_driver sp_platform_driver = { .driver = { .name = "ccp", -#ifdef CONFIG_ACPI .acpi_match_table = sp_acpi_match, -#endif -#ifdef CONFIG_OF .of_match_table = sp_of_match, -#endif }, .probe = sp_platform_probe, .remove_new = sp_platform_remove, From 58329c4312031603bb1786b44265c26d5065fe72 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 3 Apr 2024 17:36:18 +0800 Subject: [PATCH 047/123] padata: Disable BH when taking works lock on MT path As the old padata code can execute in softirq context, disable softirqs for the new padata_do_mutithreaded code too as otherwise lockdep will get antsy. Reported-by: syzbot+0cb5bb0f4bf9e79db3b3@syzkaller.appspotmail.com Signed-off-by: Herbert Xu Acked-by: Daniel Jordan Signed-off-by: Herbert Xu --- kernel/padata.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index e3f639ff1670..53f4bc912712 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -106,7 +106,7 @@ static int __init padata_work_alloc_mt(int nworks, void *data, { int i; - spin_lock(&padata_works_lock); + spin_lock_bh(&padata_works_lock); /* Start at 1 because the current task participates in the job. */ for (i = 1; i < nworks; ++i) { struct padata_work *pw = padata_work_alloc(); @@ -116,7 +116,7 @@ static int __init padata_work_alloc_mt(int nworks, void *data, padata_work_init(pw, padata_mt_helper, data, 0); list_add(&pw->pw_list, head); } - spin_unlock(&padata_works_lock); + spin_unlock_bh(&padata_works_lock); return i; } @@ -134,12 +134,12 @@ static void __init padata_works_free(struct list_head *works) if (list_empty(works)) return; - spin_lock(&padata_works_lock); + spin_lock_bh(&padata_works_lock); list_for_each_entry_safe(cur, next, works, pw_list) { list_del(&cur->pw_list); padata_work_free(cur); } - spin_unlock(&padata_works_lock); + spin_unlock_bh(&padata_works_lock); } static void padata_parallel_worker(struct work_struct *parallel_work) From 17048b225b0307b7b271b2fa4f5479fa688e3694 Mon Sep 17 00:00:00 2001 From: Akhil R Date: Wed, 3 Apr 2024 15:30:35 +0530 Subject: [PATCH 048/123] dt-bindings: crypto: Add Tegra Security Engine Add DT binding document for Tegra Security Engine. The AES and HASH algorithms are handled independently by separate engines within the Security Engine. These engines are registered as two separate crypto engine drivers. Signed-off-by: Akhil R Reviewed-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- .../crypto/nvidia,tegra234-se-aes.yaml | 52 +++++++++++++++++++ .../crypto/nvidia,tegra234-se-hash.yaml | 52 +++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml create mode 100644 Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml diff --git a/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml new file mode 100644 index 000000000000..cb47ae2889b6 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/nvidia,tegra234-se-aes.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVIDIA Tegra Security Engine for AES algorithms + +description: + The Tegra Security Engine accelerates the following AES encryption/decryption + algorithms - AES-ECB, AES-CBC, AES-OFB, AES-XTS, AES-CTR, AES-GCM, AES-CCM, + AES-CMAC + +maintainers: + - Akhil R + +properties: + compatible: + const: nvidia,tegra234-se-aes + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + iommus: + maxItems: 1 + + dma-coherent: true + +required: + - compatible + - reg + - clocks + - iommus + +additionalProperties: false + +examples: + - | + #include + #include + + crypto@15820000 { + compatible = "nvidia,tegra234-se-aes"; + reg = <0x15820000 0x10000>; + clocks = <&bpmp TEGRA234_CLK_SE>; + iommus = <&smmu TEGRA234_SID_SES_SE1>; + dma-coherent; + }; +... diff --git a/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml new file mode 100644 index 000000000000..f57ef10645e2 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/nvidia,tegra234-se-hash.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVIDIA Tegra Security Engine for HASH algorithms + +description: + The Tegra Security HASH Engine accelerates the following HASH functions - + SHA1, SHA224, SHA256, SHA384, SHA512, SHA3-224, SHA3-256, SHA3-384, SHA3-512 + HMAC(SHA224), HMAC(SHA256), HMAC(SHA384), HMAC(SHA512) + +maintainers: + - Akhil R + +properties: + compatible: + const: nvidia,tegra234-se-hash + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + iommus: + maxItems: 1 + + dma-coherent: true + +required: + - compatible + - reg + - clocks + - iommus + +additionalProperties: false + +examples: + - | + #include + #include + + crypto@15840000 { + compatible = "nvidia,tegra234-se-hash"; + reg = <0x15840000 0x10000>; + clocks = <&bpmp TEGRA234_CLK_SE>; + iommus = <&smmu TEGRA234_SID_SES_SE2>; + dma-coherent; + }; +... From cc370ff85bae5373330518e4ebb2d36c0ca8a470 Mon Sep 17 00:00:00 2001 From: Akhil R Date: Wed, 3 Apr 2024 15:30:36 +0530 Subject: [PATCH 049/123] gpu: host1x: Add Tegra SE to SID table Add Tegra Security Engine details to the SID table in host1x driver. These entries are required to be in place to configure the stream ID for SE. Register writes to stream ID registers fail otherwise. Signed-off-by: Akhil R Acked-by: Mikko Perttunen Signed-off-by: Herbert Xu --- drivers/gpu/host1x/dev.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 89983d7d73ca..3a0aaa68ac8d 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -215,6 +215,30 @@ static const struct host1x_info host1x07_info = { * and firmware stream ID in the MMIO path table. */ static const struct host1x_sid_entry tegra234_sid_table[] = { + { + /* SE2 MMIO */ + .base = 0x1658, + .offset = 0x90, + .limit = 0x90 + }, + { + /* SE4 MMIO */ + .base = 0x1660, + .offset = 0x90, + .limit = 0x90 + }, + { + /* SE2 channel */ + .base = 0x1738, + .offset = 0x90, + .limit = 0x90 + }, + { + /* SE4 channel */ + .base = 0x1740, + .offset = 0x90, + .limit = 0x90 + }, { /* VIC channel */ .base = 0x17b8, From 0880bb3b00c855fc244b7177ffdaafef4d0aa1e0 Mon Sep 17 00:00:00 2001 From: Akhil R Date: Wed, 3 Apr 2024 15:30:37 +0530 Subject: [PATCH 050/123] crypto: tegra - Add Tegra Security Engine driver Add support for Tegra Security Engine which can accelerate various crypto algorithms. The Engine has two separate instances within for AES and HASH algorithms respectively. The driver registers two crypto engines - one for AES and another for HASH algorithms and these operate independently and both uses the host1x bus. Additionally, it provides hardware-assisted key protection for up to 15 symmetric keys which it can use for the cipher operations. Signed-off-by: Akhil R Signed-off-by: Herbert Xu --- MAINTAINERS | 5 + drivers/crypto/Kconfig | 8 + drivers/crypto/Makefile | 1 + drivers/crypto/tegra/Makefile | 9 + drivers/crypto/tegra/tegra-se-aes.c | 1933 ++++++++++++++++++++++++++ drivers/crypto/tegra/tegra-se-hash.c | 1060 ++++++++++++++ drivers/crypto/tegra/tegra-se-key.c | 156 +++ drivers/crypto/tegra/tegra-se-main.c | 439 ++++++ drivers/crypto/tegra/tegra-se.h | 560 ++++++++ 9 files changed, 4171 insertions(+) create mode 100644 drivers/crypto/tegra/Makefile create mode 100644 drivers/crypto/tegra/tegra-se-aes.c create mode 100644 drivers/crypto/tegra/tegra-se-hash.c create mode 100644 drivers/crypto/tegra/tegra-se-key.c create mode 100644 drivers/crypto/tegra/tegra-se-main.c create mode 100644 drivers/crypto/tegra/tegra-se.h diff --git a/MAINTAINERS b/MAINTAINERS index aa3b947fb080..613604d2e999 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21702,6 +21702,11 @@ M: Prashant Gaikwad S: Supported F: drivers/clk/tegra/ +TEGRA CRYPTO DRIVERS +M: Akhil R +S: Supported +F: drivers/crypto/tegra/* + TEGRA DMA DRIVERS M: Laxman Dewangan M: Jon Hunter diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 3d02702456a5..bb27690f8f7c 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -660,6 +660,14 @@ config CRYPTO_DEV_ROCKCHIP_DEBUG This will create /sys/kernel/debug/rk3288_crypto/stats for displaying the number of requests per algorithm and other internal stats. +config CRYPTO_DEV_TEGRA + tristate "Enable Tegra Security Engine" + depends on TEGRA_HOST1X + select CRYPTO_ENGINE + + help + Select this to enable Tegra Security Engine which accelerates various + AES encryption/decryption and HASH algorithms. config CRYPTO_DEV_ZYNQMP_AES tristate "Support for Xilinx ZynqMP AES hw accelerator" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 95331bc6456b..ad4ccef67d12 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/ obj-y += stm32/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o +obj-$(CONFIG_CRYPTO_DEV_TEGRA) += tegra/ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ #obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ diff --git a/drivers/crypto/tegra/Makefile b/drivers/crypto/tegra/Makefile new file mode 100644 index 000000000000..a32001e58eb2 --- /dev/null +++ b/drivers/crypto/tegra/Makefile @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +tegra-se-objs := tegra-se-key.o tegra-se-main.o + +tegra-se-y += tegra-se-aes.o +tegra-se-y += tegra-se-hash.o + +obj-$(CONFIG_CRYPTO_DEV_TEGRA) += tegra-se.o diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c new file mode 100644 index 000000000000..adc6cdab389e --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-aes.c @@ -0,0 +1,1933 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver to handle block cipher algorithms using NVIDIA Security Engine. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tegra-se.h" + +struct tegra_aes_ctx { + struct tegra_se *se; + u32 alg; + u32 ivsize; + u32 key1_id; + u32 key2_id; +}; + +struct tegra_aes_reqctx { + struct tegra_se_datbuf datbuf; + bool encrypt; + u32 config; + u32 crypto_config; + u32 len; + u32 *iv; +}; + +struct tegra_aead_ctx { + struct tegra_se *se; + unsigned int authsize; + u32 alg; + u32 keylen; + u32 key_id; +}; + +struct tegra_aead_reqctx { + struct tegra_se_datbuf inbuf; + struct tegra_se_datbuf outbuf; + struct scatterlist *src_sg; + struct scatterlist *dst_sg; + unsigned int assoclen; + unsigned int cryptlen; + unsigned int authsize; + bool encrypt; + u32 config; + u32 crypto_config; + u32 key_id; + u32 iv[4]; + u8 authdata[16]; +}; + +struct tegra_cmac_ctx { + struct tegra_se *se; + unsigned int alg; + u32 key_id; + struct crypto_shash *fallback_tfm; +}; + +struct tegra_cmac_reqctx { + struct scatterlist *src_sg; + struct tegra_se_datbuf datbuf; + struct tegra_se_datbuf residue; + unsigned int total_len; + unsigned int blk_size; + unsigned int task; + u32 crypto_config; + u32 config; + u32 key_id; + u32 *iv; + u32 result[CMAC_RESULT_REG_COUNT]; +}; + +/* increment counter (128-bit int) */ +static void ctr_iv_inc(__u8 *counter, __u8 bits, __u32 nums) +{ + do { + --bits; + nums += counter[bits]; + counter[bits] = nums & 0xff; + nums >>= 8; + } while (bits && nums); +} + +static void tegra_cbc_iv_copyback(struct skcipher_request *req, struct tegra_aes_ctx *ctx) +{ + struct tegra_aes_reqctx *rctx = skcipher_request_ctx(req); + unsigned int offset; + + offset = req->cryptlen - ctx->ivsize; + + if (rctx->encrypt) + memcpy(req->iv, rctx->datbuf.buf + offset, ctx->ivsize); + else + scatterwalk_map_and_copy(req->iv, req->src, offset, ctx->ivsize, 0); +} + +static void tegra_aes_update_iv(struct skcipher_request *req, struct tegra_aes_ctx *ctx) +{ + int num; + + if (ctx->alg == SE_ALG_CBC) { + tegra_cbc_iv_copyback(req, ctx); + } else if (ctx->alg == SE_ALG_CTR) { + num = req->cryptlen / ctx->ivsize; + if (req->cryptlen % ctx->ivsize) + num++; + + ctr_iv_inc(req->iv, ctx->ivsize, num); + } +} + +static int tegra234_aes_crypto_cfg(u32 alg, bool encrypt) +{ + switch (alg) { + case SE_ALG_CMAC: + case SE_ALG_GMAC: + case SE_ALG_GCM: + case SE_ALG_GCM_FINAL: + return 0; + case SE_ALG_CBC: + if (encrypt) + return SE_CRYPTO_CFG_CBC_ENCRYPT; + else + return SE_CRYPTO_CFG_CBC_DECRYPT; + case SE_ALG_ECB: + if (encrypt) + return SE_CRYPTO_CFG_ECB_ENCRYPT; + else + return SE_CRYPTO_CFG_ECB_DECRYPT; + case SE_ALG_XTS: + if (encrypt) + return SE_CRYPTO_CFG_XTS_ENCRYPT; + else + return SE_CRYPTO_CFG_XTS_DECRYPT; + + case SE_ALG_CTR: + return SE_CRYPTO_CFG_CTR; + case SE_ALG_CBC_MAC: + return SE_CRYPTO_CFG_CBC_MAC; + + default: + break; + } + + return -EINVAL; +} + +static int tegra234_aes_cfg(u32 alg, bool encrypt) +{ + switch (alg) { + case SE_ALG_CBC: + case SE_ALG_ECB: + case SE_ALG_XTS: + case SE_ALG_CTR: + if (encrypt) + return SE_CFG_AES_ENCRYPT; + else + return SE_CFG_AES_DECRYPT; + + case SE_ALG_GMAC: + if (encrypt) + return SE_CFG_GMAC_ENCRYPT; + else + return SE_CFG_GMAC_DECRYPT; + + case SE_ALG_GCM: + if (encrypt) + return SE_CFG_GCM_ENCRYPT; + else + return SE_CFG_GCM_DECRYPT; + + case SE_ALG_GCM_FINAL: + if (encrypt) + return SE_CFG_GCM_FINAL_ENCRYPT; + else + return SE_CFG_GCM_FINAL_DECRYPT; + + case SE_ALG_CMAC: + return SE_CFG_CMAC; + + case SE_ALG_CBC_MAC: + return SE_AES_ENC_ALG_AES_ENC | + SE_AES_DST_HASH_REG; + } + return -EINVAL; +} + +static unsigned int tegra_aes_prep_cmd(struct tegra_aes_ctx *ctx, + struct tegra_aes_reqctx *rctx) +{ + unsigned int data_count, res_bits, i = 0, j; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + dma_addr_t addr = rctx->datbuf.addr; + + data_count = rctx->len / AES_BLOCK_SIZE; + res_bits = (rctx->len % AES_BLOCK_SIZE) * 8; + + /* + * Hardware processes data_count + 1 blocks. + * Reduce 1 block if there is no residue + */ + if (!res_bits) + data_count--; + + if (rctx->iv) { + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = rctx->iv[j]; + } + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) | + SE_LAST_BLOCK_RES_BITS(res_bits); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + /* Source address setting */ + cpuvaddr[i++] = lower_32_bits(addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(addr)) | SE_ADDR_HI_SZ(rctx->len); + + /* Destination address setting */ + cpuvaddr[i++] = lower_32_bits(addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(addr)) | + SE_ADDR_HI_SZ(rctx->len); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_LASTBUF | + SE_AES_OP_START; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config); + + return i; +} + +static int tegra_aes_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct skcipher_request *req = container_of(areq, struct skcipher_request, base); + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct tegra_aes_reqctx *rctx = skcipher_request_ctx(req); + struct tegra_se *se = ctx->se; + unsigned int cmdlen; + int ret; + + rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_AES_BUFLEN, + &rctx->datbuf.addr, GFP_KERNEL); + if (!rctx->datbuf.buf) + return -ENOMEM; + + rctx->datbuf.size = SE_AES_BUFLEN; + rctx->iv = (u32 *)req->iv; + rctx->len = req->cryptlen; + + /* Pad input to AES Block size */ + if (ctx->alg != SE_ALG_XTS) { + if (rctx->len % AES_BLOCK_SIZE) + rctx->len += AES_BLOCK_SIZE - (rctx->len % AES_BLOCK_SIZE); + } + + scatterwalk_map_and_copy(rctx->datbuf.buf, req->src, 0, req->cryptlen, 0); + + /* Prepare the command and submit for execution */ + cmdlen = tegra_aes_prep_cmd(ctx, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + + /* Copy the result */ + tegra_aes_update_iv(req, ctx); + scatterwalk_map_and_copy(rctx->datbuf.buf, req->dst, 0, req->cryptlen, 1); + + /* Free the buffer */ + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->datbuf.buf, rctx->datbuf.addr); + + crypto_finalize_skcipher_request(se->engine, req, ret); + + return 0; +} + +static int tegra_aes_cra_init(struct crypto_skcipher *tfm) +{ + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct tegra_se_alg *se_alg; + const char *algname; + int ret; + + se_alg = container_of(alg, struct tegra_se_alg, alg.skcipher.base); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct tegra_aes_reqctx)); + + ctx->ivsize = crypto_skcipher_ivsize(tfm); + ctx->se = se_alg->se_dev; + ctx->key1_id = 0; + ctx->key2_id = 0; + + algname = crypto_tfm_alg_name(&tfm->base); + ret = se_algname_to_algid(algname); + if (ret < 0) { + dev_err(ctx->se->dev, "invalid algorithm\n"); + return ret; + } + + ctx->alg = ret; + + return 0; +} + +static void tegra_aes_cra_exit(struct crypto_skcipher *tfm) +{ + struct tegra_aes_ctx *ctx = crypto_tfm_ctx(&tfm->base); + + if (ctx->key1_id) + tegra_key_invalidate(ctx->se, ctx->key1_id, ctx->alg); + + if (ctx->key2_id) + tegra_key_invalidate(ctx->se, ctx->key2_id, ctx->alg); +} + +static int tegra_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (aes_check_keylen(keylen)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key1_id); +} + +static int tegra_xts_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 len = keylen / 2; + int ret; + + ret = xts_verify_key(tfm, key, keylen); + if (ret || aes_check_keylen(len)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + ret = tegra_key_submit(ctx->se, key, len, + ctx->alg, &ctx->key1_id); + if (ret) + return ret; + + return tegra_key_submit(ctx->se, key + len, len, + ctx->alg, &ctx->key2_id); + + return 0; +} + +static int tegra_aes_kac_manifest(u32 user, u32 alg, u32 keylen) +{ + int manifest; + + manifest = SE_KAC_USER_NS; + + switch (alg) { + case SE_ALG_CBC: + case SE_ALG_ECB: + case SE_ALG_CTR: + manifest |= SE_KAC_ENC; + break; + case SE_ALG_XTS: + manifest |= SE_KAC_XTS; + break; + case SE_ALG_GCM: + manifest |= SE_KAC_GCM; + break; + case SE_ALG_CMAC: + manifest |= SE_KAC_CMAC; + break; + case SE_ALG_CBC_MAC: + manifest |= SE_KAC_ENC; + break; + default: + return -EINVAL; + } + + switch (keylen) { + case AES_KEYSIZE_128: + manifest |= SE_KAC_SIZE_128; + break; + case AES_KEYSIZE_192: + manifest |= SE_KAC_SIZE_192; + break; + case AES_KEYSIZE_256: + manifest |= SE_KAC_SIZE_256; + break; + default: + return -EINVAL; + } + + return manifest; +} + +static int tegra_aes_crypt(struct skcipher_request *req, bool encrypt) + +{ + struct crypto_skcipher *tfm; + struct tegra_aes_ctx *ctx; + struct tegra_aes_reqctx *rctx; + + tfm = crypto_skcipher_reqtfm(req); + ctx = crypto_skcipher_ctx(tfm); + rctx = skcipher_request_ctx(req); + + if (ctx->alg != SE_ALG_XTS) { + if (!IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(tfm))) { + dev_dbg(ctx->se->dev, "invalid length (%d)", req->cryptlen); + return -EINVAL; + } + } else if (req->cryptlen < XTS_BLOCK_SIZE) { + dev_dbg(ctx->se->dev, "invalid length (%d)", req->cryptlen); + return -EINVAL; + } + + if (!req->cryptlen) + return 0; + + rctx->encrypt = encrypt; + rctx->config = tegra234_aes_cfg(ctx->alg, encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(ctx->alg, encrypt); + rctx->crypto_config |= SE_AES_KEY_INDEX(ctx->key1_id); + + if (ctx->key2_id) + rctx->crypto_config |= SE_AES_KEY2_INDEX(ctx->key2_id); + + return crypto_transfer_skcipher_request_to_engine(ctx->se->engine, req); +} + +static int tegra_aes_encrypt(struct skcipher_request *req) +{ + return tegra_aes_crypt(req, true); +} + +static int tegra_aes_decrypt(struct skcipher_request *req) +{ + return tegra_aes_crypt(req, false); +} + +static struct tegra_se_alg tegra_aes_algs[] = { + { + .alg.skcipher.op.do_one_request = tegra_aes_do_one_req, + .alg.skcipher.base = { + .init = tegra_aes_cra_init, + .exit = tegra_aes_cra_exit, + .setkey = tegra_aes_setkey, + .encrypt = tegra_aes_encrypt, + .decrypt = tegra_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .base = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-tegra", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_aes_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + }, { + .alg.skcipher.op.do_one_request = tegra_aes_do_one_req, + .alg.skcipher.base = { + .init = tegra_aes_cra_init, + .exit = tegra_aes_cra_exit, + .setkey = tegra_aes_setkey, + .encrypt = tegra_aes_encrypt, + .decrypt = tegra_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .base = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-tegra", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_aes_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + }, { + .alg.skcipher.op.do_one_request = tegra_aes_do_one_req, + .alg.skcipher.base = { + .init = tegra_aes_cra_init, + .exit = tegra_aes_cra_exit, + .setkey = tegra_aes_setkey, + .encrypt = tegra_aes_encrypt, + .decrypt = tegra_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .base = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-tegra", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct tegra_aes_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + }, { + .alg.skcipher.op.do_one_request = tegra_aes_do_one_req, + .alg.skcipher.base = { + .init = tegra_aes_cra_init, + .exit = tegra_aes_cra_exit, + .setkey = tegra_xts_setkey, + .encrypt = tegra_aes_encrypt, + .decrypt = tegra_aes_decrypt, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .base = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-tegra", + .cra_priority = 500, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_aes_ctx), + .cra_alignmask = (__alignof__(u64) - 1), + .cra_module = THIS_MODULE, + }, + } + }, +}; + +static unsigned int tegra_gmac_prep_cmd(struct tegra_aead_ctx *ctx, + struct tegra_aead_reqctx *rctx) +{ + unsigned int data_count, res_bits, i = 0; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + + data_count = (rctx->assoclen / AES_BLOCK_SIZE); + res_bits = (rctx->assoclen % AES_BLOCK_SIZE) * 8; + + /* + * Hardware processes data_count + 1 blocks. + * Reduce 1 block if there is no residue + */ + if (!res_bits) + data_count--; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) | + SE_LAST_BLOCK_RES_BITS(res_bits); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 4); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) | + SE_ADDR_HI_SZ(rctx->assoclen); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL | + SE_AES_OP_INIT | SE_AES_OP_LASTBUF | + SE_AES_OP_START; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + return i; +} + +static unsigned int tegra_gcm_crypt_prep_cmd(struct tegra_aead_ctx *ctx, + struct tegra_aead_reqctx *rctx) +{ + unsigned int data_count, res_bits, i = 0, j; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr, op; + + data_count = (rctx->cryptlen / AES_BLOCK_SIZE); + res_bits = (rctx->cryptlen % AES_BLOCK_SIZE) * 8; + op = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL | + SE_AES_OP_LASTBUF | SE_AES_OP_START; + + /* + * If there is no assoc data, + * this will be the init command + */ + if (!rctx->assoclen) + op |= SE_AES_OP_INIT; + + /* + * Hardware processes data_count + 1 blocks. + * Reduce 1 block if there is no residue + */ + if (!res_bits) + data_count--; + + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = rctx->iv[j]; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) | + SE_LAST_BLOCK_RES_BITS(res_bits); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + /* Source Address */ + cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) | + SE_ADDR_HI_SZ(rctx->cryptlen); + + /* Destination Address */ + cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) | + SE_ADDR_HI_SZ(rctx->cryptlen); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = op; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config); + return i; +} + +static int tegra_gcm_prep_final_cmd(struct tegra_se *se, u32 *cpuvaddr, + struct tegra_aead_reqctx *rctx) +{ + unsigned int i = 0, j; + u32 op; + + op = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL | + SE_AES_OP_LASTBUF | SE_AES_OP_START; + + /* + * Set init for zero sized vector + */ + if (!rctx->assoclen && !rctx->cryptlen) + op |= SE_AES_OP_INIT; + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->aad_len, 2); + cpuvaddr[i++] = rctx->assoclen * 8; + cpuvaddr[i++] = 0; + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->cryp_msg_len, 2); + cpuvaddr[i++] = rctx->cryptlen * 8; + cpuvaddr[i++] = 0; + + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = rctx->iv[j]; + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + cpuvaddr[i++] = 0; + cpuvaddr[i++] = 0; + + /* Destination Address */ + cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) | + SE_ADDR_HI_SZ(0x10); /* HW always generates 128-bit tag */ + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = op; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config); + + return i; +} + +static int tegra_gcm_do_gmac(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + unsigned int cmdlen; + + scatterwalk_map_and_copy(rctx->inbuf.buf, + rctx->src_sg, 0, rctx->assoclen, 0); + + rctx->config = tegra234_aes_cfg(SE_ALG_GMAC, rctx->encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GMAC, rctx->encrypt) | + SE_AES_KEY_INDEX(ctx->key_id); + + cmdlen = tegra_gmac_prep_cmd(ctx, rctx); + + return tegra_se_host1x_submit(se, cmdlen); +} + +static int tegra_gcm_do_crypt(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + int cmdlen, ret; + + scatterwalk_map_and_copy(rctx->inbuf.buf, rctx->src_sg, + rctx->assoclen, rctx->cryptlen, 0); + + rctx->config = tegra234_aes_cfg(SE_ALG_GCM, rctx->encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GCM, rctx->encrypt) | + SE_AES_KEY_INDEX(ctx->key_id); + + /* Prepare command and submit */ + cmdlen = tegra_gcm_crypt_prep_cmd(ctx, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + if (ret) + return ret; + + /* Copy the result */ + scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg, + rctx->assoclen, rctx->cryptlen, 1); + + return 0; +} + +static int tegra_gcm_do_final(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + int cmdlen, ret, offset; + + rctx->config = tegra234_aes_cfg(SE_ALG_GCM_FINAL, rctx->encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GCM_FINAL, rctx->encrypt) | + SE_AES_KEY_INDEX(ctx->key_id); + + /* Prepare command and submit */ + cmdlen = tegra_gcm_prep_final_cmd(se, cpuvaddr, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + if (ret) + return ret; + + if (rctx->encrypt) { + /* Copy the result */ + offset = rctx->assoclen + rctx->cryptlen; + scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg, + offset, rctx->authsize, 1); + } + + return 0; +} + +static int tegra_gcm_do_verify(struct tegra_se *se, struct tegra_aead_reqctx *rctx) +{ + unsigned int offset; + u8 mac[16]; + + offset = rctx->assoclen + rctx->cryptlen; + scatterwalk_map_and_copy(mac, rctx->src_sg, offset, rctx->authsize, 0); + + if (crypto_memneq(rctx->outbuf.buf, mac, rctx->authsize)) + return -EBADMSG; + + return 0; +} + +static inline int tegra_ccm_check_iv(const u8 *iv) +{ + /* iv[0] gives value of q-1 + * 2 <= q <= 8 as per NIST 800-38C notation + * 2 <= L <= 8, so 1 <= L' <= 7. as per rfc 3610 notation + */ + if (iv[0] < 1 || iv[0] > 7) { + pr_debug("ccm_check_iv failed %d\n", iv[0]); + return -EINVAL; + } + + return 0; +} + +static unsigned int tegra_cbcmac_prep_cmd(struct tegra_aead_ctx *ctx, + struct tegra_aead_reqctx *rctx) +{ + unsigned int data_count, i = 0; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + + data_count = (rctx->inbuf.size / AES_BLOCK_SIZE) - 1; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) | + SE_ADDR_HI_SZ(rctx->inbuf.size); + + cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) | + SE_ADDR_HI_SZ(0x10); /* HW always generates 128 bit tag */ + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | + SE_AES_OP_LASTBUF | SE_AES_OP_START; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + return i; +} + +static unsigned int tegra_ctr_prep_cmd(struct tegra_aead_ctx *ctx, + struct tegra_aead_reqctx *rctx) +{ + unsigned int i = 0, j; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = rctx->iv[j]; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = (rctx->inbuf.size / AES_BLOCK_SIZE) - 1; + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + /* Source address setting */ + cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) | + SE_ADDR_HI_SZ(rctx->inbuf.size); + + /* Destination address setting */ + cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) | + SE_ADDR_HI_SZ(rctx->inbuf.size); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_LASTBUF | + SE_AES_OP_START; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", + rctx->config, rctx->crypto_config); + + return i; +} + +static int tegra_ccm_do_cbcmac(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + int cmdlen; + + rctx->config = tegra234_aes_cfg(SE_ALG_CBC_MAC, rctx->encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_CBC_MAC, + rctx->encrypt) | + SE_AES_KEY_INDEX(ctx->key_id); + + /* Prepare command and submit */ + cmdlen = tegra_cbcmac_prep_cmd(ctx, rctx); + + return tegra_se_host1x_submit(se, cmdlen); +} + +static int tegra_ccm_set_msg_len(u8 *block, unsigned int msglen, int csize) +{ + __be32 data; + + memset(block, 0, csize); + block += csize; + + if (csize >= 4) + csize = 4; + else if (msglen > (1 << (8 * csize))) + return -EOVERFLOW; + + data = cpu_to_be32(msglen); + memcpy(block - csize, (u8 *)&data + 4 - csize, csize); + + return 0; +} + +static int tegra_ccm_format_nonce(struct tegra_aead_reqctx *rctx, u8 *nonce) +{ + unsigned int q, t; + u8 *q_ptr, *iv = (u8 *)rctx->iv; + + memcpy(nonce, rctx->iv, 16); + + /*** 1. Prepare Flags Octet ***/ + + /* Encode t (mac length) */ + t = rctx->authsize; + nonce[0] |= (((t - 2) / 2) << 3); + + /* Adata */ + if (rctx->assoclen) + nonce[0] |= (1 << 6); + + /*** Encode Q - message length ***/ + q = iv[0] + 1; + q_ptr = nonce + 16 - q; + + return tegra_ccm_set_msg_len(q_ptr, rctx->cryptlen, q); +} + +static int tegra_ccm_format_adata(u8 *adata, unsigned int a) +{ + int len = 0; + + /* add control info for associated data + * RFC 3610 and NIST Special Publication 800-38C + */ + if (a < 65280) { + *(__be16 *)adata = cpu_to_be16(a); + len = 2; + } else { + *(__be16 *)adata = cpu_to_be16(0xfffe); + *(__be32 *)&adata[2] = cpu_to_be32(a); + len = 6; + } + + return len; +} + +static int tegra_ccm_add_padding(u8 *buf, unsigned int len) +{ + unsigned int padlen = 16 - (len % 16); + u8 padding[16] = {0}; + + if (padlen == 16) + return 0; + + memcpy(buf, padding, padlen); + + return padlen; +} + +static int tegra_ccm_format_blocks(struct tegra_aead_reqctx *rctx) +{ + unsigned int alen = 0, offset = 0; + u8 nonce[16], adata[16]; + int ret; + + ret = tegra_ccm_format_nonce(rctx, nonce); + if (ret) + return ret; + + memcpy(rctx->inbuf.buf, nonce, 16); + offset = 16; + + if (rctx->assoclen) { + alen = tegra_ccm_format_adata(adata, rctx->assoclen); + memcpy(rctx->inbuf.buf + offset, adata, alen); + offset += alen; + + scatterwalk_map_and_copy(rctx->inbuf.buf + offset, + rctx->src_sg, 0, rctx->assoclen, 0); + + offset += rctx->assoclen; + offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, + rctx->assoclen + alen); + } + + return offset; +} + +static int tegra_ccm_mac_result(struct tegra_se *se, struct tegra_aead_reqctx *rctx) +{ + u32 result[16]; + int i, ret; + + /* Read and clear Result */ + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + result[i] = readl(se->base + se->hw->regs->result + (i * 4)); + + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(0, se->base + se->hw->regs->result + (i * 4)); + + if (rctx->encrypt) { + memcpy(rctx->authdata, result, rctx->authsize); + } else { + ret = crypto_memneq(rctx->authdata, result, rctx->authsize); + if (ret) + return -EBADMSG; + } + + return 0; +} + +static int tegra_ccm_ctr_result(struct tegra_se *se, struct tegra_aead_reqctx *rctx) +{ + /* Copy result */ + scatterwalk_map_and_copy(rctx->outbuf.buf + 16, rctx->dst_sg, + rctx->assoclen, rctx->cryptlen, 1); + + if (rctx->encrypt) + scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg, + rctx->assoclen + rctx->cryptlen, + rctx->authsize, 1); + else + memcpy(rctx->authdata, rctx->outbuf.buf, rctx->authsize); + + return 0; +} + +static int tegra_ccm_compute_auth(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + struct scatterlist *sg; + int offset, ret; + + offset = tegra_ccm_format_blocks(rctx); + if (offset < 0) + return -EINVAL; + + /* Copy plain text to the buffer */ + sg = rctx->encrypt ? rctx->src_sg : rctx->dst_sg; + + scatterwalk_map_and_copy(rctx->inbuf.buf + offset, + sg, rctx->assoclen, + rctx->cryptlen, 0); + offset += rctx->cryptlen; + offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->cryptlen); + + rctx->inbuf.size = offset; + + ret = tegra_ccm_do_cbcmac(ctx, rctx); + if (ret) + return ret; + + return tegra_ccm_mac_result(se, rctx); +} + +static int tegra_ccm_do_ctr(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + unsigned int cmdlen, offset = 0; + struct scatterlist *sg = rctx->src_sg; + int ret; + + rctx->config = tegra234_aes_cfg(SE_ALG_CTR, rctx->encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_CTR, rctx->encrypt) | + SE_AES_KEY_INDEX(ctx->key_id); + + /* Copy authdata in the top of buffer for encryption/decryption */ + if (rctx->encrypt) + memcpy(rctx->inbuf.buf, rctx->authdata, rctx->authsize); + else + scatterwalk_map_and_copy(rctx->inbuf.buf, sg, + rctx->assoclen + rctx->cryptlen, + rctx->authsize, 0); + + offset += rctx->authsize; + offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->authsize); + + /* If there is no cryptlen, proceed to submit the task */ + if (rctx->cryptlen) { + scatterwalk_map_and_copy(rctx->inbuf.buf + offset, sg, + rctx->assoclen, rctx->cryptlen, 0); + offset += rctx->cryptlen; + offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->cryptlen); + } + + rctx->inbuf.size = offset; + + /* Prepare command and submit */ + cmdlen = tegra_ctr_prep_cmd(ctx, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + if (ret) + return ret; + + return tegra_ccm_ctr_result(se, rctx); +} + +static int tegra_ccm_crypt_init(struct aead_request *req, struct tegra_se *se, + struct tegra_aead_reqctx *rctx) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + u8 *iv = (u8 *)rctx->iv; + int ret, i; + + rctx->src_sg = req->src; + rctx->dst_sg = req->dst; + rctx->assoclen = req->assoclen; + rctx->authsize = crypto_aead_authsize(tfm); + + memcpy(iv, req->iv, 16); + + ret = tegra_ccm_check_iv(iv); + if (ret) + return ret; + + /* Note: rfc 3610 and NIST 800-38C require counter (ctr_0) of + * zero to encrypt auth tag. + * req->iv has the formatted ctr_0 (i.e. Flags || N || 0). + */ + memset(iv + 15 - iv[0], 0, iv[0] + 1); + + /* Clear any previous result */ + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(0, se->base + se->hw->regs->result + (i * 4)); + + return 0; +} + +static int tegra_ccm_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct aead_request *req = container_of(areq, struct aead_request, base); + struct tegra_aead_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct tegra_se *se = ctx->se; + int ret; + + /* Allocate buffers required */ + rctx->inbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN, + &rctx->inbuf.addr, GFP_KERNEL); + if (!rctx->inbuf.buf) + return -ENOMEM; + + rctx->inbuf.size = SE_AES_BUFLEN; + + rctx->outbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN, + &rctx->outbuf.addr, GFP_KERNEL); + if (!rctx->outbuf.buf) { + ret = ENOMEM; + goto outbuf_err; + } + + rctx->outbuf.size = SE_AES_BUFLEN; + + ret = tegra_ccm_crypt_init(req, se, rctx); + if (ret) + goto out; + + if (rctx->encrypt) { + rctx->cryptlen = req->cryptlen; + + /* CBC MAC Operation */ + ret = tegra_ccm_compute_auth(ctx, rctx); + if (ret) + goto out; + + /* CTR operation */ + ret = tegra_ccm_do_ctr(ctx, rctx); + if (ret) + goto out; + } else { + rctx->cryptlen = req->cryptlen - ctx->authsize; + if (ret) + goto out; + + /* CTR operation */ + ret = tegra_ccm_do_ctr(ctx, rctx); + if (ret) + goto out; + + /* CBC MAC Operation */ + ret = tegra_ccm_compute_auth(ctx, rctx); + if (ret) + goto out; + } + +out: + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->outbuf.buf, rctx->outbuf.addr); + +outbuf_err: + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->inbuf.buf, rctx->inbuf.addr); + + crypto_finalize_aead_request(ctx->se->engine, req, ret); + + return 0; +} + +static int tegra_gcm_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct aead_request *req = container_of(areq, struct aead_request, base); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct tegra_aead_reqctx *rctx = aead_request_ctx(req); + int ret; + + /* Allocate buffers required */ + rctx->inbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN, + &rctx->inbuf.addr, GFP_KERNEL); + if (!rctx->inbuf.buf) + return -ENOMEM; + + rctx->inbuf.size = SE_AES_BUFLEN; + + rctx->outbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN, + &rctx->outbuf.addr, GFP_KERNEL); + if (!rctx->outbuf.buf) { + ret = ENOMEM; + goto outbuf_err; + } + + rctx->outbuf.size = SE_AES_BUFLEN; + + rctx->src_sg = req->src; + rctx->dst_sg = req->dst; + rctx->assoclen = req->assoclen; + rctx->authsize = crypto_aead_authsize(tfm); + + if (rctx->encrypt) + rctx->cryptlen = req->cryptlen; + else + rctx->cryptlen = req->cryptlen - ctx->authsize; + + memcpy(rctx->iv, req->iv, GCM_AES_IV_SIZE); + rctx->iv[3] = (1 << 24); + + /* If there is associated data perform GMAC operation */ + if (rctx->assoclen) { + ret = tegra_gcm_do_gmac(ctx, rctx); + if (ret) + goto out; + } + + /* GCM Encryption/Decryption operation */ + if (rctx->cryptlen) { + ret = tegra_gcm_do_crypt(ctx, rctx); + if (ret) + goto out; + } + + /* GCM_FINAL operation */ + ret = tegra_gcm_do_final(ctx, rctx); + if (ret) + goto out; + + if (!rctx->encrypt) + ret = tegra_gcm_do_verify(ctx->se, rctx); + +out: + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->outbuf.buf, rctx->outbuf.addr); + +outbuf_err: + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->inbuf.buf, rctx->inbuf.addr); + + /* Finalize the request if there are no errors */ + crypto_finalize_aead_request(ctx->se->engine, req, ret); + + return 0; +} + +static int tegra_aead_cra_init(struct crypto_aead *tfm) +{ + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct aead_alg *alg = crypto_aead_alg(tfm); + struct tegra_se_alg *se_alg; + const char *algname; + int ret; + + algname = crypto_tfm_alg_name(&tfm->base); + + se_alg = container_of(alg, struct tegra_se_alg, alg.aead.base); + + crypto_aead_set_reqsize(tfm, sizeof(struct tegra_aead_reqctx)); + + ctx->se = se_alg->se_dev; + ctx->key_id = 0; + + ret = se_algname_to_algid(algname); + if (ret < 0) { + dev_err(ctx->se->dev, "invalid algorithm\n"); + return ret; + } + + ctx->alg = ret; + + return 0; +} + +static int tegra_ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + + switch (authsize) { + case 4: + case 6: + case 8: + case 10: + case 12: + case 14: + case 16: + break; + default: + return -EINVAL; + } + + ctx->authsize = authsize; + + return 0; +} + +static int tegra_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + int ret; + + ret = crypto_gcm_check_authsize(authsize); + if (ret) + return ret; + + ctx->authsize = authsize; + + return 0; +} + +static void tegra_aead_cra_exit(struct crypto_aead *tfm) +{ + struct tegra_aead_ctx *ctx = crypto_tfm_ctx(&tfm->base); + + if (ctx->key_id) + tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg); +} + +static int tegra_aead_crypt(struct aead_request *req, bool encrypt) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct tegra_aead_reqctx *rctx = aead_request_ctx(req); + + rctx->encrypt = encrypt; + + return crypto_transfer_aead_request_to_engine(ctx->se->engine, req); +} + +static int tegra_aead_encrypt(struct aead_request *req) +{ + return tegra_aead_crypt(req, true); +} + +static int tegra_aead_decrypt(struct aead_request *req) +{ + return tegra_aead_crypt(req, false); +} + +static int tegra_aead_setkey(struct crypto_aead *tfm, + const u8 *key, u32 keylen) +{ + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + + if (aes_check_keylen(keylen)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id); +} + +static unsigned int tegra_cmac_prep_cmd(struct tegra_cmac_ctx *ctx, + struct tegra_cmac_reqctx *rctx) +{ + unsigned int data_count, res_bits = 0, i = 0, j; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr, op; + + data_count = (rctx->datbuf.size / AES_BLOCK_SIZE); + + op = SE_AES_OP_WRSTALL | SE_AES_OP_START | SE_AES_OP_LASTBUF; + + if (!(rctx->task & SHA_UPDATE)) { + op |= SE_AES_OP_FINAL; + res_bits = (rctx->datbuf.size % AES_BLOCK_SIZE) * 8; + } + + if (!res_bits && data_count) + data_count--; + + if (rctx->task & SHA_FIRST) { + rctx->task &= ~SHA_FIRST; + + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + /* Load 0 IV */ + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = 0; + } + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) | + SE_LAST_BLOCK_RES_BITS(res_bits); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + /* Source Address */ + cpuvaddr[i++] = lower_32_bits(rctx->datbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->datbuf.addr)) | + SE_ADDR_HI_SZ(rctx->datbuf.size); + cpuvaddr[i++] = 0; + cpuvaddr[i++] = SE_ADDR_HI_SZ(AES_BLOCK_SIZE); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = op; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + return i; +} + +static void tegra_cmac_copy_result(struct tegra_se *se, struct tegra_cmac_reqctx *rctx) +{ + int i; + + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + rctx->result[i] = readl(se->base + se->hw->regs->result + (i * 4)); +} + +static void tegra_cmac_paste_result(struct tegra_se *se, struct tegra_cmac_reqctx *rctx) +{ + int i; + + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(rctx->result[i], + se->base + se->hw->regs->result + (i * 4)); +} + +static int tegra_cmac_do_update(struct ahash_request *req) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + unsigned int nblks, nresidue, cmdlen; + int ret; + + if (!req->nbytes) + return 0; + + nresidue = (req->nbytes + rctx->residue.size) % rctx->blk_size; + nblks = (req->nbytes + rctx->residue.size) / rctx->blk_size; + + /* + * Reserve the last block as residue during final() to process. + */ + if (!nresidue && nblks) { + nresidue += rctx->blk_size; + nblks--; + } + + rctx->src_sg = req->src; + rctx->datbuf.size = (req->nbytes + rctx->residue.size) - nresidue; + rctx->total_len += rctx->datbuf.size; + rctx->config = tegra234_aes_cfg(SE_ALG_CMAC, 0); + rctx->crypto_config = SE_AES_KEY_INDEX(ctx->key_id); + + /* + * Keep one block and residue bytes in residue and + * return. The bytes will be processed in final() + */ + if (nblks < 1) { + scatterwalk_map_and_copy(rctx->residue.buf + rctx->residue.size, + rctx->src_sg, 0, req->nbytes, 0); + + rctx->residue.size += req->nbytes; + return 0; + } + + /* Copy the previous residue first */ + if (rctx->residue.size) + memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); + + scatterwalk_map_and_copy(rctx->datbuf.buf + rctx->residue.size, + rctx->src_sg, 0, req->nbytes - nresidue, 0); + + scatterwalk_map_and_copy(rctx->residue.buf, rctx->src_sg, + req->nbytes - nresidue, nresidue, 0); + + /* Update residue value with the residue after current block */ + rctx->residue.size = nresidue; + + /* + * If this is not the first 'update' call, paste the previous copied + * intermediate results to the registers so that it gets picked up. + * This is to support the import/export functionality. + */ + if (!(rctx->task & SHA_FIRST)) + tegra_cmac_paste_result(ctx->se, rctx); + + cmdlen = tegra_cmac_prep_cmd(ctx, rctx); + + ret = tegra_se_host1x_submit(se, cmdlen); + /* + * If this is not the final update, copy the intermediate results + * from the registers so that it can be used in the next 'update' + * call. This is to support the import/export functionality. + */ + if (!(rctx->task & SHA_FINAL)) + tegra_cmac_copy_result(ctx->se, rctx); + + return ret; +} + +static int tegra_cmac_do_final(struct ahash_request *req) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + u32 *result = (u32 *)req->result; + int ret = 0, i, cmdlen; + + if (!req->nbytes && !rctx->total_len && ctx->fallback_tfm) { + return crypto_shash_tfm_digest(ctx->fallback_tfm, + rctx->datbuf.buf, 0, req->result); + } + + memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); + rctx->datbuf.size = rctx->residue.size; + rctx->total_len += rctx->residue.size; + rctx->config = tegra234_aes_cfg(SE_ALG_CMAC, 0); + + /* Prepare command and submit */ + cmdlen = tegra_cmac_prep_cmd(ctx, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + if (ret) + goto out; + + /* Read and clear Result register */ + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + result[i] = readl(se->base + se->hw->regs->result + (i * 4)); + + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(0, se->base + se->hw->regs->result + (i * 4)); + +out: + dma_free_coherent(se->dev, SE_SHA_BUFLEN, + rctx->datbuf.buf, rctx->datbuf.addr); + dma_free_coherent(se->dev, crypto_ahash_blocksize(tfm) * 2, + rctx->residue.buf, rctx->residue.addr); + return ret; +} + +static int tegra_cmac_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = ahash_request_cast(areq); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + int ret; + + if (rctx->task & SHA_UPDATE) { + ret = tegra_cmac_do_update(req); + rctx->task &= ~SHA_UPDATE; + } + + if (rctx->task & SHA_FINAL) { + ret = tegra_cmac_do_final(req); + rctx->task &= ~SHA_FINAL; + } + + crypto_finalize_hash_request(se->engine, req, ret); + + return 0; +} + +static void tegra_cmac_init_fallback(struct crypto_ahash *tfm, struct tegra_cmac_ctx *ctx, + const char *algname) +{ + unsigned int statesize; + + ctx->fallback_tfm = crypto_alloc_shash(algname, 0, CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(ctx->fallback_tfm)) { + dev_warn(ctx->se->dev, "failed to allocate fallback for %s\n", algname); + ctx->fallback_tfm = NULL; + return; + } + + statesize = crypto_shash_statesize(ctx->fallback_tfm); + + if (statesize > sizeof(struct tegra_cmac_reqctx)) + crypto_ahash_set_statesize(tfm, statesize); +} + +static int tegra_cmac_cra_init(struct crypto_tfm *tfm) +{ + struct tegra_cmac_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_ahash *ahash_tfm = __crypto_ahash_cast(tfm); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); + struct tegra_se_alg *se_alg; + const char *algname; + int ret; + + algname = crypto_tfm_alg_name(tfm); + se_alg = container_of(alg, struct tegra_se_alg, alg.ahash.base); + + crypto_ahash_set_reqsize(ahash_tfm, sizeof(struct tegra_cmac_reqctx)); + + ctx->se = se_alg->se_dev; + ctx->key_id = 0; + + ret = se_algname_to_algid(algname); + if (ret < 0) { + dev_err(ctx->se->dev, "invalid algorithm\n"); + return ret; + } + + ctx->alg = ret; + + tegra_cmac_init_fallback(ahash_tfm, ctx, algname); + + return 0; +} + +static void tegra_cmac_cra_exit(struct crypto_tfm *tfm) +{ + struct tegra_cmac_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback_tfm) + crypto_free_shash(ctx->fallback_tfm); + + tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg); +} + +static int tegra_cmac_init(struct ahash_request *req) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + int i; + + rctx->total_len = 0; + rctx->datbuf.size = 0; + rctx->residue.size = 0; + rctx->task = SHA_FIRST; + rctx->blk_size = crypto_ahash_blocksize(tfm); + + rctx->residue.buf = dma_alloc_coherent(se->dev, rctx->blk_size * 2, + &rctx->residue.addr, GFP_KERNEL); + if (!rctx->residue.buf) + goto resbuf_fail; + + rctx->residue.size = 0; + + rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_SHA_BUFLEN, + &rctx->datbuf.addr, GFP_KERNEL); + if (!rctx->datbuf.buf) + goto datbuf_fail; + + rctx->datbuf.size = 0; + + /* Clear any previous result */ + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(0, se->base + se->hw->regs->result + (i * 4)); + + return 0; + +datbuf_fail: + dma_free_coherent(se->dev, rctx->blk_size, rctx->residue.buf, + rctx->residue.addr); +resbuf_fail: + return -ENOMEM; +} + +static int tegra_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + + if (aes_check_keylen(keylen)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + if (ctx->fallback_tfm) + crypto_shash_setkey(ctx->fallback_tfm, key, keylen); + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id); +} + +static int tegra_cmac_update(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + rctx->task |= SHA_UPDATE; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_final(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + rctx->task |= SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_finup(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + tegra_cmac_init(req); + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_export(struct ahash_request *req, void *out) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + memcpy(out, rctx, sizeof(*rctx)); + + return 0; +} + +static int tegra_cmac_import(struct ahash_request *req, const void *in) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + memcpy(rctx, in, sizeof(*rctx)); + + return 0; +} + +static struct tegra_se_alg tegra_aead_algs[] = { + { + .alg.aead.op.do_one_request = tegra_gcm_do_one_req, + .alg.aead.base = { + .init = tegra_aead_cra_init, + .exit = tegra_aead_cra_exit, + .setkey = tegra_aead_setkey, + .setauthsize = tegra_gcm_setauthsize, + .encrypt = tegra_aead_encrypt, + .decrypt = tegra_aead_decrypt, + .maxauthsize = AES_BLOCK_SIZE, + .ivsize = GCM_AES_IV_SIZE, + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-tegra", + .cra_priority = 500, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct tegra_aead_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + }, { + .alg.aead.op.do_one_request = tegra_ccm_do_one_req, + .alg.aead.base = { + .init = tegra_aead_cra_init, + .exit = tegra_aead_cra_exit, + .setkey = tegra_aead_setkey, + .setauthsize = tegra_ccm_setauthsize, + .encrypt = tegra_aead_encrypt, + .decrypt = tegra_aead_decrypt, + .maxauthsize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .base = { + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-tegra", + .cra_priority = 500, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct tegra_aead_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + } +}; + +static struct tegra_se_alg tegra_cmac_algs[] = { + { + .alg.ahash.op.do_one_request = tegra_cmac_do_one_req, + .alg.ahash.base = { + .init = tegra_cmac_init, + .setkey = tegra_cmac_setkey, + .update = tegra_cmac_update, + .final = tegra_cmac_final, + .finup = tegra_cmac_finup, + .digest = tegra_cmac_digest, + .export = tegra_cmac_export, + .import = tegra_cmac_import, + .halg.digestsize = AES_BLOCK_SIZE, + .halg.statesize = sizeof(struct tegra_cmac_reqctx), + .halg.base = { + .cra_name = "cmac(aes)", + .cra_driver_name = "tegra-se-cmac", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_cmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_cmac_cra_init, + .cra_exit = tegra_cmac_cra_exit, + } + } + } +}; + +int tegra_init_aes(struct tegra_se *se) +{ + struct aead_engine_alg *aead_alg; + struct ahash_engine_alg *ahash_alg; + struct skcipher_engine_alg *sk_alg; + int i, ret; + + se->manifest = tegra_aes_kac_manifest; + + for (i = 0; i < ARRAY_SIZE(tegra_aes_algs); i++) { + sk_alg = &tegra_aes_algs[i].alg.skcipher; + tegra_aes_algs[i].se_dev = se; + + ret = crypto_engine_register_skcipher(sk_alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + sk_alg->base.base.cra_name); + goto err_aes; + } + } + + for (i = 0; i < ARRAY_SIZE(tegra_aead_algs); i++) { + aead_alg = &tegra_aead_algs[i].alg.aead; + tegra_aead_algs[i].se_dev = se; + + ret = crypto_engine_register_aead(aead_alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + aead_alg->base.base.cra_name); + goto err_aead; + } + } + + for (i = 0; i < ARRAY_SIZE(tegra_cmac_algs); i++) { + ahash_alg = &tegra_cmac_algs[i].alg.ahash; + tegra_cmac_algs[i].se_dev = se; + + ret = crypto_engine_register_ahash(ahash_alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + ahash_alg->base.halg.base.cra_name); + goto err_cmac; + } + } + + return 0; + +err_cmac: + while (i--) + crypto_engine_unregister_ahash(&tegra_cmac_algs[i].alg.ahash); + + i = ARRAY_SIZE(tegra_aead_algs); +err_aead: + while (i--) + crypto_engine_unregister_aead(&tegra_aead_algs[i].alg.aead); + + i = ARRAY_SIZE(tegra_aes_algs); +err_aes: + while (i--) + crypto_engine_unregister_skcipher(&tegra_aes_algs[i].alg.skcipher); + + return ret; +} + +void tegra_deinit_aes(struct tegra_se *se) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tegra_aes_algs); i++) + crypto_engine_unregister_skcipher(&tegra_aes_algs[i].alg.skcipher); + + for (i = 0; i < ARRAY_SIZE(tegra_aead_algs); i++) + crypto_engine_unregister_aead(&tegra_aead_algs[i].alg.aead); + + for (i = 0; i < ARRAY_SIZE(tegra_cmac_algs); i++) + crypto_engine_unregister_ahash(&tegra_cmac_algs[i].alg.ahash); +} diff --git a/drivers/crypto/tegra/tegra-se-hash.c b/drivers/crypto/tegra/tegra-se-hash.c new file mode 100644 index 000000000000..4d4bd727f498 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-hash.c @@ -0,0 +1,1060 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver to handle HASH algorithms using NVIDIA Security Engine. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tegra-se.h" + +struct tegra_sha_ctx { + struct tegra_se *se; + unsigned int alg; + bool fallback; + u32 key_id; + struct crypto_ahash *fallback_tfm; +}; + +struct tegra_sha_reqctx { + struct scatterlist *src_sg; + struct tegra_se_datbuf datbuf; + struct tegra_se_datbuf residue; + struct tegra_se_datbuf digest; + unsigned int alg; + unsigned int config; + unsigned int total_len; + unsigned int blk_size; + unsigned int task; + u32 key_id; + u32 result[HASH_RESULT_REG_COUNT]; + struct ahash_request fallback_req; +}; + +static int tegra_sha_get_config(u32 alg) +{ + int cfg = 0; + + switch (alg) { + case SE_ALG_SHA1: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA1; + break; + + case SE_ALG_HMAC_SHA224: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA224: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA224; + break; + + case SE_ALG_HMAC_SHA256: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA256: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA256; + break; + + case SE_ALG_HMAC_SHA384: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA384: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA384; + break; + + case SE_ALG_HMAC_SHA512: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA512: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA512; + break; + + case SE_ALG_SHA3_224: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_224; + break; + case SE_ALG_SHA3_256: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_256; + break; + case SE_ALG_SHA3_384: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_384; + break; + case SE_ALG_SHA3_512: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_512; + break; + default: + return -EINVAL; + } + + return cfg; +} + +static int tegra_sha_fallback_init(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_init(&rctx->fallback_req); +} + +static int tegra_sha_fallback_update(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + + return crypto_ahash_update(&rctx->fallback_req); +} + +static int tegra_sha_fallback_final(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->fallback_req.result = req->result; + + return crypto_ahash_final(&rctx->fallback_req); +} + +static int tegra_sha_fallback_finup(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + rctx->fallback_req.result = req->result; + + return crypto_ahash_finup(&rctx->fallback_req); +} + +static int tegra_sha_fallback_digest(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + rctx->fallback_req.result = req->result; + + return crypto_ahash_digest(&rctx->fallback_req); +} + +static int tegra_sha_fallback_import(struct ahash_request *req, const void *in) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_import(&rctx->fallback_req, in); +} + +static int tegra_sha_fallback_export(struct ahash_request *req, void *out) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_export(&rctx->fallback_req, out); +} + +static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr, + struct tegra_sha_reqctx *rctx) +{ + u64 msg_len, msg_left; + int i = 0; + + msg_len = rctx->total_len * 8; + msg_left = rctx->datbuf.size * 8; + + /* + * If IN_ADDR_HI_0.SZ > SHA_MSG_LEFT_[0-3] to the HASH engine, + * HW treats it as the last buffer and process the data. + * Therefore, add an extra byte to msg_left if it is not the + * last buffer. + */ + if (rctx->task & SHA_UPDATE) { + msg_left += 8; + msg_len += 8; + } + + cpuvaddr[i++] = host1x_opcode_setpayload(8); + cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_MSG_LENGTH); + cpuvaddr[i++] = lower_32_bits(msg_len); + cpuvaddr[i++] = upper_32_bits(msg_len); + cpuvaddr[i++] = 0; + cpuvaddr[i++] = 0; + cpuvaddr[i++] = lower_32_bits(msg_left); + cpuvaddr[i++] = upper_32_bits(msg_left); + cpuvaddr[i++] = 0; + cpuvaddr[i++] = 0; + cpuvaddr[i++] = host1x_opcode_setpayload(6); + cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_CFG); + cpuvaddr[i++] = rctx->config; + + if (rctx->task & SHA_FIRST) { + cpuvaddr[i++] = SE_SHA_TASK_HASH_INIT; + rctx->task &= ~SHA_FIRST; + } else { + cpuvaddr[i++] = 0; + } + + cpuvaddr[i++] = rctx->datbuf.addr; + cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->datbuf.addr)) | + SE_ADDR_HI_SZ(rctx->datbuf.size)); + cpuvaddr[i++] = rctx->digest.addr; + cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->digest.addr)) | + SE_ADDR_HI_SZ(rctx->digest.size)); + if (rctx->key_id) { + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_CRYPTO_CFG); + cpuvaddr[i++] = SE_AES_KEY_INDEX(rctx->key_id); + } + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_OPERATION); + cpuvaddr[i++] = SE_SHA_OP_WRSTALL | + SE_SHA_OP_START | + SE_SHA_OP_LASTBUF; + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "msg len %llu msg left %llu cfg %#x", + msg_len, msg_left, rctx->config); + + return i; +} + +static void tegra_sha_copy_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx) +{ + int i; + + for (i = 0; i < HASH_RESULT_REG_COUNT; i++) + rctx->result[i] = readl(se->base + se->hw->regs->result + (i * 4)); +} + +static void tegra_sha_paste_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx) +{ + int i; + + for (i = 0; i < HASH_RESULT_REG_COUNT; i++) + writel(rctx->result[i], + se->base + se->hw->regs->result + (i * 4)); +} + +static int tegra_sha_do_update(struct ahash_request *req) +{ + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + unsigned int nblks, nresidue, size, ret; + u32 *cpuvaddr = ctx->se->cmdbuf->addr; + + nresidue = (req->nbytes + rctx->residue.size) % rctx->blk_size; + nblks = (req->nbytes + rctx->residue.size) / rctx->blk_size; + + /* + * If nbytes is a multiple of block size and there is no residue, + * then reserve the last block as residue during final() to process. + */ + if (!nresidue && nblks) { + nresidue = rctx->blk_size; + nblks--; + } + + rctx->src_sg = req->src; + rctx->datbuf.size = (req->nbytes + rctx->residue.size) - nresidue; + rctx->total_len += rctx->datbuf.size; + + /* + * If nbytes are less than a block size, copy it residue and + * return. The bytes will be processed in final() + */ + if (nblks < 1) { + scatterwalk_map_and_copy(rctx->residue.buf + rctx->residue.size, + rctx->src_sg, 0, req->nbytes, 0); + + rctx->residue.size += req->nbytes; + return 0; + } + + /* Copy the previous residue first */ + if (rctx->residue.size) + memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); + + scatterwalk_map_and_copy(rctx->datbuf.buf + rctx->residue.size, + rctx->src_sg, 0, req->nbytes - nresidue, 0); + + scatterwalk_map_and_copy(rctx->residue.buf, rctx->src_sg, + req->nbytes - nresidue, nresidue, 0); + + /* Update residue value with the residue after current block */ + rctx->residue.size = nresidue; + + rctx->config = tegra_sha_get_config(rctx->alg) | + SE_SHA_DST_HASH_REG; + + /* + * If this is not the first 'update' call, paste the previous copied + * intermediate results to the registers so that it gets picked up. + * This is to support the import/export functionality. + */ + if (!(rctx->task & SHA_FIRST)) + tegra_sha_paste_hash_result(ctx->se, rctx); + + size = tegra_sha_prep_cmd(ctx->se, cpuvaddr, rctx); + + ret = tegra_se_host1x_submit(ctx->se, size); + + /* + * If this is not the final update, copy the intermediate results + * from the registers so that it can be used in the next 'update' + * call. This is to support the import/export functionality. + */ + if (!(rctx->task & SHA_FINAL)) + tegra_sha_copy_hash_result(ctx->se, rctx); + + return ret; +} + +static int tegra_sha_do_final(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + int size, ret = 0; + + memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); + rctx->datbuf.size = rctx->residue.size; + rctx->total_len += rctx->residue.size; + + rctx->config = tegra_sha_get_config(rctx->alg) | + SE_SHA_DST_MEMORY; + + size = tegra_sha_prep_cmd(se, cpuvaddr, rctx); + + ret = tegra_se_host1x_submit(se, size); + if (ret) + goto out; + + /* Copy result */ + memcpy(req->result, rctx->digest.buf, rctx->digest.size); + +out: + dma_free_coherent(se->dev, SE_SHA_BUFLEN, + rctx->datbuf.buf, rctx->datbuf.addr); + dma_free_coherent(se->dev, crypto_ahash_blocksize(tfm), + rctx->residue.buf, rctx->residue.addr); + dma_free_coherent(se->dev, rctx->digest.size, rctx->digest.buf, + rctx->digest.addr); + return ret; +} + +static int tegra_sha_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = ahash_request_cast(areq); + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + int ret = 0; + + if (rctx->task & SHA_UPDATE) { + ret = tegra_sha_do_update(req); + rctx->task &= ~SHA_UPDATE; + } + + if (rctx->task & SHA_FINAL) { + ret = tegra_sha_do_final(req); + rctx->task &= ~SHA_FINAL; + } + + crypto_finalize_hash_request(se->engine, req, ret); + + return 0; +} + +static void tegra_sha_init_fallback(struct crypto_ahash *tfm, struct tegra_sha_ctx *ctx, + const char *algname) +{ + unsigned int statesize; + + ctx->fallback_tfm = crypto_alloc_ahash(algname, 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(ctx->fallback_tfm)) { + dev_warn(ctx->se->dev, + "failed to allocate fallback for %s\n", algname); + ctx->fallback_tfm = NULL; + return; + } + + statesize = crypto_ahash_statesize(ctx->fallback_tfm); + + if (statesize > sizeof(struct tegra_sha_reqctx)) + crypto_ahash_set_statesize(tfm, statesize); + + /* Update reqsize if fallback is added */ + crypto_ahash_set_reqsize(tfm, + sizeof(struct tegra_sha_reqctx) + + crypto_ahash_reqsize(ctx->fallback_tfm)); +} + +static int tegra_sha_cra_init(struct crypto_tfm *tfm) +{ + struct tegra_sha_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_ahash *ahash_tfm = __crypto_ahash_cast(tfm); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); + struct tegra_se_alg *se_alg; + const char *algname; + int ret; + + algname = crypto_tfm_alg_name(tfm); + se_alg = container_of(alg, struct tegra_se_alg, alg.ahash.base); + + crypto_ahash_set_reqsize(ahash_tfm, sizeof(struct tegra_sha_reqctx)); + + ctx->se = se_alg->se_dev; + ctx->fallback = false; + ctx->key_id = 0; + + ret = se_algname_to_algid(algname); + if (ret < 0) { + dev_err(ctx->se->dev, "invalid algorithm\n"); + return ret; + } + + if (se_alg->alg_base) + tegra_sha_init_fallback(ahash_tfm, ctx, algname); + + ctx->alg = ret; + + return 0; +} + +static void tegra_sha_cra_exit(struct crypto_tfm *tfm) +{ + struct tegra_sha_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback_tfm) + crypto_free_ahash(ctx->fallback_tfm); + + tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg); +} + +static int tegra_sha_init(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + + if (ctx->fallback) + return tegra_sha_fallback_init(req); + + rctx->total_len = 0; + rctx->datbuf.size = 0; + rctx->residue.size = 0; + rctx->key_id = ctx->key_id; + rctx->task = SHA_FIRST; + rctx->alg = ctx->alg; + rctx->blk_size = crypto_ahash_blocksize(tfm); + rctx->digest.size = crypto_ahash_digestsize(tfm); + + rctx->digest.buf = dma_alloc_coherent(se->dev, rctx->digest.size, + &rctx->digest.addr, GFP_KERNEL); + if (!rctx->digest.buf) + goto digbuf_fail; + + rctx->residue.buf = dma_alloc_coherent(se->dev, rctx->blk_size, + &rctx->residue.addr, GFP_KERNEL); + if (!rctx->residue.buf) + goto resbuf_fail; + + rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_SHA_BUFLEN, + &rctx->datbuf.addr, GFP_KERNEL); + if (!rctx->datbuf.buf) + goto datbuf_fail; + + return 0; + +datbuf_fail: + dma_free_coherent(se->dev, rctx->blk_size, rctx->residue.buf, + rctx->residue.addr); +resbuf_fail: + dma_free_coherent(se->dev, SE_SHA_BUFLEN, rctx->datbuf.buf, + rctx->datbuf.addr); +digbuf_fail: + return -ENOMEM; +} + +static int tegra_hmac_fallback_setkey(struct tegra_sha_ctx *ctx, const u8 *key, + unsigned int keylen) +{ + if (!ctx->fallback_tfm) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + ctx->fallback = true; + return crypto_ahash_setkey(ctx->fallback_tfm, key, keylen); +} + +static int tegra_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (aes_check_keylen(keylen)) + return tegra_hmac_fallback_setkey(ctx, key, keylen); + + ctx->fallback = false; + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id); +} + +static int tegra_sha_update(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_update(req); + + rctx->task |= SHA_UPDATE; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_final(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_final(req); + + rctx->task |= SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_finup(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_finup(req); + + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_digest(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_digest(req); + + tegra_sha_init(req); + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_export(struct ahash_request *req, void *out) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_export(req, out); + + memcpy(out, rctx, sizeof(*rctx)); + + return 0; +} + +static int tegra_sha_import(struct ahash_request *req, const void *in) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_import(req, in); + + memcpy(rctx, in, sizeof(*rctx)); + + return 0; +} + +static struct tegra_se_alg tegra_hash_algs[] = { + { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "tegra-se-sha1", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "tegra-se-sha224", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "tegra-se-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "tegra-se-sha384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "tegra-se-sha512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_224_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-224", + .cra_driver_name = "tegra-se-sha3-224", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_256_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-256", + .cra_driver_name = "tegra-se-sha3-256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_384_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-384", + .cra_driver_name = "tegra-se-sha3-384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_512_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-512", + .cra_driver_name = "tegra-se-sha3-512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha224", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "tegra-se-hmac-sha224", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha256", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "tegra-se-hmac-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha384", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "tegra-se-hmac-sha384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha512", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "tegra-se-hmac-sha512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + } +}; + +static int tegra_hash_kac_manifest(u32 user, u32 alg, u32 keylen) +{ + int manifest; + + manifest = SE_KAC_USER_NS; + + switch (alg) { + case SE_ALG_HMAC_SHA224: + case SE_ALG_HMAC_SHA256: + case SE_ALG_HMAC_SHA384: + case SE_ALG_HMAC_SHA512: + manifest |= SE_KAC_HMAC; + break; + default: + return -EINVAL; + } + + switch (keylen) { + case AES_KEYSIZE_128: + manifest |= SE_KAC_SIZE_128; + break; + case AES_KEYSIZE_192: + manifest |= SE_KAC_SIZE_192; + break; + case AES_KEYSIZE_256: + default: + manifest |= SE_KAC_SIZE_256; + break; + } + + return manifest; +} + +int tegra_init_hash(struct tegra_se *se) +{ + struct ahash_engine_alg *alg; + int i, ret; + + se->manifest = tegra_hash_kac_manifest; + + for (i = 0; i < ARRAY_SIZE(tegra_hash_algs); i++) { + tegra_hash_algs[i].se_dev = se; + alg = &tegra_hash_algs[i].alg.ahash; + + ret = crypto_engine_register_ahash(alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + alg->base.halg.base.cra_name); + goto sha_err; + } + } + + return 0; + +sha_err: + while (i--) + crypto_engine_unregister_ahash(&tegra_hash_algs[i].alg.ahash); + + return ret; +} + +void tegra_deinit_hash(struct tegra_se *se) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tegra_hash_algs); i++) + crypto_engine_unregister_ahash(&tegra_hash_algs[i].alg.ahash); +} diff --git a/drivers/crypto/tegra/tegra-se-key.c b/drivers/crypto/tegra/tegra-se-key.c new file mode 100644 index 000000000000..ac14678dbd30 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-key.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver file to manage keys of NVIDIA Security Engine. + */ + +#include +#include +#include + +#include "tegra-se.h" + +#define SE_KEY_FULL_MASK GENMASK(SE_MAX_KEYSLOT, 0) + +/* Reserve keyslot 0, 14, 15 */ +#define SE_KEY_RSVD_MASK (BIT(0) | BIT(14) | BIT(15)) +#define SE_KEY_VALID_MASK (SE_KEY_FULL_MASK & ~SE_KEY_RSVD_MASK) + +/* Mutex lock to guard keyslots */ +static DEFINE_MUTEX(kslt_lock); + +/* Keyslot bitmask (0 = available, 1 = in use/not available) */ +static u16 tegra_se_keyslots = SE_KEY_RSVD_MASK; + +static u16 tegra_keyslot_alloc(void) +{ + u16 keyid; + + mutex_lock(&kslt_lock); + /* Check if all key slots are full */ + if (tegra_se_keyslots == GENMASK(SE_MAX_KEYSLOT, 0)) { + mutex_unlock(&kslt_lock); + return 0; + } + + keyid = ffz(tegra_se_keyslots); + tegra_se_keyslots |= BIT(keyid); + + mutex_unlock(&kslt_lock); + + return keyid; +} + +static void tegra_keyslot_free(u16 slot) +{ + mutex_lock(&kslt_lock); + tegra_se_keyslots &= ~(BIT(slot)); + mutex_unlock(&kslt_lock); +} + +static unsigned int tegra_key_prep_ins_cmd(struct tegra_se *se, u32 *cpuvaddr, + const u32 *key, u32 keylen, u16 slot, u32 alg) +{ + int i = 0, j; + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->op); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_DUMMY; + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->manifest); + cpuvaddr[i++] = se->manifest(se->owner, alg, keylen); + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_dst); + + cpuvaddr[i++] = SE_AES_KEY_DST_INDEX(slot); + + for (j = 0; j < keylen / 4; j++) { + /* Set key address */ + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_addr); + cpuvaddr[i++] = j; + + /* Set key data */ + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_data); + cpuvaddr[i++] = key[j]; + } + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->config); + cpuvaddr[i++] = SE_CFG_INS; + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->op); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_START | + SE_AES_OP_LASTBUF; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "key-slot %u key-manifest %#x\n", + slot, se->manifest(se->owner, alg, keylen)); + + return i; +} + +static bool tegra_key_in_kslt(u32 keyid) +{ + bool ret; + + if (keyid > SE_MAX_KEYSLOT) + return false; + + mutex_lock(&kslt_lock); + ret = ((BIT(keyid) & SE_KEY_VALID_MASK) && + (BIT(keyid) & tegra_se_keyslots)); + mutex_unlock(&kslt_lock); + + return ret; +} + +static int tegra_key_insert(struct tegra_se *se, const u8 *key, + u32 keylen, u16 slot, u32 alg) +{ + const u32 *keyval = (u32 *)key; + u32 *addr = se->cmdbuf->addr, size; + + size = tegra_key_prep_ins_cmd(se, addr, keyval, keylen, slot, alg); + + return tegra_se_host1x_submit(se, size); +} + +void tegra_key_invalidate(struct tegra_se *se, u32 keyid, u32 alg) +{ + u8 zkey[AES_MAX_KEY_SIZE] = {0}; + + if (!keyid) + return; + + /* Overwrite the key with 0s */ + tegra_key_insert(se, zkey, AES_MAX_KEY_SIZE, keyid, alg); + + tegra_keyslot_free(keyid); +} + +int tegra_key_submit(struct tegra_se *se, const u8 *key, u32 keylen, u32 alg, u32 *keyid) +{ + int ret; + + /* Use the existing slot if it is already allocated */ + if (!tegra_key_in_kslt(*keyid)) { + *keyid = tegra_keyslot_alloc(); + if (!(*keyid)) { + dev_err(se->dev, "failed to allocate key slot\n"); + return -ENOMEM; + } + } + + ret = tegra_key_insert(se, key, keylen, *keyid, alg); + if (ret) + return ret; + + return 0; +} diff --git a/drivers/crypto/tegra/tegra-se-main.c b/drivers/crypto/tegra/tegra-se-main.c new file mode 100644 index 000000000000..f4758e320615 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-main.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver for NVIDIA Security Engine in Tegra Chips + */ + +#include +#include +#include +#include +#include + +#include + +#include "tegra-se.h" + +static struct host1x_bo *tegra_se_cmdbuf_get(struct host1x_bo *host_bo) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo); + + kref_get(&cmdbuf->ref); + + return host_bo; +} + +static void tegra_se_cmdbuf_release(struct kref *ref) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(ref, struct tegra_se_cmdbuf, ref); + + dma_free_attrs(cmdbuf->dev, cmdbuf->size, cmdbuf->addr, + cmdbuf->iova, 0); + + kfree(cmdbuf); +} + +static void tegra_se_cmdbuf_put(struct host1x_bo *host_bo) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo); + + kref_put(&cmdbuf->ref, tegra_se_cmdbuf_release); +} + +static struct host1x_bo_mapping * +tegra_se_cmdbuf_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(bo, struct tegra_se_cmdbuf, bo); + struct host1x_bo_mapping *map; + int err; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return ERR_PTR(-ENOMEM); + + kref_init(&map->ref); + map->bo = host1x_bo_get(bo); + map->direction = direction; + map->dev = dev; + + map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL); + if (!map->sgt) { + err = -ENOMEM; + goto free; + } + + err = dma_get_sgtable(dev, map->sgt, cmdbuf->addr, + cmdbuf->iova, cmdbuf->words * 4); + if (err) + goto free_sgt; + + err = dma_map_sgtable(dev, map->sgt, direction, 0); + if (err) + goto free_sgt; + + map->phys = sg_dma_address(map->sgt->sgl); + map->size = cmdbuf->words * 4; + map->chunks = err; + + return map; + +free_sgt: + sg_free_table(map->sgt); + kfree(map->sgt); +free: + kfree(map); + return ERR_PTR(err); +} + +static void tegra_se_cmdbuf_unpin(struct host1x_bo_mapping *map) +{ + if (!map) + return; + + dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0); + sg_free_table(map->sgt); + kfree(map->sgt); + host1x_bo_put(map->bo); + + kfree(map); +} + +static void *tegra_se_cmdbuf_mmap(struct host1x_bo *host_bo) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo); + + return cmdbuf->addr; +} + +static void tegra_se_cmdbuf_munmap(struct host1x_bo *host_bo, void *addr) +{ +} + +static const struct host1x_bo_ops tegra_se_cmdbuf_ops = { + .get = tegra_se_cmdbuf_get, + .put = tegra_se_cmdbuf_put, + .pin = tegra_se_cmdbuf_pin, + .unpin = tegra_se_cmdbuf_unpin, + .mmap = tegra_se_cmdbuf_mmap, + .munmap = tegra_se_cmdbuf_munmap, +}; + +static struct tegra_se_cmdbuf *tegra_se_host1x_bo_alloc(struct tegra_se *se, ssize_t size) +{ + struct tegra_se_cmdbuf *cmdbuf; + struct device *dev = se->dev->parent; + + cmdbuf = kzalloc(sizeof(*cmdbuf), GFP_KERNEL); + if (!cmdbuf) + return NULL; + + cmdbuf->addr = dma_alloc_attrs(dev, size, &cmdbuf->iova, + GFP_KERNEL, 0); + if (!cmdbuf->addr) + return NULL; + + cmdbuf->size = size; + cmdbuf->dev = dev; + + host1x_bo_init(&cmdbuf->bo, &tegra_se_cmdbuf_ops); + kref_init(&cmdbuf->ref); + + return cmdbuf; +} + +int tegra_se_host1x_submit(struct tegra_se *se, u32 size) +{ + struct host1x_job *job; + int ret; + + job = host1x_job_alloc(se->channel, 1, 0, true); + if (!job) { + dev_err(se->dev, "failed to allocate host1x job\n"); + return -ENOMEM; + } + + job->syncpt = host1x_syncpt_get(se->syncpt); + job->syncpt_incrs = 1; + job->client = &se->client; + job->class = se->client.class; + job->serialize = true; + job->engine_fallback_streamid = se->stream_id; + job->engine_streamid_offset = SE_STREAM_ID; + + se->cmdbuf->words = size; + + host1x_job_add_gather(job, &se->cmdbuf->bo, size, 0); + + ret = host1x_job_pin(job, se->dev); + if (ret) { + dev_err(se->dev, "failed to pin host1x job\n"); + goto job_put; + } + + ret = host1x_job_submit(job); + if (ret) { + dev_err(se->dev, "failed to submit host1x job\n"); + goto job_unpin; + } + + ret = host1x_syncpt_wait(job->syncpt, job->syncpt_end, + MAX_SCHEDULE_TIMEOUT, NULL); + if (ret) { + dev_err(se->dev, "host1x job timed out\n"); + return ret; + } + + host1x_job_put(job); + return 0; + +job_unpin: + host1x_job_unpin(job); +job_put: + host1x_job_put(job); + + return ret; +} + +static int tegra_se_client_init(struct host1x_client *client) +{ + struct tegra_se *se = container_of(client, struct tegra_se, client); + int ret; + + se->channel = host1x_channel_request(&se->client); + if (!se->channel) { + dev_err(se->dev, "host1x channel map failed\n"); + return -ENODEV; + } + + se->syncpt = host1x_syncpt_request(&se->client, 0); + if (!se->syncpt) { + dev_err(se->dev, "host1x syncpt allocation failed\n"); + ret = -EINVAL; + goto channel_put; + } + + se->syncpt_id = host1x_syncpt_id(se->syncpt); + + se->cmdbuf = tegra_se_host1x_bo_alloc(se, SZ_4K); + if (!se->cmdbuf) { + ret = -ENOMEM; + goto syncpt_put; + } + + ret = se->hw->init_alg(se); + if (ret) { + dev_err(se->dev, "failed to register algorithms\n"); + goto cmdbuf_put; + } + + return 0; + +cmdbuf_put: + tegra_se_cmdbuf_put(&se->cmdbuf->bo); +syncpt_put: + host1x_syncpt_put(se->syncpt); +channel_put: + host1x_channel_put(se->channel); + + return ret; +} + +static int tegra_se_client_deinit(struct host1x_client *client) +{ + struct tegra_se *se = container_of(client, struct tegra_se, client); + + se->hw->deinit_alg(se); + tegra_se_cmdbuf_put(&se->cmdbuf->bo); + host1x_syncpt_put(se->syncpt); + host1x_channel_put(se->channel); + + return 0; +} + +static const struct host1x_client_ops tegra_se_client_ops = { + .init = tegra_se_client_init, + .exit = tegra_se_client_deinit, +}; + +static int tegra_se_host1x_register(struct tegra_se *se) +{ + INIT_LIST_HEAD(&se->client.list); + se->client.dev = se->dev; + se->client.ops = &tegra_se_client_ops; + se->client.class = se->hw->host1x_class; + se->client.num_syncpts = 1; + + host1x_client_register(&se->client); + + return 0; +} + +static int tegra_se_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct tegra_se *se; + int ret; + + se = devm_kzalloc(dev, sizeof(*se), GFP_KERNEL); + if (!se) + return -ENOMEM; + + se->dev = dev; + se->owner = TEGRA_GPSE_ID; + se->hw = device_get_match_data(&pdev->dev); + + se->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(se->base)) + return PTR_ERR(se->base); + + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39)); + platform_set_drvdata(pdev, se); + + se->clk = devm_clk_get_enabled(se->dev, NULL); + if (IS_ERR(se->clk)) + return dev_err_probe(dev, PTR_ERR(se->clk), + "failed to enable clocks\n"); + + if (!tegra_dev_iommu_get_stream_id(dev, &se->stream_id)) + return dev_err_probe(dev, -ENODEV, + "failed to get IOMMU stream ID\n"); + + writel(se->stream_id, se->base + SE_STREAM_ID); + + se->engine = crypto_engine_alloc_init(dev, 0); + if (!se->engine) + return dev_err_probe(dev, -ENOMEM, "failed to init crypto engine\n"); + + ret = crypto_engine_start(se->engine); + if (ret) { + crypto_engine_exit(se->engine); + return dev_err_probe(dev, ret, "failed to start crypto engine\n"); + } + + ret = tegra_se_host1x_register(se); + if (ret) { + crypto_engine_stop(se->engine); + crypto_engine_exit(se->engine); + return dev_err_probe(dev, ret, "failed to init host1x params\n"); + } + + return 0; +} + +static int tegra_se_remove(struct platform_device *pdev) +{ + struct tegra_se *se = platform_get_drvdata(pdev); + + crypto_engine_stop(se->engine); + crypto_engine_exit(se->engine); + iommu_fwspec_free(se->dev); + host1x_client_unregister(&se->client); + + return 0; +} + +static const struct tegra_se_regs tegra234_aes1_regs = { + .config = SE_AES1_CFG, + .op = SE_AES1_OPERATION, + .last_blk = SE_AES1_LAST_BLOCK, + .linear_ctr = SE_AES1_LINEAR_CTR, + .aad_len = SE_AES1_AAD_LEN, + .cryp_msg_len = SE_AES1_CRYPTO_MSG_LEN, + .manifest = SE_AES1_KEYMANIFEST, + .key_addr = SE_AES1_KEY_ADDR, + .key_data = SE_AES1_KEY_DATA, + .key_dst = SE_AES1_KEY_DST, + .result = SE_AES1_CMAC_RESULT, +}; + +static const struct tegra_se_regs tegra234_hash_regs = { + .config = SE_SHA_CFG, + .op = SE_SHA_OPERATION, + .manifest = SE_SHA_KEYMANIFEST, + .key_addr = SE_SHA_KEY_ADDR, + .key_data = SE_SHA_KEY_DATA, + .key_dst = SE_SHA_KEY_DST, + .result = SE_SHA_HASH_RESULT, +}; + +static const struct tegra_se_hw tegra234_aes_hw = { + .regs = &tegra234_aes1_regs, + .kac_ver = 1, + .host1x_class = 0x3b, + .init_alg = tegra_init_aes, + .deinit_alg = tegra_deinit_aes, +}; + +static const struct tegra_se_hw tegra234_hash_hw = { + .regs = &tegra234_hash_regs, + .kac_ver = 1, + .host1x_class = 0x3d, + .init_alg = tegra_init_hash, + .deinit_alg = tegra_deinit_hash, +}; + +static const struct of_device_id tegra_se_of_match[] = { + { + .compatible = "nvidia,tegra234-se-aes", + .data = &tegra234_aes_hw + }, { + .compatible = "nvidia,tegra234-se-hash", + .data = &tegra234_hash_hw, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_se_of_match); + +static struct platform_driver tegra_se_driver = { + .driver = { + .name = "tegra-se", + .of_match_table = tegra_se_of_match, + }, + .probe = tegra_se_probe, + .remove = tegra_se_remove, +}; + +static int tegra_se_host1x_probe(struct host1x_device *dev) +{ + return host1x_device_init(dev); +} + +static int tegra_se_host1x_remove(struct host1x_device *dev) +{ + host1x_device_exit(dev); + + return 0; +} + +static struct host1x_driver tegra_se_host1x_driver = { + .driver = { + .name = "tegra-se-host1x", + }, + .probe = tegra_se_host1x_probe, + .remove = tegra_se_host1x_remove, + .subdevs = tegra_se_of_match, +}; + +static int __init tegra_se_module_init(void) +{ + int ret; + + ret = host1x_driver_register(&tegra_se_host1x_driver); + if (ret) + return ret; + + return platform_driver_register(&tegra_se_driver); +} + +static void __exit tegra_se_module_exit(void) +{ + host1x_driver_unregister(&tegra_se_host1x_driver); + platform_driver_unregister(&tegra_se_driver); +} + +module_init(tegra_se_module_init); +module_exit(tegra_se_module_exit); + +MODULE_DESCRIPTION("NVIDIA Tegra Security Engine Driver"); +MODULE_AUTHOR("Akhil R "); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/tegra/tegra-se.h b/drivers/crypto/tegra/tegra-se.h new file mode 100644 index 000000000000..b9dd7ceb8783 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se.h @@ -0,0 +1,560 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * Header file for NVIDIA Security Engine driver. + */ + +#ifndef _TEGRA_SE_H +#define _TEGRA_SE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SE_OWNERSHIP 0x14 +#define SE_OWNERSHIP_UID(x) FIELD_GET(GENMASK(7, 0), x) +#define TEGRA_GPSE_ID 3 + +#define SE_STREAM_ID 0x90 + +#define SE_SHA_CFG 0x4004 +#define SE_SHA_KEY_ADDR 0x4094 +#define SE_SHA_KEY_DATA 0x4098 +#define SE_SHA_KEYMANIFEST 0x409c +#define SE_SHA_CRYPTO_CFG 0x40a4 +#define SE_SHA_KEY_DST 0x40a8 +#define SE_SHA_SRC_KSLT 0x4180 +#define SE_SHA_TGT_KSLT 0x4184 +#define SE_SHA_MSG_LENGTH 0x401c +#define SE_SHA_OPERATION 0x407c +#define SE_SHA_HASH_RESULT 0x40b0 + +#define SE_SHA_ENC_MODE(x) FIELD_PREP(GENMASK(31, 24), x) +#define SE_SHA_ENC_MODE_SHA1 SE_SHA_ENC_MODE(0) +#define SE_SHA_ENC_MODE_SHA224 SE_SHA_ENC_MODE(4) +#define SE_SHA_ENC_MODE_SHA256 SE_SHA_ENC_MODE(5) +#define SE_SHA_ENC_MODE_SHA384 SE_SHA_ENC_MODE(6) +#define SE_SHA_ENC_MODE_SHA512 SE_SHA_ENC_MODE(7) +#define SE_SHA_ENC_MODE_SHA_CTX_INTEGRITY SE_SHA_ENC_MODE(8) +#define SE_SHA_ENC_MODE_SHA3_224 SE_SHA_ENC_MODE(9) +#define SE_SHA_ENC_MODE_SHA3_256 SE_SHA_ENC_MODE(10) +#define SE_SHA_ENC_MODE_SHA3_384 SE_SHA_ENC_MODE(11) +#define SE_SHA_ENC_MODE_SHA3_512 SE_SHA_ENC_MODE(12) +#define SE_SHA_ENC_MODE_SHAKE128 SE_SHA_ENC_MODE(13) +#define SE_SHA_ENC_MODE_SHAKE256 SE_SHA_ENC_MODE(14) +#define SE_SHA_ENC_MODE_HMAC_SHA256_1KEY SE_SHA_ENC_MODE(0) +#define SE_SHA_ENC_MODE_HMAC_SHA256_2KEY SE_SHA_ENC_MODE(1) +#define SE_SHA_ENC_MODE_SM3_256 SE_SHA_ENC_MODE(0) + +#define SE_SHA_CFG_ENC_ALG(x) FIELD_PREP(GENMASK(15, 12), x) +#define SE_SHA_ENC_ALG_NOP SE_SHA_CFG_ENC_ALG(0) +#define SE_SHA_ENC_ALG_SHA_ENC SE_SHA_CFG_ENC_ALG(1) +#define SE_SHA_ENC_ALG_RNG SE_SHA_CFG_ENC_ALG(2) +#define SE_SHA_ENC_ALG_SHA SE_SHA_CFG_ENC_ALG(3) +#define SE_SHA_ENC_ALG_SM3 SE_SHA_CFG_ENC_ALG(4) +#define SE_SHA_ENC_ALG_HMAC SE_SHA_CFG_ENC_ALG(7) +#define SE_SHA_ENC_ALG_KDF SE_SHA_CFG_ENC_ALG(8) +#define SE_SHA_ENC_ALG_KEY_INVLD SE_SHA_CFG_ENC_ALG(10) +#define SE_SHA_ENC_ALG_KEY_INQUIRE SE_SHA_CFG_ENC_ALG(12) +#define SE_SHA_ENC_ALG_INS SE_SHA_CFG_ENC_ALG(13) + +#define SE_SHA_OP_LASTBUF FIELD_PREP(BIT(16), 1) +#define SE_SHA_OP_WRSTALL FIELD_PREP(BIT(15), 1) + +#define SE_SHA_OP_OP(x) FIELD_PREP(GENMASK(2, 0), x) +#define SE_SHA_OP_START SE_SHA_OP_OP(1) +#define SE_SHA_OP_RESTART_OUT SE_SHA_OP_OP(2) +#define SE_SHA_OP_RESTART_IN SE_SHA_OP_OP(4) +#define SE_SHA_OP_RESTART_INOUT SE_SHA_OP_OP(5) +#define SE_SHA_OP_DUMMY SE_SHA_OP_OP(6) + +#define SE_SHA_CFG_DEC_ALG(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_SHA_DEC_ALG_NOP SE_SHA_CFG_DEC_ALG(0) +#define SE_SHA_DEC_ALG_AES_DEC SE_SHA_CFG_DEC_ALG(1) +#define SE_SHA_DEC_ALG_HMAC SE_SHA_CFG_DEC_ALG(7) +#define SE_SHA_DEC_ALG_HMAC_VERIFY SE_SHA_CFG_DEC_ALG(9) + +#define SE_SHA_CFG_DST(x) FIELD_PREP(GENMASK(4, 2), x) +#define SE_SHA_DST_MEMORY SE_SHA_CFG_DST(0) +#define SE_SHA_DST_HASH_REG SE_SHA_CFG_DST(1) +#define SE_SHA_DST_KEYTABLE SE_SHA_CFG_DST(2) +#define SE_SHA_DST_SRK SE_SHA_CFG_DST(3) + +#define SE_SHA_TASK_HASH_INIT BIT(0) + +/* AES Configuration */ +#define SE_AES0_CFG 0x1004 +#define SE_AES0_CRYPTO_CONFIG 0x1008 +#define SE_AES0_KEY_DST 0x1030 +#define SE_AES0_OPERATION 0x1038 +#define SE_AES0_LINEAR_CTR 0x101c +#define SE_AES0_LAST_BLOCK 0x102c +#define SE_AES0_KEY_ADDR 0x10bc +#define SE_AES0_KEY_DATA 0x10c0 +#define SE_AES0_CMAC_RESULT 0x10c4 +#define SE_AES0_SRC_KSLT 0x1100 +#define SE_AES0_TGT_KSLT 0x1104 +#define SE_AES0_KEYMANIFEST 0x1114 +#define SE_AES0_AAD_LEN 0x112c +#define SE_AES0_CRYPTO_MSG_LEN 0x1134 + +#define SE_AES1_CFG 0x2004 +#define SE_AES1_CRYPTO_CONFIG 0x2008 +#define SE_AES1_KEY_DST 0x2030 +#define SE_AES1_OPERATION 0x2038 +#define SE_AES1_LINEAR_CTR 0x201c +#define SE_AES1_LAST_BLOCK 0x202c +#define SE_AES1_KEY_ADDR 0x20bc +#define SE_AES1_KEY_DATA 0x20c0 +#define SE_AES1_CMAC_RESULT 0x20c4 +#define SE_AES1_SRC_KSLT 0x2100 +#define SE_AES1_TGT_KSLT 0x2104 +#define SE_AES1_KEYMANIFEST 0x2114 +#define SE_AES1_AAD_LEN 0x212c +#define SE_AES1_CRYPTO_MSG_LEN 0x2134 + +#define SE_AES_CFG_ENC_MODE(x) FIELD_PREP(GENMASK(31, 24), x) +#define SE_AES_ENC_MODE_GMAC SE_AES_CFG_ENC_MODE(3) +#define SE_AES_ENC_MODE_GCM SE_AES_CFG_ENC_MODE(4) +#define SE_AES_ENC_MODE_GCM_FINAL SE_AES_CFG_ENC_MODE(5) +#define SE_AES_ENC_MODE_CMAC SE_AES_CFG_ENC_MODE(7) +#define SE_AES_ENC_MODE_CBC_MAC SE_AES_CFG_ENC_MODE(12) + +#define SE_AES_CFG_DEC_MODE(x) FIELD_PREP(GENMASK(23, 16), x) +#define SE_AES_DEC_MODE_GMAC SE_AES_CFG_DEC_MODE(3) +#define SE_AES_DEC_MODE_GCM SE_AES_CFG_DEC_MODE(4) +#define SE_AES_DEC_MODE_GCM_FINAL SE_AES_CFG_DEC_MODE(5) +#define SE_AES_DEC_MODE_CBC_MAC SE_AES_CFG_DEC_MODE(12) + +#define SE_AES_CFG_ENC_ALG(x) FIELD_PREP(GENMASK(15, 12), x) +#define SE_AES_ENC_ALG_NOP SE_AES_CFG_ENC_ALG(0) +#define SE_AES_ENC_ALG_AES_ENC SE_AES_CFG_ENC_ALG(1) +#define SE_AES_ENC_ALG_RNG SE_AES_CFG_ENC_ALG(2) +#define SE_AES_ENC_ALG_SHA SE_AES_CFG_ENC_ALG(3) +#define SE_AES_ENC_ALG_HMAC SE_AES_CFG_ENC_ALG(7) +#define SE_AES_ENC_ALG_KDF SE_AES_CFG_ENC_ALG(8) +#define SE_AES_ENC_ALG_INS SE_AES_CFG_ENC_ALG(13) + +#define SE_AES_CFG_DEC_ALG(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_AES_DEC_ALG_NOP SE_AES_CFG_DEC_ALG(0) +#define SE_AES_DEC_ALG_AES_DEC SE_AES_CFG_DEC_ALG(1) + +#define SE_AES_CFG_DST(x) FIELD_PREP(GENMASK(4, 2), x) +#define SE_AES_DST_MEMORY SE_AES_CFG_DST(0) +#define SE_AES_DST_HASH_REG SE_AES_CFG_DST(1) +#define SE_AES_DST_KEYTABLE SE_AES_CFG_DST(2) +#define SE_AES_DST_SRK SE_AES_CFG_DST(3) + +/* AES Crypto Configuration */ +#define SE_AES_KEY2_INDEX(x) FIELD_PREP(GENMASK(31, 28), x) +#define SE_AES_KEY_INDEX(x) FIELD_PREP(GENMASK(27, 24), x) + +#define SE_AES_CRYPTO_CFG_SCC_DIS FIELD_PREP(BIT(20), 1) + +#define SE_AES_CRYPTO_CFG_CTR_CNTN(x) FIELD_PREP(GENMASK(18, 11), x) + +#define SE_AES_CRYPTO_CFG_IV_MODE(x) FIELD_PREP(BIT(10), x) +#define SE_AES_IV_MODE_SWIV SE_AES_CRYPTO_CFG_IV_MODE(0) +#define SE_AES_IV_MODE_HWIV SE_AES_CRYPTO_CFG_IV_MODE(1) + +#define SE_AES_CRYPTO_CFG_CORE_SEL(x) FIELD_PREP(BIT(9), x) +#define SE_AES_CORE_SEL_DECRYPT SE_AES_CRYPTO_CFG_CORE_SEL(0) +#define SE_AES_CORE_SEL_ENCRYPT SE_AES_CRYPTO_CFG_CORE_SEL(1) + +#define SE_AES_CRYPTO_CFG_IV_SEL(x) FIELD_PREP(GENMASK(8, 7), x) +#define SE_AES_IV_SEL_UPDATED SE_AES_CRYPTO_CFG_IV_SEL(1) +#define SE_AES_IV_SEL_REG SE_AES_CRYPTO_CFG_IV_SEL(2) +#define SE_AES_IV_SEL_RANDOM SE_AES_CRYPTO_CFG_IV_SEL(3) + +#define SE_AES_CRYPTO_CFG_VCTRAM_SEL(x) FIELD_PREP(GENMASK(6, 5), x) +#define SE_AES_VCTRAM_SEL_MEMORY SE_AES_CRYPTO_CFG_VCTRAM_SEL(0) +#define SE_AES_VCTRAM_SEL_TWEAK SE_AES_CRYPTO_CFG_VCTRAM_SEL(1) +#define SE_AES_VCTRAM_SEL_AESOUT SE_AES_CRYPTO_CFG_VCTRAM_SEL(2) +#define SE_AES_VCTRAM_SEL_PREV_MEM SE_AES_CRYPTO_CFG_VCTRAM_SEL(3) + +#define SE_AES_CRYPTO_CFG_INPUT_SEL(x) FIELD_PREP(GENMASK(4, 3), x) +#define SE_AES_INPUT_SEL_MEMORY SE_AES_CRYPTO_CFG_INPUT_SEL(0) +#define SE_AES_INPUT_SEL_RANDOM SE_AES_CRYPTO_CFG_INPUT_SEL(1) +#define SE_AES_INPUT_SEL_AESOUT SE_AES_CRYPTO_CFG_INPUT_SEL(2) +#define SE_AES_INPUT_SEL_LINEAR_CTR SE_AES_CRYPTO_CFG_INPUT_SEL(3) +#define SE_AES_INPUT_SEL_REG SE_AES_CRYPTO_CFG_INPUT_SEL(1) + +#define SE_AES_CRYPTO_CFG_XOR_POS(x) FIELD_PREP(GENMASK(2, 1), x) +#define SE_AES_XOR_POS_BYPASS SE_AES_CRYPTO_CFG_XOR_POS(0) +#define SE_AES_XOR_POS_BOTH SE_AES_CRYPTO_CFG_XOR_POS(1) +#define SE_AES_XOR_POS_TOP SE_AES_CRYPTO_CFG_XOR_POS(2) +#define SE_AES_XOR_POS_BOTTOM SE_AES_CRYPTO_CFG_XOR_POS(3) + +#define SE_AES_CRYPTO_CFG_HASH_EN(x) FIELD_PREP(BIT(0), x) +#define SE_AES_HASH_DISABLE SE_AES_CRYPTO_CFG_HASH_EN(0) +#define SE_AES_HASH_ENABLE SE_AES_CRYPTO_CFG_HASH_EN(1) + +#define SE_LAST_BLOCK_VAL(x) FIELD_PREP(GENMASK(19, 0), x) +#define SE_LAST_BLOCK_RES_BITS(x) FIELD_PREP(GENMASK(26, 20), x) + +#define SE_AES_OP_LASTBUF FIELD_PREP(BIT(16), 1) +#define SE_AES_OP_WRSTALL FIELD_PREP(BIT(15), 1) +#define SE_AES_OP_FINAL FIELD_PREP(BIT(5), 1) +#define SE_AES_OP_INIT FIELD_PREP(BIT(4), 1) + +#define SE_AES_OP_OP(x) FIELD_PREP(GENMASK(2, 0), x) +#define SE_AES_OP_START SE_AES_OP_OP(1) +#define SE_AES_OP_RESTART_OUT SE_AES_OP_OP(2) +#define SE_AES_OP_RESTART_IN SE_AES_OP_OP(4) +#define SE_AES_OP_RESTART_INOUT SE_AES_OP_OP(5) +#define SE_AES_OP_DUMMY SE_AES_OP_OP(6) + +#define SE_KAC_SIZE(x) FIELD_PREP(GENMASK(15, 14), x) +#define SE_KAC_SIZE_128 SE_KAC_SIZE(0) +#define SE_KAC_SIZE_192 SE_KAC_SIZE(1) +#define SE_KAC_SIZE_256 SE_KAC_SIZE(2) + +#define SE_KAC_EXPORTABLE FIELD_PREP(BIT(12), 1) + +#define SE_KAC_PURPOSE(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_KAC_ENC SE_KAC_PURPOSE(0) +#define SE_KAC_CMAC SE_KAC_PURPOSE(1) +#define SE_KAC_HMAC SE_KAC_PURPOSE(2) +#define SE_KAC_GCM_KW SE_KAC_PURPOSE(3) +#define SE_KAC_HMAC_KDK SE_KAC_PURPOSE(6) +#define SE_KAC_HMAC_KDD SE_KAC_PURPOSE(7) +#define SE_KAC_HMAC_KDD_KUW SE_KAC_PURPOSE(8) +#define SE_KAC_XTS SE_KAC_PURPOSE(9) +#define SE_KAC_GCM SE_KAC_PURPOSE(10) + +#define SE_KAC_USER_NS FIELD_PREP(GENMASK(6, 4), 3) + +#define SE_AES_KEY_DST_INDEX(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_ADDR_HI_MSB(x) FIELD_PREP(GENMASK(31, 24), x) +#define SE_ADDR_HI_SZ(x) FIELD_PREP(GENMASK(23, 0), x) + +#define SE_CFG_AES_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_AES_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GMAC_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_ENC_MODE_GMAC | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GMAC_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DEC_MODE_GMAC | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_ENC_MODE_GCM | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DEC_MODE_GCM | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_FINAL_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_ENC_MODE_GCM_FINAL | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_FINAL_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DEC_MODE_GCM_FINAL | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_CMAC (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_ENC_MODE_CMAC | \ + SE_AES_DST_HASH_REG) + +#define SE_CFG_CBC_MAC (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_ENC_MODE_CBC_MAC) + +#define SE_CFG_INS (SE_AES_ENC_ALG_INS | \ + SE_AES_DEC_ALG_NOP) + +#define SE_CRYPTO_CFG_ECB_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_XOR_POS_BYPASS | \ + SE_AES_CORE_SEL_ENCRYPT) + +#define SE_CRYPTO_CFG_ECB_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_XOR_POS_BYPASS | \ + SE_AES_CORE_SEL_DECRYPT) + +#define SE_CRYPTO_CFG_CBC_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_AESOUT | \ + SE_AES_XOR_POS_TOP | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_CBC_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_PREV_MEM | \ + SE_AES_XOR_POS_BOTTOM | \ + SE_AES_CORE_SEL_DECRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_CTR (SE_AES_INPUT_SEL_LINEAR_CTR | \ + SE_AES_VCTRAM_SEL_MEMORY | \ + SE_AES_XOR_POS_BOTTOM | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_CRYPTO_CFG_CTR_CNTN(1) | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_XTS_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_TWEAK | \ + SE_AES_XOR_POS_BOTH | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_XTS_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_TWEAK | \ + SE_AES_XOR_POS_BOTH | \ + SE_AES_CORE_SEL_DECRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_XTS_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_TWEAK | \ + SE_AES_XOR_POS_BOTH | \ + SE_AES_CORE_SEL_DECRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_CBC_MAC (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_AESOUT | \ + SE_AES_XOR_POS_TOP | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_HASH_ENABLE | \ + SE_AES_IV_SEL_REG) + +#define HASH_RESULT_REG_COUNT 50 +#define CMAC_RESULT_REG_COUNT 4 + +#define SE_CRYPTO_CTR_REG_COUNT 4 +#define SE_MAX_KEYSLOT 15 +#define SE_MAX_MEM_ALLOC SZ_4M +#define SE_AES_BUFLEN 0x8000 +#define SE_SHA_BUFLEN 0x2000 + +#define SHA_FIRST BIT(0) +#define SHA_UPDATE BIT(1) +#define SHA_FINAL BIT(2) + +/* Security Engine operation modes */ +enum se_aes_alg { + SE_ALG_CBC, /* Cipher Block Chaining (CBC) mode */ + SE_ALG_ECB, /* Electronic Codebook (ECB) mode */ + SE_ALG_CTR, /* Counter (CTR) mode */ + SE_ALG_XTS, /* XTS mode */ + SE_ALG_GMAC, /* GMAC mode */ + SE_ALG_GCM, /* GCM mode */ + SE_ALG_GCM_FINAL, /* GCM FINAL mode */ + SE_ALG_CMAC, /* Cipher-based MAC (CMAC) mode */ + SE_ALG_CBC_MAC, /* CBC MAC mode */ +}; + +enum se_hash_alg { + SE_ALG_RNG_DRBG, /* Deterministic Random Bit Generator */ + SE_ALG_SHA1, /* Secure Hash Algorithm-1 (SHA1) mode */ + SE_ALG_SHA224, /* Secure Hash Algorithm-224 (SHA224) mode */ + SE_ALG_SHA256, /* Secure Hash Algorithm-256 (SHA256) mode */ + SE_ALG_SHA384, /* Secure Hash Algorithm-384 (SHA384) mode */ + SE_ALG_SHA512, /* Secure Hash Algorithm-512 (SHA512) mode */ + SE_ALG_SHA3_224, /* Secure Hash Algorithm3-224 (SHA3-224) mode */ + SE_ALG_SHA3_256, /* Secure Hash Algorithm3-256 (SHA3-256) mode */ + SE_ALG_SHA3_384, /* Secure Hash Algorithm3-384 (SHA3-384) mode */ + SE_ALG_SHA3_512, /* Secure Hash Algorithm3-512 (SHA3-512) mode */ + SE_ALG_SHAKE128, /* Secure Hash Algorithm3 (SHAKE128) mode */ + SE_ALG_SHAKE256, /* Secure Hash Algorithm3 (SHAKE256) mode */ + SE_ALG_HMAC_SHA224, /* Hash based MAC (HMAC) - 224 */ + SE_ALG_HMAC_SHA256, /* Hash based MAC (HMAC) - 256 */ + SE_ALG_HMAC_SHA384, /* Hash based MAC (HMAC) - 384 */ + SE_ALG_HMAC_SHA512, /* Hash based MAC (HMAC) - 512 */ +}; + +struct tegra_se_alg { + struct tegra_se *se_dev; + const char *alg_base; + + union { + struct skcipher_engine_alg skcipher; + struct aead_engine_alg aead; + struct ahash_engine_alg ahash; + } alg; +}; + +struct tegra_se_regs { + u32 op; + u32 config; + u32 last_blk; + u32 linear_ctr; + u32 out_addr; + u32 aad_len; + u32 cryp_msg_len; + u32 manifest; + u32 key_addr; + u32 key_data; + u32 key_dst; + u32 result; +}; + +struct tegra_se_hw { + const struct tegra_se_regs *regs; + int (*init_alg)(struct tegra_se *se); + void (*deinit_alg)(struct tegra_se *se); + bool support_sm_alg; + u32 host1x_class; + u32 kac_ver; +}; + +struct tegra_se { + int (*manifest)(u32 user, u32 alg, u32 keylen); + const struct tegra_se_hw *hw; + struct host1x_client client; + struct host1x_channel *channel; + struct tegra_se_cmdbuf *cmdbuf; + struct crypto_engine *engine; + struct host1x_syncpt *syncpt; + struct device *dev; + struct clk *clk; + unsigned int opcode_addr; + unsigned int stream_id; + unsigned int syncpt_id; + void __iomem *base; + u32 owner; +}; + +struct tegra_se_cmdbuf { + dma_addr_t iova; + u32 *addr; + struct device *dev; + struct kref ref; + struct host1x_bo bo; + ssize_t size; + u32 words; +}; + +struct tegra_se_datbuf { + u8 *buf; + dma_addr_t addr; + ssize_t size; +}; + +static inline int se_algname_to_algid(const char *name) +{ + if (!strcmp(name, "cbc(aes)")) + return SE_ALG_CBC; + else if (!strcmp(name, "ecb(aes)")) + return SE_ALG_ECB; + else if (!strcmp(name, "ctr(aes)")) + return SE_ALG_CTR; + else if (!strcmp(name, "xts(aes)")) + return SE_ALG_XTS; + else if (!strcmp(name, "cmac(aes)")) + return SE_ALG_CMAC; + else if (!strcmp(name, "gcm(aes)")) + return SE_ALG_GCM; + else if (!strcmp(name, "ccm(aes)")) + return SE_ALG_CBC_MAC; + + else if (!strcmp(name, "sha1")) + return SE_ALG_SHA1; + else if (!strcmp(name, "sha224")) + return SE_ALG_SHA224; + else if (!strcmp(name, "sha256")) + return SE_ALG_SHA256; + else if (!strcmp(name, "sha384")) + return SE_ALG_SHA384; + else if (!strcmp(name, "sha512")) + return SE_ALG_SHA512; + else if (!strcmp(name, "sha3-224")) + return SE_ALG_SHA3_224; + else if (!strcmp(name, "sha3-256")) + return SE_ALG_SHA3_256; + else if (!strcmp(name, "sha3-384")) + return SE_ALG_SHA3_384; + else if (!strcmp(name, "sha3-512")) + return SE_ALG_SHA3_512; + else if (!strcmp(name, "hmac(sha224)")) + return SE_ALG_HMAC_SHA224; + else if (!strcmp(name, "hmac(sha256)")) + return SE_ALG_HMAC_SHA256; + else if (!strcmp(name, "hmac(sha384)")) + return SE_ALG_HMAC_SHA384; + else if (!strcmp(name, "hmac(sha512)")) + return SE_ALG_HMAC_SHA512; + else + return -EINVAL; +} + +/* Functions */ +int tegra_init_aes(struct tegra_se *se); +int tegra_init_hash(struct tegra_se *se); +void tegra_deinit_aes(struct tegra_se *se); +void tegra_deinit_hash(struct tegra_se *se); +int tegra_key_submit(struct tegra_se *se, const u8 *key, + u32 keylen, u32 alg, u32 *keyid); +void tegra_key_invalidate(struct tegra_se *se, u32 keyid, u32 alg); +int tegra_se_host1x_submit(struct tegra_se *se, u32 size); + +/* HOST1x OPCODES */ +static inline u32 host1x_opcode_setpayload(unsigned int payload) +{ + return (9 << 28) | payload; +} + +static inline u32 host1x_opcode_incr_w(unsigned int offset) +{ + /* 22-bit offset supported */ + return (10 << 28) | offset; +} + +static inline u32 host1x_opcode_nonincr_w(unsigned int offset) +{ + /* 22-bit offset supported */ + return (11 << 28) | offset; +} + +static inline u32 host1x_opcode_incr(unsigned int offset, unsigned int count) +{ + return (1 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_nonincr(unsigned int offset, unsigned int count) +{ + return (2 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} + +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0x3ff) << 0; +} + +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} + +#define se_host1x_opcode_incr_w(x) host1x_opcode_incr_w((x) / 4) +#define se_host1x_opcode_nonincr_w(x) host1x_opcode_nonincr_w((x) / 4) +#define se_host1x_opcode_incr(x, y) host1x_opcode_incr((x) / 4, y) +#define se_host1x_opcode_nonincr(x, y) host1x_opcode_nonincr((x) / 4, y) + +#endif /*_TEGRA_SE_H*/ From 526d23fc2dd03a749297f5ef5edcbfb8c492dcfd Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:44 -0400 Subject: [PATCH 051/123] crypto: ecc - Use ECC_CURVE_NIST_P192/256/384_DIGITS where possible Replace hard-coded numbers with ECC_CURVE_NIST_P192/256/384_DIGITS where possible. Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index 7c960e0d8e43..e94183f6c418 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -689,7 +689,7 @@ static void vli_mmod_barrett(u64 *result, u64 *product, const u64 *mod, static void vli_mmod_fast_192(u64 *result, const u64 *product, const u64 *curve_prime, u64 *tmp) { - const unsigned int ndigits = 3; + const unsigned int ndigits = ECC_CURVE_NIST_P192_DIGITS; int carry; vli_set(result, product, ndigits); @@ -717,7 +717,7 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product, const u64 *curve_prime, u64 *tmp) { int carry; - const unsigned int ndigits = 4; + const unsigned int ndigits = ECC_CURVE_NIST_P256_DIGITS; /* t */ vli_set(result, product, ndigits); @@ -800,7 +800,7 @@ static void vli_mmod_fast_384(u64 *result, const u64 *product, const u64 *curve_prime, u64 *tmp) { int carry; - const unsigned int ndigits = 6; + const unsigned int ndigits = ECC_CURVE_NIST_P384_DIGITS; /* t */ vli_set(result, product, ndigits); @@ -932,13 +932,13 @@ static bool vli_mmod_fast(u64 *result, u64 *product, } switch (ndigits) { - case 3: + case ECC_CURVE_NIST_P192_DIGITS: vli_mmod_fast_192(result, product, curve_prime, tmp); break; - case 4: + case ECC_CURVE_NIST_P256_DIGITS: vli_mmod_fast_256(result, product, curve_prime, tmp); break; - case 6: + case ECC_CURVE_NIST_P384_DIGITS: vli_mmod_fast_384(result, product, curve_prime, tmp); break; default: From d67c96fb97b5811e15c881d5cb72e293faa5f8e1 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:45 -0400 Subject: [PATCH 052/123] crypto: ecdsa - Convert byte arrays with key coordinates to digits For NIST P192/256/384 the public key's x and y parameters could be copied directly from a given array since both parameters filled 'ndigits' of digits (a 'digit' is a u64). For support of NIST P521 the key parameters need to have leading zeros prepended to the most significant digit since only 2 bytes of the most significant digit are provided. Therefore, implement ecc_digits_from_bytes to convert a byte array into an array of digits and use this function in ecdsa_set_pub_key where an input byte array needs to be converted into digits. Suggested-by: Lukas Wunner Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecdsa.c | 14 +++++++++----- include/crypto/internal/ecc.h | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 3f9ec273a121..e819d0983bd3 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -222,9 +222,8 @@ static int ecdsa_ecc_ctx_reset(struct ecc_ctx *ctx) static int ecdsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); + unsigned int digitlen, ndigits; const unsigned char *d = key; - const u64 *digits = (const u64 *)&d[1]; - unsigned int ndigits; int ret; ret = ecdsa_ecc_ctx_reset(ctx); @@ -238,12 +237,17 @@ static int ecdsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsig return -EINVAL; keylen--; - ndigits = (keylen >> 1) / sizeof(u64); + digitlen = keylen >> 1; + + ndigits = DIV_ROUND_UP(digitlen, sizeof(u64)); if (ndigits != ctx->curve->g.ndigits) return -EINVAL; - ecc_swap_digits(digits, ctx->pub_key.x, ndigits); - ecc_swap_digits(&digits[ndigits], ctx->pub_key.y, ndigits); + d++; + + ecc_digits_from_bytes(d, digitlen, ctx->pub_key.x, ndigits); + ecc_digits_from_bytes(&d[digitlen], digitlen, ctx->pub_key.y, ndigits); + ret = ecc_is_pubkey_valid_full(ctx->curve, &ctx->pub_key); ctx->pub_key_set = ret == 0; diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index 4f6c1a68882f..ab722a8986b7 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -56,6 +56,27 @@ static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigit out[i] = get_unaligned_be64(&src[ndigits - 1 - i]); } +/** + * ecc_digits_from_bytes() - Create ndigits-sized digits array from byte array + * @in: Input byte array + * @nbytes Size of input byte array + * @out Output digits array + * @ndigits: Number of digits to create from byte array + */ +static inline void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes, + u64 *out, unsigned int ndigits) +{ + unsigned int o = nbytes & 7; + __be64 msd = 0; + + if (o) { + memcpy((u8 *)&msd + sizeof(msd) - o, in, o); + out[--ndigits] = be64_to_cpu(msd); + in += o; + } + ecc_swap_digits(in, out, ndigits); +} + /** * ecc_is_key_valid() - Validate a given ECDH private key * From dcee6068d4ab503bdb3eed6561c4ce7c7e2175f6 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:46 -0400 Subject: [PATCH 053/123] crypto: ecdsa - Adjust tests on length of key parameters In preparation for support of NIST P521, adjust the basic tests on the length of the provided key parameters to only ensure that the length of the x plus y coordinates parameter array is not an odd number and that each coordinate fits into an array of 'ndigits' digits. Mathematical tests on the key's parameters are then done in ecc_is_pubkey_valid_full rejecting invalid keys. The change is necessary since NIST P521 keys do not have keys with coordinates that each require 'full' digits (= all bits in u64 used). NIST P521 only requires 2 bytes (9 bits) in the most significant digit unlike NIST P192/256/384 that each require multiple 'full' digits. Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecdsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index e819d0983bd3..9462a4d640a7 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -230,7 +230,7 @@ static int ecdsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsig if (ret < 0) return ret; - if (keylen < 1 || (((keylen - 1) >> 1) % sizeof(u64)) != 0) + if (keylen < 1 || ((keylen - 1) & 1) != 0) return -EINVAL; /* we only accept uncompressed format indicated by '4' */ if (d[0] != 4) From 48e8d3a5f4f902f2bcd70cc88ff784aa7d6d2c63 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:47 -0400 Subject: [PATCH 054/123] crypto: ecdsa - Extend res.x mod n calculation for NIST P521 res.x has been calculated by ecc_point_mult_shamir, which uses 'mod curve_prime' on res.x and therefore p > res.x with 'p' being the curve_prime. Further, it is true that for the NIST curves p > n with 'n' being the 'curve_order' and therefore the following may be true as well: p > res.x >= n. If res.x >= n then res.x mod n can be calculated by iteratively sub- tracting n from res.x until res.x < n. For NIST P192/256/384 this can be done in a single subtraction. This can also be done in a single subtraction for NIST P521. The mathematical reason why a single subtraction is sufficient is due to the values of 'p' and 'n' of the NIST curves where the following holds true: note: max(res.x) = p - 1 max(res.x) - n < n p - 1 - n < n p - 1 < 2n => holds true for the NIST curves Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecdsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 9462a4d640a7..9b84ab503276 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -122,7 +122,7 @@ static int _ecdsa_verify(struct ecc_ctx *ctx, const u64 *hash, const u64 *r, con /* res.x = res.x mod n (if res.x > order) */ if (unlikely(vli_cmp(res.x, curve->n, ndigits) == 1)) - /* faster alternative for NIST p384, p256 & p192 */ + /* faster alternative for NIST p521, p384, p256 & p192 */ vli_sub(res.x, res.x, curve->n, ndigits); if (!vli_cmp(res.x, r, ndigits)) From c0d6bd1fd367a5374bff7e3f3bdf47beb84893c8 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:48 -0400 Subject: [PATCH 055/123] crypto: ecc - Add nbits field to ecc_curve structure Add the number of bits a curve has to the ecc_curve definition to be able to derive the number of bytes a curve requires for its coordinates from it. It also allows one to identify a curve by its particular size. Set the number of bits on all curve definitions. Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Reviewed-by: Vitaly Chikunov Signed-off-by: Herbert Xu --- crypto/ecc_curve_defs.h | 4 ++++ crypto/ecrdsa_defs.h | 5 +++++ include/crypto/ecc_curve.h | 2 ++ 3 files changed, 11 insertions(+) diff --git a/crypto/ecc_curve_defs.h b/crypto/ecc_curve_defs.h index 9719934c9428..ab1ef3d94be5 100644 --- a/crypto/ecc_curve_defs.h +++ b/crypto/ecc_curve_defs.h @@ -17,6 +17,7 @@ static u64 nist_p192_b[] = { 0xFEB8DEECC146B9B1ull, 0x0FA7E9AB72243049ull, 0x64210519E59C80E7ull }; static struct ecc_curve nist_p192 = { .name = "nist_192", + .nbits = 192, .g = { .x = nist_p192_g_x, .y = nist_p192_g_y, @@ -43,6 +44,7 @@ static u64 nist_p256_b[] = { 0x3BCE3C3E27D2604Bull, 0x651D06B0CC53B0F6ull, 0xB3EBBD55769886BCull, 0x5AC635D8AA3A93E7ull }; static struct ecc_curve nist_p256 = { .name = "nist_256", + .nbits = 256, .g = { .x = nist_p256_g_x, .y = nist_p256_g_y, @@ -75,6 +77,7 @@ static u64 nist_p384_b[] = { 0x2a85c8edd3ec2aefull, 0xc656398d8a2ed19dull, 0x988e056be3f82d19ull, 0xb3312fa7e23ee7e4ull }; static struct ecc_curve nist_p384 = { .name = "nist_384", + .nbits = 384, .g = { .x = nist_p384_g_x, .y = nist_p384_g_y, @@ -95,6 +98,7 @@ static u64 curve25519_a[] = { 0x000000000001DB41, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }; static const struct ecc_curve ecc_25519 = { .name = "curve25519", + .nbits = 255, .g = { .x = curve25519_g_x, .ndigits = 4, diff --git a/crypto/ecrdsa_defs.h b/crypto/ecrdsa_defs.h index 0056335b9d03..1c2c2449e331 100644 --- a/crypto/ecrdsa_defs.h +++ b/crypto/ecrdsa_defs.h @@ -47,6 +47,7 @@ static u64 cp256a_b[] = { static struct ecc_curve gost_cp256a = { .name = "cp256a", + .nbits = 256, .g = { .x = cp256a_g_x, .y = cp256a_g_y, @@ -80,6 +81,7 @@ static u64 cp256b_b[] = { static struct ecc_curve gost_cp256b = { .name = "cp256b", + .nbits = 256, .g = { .x = cp256b_g_x, .y = cp256b_g_y, @@ -117,6 +119,7 @@ static u64 cp256c_b[] = { static struct ecc_curve gost_cp256c = { .name = "cp256c", + .nbits = 256, .g = { .x = cp256c_g_x, .y = cp256c_g_y, @@ -166,6 +169,7 @@ static u64 tc512a_b[] = { static struct ecc_curve gost_tc512a = { .name = "tc512a", + .nbits = 512, .g = { .x = tc512a_g_x, .y = tc512a_g_y, @@ -211,6 +215,7 @@ static u64 tc512b_b[] = { static struct ecc_curve gost_tc512b = { .name = "tc512b", + .nbits = 512, .g = { .x = tc512b_g_x, .y = tc512b_g_y, diff --git a/include/crypto/ecc_curve.h b/include/crypto/ecc_curve.h index 70964781eb68..7d90c5e82266 100644 --- a/include/crypto/ecc_curve.h +++ b/include/crypto/ecc_curve.h @@ -23,6 +23,7 @@ struct ecc_point { * struct ecc_curve - definition of elliptic curve * * @name: Short name of the curve. + * @nbits: The number of bits of a curve. * @g: Generator point of the curve. * @p: Prime number, if Barrett's reduction is used for this curve * pre-calculated value 'mu' is appended to the @p after ndigits. @@ -34,6 +35,7 @@ struct ecc_point { */ struct ecc_curve { char *name; + u32 nbits; struct ecc_point g; u64 *p; u64 *n; From e7fb062754ef9f656ee004f2be8f59ce8a79bffb Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:49 -0400 Subject: [PATCH 056/123] crypto: ecc - Implement vli_mmod_fast_521 for NIST p521 Implement vli_mmod_fast_521 following the description for how to calculate the modulus for NIST P521 in the NIST publication "Recommendations for Discrete Logarithm-Based Cryptography: Elliptic Curve Domain Parameters" section G.1.4. NIST p521 requires 9 64bit digits, so increase the ECC_MAX_DIGITS so that the vli digit array provides enough elements to fit the larger integers required by this curve. Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecc.c | 25 +++++++++++++++++++++++++ include/crypto/internal/ecc.h | 3 ++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index e94183f6c418..8914e437f94b 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -902,6 +902,28 @@ static void vli_mmod_fast_384(u64 *result, const u64 *product, #undef AND64H #undef AND64L +/* + * Computes result = product % curve_prime + * from "Recommendations for Discrete Logarithm-Based Cryptography: + * Elliptic Curve Domain Parameters" section G.1.4 + */ +static void vli_mmod_fast_521(u64 *result, const u64 *product, + const u64 *curve_prime, u64 *tmp) +{ + const unsigned int ndigits = ECC_CURVE_NIST_P521_DIGITS; + size_t i; + + /* Initialize result with lowest 521 bits from product */ + vli_set(result, product, ndigits); + result[8] &= 0x1ff; + + for (i = 0; i < ndigits; i++) + tmp[i] = (product[8 + i] >> 9) | (product[9 + i] << 55); + tmp[8] &= 0x1ff; + + vli_mod_add(result, result, tmp, curve_prime, ndigits); +} + /* Computes result = product % curve_prime for different curve_primes. * * Note that curve_primes are distinguished just by heuristic check and @@ -941,6 +963,9 @@ static bool vli_mmod_fast(u64 *result, u64 *product, case ECC_CURVE_NIST_P384_DIGITS: vli_mmod_fast_384(result, product, curve_prime, tmp); break; + case ECC_CURVE_NIST_P521_DIGITS: + vli_mmod_fast_521(result, product, curve_prime, tmp); + break; default: pr_err_ratelimited("ecc: unsupported digits size!\n"); return false; diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index ab722a8986b7..4e2f5f938e91 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -33,7 +33,8 @@ #define ECC_CURVE_NIST_P192_DIGITS 3 #define ECC_CURVE_NIST_P256_DIGITS 4 #define ECC_CURVE_NIST_P384_DIGITS 6 -#define ECC_MAX_DIGITS (512 / 64) /* due to ecrdsa */ +#define ECC_CURVE_NIST_P521_DIGITS 9 +#define ECC_MAX_DIGITS DIV_ROUND_UP(521, 64) /* NIST P521 */ #define ECC_DIGITS_TO_BYTES_SHIFT 3 From 114e80437e0e48b967d0bd533a4c8ff1186ce12f Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:50 -0400 Subject: [PATCH 057/123] crypto: ecc - Add special case for NIST P521 in ecc_point_mult In ecc_point_mult use the number of bits of the NIST P521 curve + 2. The change is required specifically for NIST P521 to pass mathematical tests on the public key. Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index 8914e437f94b..d15ef0747a76 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -1320,7 +1320,10 @@ static void ecc_point_mult(struct ecc_point *result, carry = vli_add(sk[0], scalar, curve->n, ndigits); vli_add(sk[1], sk[0], curve->n, ndigits); scalar = sk[!carry]; - num_bits = sizeof(u64) * ndigits * 8 + 1; + if (curve->nbits == 521) /* NIST P521 */ + num_bits = curve->nbits + 2; + else + num_bits = sizeof(u64) * ndigits * 8 + 1; vli_set(rx[1], point->x, ndigits); vli_set(ry[1], point->y, ndigits); From 288b46c57c658d3c798de0c9154e1215bdde8476 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:51 -0400 Subject: [PATCH 058/123] crypto: ecc - Add NIST P521 curve parameters Add the parameters for the NIST P521 curve and define a new curve ID for it. Make the curve available in ecc_get_curve. Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecc.c | 2 ++ crypto/ecc_curve_defs.h | 45 +++++++++++++++++++++++++++++++++++++++++ include/crypto/ecdh.h | 1 + 3 files changed, 48 insertions(+) diff --git a/crypto/ecc.c b/crypto/ecc.c index d15ef0747a76..2e05387b9499 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -60,6 +60,8 @@ const struct ecc_curve *ecc_get_curve(unsigned int curve_id) return &nist_p256; case ECC_CURVE_NIST_P384: return &nist_p384; + case ECC_CURVE_NIST_P521: + return &nist_p521; default: return NULL; } diff --git a/crypto/ecc_curve_defs.h b/crypto/ecc_curve_defs.h index ab1ef3d94be5..0ecade7d02f5 100644 --- a/crypto/ecc_curve_defs.h +++ b/crypto/ecc_curve_defs.h @@ -89,6 +89,51 @@ static struct ecc_curve nist_p384 = { .b = nist_p384_b }; +/* NIST P-521 */ +static u64 nist_p521_g_x[] = { 0xf97e7e31c2e5bd66ull, 0x3348b3c1856a429bull, + 0xfe1dc127a2ffa8deull, 0xa14b5e77efe75928ull, + 0xf828af606b4d3dbaull, 0x9c648139053fb521ull, + 0x9e3ecb662395b442ull, 0x858e06b70404e9cdull, + 0xc6ull }; +static u64 nist_p521_g_y[] = { 0x88be94769fd16650ull, 0x353c7086a272c240ull, + 0xc550b9013fad0761ull, 0x97ee72995ef42640ull, + 0x17afbd17273e662cull, 0x98f54449579b4468ull, + 0x5c8a5fb42c7d1bd9ull, 0x39296a789a3bc004ull, + 0x118ull }; +static u64 nist_p521_p[] = { 0xffffffffffffffffull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0x1ffull }; +static u64 nist_p521_n[] = { 0xbb6fb71e91386409ull, 0x3bb5c9b8899c47aeull, + 0x7fcc0148f709a5d0ull, 0x51868783bf2f966bull, + 0xfffffffffffffffaull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0x1ffull }; +static u64 nist_p521_a[] = { 0xfffffffffffffffcull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0x1ffull }; +static u64 nist_p521_b[] = { 0xef451fd46b503f00ull, 0x3573df883d2c34f1ull, + 0x1652c0bd3bb1bf07ull, 0x56193951ec7e937bull, + 0xb8b489918ef109e1ull, 0xa2da725b99b315f3ull, + 0x929a21a0b68540eeull, 0x953eb9618e1c9a1full, + 0x051ull }; +static struct ecc_curve nist_p521 = { + .name = "nist_521", + .nbits = 521, + .g = { + .x = nist_p521_g_x, + .y = nist_p521_g_y, + .ndigits = 9, + }, + .p = nist_p521_p, + .n = nist_p521_n, + .a = nist_p521_a, + .b = nist_p521_b +}; + /* curve25519 */ static u64 curve25519_g_x[] = { 0x0000000000000009, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }; diff --git a/include/crypto/ecdh.h b/include/crypto/ecdh.h index a9f98078d29c..9784ecdd2fb4 100644 --- a/include/crypto/ecdh.h +++ b/include/crypto/ecdh.h @@ -26,6 +26,7 @@ #define ECC_CURVE_NIST_P192 0x0001 #define ECC_CURVE_NIST_P256 0x0002 #define ECC_CURVE_NIST_P384 0x0003 +#define ECC_CURVE_NIST_P521 0x0004 /** * struct ecdh - define an ECDH private key From dee45a607abbd52fd572622c281010037a85544b Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:52 -0400 Subject: [PATCH 059/123] crypto: ecdsa - Replace ndigits with nbits where precision is needed Replace the usage of ndigits with nbits where precise space calculations are needed, such as in ecdsa_max_size where the length of a coordinate is determined. Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecdsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 9b84ab503276..75d3eea885db 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -266,7 +266,7 @@ static unsigned int ecdsa_max_size(struct crypto_akcipher *tfm) { struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); - return ctx->pub_key.ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + return DIV_ROUND_UP(ctx->curve->nbits, 8); } static int ecdsa_nist_p384_init_tfm(struct crypto_akcipher *tfm) From 703ca5cda1ea04735e48882a7cccff97d57656c3 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:53 -0400 Subject: [PATCH 060/123] crypto: ecdsa - Rename keylen to bufsize where necessary In cases where 'keylen' was referring to the size of the buffer used by a curve's digits, it does not reflect the purpose of the variable anymore once NIST P521 is used. What it refers to then is the size of the buffer, which may be a few bytes larger than the size a coordinate of a key. Therefore, rename keylen to bufsize where appropriate. Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecdsa.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 75d3eea885db..c7923f30661b 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -35,8 +35,8 @@ struct ecdsa_signature_ctx { static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, const void *value, size_t vlen, unsigned int ndigits) { - size_t keylen = ndigits * sizeof(u64); - ssize_t diff = vlen - keylen; + size_t bufsize = ndigits * sizeof(u64); + ssize_t diff = vlen - bufsize; const char *d = value; u8 rs[ECC_MAX_BYTES]; @@ -58,7 +58,7 @@ static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, if (diff) return -EINVAL; } - if (-diff >= keylen) + if (-diff >= bufsize) return -EINVAL; if (diff) { @@ -138,7 +138,7 @@ static int ecdsa_verify(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); - size_t keylen = ctx->curve->g.ndigits * sizeof(u64); + size_t bufsize = ctx->curve->g.ndigits * sizeof(u64); struct ecdsa_signature_ctx sig_ctx = { .curve = ctx->curve, }; @@ -165,14 +165,14 @@ static int ecdsa_verify(struct akcipher_request *req) goto error; /* if the hash is shorter then we will add leading zeros to fit to ndigits */ - diff = keylen - req->dst_len; + diff = bufsize - req->dst_len; if (diff >= 0) { if (diff) memset(rawhash, 0, diff); memcpy(&rawhash[diff], buffer + req->src_len, req->dst_len); } else if (diff < 0) { /* given hash is longer, we take the left-most bytes */ - memcpy(&rawhash, buffer + req->src_len, keylen); + memcpy(&rawhash, buffer + req->src_len, bufsize); } ecc_swap_digits((u64 *)rawhash, hash, ctx->curve->g.ndigits); From a7d45ba77d3d20ba029532090da4cbf058d3f36b Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:54 -0400 Subject: [PATCH 061/123] crypto: ecdsa - Register NIST P521 and extend test suite Register NIST P521 as an akcipher and extend the testmgr with NIST P521-specific test vectors. Add a module alias so the module gets automatically loaded by the crypto subsystem when the curve is needed. Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/ecdsa.c | 31 ++++++++++ crypto/testmgr.c | 7 +++ crypto/testmgr.h | 146 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 184 insertions(+) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index c7923f30661b..258fffbf623d 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -269,6 +269,28 @@ static unsigned int ecdsa_max_size(struct crypto_akcipher *tfm) return DIV_ROUND_UP(ctx->curve->nbits, 8); } +static int ecdsa_nist_p521_init_tfm(struct crypto_akcipher *tfm) +{ + struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); + + return ecdsa_ecc_ctx_init(ctx, ECC_CURVE_NIST_P521); +} + +static struct akcipher_alg ecdsa_nist_p521 = { + .verify = ecdsa_verify, + .set_pub_key = ecdsa_set_pub_key, + .max_size = ecdsa_max_size, + .init = ecdsa_nist_p521_init_tfm, + .exit = ecdsa_exit_tfm, + .base = { + .cra_name = "ecdsa-nist-p521", + .cra_driver_name = "ecdsa-nist-p521-generic", + .cra_priority = 100, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct ecc_ctx), + }, +}; + static int ecdsa_nist_p384_init_tfm(struct crypto_akcipher *tfm) { struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); @@ -352,8 +374,15 @@ static int __init ecdsa_init(void) if (ret) goto nist_p384_error; + ret = crypto_register_akcipher(&ecdsa_nist_p521); + if (ret) + goto nist_p521_error; + return 0; +nist_p521_error: + crypto_unregister_akcipher(&ecdsa_nist_p384); + nist_p384_error: crypto_unregister_akcipher(&ecdsa_nist_p256); @@ -369,6 +398,7 @@ static void __exit ecdsa_exit(void) crypto_unregister_akcipher(&ecdsa_nist_p192); crypto_unregister_akcipher(&ecdsa_nist_p256); crypto_unregister_akcipher(&ecdsa_nist_p384); + crypto_unregister_akcipher(&ecdsa_nist_p521); } subsys_initcall(ecdsa_init); @@ -380,4 +410,5 @@ MODULE_DESCRIPTION("ECDSA generic algorithm"); MODULE_ALIAS_CRYPTO("ecdsa-nist-p192"); MODULE_ALIAS_CRYPTO("ecdsa-nist-p256"); MODULE_ALIAS_CRYPTO("ecdsa-nist-p384"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p521"); MODULE_ALIAS_CRYPTO("ecdsa-generic"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 3dddd288ca02..00f5a6cf341a 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5097,6 +5097,13 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .akcipher = __VECS(ecdsa_nist_p384_tv_template) } + }, { + .alg = "ecdsa-nist-p521", + .test = alg_test_akcipher, + .fips_allowed = 1, + .suite = { + .akcipher = __VECS(ecdsa_nist_p521_tv_template) + } }, { .alg = "ecrdsa", .test = alg_test_akcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 12e1c892f366..5350cfd9d325 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1071,6 +1071,152 @@ static const struct akcipher_testvec ecdsa_nist_p384_tv_template[] = { }, }; +static const struct akcipher_testvec ecdsa_nist_p521_tv_template[] = { + { + .key = /* secp521r1(sha224) */ + "\x04\x01\x4f\x43\x18\xb6\xa9\xc9\x5d\x68\xd3\xa9\x42\xf8\x98\xc0" + "\xd2\xd1\xa9\x50\x3b\xe8\xc4\x40\xe6\x11\x78\x88\x4b\xbd\x76\xa7" + "\x9a\xe0\xdd\x31\xa4\x67\x78\x45\x33\x9e\x8c\xd1\xc7\x44\xac\x61" + "\x68\xc8\x04\xe7\x5c\x79\xb1\xf1\x41\x0c\x71\xc0\x53\xa8\xbc\xfb" + "\xf5\xca\xd4\x01\x40\xfd\xa3\x45\xda\x08\xe0\xb4\xcb\x28\x3b\x0a" + "\x02\x35\x5f\x02\x9f\x3f\xcd\xef\x08\x22\x40\x97\x74\x65\xb7\x76" + "\x85\xc7\xc0\x5c\xfb\x81\xe1\xa5\xde\x0c\x4e\x8b\x12\x31\xb6\x47" + "\xed\x37\x0f\x99\x3f\x26\xba\xa3\x8e\xff\x79\x34\x7c\x3a\xfe\x1f" + "\x3b\x83\x82\x2f\x14", + .key_len = 133, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x23", + .param_len = 18, + .m = + "\xa2\x3a\x6a\x8c\x7b\x3c\xf2\x51\xf8\xbe\x5f\x4f\x3b\x15\x05\xc4" + "\xb5\xbc\x19\xe7\x21\x85\xe9\x23\x06\x33\x62\xfb", + .m_size = 28, + .algo = OID_id_ecdsa_with_sha224, + .c = + "\x30\x81\x86\x02\x41\x01\xd6\x43\xe7\xff\x42\xb2\xba\x74\x35\xf6" + "\xdc\x6d\x02\x7b\x22\xac\xe2\xef\x07\x92\xee\x60\x94\x06\xf8\x3f" + "\x59\x0f\x74\xf0\x3f\xd8\x18\xc6\x37\x8a\xcb\xa7\xd8\x7d\x98\x85" + "\x29\x88\xff\x0b\x94\x94\x6c\xa6\x9b\x89\x8b\x1e\xfd\x09\x46\x6b" + "\xc7\xaf\x7a\xb9\x19\x0a\x02\x41\x3a\x26\x0d\x55\xcd\x23\x1e\x7d" + "\xa0\x5e\xf9\x88\xf3\xd2\x32\x90\x57\x0f\xf8\x65\x97\x6b\x09\x4d" + "\x22\x26\x0b\x5f\x49\x32\x6b\x91\x99\x30\x90\x0f\x1c\x8f\x78\xd3" + "\x9f\x0e\x64\xcc\xc4\xe8\x43\xd9\x0e\x1c\xad\x22\xda\x82\x00\x35" + "\xa3\x50\xb1\xa5\x98\x92\x2a\xa5\x52", + .c_size = 137, + .public_key_vec = true, + .siggen_sigver_test = true, + }, + { + .key = /* secp521r1(sha256) */ + "\x04\x01\x05\x3a\x6b\x3b\x5a\x0f\xa7\xb9\xb7\x32\x53\x4e\xe2\xae" + "\x0a\x52\xc5\xda\xdd\x5a\x79\x1c\x30\x2d\x33\x07\x79\xd5\x70\x14" + "\x61\x0c\xec\x26\x4d\xd8\x35\x57\x04\x1d\x88\x33\x4d\xce\x05\x36" + "\xa5\xaf\x56\x84\xfa\x0b\x9e\xff\x7b\x30\x4b\x92\x1d\x06\xf8\x81" + "\x24\x1e\x51\x00\x09\x21\x51\xf7\x46\x0a\x77\xdb\xb5\x0c\xe7\x9c" + "\xff\x27\x3c\x02\x71\xd7\x85\x36\xf1\xaa\x11\x59\xd8\xb8\xdc\x09" + "\xdc\x6d\x5a\x6f\x63\x07\x6c\xe1\xe5\x4d\x6e\x0f\x6e\xfb\x7c\x05" + "\x8a\xe9\x53\xa8\xcf\xce\x43\x0e\x82\x20\x86\xbc\x88\x9c\xb7\xe3" + "\xe6\x77\x1e\x1f\x8a", + .key_len = 133, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x23", + .param_len = 18, + .m = + "\xcc\x97\x73\x0c\x73\xa2\x53\x2b\xfa\xd7\x83\x1d\x0c\x72\x1b\x39" + "\x80\x71\x8d\xdd\xc5\x9b\xff\x55\x32\x98\x25\xa2\x58\x2e\xb7\x73", + .m_size = 32, + .algo = OID_id_ecdsa_with_sha256, + .c = + "\x30\x81\x88\x02\x42\x00\xcd\xa5\x5f\x57\x52\x27\x78\x3a\xb5\x06" + "\x0f\xfd\x83\xfc\x0e\xd9\xce\x50\x9f\x7d\x1f\xca\x8b\xa8\x2d\x56" + "\x3c\xf6\xf0\xd8\xe1\xb7\x5d\x95\x35\x6f\x02\x0e\xaf\xe1\x4c\xae" + "\xce\x54\x76\x9a\xc2\x8f\xb8\x38\x1f\x46\x0b\x04\x64\x34\x79\xde" + "\x7e\xd7\x59\x10\xe9\xd9\xd5\x02\x42\x01\xcf\x50\x85\x38\xf9\x15" + "\x83\x18\x04\x6b\x35\xae\x65\xb5\x99\x12\x0a\xa9\x79\x24\xb9\x37" + "\x35\xdd\xa0\xe0\x87\x2c\x44\x4b\x5a\xee\xaf\xfa\x10\xdd\x9b\xfb" + "\x36\x1a\x31\x03\x42\x02\x5f\x50\xf0\xa2\x0d\x1c\x57\x56\x8f\x12" + "\xb7\x1d\x91\x55\x38\xb6\xf6\x34\x65\xc7\xbd", + .c_size = 139, + .public_key_vec = true, + .siggen_sigver_test = true, + }, + { + .key = /* secp521r1(sha384) */ + "\x04\x00\x2e\xd6\x21\x04\x75\xc3\xdc\x7d\xff\x0e\xf3\x70\x25\x2b" + "\xad\x72\xfc\x5a\x91\xf1\xd5\x9c\x64\xf3\x1f\x47\x11\x10\x62\x33" + "\xfd\x2e\xe8\x32\xca\x9e\x6f\x0a\x4c\x5b\x35\x9a\x46\xc5\xe7\xd4" + "\x38\xda\xb2\xf0\xf4\x87\xf3\x86\xf4\xea\x70\xad\x1e\xd4\x78\x8c" + "\x36\x18\x17\x00\xa2\xa0\x34\x1b\x2e\x6a\xdf\x06\xd6\x99\x2d\x47" + "\x50\x92\x1a\x8a\x72\x9c\x23\x44\xfa\xa7\xa9\xed\xa6\xef\x26\x14" + "\xb3\x9d\xfe\x5e\xa3\x8c\xd8\x29\xf8\xdf\xad\xa6\xab\xfc\xdd\x46" + "\x22\x6e\xd7\x35\xc7\x23\xb7\x13\xae\xb6\x34\xff\xd7\x80\xe5\x39" + "\xb3\x3b\x5b\x1b\x94", + .key_len = 133, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x23", + .param_len = 18, + .m = + "\x36\x98\xd6\x82\xfa\xad\xed\x3c\xb9\x40\xb6\x4d\x9e\xb7\x04\x26" + "\xad\x72\x34\x44\xd2\x81\xb4\x9b\xbe\x01\x04\x7a\xd8\x50\xf8\x59" + "\xba\xad\x23\x85\x6b\x59\xbe\xfb\xf6\x86\xd4\x67\xa8\x43\x28\x76", + .m_size = 48, + .algo = OID_id_ecdsa_with_sha384, + .c = + "\x30\x81\x88\x02\x42\x00\x93\x96\x76\x3c\x27\xea\xaa\x9c\x26\xec" + "\x51\xdc\xe8\x35\x5e\xae\x16\xf2\x4b\x64\x98\xf7\xec\xda\xc7\x7e" + "\x42\x71\x86\x57\x2d\xf1\x7d\xe4\xdf\x9b\x7d\x9e\x47\xca\x33\x32" + "\x76\x06\xd0\xf9\xc0\xe4\xe6\x84\x59\xfd\x1a\xc4\x40\xdd\x43\xb8" + "\x6a\xdd\xfb\xe6\x63\x4e\x28\x02\x42\x00\xff\xc3\x6a\x87\x6e\xb5" + "\x13\x1f\x20\x55\xce\x37\x97\xc9\x05\x51\xe5\xe4\x3c\xbc\x93\x65" + "\x57\x1c\x30\xda\xa7\xcd\x26\x28\x76\x3b\x52\xdf\xc4\xc0\xdb\x54" + "\xdb\x8a\x0d\x6a\xc3\xf3\x7a\xd1\xfa\xe7\xa7\xe5\x5a\x94\x56\xcf" + "\x8f\xb4\x22\xc6\x4f\xab\x2b\x62\xc1\x42\xb1", + .c_size = 139, + .public_key_vec = true, + .siggen_sigver_test = true, + }, + { + .key = /* secp521r1(sha512) */ + "\x04\x00\xc7\x65\xee\x0b\x86\x7d\x8f\x02\xf1\x74\x5b\xb0\x4c\x3f" + "\xa6\x35\x60\x9f\x55\x23\x11\xcc\xdf\xb8\x42\x99\xee\x6c\x96\x6a" + "\x27\xa2\x56\xb2\x2b\x03\xad\x0f\xe7\x97\xde\x09\x5d\xb4\xc5\x5f" + "\xbd\x87\x37\xbf\x5a\x16\x35\x56\x08\xfd\x6f\x06\x1a\x1c\x84\xee" + "\xc3\x64\xb3\x00\x9e\xbd\x6e\x60\x76\xee\x69\xfd\x3a\xb8\xcd\x7e" + "\x91\x68\x53\x57\x44\x13\x2e\x77\x09\x2a\xbe\x48\xbd\x91\xd8\xf6" + "\x21\x16\x53\x99\xd5\xf0\x40\xad\xa6\xf8\x58\x26\xb6\x9a\xf8\x77" + "\xfe\x3a\x05\x1a\xdb\xa9\x0f\xc0\x6c\x76\x30\x8c\xd8\xde\x44\xae" + "\xd0\x17\xdf\x49\x6a", + .key_len = 133, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x23", + .param_len = 18, + .m = + "\x5c\xa6\xbc\x79\xb8\xa0\x1e\x11\x83\xf7\xe9\x05\xdf\xba\xf7\x69" + "\x97\x22\x32\xe4\x94\x7c\x65\xbd\x74\xc6\x9a\x8b\xbd\x0d\xdc\xed" + "\xf5\x9c\xeb\xe1\xc5\x68\x40\xf2\xc7\x04\xde\x9e\x0d\x76\xc5\xa3" + "\xf9\x3c\x6c\x98\x08\x31\xbd\x39\xe8\x42\x7f\x80\x39\x6f\xfe\x68", + .m_size = 64, + .algo = OID_id_ecdsa_with_sha512, + .c = + "\x30\x81\x88\x02\x42\x01\x5c\x71\x86\x96\xac\x21\x33\x7e\x4e\xaa" + "\x86\xec\xa8\x05\x03\x52\x56\x63\x0e\x02\xcc\x94\xa9\x05\xb9\xfb" + "\x62\x1e\x42\x03\x6c\x74\x8a\x1f\x12\x3e\xb7\x7e\x51\xff\x7f\x27" + "\x93\xe8\x6c\x49\x7d\x28\xfc\x80\xa6\x13\xfc\xb6\x90\xf7\xbb\x28" + "\xb5\x04\xb0\xb6\x33\x1c\x7e\x02\x42\x01\x70\x43\x52\x1d\xe3\xc6" + "\xbd\x5a\x40\x95\x35\x89\x4f\x41\x5f\x9e\x19\x88\x05\x3e\x43\x39" + "\x01\xbd\xb7\x7a\x76\x37\x51\x47\x49\x98\x12\x71\xd0\xe9\xca\xa7" + "\xc0\xcb\xaa\x00\x55\xbb\x6a\xb4\x73\x00\xd2\x72\x74\x13\x63\x39" + "\xa6\xe5\x25\x46\x1e\x77\x44\x78\xe0\xd1\x04", + .c_size = 139, + .public_key_vec = true, + .siggen_sigver_test = true, + }, +}; + /* * EC-RDSA test vectors are generated by gost-engine. */ From 4dc50330a437e91063c68da074574bf6459d68a5 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:55 -0400 Subject: [PATCH 062/123] crypto: asymmetric_keys - Adjust signature size calculation for NIST P521 Adjust the calculation of the maximum signature size for support of NIST P521. While existing curves may prepend a 0 byte to their coordinates (to make the number positive), NIST P521 will not do this since only the first bit in the most significant byte is used. If the encoding of the x & y coordinates requires at least 128 bytes then an additional byte is needed for the encoding of the length. Take this into account when calculating the maximum signature size. Reviewed-by: Lukas Wunner Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/asymmetric_keys/public_key.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index e314fd57e6f8..3474fb34ded9 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -234,6 +234,7 @@ static int software_key_query(const struct kernel_pkey_params *params, info->key_size = len * 8; if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) { + int slen = len; /* * ECDSA key sizes are much smaller than RSA, and thus could * operate on (hashed) inputs that are larger than key size. @@ -247,8 +248,19 @@ static int software_key_query(const struct kernel_pkey_params *params, * Verify takes ECDSA-Sig (described in RFC 5480) as input, * which is actually 2 'key_size'-bit integers encoded in * ASN.1. Account for the ASN.1 encoding overhead here. + * + * NIST P192/256/384 may prepend a '0' to a coordinate to + * indicate a positive integer. NIST P521 never needs it. */ - info->max_sig_size = 2 * (len + 3) + 2; + if (strcmp(pkey->pkey_algo, "ecdsa-nist-p521") != 0) + slen += 1; + /* Length of encoding the x & y coordinates */ + slen = 2 * (slen + 2); + /* + * If coordinate encoding takes at least 128 bytes then an + * additional byte for length encoding is needed. + */ + info->max_sig_size = 1 + (slen >= 128) + 1 + slen; } else { info->max_data_size = len; info->max_sig_size = len; From 3ba2ae361402f28754adf873954a22bf97b856a9 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:56 -0400 Subject: [PATCH 063/123] crypto: x509 - Add OID for NIST P521 and extend parser for it Enable the x509 parser to accept NIST P521 certificates and add the OID for ansip521r1, which is the identifier for NIST P521. Cc: David Howells Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/asymmetric_keys/x509_cert_parser.c | 3 +++ include/linux/oid_registry.h | 1 + 2 files changed, 4 insertions(+) diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index bb0bffa271b5..964208d1a35f 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -546,6 +546,9 @@ int x509_extract_key_data(void *context, size_t hdrlen, case OID_id_ansip384r1: ctx->cert->pub->pkey_algo = "ecdsa-nist-p384"; break; + case OID_id_ansip521r1: + ctx->cert->pub->pkey_algo = "ecdsa-nist-p521"; + break; default: return -ENOPKG; } diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 51421fdbb0ba..6f9242259edc 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -69,6 +69,7 @@ enum OID { OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ OID_sha1, /* 1.3.14.3.2.26 */ OID_id_ansip384r1, /* 1.3.132.0.34 */ + OID_id_ansip521r1, /* 1.3.132.0.35 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ OID_sha384, /* 2.16.840.1.101.3.4.2.2 */ OID_sha512, /* 2.16.840.1.101.3.4.2.3 */ From 8f0e0cf74ccef41b383daddcf5447bba655031b3 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 5 Apr 2024 13:57:30 -0500 Subject: [PATCH 064/123] crypto: iaa - Use cpumask_weight() when rebalancing If some cpus are offlined, or if the node mask is smaller than expected, the 'nonexistent cpu' warning in rebalance_wq_table() may be erroneously triggered. Use cpumask_weight() to make sure we only iterate over the exact number of cpus in the mask. Also use num_possible_cpus() instead of num_online_cpus() to make sure all slots in the wq table are initialized. Signed-off-by: Tom Zanussi Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 6229b24b0d35..814fb2c31626 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -922,7 +922,7 @@ static void rebalance_wq_table(void) for_each_node_with_cpus(node) { node_cpus = cpumask_of_node(node); - for (cpu = 0; cpu < nr_cpus_per_node; cpu++) { + for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) { int node_cpu = cpumask_nth(cpu, node_cpus); if (WARN_ON(node_cpu >= nr_cpu_ids)) { @@ -2005,7 +2005,7 @@ static int __init iaa_crypto_init_module(void) int ret = 0; int node; - nr_cpus = num_online_cpus(); + nr_cpus = num_possible_cpus(); for_each_node_with_cpus(node) nr_nodes++; if (!nr_nodes) { From 4ad096cca942959871d8ff73826d30f81f856f6e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Apr 2024 20:26:08 -0400 Subject: [PATCH 065/123] crypto: x86/nh-avx2 - add missing vzeroupper Since nh_avx2() uses ymm registers, execute vzeroupper before returning from it. This is necessary to avoid reducing the performance of SSE code. Fixes: 0f961f9f670e ("crypto: x86/nhpoly1305 - add AVX2 accelerated NHPoly1305") Signed-off-by: Eric Biggers Acked-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/nh-avx2-x86_64.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S index ef73a3ab8726..791386d9a83a 100644 --- a/arch/x86/crypto/nh-avx2-x86_64.S +++ b/arch/x86/crypto/nh-avx2-x86_64.S @@ -154,5 +154,6 @@ SYM_TYPED_FUNC_START(nh_avx2) vpaddq T1, T0, T0 vpaddq T4, T0, T0 vmovdqu T0, (HASH) + vzeroupper RET SYM_FUNC_END(nh_avx2) From 57ce8a4e162599cf9adafef1f29763160a8e5564 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Apr 2024 20:26:09 -0400 Subject: [PATCH 066/123] crypto: x86/sha256-avx2 - add missing vzeroupper Since sha256_transform_rorx() uses ymm registers, execute vzeroupper before returning from it. This is necessary to avoid reducing the performance of SSE code. Fixes: d34a460092d8 ("crypto: sha256 - Optimized sha256 x86_64 routine using AVX2's RORX instructions") Signed-off-by: Eric Biggers Acked-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256-avx2-asm.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 9918212faf91..0ffb072be956 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -716,6 +716,7 @@ SYM_TYPED_FUNC_START(sha256_transform_rorx) popq %r13 popq %r12 popq %rbx + vzeroupper RET SYM_FUNC_END(sha256_transform_rorx) From 6a24fdfe1edbafacdacd53516654d99068f20eec Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Apr 2024 20:26:10 -0400 Subject: [PATCH 067/123] crypto: x86/sha512-avx2 - add missing vzeroupper Since sha512_transform_rorx() uses ymm registers, execute vzeroupper before returning from it. This is necessary to avoid reducing the performance of SSE code. Fixes: e01d69cb0195 ("crypto: sha512 - Optimized SHA512 x86_64 assembly routine using AVX instructions.") Signed-off-by: Eric Biggers Acked-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-avx2-asm.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index f08496cd6870..24973f42c43f 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -680,6 +680,7 @@ SYM_TYPED_FUNC_START(sha512_transform_rorx) pop %r12 pop %rbx + vzeroupper RET SYM_FUNC_END(sha512_transform_rorx) From 5307147b5e2342b96cd1f1eb19e47e8d2c6c1428 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 7 Apr 2024 15:59:52 +0800 Subject: [PATCH 068/123] crypto: hisilicon/sec - Add the condition for configuring the sriov function When CONFIG_PCI_IOV is disabled, the SRIOV configuration function is not required. An error occurs if this function is incorrectly called. Consistent with other modules, add the condition for configuring the sriov function of sec_pci_driver. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index c290d8937b19..f4e10741610f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -1324,7 +1324,8 @@ static struct pci_driver sec_pci_driver = { .probe = sec_probe, .remove = sec_remove, .err_handler = &sec_err_handler, - .sriov_configure = hisi_qm_sriov_configure, + .sriov_configure = IS_ENABLED(CONFIG_PCI_IOV) ? + hisi_qm_sriov_configure : NULL, .shutdown = hisi_qm_dev_shutdown, .driver.pm = &sec_pm_ops, }; From 8be0913389718e8d27c4f1d4537b5e1b99ed7739 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 7 Apr 2024 15:59:53 +0800 Subject: [PATCH 069/123] crypto: hisilicon/debugfs - Fix debugfs uninit process issue During the zip probe process, the debugfs failure does not stop the probe. When debugfs initialization fails, jumping to the error branch will also release regs, in addition to its own rollback operation. As a result, it may be released repeatedly during the regs uninit process. Therefore, the null check needs to be added to the regs uninit process. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/debugfs.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c index cd67fa348ca7..6351a452878d 100644 --- a/drivers/crypto/hisilicon/debugfs.c +++ b/drivers/crypto/hisilicon/debugfs.c @@ -809,8 +809,14 @@ static void dfx_regs_uninit(struct hisi_qm *qm, { int i; + if (!dregs) + return; + /* Setting the pointer is NULL to prevent double free */ for (i = 0; i < reg_len; i++) { + if (!dregs[i].regs) + continue; + kfree(dregs[i].regs); dregs[i].regs = NULL; } @@ -860,14 +866,21 @@ static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm, static int qm_diff_regs_init(struct hisi_qm *qm, struct dfx_diff_registers *dregs, u32 reg_len) { + int ret; + qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); - if (IS_ERR(qm->debug.qm_diff_regs)) - return PTR_ERR(qm->debug.qm_diff_regs); + if (IS_ERR(qm->debug.qm_diff_regs)) { + ret = PTR_ERR(qm->debug.qm_diff_regs); + qm->debug.qm_diff_regs = NULL; + return ret; + } qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); if (IS_ERR(qm->debug.acc_diff_regs)) { dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); - return PTR_ERR(qm->debug.acc_diff_regs); + ret = PTR_ERR(qm->debug.acc_diff_regs); + qm->debug.acc_diff_regs = NULL; + return ret; } return 0; @@ -908,7 +921,9 @@ static int qm_last_regs_init(struct hisi_qm *qm) static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len) { dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); + qm->debug.acc_diff_regs = NULL; dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); + qm->debug.qm_diff_regs = NULL; } /** From 040279e84d4e94d65cd5a9f95f9821e26c72d26e Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 7 Apr 2024 15:59:54 +0800 Subject: [PATCH 070/123] crypto: hisilicon/sgl - Delete redundant parameter verification The input parameter check in acc_get_sgl is redundant. The caller has been verified once. When the check is performed for multiple times, the performance deteriorates. So the redundant parameter verification is deleted, and the index verification is changed to the module entry function for verification. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sgl.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index 0beca257c20b..568acd0aee3f 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -161,9 +161,6 @@ static struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool, struct mem_block *block; u32 block_index, offset; - if (!pool || !hw_sgl_dma || index >= pool->count) - return ERR_PTR(-EINVAL); - block = pool->mem_block; block_index = index / pool->sgl_num_per_block; offset = index % pool->sgl_num_per_block; @@ -230,7 +227,7 @@ hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, struct scatterlist *sg; int sg_n; - if (!dev || !sgl || !pool || !hw_sgl_dma) + if (!dev || !sgl || !pool || !hw_sgl_dma || index >= pool->count) return ERR_PTR(-EINVAL); sg_n = sg_nents(sgl); From 3b7db97e60ac25c1e050a5d3e3a2e6a22c3f7f2d Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 7 Apr 2024 15:59:55 +0800 Subject: [PATCH 071/123] crypto: hisilicon/debugfs - Fix the processing logic issue in the debugfs creation There is a scenario where the file directory is created but the file attribute is not set. In this case, if a user accesses the file, an error occurs. So adjust the processing logic in the debugfs creation to prevent the file from being accessed before the file attributes such as the index are set. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c index 6351a452878d..e9fa42381242 100644 --- a/drivers/crypto/hisilicon/debugfs.c +++ b/drivers/crypto/hisilicon/debugfs.c @@ -1090,12 +1090,12 @@ static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, { struct debugfs_file *file = qm->debug.files + index; - debugfs_create_file(qm_debug_file_name[index], 0600, dir, file, - &qm_debug_fops); - file->index = index; mutex_init(&file->lock); file->debug = &qm->debug; + + debugfs_create_file(qm_debug_file_name[index], 0600, dir, file, + &qm_debug_fops); } static int qm_debugfs_atomic64_set(void *data, u64 val) From 56f37ceaf4409d0c3a69101b7ea5d14eb6568f42 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 7 Apr 2024 15:59:56 +0800 Subject: [PATCH 072/123] crypto: hisilicon/qm - Add the default processing branch The cmd type can be extended. Currently, only four types of cmd can be processed. Therefor, add the default processing branch to intercept incorrect parameter input. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 92f0a1d9b4a6..cedb3af1fc1a 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -645,6 +645,9 @@ int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op tmp_xqc = qm->xqc_buf.aeqc; xqc_dma = qm->xqc_buf.aeqc_dma; break; + default: + dev_err(&qm->pdev->dev, "unknown mailbox cmd %u\n", cmd); + return -EINVAL; } /* Setting xqc will fail if master OOO is blocked. */ From 0a6e038f0c4d9a5d5f638757fa03ef6ffc550b6a Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 7 Apr 2024 15:59:57 +0800 Subject: [PATCH 073/123] crypto: hisilicon - Adjust debugfs creation and release order There is a scenario where the file directory is created but the file memory is not set. In this case, if a user accesses the file, an error occurs. So during the creation process of debugfs, memory should be allocated first before creating the directory. In the release process, the directory should be deleted first before releasing the memory to avoid the situation where the memory does not exist when accessing the directory. In addition, the directory released by the debugfs is a global variable. When the debugfs of an accelerator fails to be initialized, releasing the directory of the global variable affects the debugfs initialization of other accelerators. The debugfs root directory released by debugfs init should be a member of qm, not a global variable. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 21 ++++++++++---------- drivers/crypto/hisilicon/sec2/sec_main.c | 23 +++++++++++----------- drivers/crypto/hisilicon/zip/zip_main.c | 24 +++++++++++------------ 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index d93aa6630a57..25b44416da45 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -1074,41 +1074,40 @@ static int hpre_debugfs_init(struct hisi_qm *qm) struct device *dev = &qm->pdev->dev; int ret; - qm->debug.debug_root = debugfs_create_dir(dev_name(dev), - hpre_debugfs_root); - - qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; - qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; ret = hisi_qm_regs_debugfs_init(qm, hpre_diff_regs, ARRAY_SIZE(hpre_diff_regs)); if (ret) { dev_warn(dev, "Failed to init HPRE diff regs!\n"); - goto debugfs_remove; + return ret; } + qm->debug.debug_root = debugfs_create_dir(dev_name(dev), + hpre_debugfs_root); + qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; + qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; + hisi_qm_debug_init(qm); if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) { ret = hpre_ctrl_debug_init(qm); if (ret) - goto failed_to_create; + goto debugfs_remove; } hpre_dfx_debug_init(qm); return 0; -failed_to_create: - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); debugfs_remove: debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); return ret; } static void hpre_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); - debugfs_remove_recursive(qm->debug.debug_root); + + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); } static int hpre_pre_store_cap_reg(struct hisi_qm *qm) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index f4e10741610f..853b1cb85016 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -901,37 +901,36 @@ static int sec_debugfs_init(struct hisi_qm *qm) struct device *dev = &qm->pdev->dev; int ret; - qm->debug.debug_root = debugfs_create_dir(dev_name(dev), - sec_debugfs_root); - qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; - qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; - ret = hisi_qm_regs_debugfs_init(qm, sec_diff_regs, ARRAY_SIZE(sec_diff_regs)); if (ret) { dev_warn(dev, "Failed to init SEC diff regs!\n"); - goto debugfs_remove; + return ret; } + qm->debug.debug_root = debugfs_create_dir(dev_name(dev), + sec_debugfs_root); + qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; + qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; + hisi_qm_debug_init(qm); ret = sec_debug_init(qm); if (ret) - goto failed_to_create; + goto debugfs_remove; return 0; -failed_to_create: - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); debugfs_remove: - debugfs_remove_recursive(sec_debugfs_root); + debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); return ret; } static void sec_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); - debugfs_remove_recursive(qm->debug.debug_root); + + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); } static int sec_show_last_regs_init(struct hisi_qm *qm) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index c065fd867161..c94a7b20d07e 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -887,36 +887,34 @@ static int hisi_zip_ctrl_debug_init(struct hisi_qm *qm) static int hisi_zip_debugfs_init(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; - struct dentry *dev_d; int ret; - dev_d = debugfs_create_dir(dev_name(dev), hzip_debugfs_root); - - qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; - qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; - qm->debug.debug_root = dev_d; ret = hisi_qm_regs_debugfs_init(qm, hzip_diff_regs, ARRAY_SIZE(hzip_diff_regs)); if (ret) { dev_warn(dev, "Failed to init ZIP diff regs!\n"); - goto debugfs_remove; + return ret; } + qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; + qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; + qm->debug.debug_root = debugfs_create_dir(dev_name(dev), + hzip_debugfs_root); + hisi_qm_debug_init(qm); if (qm->fun_type == QM_HW_PF) { ret = hisi_zip_ctrl_debug_init(qm); if (ret) - goto failed_to_create; + goto debugfs_remove; } hisi_zip_dfx_debug_init(qm); return 0; -failed_to_create: - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); debugfs_remove: - debugfs_remove_recursive(hzip_debugfs_root); + debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); return ret; } @@ -940,10 +938,10 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm) static void hisi_zip_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); - debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); + if (qm->fun_type == QM_HW_PF) { hisi_zip_debug_regs_clear(qm); qm->debug.curr_qm_qp_num = 0; From bba4250757b4ae1680fea435a358d8093f254094 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 7 Apr 2024 15:59:58 +0800 Subject: [PATCH 074/123] crypto: hisilicon/sec - Fix memory leak for sec resource release The AIV is one of the SEC resources. When releasing resources, it need to release the AIV resources at the same time. Otherwise, memory leakage occurs. The aiv resource release is added to the sec resource release function. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 93a972fcbf63..0558f98e221f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -481,8 +481,10 @@ static void sec_alg_resource_free(struct sec_ctx *ctx, if (ctx->pbuf_supported) sec_free_pbuf_resource(dev, qp_ctx->res); - if (ctx->alg_type == SEC_AEAD) + if (ctx->alg_type == SEC_AEAD) { sec_free_mac_resource(dev, qp_ctx->res); + sec_free_aiv_resource(dev, qp_ctx->res); + } } static int sec_alloc_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) From 745a11899a8c1e8c6213ec054585a556ad11fdb6 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 7 Apr 2024 15:59:59 +0800 Subject: [PATCH 075/123] crypto: hisilicon/debugfs - Resolve the problem of applying for redundant space in sq dump When dumping SQ, only the corresponding ID's SQE needs to be dumped, and there is no need to apply for the entire SQE memory. This is because excessive dump operations can lead to memory resource waste. Therefor apply for the space corresponding to sqe_id separately to avoid space waste. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/debugfs.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c index e9fa42381242..8e7dfdc5b989 100644 --- a/drivers/crypto/hisilicon/debugfs.c +++ b/drivers/crypto/hisilicon/debugfs.c @@ -311,7 +311,7 @@ static int q_dump_param_parse(struct hisi_qm *qm, char *s, static int qm_sq_dump(struct hisi_qm *qm, char *s, char *name) { u16 sq_depth = qm->qp_array->cq_depth; - void *sqe, *sqe_curr; + void *sqe; struct hisi_qp *qp; u32 qp_id, sqe_id; int ret; @@ -320,17 +320,16 @@ static int qm_sq_dump(struct hisi_qm *qm, char *s, char *name) if (ret) return ret; - sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL); + sqe = kzalloc(qm->sqe_size, GFP_KERNEL); if (!sqe) return -ENOMEM; qp = &qm->qp_array[qp_id]; - memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth); - sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size); - memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, + memcpy(sqe, qp->sqe + sqe_id * qm->sqe_size, qm->sqe_size); + memset(sqe + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, qm->debug.sqe_mask_len); - dump_show(qm, sqe_curr, qm->sqe_size, name); + dump_show(qm, sqe, qm->sqe_size, name); kfree(sqe); From c9ccfd5e0ff0dd929ce86d1b5f3c6a414110947a Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 7 Apr 2024 16:00:00 +0800 Subject: [PATCH 076/123] crypto: hisilicon/qm - Add the err memory release process to qm uninit When the qm uninit command is executed, the err data needs to be released to prevent memory leakage. The error information release operation and uacce_remove are integrated in qm_remove_uacce. So add the qm_remove_uacce to qm uninit to avoid err memory leakage. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index cedb3af1fc1a..3dac8d8e8568 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -2896,12 +2896,9 @@ void hisi_qm_uninit(struct hisi_qm *qm) hisi_qm_set_state(qm, QM_NOT_READY); up_write(&qm->qps_lock); + qm_remove_uacce(qm); qm_irqs_unregister(qm); hisi_qm_pci_uninit(qm); - if (qm->use_sva) { - uacce_remove(qm->uacce); - qm->uacce = NULL; - } } EXPORT_SYMBOL_GPL(hisi_qm_uninit); From 5c6ca9d936654a135b459c846885e08966e5e5bf Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 7 Apr 2024 19:57:40 +0200 Subject: [PATCH 077/123] X.509: Introduce scope-based x509_certificate allocation Add a DEFINE_FREE() clause for x509_certificate structs and use it in x509_cert_parse() and x509_key_preparse(). These are the only functions where scope-based x509_certificate allocation currently makes sense. A third user will be introduced with the forthcoming SPDM library (Security Protocol and Data Model) for PCI device authentication. Unlike most other DEFINE_FREE() clauses, this one checks for IS_ERR() instead of NULL before calling x509_free_certificate() at end of scope. That's because the "constructor" of x509_certificate structs, x509_cert_parse(), returns a valid pointer or an ERR_PTR(), but never NULL. Comparing the Assembler output before/after has shown they are identical, save for the fact that gcc-12 always generates two return paths when __cleanup() is used, one for the success case and one for the error case. In x509_cert_parse(), add a hint for the compiler that kzalloc() never returns an ERR_PTR(). Otherwise the compiler adds a gratuitous IS_ERR() check on return. Introduce an assume() macro for this which can be re-used elsewhere in the kernel to provide hints for the compiler. Suggested-by: Jonathan Cameron Link: https://lore.kernel.org/all/20231003153937.000034ca@Huawei.com/ Link: https://lwn.net/Articles/934679/ Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- crypto/asymmetric_keys/x509_cert_parser.c | 43 +++++++++-------------- crypto/asymmetric_keys/x509_parser.h | 3 ++ crypto/asymmetric_keys/x509_public_key.c | 31 +++++----------- include/linux/compiler.h | 2 ++ 4 files changed, 30 insertions(+), 49 deletions(-) diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 964208d1a35f..a814e5f136e2 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -60,24 +60,24 @@ EXPORT_SYMBOL_GPL(x509_free_certificate); */ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen) { - struct x509_certificate *cert; - struct x509_parse_context *ctx; + struct x509_certificate *cert __free(x509_free_certificate); + struct x509_parse_context *ctx __free(kfree) = NULL; struct asymmetric_key_id *kid; long ret; - ret = -ENOMEM; cert = kzalloc(sizeof(struct x509_certificate), GFP_KERNEL); + assume(!IS_ERR(cert)); /* Avoid gratuitous IS_ERR() check on return */ if (!cert) - goto error_no_cert; + return ERR_PTR(-ENOMEM); cert->pub = kzalloc(sizeof(struct public_key), GFP_KERNEL); if (!cert->pub) - goto error_no_ctx; + return ERR_PTR(-ENOMEM); cert->sig = kzalloc(sizeof(struct public_key_signature), GFP_KERNEL); if (!cert->sig) - goto error_no_ctx; + return ERR_PTR(-ENOMEM); ctx = kzalloc(sizeof(struct x509_parse_context), GFP_KERNEL); if (!ctx) - goto error_no_ctx; + return ERR_PTR(-ENOMEM); ctx->cert = cert; ctx->data = (unsigned long)data; @@ -85,7 +85,7 @@ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen) /* Attempt to decode the certificate */ ret = asn1_ber_decoder(&x509_decoder, ctx, data, datalen); if (ret < 0) - goto error_decode; + return ERR_PTR(ret); /* Decode the AuthorityKeyIdentifier */ if (ctx->raw_akid) { @@ -95,20 +95,19 @@ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen) ctx->raw_akid, ctx->raw_akid_size); if (ret < 0) { pr_warn("Couldn't decode AuthKeyIdentifier\n"); - goto error_decode; + return ERR_PTR(ret); } } - ret = -ENOMEM; cert->pub->key = kmemdup(ctx->key, ctx->key_size, GFP_KERNEL); if (!cert->pub->key) - goto error_decode; + return ERR_PTR(-ENOMEM); cert->pub->keylen = ctx->key_size; cert->pub->params = kmemdup(ctx->params, ctx->params_size, GFP_KERNEL); if (!cert->pub->params) - goto error_decode; + return ERR_PTR(-ENOMEM); cert->pub->paramlen = ctx->params_size; cert->pub->algo = ctx->key_algo; @@ -116,33 +115,23 @@ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen) /* Grab the signature bits */ ret = x509_get_sig_params(cert); if (ret < 0) - goto error_decode; + return ERR_PTR(ret); /* Generate cert issuer + serial number key ID */ kid = asymmetric_key_generate_id(cert->raw_serial, cert->raw_serial_size, cert->raw_issuer, cert->raw_issuer_size); - if (IS_ERR(kid)) { - ret = PTR_ERR(kid); - goto error_decode; - } + if (IS_ERR(kid)) + return ERR_CAST(kid); cert->id = kid; /* Detect self-signed certificates */ ret = x509_check_for_self_signed(cert); if (ret < 0) - goto error_decode; + return ERR_PTR(ret); - kfree(ctx); - return cert; - -error_decode: - kfree(ctx); -error_no_ctx: - x509_free_certificate(cert); -error_no_cert: - return ERR_PTR(ret); + return_ptr(cert); } EXPORT_SYMBOL_GPL(x509_cert_parse); diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h index 97a886cbe01c..0688c222806b 100644 --- a/crypto/asymmetric_keys/x509_parser.h +++ b/crypto/asymmetric_keys/x509_parser.h @@ -5,6 +5,7 @@ * Written by David Howells (dhowells@redhat.com) */ +#include #include #include #include @@ -44,6 +45,8 @@ struct x509_certificate { * x509_cert_parser.c */ extern void x509_free_certificate(struct x509_certificate *cert); +DEFINE_FREE(x509_free_certificate, struct x509_certificate *, + if (!IS_ERR(_T)) x509_free_certificate(_T)) extern struct x509_certificate *x509_cert_parse(const void *data, size_t datalen); extern int x509_decode_time(time64_t *_t, size_t hdrlen, unsigned char tag, diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 6a4f00be22fc..00ac7159fba2 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -161,12 +161,11 @@ int x509_check_for_self_signed(struct x509_certificate *cert) */ static int x509_key_preparse(struct key_preparsed_payload *prep) { - struct asymmetric_key_ids *kids; - struct x509_certificate *cert; + struct x509_certificate *cert __free(x509_free_certificate); + struct asymmetric_key_ids *kids __free(kfree) = NULL; + char *p, *desc __free(kfree) = NULL; const char *q; size_t srlen, sulen; - char *desc = NULL, *p; - int ret; cert = x509_cert_parse(prep->data, prep->datalen); if (IS_ERR(cert)) @@ -188,9 +187,8 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) } /* Don't permit addition of blacklisted keys */ - ret = -EKEYREJECTED; if (cert->blacklisted) - goto error_free_cert; + return -EKEYREJECTED; /* Propose a description */ sulen = strlen(cert->subject); @@ -202,10 +200,9 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) q = cert->raw_serial; } - ret = -ENOMEM; desc = kmalloc(sulen + 2 + srlen * 2 + 1, GFP_KERNEL); if (!desc) - goto error_free_cert; + return -ENOMEM; p = memcpy(desc, cert->subject, sulen); p += sulen; *p++ = ':'; @@ -215,16 +212,14 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) kids = kmalloc(sizeof(struct asymmetric_key_ids), GFP_KERNEL); if (!kids) - goto error_free_desc; + return -ENOMEM; kids->id[0] = cert->id; kids->id[1] = cert->skid; kids->id[2] = asymmetric_key_generate_id(cert->raw_subject, cert->raw_subject_size, "", 0); - if (IS_ERR(kids->id[2])) { - ret = PTR_ERR(kids->id[2]); - goto error_free_kids; - } + if (IS_ERR(kids->id[2])) + return PTR_ERR(kids->id[2]); /* We're pinning the module by being linked against it */ __module_get(public_key_subtype.owner); @@ -242,15 +237,7 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) cert->sig = NULL; desc = NULL; kids = NULL; - ret = 0; - -error_free_kids: - kfree(kids); -error_free_desc: - kfree(desc); -error_free_cert: - x509_free_certificate(cert); - return ret; + return 0; } static struct asymmetric_key_parser x509_key_parser = { diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c00cc6c0878a..53666eb19909 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -148,6 +148,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, } while (0) #endif +#define assume(cond) do { if (!(cond)) __builtin_unreachable(); } while (0) + /* * KENTRY - kernel entry point * This can be used to annotate symbols (functions or data) that are used From 751fb2528c12ef64d1e863efb196cdc968b384f6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 7 Apr 2024 17:22:31 -0400 Subject: [PATCH 078/123] crypto: x86/aes-xts - make non-AVX implementation use new glue code Make the non-AVX implementation of AES-XTS (xts-aes-aesni) use the new glue code that was introduced for the AVX implementations of AES-XTS. This reduces code size, and it improves the performance of xts-aes-aesni due to the optimization for messages that don't span page boundaries. This required moving the new glue functions higher up in the file and allowing the IV encryption function to be specified by the caller. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 2 +- arch/x86/crypto/aesni-intel_asm.S | 16 +- arch/x86/crypto/aesni-intel_glue.c | 329 +++++++++++---------------- 3 files changed, 138 insertions(+), 209 deletions(-) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index b8005d0205f8..fcaf64a2f8c6 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -751,7 +751,7 @@ // void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, // u8 iv[AES_BLOCK_SIZE]); -SYM_FUNC_START(aes_xts_encrypt_iv) +SYM_TYPED_FUNC_START(aes_xts_encrypt_iv) vmovdqu (%rsi), %xmm0 vpxor 0*16(%rdi), %xmm0, %xmm0 vaesenc 1*16(%rdi), %xmm0, %xmm0 diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 7ecb55cae3d6..1cb55eea2efa 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2843,10 +2843,10 @@ SYM_FUNC_END(aesni_ctr_enc) pxor KEY, IV; /* - * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, - * const u8 *src, unsigned int len, le128 *iv) + * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) */ -SYM_FUNC_START(aesni_xts_encrypt) +SYM_FUNC_START(aesni_xts_enc) FRAME_BEGIN #ifndef __x86_64__ pushl IVP @@ -2995,13 +2995,13 @@ SYM_FUNC_START(aesni_xts_encrypt) movups STATE, (OUTP) jmp .Lxts_enc_ret -SYM_FUNC_END(aesni_xts_encrypt) +SYM_FUNC_END(aesni_xts_enc) /* - * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, - * const u8 *src, unsigned int len, le128 *iv) + * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) */ -SYM_FUNC_START(aesni_xts_decrypt) +SYM_FUNC_START(aesni_xts_dec) FRAME_BEGIN #ifndef __x86_64__ pushl IVP @@ -3157,4 +3157,4 @@ SYM_FUNC_START(aesni_xts_decrypt) movups STATE, (OUTP) jmp .Lxts_dec_ret -SYM_FUNC_END(aesni_xts_decrypt) +SYM_FUNC_END(aesni_xts_dec) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 0b37a470325b..e7d21000cb05 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -107,11 +107,11 @@ asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, #define AVX_GEN2_OPTSIZE 640 #define AVX_GEN4_OPTSIZE 4096 -asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); -asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); #ifdef CONFIG_X86_64 @@ -875,7 +875,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) } #endif -static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, +static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); @@ -896,108 +896,152 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, return aes_set_key_common(&ctx->tweak_ctx, key + keylen, keylen); } -static int xts_crypt(struct skcipher_request *req, bool encrypt) +typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key, + u8 iv[AES_BLOCK_SIZE]); +typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, size_t len, + u8 tweak[AES_BLOCK_SIZE]); + +/* This handles cases where the source and/or destination span pages. */ +static noinline int +xts_crypt_slowpath(struct skcipher_request *req, xts_crypt_func crypt_func) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); + const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; struct skcipher_request subreq; struct skcipher_walk walk; + struct scatterlist *src, *dst; int err; - if (req->cryptlen < AES_BLOCK_SIZE) - return -EINVAL; - - err = skcipher_walk_virt(&walk, req, false); - if (!walk.nbytes) - return err; - - if (unlikely(tail > 0 && walk.nbytes < walk.total)) { - int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; - - skcipher_walk_abort(&walk); - + /* + * If the message length isn't divisible by the AES block size, then + * separate off the last full block and the partial block. This ensures + * that they are processed in the same call to the assembly function, + * which is required for ciphertext stealing. + */ + if (tail) { skcipher_request_set_tfm(&subreq, tfm); skcipher_request_set_callback(&subreq, skcipher_request_flags(req), NULL, NULL); skcipher_request_set_crypt(&subreq, req->src, req->dst, - blocks * AES_BLOCK_SIZE, req->iv); + req->cryptlen - tail - AES_BLOCK_SIZE, + req->iv); req = &subreq; - - err = skcipher_walk_virt(&walk, req, false); - if (!walk.nbytes) - return err; - } else { - tail = 0; } - kernel_fpu_begin(); + err = skcipher_walk_virt(&walk, req, false); - /* calculate first value of T */ - aesni_enc(&ctx->tweak_ctx, walk.iv, walk.iv); - - while (walk.nbytes > 0) { - int nbytes = walk.nbytes; + while (walk.nbytes) { + unsigned int nbytes = walk.nbytes; if (nbytes < walk.total) - nbytes &= ~(AES_BLOCK_SIZE - 1); - - if (encrypt) - aesni_xts_encrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - nbytes, walk.iv); - else - aesni_xts_decrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - nbytes, walk.iv); - kernel_fpu_end(); - - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - - if (walk.nbytes > 0) - kernel_fpu_begin(); - } - - if (unlikely(tail > 0 && !err)) { - struct scatterlist sg_src[2], sg_dst[2]; - struct scatterlist *src, *dst; - - dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); - if (req->dst != req->src) - dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); - - skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, - req->iv); - - err = skcipher_walk_virt(&walk, &subreq, false); - if (err) - return err; + nbytes = round_down(nbytes, AES_BLOCK_SIZE); kernel_fpu_begin(); - if (encrypt) - aesni_xts_encrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes, walk.iv); - else - aesni_xts_decrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes, walk.iv); + (*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, + walk.dst.virt.addr, nbytes, req->iv); kernel_fpu_end(); - - err = skcipher_walk_done(&walk, 0); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } - return err; + + if (err || !tail) + return err; + + /* Do ciphertext stealing with the last full block and partial block. */ + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + kernel_fpu_begin(); + (*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, req->iv); + kernel_fpu_end(); + + return skcipher_walk_done(&walk, 0); } -static int xts_encrypt(struct skcipher_request *req) +/* __always_inline to avoid indirect call in fastpath */ +static __always_inline int +xts_crypt(struct skcipher_request *req, xts_encrypt_iv_func encrypt_iv, + xts_crypt_func crypt_func) { - return xts_crypt(req, true); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); + const unsigned int cryptlen = req->cryptlen; + struct scatterlist *src = req->src; + struct scatterlist *dst = req->dst; + + if (unlikely(cryptlen < AES_BLOCK_SIZE)) + return -EINVAL; + + kernel_fpu_begin(); + (*encrypt_iv)(&ctx->tweak_ctx, req->iv); + + /* + * In practice, virtually all XTS plaintexts and ciphertexts are either + * 512 or 4096 bytes, aligned such that they don't span page boundaries. + * To optimize the performance of these cases, and also any other case + * where no page boundary is spanned, the below fast-path handles + * single-page sources and destinations as efficiently as possible. + */ + if (likely(src->length >= cryptlen && dst->length >= cryptlen && + src->offset + cryptlen <= PAGE_SIZE && + dst->offset + cryptlen <= PAGE_SIZE)) { + struct page *src_page = sg_page(src); + struct page *dst_page = sg_page(dst); + void *src_virt = kmap_local_page(src_page) + src->offset; + void *dst_virt = kmap_local_page(dst_page) + dst->offset; + + (*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen, + req->iv); + kunmap_local(dst_virt); + kunmap_local(src_virt); + kernel_fpu_end(); + return 0; + } + kernel_fpu_end(); + return xts_crypt_slowpath(req, crypt_func); } -static int xts_decrypt(struct skcipher_request *req) +static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, + u8 iv[AES_BLOCK_SIZE]) { - return xts_crypt(req, false); + aesni_enc(tweak_key, iv, iv); +} + +static void aesni_xts_encrypt(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, size_t len, + u8 tweak[AES_BLOCK_SIZE]) +{ + aesni_xts_enc(key, dst, src, len, tweak); +} + +static void aesni_xts_decrypt(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, size_t len, + u8 tweak[AES_BLOCK_SIZE]) +{ + aesni_xts_dec(key, dst, src, len, tweak); +} + +static int xts_encrypt_aesni(struct skcipher_request *req) +{ + return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_encrypt); +} + +static int xts_decrypt_aesni(struct skcipher_request *req) +{ + return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_decrypt); } static struct crypto_alg aesni_cipher_alg = { @@ -1101,9 +1145,9 @@ static struct skcipher_alg aesni_skciphers[] = { .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, .walksize = 2 * AES_BLOCK_SIZE, - .setkey = xts_aesni_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, + .setkey = xts_setkey_aesni, + .encrypt = xts_encrypt_aesni, + .decrypt = xts_decrypt_aesni, } }; @@ -1139,121 +1183,6 @@ static struct simd_skcipher_alg *aesni_simd_xctr; asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, u8 iv[AES_BLOCK_SIZE]); -typedef void (*xts_asm_func)(const struct crypto_aes_ctx *key, - const u8 *src, u8 *dst, size_t len, - u8 tweak[AES_BLOCK_SIZE]); - -/* This handles cases where the source and/or destination span pages. */ -static noinline int -xts_crypt_slowpath(struct skcipher_request *req, xts_asm_func asm_func) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); - int tail = req->cryptlen % AES_BLOCK_SIZE; - struct scatterlist sg_src[2], sg_dst[2]; - struct skcipher_request subreq; - struct skcipher_walk walk; - struct scatterlist *src, *dst; - int err; - - /* - * If the message length isn't divisible by the AES block size, then - * separate off the last full block and the partial block. This ensures - * that they are processed in the same call to the assembly function, - * which is required for ciphertext stealing. - */ - if (tail) { - skcipher_request_set_tfm(&subreq, tfm); - skcipher_request_set_callback(&subreq, - skcipher_request_flags(req), - NULL, NULL); - skcipher_request_set_crypt(&subreq, req->src, req->dst, - req->cryptlen - tail - AES_BLOCK_SIZE, - req->iv); - req = &subreq; - } - - err = skcipher_walk_virt(&walk, req, false); - - while (walk.nbytes) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, AES_BLOCK_SIZE); - - kernel_fpu_begin(); - (*asm_func)(&ctx->crypt_ctx, walk.src.virt.addr, - walk.dst.virt.addr, nbytes, req->iv); - kernel_fpu_end(); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - if (err || !tail) - return err; - - /* Do ciphertext stealing with the last full block and partial block. */ - - dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); - if (req->dst != req->src) - dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); - - skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, - req->iv); - - err = skcipher_walk_virt(&walk, req, false); - if (err) - return err; - - kernel_fpu_begin(); - (*asm_func)(&ctx->crypt_ctx, walk.src.virt.addr, walk.dst.virt.addr, - walk.nbytes, req->iv); - kernel_fpu_end(); - - return skcipher_walk_done(&walk, 0); -} - -/* __always_inline to avoid indirect call in fastpath */ -static __always_inline int -xts_crypt2(struct skcipher_request *req, xts_asm_func asm_func) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); - const unsigned int cryptlen = req->cryptlen; - struct scatterlist *src = req->src; - struct scatterlist *dst = req->dst; - - if (unlikely(cryptlen < AES_BLOCK_SIZE)) - return -EINVAL; - - kernel_fpu_begin(); - aes_xts_encrypt_iv(&ctx->tweak_ctx, req->iv); - - /* - * In practice, virtually all XTS plaintexts and ciphertexts are either - * 512 or 4096 bytes, aligned such that they don't span page boundaries. - * To optimize the performance of these cases, and also any other case - * where no page boundary is spanned, the below fast-path handles - * single-page sources and destinations as efficiently as possible. - */ - if (likely(src->length >= cryptlen && dst->length >= cryptlen && - src->offset + cryptlen <= PAGE_SIZE && - dst->offset + cryptlen <= PAGE_SIZE)) { - struct page *src_page = sg_page(src); - struct page *dst_page = sg_page(dst); - void *src_virt = kmap_local_page(src_page) + src->offset; - void *dst_virt = kmap_local_page(dst_page) + dst->offset; - - (*asm_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen, - req->iv); - kunmap_local(dst_virt); - kunmap_local(src_virt); - kernel_fpu_end(); - return 0; - } - kernel_fpu_end(); - return xts_crypt_slowpath(req, asm_func); -} - #define DEFINE_XTS_ALG(suffix, driver_name, priority) \ \ asmlinkage void aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, \ @@ -1265,12 +1194,12 @@ asmlinkage void aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, \ \ static int xts_encrypt_##suffix(struct skcipher_request *req) \ { \ - return xts_crypt2(req, aes_xts_encrypt_##suffix); \ + return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_encrypt_##suffix); \ } \ \ static int xts_decrypt_##suffix(struct skcipher_request *req) \ { \ - return xts_crypt2(req, aes_xts_decrypt_##suffix); \ + return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_decrypt_##suffix); \ } \ \ static struct skcipher_alg aes_xts_alg_##suffix = { \ @@ -1287,7 +1216,7 @@ static struct skcipher_alg aes_xts_alg_##suffix = { \ .max_keysize = 2 * AES_MAX_KEY_SIZE, \ .ivsize = AES_BLOCK_SIZE, \ .walksize = 2 * AES_BLOCK_SIZE, \ - .setkey = xts_aesni_setkey, \ + .setkey = xts_setkey_aesni, \ .encrypt = xts_encrypt_##suffix, \ .decrypt = xts_decrypt_##suffix, \ }; \ From 6a6d6a3a328a59ed0d8ae2e65696ef38e49133a0 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Mon, 8 Apr 2024 01:59:14 +0000 Subject: [PATCH 079/123] crypto: octeontx2 - add missing check for dma_map_single Add check for dma_map_single() and return error if it fails in order to avoid invalid dma address. Fixes: e92971117c2c ("crypto: octeontx2 - add ctx_val workaround") Signed-off-by: Chen Ni Reviewed-by: Bharat Bhushan Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/cn10k_cpt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c index 79b4e74804f6..6bfc59e67747 100644 --- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c +++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c @@ -138,6 +138,10 @@ int cn10k_cpt_hw_ctx_init(struct pci_dev *pdev, return -ENOMEM; cptr_dma = dma_map_single(&pdev->dev, hctx, CN10K_CPT_HW_CTX_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, cptr_dma)) { + kfree(hctx); + return -ENOMEM; + } cn10k_cpt_hw_ctx_set(hctx, 1); er_ctx->hw_ctx = hctx; From b924ecd305c43c41b21ab246e986e2a8effa5743 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 8 Apr 2024 20:01:54 -0400 Subject: [PATCH 080/123] crypto: x86/aes-xts - access round keys using single-byte offsets Access the AES round keys using offsets -7*16 through 7*16, instead of 0*16 through 14*16. This allows VEX-encoded instructions to address all round keys using 1-byte offsets, whereas before some needed 4-byte offsets. This decreases the code size of aes-xts-avx-x86_64.o by 4.2%. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 81 +++++++++++++++------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index fcaf64a2f8c6..95e412e7601d 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -82,7 +82,7 @@ // Function parameters .set KEY, %rdi // Initially points to crypto_aes_ctx, then is - // advanced to point directly to the round keys + // advanced to point directly to 7th round key .set SRC, %rsi // Pointer to next source data .set DST, %rdx // Pointer to next destination data .set LEN, %rcx // Remaining length in bytes @@ -408,24 +408,24 @@ // Load the round keys: just the first one if !USE_AVX10, otherwise all of them. .macro _load_round_keys - _vbroadcast128 0*16(KEY), KEY0 + _vbroadcast128 -7*16(KEY), KEY0 .if USE_AVX10 - _vbroadcast128 1*16(KEY), KEY1 - _vbroadcast128 2*16(KEY), KEY2 - _vbroadcast128 3*16(KEY), KEY3 - _vbroadcast128 4*16(KEY), KEY4 - _vbroadcast128 5*16(KEY), KEY5 - _vbroadcast128 6*16(KEY), KEY6 - _vbroadcast128 7*16(KEY), KEY7 - _vbroadcast128 8*16(KEY), KEY8 - _vbroadcast128 9*16(KEY), KEY9 - _vbroadcast128 10*16(KEY), KEY10 + _vbroadcast128 -6*16(KEY), KEY1 + _vbroadcast128 -5*16(KEY), KEY2 + _vbroadcast128 -4*16(KEY), KEY3 + _vbroadcast128 -3*16(KEY), KEY4 + _vbroadcast128 -2*16(KEY), KEY5 + _vbroadcast128 -1*16(KEY), KEY6 + _vbroadcast128 0*16(KEY), KEY7 + _vbroadcast128 1*16(KEY), KEY8 + _vbroadcast128 2*16(KEY), KEY9 + _vbroadcast128 3*16(KEY), KEY10 // Note: if it's AES-128 or AES-192, the last several round keys won't // be used. We do the loads anyway to save a conditional jump. - _vbroadcast128 11*16(KEY), KEY11 - _vbroadcast128 12*16(KEY), KEY12 - _vbroadcast128 13*16(KEY), KEY13 - _vbroadcast128 14*16(KEY), KEY14 + _vbroadcast128 4*16(KEY), KEY11 + _vbroadcast128 5*16(KEY), KEY12 + _vbroadcast128 6*16(KEY), KEY13 + _vbroadcast128 7*16(KEY), KEY14 .endif .endm @@ -456,9 +456,9 @@ _vaes \enc, \last, KEY\i\xmm_suffix, \data .else .ifnb \xmm_suffix - _vaes \enc, \last, \i*16(KEY), \data + _vaes \enc, \last, (\i-7)*16(KEY), \data .else - _vbroadcast128 \i*16(KEY), V4 + _vbroadcast128 (\i-7)*16(KEY), V4 _vaes \enc, \last, V4, \data .endif .endif @@ -477,7 +477,7 @@ _vaes \enc, \last, KEY\i, V2 _vaes \enc, \last, KEY\i, V3 .else - _vbroadcast128 \i*16(KEY), V4 + _vbroadcast128 (\i-7)*16(KEY), V4 _tweak_step (2*(\i-1)) _vaes \enc, \last, V4, V0 _vaes \enc, \last, V4, V1 @@ -528,9 +528,15 @@ // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). movl 480(KEY), KEYLEN - // If decrypting, advance KEY to the decryption round keys. -.if !\enc - add $240, KEY + // Advance KEY to point to the 7th encryption round key (if encrypting) + // or the 7th decryption round key (if decrypting). This makes the + // offset to any round key be in the range [-112, 112], fitting in a + // signed byte. This shortens VEX-encoded instructions that access the + // 8th and later round keys which otherwise would need 4-byte offsets. +.if \enc + add $7*16, KEY +.else + add $(15+7)*16, KEY .endif // Check whether the data length is a multiple of the AES block length. @@ -753,23 +759,24 @@ // u8 iv[AES_BLOCK_SIZE]); SYM_TYPED_FUNC_START(aes_xts_encrypt_iv) vmovdqu (%rsi), %xmm0 - vpxor 0*16(%rdi), %xmm0, %xmm0 + add $7*16, %rdi + vpxor -7*16(%rdi), %xmm0, %xmm0 + vaesenc -6*16(%rdi), %xmm0, %xmm0 + vaesenc -5*16(%rdi), %xmm0, %xmm0 + vaesenc -4*16(%rdi), %xmm0, %xmm0 + vaesenc -3*16(%rdi), %xmm0, %xmm0 + vaesenc -2*16(%rdi), %xmm0, %xmm0 + vaesenc -1*16(%rdi), %xmm0, %xmm0 + vaesenc 0*16(%rdi), %xmm0, %xmm0 vaesenc 1*16(%rdi), %xmm0, %xmm0 vaesenc 2*16(%rdi), %xmm0, %xmm0 + cmpl $24, 480-(7*16)(%rdi) + jle .Lencrypt_iv_aes_128_or_192 vaesenc 3*16(%rdi), %xmm0, %xmm0 vaesenc 4*16(%rdi), %xmm0, %xmm0 vaesenc 5*16(%rdi), %xmm0, %xmm0 vaesenc 6*16(%rdi), %xmm0, %xmm0 - vaesenc 7*16(%rdi), %xmm0, %xmm0 - vaesenc 8*16(%rdi), %xmm0, %xmm0 - vaesenc 9*16(%rdi), %xmm0, %xmm0 - cmpl $24, 480(%rdi) - jle .Lencrypt_iv_aes_128_or_192 - vaesenc 10*16(%rdi), %xmm0, %xmm0 - vaesenc 11*16(%rdi), %xmm0, %xmm0 - vaesenc 12*16(%rdi), %xmm0, %xmm0 - vaesenc 13*16(%rdi), %xmm0, %xmm0 - vaesenclast 14*16(%rdi), %xmm0, %xmm0 + vaesenclast 7*16(%rdi), %xmm0, %xmm0 .Lencrypt_iv_done: vmovdqu %xmm0, (%rsi) RET @@ -777,12 +784,12 @@ SYM_TYPED_FUNC_START(aes_xts_encrypt_iv) // Out-of-line handling of AES-128 and AES-192 .Lencrypt_iv_aes_128_or_192: jz .Lencrypt_iv_aes_192 - vaesenclast 10*16(%rdi), %xmm0, %xmm0 + vaesenclast 3*16(%rdi), %xmm0, %xmm0 jmp .Lencrypt_iv_done .Lencrypt_iv_aes_192: - vaesenc 10*16(%rdi), %xmm0, %xmm0 - vaesenc 11*16(%rdi), %xmm0, %xmm0 - vaesenclast 12*16(%rdi), %xmm0, %xmm0 + vaesenc 3*16(%rdi), %xmm0, %xmm0 + vaesenc 4*16(%rdi), %xmm0, %xmm0 + vaesenclast 5*16(%rdi), %xmm0, %xmm0 jmp .Lencrypt_iv_done SYM_FUNC_END(aes_xts_encrypt_iv) From 5d5bd24f415516b212d56e8a66fffd40cdaeab30 Mon Sep 17 00:00:00 2001 From: Damian Muszynski Date: Thu, 11 Apr 2024 11:24:58 +0200 Subject: [PATCH 081/123] crypto: qat - implement dh fallback for primes > 4K The Intel QAT driver provides support for the Diffie-Hellman (DH) algorithm, limited to prime numbers up to 4K. This driver is used by default on platforms with integrated QAT hardware for all DH requests. This has led to failures with algorithms requiring larger prime sizes, such as ffdhe6144. alg: ffdhe6144(dh): test failed on vector 1, err=-22 alg: self-tests for ffdhe6144(qat-dh) (ffdhe6144(dh)) failed (rc=-22) Implement a fallback mechanism when an unsupported request is received. Signed-off-by: Damian Muszynski Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../intel/qat/qat_common/qat_asym_algs.c | 66 +++++++++++++++++-- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c b/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c index 4128200a9032..85c682e248fb 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c @@ -110,6 +110,8 @@ struct qat_dh_ctx { unsigned int p_size; bool g2; struct qat_crypto_instance *inst; + struct crypto_kpp *ftfm; + bool fallback; } __packed __aligned(64); struct qat_asym_request { @@ -381,6 +383,36 @@ static int qat_dh_compute_value(struct kpp_request *req) return ret; } +static int qat_dh_generate_public_key(struct kpp_request *req) +{ + struct kpp_request *nreq = kpp_request_ctx(req); + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + if (ctx->fallback) { + memcpy(nreq, req, sizeof(*req)); + kpp_request_set_tfm(nreq, ctx->ftfm); + return crypto_kpp_generate_public_key(nreq); + } + + return qat_dh_compute_value(req); +} + +static int qat_dh_compute_shared_secret(struct kpp_request *req) +{ + struct kpp_request *nreq = kpp_request_ctx(req); + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + if (ctx->fallback) { + memcpy(nreq, req, sizeof(*req)); + kpp_request_set_tfm(nreq, ctx->ftfm); + return crypto_kpp_compute_shared_secret(nreq); + } + + return qat_dh_compute_value(req); +} + static int qat_dh_check_params_length(unsigned int p_len) { switch (p_len) { @@ -398,9 +430,6 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - if (qat_dh_check_params_length(params->p_size << 3)) - return -EINVAL; - ctx->p_size = params->p_size; ctx->p = dma_alloc_coherent(dev, ctx->p_size, &ctx->dma_p, GFP_KERNEL); if (!ctx->p) @@ -454,6 +483,13 @@ static int qat_dh_set_secret(struct crypto_kpp *tfm, const void *buf, if (crypto_dh_decode_key(buf, len, ¶ms) < 0) return -EINVAL; + if (qat_dh_check_params_length(params.p_size << 3)) { + ctx->fallback = true; + return crypto_kpp_set_secret(ctx->ftfm, buf, len); + } + + ctx->fallback = false; + /* Free old secret if any */ qat_dh_clear_ctx(dev, ctx); @@ -481,6 +517,9 @@ static unsigned int qat_dh_max_size(struct crypto_kpp *tfm) { struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + if (ctx->fallback) + return crypto_kpp_maxsize(ctx->ftfm); + return ctx->p_size; } @@ -489,11 +528,22 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm) struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); struct qat_crypto_instance *inst = qat_crypto_get_instance_node(numa_node_id()); + const char *alg = kpp_alg_name(tfm); + unsigned int reqsize; if (!inst) return -EINVAL; - kpp_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64); + ctx->ftfm = crypto_alloc_kpp(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->ftfm)) + return PTR_ERR(ctx->ftfm); + + crypto_kpp_set_flags(ctx->ftfm, crypto_kpp_get_flags(tfm)); + + reqsize = max(sizeof(struct qat_asym_request) + 64, + sizeof(struct kpp_request) + crypto_kpp_reqsize(ctx->ftfm)); + + kpp_set_reqsize(tfm, reqsize); ctx->p_size = 0; ctx->g2 = false; @@ -506,6 +556,9 @@ static void qat_dh_exit_tfm(struct crypto_kpp *tfm) struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); struct device *dev = &GET_DEV(ctx->inst->accel_dev); + if (ctx->ftfm) + crypto_free_kpp(ctx->ftfm); + qat_dh_clear_ctx(dev, ctx); qat_crypto_put_instance(ctx->inst); } @@ -1265,8 +1318,8 @@ static struct akcipher_alg rsa = { static struct kpp_alg dh = { .set_secret = qat_dh_set_secret, - .generate_public_key = qat_dh_compute_value, - .compute_shared_secret = qat_dh_compute_value, + .generate_public_key = qat_dh_generate_public_key, + .compute_shared_secret = qat_dh_compute_shared_secret, .max_size = qat_dh_max_size, .init = qat_dh_init_tfm, .exit = qat_dh_exit_tfm, @@ -1276,6 +1329,7 @@ static struct kpp_alg dh = { .cra_priority = 1000, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct qat_dh_ctx), + .cra_flags = CRYPTO_ALG_NEED_FALLBACK, }, }; From ffaec34b0f2ba624f20614c4aaed0d5eb6181b2d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Apr 2024 09:23:56 -0700 Subject: [PATCH 082/123] crypto: x86/sha256-ni - convert to use rounds macros To avoid source code duplication, do the SHA-256 rounds using macros. This reduces the length of sha256_ni_asm.S by 153 lines while still producing the exact same object file. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256_ni_asm.S | 211 +++++--------------------------- 1 file changed, 29 insertions(+), 182 deletions(-) diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 537b6dcd7ed8..498f67727b94 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -76,6 +76,29 @@ #define ABEF_SAVE %xmm9 #define CDGH_SAVE %xmm10 +.macro do_4rounds i, m0, m1, m2, m3 +.if \i < 16 + movdqu \i*4(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, \m0 +.else + movdqa \m0, MSG +.endif + paddd \i*4(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 +.if \i >= 12 && \i < 60 + movdqa \m0, MSGTMP4 + palignr $4, \m3, MSGTMP4 + paddd MSGTMP4, \m1 + sha256msg2 \m0, \m1 +.endif + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 +.if \i >= 4 && \i < 52 + sha256msg1 \m0, \m3 +.endif +.endm + /* * Intel SHA Extensions optimized implementation of a SHA-256 update function * @@ -86,9 +109,6 @@ * store partial blocks. All message padding and hash value initialization must * be done outside the update function. * - * The indented lines in the loop are instructions related to rounds processing. - * The non-indented lines are instructions related to the message schedule. - * * void sha256_ni_transform(uint32_t *digest, const void *data, uint32_t numBlocks); * digest : pointer to digest @@ -125,185 +145,12 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) movdqa STATE0, ABEF_SAVE movdqa STATE1, CDGH_SAVE - /* Rounds 0-3 */ - movdqu 0*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP0 - paddd 0*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - - /* Rounds 4-7 */ - movdqu 1*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP1 - paddd 1*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 - - /* Rounds 8-11 */ - movdqu 2*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP2 - paddd 2*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 - - /* Rounds 12-15 */ - movdqu 3*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP3 - paddd 3*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP3, MSGTMP4 - palignr $4, MSGTMP2, MSGTMP4 - paddd MSGTMP4, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 - - /* Rounds 16-19 */ - movdqa MSGTMP0, MSG - paddd 4*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP0, MSGTMP4 - palignr $4, MSGTMP3, MSGTMP4 - paddd MSGTMP4, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 - - /* Rounds 20-23 */ - movdqa MSGTMP1, MSG - paddd 5*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP1, MSGTMP4 - palignr $4, MSGTMP0, MSGTMP4 - paddd MSGTMP4, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 - - /* Rounds 24-27 */ - movdqa MSGTMP2, MSG - paddd 6*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP2, MSGTMP4 - palignr $4, MSGTMP1, MSGTMP4 - paddd MSGTMP4, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 - - /* Rounds 28-31 */ - movdqa MSGTMP3, MSG - paddd 7*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP3, MSGTMP4 - palignr $4, MSGTMP2, MSGTMP4 - paddd MSGTMP4, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 - - /* Rounds 32-35 */ - movdqa MSGTMP0, MSG - paddd 8*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP0, MSGTMP4 - palignr $4, MSGTMP3, MSGTMP4 - paddd MSGTMP4, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 - - /* Rounds 36-39 */ - movdqa MSGTMP1, MSG - paddd 9*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP1, MSGTMP4 - palignr $4, MSGTMP0, MSGTMP4 - paddd MSGTMP4, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 - - /* Rounds 40-43 */ - movdqa MSGTMP2, MSG - paddd 10*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP2, MSGTMP4 - palignr $4, MSGTMP1, MSGTMP4 - paddd MSGTMP4, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 - - /* Rounds 44-47 */ - movdqa MSGTMP3, MSG - paddd 11*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP3, MSGTMP4 - palignr $4, MSGTMP2, MSGTMP4 - paddd MSGTMP4, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 - - /* Rounds 48-51 */ - movdqa MSGTMP0, MSG - paddd 12*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP0, MSGTMP4 - palignr $4, MSGTMP3, MSGTMP4 - paddd MSGTMP4, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 - - /* Rounds 52-55 */ - movdqa MSGTMP1, MSG - paddd 13*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP1, MSGTMP4 - palignr $4, MSGTMP0, MSGTMP4 - paddd MSGTMP4, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - - /* Rounds 56-59 */ - movdqa MSGTMP2, MSG - paddd 14*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP2, MSGTMP4 - palignr $4, MSGTMP1, MSGTMP4 - paddd MSGTMP4, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - - /* Rounds 60-63 */ - movdqa MSGTMP3, MSG - paddd 15*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 +.irp i, 0, 16, 32, 48 + do_4rounds (\i + 0), MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3 + do_4rounds (\i + 4), MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0 + do_4rounds (\i + 8), MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1 + do_4rounds (\i + 12), MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2 +.endr /* Add current hash values with previously saved */ paddd ABEF_SAVE, STATE0 From 1b5ddb067df930c8232020cd059b2060275427cf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Apr 2024 09:23:57 -0700 Subject: [PATCH 083/123] crypto: x86/sha256-ni - rename some register aliases MSGTMP[0-3] are used to hold the message schedule and are not temporary registers per se. MSGTMP4 is used as a temporary register for several different purposes and isn't really related to MSGTMP[0-3]. Rename them to MSG[0-3] and TMP accordingly. Also add a comment that clarifies what MSG is. Suggested-by: Stefan Kanthak Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256_ni_asm.S | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 498f67727b94..b7e7001dafdf 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -62,14 +62,14 @@ #define SHA256CONSTANTS %rax -#define MSG %xmm0 +#define MSG %xmm0 /* sha256rnds2 implicit operand */ #define STATE0 %xmm1 #define STATE1 %xmm2 -#define MSGTMP0 %xmm3 -#define MSGTMP1 %xmm4 -#define MSGTMP2 %xmm5 -#define MSGTMP3 %xmm6 -#define MSGTMP4 %xmm7 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 +#define TMP %xmm7 #define SHUF_MASK %xmm8 @@ -87,9 +87,9 @@ paddd \i*4(SHA256CONSTANTS), MSG sha256rnds2 STATE0, STATE1 .if \i >= 12 && \i < 60 - movdqa \m0, MSGTMP4 - palignr $4, \m3, MSGTMP4 - paddd MSGTMP4, \m1 + movdqa \m0, TMP + palignr $4, \m3, TMP + paddd TMP, \m1 sha256msg2 \m0, \m1 .endif pshufd $0x0E, MSG, MSG @@ -133,9 +133,9 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) pshufd $0xB1, STATE0, STATE0 /* CDAB */ pshufd $0x1B, STATE1, STATE1 /* EFGH */ - movdqa STATE0, MSGTMP4 + movdqa STATE0, TMP palignr $8, STATE1, STATE0 /* ABEF */ - pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ + pblendw $0xF0, TMP, STATE1 /* CDGH */ movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK lea K256(%rip), SHA256CONSTANTS @@ -146,10 +146,10 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) movdqa STATE1, CDGH_SAVE .irp i, 0, 16, 32, 48 - do_4rounds (\i + 0), MSGTMP0, MSGTMP1, MSGTMP2, MSGTMP3 - do_4rounds (\i + 4), MSGTMP1, MSGTMP2, MSGTMP3, MSGTMP0 - do_4rounds (\i + 8), MSGTMP2, MSGTMP3, MSGTMP0, MSGTMP1 - do_4rounds (\i + 12), MSGTMP3, MSGTMP0, MSGTMP1, MSGTMP2 + do_4rounds (\i + 0), MSG0, MSG1, MSG2, MSG3 + do_4rounds (\i + 4), MSG1, MSG2, MSG3, MSG0 + do_4rounds (\i + 8), MSG2, MSG3, MSG0, MSG1 + do_4rounds (\i + 12), MSG3, MSG0, MSG1, MSG2 .endr /* Add current hash values with previously saved */ @@ -164,9 +164,9 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) /* Write hash values back in the correct order */ pshufd $0x1B, STATE0, STATE0 /* FEBA */ pshufd $0xB1, STATE1, STATE1 /* DCHG */ - movdqa STATE0, MSGTMP4 + movdqa STATE0, TMP pblendw $0xF0, STATE1, STATE0 /* DCBA */ - palignr $8, MSGTMP4, STATE1 /* HGFE */ + palignr $8, TMP, STATE1 /* HGFE */ movdqu STATE0, 0*16(DIGEST_PTR) movdqu STATE1, 1*16(DIGEST_PTR) From 59e62b20acc3161cafe3dce52cd3d6211379c4c5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Apr 2024 09:23:58 -0700 Subject: [PATCH 084/123] crypto: x86/sha256-ni - optimize code size - Load the SHA-256 round constants relative to a pointer that points into the middle of the constants rather than to the beginning. Since x86 instructions use signed offsets, this decreases the instruction length required to access some of the later round constants. - Use punpcklqdq or punpckhqdq instead of longer instructions such as pshufd, pblendw, and palignr. This doesn't harm performance. The end result is that sha256_ni_transform shrinks from 839 bytes to 791 bytes, with no loss in performance. Suggested-by: Stefan Kanthak Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256_ni_asm.S | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index b7e7001dafdf..ffc9f1c75c15 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -84,7 +84,7 @@ .else movdqa \m0, MSG .endif - paddd \i*4(SHA256CONSTANTS), MSG + paddd (\i-32)*4(SHA256CONSTANTS), MSG sha256rnds2 STATE0, STATE1 .if \i >= 12 && \i < 60 movdqa \m0, TMP @@ -92,7 +92,7 @@ paddd TMP, \m1 sha256msg2 \m0, \m1 .endif - pshufd $0x0E, MSG, MSG + punpckhqdq MSG, MSG sha256rnds2 STATE1, STATE0 .if \i >= 4 && \i < 52 sha256msg1 \m0, \m3 @@ -128,17 +128,17 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) * Need to reorder these appropriately * DCBA, HGFE -> ABEF, CDGH */ - movdqu 0*16(DIGEST_PTR), STATE0 - movdqu 1*16(DIGEST_PTR), STATE1 + movdqu 0*16(DIGEST_PTR), STATE0 /* DCBA */ + movdqu 1*16(DIGEST_PTR), STATE1 /* HGFE */ - pshufd $0xB1, STATE0, STATE0 /* CDAB */ - pshufd $0x1B, STATE1, STATE1 /* EFGH */ movdqa STATE0, TMP - palignr $8, STATE1, STATE0 /* ABEF */ - pblendw $0xF0, TMP, STATE1 /* CDGH */ + punpcklqdq STATE1, STATE0 /* FEBA */ + punpckhqdq TMP, STATE1 /* DCHG */ + pshufd $0x1B, STATE0, STATE0 /* ABEF */ + pshufd $0xB1, STATE1, STATE1 /* CDGH */ movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK - lea K256(%rip), SHA256CONSTANTS + lea K256+32*4(%rip), SHA256CONSTANTS .Lloop0: /* Save hash values for addition after rounds */ @@ -162,14 +162,14 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) jne .Lloop0 /* Write hash values back in the correct order */ - pshufd $0x1B, STATE0, STATE0 /* FEBA */ - pshufd $0xB1, STATE1, STATE1 /* DCHG */ movdqa STATE0, TMP - pblendw $0xF0, STATE1, STATE0 /* DCBA */ - palignr $8, TMP, STATE1 /* HGFE */ + punpcklqdq STATE1, STATE0 /* GHEF */ + punpckhqdq TMP, STATE1 /* ABCD */ + pshufd $0xB1, STATE0, STATE0 /* HGFE */ + pshufd $0x1B, STATE1, STATE1 /* DCBA */ - movdqu STATE0, 0*16(DIGEST_PTR) - movdqu STATE1, 1*16(DIGEST_PTR) + movdqu STATE1, 0*16(DIGEST_PTR) + movdqu STATE0, 1*16(DIGEST_PTR) .Ldone_hash: From 7daba20cc72d4b3aa047cc3868b96e8e45d5eeca Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Apr 2024 09:23:59 -0700 Subject: [PATCH 085/123] crypto: x86/sha256-ni - simplify do_4rounds Instead of loading the message words into both MSG and \m0 and then adding the round constants to MSG, load the message words into \m0 and the round constants into MSG and then add \m0 to MSG. This shortens the source code slightly. It changes the instructions slightly, but it doesn't affect binary code size and doesn't seem to affect performance. Suggested-by: Stefan Kanthak Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256_ni_asm.S | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index ffc9f1c75c15..d515a55a3bc1 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -78,13 +78,11 @@ .macro do_4rounds i, m0, m1, m2, m3 .if \i < 16 - movdqu \i*4(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, \m0 -.else - movdqa \m0, MSG + movdqu \i*4(DATA_PTR), \m0 + pshufb SHUF_MASK, \m0 .endif - paddd (\i-32)*4(SHA256CONSTANTS), MSG + movdqa (\i-32)*4(SHA256CONSTANTS), MSG + paddd \m0, MSG sha256rnds2 STATE0, STATE1 .if \i >= 12 && \i < 60 movdqa \m0, TMP From 4a4fc6c0c7fe29f2538013a57ebd7813ec6c12a8 Mon Sep 17 00:00:00 2001 From: Adam Guerin Date: Fri, 12 Apr 2024 13:24:02 +0100 Subject: [PATCH 086/123] crypto: qat - improve error message in adf_get_arbiter_mapping() Improve error message to be more readable. Fixes: 5da6a2d5353e ("crypto: qat - generate dynamically arbiter mappings") Signed-off-by: Adam Guerin Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c | 2 +- drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c index d255cb3ebd9c..78f0ea49254d 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -298,7 +298,7 @@ static const u32 *adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev) { if (adf_gen4_init_thd2arb_map(accel_dev)) dev_warn(&GET_DEV(accel_dev), - "Generate of the thread to arbiter map failed"); + "Failed to generate thread to arbiter mapping"); return GET_HW_DATA(accel_dev)->thd_to_arb_map; } diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 1e77e189a938..9fd7ec53b9f3 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -210,7 +210,7 @@ static const u32 *adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev) { if (adf_gen4_init_thd2arb_map(accel_dev)) dev_warn(&GET_DEV(accel_dev), - "Generate of the thread to arbiter map failed"); + "Failed to generate thread to arbiter mapping"); return GET_HW_DATA(accel_dev)->thd_to_arb_map; } From d281a28bd2a94d72c440457e05a2f04a52f15947 Mon Sep 17 00:00:00 2001 From: Adam Guerin Date: Fri, 12 Apr 2024 13:24:03 +0100 Subject: [PATCH 087/123] crypto: qat - improve error logging to be consistent across features Improve error logging in rate limiting feature. Staying consistent with the error logging found in the telemetry feature. Fixes: d9fb8408376e ("crypto: qat - add rate limiting feature to qat_4xxx") Signed-off-by: Adam Guerin Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_rl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index 65f752f4792a..346ef8bee99d 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -1125,7 +1125,7 @@ int adf_rl_start(struct adf_accel_dev *accel_dev) } if ((fw_caps & RL_CAPABILITY_MASK) != RL_CAPABILITY_VALUE) { - dev_info(&GET_DEV(accel_dev), "not supported\n"); + dev_info(&GET_DEV(accel_dev), "feature not supported by FW\n"); ret = -EOPNOTSUPP; goto ret_free; } From 3525fe475245ec5e8bc119749d31a727bc8f41ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maxime=20M=C3=A9r=C3=A9?= Date: Fri, 12 Apr 2024 14:45:45 +0200 Subject: [PATCH 088/123] crypto: stm32/hash - add full DMA support for stm32mpx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to a lack of alignment in the data sent by requests, the actual DMA support of the STM32 hash driver is only working with digest calls. This patch, based on the algorithm used in the driver omap-sham.c, allows for the usage of DMA in any situation. It has been functionally tested on STM32MP15, STM32MP13 and STM32MP25. By checking the performance of this new driver with OpenSSL, the following results were found: Performance: (datasize: 4096, number of hashes performed in 10s) |type |no DMA |DMA support|software | |-------|----------|-----------|----------| |md5 |13873.56k |10958.03k |71163.08k | |sha1 |13796.15k |10729.47k |39670.58k | |sha224 |13737.98k |10775.76k |22094.64k | |sha256 |13655.65k |10872.01k |22075.39k | CPU Usage: (algorithm used: sha256, computation time: 20s, measurement taken at ~10s) |datasize |no DMA |DMA | software | |----------|-------|-----|----------| | 2048 | 56% | 49% | 50% | | 4096 | 54% | 46% | 50% | | 8192 | 53% | 40% | 50% | | 16384 | 53% | 33% | 50% | Note: this update doesn't change the driver performance without DMA. As shown, performance with DMA is slightly lower than without, but in most cases, it will save CPU time. Signed-off-by: Maxime Méré Signed-off-by: Herbert Xu --- drivers/crypto/stm32/stm32-hash.c | 570 +++++++++++++++++++++++------- 1 file changed, 448 insertions(+), 122 deletions(-) diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index 34e0d7e381a8..351827372ea6 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -94,6 +94,7 @@ #define HASH_FLAGS_ERRORS BIT(21) #define HASH_FLAGS_EMPTY BIT(22) #define HASH_FLAGS_HMAC BIT(23) +#define HASH_FLAGS_SGS_COPIED BIT(24) #define HASH_OP_UPDATE 1 #define HASH_OP_FINAL 2 @@ -145,7 +146,7 @@ struct stm32_hash_state { u16 bufcnt; u16 blocklen; - u8 buffer[HASH_BUFLEN] __aligned(4); + u8 buffer[HASH_BUFLEN] __aligned(sizeof(u32)); /* hash state */ u32 hw_context[3 + HASH_CSR_NB_MAX]; @@ -158,8 +159,8 @@ struct stm32_hash_request_ctx { u8 digest[SHA512_DIGEST_SIZE] __aligned(sizeof(u32)); size_t digcnt; - /* DMA */ struct scatterlist *sg; + struct scatterlist sgl[2]; /* scatterlist used to realize alignment */ unsigned int offset; unsigned int total; struct scatterlist sg_key; @@ -184,6 +185,7 @@ struct stm32_hash_pdata { size_t algs_info_size; bool has_sr; bool has_mdmat; + bool context_secured; bool broken_emptymsg; bool ux500; }; @@ -195,6 +197,7 @@ struct stm32_hash_dev { struct reset_control *rst; void __iomem *io_base; phys_addr_t phys_base; + u8 xmit_buf[HASH_BUFLEN] __aligned(sizeof(u32)); u32 dma_mode; bool polled; @@ -220,6 +223,8 @@ static struct stm32_hash_drv stm32_hash = { }; static void stm32_hash_dma_callback(void *param); +static int stm32_hash_prepare_request(struct ahash_request *req); +static void stm32_hash_unprepare_request(struct ahash_request *req); static inline u32 stm32_hash_read(struct stm32_hash_dev *hdev, u32 offset) { @@ -232,6 +237,11 @@ static inline void stm32_hash_write(struct stm32_hash_dev *hdev, writel_relaxed(value, hdev->io_base + offset); } +/** + * stm32_hash_wait_busy - wait until hash processor is available. It return an + * error if the hash core is processing a block of data for more than 10 ms. + * @hdev: the stm32_hash_dev device. + */ static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) { u32 status; @@ -245,6 +255,11 @@ static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) !(status & HASH_SR_BUSY), 10, 10000); } +/** + * stm32_hash_set_nblw - set the number of valid bytes in the last word. + * @hdev: the stm32_hash_dev device. + * @length: the length of the final word. + */ static void stm32_hash_set_nblw(struct stm32_hash_dev *hdev, int length) { u32 reg; @@ -282,6 +297,11 @@ static int stm32_hash_write_key(struct stm32_hash_dev *hdev) return 0; } +/** + * stm32_hash_write_ctrl - Initialize the hash processor, only if + * HASH_FLAGS_INIT is set. + * @hdev: the stm32_hash_dev device + */ static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); @@ -469,9 +489,7 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct stm32_hash_state *state = &rctx->state; - u32 *preg = state->hw_context; int bufcnt, err = 0, final; - int i, swap_reg; dev_dbg(hdev->dev, "%s flags %x\n", __func__, state->flags); @@ -495,34 +513,23 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev) return stm32_hash_xmit_cpu(hdev, state->buffer, bufcnt, 1); } - if (!(hdev->flags & HASH_FLAGS_INIT)) - return 0; - - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - - swap_reg = hash_swap_reg(rctx); - - if (!hdev->pdata->ux500) - *preg++ = stm32_hash_read(hdev, HASH_IMR); - *preg++ = stm32_hash_read(hdev, HASH_STR); - *preg++ = stm32_hash_read(hdev, HASH_CR); - for (i = 0; i < swap_reg; i++) - *preg++ = stm32_hash_read(hdev, HASH_CSR(i)); - - state->flags |= HASH_FLAGS_INIT; - return err; } static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, - struct scatterlist *sg, int length, int mdma) + struct scatterlist *sg, int length, int mdmat) { struct dma_async_tx_descriptor *in_desc; dma_cookie_t cookie; u32 reg; int err; + dev_dbg(hdev->dev, "%s mdmat: %x length: %d\n", __func__, mdmat, length); + + /* do not use dma if there is no data to send */ + if (length <= 0) + return 0; + in_desc = dmaengine_prep_slave_sg(hdev->dma_lch, sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -535,13 +542,12 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, in_desc->callback = stm32_hash_dma_callback; in_desc->callback_param = hdev; - hdev->flags |= HASH_FLAGS_FINAL; hdev->flags |= HASH_FLAGS_DMA_ACTIVE; reg = stm32_hash_read(hdev, HASH_CR); if (hdev->pdata->has_mdmat) { - if (mdma) + if (mdmat) reg |= HASH_CR_MDMAT; else reg &= ~HASH_CR_MDMAT; @@ -550,7 +556,6 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, stm32_hash_write(hdev, HASH_CR, reg); - stm32_hash_set_nblw(hdev, length); cookie = dmaengine_submit(in_desc); err = dma_submit_error(cookie); @@ -590,7 +595,7 @@ static int stm32_hash_hmac_dma_send(struct stm32_hash_dev *hdev) struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); int err; - if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode == 1) { + if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode > 0) { err = stm32_hash_write_key(hdev); if (stm32_hash_wait_busy(hdev)) return -ETIMEDOUT; @@ -655,18 +660,20 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) struct scatterlist sg[1], *tsg; int err = 0, reg, ncp = 0; unsigned int i, len = 0, bufcnt = 0; + bool final = hdev->flags & HASH_FLAGS_FINAL; bool is_last = false; + u32 last_word; - rctx->sg = hdev->req->src; - rctx->total = hdev->req->nbytes; + dev_dbg(hdev->dev, "%s total: %d bufcnt: %d final: %d\n", + __func__, rctx->total, rctx->state.bufcnt, final); - rctx->nents = sg_nents(rctx->sg); if (rctx->nents < 0) return -EINVAL; stm32_hash_write_ctrl(hdev); - if (hdev->flags & HASH_FLAGS_HMAC) { + if (hdev->flags & HASH_FLAGS_HMAC && (!(hdev->flags & HASH_FLAGS_HMAC_KEY))) { + hdev->flags |= HASH_FLAGS_HMAC_KEY; err = stm32_hash_hmac_dma_send(hdev); if (err != -EINPROGRESS) return err; @@ -677,22 +684,36 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) len = sg->length; if (sg_is_last(sg) || (bufcnt + sg[0].length) >= rctx->total) { - sg->length = rctx->total - bufcnt; - is_last = true; - if (hdev->dma_mode == 1) { - len = (ALIGN(sg->length, 16) - 16); - - ncp = sg_pcopy_to_buffer( - rctx->sg, rctx->nents, - rctx->state.buffer, sg->length - len, - rctx->total - sg->length + len); - - sg->length = len; + if (!final) { + /* Always manually put the last word of a non-final transfer. */ + len -= sizeof(u32); + sg_pcopy_to_buffer(rctx->sg, rctx->nents, &last_word, 4, len); + sg->length -= sizeof(u32); } else { - if (!(IS_ALIGNED(sg->length, sizeof(u32)))) { - len = sg->length; - sg->length = ALIGN(sg->length, - sizeof(u32)); + /* + * In Multiple DMA mode, DMA must be aborted before the final + * transfer. + */ + sg->length = rctx->total - bufcnt; + if (hdev->dma_mode > 0) { + len = (ALIGN(sg->length, 16) - 16); + + ncp = sg_pcopy_to_buffer(rctx->sg, rctx->nents, + rctx->state.buffer, + sg->length - len, + rctx->total - sg->length + len); + + if (!len) + break; + + sg->length = len; + } else { + is_last = true; + if (!(IS_ALIGNED(sg->length, sizeof(u32)))) { + len = sg->length; + sg->length = ALIGN(sg->length, + sizeof(u32)); + } } } } @@ -706,43 +727,67 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) err = stm32_hash_xmit_dma(hdev, sg, len, !is_last); + /* The last word of a non final transfer is sent manually. */ + if (!final) { + stm32_hash_write(hdev, HASH_DIN, last_word); + len += sizeof(u32); + } + + rctx->total -= len; + bufcnt += sg[0].length; dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); - if (err == -ENOMEM) + if (err == -ENOMEM || err == -ETIMEDOUT) return err; if (is_last) break; } - if (hdev->dma_mode == 1) { - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - reg = stm32_hash_read(hdev, HASH_CR); - reg &= ~HASH_CR_DMAE; - reg |= HASH_CR_DMAA; - stm32_hash_write(hdev, HASH_CR, reg); + /* + * When the second last block transfer of 4 words is performed by the DMA, + * the software must set the DMA Abort bit (DMAA) to 1 before completing the + * last transfer of 4 words or less. + */ + if (final) { + if (hdev->dma_mode > 0) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + reg = stm32_hash_read(hdev, HASH_CR); + reg &= ~HASH_CR_DMAE; + reg |= HASH_CR_DMAA; + stm32_hash_write(hdev, HASH_CR, reg); - if (ncp) { - memset(buffer + ncp, 0, - DIV_ROUND_UP(ncp, sizeof(u32)) - ncp); - writesl(hdev->io_base + HASH_DIN, buffer, - DIV_ROUND_UP(ncp, sizeof(u32))); + if (ncp) { + memset(buffer + ncp, 0, 4 - DIV_ROUND_UP(ncp, sizeof(u32))); + writesl(hdev->io_base + HASH_DIN, buffer, + DIV_ROUND_UP(ncp, sizeof(u32))); + } + + stm32_hash_set_nblw(hdev, ncp); + reg = stm32_hash_read(hdev, HASH_STR); + reg |= HASH_STR_DCAL; + stm32_hash_write(hdev, HASH_STR, reg); + err = -EINPROGRESS; } - stm32_hash_set_nblw(hdev, ncp); - reg = stm32_hash_read(hdev, HASH_STR); - reg |= HASH_STR_DCAL; - stm32_hash_write(hdev, HASH_STR, reg); - err = -EINPROGRESS; + + /* + * The hash processor needs the key to be loaded a second time in order + * to process the HMAC. + */ + if (hdev->flags & HASH_FLAGS_HMAC) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + err = stm32_hash_hmac_dma_send(hdev); + } + + return err; } - if (hdev->flags & HASH_FLAGS_HMAC) { - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - err = stm32_hash_hmac_dma_send(hdev); - } + if (err != -EINPROGRESS) + return err; - return err; + return 0; } static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx) @@ -765,33 +810,6 @@ static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx) return hdev; } -static bool stm32_hash_dma_aligned_data(struct ahash_request *req) -{ - struct scatterlist *sg; - struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); - struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); - int i; - - if (!hdev->dma_lch || req->nbytes <= rctx->state.blocklen) - return false; - - if (sg_nents(req->src) > 1) { - if (hdev->dma_mode == 1) - return false; - for_each_sg(req->src, sg, sg_nents(req->src), i) { - if ((!IS_ALIGNED(sg->length, sizeof(u32))) && - (!sg_is_last(sg))) - return false; - } - } - - if (req->src->offset % 4) - return false; - - return true; -} - static int stm32_hash_init(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -802,8 +820,10 @@ static int stm32_hash_init(struct ahash_request *req) bool sha3_mode = ctx->flags & HASH_FLAGS_SHA3_MODE; rctx->hdev = hdev; + state->flags = 0; - state->flags = HASH_FLAGS_CPU; + if (!(hdev->dma_lch && hdev->pdata->has_mdmat)) + state->flags |= HASH_FLAGS_CPU; if (sha3_mode) state->flags |= HASH_FLAGS_SHA3_MODE; @@ -857,6 +877,7 @@ static int stm32_hash_init(struct ahash_request *req) dev_err(hdev->dev, "Error, block too large"); return -EINVAL; } + rctx->nents = 0; rctx->total = 0; rctx->offset = 0; rctx->data_type = HASH_DATA_8_BITS; @@ -874,6 +895,9 @@ static int stm32_hash_update_req(struct stm32_hash_dev *hdev) struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct stm32_hash_state *state = &rctx->state; + dev_dbg(hdev->dev, "update_req: total: %u, digcnt: %zd, final: 0", + rctx->total, rctx->digcnt); + if (!(state->flags & HASH_FLAGS_CPU)) return stm32_hash_dma_send(hdev); @@ -887,6 +911,11 @@ static int stm32_hash_final_req(struct stm32_hash_dev *hdev) struct stm32_hash_state *state = &rctx->state; int buflen = state->bufcnt; + if (!(state->flags & HASH_FLAGS_CPU)) { + hdev->flags |= HASH_FLAGS_FINAL; + return stm32_hash_dma_send(hdev); + } + if (state->flags & HASH_FLAGS_FINUP) return stm32_hash_update_req(hdev); @@ -968,15 +997,21 @@ static int stm32_hash_finish(struct ahash_request *req) static void stm32_hash_finish_req(struct ahash_request *req, int err) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_state *state = &rctx->state; struct stm32_hash_dev *hdev = rctx->hdev; + if (hdev->flags & HASH_FLAGS_DMA_ACTIVE) + state->flags |= HASH_FLAGS_DMA_ACTIVE; + else + state->flags &= ~HASH_FLAGS_DMA_ACTIVE; + if (!err && (HASH_FLAGS_FINAL & hdev->flags)) { stm32_hash_copy_hash(req); err = stm32_hash_finish(req); } - pm_runtime_mark_last_busy(hdev->dev); - pm_runtime_put_autosuspend(hdev->dev); + /* Finalized request mist be unprepared here */ + stm32_hash_unprepare_request(req); crypto_finalize_hash_request(hdev->engine, req, err); } @@ -1006,6 +1041,10 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq) pm_runtime_get_sync(hdev->dev); + err = stm32_hash_prepare_request(req); + if (err) + return err; + hdev->req = req; hdev->flags = 0; swap_reg = hash_swap_reg(rctx); @@ -1030,6 +1069,12 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq) if (state->flags & HASH_FLAGS_HMAC) hdev->flags |= HASH_FLAGS_HMAC | HASH_FLAGS_HMAC_KEY; + + if (state->flags & HASH_FLAGS_CPU) + hdev->flags |= HASH_FLAGS_CPU; + + if (state->flags & HASH_FLAGS_DMA_ACTIVE) + hdev->flags |= HASH_FLAGS_DMA_ACTIVE; } if (rctx->op == HASH_OP_UPDATE) @@ -1054,6 +1099,284 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq) return 0; } +static int stm32_hash_copy_sgs(struct stm32_hash_request_ctx *rctx, + struct scatterlist *sg, int bs, + unsigned int new_len) +{ + struct stm32_hash_state *state = &rctx->state; + int pages; + void *buf; + + pages = get_order(new_len); + + buf = (void *)__get_free_pages(GFP_ATOMIC, pages); + if (!buf) { + pr_err("Couldn't allocate pages for unaligned cases.\n"); + return -ENOMEM; + } + + if (state->bufcnt) + memcpy(buf, rctx->hdev->xmit_buf, state->bufcnt); + + scatterwalk_map_and_copy(buf + state->bufcnt, sg, rctx->offset, + min(new_len, rctx->total) - state->bufcnt, 0); + sg_init_table(rctx->sgl, 1); + sg_set_buf(rctx->sgl, buf, new_len); + rctx->sg = rctx->sgl; + state->flags |= HASH_FLAGS_SGS_COPIED; + rctx->nents = 1; + rctx->offset += new_len - state->bufcnt; + state->bufcnt = 0; + rctx->total = new_len; + + return 0; +} + +static int stm32_hash_align_sgs(struct scatterlist *sg, + int nbytes, int bs, bool init, bool final, + struct stm32_hash_request_ctx *rctx) +{ + struct stm32_hash_state *state = &rctx->state; + struct stm32_hash_dev *hdev = rctx->hdev; + struct scatterlist *sg_tmp = sg; + int offset = rctx->offset; + int new_len; + int n = 0; + int bufcnt = state->bufcnt; + bool secure_ctx = hdev->pdata->context_secured; + bool aligned = true; + + if (!sg || !sg->length || !nbytes) { + if (bufcnt) { + bufcnt = DIV_ROUND_UP(bufcnt, bs) * bs; + sg_init_table(rctx->sgl, 1); + sg_set_buf(rctx->sgl, rctx->hdev->xmit_buf, bufcnt); + rctx->sg = rctx->sgl; + rctx->nents = 1; + } + + return 0; + } + + new_len = nbytes; + + if (offset) + aligned = false; + + if (final) { + new_len = DIV_ROUND_UP(new_len, bs) * bs; + } else { + new_len = (new_len - 1) / bs * bs; // return n block - 1 block + + /* + * Context save in some version of HASH IP can only be done when the + * FIFO is ready to get a new block. This implies to send n block plus a + * 32 bit word in the first DMA send. + */ + if (init && secure_ctx) { + new_len += sizeof(u32); + if (unlikely(new_len > nbytes)) + new_len -= bs; + } + } + + if (!new_len) + return 0; + + if (nbytes != new_len) + aligned = false; + + while (nbytes > 0 && sg_tmp) { + n++; + + if (bufcnt) { + if (!IS_ALIGNED(bufcnt, bs)) { + aligned = false; + break; + } + nbytes -= bufcnt; + bufcnt = 0; + if (!nbytes) + aligned = false; + + continue; + } + + if (offset < sg_tmp->length) { + if (!IS_ALIGNED(offset + sg_tmp->offset, 4)) { + aligned = false; + break; + } + + if (!IS_ALIGNED(sg_tmp->length - offset, bs)) { + aligned = false; + break; + } + } + + if (offset) { + offset -= sg_tmp->length; + if (offset < 0) { + nbytes += offset; + offset = 0; + } + } else { + nbytes -= sg_tmp->length; + } + + sg_tmp = sg_next(sg_tmp); + + if (nbytes < 0) { + aligned = false; + break; + } + } + + if (!aligned) + return stm32_hash_copy_sgs(rctx, sg, bs, new_len); + + rctx->total = new_len; + rctx->offset += new_len; + rctx->nents = n; + if (state->bufcnt) { + sg_init_table(rctx->sgl, 2); + sg_set_buf(rctx->sgl, rctx->hdev->xmit_buf, state->bufcnt); + sg_chain(rctx->sgl, 2, sg); + rctx->sg = rctx->sgl; + } else { + rctx->sg = sg; + } + + return 0; +} + +static int stm32_hash_prepare_request(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + struct stm32_hash_state *state = &rctx->state; + unsigned int nbytes; + int ret, hash_later, bs; + bool update = rctx->op & HASH_OP_UPDATE; + bool init = !(state->flags & HASH_FLAGS_INIT); + bool finup = state->flags & HASH_FLAGS_FINUP; + bool final = state->flags & HASH_FLAGS_FINAL; + + if (!hdev->dma_lch || state->flags & HASH_FLAGS_CPU) + return 0; + + bs = crypto_ahash_blocksize(tfm); + + nbytes = state->bufcnt; + + /* + * In case of update request nbytes must correspond to the content of the + * buffer + the offset minus the content of the request already in the + * buffer. + */ + if (update || finup) + nbytes += req->nbytes - rctx->offset; + + dev_dbg(hdev->dev, + "%s: nbytes=%d, bs=%d, total=%d, offset=%d, bufcnt=%d\n", + __func__, nbytes, bs, rctx->total, rctx->offset, state->bufcnt); + + if (!nbytes) + return 0; + + rctx->total = nbytes; + + if (update && req->nbytes && (!IS_ALIGNED(state->bufcnt, bs))) { + int len = bs - state->bufcnt % bs; + + if (len > req->nbytes) + len = req->nbytes; + scatterwalk_map_and_copy(state->buffer + state->bufcnt, req->src, + 0, len, 0); + state->bufcnt += len; + rctx->offset = len; + } + + /* copy buffer in a temporary one that is used for sg alignment */ + if (state->bufcnt) + memcpy(hdev->xmit_buf, state->buffer, state->bufcnt); + + ret = stm32_hash_align_sgs(req->src, nbytes, bs, init, final, rctx); + if (ret) + return ret; + + hash_later = nbytes - rctx->total; + if (hash_later < 0) + hash_later = 0; + + if (hash_later && hash_later <= state->blocklen) { + scatterwalk_map_and_copy(state->buffer, + req->src, + req->nbytes - hash_later, + hash_later, 0); + + state->bufcnt = hash_later; + } else { + state->bufcnt = 0; + } + + if (hash_later > state->blocklen) { + /* FIXME: add support of this case */ + pr_err("Buffer contains more than one block.\n"); + return -ENOMEM; + } + + rctx->total = min(nbytes, rctx->total); + + return 0; +} + +static void stm32_hash_unprepare_request(struct ahash_request *req) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_state *state = &rctx->state; + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + u32 *preg = state->hw_context; + int swap_reg, i; + + if (hdev->dma_lch) + dmaengine_terminate_sync(hdev->dma_lch); + + if (state->flags & HASH_FLAGS_SGS_COPIED) + free_pages((unsigned long)sg_virt(rctx->sg), get_order(rctx->sg->length)); + + rctx->sg = NULL; + rctx->offset = 0; + + state->flags &= ~(HASH_FLAGS_SGS_COPIED); + + if (!(hdev->flags & HASH_FLAGS_INIT)) + goto pm_runtime; + + state->flags |= HASH_FLAGS_INIT; + + if (stm32_hash_wait_busy(hdev)) { + dev_warn(hdev->dev, "Wait busy failed."); + return; + } + + swap_reg = hash_swap_reg(rctx); + + if (!hdev->pdata->ux500) + *preg++ = stm32_hash_read(hdev, HASH_IMR); + *preg++ = stm32_hash_read(hdev, HASH_STR); + *preg++ = stm32_hash_read(hdev, HASH_CR); + for (i = 0; i < swap_reg; i++) + *preg++ = stm32_hash_read(hdev, HASH_CSR(i)); + +pm_runtime: + pm_runtime_mark_last_busy(hdev->dev); + pm_runtime_put_autosuspend(hdev->dev); +} + static int stm32_hash_enqueue(struct ahash_request *req, unsigned int op) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); @@ -1070,16 +1393,26 @@ static int stm32_hash_update(struct ahash_request *req) struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); struct stm32_hash_state *state = &rctx->state; - if (!req->nbytes || !(state->flags & HASH_FLAGS_CPU)) + if (!req->nbytes) return 0; - rctx->total = req->nbytes; - rctx->sg = req->src; - rctx->offset = 0; - if ((state->bufcnt + rctx->total < state->blocklen)) { - stm32_hash_append_sg(rctx); - return 0; + if (state->flags & HASH_FLAGS_CPU) { + rctx->total = req->nbytes; + rctx->sg = req->src; + rctx->offset = 0; + + if ((state->bufcnt + rctx->total < state->blocklen)) { + stm32_hash_append_sg(rctx); + return 0; + } + } else { /* DMA mode */ + if (state->bufcnt + req->nbytes <= state->blocklen) { + scatterwalk_map_and_copy(state->buffer + state->bufcnt, req->src, + 0, req->nbytes, 0); + state->bufcnt += req->nbytes; + return 0; + } } return stm32_hash_enqueue(req, HASH_OP_UPDATE); @@ -1098,20 +1431,18 @@ static int stm32_hash_final(struct ahash_request *req) static int stm32_hash_finup(struct ahash_request *req) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); - struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); struct stm32_hash_state *state = &rctx->state; if (!req->nbytes) goto out; state->flags |= HASH_FLAGS_FINUP; - rctx->total = req->nbytes; - rctx->sg = req->src; - rctx->offset = 0; - if (hdev->dma_lch && stm32_hash_dma_aligned_data(req)) - state->flags &= ~HASH_FLAGS_CPU; + if ((state->flags & HASH_FLAGS_CPU)) { + rctx->total = req->nbytes; + rctx->sg = req->src; + rctx->offset = 0; + } out: return stm32_hash_final(req); @@ -1215,7 +1546,6 @@ static int stm32_hash_cra_sha3_hmac_init(struct crypto_tfm *tfm) HASH_FLAGS_HMAC); } - static void stm32_hash_cra_exit(struct crypto_tfm *tfm) { struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm); @@ -1228,14 +1558,9 @@ static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id) { struct stm32_hash_dev *hdev = dev_id; - if (HASH_FLAGS_CPU & hdev->flags) { - if (HASH_FLAGS_OUTPUT_READY & hdev->flags) { - hdev->flags &= ~HASH_FLAGS_OUTPUT_READY; - goto finish; - } - } else if (HASH_FLAGS_DMA_ACTIVE & hdev->flags) { - hdev->flags &= ~HASH_FLAGS_DMA_ACTIVE; - goto finish; + if (HASH_FLAGS_OUTPUT_READY & hdev->flags) { + hdev->flags &= ~HASH_FLAGS_OUTPUT_READY; + goto finish; } return IRQ_HANDLED; @@ -1984,6 +2309,7 @@ static const struct stm32_hash_pdata stm32_hash_pdata_stm32mp13 = { .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32mp13), .has_sr = true, .has_mdmat = true, + .context_secured = true, }; static const struct of_device_id stm32_hash_of_match[] = { From 1d27e1f5c8f7ade40f0e85ddecbe9158393265e5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 12 Apr 2024 08:45:59 -0700 Subject: [PATCH 089/123] crypto: x86/aes-xts - handle CTS encryption more efficiently When encrypting a message whose length isn't a multiple of 16 bytes, encrypt the last full block in the main loop. This works because only decryption uses the last two tweaks in reverse order, not encryption. This improves the performance of decrypting messages whose length isn't a multiple of the AES block length, shrinks the size of aes-xts-avx-x86_64.o by 5.0%, and eliminates two instructions (a test and a not-taken conditional jump) when encrypting a message whose length *is* a multiple of the AES block length. While it's not super useful to optimize for ciphertext stealing given that it's rarely needed in practice, the other two benefits mentioned above make this optimization worthwhile. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 55 +++++++++++++++------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index 95e412e7601d..52f1997ed05a 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -537,12 +537,16 @@ add $7*16, KEY .else add $(15+7)*16, KEY -.endif - // Check whether the data length is a multiple of the AES block length. + // When decrypting a message whose length isn't a multiple of the AES + // block length, exclude the last full block from the main loop by + // subtracting 16 from LEN. This is needed because ciphertext stealing + // decryption uses the last two tweaks in reverse order. We'll handle + // the last full block and the partial block specially at the end. test $15, LEN - jnz .Lneed_cts\@ + jnz .Lneed_cts_dec\@ .Lxts_init\@: +.endif // Cache as many round keys as possible. _load_round_keys @@ -685,31 +689,31 @@ _vaes_4x \enc, 1, 12 jmp .Lencrypt_4x_done\@ -.Lneed_cts\@: - // The data length isn't a multiple of the AES block length, so - // ciphertext stealing (CTS) will be needed. Subtract one block from - // LEN so that the main loop doesn't process the last full block. The - // CTS step will process it specially along with the partial block. +.if !\enc +.Lneed_cts_dec\@: sub $16, LEN jmp .Lxts_init\@ +.endif .Lcts\@: // Do ciphertext stealing (CTS) to en/decrypt the last full block and - // the partial block. CTS needs two tweaks. TWEAK0_XMM contains the - // next tweak; compute the one after that. Decryption uses these two - // tweaks in reverse order, so also define aliases to handle that. - _next_tweak TWEAK0_XMM, %xmm0, TWEAK1_XMM -.if \enc - .set CTS_TWEAK0, TWEAK0_XMM - .set CTS_TWEAK1, TWEAK1_XMM -.else - .set CTS_TWEAK0, TWEAK1_XMM - .set CTS_TWEAK1, TWEAK0_XMM -.endif + // the partial block. TWEAK0_XMM contains the next tweak. - // En/decrypt the last full block. +.if \enc + // If encrypting, the main loop already encrypted the last full block to + // create the CTS intermediate ciphertext. Prepare for the rest of CTS + // by rewinding the pointers and loading the intermediate ciphertext. + sub $16, SRC + sub $16, DST + vmovdqu (DST), %xmm0 +.else + // If decrypting, the main loop didn't decrypt the last full block + // because CTS decryption uses the last two tweaks in reverse order. + // Do it now by advancing the tweak and decrypting the last full block. + _next_tweak TWEAK0_XMM, %xmm0, TWEAK1_XMM vmovdqu (SRC), %xmm0 - _aes_crypt \enc, _XMM, CTS_TWEAK0, %xmm0 + _aes_crypt \enc, _XMM, TWEAK1_XMM, %xmm0 +.endif .if USE_AVX10 // Create a mask that has the first LEN bits set. @@ -717,9 +721,10 @@ bzhi LEN, %rax, %rax kmovq %rax, %k1 - // Swap the first LEN bytes of the above result with the partial block. - // Note that to support in-place en/decryption, the load from the src - // partial block must happen before the store to the dst partial block. + // Swap the first LEN bytes of the en/decryption of the last full block + // with the partial block. Note that to support in-place en/decryption, + // the load from the src partial block must happen before the store to + // the dst partial block. vmovdqa %xmm0, %xmm1 vmovdqu8 16(SRC), %xmm0{%k1} vmovdqu8 %xmm1, 16(DST){%k1} @@ -750,7 +755,7 @@ vpblendvb %xmm3, %xmm0, %xmm1, %xmm0 .endif // En/decrypt again and store the last full block. - _aes_crypt \enc, _XMM, CTS_TWEAK1, %xmm0 + _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0 vmovdqu %xmm0, (DST) jmp .Ldone\@ .endm From ea9459ef363e46b1b353b3fd45761d738b1458a9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 12 Apr 2024 17:09:47 -0700 Subject: [PATCH 090/123] crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec() Since aesni_xts_enc() and aesni_xts_dec() are very similar, generate them from a macro that's passed an argument enc=1 or enc=0. This reduces the length of aesni-intel_asm.S by 112 lines while still producing the exact same object file in both 32-bit and 64-bit mode. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 278 +++++++++--------------------- 1 file changed, 83 insertions(+), 195 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 1cb55eea2efa..3a3e46188dec 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2825,28 +2825,24 @@ SYM_FUNC_END(aesni_ctr_enc) .previous /* - * _aesni_gf128mul_x_ble: internal ABI - * Multiply in GF(2^128) for XTS IVs + * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs * input: * IV: current IV * GF128MUL_MASK == mask with 0x87 and 0x01 * output: * IV: next IV * changed: - * CTR: == temporary value + * KEY: == temporary value */ -#define _aesni_gf128mul_x_ble() \ - pshufd $0x13, IV, KEY; \ - paddq IV, IV; \ - psrad $31, KEY; \ - pand GF128MUL_MASK, KEY; \ - pxor KEY, IV; +.macro _aesni_gf128mul_x_ble + pshufd $0x13, IV, KEY + paddq IV, IV + psrad $31, KEY + pand GF128MUL_MASK, KEY + pxor KEY, IV +.endm -/* - * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst, - * const u8 *src, unsigned int len, le128 *iv) - */ -SYM_FUNC_START(aesni_xts_enc) +.macro _aesni_xts_crypt enc FRAME_BEGIN #ifndef __x86_64__ pushl IVP @@ -2865,35 +2861,46 @@ SYM_FUNC_START(aesni_xts_enc) movups (IVP), IV mov 480(KEYP), KLEN +.if !\enc + add $240, KEYP -.Lxts_enc_loop4: + test $15, LEN + jz .Lxts_loop4\@ + sub $16, LEN +.endif + +.Lxts_loop4\@: sub $64, LEN - jl .Lxts_enc_1x + jl .Lxts_1x\@ movdqa IV, STATE1 movdqu 0x00(INP), IN pxor IN, STATE1 movdqu IV, 0x00(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble movdqa IV, STATE2 movdqu 0x10(INP), IN pxor IN, STATE2 movdqu IV, 0x10(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble movdqa IV, STATE3 movdqu 0x20(INP), IN pxor IN, STATE3 movdqu IV, 0x20(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble movdqa IV, STATE4 movdqu 0x30(INP), IN pxor IN, STATE4 movdqu IV, 0x30(OUTP) +.if \enc call _aesni_enc4 +.else + call _aesni_dec4 +.endif movdqu 0x00(OUTP), IN pxor IN, STATE1 @@ -2911,17 +2918,17 @@ SYM_FUNC_START(aesni_xts_enc) pxor IN, STATE4 movdqu STATE4, 0x30(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble add $64, INP add $64, OUTP test LEN, LEN - jnz .Lxts_enc_loop4 + jnz .Lxts_loop4\@ -.Lxts_enc_ret_iv: +.Lxts_ret_iv\@: movups IV, (IVP) -.Lxts_enc_ret: +.Lxts_ret\@: #ifndef __x86_64__ popl KLEN popl KEYP @@ -2931,39 +2938,60 @@ SYM_FUNC_START(aesni_xts_enc) FRAME_END RET -.Lxts_enc_1x: +.Lxts_1x\@: add $64, LEN - jz .Lxts_enc_ret_iv + jz .Lxts_ret_iv\@ +.if \enc sub $16, LEN - jl .Lxts_enc_cts4 + jl .Lxts_cts4\@ +.endif -.Lxts_enc_loop1: +.Lxts_loop1\@: movdqu (INP), STATE +.if \enc pxor IV, STATE call _aesni_enc1 - pxor IV, STATE - _aesni_gf128mul_x_ble() - - test LEN, LEN - jz .Lxts_enc_out - +.else add $16, INP sub $16, LEN - jl .Lxts_enc_cts1 + jl .Lxts_cts1\@ + pxor IV, STATE + call _aesni_dec1 +.endif + pxor IV, STATE + _aesni_gf128mul_x_ble + + test LEN, LEN + jz .Lxts_out\@ + +.if \enc + add $16, INP + sub $16, LEN + jl .Lxts_cts1\@ +.endif movdqu STATE, (OUTP) add $16, OUTP - jmp .Lxts_enc_loop1 + jmp .Lxts_loop1\@ -.Lxts_enc_out: +.Lxts_out\@: movdqu STATE, (OUTP) - jmp .Lxts_enc_ret_iv + jmp .Lxts_ret_iv\@ -.Lxts_enc_cts4: +.if \enc +.Lxts_cts4\@: movdqa STATE4, STATE sub $16, OUTP +.Lxts_cts1\@: +.else +.Lxts_cts1\@: + movdqa IV, STATE4 + _aesni_gf128mul_x_ble -.Lxts_enc_cts1: + pxor IV, STATE + call _aesni_dec1 + pxor IV, STATE +.endif #ifndef __x86_64__ lea .Lcts_permute_table, T1 #else @@ -2989,12 +3017,26 @@ SYM_FUNC_START(aesni_xts_enc) pblendvb IN2, IN1 movaps IN1, STATE +.if \enc pxor IV, STATE call _aesni_enc1 pxor IV, STATE +.else + pxor STATE4, STATE + call _aesni_dec1 + pxor STATE4, STATE +.endif movups STATE, (OUTP) - jmp .Lxts_enc_ret + jmp .Lxts_ret\@ +.endm + +/* + * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) + */ +SYM_FUNC_START(aesni_xts_enc) + _aesni_xts_crypt 1 SYM_FUNC_END(aesni_xts_enc) /* @@ -3002,159 +3044,5 @@ SYM_FUNC_END(aesni_xts_enc) * const u8 *src, unsigned int len, le128 *iv) */ SYM_FUNC_START(aesni_xts_dec) - FRAME_BEGIN -#ifndef __x86_64__ - pushl IVP - pushl LEN - pushl KEYP - pushl KLEN - movl (FRAME_OFFSET+20)(%esp), KEYP # ctx - movl (FRAME_OFFSET+24)(%esp), OUTP # dst - movl (FRAME_OFFSET+28)(%esp), INP # src - movl (FRAME_OFFSET+32)(%esp), LEN # len - movl (FRAME_OFFSET+36)(%esp), IVP # iv - movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK -#else - movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK -#endif - movups (IVP), IV - - mov 480(KEYP), KLEN - add $240, KEYP - - test $15, LEN - jz .Lxts_dec_loop4 - sub $16, LEN - -.Lxts_dec_loop4: - sub $64, LEN - jl .Lxts_dec_1x - - movdqa IV, STATE1 - movdqu 0x00(INP), IN - pxor IN, STATE1 - movdqu IV, 0x00(OUTP) - - _aesni_gf128mul_x_ble() - movdqa IV, STATE2 - movdqu 0x10(INP), IN - pxor IN, STATE2 - movdqu IV, 0x10(OUTP) - - _aesni_gf128mul_x_ble() - movdqa IV, STATE3 - movdqu 0x20(INP), IN - pxor IN, STATE3 - movdqu IV, 0x20(OUTP) - - _aesni_gf128mul_x_ble() - movdqa IV, STATE4 - movdqu 0x30(INP), IN - pxor IN, STATE4 - movdqu IV, 0x30(OUTP) - - call _aesni_dec4 - - movdqu 0x00(OUTP), IN - pxor IN, STATE1 - movdqu STATE1, 0x00(OUTP) - - movdqu 0x10(OUTP), IN - pxor IN, STATE2 - movdqu STATE2, 0x10(OUTP) - - movdqu 0x20(OUTP), IN - pxor IN, STATE3 - movdqu STATE3, 0x20(OUTP) - - movdqu 0x30(OUTP), IN - pxor IN, STATE4 - movdqu STATE4, 0x30(OUTP) - - _aesni_gf128mul_x_ble() - - add $64, INP - add $64, OUTP - test LEN, LEN - jnz .Lxts_dec_loop4 - -.Lxts_dec_ret_iv: - movups IV, (IVP) - -.Lxts_dec_ret: -#ifndef __x86_64__ - popl KLEN - popl KEYP - popl LEN - popl IVP -#endif - FRAME_END - RET - -.Lxts_dec_1x: - add $64, LEN - jz .Lxts_dec_ret_iv - -.Lxts_dec_loop1: - movdqu (INP), STATE - - add $16, INP - sub $16, LEN - jl .Lxts_dec_cts1 - - pxor IV, STATE - call _aesni_dec1 - pxor IV, STATE - _aesni_gf128mul_x_ble() - - test LEN, LEN - jz .Lxts_dec_out - - movdqu STATE, (OUTP) - add $16, OUTP - jmp .Lxts_dec_loop1 - -.Lxts_dec_out: - movdqu STATE, (OUTP) - jmp .Lxts_dec_ret_iv - -.Lxts_dec_cts1: - movdqa IV, STATE4 - _aesni_gf128mul_x_ble() - - pxor IV, STATE - call _aesni_dec1 - pxor IV, STATE - -#ifndef __x86_64__ - lea .Lcts_permute_table, T1 -#else - lea .Lcts_permute_table(%rip), T1 -#endif - add LEN, INP /* rewind input pointer */ - add $16, LEN /* # bytes in final block */ - movups (INP), IN1 - - mov T1, IVP - add $32, IVP - add LEN, T1 - sub LEN, IVP - add OUTP, LEN - - movups (T1), %xmm4 - movaps STATE, IN2 - pshufb %xmm4, STATE - movups STATE, (LEN) - - movups (IVP), %xmm0 - pshufb %xmm0, IN1 - pblendvb IN2, IN1 - movaps IN1, STATE - - pxor STATE4, STATE - call _aesni_dec1 - pxor STATE4, STATE - - movups STATE, (OUTP) - jmp .Lxts_dec_ret + _aesni_xts_crypt 0 SYM_FUNC_END(aesni_xts_dec) From 2717e01fc3fb4d37b625b9bd6cf161d0d9d5c4b5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 12 Apr 2024 20:17:26 -0700 Subject: [PATCH 091/123] crypto: x86/aes-xts - handle AES-128 and AES-192 more efficiently Decrease the amount of code specific to the different AES variants by "right-aligning" the sequence of round keys, and for AES-128 and AES-192 just skipping irrelevant rounds at the beginning. This shrinks the size of aes-xts-avx-x86_64.o by 13.3%, and it improves the efficiency of AES-128 and AES-192. The tradeoff is that for AES-256 some additional not-taken conditional jumps are now executed. But these are predicted well and are cheap on x86. Note that the ARMv8 CE based AES-XTS implementation uses a similar strategy to handle the different AES variants. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 178 ++++++++++++++------------- 1 file changed, 92 insertions(+), 86 deletions(-) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index 52f1997ed05a..f5e7ab739105 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -82,14 +82,15 @@ // Function parameters .set KEY, %rdi // Initially points to crypto_aes_ctx, then is - // advanced to point directly to 7th round key + // advanced to point to 7th-from-last round key .set SRC, %rsi // Pointer to next source data .set DST, %rdx // Pointer to next destination data .set LEN, %rcx // Remaining length in bytes .set TWEAK, %r8 // Pointer to next tweak -// %r9d holds the AES key length in bytes. +// %r9 holds the AES key length in bytes. .set KEYLEN, %r9d +.set KEYLEN64, %r9 // %rax and %r10-r11 are available as temporaries. @@ -165,12 +166,18 @@ .set GF_POLY_XMM, %xmm14 .set GF_POLY, V14 - // V15 holds the first AES round key, copied to all 128-bit lanes. + // V15 holds the key for AES "round 0", copied to all 128-bit lanes. .set KEY0_XMM, %xmm15 .set KEY0, V15 // If 32 SIMD registers are available, then V16-V29 hold the remaining // AES round keys, copied to all 128-bit lanes. + // + // AES-128, AES-192, and AES-256 use different numbers of round keys. + // To allow handling all three variants efficiently, we align the round + // keys to the *end* of this register range. I.e., AES-128 uses + // KEY5-KEY14, AES-192 uses KEY3-KEY14, and AES-256 uses KEY1-KEY14. + // (All also use KEY0 for the XOR-only "round" at the beginning.) .if USE_AVX10 .set KEY1_XMM, %xmm16 .set KEY1, V16 @@ -340,15 +347,15 @@ .set PREV_TWEAK, NEXT_TWEAK2 .set NEXT_TWEAK, NEXT_TWEAK3 .endif -.if \i < 20 && \i % 5 == 0 +.if \i >= 0 && \i < 20 && \i % 5 == 0 vpshufd $0x13, PREV_TWEAK, V5 -.elseif \i < 20 && \i % 5 == 1 +.elseif \i >= 0 && \i < 20 && \i % 5 == 1 vpaddq PREV_TWEAK, PREV_TWEAK, NEXT_TWEAK -.elseif \i < 20 && \i % 5 == 2 +.elseif \i >= 0 && \i < 20 && \i % 5 == 2 vpsrad $31, V5, V5 -.elseif \i < 20 && \i % 5 == 3 +.elseif \i >= 0 && \i < 20 && \i % 5 == 3 vpand GF_POLY, V5, V5 -.elseif \i < 20 && \i % 5 == 4 +.elseif \i >= 0 && \i < 20 && \i % 5 == 4 vpxor V5, NEXT_TWEAK, NEXT_TWEAK .elseif \i == 1000 vmovdqa NEXT_TWEAK0, TWEAK0 @@ -364,21 +371,21 @@ // when VL > 16 (which it is here), the needed shift amounts are byte-aligned, // which allows the use of vpsrldq and vpslldq to do 128-bit wide shifts. .macro _tweak_step_pclmul i -.if \i == 2 +.if \i == 0 vpsrldq $(128 - 4*VL/16) / 8, TWEAK0, NEXT_TWEAK0 -.elseif \i == 4 +.elseif \i == 2 vpsrldq $(128 - 4*VL/16) / 8, TWEAK1, NEXT_TWEAK1 -.elseif \i == 6 +.elseif \i == 4 vpsrldq $(128 - 4*VL/16) / 8, TWEAK2, NEXT_TWEAK2 -.elseif \i == 8 +.elseif \i == 6 vpsrldq $(128 - 4*VL/16) / 8, TWEAK3, NEXT_TWEAK3 -.elseif \i == 10 +.elseif \i == 8 vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK0, NEXT_TWEAK0 -.elseif \i == 12 +.elseif \i == 10 vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK1, NEXT_TWEAK1 -.elseif \i == 14 +.elseif \i == 12 vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK2, NEXT_TWEAK2 -.elseif \i == 16 +.elseif \i == 14 vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK3, NEXT_TWEAK3 .elseif \i == 1000 vpslldq $(4*VL/16) / 8, TWEAK0, TWEAK0 @@ -393,8 +400,8 @@ .endm // _tweak_step does one step of the computation of the next set of tweaks from -// TWEAK[0-3]. To complete all steps, this must be invoked with \i values 0 -// through at least 19, then 1000 which signals the last step. +// TWEAK[0-3]. To complete all steps, this is invoked with increasing values of +// \i that include at least 0 through 19, then 1000 which signals the last step. // // This is used to interleave the computation of the next set of tweaks with the // AES en/decryptions, which increases performance in some cases. @@ -406,22 +413,56 @@ .endif .endm -// Load the round keys: just the first one if !USE_AVX10, otherwise all of them. -.macro _load_round_keys - _vbroadcast128 -7*16(KEY), KEY0 +.macro _setup_round_keys enc + + // Select either the encryption round keys or the decryption round keys. +.if \enc + .set OFFS, 0 +.else + .set OFFS, 240 +.endif + + // Load the round key for "round 0". + _vbroadcast128 OFFS(KEY), KEY0 + + // Increment KEY to make it so that 7*16(KEY) is the last round key. + // For AES-128, increment by 3*16, resulting in the 10 round keys (not + // counting the zero-th round key which was just loaded into KEY0) being + // -2*16(KEY) through 7*16(KEY). For AES-192, increment by 5*16 and use + // 12 round keys -4*16(KEY) through 7*16(KEY). For AES-256, increment + // by 7*16 and use 14 round keys -6*16(KEY) through 7*16(KEY). + // + // This rebasing provides two benefits. First, it makes the offset to + // any round key be in the range [-96, 112], fitting in a signed byte. + // This shortens VEX-encoded instructions that access the later round + // keys which otherwise would need 4-byte offsets. Second, it makes it + // easy to do AES-128 and AES-192 by skipping irrelevant rounds at the + // beginning. Skipping rounds at the end doesn't work as well because + // the last round needs different instructions. + // + // An alternative approach would be to roll up all the round loops. We + // don't do that because it isn't compatible with caching the round keys + // in registers which we do when possible (see below), and also because + // it seems unwise to rely *too* heavily on the CPU's branch predictor. + lea OFFS-16(KEY, KEYLEN64, 4), KEY + + // If all 32 SIMD registers are available, cache all the round keys. .if USE_AVX10 + cmp $24, KEYLEN + jl .Laes128\@ + je .Laes192\@ _vbroadcast128 -6*16(KEY), KEY1 _vbroadcast128 -5*16(KEY), KEY2 +.Laes192\@: _vbroadcast128 -4*16(KEY), KEY3 _vbroadcast128 -3*16(KEY), KEY4 +.Laes128\@: _vbroadcast128 -2*16(KEY), KEY5 _vbroadcast128 -1*16(KEY), KEY6 _vbroadcast128 0*16(KEY), KEY7 _vbroadcast128 1*16(KEY), KEY8 _vbroadcast128 2*16(KEY), KEY9 _vbroadcast128 3*16(KEY), KEY10 - // Note: if it's AES-128 or AES-192, the last several round keys won't - // be used. We do the loads anyway to save a conditional jump. _vbroadcast128 4*16(KEY), KEY11 _vbroadcast128 5*16(KEY), KEY12 _vbroadcast128 6*16(KEY), KEY13 @@ -466,22 +507,22 @@ // Do a single round of AES en/decryption on the blocks in registers V0-V3, // using the same key for all blocks. The round key is loaded from the -// appropriate register or memory location for round \i. In addition, does step -// \i of the computation of the next set of tweaks. May clobber V4. +// appropriate register or memory location for round \i. In addition, does two +// steps of the computation of the next set of tweaks. May clobber V4. .macro _vaes_4x enc, last, i .if USE_AVX10 - _tweak_step (2*(\i-1)) + _tweak_step (2*(\i-5)) _vaes \enc, \last, KEY\i, V0 _vaes \enc, \last, KEY\i, V1 - _tweak_step (2*(\i-1) + 1) + _tweak_step (2*(\i-5) + 1) _vaes \enc, \last, KEY\i, V2 _vaes \enc, \last, KEY\i, V3 .else _vbroadcast128 (\i-7)*16(KEY), V4 - _tweak_step (2*(\i-1)) + _tweak_step (2*(\i-5)) _vaes \enc, \last, V4, V0 _vaes \enc, \last, V4, V1 - _tweak_step (2*(\i-1) + 1) + _tweak_step (2*(\i-5) + 1) _vaes \enc, \last, V4, V2 _vaes \enc, \last, V4, V3 .endif @@ -493,32 +534,25 @@ // length VL, use V* registers and leave \xmm_suffix empty. May clobber V4. .macro _aes_crypt enc, xmm_suffix, tweak, data _xor3 KEY0\xmm_suffix, \tweak, \data + cmp $24, KEYLEN + jl .Laes128\@ + je .Laes192\@ _vaes_1x \enc, 0, 1, \xmm_suffix, \data _vaes_1x \enc, 0, 2, \xmm_suffix, \data +.Laes192\@: _vaes_1x \enc, 0, 3, \xmm_suffix, \data _vaes_1x \enc, 0, 4, \xmm_suffix, \data +.Laes128\@: _vaes_1x \enc, 0, 5, \xmm_suffix, \data _vaes_1x \enc, 0, 6, \xmm_suffix, \data _vaes_1x \enc, 0, 7, \xmm_suffix, \data _vaes_1x \enc, 0, 8, \xmm_suffix, \data _vaes_1x \enc, 0, 9, \xmm_suffix, \data - cmp $24, KEYLEN - jle .Laes_128_or_192\@ _vaes_1x \enc, 0, 10, \xmm_suffix, \data _vaes_1x \enc, 0, 11, \xmm_suffix, \data _vaes_1x \enc, 0, 12, \xmm_suffix, \data _vaes_1x \enc, 0, 13, \xmm_suffix, \data _vaes_1x \enc, 1, 14, \xmm_suffix, \data - jmp .Laes_done\@ -.Laes_128_or_192\@: - je .Laes_192\@ - _vaes_1x \enc, 1, 10, \xmm_suffix, \data - jmp .Laes_done\@ -.Laes_192\@: - _vaes_1x \enc, 0, 10, \xmm_suffix, \data - _vaes_1x \enc, 0, 11, \xmm_suffix, \data - _vaes_1x \enc, 1, 12, \xmm_suffix, \data -.Laes_done\@: _vpxor \tweak, \data, \data .endm @@ -528,16 +562,7 @@ // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). movl 480(KEY), KEYLEN - // Advance KEY to point to the 7th encryption round key (if encrypting) - // or the 7th decryption round key (if decrypting). This makes the - // offset to any round key be in the range [-112, 112], fitting in a - // signed byte. This shortens VEX-encoded instructions that access the - // 8th and later round keys which otherwise would need 4-byte offsets. -.if \enc - add $7*16, KEY -.else - add $(15+7)*16, KEY - +.if !\enc // When decrypting a message whose length isn't a multiple of the AES // block length, exclude the last full block from the main loop by // subtracting 16 from LEN. This is needed because ciphertext stealing @@ -548,8 +573,8 @@ .Lxts_init\@: .endif - // Cache as many round keys as possible. - _load_round_keys + // Setup the pointer to the round keys and cache as many as possible. + _setup_round_keys \enc // Compute the first set of tweaks TWEAK[0-3]. _compute_first_set_of_tweaks @@ -560,7 +585,7 @@ .Lmain_loop\@: // This is the main loop, en/decrypting 4*VL bytes per iteration. - // XOR each source block with its tweak and the first round key. + // XOR each source block with its tweak and the zero-th round key. .if USE_AVX10 vmovdqu8 0*VL(SRC), V0 vmovdqu8 1*VL(SRC), V1 @@ -580,27 +605,27 @@ vpxor TWEAK2, V2, V2 vpxor TWEAK3, V3, V3 .endif + cmp $24, KEYLEN + jl .Laes128\@ + je .Laes192\@ // Do all the AES rounds on the data blocks, interleaved with // the computation of the next set of tweaks. _vaes_4x \enc, 0, 1 _vaes_4x \enc, 0, 2 +.Laes192\@: _vaes_4x \enc, 0, 3 _vaes_4x \enc, 0, 4 +.Laes128\@: _vaes_4x \enc, 0, 5 _vaes_4x \enc, 0, 6 _vaes_4x \enc, 0, 7 _vaes_4x \enc, 0, 8 _vaes_4x \enc, 0, 9 - // Try to optimize for AES-256 by keeping the code for AES-128 and - // AES-192 out-of-line. - cmp $24, KEYLEN - jle .Lencrypt_4x_aes_128_or_192\@ _vaes_4x \enc, 0, 10 _vaes_4x \enc, 0, 11 _vaes_4x \enc, 0, 12 _vaes_4x \enc, 0, 13 _vaes_4x \enc, 1, 14 -.Lencrypt_4x_done\@: // XOR in the tweaks again. _vpxor TWEAK0, V0, V0 @@ -678,17 +703,6 @@ jnz .Lcts\@ jmp .Ldone\@ - // Out-of-line handling of AES-128 and AES-192 -.Lencrypt_4x_aes_128_or_192\@: - jz .Lencrypt_4x_aes_192\@ - _vaes_4x \enc, 1, 10 - jmp .Lencrypt_4x_done\@ -.Lencrypt_4x_aes_192\@: - _vaes_4x \enc, 0, 10 - _vaes_4x \enc, 0, 11 - _vaes_4x \enc, 1, 12 - jmp .Lencrypt_4x_done\@ - .if !\enc .Lneed_cts_dec\@: sub $16, LEN @@ -764,38 +778,30 @@ // u8 iv[AES_BLOCK_SIZE]); SYM_TYPED_FUNC_START(aes_xts_encrypt_iv) vmovdqu (%rsi), %xmm0 - add $7*16, %rdi - vpxor -7*16(%rdi), %xmm0, %xmm0 + vpxor (%rdi), %xmm0, %xmm0 + movl 480(%rdi), %eax // AES key length + lea -16(%rdi, %rax, 4), %rdi + cmp $24, %eax + jl .Lencrypt_iv_aes128 + je .Lencrypt_iv_aes192 vaesenc -6*16(%rdi), %xmm0, %xmm0 vaesenc -5*16(%rdi), %xmm0, %xmm0 +.Lencrypt_iv_aes192: vaesenc -4*16(%rdi), %xmm0, %xmm0 vaesenc -3*16(%rdi), %xmm0, %xmm0 +.Lencrypt_iv_aes128: vaesenc -2*16(%rdi), %xmm0, %xmm0 vaesenc -1*16(%rdi), %xmm0, %xmm0 vaesenc 0*16(%rdi), %xmm0, %xmm0 vaesenc 1*16(%rdi), %xmm0, %xmm0 vaesenc 2*16(%rdi), %xmm0, %xmm0 - cmpl $24, 480-(7*16)(%rdi) - jle .Lencrypt_iv_aes_128_or_192 vaesenc 3*16(%rdi), %xmm0, %xmm0 vaesenc 4*16(%rdi), %xmm0, %xmm0 vaesenc 5*16(%rdi), %xmm0, %xmm0 vaesenc 6*16(%rdi), %xmm0, %xmm0 vaesenclast 7*16(%rdi), %xmm0, %xmm0 -.Lencrypt_iv_done: vmovdqu %xmm0, (%rsi) RET - - // Out-of-line handling of AES-128 and AES-192 -.Lencrypt_iv_aes_128_or_192: - jz .Lencrypt_iv_aes_192 - vaesenclast 3*16(%rdi), %xmm0, %xmm0 - jmp .Lencrypt_iv_done -.Lencrypt_iv_aes_192: - vaesenc 3*16(%rdi), %xmm0, %xmm0 - vaesenc 4*16(%rdi), %xmm0, %xmm0 - vaesenclast 5*16(%rdi), %xmm0, %xmm0 - jmp .Lencrypt_iv_done SYM_FUNC_END(aes_xts_encrypt_iv) // Below are the actual AES-XTS encryption and decryption functions, From e619723a857dfdcf0050713f12b3916816cd8d12 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 12 Apr 2024 20:17:27 -0700 Subject: [PATCH 092/123] crypto: x86/aes-xts - eliminate a few more instructions - For conditionally subtracting 16 from LEN when decrypting a message whose length isn't a multiple of 16, use the cmovnz instruction. - Fold the addition of 4*VL to LEN into the sub of VL or 16 from LEN. - Remove an unnecessary test instruction. This results in slightly shorter code, both source and binary. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 39 ++++++++++------------------ 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index f5e7ab739105..802d3b90d337 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -559,20 +559,20 @@ .macro _aes_xts_crypt enc _define_aliases - // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). - movl 480(KEY), KEYLEN - .if !\enc // When decrypting a message whose length isn't a multiple of the AES // block length, exclude the last full block from the main loop by // subtracting 16 from LEN. This is needed because ciphertext stealing // decryption uses the last two tweaks in reverse order. We'll handle // the last full block and the partial block specially at the end. + lea -16(LEN), %rax test $15, LEN - jnz .Lneed_cts_dec\@ -.Lxts_init\@: + cmovnz %rax, LEN .endif + // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). + movl 480(KEY), KEYLEN + // Setup the pointer to the round keys and cache as many as possible. _setup_round_keys \enc @@ -661,11 +661,10 @@ RET .Lhandle_remainder\@: - add $4*VL, LEN // Undo the extra sub from earlier. // En/decrypt any remaining full blocks, one vector at a time. .if VL > 16 - sub $VL, LEN + add $3*VL, LEN // Undo extra sub of 4*VL, then sub VL. jl .Lvec_at_a_time_done\@ .Lvec_at_a_time\@: _vmovdqu (SRC), V0 @@ -677,9 +676,9 @@ sub $VL, LEN jge .Lvec_at_a_time\@ .Lvec_at_a_time_done\@: - add $VL-16, LEN // Undo the extra sub from earlier. + add $VL-16, LEN // Undo extra sub of VL, then sub 16. .else - sub $16, LEN + add $4*VL-16, LEN // Undo extra sub of 4*VL, then sub 16. .endif // En/decrypt any remaining full blocks, one at a time. @@ -694,24 +693,12 @@ sub $16, LEN jge .Lblock_at_a_time\@ .Lblock_at_a_time_done\@: - add $16, LEN // Undo the extra sub from earlier. + add $16, LEN // Undo the extra sub of 16. + // Now 0 <= LEN <= 15. If LEN is zero, we're done. + jz .Ldone\@ -.Lfull_blocks_done\@: - // Now 0 <= LEN <= 15. If LEN is nonzero, do ciphertext stealing to - // process the last 16 + LEN bytes. If LEN is zero, we're done. - test LEN, LEN - jnz .Lcts\@ - jmp .Ldone\@ - -.if !\enc -.Lneed_cts_dec\@: - sub $16, LEN - jmp .Lxts_init\@ -.endif - -.Lcts\@: - // Do ciphertext stealing (CTS) to en/decrypt the last full block and - // the partial block. TWEAK0_XMM contains the next tweak. + // Otherwise 1 <= LEN <= 15, but the real remaining length is 16 + LEN. + // Do ciphertext stealing to process the last 16 + LEN bytes. .if \enc // If encrypting, the main loop already encrypted the last full block to From 543ea178fbfadeaf79e15766ac989f3351349f02 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 12 Apr 2024 20:17:28 -0700 Subject: [PATCH 093/123] crypto: x86/aes-xts - optimize size of instructions operating on lengths x86_64 has the "interesting" property that the instruction size is generally a bit shorter for instructions that operate on the 32-bit (or less) part of registers, or registers that are in the original set of 8. This patch adjusts the AES-XTS code to take advantage of that property by changing the LEN parameter from size_t to unsigned int (which is all that's needed and is what the non-AVX implementation uses) and using the %eax register for KEYLEN. This decreases the size of aes-xts-avx-x86_64.o by 1.2%. Note that changing the kmovq to kmovd was going to be needed anyway to make the AVX10/256 code really work on CPUs that don't support 512-bit vectors (since the AVX10 spec says that 64-bit opmask instructions will only be supported on processors that support 512-bit vectors). Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 40 +++++++++++++++------------- arch/x86/crypto/aesni-intel_glue.c | 18 ++++++------- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index 802d3b90d337..48f97b79f7a9 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -85,14 +85,16 @@ // advanced to point to 7th-from-last round key .set SRC, %rsi // Pointer to next source data .set DST, %rdx // Pointer to next destination data -.set LEN, %rcx // Remaining length in bytes +.set LEN, %ecx // Remaining length in bytes +.set LEN8, %cl +.set LEN64, %rcx .set TWEAK, %r8 // Pointer to next tweak -// %r9 holds the AES key length in bytes. -.set KEYLEN, %r9d -.set KEYLEN64, %r9 +// %rax holds the AES key length in bytes. +.set KEYLEN, %eax +.set KEYLEN64, %rax -// %rax and %r10-r11 are available as temporaries. +// %r9-r11 are available as temporaries. .macro _define_Vi i .if VL == 16 @@ -565,9 +567,9 @@ // subtracting 16 from LEN. This is needed because ciphertext stealing // decryption uses the last two tweaks in reverse order. We'll handle // the last full block and the partial block specially at the end. - lea -16(LEN), %rax - test $15, LEN - cmovnz %rax, LEN + lea -16(LEN), %eax + test $15, LEN8 + cmovnz %eax, LEN .endif // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). @@ -650,7 +652,7 @@ // Check for the uncommon case where the data length isn't a multiple of // 4*VL. Handle it out-of-line in order to optimize for the common // case. In the common case, just fall through to the ret. - test $4*VL-1, LEN + test $4*VL-1, LEN8 jnz .Lhandle_remainder\@ .Ldone\@: // Store the next tweak back to *TWEAK to support continuation calls. @@ -718,9 +720,9 @@ .if USE_AVX10 // Create a mask that has the first LEN bits set. - mov $-1, %rax - bzhi LEN, %rax, %rax - kmovq %rax, %k1 + mov $-1, %r9d + bzhi LEN, %r9d, %r9d + kmovd %r9d, %k1 // Swap the first LEN bytes of the en/decryption of the last full block // with the partial block. Note that to support in-place en/decryption, @@ -730,23 +732,23 @@ vmovdqu8 16(SRC), %xmm0{%k1} vmovdqu8 %xmm1, 16(DST){%k1} .else - lea .Lcts_permute_table(%rip), %rax + lea .Lcts_permute_table(%rip), %r9 // Load the src partial block, left-aligned. Note that to support // in-place en/decryption, this must happen before the store to the dst // partial block. - vmovdqu (SRC, LEN, 1), %xmm1 + vmovdqu (SRC, LEN64, 1), %xmm1 // Shift the first LEN bytes of the en/decryption of the last full block // to the end of a register, then store it to DST+LEN. This stores the // dst partial block. It also writes to the second part of the dst last // full block, but that part is overwritten later. - vpshufb (%rax, LEN, 1), %xmm0, %xmm2 - vmovdqu %xmm2, (DST, LEN, 1) + vpshufb (%r9, LEN64, 1), %xmm0, %xmm2 + vmovdqu %xmm2, (DST, LEN64, 1) // Make xmm3 contain [16-LEN,16-LEN+1,...,14,15,0x80,0x80,...]. - sub LEN, %rax - vmovdqu 32(%rax), %xmm3 + sub LEN64, %r9 + vmovdqu 32(%r9), %xmm3 // Shift the src partial block to the beginning of its register. vpshufb %xmm3, %xmm1, %xmm1 @@ -795,7 +797,7 @@ SYM_FUNC_END(aes_xts_encrypt_iv) // instantiated from the above macro. They all have the following prototype: // // void (*xts_asm_func)(const struct crypto_aes_ctx *key, -// const u8 *src, u8 *dst, size_t len, +// const u8 *src, u8 *dst, unsigned int len, // u8 tweak[AES_BLOCK_SIZE]); // // |key| is the data key. |tweak| contains the next tweak; the encryption of diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index e7d21000cb05..110b3282a1f2 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -899,7 +899,7 @@ static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key, typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key, u8 iv[AES_BLOCK_SIZE]); typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key, - const u8 *src, u8 *dst, size_t len, + const u8 *src, u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); /* This handles cases where the source and/or destination span pages. */ @@ -1021,14 +1021,14 @@ static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, } static void aesni_xts_encrypt(const struct crypto_aes_ctx *key, - const u8 *src, u8 *dst, size_t len, + const u8 *src, u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]) { aesni_xts_enc(key, dst, src, len, tweak); } static void aesni_xts_decrypt(const struct crypto_aes_ctx *key, - const u8 *src, u8 *dst, size_t len, + const u8 *src, u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]) { aesni_xts_dec(key, dst, src, len, tweak); @@ -1185,12 +1185,12 @@ asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, #define DEFINE_XTS_ALG(suffix, driver_name, priority) \ \ -asmlinkage void aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, \ - const u8 *src, u8 *dst, size_t len, \ - u8 tweak[AES_BLOCK_SIZE]); \ -asmlinkage void aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, \ - const u8 *src, u8 *dst, size_t len, \ - u8 tweak[AES_BLOCK_SIZE]); \ +asmlinkage void \ +aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ + u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ +asmlinkage void \ +aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ + u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ \ static int xts_encrypt_##suffix(struct skcipher_request *req) \ { \ From 3f4d1482dad9a87c3bf00490fcf339cb5f6d53a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 15 Apr 2024 09:34:21 +0200 Subject: [PATCH 094/123] crypto: tegra - Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Fixes: 0880bb3b00c8 ("crypto: tegra - Add Tegra Security Engine driver") Signed-off-by: Uwe Kleine-König Acked-by: Akhil R Signed-off-by: Herbert Xu --- drivers/crypto/tegra/tegra-se-main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/tegra/tegra-se-main.c b/drivers/crypto/tegra/tegra-se-main.c index f4758e320615..9955874b3dc3 100644 --- a/drivers/crypto/tegra/tegra-se-main.c +++ b/drivers/crypto/tegra/tegra-se-main.c @@ -320,7 +320,7 @@ static int tegra_se_probe(struct platform_device *pdev) return 0; } -static int tegra_se_remove(struct platform_device *pdev) +static void tegra_se_remove(struct platform_device *pdev) { struct tegra_se *se = platform_get_drvdata(pdev); @@ -328,8 +328,6 @@ static int tegra_se_remove(struct platform_device *pdev) crypto_engine_exit(se->engine); iommu_fwspec_free(se->dev); host1x_client_unregister(&se->client); - - return 0; } static const struct tegra_se_regs tegra234_aes1_regs = { @@ -390,7 +388,7 @@ static struct platform_driver tegra_se_driver = { .of_match_table = tegra_se_of_match, }, .probe = tegra_se_probe, - .remove = tegra_se_remove, + .remove_new = tegra_se_remove, }; static int tegra_se_host1x_probe(struct host1x_device *dev) From 571e557cbaf748124aaf0f0ac26772d7380e78fc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 15 Apr 2024 15:04:26 +0200 Subject: [PATCH 095/123] crypto: arm64/aes-ce - Simplify round key load sequence Tweak the round key logic so that they can be loaded using a single branchless sequence using overlapping loads. This is shorter and simpler, and puts the conditional branches based on the key size further apart, which might benefit microarchitectures that cannot record taken branches at every instruction. For these branches, use test-bit-branch instructions that don't clobber the condition flags. Note that none of this has any impact on performance, positive or otherwise (and the branch prediction benefit would only benefit AES-192 which nobody uses). It does make for nicer code, though. While at it, use \@ to generate the labels inside the macros, which is more robust than using fixed numbers, which could clash inadvertently. Also, bring aes-neon.S in line with these changes, including the switch to test-and-branch instructions, to avoid surprises in the future when we might start relying on the condition flags being preserved in the chaining mode wrappers in aes-modes.S Signed-off-by: Ard Biesheuvel Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-ce.S | 34 ++++++++++++++-------------------- arch/arm64/crypto/aes-neon.S | 20 ++++++++++---------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 1dc5bbbfeed2..b262eaa9170c 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -25,33 +25,28 @@ .endm /* preload all round keys */ - .macro load_round_keys, rounds, rk - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ - ld1 {v17.4s-v18.4s}, [\rk], #32 -1111: ld1 {v19.4s-v20.4s}, [\rk], #32 -2222: ld1 {v21.4s-v24.4s}, [\rk], #64 - ld1 {v25.4s-v28.4s}, [\rk], #64 - ld1 {v29.4s-v31.4s}, [\rk] + .macro load_round_keys, rk, nr, tmp + add \tmp, \rk, \nr, sxtw #4 + sub \tmp, \tmp, #160 + ld1 {v17.4s-v20.4s}, [\rk] + ld1 {v21.4s-v24.4s}, [\tmp], #64 + ld1 {v25.4s-v28.4s}, [\tmp], #64 + ld1 {v29.4s-v31.4s}, [\tmp] .endm /* prepare for encryption with key in rk[] */ .macro enc_prepare, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp + load_round_keys \rk, \rounds, \temp .endm /* prepare for encryption (again) but with new key in rk[] */ .macro enc_switch_key, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp + load_round_keys \rk, \rounds, \temp .endm /* prepare for decryption with key in rk[] */ .macro dec_prepare, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp + load_round_keys \rk, \rounds, \temp .endm .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4 @@ -110,14 +105,13 @@ /* up to 5 interleaved blocks */ .macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4 - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ + tbz \rounds, #2, .L\@ /* 128 bits */ round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4 round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4 -1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4 + tbz \rounds, #1, .L\@ /* 192 bits */ + round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4 round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4 -2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 +.L\@: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4 .endr fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4 diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index 9de7fbc797af..3a8961b6ea51 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -99,16 +99,16 @@ ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds -1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ +.La\@: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ movi v15.16b, #0x40 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ sub_bytes \in - subs \i, \i, #1 + sub \i, \i, #1 ld1 {v15.4s}, [\rkp], #16 - beq 2222f + cbz \i, .Lb\@ mix_columns \in, \enc - b 1111b -2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + b .La\@ +.Lb\@: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ .endm .macro encrypt_block, in, rounds, rk, rkp, i @@ -206,7 +206,7 @@ ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds -1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ +.La\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ @@ -216,13 +216,13 @@ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ sub_bytes_4x \in0, \in1, \in2, \in3 - subs \i, \i, #1 + sub \i, \i, #1 ld1 {v15.4s}, [\rkp], #16 - beq 2222f + cbz \i, .Lb\@ mix_columns_2x \in0, \in1, \enc mix_columns_2x \in2, \in3, \enc - b 1111b -2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + b .La\@ +.Lb\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ From 23e4099bdc3c8381992f9eb975c79196d6755210 Mon Sep 17 00:00:00 2001 From: Hailey Mothershead Date: Mon, 15 Apr 2024 22:19:15 +0000 Subject: [PATCH 096/123] crypto: aead,cipher - zeroize key buffer after use I.G 9.7.B for FIPS 140-3 specifies that variables temporarily holding cryptographic information should be zeroized once they are no longer needed. Accomplish this by using kfree_sensitive for buffers that previously held the private key. Signed-off-by: Hailey Mothershead Signed-off-by: Herbert Xu --- crypto/aead.c | 3 +-- crypto/cipher.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/aead.c b/crypto/aead.c index 0e75a69189df..cade532413bf 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -36,8 +36,7 @@ static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key, alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); ret = crypto_aead_alg(tfm)->setkey(tfm, alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); + kfree_sensitive(buffer); return ret; } diff --git a/crypto/cipher.c b/crypto/cipher.c index 47c77a3e5978..40cae908788e 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -34,8 +34,7 @@ static int setkey_unaligned(struct crypto_cipher *tfm, const u8 *key, alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); ret = cia->cia_setkey(crypto_cipher_tfm(tfm), alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); + kfree_sensitive(buffer); return ret; } From 483fd65ce29317044d1d00757e3fd23503b6b04c Mon Sep 17 00:00:00 2001 From: Lucas Segarra Fernandez Date: Tue, 16 Apr 2024 12:33:37 +0200 Subject: [PATCH 097/123] crypto: qat - validate slices count returned by FW The function adf_send_admin_tl_start() enables the telemetry (TL) feature on a QAT device by sending the ICP_QAT_FW_TL_START message to the firmware. This triggers the FW to start writing TL data to a DMA buffer in memory and returns an array containing the number of accelerators of each type (slices) supported by this HW. The pointer to this array is stored in the adf_tl_hw_data data structure called slice_cnt. The array slice_cnt is then used in the function tl_print_dev_data() to report in debugfs only statistics about the supported accelerators. An incorrect value of the elements in slice_cnt might lead to an out of bounds memory read. At the moment, there isn't an implementation of FW that returns a wrong value, but for robustness validate the slice count array returned by FW. Fixes: 69e7649f7cc2 ("crypto: qat - add support for device telemetry") Signed-off-by: Lucas Segarra Fernandez Reviewed-by: Damian Muszynski Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../crypto/intel/qat/qat_common/adf_gen4_tl.c | 1 + .../intel/qat/qat_common/adf_telemetry.c | 21 +++++++++++++++++++ .../intel/qat/qat_common/adf_telemetry.h | 1 + 3 files changed, 23 insertions(+) diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c index 7fc7a77f6aed..c7ad8cf07863 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c @@ -149,5 +149,6 @@ void adf_gen4_init_tl_data(struct adf_tl_hw_data *tl_data) tl_data->sl_exec_counters = sl_exec_counters; tl_data->rp_counters = rp_counters; tl_data->num_rp_counters = ARRAY_SIZE(rp_counters); + tl_data->max_sl_cnt = ADF_GEN4_TL_MAX_SLICES_PER_TYPE; } EXPORT_SYMBOL_GPL(adf_gen4_init_tl_data); diff --git a/drivers/crypto/intel/qat/qat_common/adf_telemetry.c b/drivers/crypto/intel/qat/qat_common/adf_telemetry.c index 2ff714d11bd2..74fb0c2ed241 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_telemetry.c +++ b/drivers/crypto/intel/qat/qat_common/adf_telemetry.c @@ -41,6 +41,20 @@ static int validate_tl_data(struct adf_tl_hw_data *tl_data) return 0; } +static int validate_tl_slice_counters(struct icp_qat_fw_init_admin_slice_cnt *slice_count, + u8 max_slices_per_type) +{ + u8 *sl_counter = (u8 *)slice_count; + int i; + + for (i = 0; i < ADF_TL_SL_CNT_COUNT; i++) { + if (sl_counter[i] > max_slices_per_type) + return -EINVAL; + } + + return 0; +} + static int adf_tl_alloc_mem(struct adf_accel_dev *accel_dev) { struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); @@ -214,6 +228,13 @@ int adf_tl_run(struct adf_accel_dev *accel_dev, int state) return ret; } + ret = validate_tl_slice_counters(&telemetry->slice_cnt, tl_data->max_sl_cnt); + if (ret) { + dev_err(dev, "invalid value returned by FW\n"); + adf_send_admin_tl_stop(accel_dev); + return ret; + } + telemetry->hbuffs = state; atomic_set(&telemetry->state, state); diff --git a/drivers/crypto/intel/qat/qat_common/adf_telemetry.h b/drivers/crypto/intel/qat/qat_common/adf_telemetry.h index 9be81cd3b886..e54a406cc1b4 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_telemetry.h +++ b/drivers/crypto/intel/qat/qat_common/adf_telemetry.h @@ -40,6 +40,7 @@ struct adf_tl_hw_data { u8 num_dev_counters; u8 num_rp_counters; u8 max_rp; + u8 max_sl_cnt; }; struct adf_telemetry { From ee2615fa4dc2645d8cc530d23a982ed626f402c2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 16 Apr 2024 17:51:49 +0200 Subject: [PATCH 098/123] dt-bindings: crypto: starfive: Restore sort order Restore alphabetical sort order of the list of supported compatible values. Fixes: 2ccf7a5d9c50f3ea ("dt-bindings: crypto: starfive: Add jh8100 support") Signed-off-by: Geert Uytterhoeven Acked-by: Conor Dooley Signed-off-by: Herbert Xu --- .../devicetree/bindings/crypto/starfive,jh7110-crypto.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml b/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml index 446764bc2ccc..7ccb6e1641d0 100644 --- a/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml +++ b/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml @@ -13,8 +13,8 @@ maintainers: properties: compatible: enum: - - starfive,jh8100-crypto - starfive,jh7110-crypto + - starfive,jh8100-crypto reg: maxItems: 1 From 5ae6d3f5c85cfc8d3ce60da776387062171cc174 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 17 Apr 2024 21:12:32 +0300 Subject: [PATCH 099/123] crypto: tegra - Fix some error codes Return negative -ENOMEM, instead of positive ENOMEM. Fixes: 0880bb3b00c8 ("crypto: tegra - Add Tegra Security Engine driver") Signed-off-by: Dan Carpenter Reviewed-by: Jon Hunter Acked-by: Akhil R Acked-by: Thierry Reding Signed-off-by: Herbert Xu --- drivers/crypto/tegra/tegra-se-aes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c index adc6cdab389e..ae7a0f8435fc 100644 --- a/drivers/crypto/tegra/tegra-se-aes.c +++ b/drivers/crypto/tegra/tegra-se-aes.c @@ -1156,7 +1156,7 @@ static int tegra_ccm_do_one_req(struct crypto_engine *engine, void *areq) rctx->outbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN, &rctx->outbuf.addr, GFP_KERNEL); if (!rctx->outbuf.buf) { - ret = ENOMEM; + ret = -ENOMEM; goto outbuf_err; } @@ -1226,7 +1226,7 @@ static int tegra_gcm_do_one_req(struct crypto_engine *engine, void *areq) rctx->outbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN, &rctx->outbuf.addr, GFP_KERNEL); if (!rctx->outbuf.buf) { - ret = ENOMEM; + ret = -ENOMEM; goto outbuf_err; } From bd955a4e928f4b3a5b5eb983df1726300c90201c Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 18 Apr 2024 11:24:44 -0400 Subject: [PATCH 100/123] crypto: ecdh - Pass private key in proper byte order to check valid key ecc_is_key_valid expects a key with the most significant digit in the last entry of the digit array. Currently ecdh_set_secret passes a reversed key to ecc_is_key_valid that then passes the rather simple test checking whether the private key is in range [2, n-3]. For all current ecdh- supported curves (NIST P192/256/384) the 'n' parameter is a rather large number, therefore easily passing this test. Throughout the ecdh and ecc codebase the variable 'priv' is used for a private_key holding the bytes in proper byte order. Therefore, introduce priv in ecdh_set_secret and copy the bytes from ctx->private_key into priv in proper byte order by using ecc_swap_digits. Pass priv to ecc_is_valid_key. Cc: Ard Biesheuvel Cc: Salvatore Benedetto Signed-off-by: Stefan Berger Acked-by: Jarkko Sakkinen Signed-off-by: Herbert Xu --- crypto/ecdh.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crypto/ecdh.c b/crypto/ecdh.c index 3049f147e011..c02c9a2b9682 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -27,7 +27,9 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, unsigned int len) { struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); + u64 priv[ECC_MAX_DIGITS]; struct ecdh params; + int ret = 0; if (crypto_ecdh_decode_key(buf, len, ¶ms) < 0 || params.key_size > sizeof(u64) * ctx->ndigits) @@ -40,13 +42,16 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, ctx->private_key); memcpy(ctx->private_key, params.key, params.key_size); + ecc_swap_digits(ctx->private_key, priv, ctx->ndigits); if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits, - ctx->private_key, params.key_size) < 0) { + priv, params.key_size) < 0) { memzero_explicit(ctx->private_key, params.key_size); - return -EINVAL; + ret = -EINVAL; } - return 0; + memzero_explicit(priv, sizeof(priv)); + + return ret; } static int ecdh_compute_value(struct kpp_request *req) From 01474b70a779319db6d3d2d67a7232a7b4202029 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 18 Apr 2024 11:24:45 -0400 Subject: [PATCH 101/123] crypto: ecdh - Initialize ctx->private_key in proper byte order The private key in ctx->private_key is currently initialized in reverse byte order in ecdh_set_secret and whenever the key is needed in proper byte order the variable priv is introduced and the bytes from ctx->private_key are copied into priv while being byte-swapped (ecc_swap_digits). To get rid of the unnecessary byte swapping initialize ctx->private_key in proper byte order and clean up all functions that were previously using priv or were called with ctx->private_key: - ecc_gen_privkey: Directly initialize the passed ctx->private_key with random bytes filling all the digits of the private key. Get rid of the priv variable. This function only has ecdh_set_secret as a caller to create NIST P192/256/384 private keys. - crypto_ecdh_shared_secret: Called only from ecdh_compute_value with ctx->private_key. Get rid of the priv variable and work with the passed private_key directly. - ecc_make_pub_key: Called only from ecdh_compute_value with ctx->private_key. Get rid of the priv variable and work with the passed private_key directly. Cc: Salvatore Benedetto Signed-off-by: Stefan Berger Acked-by: Jarkko Sakkinen Signed-off-by: Herbert Xu --- crypto/ecc.c | 29 ++++++++++------------------- crypto/ecdh.c | 8 +++----- include/crypto/internal/ecc.h | 3 ++- 3 files changed, 15 insertions(+), 25 deletions(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index 2e05387b9499..c1d2e884be1e 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -1497,10 +1497,10 @@ EXPORT_SYMBOL(ecc_is_key_valid); * This method generates a private key uniformly distributed in the range * [2, n-3]. */ -int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) +int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, + u64 *private_key) { const struct ecc_curve *curve = ecc_get_curve(curve_id); - u64 priv[ECC_MAX_DIGITS]; unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; unsigned int nbits = vli_num_bits(curve->n, ndigits); int err; @@ -1509,7 +1509,7 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) * Step 1 & 2: check that N is included in Table 1 of FIPS 186-5, * section 6.1.1. */ - if (nbits < 224 || ndigits > ARRAY_SIZE(priv)) + if (nbits < 224) return -EINVAL; /* @@ -1527,17 +1527,16 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) return -EFAULT; /* Step 3: obtain N returned_bits from the DRBG. */ - err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)priv, nbytes); + err = crypto_rng_get_bytes(crypto_default_rng, + (u8 *)private_key, nbytes); crypto_put_default_rng(); if (err) return err; /* Step 4: make sure the private key is in the valid range. */ - if (__ecc_is_key_valid(curve, priv, ndigits)) + if (__ecc_is_key_valid(curve, private_key, ndigits)) return -EINVAL; - ecc_swap_digits(priv, privkey, ndigits); - return 0; } EXPORT_SYMBOL(ecc_gen_privkey); @@ -1547,23 +1546,20 @@ int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits, { int ret = 0; struct ecc_point *pk; - u64 priv[ECC_MAX_DIGITS]; const struct ecc_curve *curve = ecc_get_curve(curve_id); - if (!private_key || ndigits > ARRAY_SIZE(priv)) { + if (!private_key) { ret = -EINVAL; goto out; } - ecc_swap_digits(private_key, priv, ndigits); - pk = ecc_alloc_point(ndigits); if (!pk) { ret = -ENOMEM; goto out; } - ecc_point_mult(pk, &curve->g, priv, NULL, curve, ndigits); + ecc_point_mult(pk, &curve->g, private_key, NULL, curve, ndigits); /* SP800-56A rev 3 5.6.2.1.3 key check */ if (ecc_is_pubkey_valid_full(curve, pk)) { @@ -1647,13 +1643,11 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, { int ret = 0; struct ecc_point *product, *pk; - u64 priv[ECC_MAX_DIGITS]; u64 rand_z[ECC_MAX_DIGITS]; unsigned int nbytes; const struct ecc_curve *curve = ecc_get_curve(curve_id); - if (!private_key || !public_key || - ndigits > ARRAY_SIZE(priv) || ndigits > ARRAY_SIZE(rand_z)) { + if (!private_key || !public_key || ndigits > ARRAY_SIZE(rand_z)) { ret = -EINVAL; goto out; } @@ -1674,15 +1668,13 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, if (ret) goto err_alloc_product; - ecc_swap_digits(private_key, priv, ndigits); - product = ecc_alloc_point(ndigits); if (!product) { ret = -ENOMEM; goto err_alloc_product; } - ecc_point_mult(product, pk, priv, rand_z, curve, ndigits); + ecc_point_mult(product, pk, private_key, rand_z, curve, ndigits); if (ecc_point_is_zero(product)) { ret = -EFAULT; @@ -1692,7 +1684,6 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, ecc_swap_digits(product->x, secret, ndigits); err_validity: - memzero_explicit(priv, sizeof(priv)); memzero_explicit(rand_z, sizeof(rand_z)); ecc_free_point(product); err_alloc_product: diff --git a/crypto/ecdh.c b/crypto/ecdh.c index c02c9a2b9682..72cfd1590156 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -27,7 +27,6 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, unsigned int len) { struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); - u64 priv[ECC_MAX_DIGITS]; struct ecdh params; int ret = 0; @@ -41,15 +40,14 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, return ecc_gen_privkey(ctx->curve_id, ctx->ndigits, ctx->private_key); - memcpy(ctx->private_key, params.key, params.key_size); - ecc_swap_digits(ctx->private_key, priv, ctx->ndigits); + ecc_digits_from_bytes(params.key, params.key_size, + ctx->private_key, ctx->ndigits); if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits, - priv, params.key_size) < 0) { + ctx->private_key, params.key_size) < 0) { memzero_explicit(ctx->private_key, params.key_size); ret = -EINVAL; } - memzero_explicit(priv, sizeof(priv)); return ret; } diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index 4e2f5f938e91..7ca1f463d1ec 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -103,7 +103,8 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, * Returns 0 if the private key was generated successfully, a negative value * if an error occurred. */ -int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey); +int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, + u64 *private_key); /** * ecc_make_pub_key() - Compute an ECC public key From 31b57788a5024d3a114b28dad224a93831b90b5f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 19 Apr 2024 07:01:12 +0200 Subject: [PATCH 102/123] hwrng: stm32 - use logical OR in conditional The conditional is used to check whether err is non-zero OR whether reg variable is non-zero after clearing bits from it. This should be done using logical OR, not bitwise OR, fix it. Fixes: 6b85a7e141cb ("hwrng: stm32 - implement STM32MP13x support") Signed-off-by: Marek Vasut Signed-off-by: Herbert Xu --- drivers/char/hw_random/stm32-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index 379bc245c520..1cc61ef8ee54 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -353,7 +353,7 @@ static int stm32_rng_init(struct hwrng *rng) err = readl_relaxed_poll_timeout_atomic(priv->base + RNG_SR, reg, reg & RNG_SR_DRDY, 10, 100000); - if (err | (reg & ~RNG_SR_DRDY)) { + if (err || (reg & ~RNG_SR_DRDY)) { clk_disable_unprepare(priv->clk); dev_err((struct device *)priv->rng.priv, "%s: timeout:%x SR: %x!\n", __func__, err, reg); From da62ed5c019cc48648f37c7a07e6a56cf637a795 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 19 Apr 2024 07:01:13 +0200 Subject: [PATCH 103/123] hwrng: stm32 - put IP into RPM suspend on failure In case of an irrecoverable failure, put the IP into RPM suspend to avoid RPM imbalance. I did not trigger this case, but it seems it should be done based on reading the code. Fixes: b17bc6eb7c2b ("hwrng: stm32 - rework error handling in stm32_rng_read()") Signed-off-by: Marek Vasut Signed-off-by: Herbert Xu --- drivers/char/hw_random/stm32-rng.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index 1cc61ef8ee54..b6182f86d8a4 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -220,7 +220,8 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) if (err && i > RNG_NB_RECOVER_TRIES) { dev_err((struct device *)priv->rng.priv, "Couldn't recover from seed error\n"); - return -ENOTRECOVERABLE; + retval = -ENOTRECOVERABLE; + goto exit_rpm; } continue; @@ -238,7 +239,8 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) if (err && i > RNG_NB_RECOVER_TRIES) { dev_err((struct device *)priv->rng.priv, "Couldn't recover from seed error"); - return -ENOTRECOVERABLE; + retval = -ENOTRECOVERABLE; + goto exit_rpm; } continue; @@ -250,6 +252,7 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) max -= sizeof(u32); } +exit_rpm: pm_runtime_mark_last_busy((struct device *) priv->rng.priv); pm_runtime_put_sync_autosuspend((struct device *) priv->rng.priv); From c819d7b836c5dfca0854d3e56664293601f2176d Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 19 Apr 2024 07:01:14 +0200 Subject: [PATCH 104/123] hwrng: stm32 - repair clock handling The clock management in this driver does not seem to be correct. The struct hwrng .init callback enables the clock, but there is no matching .cleanup callback to disable the clock. The clock get disabled as some later point by runtime PM suspend callback. Furthermore, both runtime PM and sleep suspend callbacks access registers first and disable clock which are used for register access second. If the IP is already in RPM suspend and the system enters sleep state, the sleep callback will attempt to access registers while the register clock are already disabled. This bug has been fixed once before already in commit 9bae54942b13 ("hwrng: stm32 - fix pm_suspend issue"), and regressed in commit ff4e46104f2e ("hwrng: stm32 - rework power management sequences") . Fix this slightly differently, disable register clock at the end of .init callback, this way the IP is disabled after .init. On every access to the IP, which really is only stm32_rng_read(), do pm_runtime_get_sync() which is already done in stm32_rng_read() to bring the IP from RPM suspend, and pm_runtime_mark_last_busy()/pm_runtime_put_sync_autosuspend() to put it back into RPM suspend. Change sleep suspend/resume callbacks to enable and disable register clock around register access, as those cannot use the RPM suspend/resume callbacks due to slightly different initialization in those sleep callbacks. This way, the register access should always be performed with clock surely enabled. Fixes: ff4e46104f2e ("hwrng: stm32 - rework power management sequences") Signed-off-by: Marek Vasut Signed-off-by: Herbert Xu --- drivers/char/hw_random/stm32-rng.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index b6182f86d8a4..0e903d6e22e3 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -363,6 +363,8 @@ static int stm32_rng_init(struct hwrng *rng) return -EINVAL; } + clk_disable_unprepare(priv->clk); + return 0; } @@ -387,6 +389,11 @@ static int __maybe_unused stm32_rng_runtime_suspend(struct device *dev) static int __maybe_unused stm32_rng_suspend(struct device *dev) { struct stm32_rng_private *priv = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(priv->clk); + if (err) + return err; if (priv->data->has_cond_reset) { priv->pm_conf.nscr = readl_relaxed(priv->base + RNG_NSCR); @@ -468,6 +475,8 @@ static int __maybe_unused stm32_rng_resume(struct device *dev) writel_relaxed(reg, priv->base + RNG_CR); } + clk_disable_unprepare(priv->clk); + return 0; } From 6a805864740cf46ac449ba6cd04fa8a683b27593 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 19 Apr 2024 22:54:55 -0700 Subject: [PATCH 105/123] crypto: x86/aes-xts - simplify loop in xts_crypt_slowpath() Since the total length processed by the loop in xts_crypt_slowpath() is a multiple of AES_BLOCK_SIZE, just round the length down to AES_BLOCK_SIZE even on the last step. This doesn't change behavior, as the last step will process a multiple of AES_BLOCK_SIZE regardless. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 110b3282a1f2..02a4c0c276df 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -935,16 +935,13 @@ xts_crypt_slowpath(struct skcipher_request *req, xts_crypt_func crypt_func) err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, AES_BLOCK_SIZE); - kernel_fpu_begin(); - (*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, - walk.dst.virt.addr, nbytes, req->iv); + (*crypt_func)(&ctx->crypt_ctx, + walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes & ~(AES_BLOCK_SIZE - 1), req->iv); kernel_fpu_end(); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + err = skcipher_walk_done(&walk, + walk.nbytes & (AES_BLOCK_SIZE - 1)); } if (err || !tail) From a0bbb1c187e77a14b939036ce00ba5420d46ebe5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 19 Apr 2024 22:56:42 -0700 Subject: [PATCH 106/123] crypto: x86/aes-gcm - delete unused GCM assembly code Delete aesni_gcm_enc() and aesni_gcm_dec() because they are unused. Only the incremental AES-GCM functions (aesni_gcm_init(), aesni_gcm_enc_update(), aesni_gcm_finalize()) are actually used. This saves 17 KB of object code. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 186 ------------------------------ 1 file changed, 186 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 3a3e46188dec..39066b57a70e 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -83,9 +83,6 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff .text - -#define STACK_OFFSET 8*3 - #define AadHash 16*0 #define AadLen 16*1 #define InLen (16*1)+8 @@ -116,11 +113,6 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define arg4 rcx #define arg5 r8 #define arg6 r9 -#define arg7 STACK_OFFSET+8(%rsp) -#define arg8 STACK_OFFSET+16(%rsp) -#define arg9 STACK_OFFSET+24(%rsp) -#define arg10 STACK_OFFSET+32(%rsp) -#define arg11 STACK_OFFSET+40(%rsp) #define keysize 2*15*16(%arg1) #endif @@ -1507,184 +1499,6 @@ _esb_loop_\@: MOVADQ (%r10),\TMP1 aesenclast \TMP1,\XMM0 .endm -/***************************************************************************** -* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. -* struct gcm_context_data *data -* // Context data -* u8 *out, // Plaintext output. Encrypt in-place is allowed. -* const u8 *in, // Ciphertext input -* u64 plaintext_len, // Length of data in bytes for decryption. -* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) -* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) -* // concatenated with 0x00000001. 16-byte aligned pointer. -* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. -* const u8 *aad, // Additional Authentication Data (AAD) -* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes -* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the -* // given authentication tag and only return the plaintext if they match. -* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 -* // (most likely), 12 or 8. -* -* Assumptions: -* -* keys: -* keys are pre-expanded and aligned to 16 bytes. we are using the first -* set of 11 keys in the data structure void *aes_ctx -* -* iv: -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Salt (From the SA) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Initialization Vector | -* | (This is the sequence number from IPSec header) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x1 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* -* -* AAD: -* AAD padded to 128 bits with 0 -* for example, assume AAD is a u32 vector -* -* if AAD is 8 bytes: -* AAD[3] = {A0, A1}; -* padded AAD in xmm register = {A1 A0 0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A1) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 32-bit Sequence Number (A0) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 32-bit Sequence Number -* -* if AAD is 12 bytes: -* AAD[3] = {A0, A1, A2}; -* padded AAD in xmm register = {A2 A1 A0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A2) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 64-bit Extended Sequence Number {A1,A0} | -* | | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 64-bit Extended Sequence Number -* -* poly = x^128 + x^127 + x^126 + x^121 + 1 -* -*****************************************************************************/ -SYM_FUNC_START(aesni_gcm_dec) - FUNC_SAVE - - GCM_INIT %arg6, arg7, arg8, arg9 - GCM_ENC_DEC dec - GCM_COMPLETE arg10, arg11 - FUNC_RESTORE - RET -SYM_FUNC_END(aesni_gcm_dec) - - -/***************************************************************************** -* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. -* struct gcm_context_data *data -* // Context data -* u8 *out, // Ciphertext output. Encrypt in-place is allowed. -* const u8 *in, // Plaintext input -* u64 plaintext_len, // Length of data in bytes for encryption. -* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) -* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) -* // concatenated with 0x00000001. 16-byte aligned pointer. -* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. -* const u8 *aad, // Additional Authentication Data (AAD) -* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes -* u8 *auth_tag, // Authenticated Tag output. -* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), -* // 12 or 8. -* -* Assumptions: -* -* keys: -* keys are pre-expanded and aligned to 16 bytes. we are using the -* first set of 11 keys in the data structure void *aes_ctx -* -* -* iv: -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Salt (From the SA) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Initialization Vector | -* | (This is the sequence number from IPSec header) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x1 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* -* -* AAD: -* AAD padded to 128 bits with 0 -* for example, assume AAD is a u32 vector -* -* if AAD is 8 bytes: -* AAD[3] = {A0, A1}; -* padded AAD in xmm register = {A1 A0 0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A1) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 32-bit Sequence Number (A0) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 32-bit Sequence Number -* -* if AAD is 12 bytes: -* AAD[3] = {A0, A1, A2}; -* padded AAD in xmm register = {A2 A1 A0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A2) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 64-bit Extended Sequence Number {A1,A0} | -* | | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 64-bit Extended Sequence Number -* -* poly = x^128 + x^127 + x^126 + x^121 + 1 -***************************************************************************/ -SYM_FUNC_START(aesni_gcm_enc) - FUNC_SAVE - - GCM_INIT %arg6, arg7, arg8, arg9 - GCM_ENC_DEC enc - - GCM_COMPLETE arg10, arg11 - FUNC_RESTORE - RET -SYM_FUNC_END(aesni_gcm_enc) /***************************************************************************** * void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. From ed265f7fd9a635d77c8022fc6d9a1b735dd4dfd7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 20 Apr 2024 11:20:16 -0700 Subject: [PATCH 107/123] crypto: x86/aes-gcm - simplify GCM hash subkey derivation Remove a redundant expansion of the AES key, and use rodata for zeroes. Also rename rfc4106_set_hash_subkey() to aes_gcm_derive_hash_subkey() because it's used for both versions of AES-GCM, not just RFC4106. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 02a4c0c276df..5b25d2a58aeb 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -40,7 +40,6 @@ #define AESNI_ALIGN 16 #define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN))) #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1)) -#define RFC4106_HASH_SUBKEY_SIZE 16 #define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1)) #define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA) #define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA) @@ -590,23 +589,12 @@ static int xctr_crypt(struct skcipher_request *req) return err; } -static int -rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) +static int aes_gcm_derive_hash_subkey(const struct crypto_aes_ctx *aes_key, + u8 hash_subkey[AES_BLOCK_SIZE]) { - struct crypto_aes_ctx ctx; - int ret; + static const u8 zeroes[AES_BLOCK_SIZE]; - ret = aes_expandkey(&ctx, key, key_len); - if (ret) - return ret; - - /* Clear the data in the hash sub key container to zero.*/ - /* We want to cipher all zeros to create the hash sub key. */ - memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); - - aes_encrypt(&ctx, hash_subkey, hash_subkey); - - memzero_explicit(&ctx, sizeof(ctx)); + aes_encrypt(aes_key, hash_subkey, zeroes); return 0; } @@ -624,7 +612,8 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?: - rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); + aes_gcm_derive_hash_subkey(&ctx->aes_key_expanded, + ctx->hash_subkey); } /* This is the Integrity Check Value (aka the authentication tag) length and can @@ -1327,7 +1316,8 @@ static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key, struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead); return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?: - rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); + aes_gcm_derive_hash_subkey(&ctx->aes_key_expanded, + ctx->hash_subkey); } static int generic_gcmaes_encrypt(struct aead_request *req) From a3dc1f2b6b932a13f139d3be3c765155542c1070 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 22 Apr 2024 15:13:17 +0100 Subject: [PATCH 108/123] crypto: qat - specify firmware files for 402xx The 4xxx driver can probe 4xxx and 402xx devices. However, the driver only specifies the firmware images required for 4xxx. This might result in external tools missing these binaries, if required, in the initramfs. Specify the firmware image used by 402xx with the MODULE_FIRMWARE() macros in the 4xxx driver. Fixes: a3e8c919b993 ("crypto: qat - add support for 402xx devices") Signed-off-by: Giovanni Cabiddu Reviewed-by: Damian Muszynski Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_4xxx/adf_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c index 9762f2bf7727..d26564cebdec 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c @@ -197,7 +197,9 @@ module_pci_driver(adf_driver); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel"); MODULE_FIRMWARE(ADF_4XXX_FW); +MODULE_FIRMWARE(ADF_402XX_FW); MODULE_FIRMWARE(ADF_4XXX_MMP); +MODULE_FIRMWARE(ADF_402XX_MMP); MODULE_DESCRIPTION("Intel(R) QuickAssist Technology"); MODULE_VERSION(ADF_DRV_VERSION); MODULE_SOFTDEP("pre: crypto-intel_qat"); From 15f112f9cef5ffb549d9479e64767d0bab4bdf38 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Tue, 23 Apr 2024 09:19:21 +0800 Subject: [PATCH 109/123] crypto: hisilicon/debugfs - mask the unnecessary info from the dump Some information showed by the dump function is invalid. Mask the unnecessary information from the dump file. Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/debugfs.c | 29 +++++++++++++++-------- drivers/crypto/hisilicon/hpre/hpre_main.c | 2 +- drivers/crypto/hisilicon/sec2/sec_main.c | 4 ++-- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c index 8e7dfdc5b989..1b9b7bccdeff 100644 --- a/drivers/crypto/hisilicon/debugfs.c +++ b/drivers/crypto/hisilicon/debugfs.c @@ -13,6 +13,7 @@ #define QM_DFX_COMMON_LEN 0xC3 #define QM_DFX_REGS_LEN 4UL #define QM_DBG_TMP_BUF_LEN 22 +#define QM_XQC_ADDR_MASK GENMASK(31, 0) #define CURRENT_FUN_MASK GENMASK(5, 0) #define CURRENT_Q_MASK GENMASK(31, 16) #define QM_SQE_ADDR_MASK GENMASK(7, 0) @@ -167,7 +168,6 @@ static void dump_show(struct hisi_qm *qm, void *info, static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; - struct qm_sqc *sqc_curr; struct qm_sqc sqc; u32 qp_id; int ret; @@ -183,6 +183,8 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1); if (!ret) { + sqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + sqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, &sqc, sizeof(struct qm_sqc), name); return 0; @@ -190,9 +192,10 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) down_read(&qm->qps_lock); if (qm->sqc) { - sqc_curr = qm->sqc + qp_id; - - dump_show(qm, sqc_curr, sizeof(*sqc_curr), "SOFT SQC"); + memcpy(&sqc, qm->sqc + qp_id * sizeof(struct qm_sqc), sizeof(struct qm_sqc)); + sqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + sqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); + dump_show(qm, &sqc, sizeof(struct qm_sqc), "SOFT SQC"); } up_read(&qm->qps_lock); @@ -202,7 +205,6 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; - struct qm_cqc *cqc_curr; struct qm_cqc cqc; u32 qp_id; int ret; @@ -218,6 +220,8 @@ static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1); if (!ret) { + cqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + cqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, &cqc, sizeof(struct qm_cqc), name); return 0; @@ -225,9 +229,10 @@ static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) down_read(&qm->qps_lock); if (qm->cqc) { - cqc_curr = qm->cqc + qp_id; - - dump_show(qm, cqc_curr, sizeof(*cqc_curr), "SOFT CQC"); + memcpy(&cqc, qm->cqc + qp_id * sizeof(struct qm_cqc), sizeof(struct qm_cqc)); + cqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + cqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); + dump_show(qm, &cqc, sizeof(struct qm_cqc), "SOFT CQC"); } up_read(&qm->qps_lock); @@ -263,6 +268,10 @@ static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name) if (ret) return ret; + aeqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + aeqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); + eqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + eqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, xeqc, size, name); return ret; @@ -310,10 +319,10 @@ static int q_dump_param_parse(struct hisi_qm *qm, char *s, static int qm_sq_dump(struct hisi_qm *qm, char *s, char *name) { - u16 sq_depth = qm->qp_array->cq_depth; - void *sqe; + u16 sq_depth = qm->qp_array->sq_depth; struct hisi_qp *qp; u32 qp_id, sqe_id; + void *sqe; int ret; ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth); diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 25b44416da45..10aa4da93323 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -106,7 +106,7 @@ #define HPRE_SHAPER_TYPE_RATE 640 #define HPRE_VIA_MSI_DSM 1 #define HPRE_SQE_MASK_OFFSET 8 -#define HPRE_SQE_MASK_LEN 24 +#define HPRE_SQE_MASK_LEN 44 #define HPRE_CTX_Q_NUM_DEF 1 #define HPRE_DFX_BASE 0x301000 diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 853b1cb85016..6eba1f22720f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -99,8 +99,8 @@ #define SEC_DBGFS_VAL_MAX_LEN 20 #define SEC_SINGLE_PORT_MAX_TRANS 0x2060 -#define SEC_SQE_MASK_OFFSET 64 -#define SEC_SQE_MASK_LEN 48 +#define SEC_SQE_MASK_OFFSET 16 +#define SEC_SQE_MASK_LEN 108 #define SEC_SHAPER_TYPE_RATE 400 #define SEC_DFX_BASE 0x301000 From 6117af86365916e4202b5a709c155f7e6e5df810 Mon Sep 17 00:00:00 2001 From: Wenkai Lin Date: Tue, 23 Apr 2024 09:19:22 +0800 Subject: [PATCH 110/123] crypto: hisilicon/sec2 - fix for register offset The offset of SEC_CORE_ENABLE_BITMAP should be 0 instead of 32, it cause a kasan shift-out-bounds warning, fix it. Signed-off-by: Wenkai Lin Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 6eba1f22720f..75aad04ffe5e 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -152,7 +152,7 @@ static const struct hisi_qm_cap_info sec_basic_info[] = { {SEC_CORE_TYPE_NUM_CAP, 0x313c, 16, GENMASK(3, 0), 0x1, 0x1, 0x1}, {SEC_CORE_NUM_CAP, 0x313c, 8, GENMASK(7, 0), 0x4, 0x4, 0x4}, {SEC_CORES_PER_CLUSTER_NUM_CAP, 0x313c, 0, GENMASK(7, 0), 0x4, 0x4, 0x4}, - {SEC_CORE_ENABLE_BITMAP, 0x3140, 32, GENMASK(31, 0), 0x17F, 0x17F, 0xF}, + {SEC_CORE_ENABLE_BITMAP, 0x3140, 0, GENMASK(31, 0), 0x17F, 0x17F, 0xF}, {SEC_DRV_ALG_BITMAP_LOW, 0x3144, 0, GENMASK(31, 0), 0x18050CB, 0x18050CB, 0x18670CF}, {SEC_DRV_ALG_BITMAP_HIGH, 0x3148, 0, GENMASK(31, 0), 0x395C, 0x395C, 0x395C}, {SEC_DEV_ALG_BITMAP_LOW, 0x314c, 0, GENMASK(31, 0), 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, From 3d12d90efadf689b05280ebbb3fca870298d218f Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Mon, 29 Apr 2024 14:06:37 +0800 Subject: [PATCH 111/123] crypto: starfive - Skip dma setup for zeroed message Skip dma setup and mapping for AES driver if plaintext is empty. Signed-off-by: Jia Jie Ho Signed-off-by: Herbert Xu --- drivers/crypto/starfive/jh7110-aes.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c index 72b7d46150d5..9d6e2f936f03 100644 --- a/drivers/crypto/starfive/jh7110-aes.c +++ b/drivers/crypto/starfive/jh7110-aes.c @@ -590,12 +590,16 @@ static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq) if (ret) return ret; + if (!cryp->total_in) + goto finish_req; + starfive_aes_dma_init(cryp); ret = starfive_aes_map_sg(cryp, rctx->in_sg, rctx->out_sg); if (ret) return ret; +finish_req: starfive_aes_finish_req(ctx); return 0; From 25ca4a85e943d73582f3e576f0f829329568d0a3 Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Mon, 29 Apr 2024 14:06:38 +0800 Subject: [PATCH 112/123] crypto: starfive - Skip unneeded fallback allocation Skip sw fallback allocation if RSA module failed to get device handle. Signed-off-by: Jia Jie Ho Signed-off-by: Herbert Xu --- drivers/crypto/starfive/jh7110-rsa.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/starfive/jh7110-rsa.c b/drivers/crypto/starfive/jh7110-rsa.c index e642e948d747..4d7eb3d1e764 100644 --- a/drivers/crypto/starfive/jh7110-rsa.c +++ b/drivers/crypto/starfive/jh7110-rsa.c @@ -537,16 +537,14 @@ static int starfive_rsa_init_tfm(struct crypto_akcipher *tfm) { struct starfive_cryp_ctx *ctx = akcipher_tfm_ctx(tfm); + ctx->cryp = starfive_cryp_find_dev(ctx); + if (!ctx->cryp) + return -ENODEV; + ctx->akcipher_fbk = crypto_alloc_akcipher("rsa-generic", 0, 0); if (IS_ERR(ctx->akcipher_fbk)) return PTR_ERR(ctx->akcipher_fbk); - ctx->cryp = starfive_cryp_find_dev(ctx); - if (!ctx->cryp) { - crypto_free_akcipher(ctx->akcipher_fbk); - return -ENODEV; - } - akcipher_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) + sizeof(struct crypto_akcipher) + 32); From d7f01649f4eaf1878472d3d3f480ae1e50d98f6c Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Mon, 29 Apr 2024 14:06:39 +0800 Subject: [PATCH 113/123] crypto: starfive - Do not free stack buffer RSA text data uses variable length buffer allocated in software stack. Calling kfree on it causes undefined behaviour in subsequent operations. Cc: #6.7+ Signed-off-by: Jia Jie Ho Signed-off-by: Herbert Xu --- drivers/crypto/starfive/jh7110-rsa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/starfive/jh7110-rsa.c b/drivers/crypto/starfive/jh7110-rsa.c index 4d7eb3d1e764..33093ba4b13a 100644 --- a/drivers/crypto/starfive/jh7110-rsa.c +++ b/drivers/crypto/starfive/jh7110-rsa.c @@ -276,7 +276,6 @@ static int starfive_rsa_enc_core(struct starfive_cryp_ctx *ctx, int enc) err_rsa_crypt: writel(STARFIVE_RSA_RESET, cryp->base + STARFIVE_PKA_CACR_OFFSET); - kfree(rctx->rsa_data); return ret; } From f8c423bab99cc2facc37cfd7d29ad8864f98a4c2 Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Mon, 29 Apr 2024 14:06:40 +0800 Subject: [PATCH 114/123] crypto: starfive - Use fallback for unaligned dma access Dma address mapping fails on unaligned scatterlist offset. Use sw fallback for these cases. Signed-off-by: Jia Jie Ho Signed-off-by: Herbert Xu --- drivers/crypto/starfive/jh7110-aes.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c index 9d6e2f936f03..86a1a1fa9f8f 100644 --- a/drivers/crypto/starfive/jh7110-aes.c +++ b/drivers/crypto/starfive/jh7110-aes.c @@ -314,7 +314,7 @@ static int starfive_aes_read_authtag(struct starfive_cryp_ctx *ctx) cryp->total_in, cryp->authsize, 1); } else { if (crypto_memneq(cryp->tag_in, cryp->tag_out, cryp->authsize)) - return dev_err_probe(cryp->dev, -EBADMSG, "Failed tag verification\n"); + return -EBADMSG; } return 0; @@ -753,14 +753,16 @@ static bool starfive_aes_check_unaligned(struct starfive_cryp_dev *cryp, int i; for_each_sg(src, tsg, sg_nents(src), i) - if (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) && - !sg_is_last(tsg)) + if (!IS_ALIGNED(tsg->offset, sizeof(u32)) || + (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) && + !sg_is_last(tsg))) return true; if (src != dst) for_each_sg(dst, tsg, sg_nents(dst), i) - if (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) && - !sg_is_last(tsg)) + if (!IS_ALIGNED(tsg->offset, sizeof(u32)) || + (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) && + !sg_is_last(tsg))) return true; return false; From 6144436803b7a8738f3defa1fd6b4b6751f6793e Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Mon, 29 Apr 2024 11:58:52 +0530 Subject: [PATCH 115/123] crypto: caam - init-clk based on caam-page0-access CAAM clock initializat is done based on the basis of soc specific info stored in struct caam_imx_data: - caam-page0-access flag - num_clks CAAM driver needs to be aware of access rights to CAAM control page i.e., page0, to do things differently. Signed-off-by: Pankaj Gupta Reviewed-by: Gaurav Jain Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index bdf367f3f679..02363c467cf3 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -512,6 +512,7 @@ static const struct of_device_id caam_match[] = { MODULE_DEVICE_TABLE(of, caam_match); struct caam_imx_data { + bool page0_access; const struct clk_bulk_data *clks; int num_clks; }; @@ -524,6 +525,7 @@ static const struct clk_bulk_data caam_imx6_clks[] = { }; static const struct caam_imx_data caam_imx6_data = { + .page0_access = true, .clks = caam_imx6_clks, .num_clks = ARRAY_SIZE(caam_imx6_clks), }; @@ -534,6 +536,7 @@ static const struct clk_bulk_data caam_imx7_clks[] = { }; static const struct caam_imx_data caam_imx7_data = { + .page0_access = true, .clks = caam_imx7_clks, .num_clks = ARRAY_SIZE(caam_imx7_clks), }; @@ -545,6 +548,7 @@ static const struct clk_bulk_data caam_imx6ul_clks[] = { }; static const struct caam_imx_data caam_imx6ul_data = { + .page0_access = true, .clks = caam_imx6ul_clks, .num_clks = ARRAY_SIZE(caam_imx6ul_clks), }; @@ -554,6 +558,7 @@ static const struct clk_bulk_data caam_vf610_clks[] = { }; static const struct caam_imx_data caam_vf610_data = { + .page0_access = true, .clks = caam_vf610_clks, .num_clks = ARRAY_SIZE(caam_vf610_clks), }; @@ -860,6 +865,7 @@ static int caam_probe(struct platform_device *pdev) int pg_size; int BLOCK_OFFSET = 0; bool reg_access = true; + const struct caam_imx_data *imx_soc_data; ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(*ctrlpriv), GFP_KERNEL); if (!ctrlpriv) @@ -894,12 +900,20 @@ static int caam_probe(struct platform_device *pdev) return -EINVAL; } + imx_soc_data = imx_soc_match->data; + reg_access = reg_access && imx_soc_data->page0_access; + /* + * CAAM clocks cannot be controlled from kernel. + */ + if (!imx_soc_data->num_clks) + goto iomap_ctrl; + ret = init_clocks(dev, imx_soc_match->data); if (ret) return ret; } - +iomap_ctrl: /* Get configuration properties from device tree */ /* First, get register page */ ctrl = devm_of_iomap(dev, nprop, 0, NULL); From d2835701d93cae6d597672ef9dc3fa889867031a Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Mon, 29 Apr 2024 11:58:54 +0530 Subject: [PATCH 116/123] crypto: caam - i.MX8ULP donot have CAAM page0 access iMX8ULP have a secure-enclave hardware IP called EdgeLock Enclave(ELE), that control access to caam controller's register page, i.e., page0. At all, if the ELE release access to CAAM controller's register page, it will release to secure-world only. Clocks are turned on automatically for iMX8ULP. There exists the caam clock gating bit, but it is not advised to gate the clock at linux, as optee-os or any other entity might be using it. Signed-off-by: Pankaj Gupta Reviewed-by: Gaurav Jain Reviewed-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 02363c467cf3..bd418dea586d 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -563,11 +563,14 @@ static const struct caam_imx_data caam_vf610_data = { .num_clks = ARRAY_SIZE(caam_vf610_clks), }; +static const struct caam_imx_data caam_imx8ulp_data; + static const struct soc_device_attribute caam_imx_soc_table[] = { { .soc_id = "i.MX6UL", .data = &caam_imx6ul_data }, { .soc_id = "i.MX6*", .data = &caam_imx6_data }, { .soc_id = "i.MX7*", .data = &caam_imx7_data }, { .soc_id = "i.MX8M*", .data = &caam_imx7_data }, + { .soc_id = "i.MX8ULP", .data = &caam_imx8ulp_data }, { .soc_id = "VF*", .data = &caam_vf610_data }, { .family = "Freescale i.MX" }, { /* sentinel */ } From 98f9e447134be08d2edd696bb103ab6068fd3956 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Apr 2024 14:14:42 +0200 Subject: [PATCH 117/123] crypto: api - use 'time_left' variable with wait_for_completion_killable_timeout() There is a confusing pattern in the kernel to use a variable named 'timeout' to store the result of wait_for_completion_killable_timeout() causing patterns like: timeout = wait_for_completion_killable_timeout(...) if (!timeout) return -ETIMEDOUT; with all kinds of permutations. Use 'time_left' as a variable to make the code self explaining. Signed-off-by: Wolfram Sang Signed-off-by: Herbert Xu --- crypto/api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 7f402107f0cc..6aa5a3b4ed5e 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -202,18 +202,18 @@ static void crypto_start_test(struct crypto_larval *larval) static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) { struct crypto_larval *larval = (void *)alg; - long timeout; + long time_left; if (!crypto_boot_test_finished()) crypto_start_test(larval); - timeout = wait_for_completion_killable_timeout( + time_left = wait_for_completion_killable_timeout( &larval->completion, 60 * HZ); alg = larval->adult; - if (timeout < 0) + if (time_left < 0) alg = ERR_PTR(-EINTR); - else if (!timeout) + else if (!time_left) alg = ERR_PTR(-ETIMEDOUT); else if (!alg) alg = ERR_PTR(-ENOENT); From e02ea6f9f2590cc721d0b921c2f2e650105a654c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Apr 2024 14:15:51 +0200 Subject: [PATCH 118/123] crypto: sahara - use 'time_left' variable with wait_for_completion_timeout() There is a confusing pattern in the kernel to use a variable named 'timeout' to store the result of wait_for_completion_timeout() causing patterns like: timeout = wait_for_completion_timeout(...) if (!timeout) return -ETIMEDOUT; with all kinds of permutations. Use 'time_left' as a variable to make the code self explaining. Signed-off-by: Wolfram Sang Signed-off-by: Herbert Xu --- drivers/crypto/sahara.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 3423b5cde1c7..96d4af5d48a6 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -559,7 +559,7 @@ static int sahara_aes_process(struct skcipher_request *req) struct sahara_ctx *ctx; struct sahara_aes_reqctx *rctx; int ret; - unsigned long timeout; + unsigned long time_left; /* Request is ready to be dispatched by the device */ dev_dbg(dev->device, @@ -597,15 +597,15 @@ static int sahara_aes_process(struct skcipher_request *req) if (ret) return -EINVAL; - timeout = wait_for_completion_timeout(&dev->dma_completion, - msecs_to_jiffies(SAHARA_TIMEOUT_MS)); + time_left = wait_for_completion_timeout(&dev->dma_completion, + msecs_to_jiffies(SAHARA_TIMEOUT_MS)); dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg, DMA_FROM_DEVICE); dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, DMA_TO_DEVICE); - if (!timeout) { + if (!time_left) { dev_err(dev->device, "AES timeout\n"); return -ETIMEDOUT; } @@ -931,7 +931,7 @@ static int sahara_sha_process(struct ahash_request *req) struct sahara_dev *dev = dev_ptr; struct sahara_sha_reqctx *rctx = ahash_request_ctx(req); int ret; - unsigned long timeout; + unsigned long time_left; ret = sahara_sha_prepare_request(req); if (!ret) @@ -963,14 +963,14 @@ static int sahara_sha_process(struct ahash_request *req) sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR); - timeout = wait_for_completion_timeout(&dev->dma_completion, - msecs_to_jiffies(SAHARA_TIMEOUT_MS)); + time_left = wait_for_completion_timeout(&dev->dma_completion, + msecs_to_jiffies(SAHARA_TIMEOUT_MS)); if (rctx->sg_in_idx) dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, DMA_TO_DEVICE); - if (!timeout) { + if (!time_left) { dev_err(dev->device, "SHA timeout\n"); return -ETIMEDOUT; } From bfbe27ba59e19e28801cc1a931a8f6d1d35b76d1 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 2 May 2024 17:33:39 +0200 Subject: [PATCH 119/123] crypto: iaa - Use kmemdup() instead of kzalloc() and memcpy() Fixes the following two Coccinelle/coccicheck warnings reported by memdup.cocci: iaa_crypto_main.c:350:19-26: WARNING opportunity for kmemdup iaa_crypto_main.c:358:18-25: WARNING opportunity for kmemdup Signed-off-by: Thorsten Blum Reviewed-by: Tom Zanussi Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 814fb2c31626..e810d286ee8c 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -347,18 +347,16 @@ int add_iaa_compression_mode(const char *name, goto free; if (ll_table) { - mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL); + mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL); if (!mode->ll_table) goto free; - memcpy(mode->ll_table, ll_table, ll_table_size); mode->ll_table_size = ll_table_size; } if (d_table) { - mode->d_table = kzalloc(d_table_size, GFP_KERNEL); + mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL); if (!mode->d_table) goto free; - memcpy(mode->d_table, d_table, d_table_size); mode->d_table_size = d_table_size; } From e228b41abb465402c0d0faef52768277027cc4f0 Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Fri, 3 May 2024 21:10:51 +0000 Subject: [PATCH 120/123] crypto: atmel-i2c - add missing arg description Add missing description for argument hwrng. Signed-off-by: Lothar Rubusch Signed-off-by: Herbert Xu --- drivers/crypto/atmel-i2c.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index c0bd429ee2c7..a442b47a46b0 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -124,6 +124,7 @@ struct atmel_ecc_driver_data { * @wake_token : wake token array of zeros * @wake_token_sz : size in bytes of the wake_token * @tfm_count : number of active crypto transformations on i2c client + * @hwrng : hold the hardware generated rng * * Reads and writes from/to the i2c client are sequential. The first byte * transmitted to the device is treated as the byte size. Any attempt to send From 3f5f746165f7082f3c3f2f4b464e554c05a2621e Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Fri, 3 May 2024 21:10:52 +0000 Subject: [PATCH 121/123] crypto: atmel-i2c - rename read function Make the memory read function name more specific to the read memory zone. The Atmel SHA204 chips provide config, otp and data zone. The implemented read function in fact only reads some fields in zone config. The function renaming allows for a uniform naming scheme when reading from other memory zones. Signed-off-by: Lothar Rubusch Signed-off-by: Herbert Xu --- drivers/crypto/atmel-i2c.c | 6 +++--- drivers/crypto/atmel-i2c.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 83a9093eff25..a0d0d4f2ab25 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -51,7 +51,7 @@ static void atmel_i2c_checksum(struct atmel_i2c_cmd *cmd) *__crc16 = cpu_to_le16(bitrev16(crc16(0, data, len))); } -void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd) +void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd) { cmd->word_addr = COMMAND; cmd->opcode = OPCODE_READ; @@ -68,7 +68,7 @@ void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd) cmd->msecs = MAX_EXEC_TIME_READ; cmd->rxsize = READ_RSP_SIZE; } -EXPORT_SYMBOL(atmel_i2c_init_read_cmd); +EXPORT_SYMBOL(atmel_i2c_init_read_config_cmd); void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd) { @@ -301,7 +301,7 @@ static int device_sanity_check(struct i2c_client *client) if (!cmd) return -ENOMEM; - atmel_i2c_init_read_cmd(cmd); + atmel_i2c_init_read_config_cmd(cmd); ret = atmel_i2c_send_receive(client, cmd); if (ret) diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index a442b47a46b0..275297a8208b 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -178,7 +178,7 @@ void atmel_i2c_flush_queue(void); int atmel_i2c_send_receive(struct i2c_client *client, struct atmel_i2c_cmd *cmd); -void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd); +void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd); void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd); void atmel_i2c_init_genkey_cmd(struct atmel_i2c_cmd *cmd, u16 keyid); int atmel_i2c_init_ecdh_cmd(struct atmel_i2c_cmd *cmd, From e05ce444e9e59f924b53da8209bfb7208653817c Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Fri, 3 May 2024 21:10:53 +0000 Subject: [PATCH 122/123] crypto: atmel-sha204a - add reading from otp zone Provide a read function reading the otp zone. The otp zone can be used for storing serial numbers. The otp zone, as also data zone, are only accessible if the chip was locked before. Locking the chip is a post production customization and has to be done manually i.e. not by this driver. Without this step the chip is pretty much not usable, where putting or not putting data into the otp zone is optional. Signed-off-by: Lothar Rubusch Signed-off-by: Herbert Xu --- drivers/crypto/atmel-i2c.c | 24 ++++++++++++++++++++++++ drivers/crypto/atmel-i2c.h | 5 +++++ drivers/crypto/atmel-sha204a.c | 23 +++++++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index a0d0d4f2ab25..a895e4289efa 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -70,6 +70,30 @@ void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd) } EXPORT_SYMBOL(atmel_i2c_init_read_config_cmd); +int atmel_i2c_init_read_otp_cmd(struct atmel_i2c_cmd *cmd, u16 addr) +{ + if (addr < 0 || addr > OTP_ZONE_SIZE) + return -1; + + cmd->word_addr = COMMAND; + cmd->opcode = OPCODE_READ; + /* + * Read the word from OTP zone that may contain e.g. serial + * numbers or similar if persistently pre-initialized and locked + */ + cmd->param1 = OTP_ZONE; + cmd->param2 = cpu_to_le16(addr); + cmd->count = READ_COUNT; + + atmel_i2c_checksum(cmd); + + cmd->msecs = MAX_EXEC_TIME_READ; + cmd->rxsize = READ_RSP_SIZE; + + return 0; +} +EXPORT_SYMBOL(atmel_i2c_init_read_otp_cmd); + void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd) { cmd->word_addr = COMMAND; diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index 275297a8208b..72f04c15682f 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -64,6 +64,10 @@ struct atmel_i2c_cmd { /* Definitions for eeprom organization */ #define CONFIGURATION_ZONE 0 +#define OTP_ZONE 1 + +/* Definitions for eeprom zone sizes */ +#define OTP_ZONE_SIZE 64 /* Definitions for Indexes common to all commands */ #define RSP_DATA_IDX 1 /* buffer index of data in response */ @@ -179,6 +183,7 @@ void atmel_i2c_flush_queue(void); int atmel_i2c_send_receive(struct i2c_client *client, struct atmel_i2c_cmd *cmd); void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd); +int atmel_i2c_init_read_otp_cmd(struct atmel_i2c_cmd *cmd, u16 addr); void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd); void atmel_i2c_init_genkey_cmd(struct atmel_i2c_cmd *cmd, u16 keyid); int atmel_i2c_init_ecdh_cmd(struct atmel_i2c_cmd *cmd, diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index c77f482d2a97..563a0493b492 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -91,6 +91,29 @@ static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max, return max; } +static int atmel_sha204a_otp_read(struct i2c_client *client, u16 addr, u8 *otp) +{ + struct atmel_i2c_cmd cmd; + int ret = -1; + + if (atmel_i2c_init_read_otp_cmd(&cmd, addr) < 0) { + dev_err(&client->dev, "failed, invalid otp address %04X\n", + addr); + return ret; + } + + ret = atmel_i2c_send_receive(client, &cmd); + + if (cmd.data[0] == 0xff) { + dev_err(&client->dev, "failed, device not ready\n"); + return -ret; + } + + memcpy(otp, cmd.data+1, 4); + + return ret; +} + static int atmel_sha204a_probe(struct i2c_client *client) { struct atmel_i2c_client_priv *i2c_priv; From 13909a0c88972c5ef5d13f44d1a8bf065a31bdf4 Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Fri, 3 May 2024 21:10:54 +0000 Subject: [PATCH 123/123] crypto: atmel-sha204a - provide the otp content Set up sysfs for the Atmel SHA204a. Provide the content of the otp zone as an attribute field on the sysfs entry. Thereby make sure that if the chip is locked, not connected or trouble with the i2c bus, the sysfs device is not set up. This is mostly already handled in atmel-i2c. Signed-off-by: Lothar Rubusch Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha204a.c | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 563a0493b492..24ffdf505023 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -114,6 +114,39 @@ static int atmel_sha204a_otp_read(struct i2c_client *client, u16 addr, u8 *otp) return ret; } +static ssize_t otp_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u16 addr; + u8 otp[OTP_ZONE_SIZE]; + char *str = buf; + struct i2c_client *client = to_i2c_client(dev); + int i; + + for (addr = 0; addr < OTP_ZONE_SIZE/4; addr++) { + if (atmel_sha204a_otp_read(client, addr, otp + addr * 4) < 0) { + dev_err(dev, "failed to read otp zone\n"); + break; + } + } + + for (i = 0; i < addr*2; i++) + str += sprintf(str, "%02X", otp[i]); + str += sprintf(str, "\n"); + return str - buf; +} +static DEVICE_ATTR_RO(otp); + +static struct attribute *atmel_sha204a_attrs[] = { + &dev_attr_otp.attr, + NULL +}; + +static const struct attribute_group atmel_sha204a_groups = { + .name = "atsha204a", + .attrs = atmel_sha204a_attrs, +}; + static int atmel_sha204a_probe(struct i2c_client *client) { struct atmel_i2c_client_priv *i2c_priv; @@ -134,6 +167,16 @@ static int atmel_sha204a_probe(struct i2c_client *client) if (ret) dev_warn(&client->dev, "failed to register RNG (%d)\n", ret); + /* otp read out */ + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + return -ENODEV; + + ret = sysfs_create_group(&client->dev.kobj, &atmel_sha204a_groups); + if (ret) { + dev_err(&client->dev, "failed to register sysfs entry\n"); + return ret; + } + return ret; } @@ -146,6 +189,8 @@ static void atmel_sha204a_remove(struct i2c_client *client) return; } + sysfs_remove_group(&client->dev.kobj, &atmel_sha204a_groups); + kfree((void *)i2c_priv->hwrng.priv); }