From 249d2e6f597b72769e158bfa0f64051de645ce3d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 1 Aug 2022 15:02:37 -0600 Subject: [PATCH 01/60] dt-bindings: dma: arm,pl330: Add missing 'iommus' property The pl330 can be behind an IOMMU, which is the case for the Arm Juno board. Add the 'iommus' property allowing for 1 IOMMU entry per channel for writes and 1 IOMMU entry for reads. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220801210237.1501488-1-robh@kernel.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/arm,pl330.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/arm,pl330.yaml b/Documentation/devicetree/bindings/dma/arm,pl330.yaml index 2bec69b308f8..4a3dd6f5309b 100644 --- a/Documentation/devicetree/bindings/dma/arm,pl330.yaml +++ b/Documentation/devicetree/bindings/dma/arm,pl330.yaml @@ -55,6 +55,12 @@ properties: dma-coherent: true + iommus: + minItems: 1 + maxItems: 9 + description: Up to 1 IOMMU entry per DMA channel for writes and 1 + IOMMU entry for reads. + power-domains: maxItems: 1 From abd7bb690bcf662afc8abe74fd3c10f7ad1c5b19 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 12 Aug 2022 10:27:19 +0200 Subject: [PATCH 02/60] dt-bindings: dmaengine: qcom: gpi: add compatible for SM6350 Document the compatible for the GPI DMA controller on the SM6350 SoC. Signed-off-by: Luca Weiss Reviewed-by: Bjorn Andersson Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220812082721.1125759-2-luca.weiss@fairphone.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 7d2fc4eb5530..eabf8a76d3a0 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -21,6 +21,7 @@ properties: enum: - qcom,sc7280-gpi-dma - qcom,sdm845-gpi-dma + - qcom,sm6350-gpi-dma - qcom,sm8150-gpi-dma - qcom,sm8250-gpi-dma - qcom,sm8350-gpi-dma From 5abef9d713629d7201b1a3dd1fe6a8baa869da68 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 12 Aug 2022 10:27:20 +0200 Subject: [PATCH 03/60] dmaengine: qcom: gpi: Add SM6350 support The Qualcomm SM6350 platform, like the SM8450, provides a set of GPI controllers with an ee-offset of 0x10000. Add this to the driver.
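For illustration, here is a minimal sketch of how such per-compatible match data is typically consumed at probe time (the helper function and its use below are an illustrative assumption, not necessarily the exact code in gpi.c):

	#include <linux/platform_device.h>
	#include <linux/property.h>

	/*
	 * Illustrative sketch: the .data pointer of the matched of_device_id
	 * carries the per-SoC ee-offset, locating the GPI EE registers inside
	 * the mapped register region.
	 */
	static void __iomem *gpi_ee_base_sketch(struct platform_device *pdev,
						void __iomem *regs)
	{
		/* device_get_match_data() returns .data of the matched entry */
		uintptr_t ee_offset = (uintptr_t)device_get_match_data(&pdev->dev);

		return regs + ee_offset; /* 0x10000 on SM6350, 0 on e.g. SDM845 */
	}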
Signed-off-by: Luca Weiss Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220812082721.1125759-3-luca.weiss@fairphone.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 8f0c9c4e2efd..89839864b4ec 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -2288,6 +2288,7 @@ static int gpi_probe(struct platform_device *pdev) static const struct of_device_id gpi_of_match[] = { { .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 }, { .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 }, + { .compatible = "qcom,sm6350-gpi-dma", .data = (void *)0x10000 }, { .compatible = "qcom,sm8150-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm8250-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm8350-gpi-dma", .data = (void *)0x10000 }, From 407171717a4f4d2d80825584643374a2dfdb0540 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Tue, 23 Aug 2022 09:37:09 -0700 Subject: [PATCH 04/60] dmaengine: idxd: avoid deadlock in process_misc_interrupts() idxd_device_clear_state() now grabs the idxd->dev_lock itself, so don't grab the lock prior to calling it. This was seen in testing after a dmar fault occurred on the system, resulting in lockup stack traces. Cc: Fenghua Yu Cc: Dave Jiang Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Fixes: cf4ac3fef338 ("dmaengine: idxd: fix lockdep warning on device driver removal") Signed-off-by: Jerry Snitselaar Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20220823163709.2102468-1-jsnitsel@redhat.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 743ead5ebc57..5b9921475be6 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -324,13 +324,11 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) idxd->state = IDXD_DEV_HALTED; idxd_wqs_quiesce(idxd); idxd_wqs_unmap_portal(idxd); - spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); dev_err(&idxd->pdev->dev, "idxd halted, need %s.\n", gensts.reset_type == IDXD_DEVICE_RESET_FLR ? "FLR" : "system reset"); - spin_unlock(&idxd->dev_lock); return -ENXIO; } } From e3bdaa04ada31f46d0586df83a2789b8913053c5 Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Tue, 30 Aug 2022 14:22:45 +0800 Subject: [PATCH 05/60] dmaengine: hisilicon: Disable channels when unregister hisi_dma When hisi_dma is unloaded or unbound, all of its channels should be disabled. This patch disables the DMA channels when the driver is unloaded or unbound.
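The resulting call pattern can be summarized as follows (a condensed sketch of the change shown in the diff below, not additional driver code):

	/*
	 * free_chan_resources: reset the hardware channel, then re-enable it
	 * and unmask its interrupt so it can be reused later.
	 */
	hisi_dma_reset_or_disable_hw_chan(chan, false);

	/*
	 * disable_qp (driver unload/unbind): reset the hardware channel and
	 * leave it disabled with its interrupt masked.
	 */
	hisi_dma_reset_or_disable_hw_chan(&hdma_dev->chan[qp_index], true);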
Fixes: e9f08b65250d ("dmaengine: hisilicon: Add Kunpeng DMA engine support") Signed-off-by: Jie Hai Acked-by: Zhou Wang Link: https://lore.kernel.org/r/20220830062251.52993-2-haijie1@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/hisi_dma.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index 43817ced3a3e..98bc488893cc 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -180,7 +180,8 @@ static void hisi_dma_reset_qp_point(struct hisi_dma_dev *hdma_dev, u32 index) hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR, index, 0); } -static void hisi_dma_reset_hw_chan(struct hisi_dma_chan *chan) +static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan, + bool disable) { struct hisi_dma_dev *hdma_dev = chan->hdma_dev; u32 index = chan->qp_num, tmp; @@ -201,8 +202,11 @@ static void hisi_dma_reset_hw_chan(struct hisi_dma_chan *chan) hisi_dma_do_reset(hdma_dev, index); hisi_dma_reset_qp_point(hdma_dev, index); hisi_dma_pause_dma(hdma_dev, index, false); - hisi_dma_enable_dma(hdma_dev, index, true); - hisi_dma_unmask_irq(hdma_dev, index); + + if (!disable) { + hisi_dma_enable_dma(hdma_dev, index, true); + hisi_dma_unmask_irq(hdma_dev, index); + } ret = readl_relaxed_poll_timeout(hdma_dev->base + HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp, @@ -218,7 +222,7 @@ static void hisi_dma_free_chan_resources(struct dma_chan *c) struct hisi_dma_chan *chan = to_hisi_dma_chan(c); struct hisi_dma_dev *hdma_dev = chan->hdma_dev; - hisi_dma_reset_hw_chan(chan); + hisi_dma_reset_or_disable_hw_chan(chan, false); vchan_free_chan_resources(&chan->vc); memset(chan->sq, 0, sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth); @@ -394,7 +398,7 @@ static void hisi_dma_enable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index) static void hisi_dma_disable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index) { - hisi_dma_reset_hw_chan(&hdma_dev->chan[qp_index]); + hisi_dma_reset_or_disable_hw_chan(&hdma_dev->chan[qp_index], true); } static void hisi_dma_enable_qps(struct hisi_dma_dev *hdma_dev) From 94477a79cf80e8ab55b68f14bc579a12ddea1e0b Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Tue, 30 Aug 2022 14:22:46 +0800 Subject: [PATCH 06/60] dmaengine: hisilicon: Fix CQ head update After completion of data transfer of one or multiple descriptors, the completion status and the current head pointer to the submission queue are written into the CQ, and an interrupt can be generated to inform the software. In the interrupt handler the CQ is read and cq_head is updated. hisi_dma_irq updates cq_head only when the completion status is success. When an abnormal interrupt is reported, cq_head is not updated, which causes subsequent interrupt handlers to read the stale (error) CQ entry and never report the correct status. This patch updates cq_head whenever the CQ is accessed.
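The corrected interrupt flow can be summarized as follows (a condensed sketch of hisi_dma_irq() as it looks after the diff below):

	cqe = chan->cq + chan->cq_head;
	if (desc) {
		/* consume the CQE unconditionally so the head never stalls */
		chan->cq_head = (chan->cq_head + 1) % hdma_dev->chan_depth;
		hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR,
				    chan->qp_num, chan->cq_head);
		if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC)
			vchan_cookie_complete(&desc->vd);
		else
			dev_err(&hdma_dev->pdev->dev, "task error!\n");
	}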
Fixes: e9f08b65250d ("dmaengine: hisilicon: Add Kunpeng DMA engine support") Signed-off-by: Jie Hai Acked-by: Zhou Wang Link: https://lore.kernel.org/r/20220830062251.52993-3-haijie1@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/hisi_dma.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index 98bc488893cc..837f7e4adfa6 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -436,12 +436,10 @@ static irqreturn_t hisi_dma_irq(int irq, void *data) desc = chan->desc; cqe = chan->cq + chan->cq_head; if (desc) { + chan->cq_head = (chan->cq_head + 1) % hdma_dev->chan_depth; + hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR, + chan->qp_num, chan->cq_head); if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) { - chan->cq_head = (chan->cq_head + 1) % - hdma_dev->chan_depth; - hisi_dma_chan_write(hdma_dev->base, - HISI_DMA_CQ_HEAD_PTR, chan->qp_num, - chan->cq_head); vchan_cookie_complete(&desc->vd); } else { dev_err(&hdma_dev->pdev->dev, "task error!\n"); From 2cbb95883c990d0002a77e13d3278913ab26ad79 Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Tue, 30 Aug 2022 14:22:47 +0800 Subject: [PATCH 07/60] dmaengine: hisilicon: Add multi-thread support for a DMA channel When we get a DMA channel and try to use it in multiple threads, it causes an oops and hangs the system. % echo 100 > /sys/module/dmatest/parameters/threads_per_chan % echo 100 > /sys/module/dmatest/parameters/iterations % echo 1 > /sys/module/dmatest/parameters/run [383493.327077] Unable to handle kernel paging request at virtual address dead000000000108 [383493.335103] Mem abort info: [383493.335103] ESR = 0x96000044 [383493.335105] EC = 0x25: DABT (current EL), IL = 32 bits [383493.335107] SET = 0, FnV = 0 [383493.335108] EA = 0, S1PTW = 0 [383493.335109] FSC = 0x04: level 0 translation fault [383493.335110] Data abort info: [383493.335111] ISV = 0, ISS = 0x00000044 [383493.364739] CM = 0, WnR = 1 [383493.367793] [dead000000000108] address between user and kernel address ranges [383493.375021] Internal error: Oops: 96000044 [#1] PREEMPT SMP [383493.437574] CPU: 63 PID: 27895 Comm: dma0chan0-copy2 Kdump: loaded Tainted: GO 5.17.0-rc4+ #2 [383493.457851] pstate: 204000c9 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [383493.465331] pc : vchan_tx_submit+0x64/0xa0 [383493.469957] lr : vchan_tx_submit+0x34/0xa0 This occurs because the transmission timed out, and that is due to a data race: each thread rewrites the channel's descriptor as soon as device_issue_pending is called. This leads to a situation where the driver thinks it is using the right descriptor in the interrupt handler, while the channel's descriptor has actually been changed by another thread. The descriptor that in fact raised the interrupt is never handled, and neither is its tx->callback invoked. That is why the timeout is reported. With the current fix, the channel's descriptor changes its value only when it has been used. A new descriptor is acquired from the vc->desc_issued queue, which is already filled with descriptors that are ready to be sent. Threads have no direct access to the DMA channel descriptor. If the channel's descriptor is busy, submission to the hardware is retried when a descriptor completes. In this case, vc->desc_issued may be empty when hisi_dma_start_transfer is called, so the error reporting there is removed. It is now simply possible to queue a descriptor for further processing.
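The serialization introduced by this change can be summarized as follows (a condensed sketch of the issue_pending/interrupt interplay shown in the diff below):

	/* issue_pending: program the hardware only if no descriptor is in flight */
	if (vchan_issue_pending(&chan->vc) && !chan->desc)
		hisi_dma_start_transfer(chan);

	/* interrupt handler: on success, immediately start the next issued descriptor */
	if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) {
		vchan_cookie_complete(&desc->vd);
		hisi_dma_start_transfer(chan);
	}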
Fixes: e9f08b65250d ("dmaengine: hisilicon: Add Kunpeng DMA engine support") Signed-off-by: Jie Hai Acked-by: Zhou Wang Link: https://lore.kernel.org/r/20220830062251.52993-4-haijie1@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/hisi_dma.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index 837f7e4adfa6..0233b42143c7 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -271,7 +271,6 @@ static void hisi_dma_start_transfer(struct hisi_dma_chan *chan) vd = vchan_next_desc(&chan->vc); if (!vd) { - dev_err(&hdma_dev->pdev->dev, "no issued task!\n"); chan->desc = NULL; return; } @@ -303,7 +302,7 @@ static void hisi_dma_issue_pending(struct dma_chan *c) spin_lock_irqsave(&chan->vc.lock, flags); - if (vchan_issue_pending(&chan->vc)) + if (vchan_issue_pending(&chan->vc) && !chan->desc) hisi_dma_start_transfer(chan); spin_unlock_irqrestore(&chan->vc.lock, flags); @@ -441,11 +440,10 @@ static irqreturn_t hisi_dma_irq(int irq, void *data) chan->qp_num, chan->cq_head); if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) { vchan_cookie_complete(&desc->vd); + hisi_dma_start_transfer(chan); } else { dev_err(&hdma_dev->pdev->dev, "task error!\n"); } - - chan->desc = NULL; } spin_unlock(&chan->vc.lock); From 4aa69cf7ed44590e8702eb970ff6951e33dcdabf Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Tue, 30 Aug 2022 14:22:48 +0800 Subject: [PATCH 08/60] dmaengine: hisilicon: Use macros instead of magic number readl_relaxed_poll_timeout() uses magic numbers 10 and 1000, which indicate maximum time to sleep between reads in us and timeout in us, respectively. Use macros HISI_DMA_POLL_Q_STS_DELAY_US and HISI_DMA_POLL_Q_STS_TIME_OUT_US instead of these two numbers. Signed-off-by: Jie Hai Acked-by: Zhou Wang Link: https://lore.kernel.org/r/20220830062251.52993-5-haijie1@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/hisi_dma.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index 0233b42143c7..5d62fe62ba00 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -36,6 +36,9 @@ #define PCI_BAR_2 2 +#define HISI_DMA_POLL_Q_STS_DELAY_US 10 +#define HISI_DMA_POLL_Q_STS_TIME_OUT_US 1000 + enum hisi_dma_mode { EP = 0, RC, @@ -185,15 +188,19 @@ static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan, { struct hisi_dma_dev *hdma_dev = chan->hdma_dev; u32 index = chan->qp_num, tmp; + void __iomem *addr; int ret; hisi_dma_pause_dma(hdma_dev, index, true); hisi_dma_enable_dma(hdma_dev, index, false); hisi_dma_mask_irq(hdma_dev, index); - ret = readl_relaxed_poll_timeout(hdma_dev->base + - HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp, - FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) != RUN, 10, 1000); + addr = hdma_dev->base + + HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET; + + ret = readl_relaxed_poll_timeout(addr, tmp, + FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) != RUN, + HISI_DMA_POLL_Q_STS_DELAY_US, HISI_DMA_POLL_Q_STS_TIME_OUT_US); if (ret) { dev_err(&hdma_dev->pdev->dev, "disable channel timeout!\n"); WARN_ON(1); @@ -208,9 +215,9 @@ static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan, hisi_dma_unmask_irq(hdma_dev, index); } - ret = readl_relaxed_poll_timeout(hdma_dev->base + - HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp, - FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) == IDLE, 10, 1000); + ret = readl_relaxed_poll_timeout(addr, tmp, + FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) == IDLE, + 
HISI_DMA_POLL_Q_STS_DELAY_US, HISI_DMA_POLL_Q_STS_TIME_OUT_US); if (ret) { dev_err(&hdma_dev->pdev->dev, "reset channel timeout!\n"); WARN_ON(1); From fd5273fa0816ce3df10538686ea76c0d70b6d623 Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Tue, 30 Aug 2022 14:22:49 +0800 Subject: [PATCH 09/60] dmaengine: hisilicon: Adapt DMA driver to HiSilicon IP09 The HiSilicon IP08 and HiSilicon IP09 are DMA iEPs, they have the same pci device id but different pci revision. Unfortunately, they have different register layouts, so the origin driver cannot run on HiSilicon IP09 correctly. This patch enables the driver to adapt to HiSilicon IP09. HiSilicon IP09 offers 4 channels, each channel has a send queue, a complete queue and an interrupt to help to do tasks. This DMA engine can do memory copy between memory blocks. Signed-off-by: Jie Hai Acked-by: Zhou Wang Link: https://lore.kernel.org/r/20220830062251.52993-6-haijie1@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/hisi_dma.c | 373 ++++++++++++++++++++++++++++++++--------- 1 file changed, 295 insertions(+), 78 deletions(-) diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index 5d62fe62ba00..da5e49ee95fa 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2019 HiSilicon Limited. */ +/* Copyright(c) 2019-2022 HiSilicon Limited. */ + #include #include #include @@ -9,35 +10,85 @@ #include #include "virt-dma.h" -#define HISI_DMA_SQ_BASE_L 0x0 -#define HISI_DMA_SQ_BASE_H 0x4 -#define HISI_DMA_SQ_DEPTH 0x8 -#define HISI_DMA_SQ_TAIL_PTR 0xc -#define HISI_DMA_CQ_BASE_L 0x10 -#define HISI_DMA_CQ_BASE_H 0x14 -#define HISI_DMA_CQ_DEPTH 0x18 -#define HISI_DMA_CQ_HEAD_PTR 0x1c -#define HISI_DMA_CTRL0 0x20 -#define HISI_DMA_CTRL0_QUEUE_EN_S 0 -#define HISI_DMA_CTRL0_QUEUE_PAUSE_S 4 -#define HISI_DMA_CTRL1 0x24 -#define HISI_DMA_CTRL1_QUEUE_RESET_S 0 -#define HISI_DMA_Q_FSM_STS 0x30 -#define HISI_DMA_FSM_STS_MASK GENMASK(3, 0) -#define HISI_DMA_INT_STS 0x40 -#define HISI_DMA_INT_STS_MASK GENMASK(12, 0) -#define HISI_DMA_INT_MSK 0x44 -#define HISI_DMA_MODE 0x217c -#define HISI_DMA_OFFSET 0x100 +/* HiSilicon DMA register common field define */ +#define HISI_DMA_Q_SQ_BASE_L 0x0 +#define HISI_DMA_Q_SQ_BASE_H 0x4 +#define HISI_DMA_Q_SQ_DEPTH 0x8 +#define HISI_DMA_Q_SQ_TAIL_PTR 0xc +#define HISI_DMA_Q_CQ_BASE_L 0x10 +#define HISI_DMA_Q_CQ_BASE_H 0x14 +#define HISI_DMA_Q_CQ_DEPTH 0x18 +#define HISI_DMA_Q_CQ_HEAD_PTR 0x1c +#define HISI_DMA_Q_CTRL0 0x20 +#define HISI_DMA_Q_CTRL0_QUEUE_EN BIT(0) +#define HISI_DMA_Q_CTRL0_QUEUE_PAUSE BIT(4) +#define HISI_DMA_Q_CTRL1 0x24 +#define HISI_DMA_Q_CTRL1_QUEUE_RESET BIT(0) +#define HISI_DMA_Q_FSM_STS 0x30 +#define HISI_DMA_Q_FSM_STS_MASK GENMASK(3, 0) +#define HISI_DMA_Q_ERR_INT_NUM0 0x84 +#define HISI_DMA_Q_ERR_INT_NUM1 0x88 +#define HISI_DMA_Q_ERR_INT_NUM2 0x8c -#define HISI_DMA_MSI_NUM 32 -#define HISI_DMA_CHAN_NUM 30 -#define HISI_DMA_Q_DEPTH_VAL 1024 +/* HiSilicon IP08 DMA register and field define */ +#define HISI_DMA_HIP08_MODE 0x217C +#define HISI_DMA_HIP08_Q_BASE 0x0 +#define HISI_DMA_HIP08_Q_CTRL0_ERR_ABORT_EN BIT(2) +#define HISI_DMA_HIP08_Q_INT_STS 0x40 +#define HISI_DMA_HIP08_Q_INT_MSK 0x44 +#define HISI_DMA_HIP08_Q_INT_STS_MASK GENMASK(14, 0) +#define HISI_DMA_HIP08_Q_ERR_INT_NUM3 0x90 +#define HISI_DMA_HIP08_Q_ERR_INT_NUM4 0x94 +#define HISI_DMA_HIP08_Q_ERR_INT_NUM5 0x98 +#define HISI_DMA_HIP08_Q_ERR_INT_NUM6 0x48 +#define HISI_DMA_HIP08_Q_CTRL0_SQCQ_DRCT BIT(24) -#define PCI_BAR_2 2 +/* HiSilicon IP09 DMA register and 
field define */ +#define HISI_DMA_HIP09_DMA_FLR_DISABLE 0xA00 +#define HISI_DMA_HIP09_DMA_FLR_DISABLE_B BIT(0) +#define HISI_DMA_HIP09_Q_BASE 0x2000 +#define HISI_DMA_HIP09_Q_CTRL0_ERR_ABORT_EN GENMASK(31, 28) +#define HISI_DMA_HIP09_Q_CTRL0_SQ_DRCT BIT(26) +#define HISI_DMA_HIP09_Q_CTRL0_CQ_DRCT BIT(27) +#define HISI_DMA_HIP09_Q_CTRL1_VA_ENABLE BIT(2) +#define HISI_DMA_HIP09_Q_INT_STS 0x40 +#define HISI_DMA_HIP09_Q_INT_MSK 0x44 +#define HISI_DMA_HIP09_Q_INT_STS_MASK 0x1 +#define HISI_DMA_HIP09_Q_ERR_INT_STS 0x48 +#define HISI_DMA_HIP09_Q_ERR_INT_MSK 0x4C +#define HISI_DMA_HIP09_Q_ERR_INT_STS_MASK GENMASK(18, 1) +#define HISI_DMA_HIP09_PORT_CFG_REG(port_id) (0x800 + \ + (port_id) * 0x20) +#define HISI_DMA_HIP09_PORT_CFG_LINK_DOWN_MASK_B BIT(16) -#define HISI_DMA_POLL_Q_STS_DELAY_US 10 -#define HISI_DMA_POLL_Q_STS_TIME_OUT_US 1000 +#define HISI_DMA_HIP09_MAX_PORT_NUM 16 + +#define HISI_DMA_HIP08_MSI_NUM 32 +#define HISI_DMA_HIP08_CHAN_NUM 30 +#define HISI_DMA_HIP09_MSI_NUM 4 +#define HISI_DMA_HIP09_CHAN_NUM 4 +#define HISI_DMA_REVISION_HIP08B 0x21 +#define HISI_DMA_REVISION_HIP09A 0x30 + +#define HISI_DMA_Q_OFFSET 0x100 +#define HISI_DMA_Q_DEPTH_VAL 1024 + +#define PCI_BAR_2 2 + +#define HISI_DMA_POLL_Q_STS_DELAY_US 10 +#define HISI_DMA_POLL_Q_STS_TIME_OUT_US 1000 + +/* + * The HIP08B(HiSilicon IP08) and HIP09A(HiSilicon IP09) are DMA iEPs, they + * have the same pci device id but different pci revision. + * Unfortunately, they have different register layouts, so two layout + * enumerations are defined. + */ +enum hisi_dma_reg_layout { + HISI_DMA_REG_LAYOUT_INVALID = 0, + HISI_DMA_REG_LAYOUT_HIP08, + HISI_DMA_REG_LAYOUT_HIP09 +}; enum hisi_dma_mode { EP = 0, @@ -108,9 +159,45 @@ struct hisi_dma_dev { struct dma_device dma_dev; u32 chan_num; u32 chan_depth; + enum hisi_dma_reg_layout reg_layout; + void __iomem *queue_base; /* queue region start of register */ struct hisi_dma_chan chan[]; }; +static enum hisi_dma_reg_layout hisi_dma_get_reg_layout(struct pci_dev *pdev) +{ + if (pdev->revision == HISI_DMA_REVISION_HIP08B) + return HISI_DMA_REG_LAYOUT_HIP08; + else if (pdev->revision >= HISI_DMA_REVISION_HIP09A) + return HISI_DMA_REG_LAYOUT_HIP09; + + return HISI_DMA_REG_LAYOUT_INVALID; +} + +static u32 hisi_dma_get_chan_num(struct pci_dev *pdev) +{ + if (pdev->revision == HISI_DMA_REVISION_HIP08B) + return HISI_DMA_HIP08_CHAN_NUM; + + return HISI_DMA_HIP09_CHAN_NUM; +} + +static u32 hisi_dma_get_msi_num(struct pci_dev *pdev) +{ + if (pdev->revision == HISI_DMA_REVISION_HIP08B) + return HISI_DMA_HIP08_MSI_NUM; + + return HISI_DMA_HIP09_MSI_NUM; +} + +static u32 hisi_dma_get_queue_base(struct pci_dev *pdev) +{ + if (pdev->revision == HISI_DMA_REVISION_HIP08B) + return HISI_DMA_HIP08_Q_BASE; + + return HISI_DMA_HIP09_Q_BASE; +} + static inline struct hisi_dma_chan *to_hisi_dma_chan(struct dma_chan *c) { return container_of(c, struct hisi_dma_chan, vc.chan); @@ -124,7 +211,7 @@ static inline struct hisi_dma_desc *to_hisi_dma_desc(struct virt_dma_desc *vd) static inline void hisi_dma_chan_write(void __iomem *base, u32 reg, u32 index, u32 val) { - writel_relaxed(val, base + reg + index * HISI_DMA_OFFSET); + writel_relaxed(val, base + reg + index * HISI_DMA_Q_OFFSET); } static inline void hisi_dma_update_bit(void __iomem *addr, u32 pos, bool val) @@ -132,55 +219,83 @@ static inline void hisi_dma_update_bit(void __iomem *addr, u32 pos, bool val) u32 tmp; tmp = readl_relaxed(addr); - tmp = val ? tmp | BIT(pos) : tmp & ~BIT(pos); + tmp = val ? 
tmp | pos : tmp & ~pos; writel_relaxed(tmp, addr); } static void hisi_dma_pause_dma(struct hisi_dma_dev *hdma_dev, u32 index, bool pause) { - void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL0 + index * - HISI_DMA_OFFSET; + void __iomem *addr; - hisi_dma_update_bit(addr, HISI_DMA_CTRL0_QUEUE_PAUSE_S, pause); + addr = hdma_dev->queue_base + HISI_DMA_Q_CTRL0 + + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL0_QUEUE_PAUSE, pause); } static void hisi_dma_enable_dma(struct hisi_dma_dev *hdma_dev, u32 index, bool enable) { - void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL0 + index * - HISI_DMA_OFFSET; + void __iomem *addr; - hisi_dma_update_bit(addr, HISI_DMA_CTRL0_QUEUE_EN_S, enable); + addr = hdma_dev->queue_base + HISI_DMA_Q_CTRL0 + + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL0_QUEUE_EN, enable); } static void hisi_dma_mask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index) { - hisi_dma_chan_write(hdma_dev->base, HISI_DMA_INT_MSK, qp_index, - HISI_DMA_INT_STS_MASK); + void __iomem *q_base = hdma_dev->queue_base; + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_MSK, + qp_index, HISI_DMA_HIP08_Q_INT_STS_MASK); + else { + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_MSK, + qp_index, HISI_DMA_HIP09_Q_INT_STS_MASK); + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_MSK, + qp_index, + HISI_DMA_HIP09_Q_ERR_INT_STS_MASK); + } } static void hisi_dma_unmask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index) { - void __iomem *base = hdma_dev->base; + void __iomem *q_base = hdma_dev->queue_base; - hisi_dma_chan_write(base, HISI_DMA_INT_STS, qp_index, - HISI_DMA_INT_STS_MASK); - hisi_dma_chan_write(base, HISI_DMA_INT_MSK, qp_index, 0); + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) { + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_STS, + qp_index, HISI_DMA_HIP08_Q_INT_STS_MASK); + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_MSK, + qp_index, 0); + } else { + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_STS, + qp_index, HISI_DMA_HIP09_Q_INT_STS_MASK); + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_STS, + qp_index, + HISI_DMA_HIP09_Q_ERR_INT_STS_MASK); + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_MSK, + qp_index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_MSK, + qp_index, 0); + } } static void hisi_dma_do_reset(struct hisi_dma_dev *hdma_dev, u32 index) { - void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL1 + index * - HISI_DMA_OFFSET; + void __iomem *addr; - hisi_dma_update_bit(addr, HISI_DMA_CTRL1_QUEUE_RESET_S, 1); + addr = hdma_dev->queue_base + + HISI_DMA_Q_CTRL1 + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL1_QUEUE_RESET, 1); } static void hisi_dma_reset_qp_point(struct hisi_dma_dev *hdma_dev, u32 index) { - hisi_dma_chan_write(hdma_dev->base, HISI_DMA_SQ_TAIL_PTR, index, 0); - hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR, index, 0); + void __iomem *q_base = hdma_dev->queue_base; + + hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_TAIL_PTR, index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR, index, 0); } static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan, @@ -195,11 +310,11 @@ static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan, hisi_dma_enable_dma(hdma_dev, index, false); hisi_dma_mask_irq(hdma_dev, index); - addr = hdma_dev->base + - HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET; + addr = hdma_dev->queue_base + + HISI_DMA_Q_FSM_STS + index * 
HISI_DMA_Q_OFFSET; ret = readl_relaxed_poll_timeout(addr, tmp, - FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) != RUN, + FIELD_GET(HISI_DMA_Q_FSM_STS_MASK, tmp) != RUN, HISI_DMA_POLL_Q_STS_DELAY_US, HISI_DMA_POLL_Q_STS_TIME_OUT_US); if (ret) { dev_err(&hdma_dev->pdev->dev, "disable channel timeout!\n"); @@ -216,7 +331,7 @@ static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan, } ret = readl_relaxed_poll_timeout(addr, tmp, - FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) == IDLE, + FIELD_GET(HISI_DMA_Q_FSM_STS_MASK, tmp) == IDLE, HISI_DMA_POLL_Q_STS_DELAY_US, HISI_DMA_POLL_Q_STS_TIME_OUT_US); if (ret) { dev_err(&hdma_dev->pdev->dev, "reset channel timeout!\n"); @@ -298,8 +413,8 @@ static void hisi_dma_start_transfer(struct hisi_dma_chan *chan) chan->sq_tail = (chan->sq_tail + 1) % hdma_dev->chan_depth; /* update sq_tail to trigger a new task */ - hisi_dma_chan_write(hdma_dev->base, HISI_DMA_SQ_TAIL_PTR, chan->qp_num, - chan->sq_tail); + hisi_dma_chan_write(hdma_dev->queue_base, HISI_DMA_Q_SQ_TAIL_PTR, + chan->qp_num, chan->sq_tail); } static void hisi_dma_issue_pending(struct dma_chan *c) @@ -373,26 +488,86 @@ static int hisi_dma_alloc_qps_mem(struct hisi_dma_dev *hdma_dev) static void hisi_dma_init_hw_qp(struct hisi_dma_dev *hdma_dev, u32 index) { struct hisi_dma_chan *chan = &hdma_dev->chan[index]; + void __iomem *q_base = hdma_dev->queue_base; u32 hw_depth = hdma_dev->chan_depth - 1; - void __iomem *base = hdma_dev->base; + void __iomem *addr; + u32 tmp; /* set sq, cq base */ - hisi_dma_chan_write(base, HISI_DMA_SQ_BASE_L, index, + hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_BASE_L, index, lower_32_bits(chan->sq_dma)); - hisi_dma_chan_write(base, HISI_DMA_SQ_BASE_H, index, + hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_BASE_H, index, upper_32_bits(chan->sq_dma)); - hisi_dma_chan_write(base, HISI_DMA_CQ_BASE_L, index, + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_BASE_L, index, lower_32_bits(chan->cq_dma)); - hisi_dma_chan_write(base, HISI_DMA_CQ_BASE_H, index, + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_BASE_H, index, upper_32_bits(chan->cq_dma)); /* set sq, cq depth */ - hisi_dma_chan_write(base, HISI_DMA_SQ_DEPTH, index, hw_depth); - hisi_dma_chan_write(base, HISI_DMA_CQ_DEPTH, index, hw_depth); + hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_DEPTH, index, hw_depth); + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_DEPTH, index, hw_depth); /* init sq tail and cq head */ - hisi_dma_chan_write(base, HISI_DMA_SQ_TAIL_PTR, index, 0); - hisi_dma_chan_write(base, HISI_DMA_CQ_HEAD_PTR, index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_TAIL_PTR, index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR, index, 0); + + /* init error interrupt stats */ + hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM0, index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM1, index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM2, index, 0); + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) { + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM3, + index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM4, + index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM5, + index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM6, + index, 0); + /* + * init SQ/CQ direction selecting register. + * "0" is to local side and "1" is to remote side. + */ + addr = q_base + HISI_DMA_Q_CTRL0 + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_HIP08_Q_CTRL0_SQCQ_DRCT, 0); + + /* + * 0 - Continue to next descriptor if error occurs. 
+ 1 - Abort the DMA queue if error occurs. + */ + hisi_dma_update_bit(addr, + HISI_DMA_HIP08_Q_CTRL0_ERR_ABORT_EN, 0); + } else { + addr = q_base + HISI_DMA_Q_CTRL0 + index * HISI_DMA_Q_OFFSET; + + /* + * init SQ/CQ direction selecting register. + * "0" is to local side and "1" is to remote side. + */ + hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL0_SQ_DRCT, 0); + hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL0_CQ_DRCT, 0); + + /* + * 0 - Continue to next descriptor if error occurs. + * 1 - Abort the DMA queue if error occurs. + */ + + tmp = readl_relaxed(addr); + tmp &= ~HISI_DMA_HIP09_Q_CTRL0_ERR_ABORT_EN; + writel_relaxed(tmp, addr); + + /* + * 0 - dma should process FLR with CPU. + * 1 - dma does not process FLR, only the cpu processes FLR. + */ + addr = q_base + HISI_DMA_HIP09_DMA_FLR_DISABLE + + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_HIP09_DMA_FLR_DISABLE_B, 0); + + addr = q_base + HISI_DMA_Q_CTRL1 + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL1_VA_ENABLE, 1); + } } static void hisi_dma_enable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index) @@ -436,14 +611,16 @@ static irqreturn_t hisi_dma_irq(int irq, void *data) struct hisi_dma_dev *hdma_dev = chan->hdma_dev; struct hisi_dma_desc *desc; struct hisi_dma_cqe *cqe; + void __iomem *q_base; spin_lock(&chan->vc.lock); desc = chan->desc; cqe = chan->cq + chan->cq_head; + q_base = hdma_dev->queue_base; if (desc) { chan->cq_head = (chan->cq_head + 1) % hdma_dev->chan_depth; - hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR, + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR, chan->qp_num, chan->cq_head); if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) { vchan_cookie_complete(&desc->vd); @@ -504,16 +681,58 @@ static void hisi_dma_disable_hw_channels(void *data) static void hisi_dma_set_mode(struct hisi_dma_dev *hdma_dev, enum hisi_dma_mode mode) { - writel_relaxed(mode == RC ? 1 : 0, hdma_dev->base + HISI_DMA_MODE); + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) + writel_relaxed(mode == RC ?
1 : 0, + hdma_dev->base + HISI_DMA_HIP08_MODE); +} + +static void hisi_dma_init_hw(struct hisi_dma_dev *hdma_dev) +{ + void __iomem *addr; + int i; + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP09) { + for (i = 0; i < HISI_DMA_HIP09_MAX_PORT_NUM; i++) { + addr = hdma_dev->base + HISI_DMA_HIP09_PORT_CFG_REG(i); + hisi_dma_update_bit(addr, + HISI_DMA_HIP09_PORT_CFG_LINK_DOWN_MASK_B, 1); + } + } +} + +static void hisi_dma_init_dma_dev(struct hisi_dma_dev *hdma_dev) +{ + struct dma_device *dma_dev; + + dma_dev = &hdma_dev->dma_dev; + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_dev->device_free_chan_resources = hisi_dma_free_chan_resources; + dma_dev->device_prep_dma_memcpy = hisi_dma_prep_dma_memcpy; + dma_dev->device_tx_status = hisi_dma_tx_status; + dma_dev->device_issue_pending = hisi_dma_issue_pending; + dma_dev->device_terminate_all = hisi_dma_terminate_all; + dma_dev->device_synchronize = hisi_dma_synchronize; + dma_dev->directions = BIT(DMA_MEM_TO_MEM); + dma_dev->dev = &hdma_dev->pdev->dev; + INIT_LIST_HEAD(&dma_dev->channels); } static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + enum hisi_dma_reg_layout reg_layout; struct device *dev = &pdev->dev; struct hisi_dma_dev *hdma_dev; struct dma_device *dma_dev; + u32 chan_num; + u32 msi_num; int ret; + reg_layout = hisi_dma_get_reg_layout(pdev); + if (reg_layout == HISI_DMA_REG_LAYOUT_INVALID) { + dev_err(dev, "unsupported device!\n"); + return -EINVAL; + } + ret = pcim_enable_device(pdev); if (ret) { dev_err(dev, "failed to enable device mem!\n"); @@ -530,40 +749,37 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) return ret; - hdma_dev = devm_kzalloc(dev, struct_size(hdma_dev, chan, HISI_DMA_CHAN_NUM), GFP_KERNEL); + chan_num = hisi_dma_get_chan_num(pdev); + hdma_dev = devm_kzalloc(dev, struct_size(hdma_dev, chan, chan_num), + GFP_KERNEL); if (!hdma_dev) return -EINVAL; hdma_dev->base = pcim_iomap_table(pdev)[PCI_BAR_2]; hdma_dev->pdev = pdev; - hdma_dev->chan_num = HISI_DMA_CHAN_NUM; hdma_dev->chan_depth = HISI_DMA_Q_DEPTH_VAL; + hdma_dev->chan_num = chan_num; + hdma_dev->reg_layout = reg_layout; + hdma_dev->queue_base = hdma_dev->base + hisi_dma_get_queue_base(pdev); pci_set_drvdata(pdev, hdma_dev); pci_set_master(pdev); + msi_num = hisi_dma_get_msi_num(pdev); + /* This will be freed by 'pcim_release()'. 
See 'pcim_enable_device()' */ - ret = pci_alloc_irq_vectors(pdev, HISI_DMA_MSI_NUM, HISI_DMA_MSI_NUM, - PCI_IRQ_MSI); + ret = pci_alloc_irq_vectors(pdev, msi_num, msi_num, PCI_IRQ_MSI); if (ret < 0) { dev_err(dev, "Failed to allocate MSI vectors!\n"); return ret; } - dma_dev = &hdma_dev->dma_dev; - dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); - dma_dev->device_free_chan_resources = hisi_dma_free_chan_resources; - dma_dev->device_prep_dma_memcpy = hisi_dma_prep_dma_memcpy; - dma_dev->device_tx_status = hisi_dma_tx_status; - dma_dev->device_issue_pending = hisi_dma_issue_pending; - dma_dev->device_terminate_all = hisi_dma_terminate_all; - dma_dev->device_synchronize = hisi_dma_synchronize; - dma_dev->directions = BIT(DMA_MEM_TO_MEM); - dma_dev->dev = dev; - INIT_LIST_HEAD(&dma_dev->channels); + hisi_dma_init_dma_dev(hdma_dev); hisi_dma_set_mode(hdma_dev, RC); + hisi_dma_init_hw(hdma_dev); + ret = hisi_dma_enable_hw_channels(hdma_dev); if (ret < 0) { dev_err(dev, "failed to enable hw channel!\n"); @@ -575,6 +791,7 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) return ret; + dma_dev = &hdma_dev->dma_dev; ret = dmaenginem_async_device_register(dma_dev); if (ret < 0) dev_err(dev, "failed to register device!\n"); From 5dda7a62aa1ca99ffd4a9737283fac1ac07b819b Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Tue, 30 Aug 2022 14:22:50 +0800 Subject: [PATCH 10/60] dmaengine: hisilicon: Dump regs to debugfs This patch adds dump of registers with debugfs for HIP08 and HIP09 DMA driver. Signed-off-by: Jie Hai Acked-by: Zhou Wang Link: https://lore.kernel.org/r/20220830062251.52993-7-haijie1@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/hisi_dma.c | 238 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 236 insertions(+), 2 deletions(-) diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index da5e49ee95fa..c1350a36fddd 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -78,6 +78,8 @@ #define HISI_DMA_POLL_Q_STS_DELAY_US 10 #define HISI_DMA_POLL_Q_STS_TIME_OUT_US 1000 +#define HISI_DMA_MAX_DIR_NAME_LEN 128 + /* * The HIP08B(HiSilicon IP08) and HIP09A(HiSilicon IP09) are DMA iEPs, they * have the same pci device id but different pci revision. 
@@ -164,6 +166,123 @@ struct hisi_dma_dev { struct hisi_dma_chan chan[]; }; +#ifdef CONFIG_DEBUG_FS + +static const struct debugfs_reg32 hisi_dma_comm_chan_regs[] = { + {"DMA_QUEUE_SQ_DEPTH ", 0x0008ull}, + {"DMA_QUEUE_SQ_TAIL_PTR ", 0x000Cull}, + {"DMA_QUEUE_CQ_DEPTH ", 0x0018ull}, + {"DMA_QUEUE_CQ_HEAD_PTR ", 0x001Cull}, + {"DMA_QUEUE_CTRL0 ", 0x0020ull}, + {"DMA_QUEUE_CTRL1 ", 0x0024ull}, + {"DMA_QUEUE_FSM_STS ", 0x0030ull}, + {"DMA_QUEUE_SQ_STS ", 0x0034ull}, + {"DMA_QUEUE_CQ_TAIL_PTR ", 0x003Cull}, + {"DMA_QUEUE_INT_STS ", 0x0040ull}, + {"DMA_QUEUE_INT_MSK ", 0x0044ull}, + {"DMA_QUEUE_INT_RO ", 0x006Cull}, +}; + +static const struct debugfs_reg32 hisi_dma_hip08_chan_regs[] = { + {"DMA_QUEUE_BYTE_CNT ", 0x0038ull}, + {"DMA_ERR_INT_NUM6 ", 0x0048ull}, + {"DMA_QUEUE_DESP0 ", 0x0050ull}, + {"DMA_QUEUE_DESP1 ", 0x0054ull}, + {"DMA_QUEUE_DESP2 ", 0x0058ull}, + {"DMA_QUEUE_DESP3 ", 0x005Cull}, + {"DMA_QUEUE_DESP4 ", 0x0074ull}, + {"DMA_QUEUE_DESP5 ", 0x0078ull}, + {"DMA_QUEUE_DESP6 ", 0x007Cull}, + {"DMA_QUEUE_DESP7 ", 0x0080ull}, + {"DMA_ERR_INT_NUM0 ", 0x0084ull}, + {"DMA_ERR_INT_NUM1 ", 0x0088ull}, + {"DMA_ERR_INT_NUM2 ", 0x008Cull}, + {"DMA_ERR_INT_NUM3 ", 0x0090ull}, + {"DMA_ERR_INT_NUM4 ", 0x0094ull}, + {"DMA_ERR_INT_NUM5 ", 0x0098ull}, + {"DMA_QUEUE_SQ_STS2 ", 0x00A4ull}, +}; + +static const struct debugfs_reg32 hisi_dma_hip09_chan_regs[] = { + {"DMA_QUEUE_ERR_INT_STS ", 0x0048ull}, + {"DMA_QUEUE_ERR_INT_MSK ", 0x004Cull}, + {"DFX_SQ_READ_ERR_PTR ", 0x0068ull}, + {"DFX_DMA_ERR_INT_NUM0 ", 0x0084ull}, + {"DFX_DMA_ERR_INT_NUM1 ", 0x0088ull}, + {"DFX_DMA_ERR_INT_NUM2 ", 0x008Cull}, + {"DFX_DMA_QUEUE_SQ_STS2 ", 0x00A4ull}, +}; + +static const struct debugfs_reg32 hisi_dma_hip08_comm_regs[] = { + {"DMA_ECC_ERR_ADDR ", 0x2004ull}, + {"DMA_ECC_ECC_CNT ", 0x2014ull}, + {"COMMON_AND_CH_ERR_STS ", 0x2030ull}, + {"LOCAL_CPL_ID_STS_0 ", 0x20E0ull}, + {"LOCAL_CPL_ID_STS_1 ", 0x20E4ull}, + {"LOCAL_CPL_ID_STS_2 ", 0x20E8ull}, + {"LOCAL_CPL_ID_STS_3 ", 0x20ECull}, + {"LOCAL_TLP_NUM ", 0x2158ull}, + {"SQCQ_TLP_NUM ", 0x2164ull}, + {"CPL_NUM ", 0x2168ull}, + {"INF_BACK_PRESS_STS ", 0x2170ull}, + {"DMA_CH_RAS_LEVEL ", 0x2184ull}, + {"DMA_CM_RAS_LEVEL ", 0x2188ull}, + {"DMA_CH_ERR_STS ", 0x2190ull}, + {"DMA_CH_DONE_STS ", 0x2194ull}, + {"DMA_SQ_TAG_STS_0 ", 0x21A0ull}, + {"DMA_SQ_TAG_STS_1 ", 0x21A4ull}, + {"DMA_SQ_TAG_STS_2 ", 0x21A8ull}, + {"DMA_SQ_TAG_STS_3 ", 0x21ACull}, + {"LOCAL_P_ID_STS_0 ", 0x21B0ull}, + {"LOCAL_P_ID_STS_1 ", 0x21B4ull}, + {"LOCAL_P_ID_STS_2 ", 0x21B8ull}, + {"LOCAL_P_ID_STS_3 ", 0x21BCull}, + {"DMA_PREBUFF_INFO_0 ", 0x2200ull}, + {"DMA_CM_TABLE_INFO_0 ", 0x2220ull}, + {"DMA_CM_CE_RO ", 0x2244ull}, + {"DMA_CM_NFE_RO ", 0x2248ull}, + {"DMA_CM_FE_RO ", 0x224Cull}, +}; + +static const struct debugfs_reg32 hisi_dma_hip09_comm_regs[] = { + {"COMMON_AND_CH_ERR_STS ", 0x0030ull}, + {"DMA_PORT_IDLE_STS ", 0x0150ull}, + {"DMA_CH_RAS_LEVEL ", 0x0184ull}, + {"DMA_CM_RAS_LEVEL ", 0x0188ull}, + {"DMA_CM_CE_RO ", 0x0244ull}, + {"DMA_CM_NFE_RO ", 0x0248ull}, + {"DMA_CM_FE_RO ", 0x024Cull}, + {"DFX_INF_BACK_PRESS_STS0 ", 0x1A40ull}, + {"DFX_INF_BACK_PRESS_STS1 ", 0x1A44ull}, + {"DFX_INF_BACK_PRESS_STS2 ", 0x1A48ull}, + {"DFX_DMA_WRR_DISABLE ", 0x1A4Cull}, + {"DFX_PA_REQ_TLP_NUM ", 0x1C00ull}, + {"DFX_PA_BACK_TLP_NUM ", 0x1C04ull}, + {"DFX_PA_RETRY_TLP_NUM ", 0x1C08ull}, + {"DFX_LOCAL_NP_TLP_NUM ", 0x1C0Cull}, + {"DFX_LOCAL_CPL_HEAD_TLP_NUM ", 0x1C10ull}, + {"DFX_LOCAL_CPL_DATA_TLP_NUM ", 0x1C14ull}, + {"DFX_LOCAL_CPL_EXT_DATA_TLP_NUM ", 0x1C18ull}, + {"DFX_LOCAL_P_HEAD_TLP_NUM ", 0x1C1Cull}, + 
{"DFX_LOCAL_P_ACK_TLP_NUM ", 0x1C20ull}, + {"DFX_BUF_ALOC_PORT_REQ_NUM ", 0x1C24ull}, + {"DFX_BUF_ALOC_PORT_RESULT_NUM ", 0x1C28ull}, + {"DFX_BUF_FAIL_SIZE_NUM ", 0x1C2Cull}, + {"DFX_BUF_ALOC_SIZE_NUM ", 0x1C30ull}, + {"DFX_BUF_NP_RELEASE_SIZE_NUM ", 0x1C34ull}, + {"DFX_BUF_P_RELEASE_SIZE_NUM ", 0x1C38ull}, + {"DFX_BUF_PORT_RELEASE_SIZE_NUM ", 0x1C3Cull}, + {"DFX_DMA_PREBUF_MEM0_ECC_ERR_ADDR ", 0x1CA8ull}, + {"DFX_DMA_PREBUF_MEM0_ECC_CNT ", 0x1CACull}, + {"DFX_DMA_LOC_NP_OSTB_ECC_ERR_ADDR ", 0x1CB0ull}, + {"DFX_DMA_LOC_NP_OSTB_ECC_CNT ", 0x1CB4ull}, + {"DFX_DMA_PREBUF_MEM1_ECC_ERR_ADDR ", 0x1CC0ull}, + {"DFX_DMA_PREBUF_MEM1_ECC_CNT ", 0x1CC4ull}, + {"DMA_CH_DONE_STS ", 0x02E0ull}, + {"DMA_CH_ERR_STS ", 0x0320ull}, +}; +#endif /* CONFIG_DEBUG_FS*/ + static enum hisi_dma_reg_layout hisi_dma_get_reg_layout(struct pci_dev *pdev) { if (pdev->revision == HISI_DMA_REVISION_HIP08B) @@ -717,6 +836,117 @@ static void hisi_dma_init_dma_dev(struct hisi_dma_dev *hdma_dev) INIT_LIST_HEAD(&dma_dev->channels); } +/* --- debugfs implementation --- */ +#ifdef CONFIG_DEBUG_FS +#include +static struct debugfs_reg32 *hisi_dma_get_ch_regs(struct hisi_dma_dev *hdma_dev, + u32 *regs_sz) +{ + struct device *dev = &hdma_dev->pdev->dev; + struct debugfs_reg32 *regs; + u32 regs_sz_comm; + + regs_sz_comm = ARRAY_SIZE(hisi_dma_comm_chan_regs); + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) + *regs_sz = regs_sz_comm + ARRAY_SIZE(hisi_dma_hip08_chan_regs); + else + *regs_sz = regs_sz_comm + ARRAY_SIZE(hisi_dma_hip09_chan_regs); + + regs = devm_kcalloc(dev, *regs_sz, sizeof(struct debugfs_reg32), + GFP_KERNEL); + if (!regs) + return NULL; + memcpy(regs, hisi_dma_comm_chan_regs, sizeof(hisi_dma_comm_chan_regs)); + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) + memcpy(regs + regs_sz_comm, hisi_dma_hip08_chan_regs, + sizeof(hisi_dma_hip08_chan_regs)); + else + memcpy(regs + regs_sz_comm, hisi_dma_hip09_chan_regs, + sizeof(hisi_dma_hip09_chan_regs)); + + return regs; +} + +static int hisi_dma_create_chan_dir(struct hisi_dma_dev *hdma_dev) +{ + char dir_name[HISI_DMA_MAX_DIR_NAME_LEN]; + struct debugfs_regset32 *regsets; + struct debugfs_reg32 *regs; + struct dentry *chan_dir; + struct device *dev; + u32 regs_sz; + int ret; + int i; + + dev = &hdma_dev->pdev->dev; + + regsets = devm_kcalloc(dev, hdma_dev->chan_num, + sizeof(*regsets), GFP_KERNEL); + if (!regsets) + return -ENOMEM; + + regs = hisi_dma_get_ch_regs(hdma_dev, ®s_sz); + if (!regs) + return -ENOMEM; + + for (i = 0; i < hdma_dev->chan_num; i++) { + regsets[i].regs = regs; + regsets[i].nregs = regs_sz; + regsets[i].base = hdma_dev->queue_base + i * HISI_DMA_Q_OFFSET; + regsets[i].dev = dev; + + memset(dir_name, 0, HISI_DMA_MAX_DIR_NAME_LEN); + ret = sprintf(dir_name, "channel%d", i); + if (ret < 0) + return ret; + + chan_dir = debugfs_create_dir(dir_name, + hdma_dev->dma_dev.dbg_dev_root); + debugfs_create_regset32("regs", 0444, chan_dir, ®sets[i]); + } + + return 0; +} + +static void hisi_dma_create_debugfs(struct hisi_dma_dev *hdma_dev) +{ + struct debugfs_regset32 *regset; + struct device *dev; + int ret; + + dev = &hdma_dev->pdev->dev; + + if (hdma_dev->dma_dev.dbg_dev_root == NULL) + return; + + regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL); + if (!regset) + return; + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) { + regset->regs = hisi_dma_hip08_comm_regs; + regset->nregs = ARRAY_SIZE(hisi_dma_hip08_comm_regs); + } else { + regset->regs = hisi_dma_hip09_comm_regs; + regset->nregs = 
ARRAY_SIZE(hisi_dma_hip09_comm_regs); + } + regset->base = hdma_dev->base; + regset->dev = dev; + + debugfs_create_regset32("regs", 0444, + hdma_dev->dma_dev.dbg_dev_root, regset); + + ret = hisi_dma_create_chan_dir(hdma_dev); + if (ret < 0) + dev_info(&hdma_dev->pdev->dev, "fail to create debugfs for channels!\n"); +} +#else +static void hisi_dma_create_debugfs(struct hisi_dma_dev *hdma_dev) { } +#endif /* CONFIG_DEBUG_FS*/ +/* --- debugfs implementation --- */ + static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id) { enum hisi_dma_reg_layout reg_layout; @@ -793,10 +1023,14 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id) dma_dev = &hdma_dev->dma_dev; ret = dmaenginem_async_device_register(dma_dev); - if (ret < 0) + if (ret < 0) { dev_err(dev, "failed to register device!\n"); + return ret; + } - return ret; + hisi_dma_create_debugfs(hdma_dev); + + return 0; } static const struct pci_device_id hisi_dma_pci_tbl[] = { From 9e08d2138f132257322b04bf7a8274c59be334e8 Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Tue, 30 Aug 2022 14:22:51 +0800 Subject: [PATCH 11/60] MAINTAINERS: Add myself as maintainer for hisi_dma Add myself as a maintainer for hisi_dma. Signed-off-by: Jie Hai Acked-by: Zhou Wang Link: https://lore.kernel.org/r/20220830062251.52993-8-haijie1@huawei.com Signed-off-by: Vinod Koul --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8a5012ba6ff9..96d7f108fa71 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9114,6 +9114,7 @@ F: net/dsa/tag_hellcreek.c HISILICON DMA DRIVER M: Zhou Wang +M: Jie Hai L: dmaengine@vger.kernel.org S: Maintained F: drivers/dma/hisi_dma.c From 4dc36a53b8b9135eed90766df49c0ec48d1550dd Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 29 Aug 2022 17:46:41 +0200 Subject: [PATCH 12/60] dmaengine: stm32-dma: introduce 3 helpers to address channel flags Channels 0 to 3 flags are described in DMA_LISR and DMA_LIFCR (L as Low). Channels 4 to 7 flags are described in DMA_HISR and DMA_HIFCR (H as High). Macro STM32_DMA_ISR(n) returns the interrupt status register offset for the channel id (n). Macro STM32_DMA_IFCR(n) returns the interrupt flag clear register offset for the channel id (n). If chan->id % 4 = 2 or 3, then its flags are left-shifted by 16 bits. If chan->id % 4 = 1 or 3, then its flags are additionally left-shifted by 6 bits. If chan->id % 4 = 0, then its flags are not shifted. Macro STM32_DMA_FLAGS_SHIFT(n) returns the required shift to get or set the channel flags mask. Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20220829154646.29867-2-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index adb25a11c70f..5d67e168aaee 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -32,8 +32,10 @@ #define STM32_DMA_LISR 0x0000 /* DMA Low Int Status Reg */ #define STM32_DMA_HISR 0x0004 /* DMA High Int Status Reg */ +#define STM32_DMA_ISR(n) (((n) & 4) ? STM32_DMA_HISR : STM32_DMA_LISR) #define STM32_DMA_LIFCR 0x0008 /* DMA Low Int Flag Clear Reg */ #define STM32_DMA_HIFCR 0x000c /* DMA High Int Flag Clear Reg */ +#define STM32_DMA_IFCR(n) (((n) & 4) ? 
STM32_DMA_HIFCR : STM32_DMA_LIFCR) #define STM32_DMA_TCI BIT(5) /* Transfer Complete Interrupt */ #define STM32_DMA_HTI BIT(4) /* Half Transfer Interrupt */ #define STM32_DMA_TEI BIT(3) /* Transfer Error Interrupt */ @@ -43,6 +45,12 @@ | STM32_DMA_TEI \ | STM32_DMA_DMEI \ | STM32_DMA_FEI) +/* + * If (chan->id % 4) is 2 or 3, left shift the mask by 16 bits; + * if (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits. + */ +#define STM32_DMA_FLAGS_SHIFT(n) ({ typeof(n) (_n) = (n); \ + (((_n) & 2) << 3) | (((_n) & 1) * 6); }) /* DMA Stream x Configuration Register */ #define STM32_DMA_SCR(x) (0x0010 + 0x18 * (x)) /* x = 0..7 */ @@ -401,17 +409,10 @@ static u32 stm32_dma_irq_status(struct stm32_dma_chan *chan) /* * Read "flags" from DMA_xISR register corresponding to the selected * DMA channel at the correct bit offset inside that register. - * - * If (ch % 4) is 2 or 3, left shift the mask by 16 bits. - * If (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits. */ - if (chan->id & 4) - dma_isr = stm32_dma_read(dmadev, STM32_DMA_HISR); - else - dma_isr = stm32_dma_read(dmadev, STM32_DMA_LISR); - - flags = dma_isr >> (((chan->id & 2) << 3) | ((chan->id & 1) * 6)); + dma_isr = stm32_dma_read(dmadev, STM32_DMA_ISR(chan->id)); + flags = dma_isr >> STM32_DMA_FLAGS_SHIFT(chan->id); return flags & STM32_DMA_MASKI; } @@ -424,17 +425,11 @@ static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags) /* * Write "flags" to the DMA_xIFCR register corresponding to the selected * DMA channel at the correct bit offset inside that register. - * - * If (ch % 4) is 2 or 3, left shift the mask by 16 bits. - * If (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits. */ flags &= STM32_DMA_MASKI; - dma_ifcr = flags << (((chan->id & 2) << 3) | ((chan->id & 1) * 6)); + dma_ifcr = flags << STM32_DMA_FLAGS_SHIFT(chan->id); - if (chan->id & 4) - stm32_dma_write(dmadev, STM32_DMA_HIFCR, dma_ifcr); - else - stm32_dma_write(dmadev, STM32_DMA_LIFCR, dma_ifcr); + stm32_dma_write(dmadev, STM32_DMA_IFCR(chan->id), dma_ifcr); } static int stm32_dma_disable_chan(struct stm32_dma_chan *chan) From 1c32d6c37cc2a037e447e73f919d57e89ef4b571 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 29 Aug 2022 17:46:42 +0200 Subject: [PATCH 13/60] dmaengine: stm32-dma: use bitfield helpers Use the FIELD_{GET,PREP}() helpers, instead of defining custom macros implementing the same operations. 
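The transformation pattern is illustrated below (a before/after sketch built from one of the removed macros in the diff that follows; not new driver code):

	/* before: hand-rolled shift macro duplicated the mask information */
	#define STM32_DMA_SCR_PL(n)	((n & 0x3) << 16)
	dma_scr |= STM32_DMA_SCR_PL(priority);

	/* after: the <linux/bitfield.h> helper derives the shift from the mask */
	#define STM32_DMA_SCR_PL_MASK	GENMASK(17, 16)
	dma_scr |= FIELD_PREP(STM32_DMA_SCR_PL_MASK, priority);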
Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20220829154646.29867-3-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 60 +++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 5d67e168aaee..6aa281561f38 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -9,6 +9,7 @@ * Pierre-Yves Mordret */ +#include #include #include #include @@ -54,20 +55,13 @@ /* DMA Stream x Configuration Register */ #define STM32_DMA_SCR(x) (0x0010 + 0x18 * (x)) /* x = 0..7 */ -#define STM32_DMA_SCR_REQ(n) ((n & 0x7) << 25) +#define STM32_DMA_SCR_REQ_MASK GENMASK(27, 25) #define STM32_DMA_SCR_MBURST_MASK GENMASK(24, 23) -#define STM32_DMA_SCR_MBURST(n) ((n & 0x3) << 23) #define STM32_DMA_SCR_PBURST_MASK GENMASK(22, 21) -#define STM32_DMA_SCR_PBURST(n) ((n & 0x3) << 21) #define STM32_DMA_SCR_PL_MASK GENMASK(17, 16) -#define STM32_DMA_SCR_PL(n) ((n & 0x3) << 16) #define STM32_DMA_SCR_MSIZE_MASK GENMASK(14, 13) -#define STM32_DMA_SCR_MSIZE(n) ((n & 0x3) << 13) #define STM32_DMA_SCR_PSIZE_MASK GENMASK(12, 11) -#define STM32_DMA_SCR_PSIZE(n) ((n & 0x3) << 11) -#define STM32_DMA_SCR_PSIZE_GET(n) ((n & STM32_DMA_SCR_PSIZE_MASK) >> 11) #define STM32_DMA_SCR_DIR_MASK GENMASK(7, 6) -#define STM32_DMA_SCR_DIR(n) ((n & 0x3) << 6) #define STM32_DMA_SCR_TRBUFF BIT(20) /* Bufferable transfer for USART/UART */ #define STM32_DMA_SCR_CT BIT(19) /* Target in double buffer */ #define STM32_DMA_SCR_DBM BIT(18) /* Double Buffer Mode */ @@ -104,7 +98,6 @@ /* DMA stream x FIFO control register */ #define STM32_DMA_SFCR(x) (0x0024 + 0x18 * (x)) #define STM32_DMA_SFCR_FTH_MASK GENMASK(1, 0) -#define STM32_DMA_SFCR_FTH(n) (n & STM32_DMA_SFCR_FTH_MASK) #define STM32_DMA_SFCR_FEIE BIT(7) /* FIFO error interrupt enable */ #define STM32_DMA_SFCR_DMDIS BIT(2) /* Direct mode disable */ #define STM32_DMA_SFCR_MASK (STM32_DMA_SFCR_FEIE \ @@ -145,11 +138,8 @@ /* DMA Features */ #define STM32_DMA_THRESHOLD_FTR_MASK GENMASK(1, 0) -#define STM32_DMA_THRESHOLD_FTR_GET(n) ((n) & STM32_DMA_THRESHOLD_FTR_MASK) #define STM32_DMA_DIRECT_MODE_MASK BIT(2) -#define STM32_DMA_DIRECT_MODE_GET(n) (((n) & STM32_DMA_DIRECT_MODE_MASK) >> 2) #define STM32_DMA_ALT_ACK_MODE_MASK BIT(4) -#define STM32_DMA_ALT_ACK_MODE_GET(n) (((n) & STM32_DMA_ALT_ACK_MODE_MASK) >> 4) enum stm32_dma_width { STM32_DMA_BYTE, @@ -856,7 +846,8 @@ static int stm32_dma_resume(struct dma_chan *c) sg_req = &chan->desc->sg_req[chan->next_sg - 1]; ndtr = sg_req->chan_reg.dma_sndtr; - offset = (ndtr - chan_reg.dma_sndtr) << STM32_DMA_SCR_PSIZE_GET(chan_reg.dma_scr); + offset = (ndtr - chan_reg.dma_sndtr); + offset <<= FIELD_GET(STM32_DMA_SCR_PSIZE_MASK, chan_reg.dma_scr); spar = sg_req->chan_reg.dma_spar; sm0ar = sg_req->chan_reg.dma_sm0ar; sm1ar = sg_req->chan_reg.dma_sm1ar; @@ -968,16 +959,16 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan, if (src_burst_size < 0) return src_burst_size; - dma_scr = STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_DEV) | - STM32_DMA_SCR_PSIZE(dst_bus_width) | - STM32_DMA_SCR_MSIZE(src_bus_width) | - STM32_DMA_SCR_PBURST(dst_burst_size) | - STM32_DMA_SCR_MBURST(src_burst_size); + dma_scr = FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_MEM_TO_DEV) | + FIELD_PREP(STM32_DMA_SCR_PSIZE_MASK, dst_bus_width) | + FIELD_PREP(STM32_DMA_SCR_MSIZE_MASK, src_bus_width) | + FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, dst_burst_size) | + FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, src_burst_size); /* Set FIFO threshold 
*/ chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK; if (fifoth != STM32_DMA_FIFO_THRESHOLD_NONE) - chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(fifoth); + chan->chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, fifoth); /* Set peripheral address */ chan->chan_reg.dma_spar = chan->dma_sconfig.dst_addr; @@ -1025,16 +1016,16 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan, if (dst_burst_size < 0) return dst_burst_size; - dma_scr = STM32_DMA_SCR_DIR(STM32_DMA_DEV_TO_MEM) | - STM32_DMA_SCR_PSIZE(src_bus_width) | - STM32_DMA_SCR_MSIZE(dst_bus_width) | - STM32_DMA_SCR_PBURST(src_burst_size) | - STM32_DMA_SCR_MBURST(dst_burst_size); + dma_scr = FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_DEV_TO_MEM) | + FIELD_PREP(STM32_DMA_SCR_PSIZE_MASK, src_bus_width) | + FIELD_PREP(STM32_DMA_SCR_MSIZE_MASK, dst_bus_width) | + FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, src_burst_size) | + FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, dst_burst_size); /* Set FIFO threshold */ chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK; if (fifoth != STM32_DMA_FIFO_THRESHOLD_NONE) - chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(fifoth); + chan->chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, fifoth); /* Set peripheral address */ chan->chan_reg.dma_spar = chan->dma_sconfig.src_addr; @@ -1242,16 +1233,15 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); desc->sg_req[i].chan_reg.dma_scr = - STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_MEM) | - STM32_DMA_SCR_PBURST(dma_burst) | - STM32_DMA_SCR_MBURST(dma_burst) | + FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_MEM_TO_MEM) | + FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, dma_burst) | + FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, dma_burst) | STM32_DMA_SCR_MINC | STM32_DMA_SCR_PINC | STM32_DMA_SCR_TCIE | STM32_DMA_SCR_TEIE; desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK; - desc->sg_req[i].chan_reg.dma_sfcr |= - STM32_DMA_SFCR_FTH(threshold); + desc->sg_req[i].chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, threshold); desc->sg_req[i].chan_reg.dma_spar = src + offset; desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset; desc->sg_req[i].chan_reg.dma_sndtr = xfer_count; @@ -1270,7 +1260,7 @@ static u32 stm32_dma_get_remaining_bytes(struct stm32_dma_chan *chan) struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); - width = STM32_DMA_SCR_PSIZE_GET(dma_scr); + width = FIELD_GET(STM32_DMA_SCR_PSIZE_MASK, dma_scr); ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id)); return ndtr << width; @@ -1476,15 +1466,15 @@ static void stm32_dma_set_config(struct stm32_dma_chan *chan, stm32_dma_clear_reg(&chan->chan_reg); chan->chan_reg.dma_scr = cfg->stream_config & STM32_DMA_SCR_CFG_MASK; - chan->chan_reg.dma_scr |= STM32_DMA_SCR_REQ(cfg->request_line); + chan->chan_reg.dma_scr |= FIELD_PREP(STM32_DMA_SCR_REQ_MASK, cfg->request_line); /* Enable Interrupts */ chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE; - chan->threshold = STM32_DMA_THRESHOLD_FTR_GET(cfg->features); - if (STM32_DMA_DIRECT_MODE_GET(cfg->features)) + chan->threshold = FIELD_GET(STM32_DMA_THRESHOLD_FTR_MASK, cfg->features); + if (FIELD_GET(STM32_DMA_DIRECT_MODE_MASK, cfg->features)) chan->threshold = STM32_DMA_FIFO_THRESHOLD_NONE; - if (STM32_DMA_ALT_ACK_MODE_GET(cfg->features)) + if (FIELD_GET(STM32_DMA_ALT_ACK_MODE_MASK, cfg->features)) chan->chan_reg.dma_scr |= STM32_DMA_SCR_TRBUFF; } From c6c1a365d6115f159572106b0bfd9796b0bffd27 Mon Sep 17 00:00:00 
2001 From: Amelie Delaunay Date: Mon, 29 Aug 2022 17:46:43 +0200 Subject: [PATCH 14/60] docs: arm: stm32: introduce STM32 DMA-MDMA chaining feature The STM32 DMA-MDMA chaining feature is available on STM32 SoCs which embed STM32 DMAMUX, DMA and MDMA controllers. This is the case on STM32MP1 SoCs, but also on STM32H7 SoCs; the focus here is on STM32MP1 SoCs, using DDR. This documentation aims to explain how to use the STM32 DMA-MDMA chaining feature in drivers of STM32 peripherals having request lines on STM32 DMA. Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20220829154646.29867-4-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- Documentation/arm/index.rst | 1 + .../arm/stm32/stm32-dma-mdma-chaining.rst | 415 ++++++++++++++++++ 2 files changed, 416 insertions(+) create mode 100644 Documentation/arm/stm32/stm32-dma-mdma-chaining.rst diff --git a/Documentation/arm/index.rst b/Documentation/arm/index.rst index 495ada7915e1..8c636d4a061f 100644 --- a/Documentation/arm/index.rst +++ b/Documentation/arm/index.rst @@ -59,6 +59,7 @@ SoC-specific documents stm32/stm32f429-overview stm32/stm32mp13-overview stm32/stm32mp157-overview + stm32/stm32-dma-mdma-chaining sunxi diff --git a/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst b/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst new file mode 100644 index 000000000000..2945e0e33104 --- /dev/null +++ b/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst @@ -0,0 +1,415 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +STM32 DMA-MDMA chaining +======================= + + +Introduction +------------ + + This document describes the STM32 DMA-MDMA chaining feature. But before going + further, let's introduce the peripherals involved. + + To offload data transfers from the CPU, STM32 microprocessors (MPUs) embed + direct memory access controllers (DMA). + + STM32MP1 SoCs embed both STM32 DMA and STM32 MDMA controllers. STM32 DMA + request routing capabilities are enhanced by a DMA request multiplexer + (STM32 DMAMUX). + + **STM32 DMAMUX** + + STM32 DMAMUX routes any DMA request from a given peripheral to any channel of + the STM32 DMA controllers (STM32MP1 counts two STM32 DMA controllers). + + **STM32 DMA** + + STM32 DMA is mainly used to implement central data buffer storage (usually in + the system SRAM) for different peripherals. It can access external RAMs but + without the ability to generate convenient burst transfers ensuring the best + load of the AXI. + + **STM32 MDMA** + + STM32 MDMA (Master DMA) is mainly used to manage direct data transfers between + RAM data buffers without CPU intervention. It can also be used in a + hierarchical structure that uses STM32 DMA as first level data buffer + interfaces for AHB peripherals, while the STM32 MDMA acts as a second level + DMA with better performance. As an AXI/AHB master, STM32 MDMA can take control + of the AXI/AHB bus. + + +Principles +---------- + + The STM32 DMA-MDMA chaining feature relies on the strengths of STM32 DMA and + STM32 MDMA controllers. + + STM32 DMA has a circular Double Buffer Mode (DBM). At each end of transaction + (when DMA data counter - DMA_SxNDTR - reaches 0), the memory pointers + (configured with DMA_SxM0AR and DMA_SxM1AR) are swapped and the DMA data + counter is automatically reloaded. This allows the SW or the STM32 MDMA to + process one memory area while the second memory area is being filled/used by + the STM32 DMA transfer.
+
+  With the STM32 MDMA linked-list mode, a single request initiates the data
+  array (collection of nodes) to be transferred until the linked-list pointer
+  for the channel is null. The channel transfer complete of the last node is
+  the end of transfer, unless first and last nodes are linked to each other;
+  in such a case, the linked-list loops on to create a circular MDMA transfer.
+
+  STM32 MDMA has direct connections with STM32 DMA. This enables autonomous
+  communication and synchronization between peripherals, thus saving CPU
+  resources and bus congestion. The Transfer Complete signal of a STM32 DMA
+  channel can trigger a STM32 MDMA transfer. STM32 MDMA can clear the request
+  generated by the STM32 DMA by writing to its Interrupt Clear register (whose
+  address is stored in MDMA_CxMAR, and bit mask in MDMA_CxMDR).
+
+  .. table:: STM32 MDMA interconnect table with STM32 DMA
+
+    +--------------+----------------+-----------+------------+
+    | STM32 DMAMUX | STM32 DMA      | STM32 DMA | STM32 MDMA |
+    | channels     | channels       | Transfer  | request    |
+    |              |                | complete  |            |
+    |              |                | signal    |            |
+    +==============+================+===========+============+
+    | Channel *0*  | DMA1 channel 0 | dma1_tcf0 | *0x00*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *1*  | DMA1 channel 1 | dma1_tcf1 | *0x01*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *2*  | DMA1 channel 2 | dma1_tcf2 | *0x02*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *3*  | DMA1 channel 3 | dma1_tcf3 | *0x03*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *4*  | DMA1 channel 4 | dma1_tcf4 | *0x04*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *5*  | DMA1 channel 5 | dma1_tcf5 | *0x05*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *6*  | DMA1 channel 6 | dma1_tcf6 | *0x06*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *7*  | DMA1 channel 7 | dma1_tcf7 | *0x07*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *8*  | DMA2 channel 0 | dma2_tcf0 | *0x08*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *9*  | DMA2 channel 1 | dma2_tcf1 | *0x09*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *10* | DMA2 channel 2 | dma2_tcf2 | *0x0A*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *11* | DMA2 channel 3 | dma2_tcf3 | *0x0B*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *12* | DMA2 channel 4 | dma2_tcf4 | *0x0C*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *13* | DMA2 channel 5 | dma2_tcf5 | *0x0D*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *14* | DMA2 channel 6 | dma2_tcf6 | *0x0E*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *15* | DMA2 channel 7 | dma2_tcf7 | *0x0F*     |
+    +--------------+----------------+-----------+------------+
+
+  The STM32 DMA-MDMA chaining feature then uses an SRAM buffer. STM32MP1 SoCs
+  embed three fast access static internal RAMs of various sizes, used for data
+  storage. Due to STM32 DMA legacy (it comes from microcontrollers), STM32 DMA
+  performance is poor with DDR, while it is optimal with SRAM. Hence the SRAM
+  buffer used between STM32 DMA and STM32 MDMA. This buffer is split into two
+  equal periods, and STM32 DMA uses one period while STM32 MDMA uses the other
+  period simultaneously.
+  ::
+
+                        dma[1:2]-tcf[0:7]
+                       .----------------.
+      ____________     '   _________    V____________
+     | STM32 DMA  |    /  __|>_     \  | STM32 MDMA |
+     |------------|   | /       \    | |------------|
+     | DMA_SxM0AR |<=>| |  SRAM  |   |<=>| []-[]...[] |
+     | DMA_SxM1AR |   | \_______/    | |            |
+     |____________|    \___<|_______/  |____________|
+
+  STM32 DMA-MDMA chaining uses (struct dma_slave_config).peripheral_config to
+  exchange the parameters needed to configure MDMA. These parameters are
+  gathered into a u32 array with three values:
+
+  * the STM32 MDMA request (which is actually the DMAMUX channel ID),
+  * the address of the STM32 DMA register to clear the Transfer Complete
+    interrupt flag,
+  * the mask of the Transfer Complete interrupt flag of the STM32 DMA channel.
+
+Device Tree updates for STM32 DMA-MDMA chaining support
+--------------------------------------------------------
+
+  **1. Allocate a SRAM buffer**
+
+  The SRAM device tree node is defined in the SoC device tree. You can refer
+  to it in your board device tree to define your SRAM pool.
+  ::
+
+    &sram {
+            my_foo_device_dma_pool: dma-sram@0 {
+                    reg = <0x0 0x1000>;
+            };
+    };
+
+  Be careful of the start index, in case there are other SRAM consumers.
+  Define your pool size strategically: to optimise chaining, the idea is that
+  STM32 DMA and STM32 MDMA can work simultaneously, each on one period of the
+  SRAM buffer.
+  If the SRAM period is greater than the expected DMA transfer, then STM32 DMA
+  and STM32 MDMA will work sequentially instead of simultaneously. It is not a
+  functional issue but it is not optimal.
+
+  Don't forget to refer to your SRAM pool in your device node. You need to
+  define a new property.
+  ::
+
+    &my_foo_device {
+            ...
+            my_dma_pool = &my_foo_device_dma_pool;
+    };
+
+  Then get this SRAM pool in your foo driver and allocate your SRAM buffer.
+
+  **2. Allocate a STM32 DMA channel and a STM32 MDMA channel**
+
+  You need to define an extra channel in your device tree node, in addition to
+  the one you should already have for "classic" DMA operation.
+
+  This new channel must be taken from STM32 MDMA channels, so the phandle of
+  the DMA controller to use is the MDMA controller's one.
+  ::
+
+    &my_foo_device {
+            [...]
+            my_dma_pool = &my_foo_device_dma_pool;
+            dmas = <&dmamux1 ...>,                // STM32 DMA channel
+                   <&mdma1 0 0x3 0x1200000a 0 0>; // STM32 MDMA channel
+    };
+
+  Concerning the STM32 MDMA bindings:
+
+  1. The request line number: whatever the value here, it will be overwritten
+     by the MDMA driver with the STM32 DMAMUX channel ID passed through
+     (struct dma_slave_config).peripheral_config
+
+  2. The priority level: choose Very High (0x3) so that your channel will
+     take priority over the others during request arbitration
+
+  3. A 32-bit mask specifying the DMA channel configuration: source and
+     destination address increment, block transfer with 128 bytes per single
+     transfer
+
+  4. The 32-bit value specifying the register to be used to acknowledge the
+     request: it will be overwritten by the MDMA driver, with the DMA channel
+     interrupt flag clear register address passed through
+     (struct dma_slave_config).peripheral_config
+
+  5. The 32-bit mask specifying the value to be written to acknowledge the
+     request: it will be overwritten by the MDMA driver, with the DMA channel
+     Transfer Complete flag passed through
+     (struct dma_slave_config).peripheral_config
+
+Driver updates for STM32 DMA-MDMA chaining support in foo driver
+----------------------------------------------------------------
+
+  **0. (optional) Refactor the original sg_table if dmaengine_prep_slave_sg()**
+
+  In case of dmaengine_prep_slave_sg(), the original sg_table can't be used
+  as is. Two new sg_tables must be created from the original one: one for the
+  STM32 DMA transfer (where the memory address now targets the SRAM buffer
+  instead of the DDR buffer) and one for the STM32 MDMA transfer (where the
+  memory address targets the DDR buffer).
+
+  The new sg_list items must fit the SRAM period length. Here is an example
+  for DMA_DEV_TO_MEM:
+  ::
+
+    /*
+     * Assuming sgl and nents, respectively the initial scatterlist and its
+     * length.
+     * Assuming sram_dma_buf and sram_period, respectively the memory
+     * allocated from the pool for DMA usage, and the length of the period,
+     * which is half of the sram_dma_buf size.
+     */
+    struct sg_table new_dma_sgt, new_mdma_sgt;
+    struct scatterlist *s, *_sgl;
+    dma_addr_t ddr_dma_buf;
+    u32 new_nents = 0, len;
+    int i;
+
+    /* Count the number of entries needed */
+    for_each_sg(sgl, s, nents, i)
+            if (sg_dma_len(s) > sram_period)
+                    new_nents += DIV_ROUND_UP(sg_dma_len(s), sram_period);
+            else
+                    new_nents++;
+
+    /* Create sg table for STM32 DMA channel */
+    ret = sg_alloc_table(&new_dma_sgt, new_nents, GFP_ATOMIC);
+    if (ret)
+            dev_err(dev, "DMA sg table alloc failed\n");
+
+    _sgl = sgl;
+    len = sg_dma_len(sgl);
+    for_each_sg(new_dma_sgt.sgl, s, new_dma_sgt.nents, i) {
+            sg_dma_len(s) = min(len, sram_period);
+            /* Targets the beginning = first half of the SRAM buffer */
+            s->dma_address = sram_dma_buf;
+            /*
+             * Targets the second half of the SRAM buffer
+             * for odd indexes of the item of the sg_list
+             */
+            if (i & 1)
+                    s->dma_address += sram_period;
+            len -= sg_dma_len(s);
+            if (!len && sg_next(_sgl)) {
+                    _sgl = sg_next(_sgl);
+                    len = sg_dma_len(_sgl);
+            }
+    }
+
+    /* Create sg table for STM32 MDMA channel */
+    ret = sg_alloc_table(&new_mdma_sgt, new_nents, GFP_ATOMIC);
+    if (ret)
+            dev_err(dev, "MDMA sg_table alloc failed\n");
+
+    _sgl = sgl;
+    len = sg_dma_len(sgl);
+    ddr_dma_buf = sg_dma_address(sgl);
+    for_each_sg(new_mdma_sgt.sgl, s, new_mdma_sgt.nents, i) {
+            size_t bytes = min_t(size_t, len, sram_period);
+
+            sg_dma_len(s) = bytes;
+            sg_dma_address(s) = ddr_dma_buf;
+            len -= bytes;
+
+            if (!len && sg_next(_sgl)) {
+                    _sgl = sg_next(_sgl);
+                    len = sg_dma_len(_sgl);
+                    ddr_dma_buf = sg_dma_address(_sgl);
+            } else {
+                    ddr_dma_buf += bytes;
+            }
+    }
+
+  Don't forget to release these new sg_tables after getting the descriptors
+  with dmaengine_prep_slave_sg().
+
+  **1. Set controller specific parameters**
+
+  First, use dmaengine_slave_config() with a struct dma_slave_config to
+  configure the STM32 DMA channel. You just have to take care of the DMA
+  addresses: the memory address (depending on the transfer direction) must
+  point to your SRAM buffer; and set
+  (struct dma_slave_config).peripheral_size != 0.
+
+  The STM32 DMA driver checks (struct dma_slave_config).peripheral_size to
+  determine whether chaining is being used. If it is used, then the STM32 DMA
+  driver fills (struct dma_slave_config).peripheral_config with an array of
+  three u32: the first one containing the STM32 DMAMUX channel ID, the second
+  one the channel interrupt flag clear register address, and the third one the
+  channel Transfer Complete flag mask.
+
+  Then, use dmaengine_slave_config() with another struct dma_slave_config to
+  configure the STM32 MDMA channel. Take care of the DMA addresses: the device
+  address (depending on the transfer direction) must point to your SRAM
+  buffer, and the memory address must point to the buffer originally used for
+  "classic" DMA operation.
+  Use the previous (struct dma_slave_config).peripheral_size and
+  .peripheral_config, which have been updated by the STM32 DMA driver, to set
+  the .peripheral_size and .peripheral_config of the struct dma_slave_config
+  used to configure the STM32 MDMA channel.
+  ::
+
+    struct dma_slave_config dma_conf;
+    struct dma_slave_config mdma_conf;
+
+    memset(&dma_conf, 0, sizeof(dma_conf));
+    [...]
+    dma_conf.direction = DMA_DEV_TO_MEM;
+    dma_conf.dst_addr = sram_dma_buf;  // SRAM buffer
+    dma_conf.peripheral_size = 1;      // peripheral_size != 0 => chaining
+
+    dmaengine_slave_config(dma_chan, &dma_conf);
+
+    memset(&mdma_conf, 0, sizeof(mdma_conf));
+    mdma_conf.direction = DMA_DEV_TO_MEM;
+    mdma_conf.src_addr = sram_dma_buf;                        // SRAM buffer
+    mdma_conf.dst_addr = rx_dma_buf;                          // original memory buffer
+    mdma_conf.peripheral_size = dma_conf.peripheral_size;     // <- dma_conf
+    mdma_conf.peripheral_config = dma_conf.peripheral_config; // <- dma_conf
+
+    dmaengine_slave_config(mdma_chan, &mdma_conf);
+
+  **2. Get a descriptor for STM32 DMA channel transaction**
+
+  In the same way you get your descriptor for your "classic" DMA operation,
+  you just have to replace the original sg_list (in case of
+  dmaengine_prep_slave_sg()) with the new sg_list using the SRAM buffer, or
+  replace the original buffer address, length and period (in case of
+  dmaengine_prep_dma_cyclic()) with the new SRAM buffer.
+
+  **3. Get a descriptor for STM32 MDMA channel transaction**
+
+  If you previously got the descriptor (for STM32 DMA) with
+
+  * dmaengine_prep_slave_sg(), then use dmaengine_prep_slave_sg() for
+    STM32 MDMA;
+  * dmaengine_prep_dma_cyclic(), then use dmaengine_prep_dma_cyclic() for
+    STM32 MDMA.
+
+  Use the new sg_list using the SRAM buffer (in case of
+  dmaengine_prep_slave_sg()) or, depending on the transfer direction, either
+  the original DDR buffer (in case of DMA_DEV_TO_MEM) or the SRAM buffer (in
+  case of DMA_MEM_TO_DEV), the source address having been previously set with
+  dmaengine_slave_config().
+
+  **4. Submit both transactions**
+
+  Before submitting your transactions, you may need to define on which
+  descriptor you want a callback to be called at the end of the transfer
+  (dmaengine_prep_slave_sg()) or at each period (dmaengine_prep_dma_cyclic()).
+  Depending on the direction, set the callback on the descriptor that finishes
+  the overall transfer:
+
+  * DMA_DEV_TO_MEM: set the callback on the "MDMA" descriptor
+  * DMA_MEM_TO_DEV: set the callback on the "DMA" descriptor
+
+  Then, submit the descriptors in any order, with dmaengine_submit().
+
+  **5. Issue pending requests (and wait for callback notification)**
+
+  As the STM32 MDMA channel transfer is triggered by STM32 DMA, you must issue
+  the STM32 MDMA channel before the STM32 DMA channel.
+
+  If set, your callback will be called to notify you about the end of the
+  overall transfer or about each period completion.
+
+  Don't forget to terminate both channels. The STM32 DMA channel is configured
+  in cyclic Double-Buffer mode, so it won't be disabled by HW: you need to
+  terminate it. The STM32 MDMA channel will be stopped by HW in case of sg
+  transfer, but not in case of cyclic transfer. You can terminate it whatever
+  the kind of transfer.
+
+  **STM32 DMA-MDMA chaining DMA_MEM_TO_DEV special case**
+
+  STM32 DMA-MDMA chaining in DMA_MEM_TO_DEV is a special case. Indeed, the
+  STM32 MDMA feeds the SRAM buffer with the DDR data, and the STM32 DMA reads
+  data from the SRAM buffer. So some data (the first period) has to be copied
+  into the SRAM buffer by the time the STM32 DMA starts to read.
+
+  A trick could be pausing the STM32 DMA channel (that will raise a Transfer
+  Complete signal, triggering the STM32 MDMA channel), but the first data read
+  by the STM32 DMA could be "wrong". The proper way is to prepare the first
+  SRAM period with dmaengine_prep_dma_memcpy(). Then this first period should
+  be "removed" from the sg or the cyclic transfer.
+
+  Due to this complexity, rather use the STM32 DMA-MDMA chaining for
+  DMA_DEV_TO_MEM and keep the "classic" DMA usage for DMA_MEM_TO_DEV, unless
+  you are ready to handle this extra setup.
+
+Resources
+---------
+
+  Application notes, datasheets and reference manuals are available on the ST
+  website (STM32MP1_).
+
+  Three application notes (AN5224_, AN4031_ & AN5001_) specifically deal with
+  STM32 DMAMUX, STM32 DMA and STM32 MDMA.
+
+.. _STM32MP1: https://www.st.com/en/microcontrollers-microprocessors/stm32mp1-series.html
+.. _AN5224: https://www.st.com/resource/en/application_note/an5224-stm32-dmamux-the-dma-request-router-stmicroelectronics.pdf
+.. _AN4031: https://www.st.com/resource/en/application_note/dm00046011-using-the-stm32f2-stm32f4-and-stm32f7-series-dma-controller-stmicroelectronics.pdf
+.. _AN5001: https://www.st.com/resource/en/application_note/an5001-stm32cube-expansion-package-for-stm32h7-series-mdma-stmicroelectronics.pdf
+
+:Authors:
+
+- Amelie Delaunay
\ No newline at end of file

From b9a22954f08074b2d0126e45636eb77b66395ac5 Mon Sep 17 00:00:00 2001
From: Amelie Delaunay
Date: Mon, 29 Aug 2022 17:46:44 +0200
Subject: [PATCH 15/60] dmaengine: stm32-dmamux: set dmamux channel id in dma
 features bitfield

STM32 DMAMUX is used with STM32 DMA1 and DMA2:
- DMAMUX channels 0 to 7 are connected to DMA1 channels 0 to 7
- DMAMUX channels 8 to 15 are connected to DMA2 channels 0 to 7

STM32 MDMA can be triggered by DMA1 and DMA2 channels transfer complete,
and the "request line number" is the DMAMUX channel id (e.g. DMA2
channel 0 triggers MDMA with request line 8).

To properly configure the MDMA, set the DMAMUX channel id in the DMA
features bitfield, so that the DMA driver can update struct
dma_slave_config peripheral_config accordingly.

Signed-off-by: Amelie Delaunay
Link: https://lore.kernel.org/r/20220829154646.29867-5-amelie.delaunay@foss.st.com
Signed-off-by: Vinod Koul
---
 drivers/dma/stm32-dmamux.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c
index eee0c5aa5fb5..b431f9da9206 100644
--- a/drivers/dma/stm32-dmamux.c
+++ b/drivers/dma/stm32-dmamux.c
@@ -147,7 +147,7 @@ static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec,
 	mux->request = dma_spec->args[0];
 
 	/* craft DMA spec */
-	dma_spec->args[3] = dma_spec->args[2];
+	dma_spec->args[3] = dma_spec->args[2] | mux->chan_id << 16;
 	dma_spec->args[2] = dma_spec->args[1];
 	dma_spec->args[1] = 0;
 	dma_spec->args[0] = mux->chan_id - min;

From 723795173ce1113fb478c18dc57e788b3eba3524 Mon Sep 17 00:00:00 2001
From: Amelie Delaunay
Date: Mon, 29 Aug 2022 17:46:45 +0200
Subject: [PATCH 16/60] dmaengine: stm32-dma: add support to trigger STM32 MDMA

STM32 MDMA can be triggered by STM32 DMA channels transfer complete.
The "request line number" triggering STM32 MDMA is the STM32 DMAMUX
channel id set by the stm32-dmamux driver in dma_spec->args[3].

The stm32-dma driver fills the struct stm32_dma_mdma_config used to
configure the MDMA with struct dma_slave_config
.peripheral_config/.peripheral_size.
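From the DMA client side, the handshake implemented by this patch boils
down to a sketch like this (the channel and buffer names are hypothetical):

	struct dma_slave_config cfg = { };

	cfg.direction = DMA_DEV_TO_MEM;
	cfg.dst_addr = sram_dma_buf;
	cfg.peripheral_size = 1;	/* any non-zero value requests chaining */
	dmaengine_slave_config(dma_chan, &cfg);
	/*
	 * stm32-dma has now set cfg.peripheral_config to its
	 * struct stm32_dma_mdma_config (stream_id, ifcr, tcf),
	 * ready to be copied into the MDMA channel's dma_slave_config.
	 */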
Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20220829154646.29867-6-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 47 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 6aa281561f38..4891a1767e5a 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -140,6 +140,7 @@ #define STM32_DMA_THRESHOLD_FTR_MASK GENMASK(1, 0) #define STM32_DMA_DIRECT_MODE_MASK BIT(2) #define STM32_DMA_ALT_ACK_MODE_MASK BIT(4) +#define STM32_DMA_MDMA_STREAM_ID_MASK GENMASK(19, 16) enum stm32_dma_width { STM32_DMA_BYTE, @@ -193,6 +194,19 @@ struct stm32_dma_desc { struct stm32_dma_sg_req sg_req[]; }; +/** + * struct stm32_dma_mdma_config - STM32 DMA MDMA configuration + * @stream_id: DMA request to trigger STM32 MDMA transfer + * @ifcr: DMA interrupt flag clear register address, + * used by STM32 MDMA to clear DMA Transfer Complete flag + * @tcf: DMA Transfer Complete flag + */ +struct stm32_dma_mdma_config { + u32 stream_id; + u32 ifcr; + u32 tcf; +}; + struct stm32_dma_chan { struct virt_dma_chan vchan; bool config_init; @@ -207,6 +221,8 @@ struct stm32_dma_chan { u32 mem_burst; u32 mem_width; enum dma_status status; + bool trig_mdma; + struct stm32_dma_mdma_config mdma_config; }; struct stm32_dma_device { @@ -386,6 +402,13 @@ static int stm32_dma_slave_config(struct dma_chan *c, memcpy(&chan->dma_sconfig, config, sizeof(*config)); + /* Check if user is requesting DMA to trigger STM32 MDMA */ + if (config->peripheral_size) { + config->peripheral_config = &chan->mdma_config; + config->peripheral_size = sizeof(chan->mdma_config); + chan->trig_mdma = true; + } + chan->config_init = true; return 0; @@ -561,6 +584,10 @@ static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) sg_req = &chan->desc->sg_req[chan->next_sg]; reg = &sg_req->chan_reg; + /* When DMA triggers STM32 MDMA, DMA Transfer Complete is managed by STM32 MDMA */ + if (chan->trig_mdma && chan->dma_sconfig.direction != DMA_MEM_TO_DEV) + reg->dma_scr &= ~STM32_DMA_SCR_TCIE; + reg->dma_scr &= ~STM32_DMA_SCR_EN; stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr); stm32_dma_write(dmadev, STM32_DMA_SPAR(chan->id), reg->dma_spar); @@ -710,6 +737,8 @@ static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan, u32 scr) if (chan->desc->cyclic) { vchan_cyclic_callback(&chan->desc->vdesc); + if (chan->trig_mdma) + return; stm32_dma_sg_inc(chan); /* cyclic while CIRC/DBM disable => post resume reconfiguration needed */ if (!(scr & (STM32_DMA_SCR_CIRC | STM32_DMA_SCR_DBM))) @@ -1085,6 +1114,10 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg( else chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL; + /* Activate Double Buffer Mode if DMA triggers STM32 MDMA and more than 1 sg */ + if (chan->trig_mdma && sg_len > 1) + chan->chan_reg.dma_scr |= STM32_DMA_SCR_DBM; + for_each_sg(sgl, sg, sg_len, i) { ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, sg_dma_len(sg), @@ -1106,6 +1139,8 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg( desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar; desc->sg_req[i].chan_reg.dma_sm0ar = sg_dma_address(sg); desc->sg_req[i].chan_reg.dma_sm1ar = sg_dma_address(sg); + if (chan->trig_mdma) + desc->sg_req[i].chan_reg.dma_sm1ar += sg_dma_len(sg); desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items; } @@ -1193,8 +1228,11 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic( 
desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar; desc->sg_req[i].chan_reg.dma_sm0ar = buf_addr; desc->sg_req[i].chan_reg.dma_sm1ar = buf_addr; + if (chan->trig_mdma) + desc->sg_req[i].chan_reg.dma_sm1ar += period_len; desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items; - buf_addr += period_len; + if (!chan->trig_mdma) + buf_addr += period_len; } desc->num_sgs = num_periods; @@ -1476,6 +1514,7 @@ static void stm32_dma_set_config(struct stm32_dma_chan *chan, chan->threshold = STM32_DMA_FIFO_THRESHOLD_NONE; if (FIELD_GET(STM32_DMA_ALT_ACK_MODE_MASK, cfg->features)) chan->chan_reg.dma_scr |= STM32_DMA_SCR_TRBUFF; + chan->mdma_config.stream_id = FIELD_GET(STM32_DMA_MDMA_STREAM_ID_MASK, cfg->features); } static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec, @@ -1615,6 +1654,12 @@ static int stm32_dma_probe(struct platform_device *pdev) chan->id = i; chan->vchan.desc_free = stm32_dma_desc_free; vchan_init(&chan->vchan, dd); + + chan->mdma_config.ifcr = res->start; + chan->mdma_config.ifcr += STM32_DMA_IFCR(chan->id); + + chan->mdma_config.tcf = STM32_DMA_TCI; + chan->mdma_config.tcf <<= STM32_DMA_FLAGS_SHIFT(chan->id); } ret = dma_async_device_register(dd); From 69687432277132c3740bee3a9ae3d375cf6e86db Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 29 Aug 2022 17:46:46 +0200 Subject: [PATCH 17/60] dmaengine: stm32-mdma: add support to be triggered by STM32 DMA STM32 MDMA can be triggered by STM32 DMA channels transfer complete. In case of non-null struct dma_slave_config .peripheral_size, it means the DMA client wants the DMA to trigger the MDMA. stm32-mdma driver gets the request id, the mask_addr, and the mask_data in struct stm32_mdma_dma_config passed by DMA with struct dma_slave_config .peripheral_config/.peripheral_size. Then, as DMA is configured in Double-Buffer mode, and MDMA channel will transfer data from/to SRAM to/from DDR, then bursts are optimized. 
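To illustrate the burst optimization (assuming STM32_MDMA_MAX_BUF_LEN is
128 bytes, as defined in stm32-mdma.c):

	/* with a 4-byte bus width, the whole 128-byte budget is one burst */
	src_maxburst = 128 / 4;		/* 32 beats per burst */
	/* with a 1-byte bus width */
	src_maxburst = 128 / 1;		/* 128 beats per burst */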
Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20220829154646.29867-7-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 70 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index b11927ed4367..e28acbcb53f4 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -199,6 +199,7 @@ struct stm32_mdma_chan_config { u32 transfer_config; u32 mask_addr; u32 mask_data; + bool m2m_hw; /* True when MDMA is triggered by STM32 DMA */ }; struct stm32_mdma_hwdesc { @@ -227,6 +228,12 @@ struct stm32_mdma_desc { struct stm32_mdma_desc_node node[]; }; +struct stm32_mdma_dma_config { + u32 request; /* STM32 DMA channel stream id, triggering MDMA */ + u32 cmar; /* STM32 DMA interrupt flag clear register address */ + u32 cmdr; /* STM32 DMA Transfer Complete flag */ +}; + struct stm32_mdma_chan { struct virt_dma_chan vchan; struct dma_pool *desc_pool; @@ -539,13 +546,23 @@ static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan, dst_addr = chan->dma_config.dst_addr; /* Set device data size */ + if (chan_config->m2m_hw) + dst_addr_width = stm32_mdma_get_max_width(dst_addr, buf_len, + STM32_MDMA_MAX_BUF_LEN); dst_bus_width = stm32_mdma_get_width(chan, dst_addr_width); if (dst_bus_width < 0) return dst_bus_width; ctcr &= ~STM32_MDMA_CTCR_DSIZE_MASK; ctcr |= STM32_MDMA_CTCR_DSIZE(dst_bus_width); + if (chan_config->m2m_hw) { + ctcr &= ~STM32_MDMA_CTCR_DINCOS_MASK; + ctcr |= STM32_MDMA_CTCR_DINCOS(dst_bus_width); + } /* Set device burst value */ + if (chan_config->m2m_hw) + dst_maxburst = STM32_MDMA_MAX_BUF_LEN / dst_addr_width; + dst_best_burst = stm32_mdma_get_best_burst(buf_len, tlen, dst_maxburst, dst_addr_width); @@ -588,13 +605,24 @@ static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan, src_addr = chan->dma_config.src_addr; /* Set device data size */ + if (chan_config->m2m_hw) + src_addr_width = stm32_mdma_get_max_width(src_addr, buf_len, + STM32_MDMA_MAX_BUF_LEN); + src_bus_width = stm32_mdma_get_width(chan, src_addr_width); if (src_bus_width < 0) return src_bus_width; ctcr &= ~STM32_MDMA_CTCR_SSIZE_MASK; ctcr |= STM32_MDMA_CTCR_SSIZE(src_bus_width); + if (chan_config->m2m_hw) { + ctcr &= ~STM32_MDMA_CTCR_SINCOS_MASK; + ctcr |= STM32_MDMA_CTCR_SINCOS(src_bus_width); + } /* Set device burst value */ + if (chan_config->m2m_hw) + src_maxburst = STM32_MDMA_MAX_BUF_LEN / src_addr_width; + src_best_burst = stm32_mdma_get_best_burst(buf_len, tlen, src_maxburst, src_addr_width); @@ -702,11 +730,15 @@ static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan, { struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); struct dma_slave_config *dma_config = &chan->dma_config; + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; struct scatterlist *sg; dma_addr_t src_addr, dst_addr; - u32 ccr, ctcr, ctbr; + u32 m2m_hw_period, ccr, ctcr, ctbr; int i, ret = 0; + if (chan_config->m2m_hw) + m2m_hw_period = sg_dma_len(sgl); + for_each_sg(sgl, sg, sg_len, i) { if (sg_dma_len(sg) > STM32_MDMA_MAX_BLOCK_LEN) { dev_err(chan2dev(chan), "Invalid block len\n"); @@ -716,6 +748,8 @@ static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan, if (direction == DMA_MEM_TO_DEV) { src_addr = sg_dma_address(sg); dst_addr = dma_config->dst_addr; + if (chan_config->m2m_hw && (i & 1)) + dst_addr += m2m_hw_period; ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, &ctcr, &ctbr, src_addr, sg_dma_len(sg)); @@ -723,6 +757,8 @@ 
static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan, src_addr); } else { src_addr = dma_config->src_addr; + if (chan_config->m2m_hw && (i & 1)) + src_addr += m2m_hw_period; dst_addr = sg_dma_address(sg); ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, &ctcr, &ctbr, dst_addr, @@ -755,6 +791,7 @@ stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl, unsigned long flags, void *context) { struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; struct stm32_mdma_desc *desc; int i, ret; @@ -777,6 +814,21 @@ stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl, if (ret < 0) goto xfer_setup_err; + /* + * In case of M2M HW transfer triggered by STM32 DMA, we do not have to clear the + * transfer complete flag by hardware in order to let the CPU rearm the STM32 DMA + * with the next sg element and update some data in dmaengine framework. + */ + if (chan_config->m2m_hw && direction == DMA_MEM_TO_DEV) { + struct stm32_mdma_hwdesc *hwdesc; + + for (i = 0; i < sg_len; i++) { + hwdesc = desc->node[i].hwdesc; + hwdesc->cmar = 0; + hwdesc->cmdr = 0; + } + } + desc->cyclic = false; return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); @@ -798,6 +850,7 @@ stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr, struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); struct dma_slave_config *dma_config = &chan->dma_config; + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; struct stm32_mdma_desc *desc; dma_addr_t src_addr, dst_addr; u32 ccr, ctcr, ctbr, count; @@ -858,8 +911,12 @@ stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr, if (direction == DMA_MEM_TO_DEV) { src_addr = buf_addr + i * period_len; dst_addr = dma_config->dst_addr; + if (chan_config->m2m_hw && (i & 1)) + dst_addr += period_len; } else { src_addr = dma_config->src_addr; + if (chan_config->m2m_hw && (i & 1)) + src_addr += period_len; dst_addr = buf_addr + i * period_len; } @@ -1244,6 +1301,17 @@ static int stm32_mdma_slave_config(struct dma_chan *c, memcpy(&chan->dma_config, config, sizeof(*config)); + /* Check if user is requesting STM32 DMA to trigger MDMA */ + if (config->peripheral_size) { + struct stm32_mdma_dma_config *mdma_config; + + mdma_config = (struct stm32_mdma_dma_config *)chan->dma_config.peripheral_config; + chan->chan_config.request = mdma_config->request; + chan->chan_config.mask_addr = mdma_config->cmar; + chan->chan_config.mask_data = mdma_config->cmdr; + chan->chan_config.m2m_hw = true; + } + return 0; } From d5988dcc760ca5cf683867163c7c78454fb2ec31 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 13 Jul 2022 20:22:32 +0300 Subject: [PATCH 18/60] dmaengine: hsu: Finish conversion to managed resources With help of devm_add_action_or_reset() we may finish conversion the driver to use managed resources. 
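The devm action runs automatically on probe failure and on driver detach. A
generic sketch of the pattern (the foo_* names are hypothetical; only
devm_add_action_or_reset() is the real API):

	static void foo_teardown(void *data)
	{
		foo_destroy(data);	/* undo whatever foo_create() did */
	}

	static int foo_probe(struct device *dev)
	{
		struct foo *f = foo_create(dev);
		int ret;

		/* on error, foo_teardown(f) is called immediately */
		ret = devm_add_action_or_reset(dev, foo_teardown, f);
		if (ret)
			return ret;

		/* no .remove() callback is needed for this resource anymore */
		return 0;
	}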
Signed-off-by: Andy Shevchenko
Link: https://lore.kernel.org/r/20220713172235.22611-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Vinod Koul
---
 drivers/dma/hsu/pci.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c
index 6a2df3dd78d0..4ed6a4ef1512 100644
--- a/drivers/dma/hsu/pci.c
+++ b/drivers/dma/hsu/pci.c
@@ -47,8 +47,14 @@ static irqreturn_t hsu_pci_irq(int irq, void *dev)
 	return IRQ_RETVAL(ret);
 }
 
+static void hsu_pci_dma_remove(void *chip)
+{
+	hsu_dma_remove(chip);
+}
+
 static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct device *dev = &pdev->dev;
 	struct hsu_dma_chip *chip;
 	int ret;
 
@@ -87,9 +93,13 @@ static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (ret)
 		return ret;
 
-	ret = request_irq(chip->irq, hsu_pci_irq, 0, "hsu_dma_pci", chip);
+	ret = devm_add_action_or_reset(dev, hsu_pci_dma_remove, chip);
 	if (ret)
-		goto err_register_irq;
+		return ret;
+
+	ret = devm_request_irq(dev, chip->irq, hsu_pci_irq, 0, "hsu_dma_pci", chip);
+	if (ret)
+		return ret;
 
 	/*
 	 * On Intel Tangier B0 and Anniedale the interrupt line, disregarding
@@ -105,18 +115,6 @@ static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_set_drvdata(pdev, chip);
 
 	return 0;
-
-err_register_irq:
-	hsu_dma_remove(chip);
-	return ret;
-}
-
-static void hsu_pci_remove(struct pci_dev *pdev)
-{
-	struct hsu_dma_chip *chip = pci_get_drvdata(pdev);
-
-	free_irq(chip->irq, chip);
-	hsu_dma_remove(chip);
 }
 
 static const struct pci_device_id hsu_pci_id_table[] = {
@@ -130,7 +128,6 @@ static struct pci_driver hsu_pci_driver = {
 	.name		= "hsu_dma_pci",
 	.id_table	= hsu_pci_id_table,
 	.probe		= hsu_pci_probe,
-	.remove		= hsu_pci_remove,
 };
 
 module_pci_driver(hsu_pci_driver);

From d6b76a45d5ae241af61cabd03496a376bd63207d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Wed, 13 Jul 2022 20:22:33 +0300
Subject: [PATCH 19/60] dmaengine: hsu: using for_each_set_bit to simplify
 the code

It's cleaner to use for_each_set_bit() instead of open-coding it.

Signed-off-by: Andy Shevchenko
Link: https://lore.kernel.org/r/20220713172235.22611-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Vinod Koul
---
 drivers/dma/hsu/pci.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c
index 4ed6a4ef1512..8cdf715a7e9e 100644
--- a/drivers/dma/hsu/pci.c
+++ b/drivers/dma/hsu/pci.c
@@ -26,22 +26,19 @@
 static irqreturn_t hsu_pci_irq(int irq, void *dev)
 {
 	struct hsu_dma_chip *chip = dev;
-	u32 dmaisr;
-	u32 status;
+	unsigned long dmaisr;
 	unsigned short i;
+	u32 status;
 	int ret = 0;
 	int err;
 
 	dmaisr = readl(chip->regs + HSU_PCI_DMAISR);
-	for (i = 0; i < chip->hsu->nr_channels; i++) {
-		if (dmaisr & 0x1) {
-			err = hsu_dma_get_status(chip, i, &status);
-			if (err > 0)
-				ret |= 1;
-			else if (err == 0)
-				ret |= hsu_dma_do_irq(chip, i, status);
-		}
-		dmaisr >>= 1;
+	for_each_set_bit(i, &dmaisr, chip->hsu->nr_channels) {
+		err = hsu_dma_get_status(chip, i, &status);
+		if (err > 0)
+			ret |= 1;
+		else if (err == 0)
+			ret |= hsu_dma_do_irq(chip, i, status);
 	}
 
 	return IRQ_RETVAL(ret);

From 2c40c787d4cac0fab33ac2ee879ae2305c2d8ded Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Wed, 13 Jul 2022 20:22:34 +0300
Subject: [PATCH 20/60] dmaengine: hsu: Use GENMASK() consistently

For the masks, replace chains of BIT() macros with GENMASK(). While at it,
explicitly include bits.h.
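For instance, with the masks touched below:

	#include <linux/bits.h>

	/* GENMASK(h, l) covers bits l..h inclusive */
	GENMASK(11, 8)	/* == BIT(11) | BIT(10) | BIT(9) | BIT(8) == 0x00000f00 */
	GENMASK(31, 30)	/* == BIT(31) | BIT(30) == 0xc0000000 */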
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220713172235.22611-3-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/hsu/hsu.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index 9e5956345748..1c1195709c2f 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -10,6 +10,7 @@ #ifndef __DMA_HSU_H__ #define __DMA_HSU_H__ +#include #include #include @@ -36,11 +37,11 @@ /* Bits in HSU_CH_SR */ #define HSU_CH_SR_DESCTO(x) BIT(8 + (x)) -#define HSU_CH_SR_DESCTO_ANY (BIT(11) | BIT(10) | BIT(9) | BIT(8)) +#define HSU_CH_SR_DESCTO_ANY GENMASK(11, 8) #define HSU_CH_SR_CHE BIT(15) #define HSU_CH_SR_DESCE(x) BIT(16 + (x)) -#define HSU_CH_SR_DESCE_ANY (BIT(19) | BIT(18) | BIT(17) | BIT(16)) -#define HSU_CH_SR_CDESC_ANY (BIT(31) | BIT(30)) +#define HSU_CH_SR_DESCE_ANY GENMASK(19, 16) +#define HSU_CH_SR_CDESC_ANY GENMASK(31, 30) /* Bits in HSU_CH_CR */ #define HSU_CH_CR_CHA BIT(0) From 9c06002682ae0cdd01caf011899224acbc6582b2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 13 Jul 2022 20:22:35 +0300 Subject: [PATCH 21/60] dmaengine: hsu: Include headers we are direct user of For the sake of integrity, include headers we are direct user of. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220713172235.22611-4-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/hsu/hsu.c | 8 ++++++++ drivers/dma/hsu/hsu.h | 5 ++++- drivers/dma/hsu/pci.c | 1 + include/linux/dma/hsu.h | 6 ++++-- include/linux/platform_data/dma-hsu.h | 2 +- 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index 92caae55aece..af5a2e252c25 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -16,12 +16,20 @@ * port 3, and so on. */ +#include #include +#include #include #include #include +#include +#include #include +#include +#include #include +#include +#include #include "hsu.h" diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index 1c1195709c2f..3bca577b98a1 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -11,7 +11,10 @@ #define __DMA_HSU_H__ #include -#include +#include +#include +#include + #include #include "../virt-dma.h" diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c index 8cdf715a7e9e..0fcc0c0c22fc 100644 --- a/drivers/dma/hsu/pci.c +++ b/drivers/dma/hsu/pci.c @@ -10,6 +10,7 @@ #include #include +#include #include #include diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h index a6b7bc707356..77ea602c287c 100644 --- a/include/linux/dma/hsu.h +++ b/include/linux/dma/hsu.h @@ -8,11 +8,13 @@ #ifndef _DMA_HSU_H #define _DMA_HSU_H -#include -#include +#include +#include +#include #include +struct device; struct hsu_dma; /** diff --git a/include/linux/platform_data/dma-hsu.h b/include/linux/platform_data/dma-hsu.h index c65b412b2b33..611bae193c1c 100644 --- a/include/linux/platform_data/dma-hsu.h +++ b/include/linux/platform_data/dma-hsu.h @@ -8,7 +8,7 @@ #ifndef _PLATFORM_DATA_DMA_HSU_H #define _PLATFORM_DATA_DMA_HSU_H -#include +struct device; struct hsu_dma_slave { struct device *dma_dev; From 493c1141f791a0a8af5d1745bdf9f159f564ca3f Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 29 Jul 2022 12:44:33 +0200 Subject: [PATCH 22/60] dt-bindings: dma: mediatek,uart-dma: Add binding for MT6795 SoC Add mediatek,mt6795-uart-dma to the compatibles list to support the MT6795 Helio X10 SoC's UART APDMA. 
Signed-off-by: AngeloGioacchino Del Regno
Acked-by: Krzysztof Kozlowski
Link: https://lore.kernel.org/r/20220729104441.39177-2-angelogioacchino.delregno@collabora.com
Signed-off-by: Vinod Koul
---
 Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml b/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml
index 19ea8dcbcbce..9ab4d81ead35 100644
--- a/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml
@@ -22,6 +22,7 @@ properties:
     - items:
         - enum:
             - mediatek,mt2712-uart-dma
+            - mediatek,mt6795-uart-dma
            - mediatek,mt8365-uart-dma
            - mediatek,mt8516-uart-dma
        - const: mediatek,mt6577-uart-dma

From 7c94dcfa8fcff2dba53915f1dabfee49a3df8b88 Mon Sep 17 00:00:00 2001
From: Vaishnav Achath
Date: Tue, 2 Aug 2022 11:18:35 +0530
Subject: [PATCH 23/60] dmaengine: ti: k3-udma: Reset UDMA_CHAN_RT byte
 counters to prevent overflow

UDMA_CHAN_RT_*BCNT_REG stores the real-time channel bytecount statistics.
These registers are 32-bit hardware counters and the driver uses them to
monitor the operational progress status for a channel. When transferring
more than 4 GB of data, it was observed that these counters overflow, the
completion calculation of an operation gets affected, and the transfer
hangs indefinitely.

This commit adds changes to decrease the byte count for every completed
transaction, so that these registers never overflow and proper byte count
statistics are maintained for the ongoing transaction by the RT counters.

Earlier, uc->bcnt used to maintain a count of the completed bytes on the
driver side; since the RT counters now maintain the statistics of the
current transaction, uc->bcnt is no longer needed.
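To illustrate why this works (a sketch; on this IP, writing a value to an
RT counter register subtracts that value from it):

	/* a 32-bit counter wraps after 2^32 bytes transferred (4 GiB) ... */
	u32 bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);

	/*
	 * ... so subtract each completed descriptor's size on completion,
	 * keeping the running count bounded by a single transfer's size.
	 */
	udma_decrement_byte_counters(uc, d->residue);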
Signed-off-by: Vaishnav Achath Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20220802054835.19482-1-vaishnav.a@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 2f0d2c68c93c..fcfcde947b30 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -300,8 +300,6 @@ struct udma_chan { struct udma_tx_drain tx_drain; - u32 bcnt; /* number of bytes completed since the start of the channel */ - /* Channel configuration parameters */ struct udma_chan_config config; @@ -757,6 +755,20 @@ static void udma_reset_rings(struct udma_chan *uc) } } +static void udma_decrement_byte_counters(struct udma_chan *uc, u32 val) +{ + if (uc->desc->dir == DMA_DEV_TO_MEM) { + udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); + udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); + } else { + udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); + udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); + if (!uc->bchan) + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); + } +} + static void udma_reset_counters(struct udma_chan *uc) { u32 val; @@ -790,8 +802,6 @@ static void udma_reset_counters(struct udma_chan *uc) val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); } - - uc->bcnt = 0; } static int udma_reset_chan(struct udma_chan *uc, bool hard) @@ -1115,7 +1125,7 @@ static void udma_check_tx_completion(struct work_struct *work) if (uc->desc) { struct udma_desc *d = uc->desc; - uc->bcnt += d->residue; + udma_decrement_byte_counters(uc, d->residue); udma_start(uc); vchan_cookie_complete(&d->vd); break; @@ -1168,7 +1178,7 @@ static irqreturn_t udma_ring_irq_handler(int irq, void *data) vchan_cyclic_callback(&d->vd); } else { if (udma_is_desc_really_done(uc, d)) { - uc->bcnt += d->residue; + udma_decrement_byte_counters(uc, d->residue); udma_start(uc); vchan_cookie_complete(&d->vd); } else { @@ -1204,7 +1214,7 @@ static irqreturn_t udma_udma_irq_handler(int irq, void *data) vchan_cyclic_callback(&d->vd); } else { /* TODO: figure out the real amount of data */ - uc->bcnt += d->residue; + udma_decrement_byte_counters(uc, d->residue); udma_start(uc); vchan_cookie_complete(&d->vd); } @@ -3809,7 +3819,6 @@ static enum dma_status udma_tx_status(struct dma_chan *chan, bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); } - bcnt -= uc->bcnt; if (bcnt && !(bcnt % uc->desc->residue)) residue = 0; else From 7d81afd2690400f8fb7d9bec0532624562634766 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Wed, 10 Aug 2022 06:25:32 +0000 Subject: [PATCH 24/60] dmaengine: sf-pdma:Remove the print function dev_err() >From the coccinelle check: ./drivers/dma/sf-pdma/sf-pdma.c Error:line 409 is redundant because platform_get_irq() already prints an error ./drivers/dma/sf-pdma/sf-pdma.c Error:line 424 is redundant because platform_get_irq() already prints an error So,remove the unnecessary print function dev_err() Reported-by: Zeal Robot Signed-off-by: ye xingchen Link: https://lore.kernel.org/r/20220810062532.13425-1-ye.xingchen@zte.com.cn Signed-off-by: Vinod Koul --- drivers/dma/sf-pdma/sf-pdma.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c index 4f8b8498c5c6..6b524eb6bcf3 100644 --- a/drivers/dma/sf-pdma/sf-pdma.c +++ 
b/drivers/dma/sf-pdma/sf-pdma.c
@@ -405,10 +405,8 @@ static int sf_pdma_irq_init(struct platform_device *pdev, struct sf_pdma *pdma)
 		chan = &pdma->chans[i];
 
 		irq = platform_get_irq(pdev, i * 2);
-		if (irq < 0) {
-			dev_err(&pdev->dev, "ch(%d) Can't get done irq.\n", i);
+		if (irq < 0)
 			return -EINVAL;
-		}
 
 		r = devm_request_irq(&pdev->dev, irq, sf_pdma_done_isr, 0,
 				     dev_name(&pdev->dev), (void *)chan);
@@ -420,10 +418,8 @@ static int sf_pdma_irq_init(struct platform_device *pdev, struct sf_pdma *pdma)
 		chan->txirq = irq;
 
 		irq = platform_get_irq(pdev, (i * 2) + 1);
-		if (irq < 0) {
-			dev_err(&pdev->dev, "ch(%d) Can't get err irq.\n", i);
+		if (irq < 0)
 			return -EINVAL;
-		}
 
 		r = devm_request_irq(&pdev->dev, irq, sf_pdma_err_isr, 0,
 				     dev_name(&pdev->dev), (void *)chan);

From e1c9832b0964327399d43dc481a8c85285b23ad5 Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Thu, 11 Aug 2022 20:09:59 +0800
Subject: [PATCH 25/60] dmaengine: stm32-dmamux: Fix comment typo

The word `end' is duplicated in the comment; remove one occurrence.

Signed-off-by: Jason Wang
Link: https://lore.kernel.org/r/20220811120959.18752-1-wangborong@cdjrlc.com
Signed-off-by: Vinod Koul
---
 drivers/dma/stm32-dmamux.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c
index b431f9da9206..865dd2ff1828 100644
--- a/drivers/dma/stm32-dmamux.c
+++ b/drivers/dma/stm32-dmamux.c
@@ -45,7 +45,7 @@ struct stm32_dmamux_data {
 	 */
 	u32 dma_reqs[];	/* Number of DMA Request per DMA masters.
			 *  [0] holds number of DMA Masters.
-			 *  To be kept at very end end of this structure
+			 *  To be kept at very end of this structure
			 */
 };

From 8c79fd35e1e793361ed30eaa79b3c5752e6c69fb Mon Sep 17 00:00:00 2001
From: Christophe JAILLET
Date: Sat, 30 Jul 2022 22:07:45 +0200
Subject: [PATCH 26/60] dmaengine: stm32-dmamux: Simplify code and save a few
 bytes of memory

STM32_DMAMUX_MAX_DMA_REQUESTS is small (i.e. 32) and, when the 'dma_inuse'
bitmap is allocated, there is already a check that 'dma_req' is <= this
limit.

So there is no good reason to dynamically allocate this bitmap. It just
wastes some memory and some cycles.

Use DECLARE_BITMAP with the maximum bitmap size instead.
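For reference, the helper expands to a fixed-size array, so the whole
bitmap becomes a single unsigned long embedded in the structure:

	/* DECLARE_BITMAP(name, bits) == unsigned long name[BITS_TO_LONGS(bits)] */
	DECLARE_BITMAP(dma_inuse, STM32_DMAMUX_MAX_DMA_REQUESTS);
	/* BITS_TO_LONGS(32) == 1, so no dynamic allocation is needed */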
Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/2d8c24359b2daa32ce0597a2949b7b2bebaf23de.1659211633.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/stm32-dmamux.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c index 865dd2ff1828..ee3cbbf51006 100644 --- a/drivers/dma/stm32-dmamux.c +++ b/drivers/dma/stm32-dmamux.c @@ -39,7 +39,7 @@ struct stm32_dmamux_data { u32 dma_requests; /* Number of DMA requests connected to DMAMUX */ u32 dmamux_requests; /* Number of DMA requests routed toward DMAs */ spinlock_t lock; /* Protects register access */ - unsigned long *dma_inuse; /* Used DMA channel */ + DECLARE_BITMAP(dma_inuse, STM32_DMAMUX_MAX_DMA_REQUESTS); /* Used DMA channel */ u32 ccr[STM32_DMAMUX_MAX_DMA_REQUESTS]; /* Used to backup CCR register * in suspend */ @@ -229,12 +229,6 @@ static int stm32_dmamux_probe(struct platform_device *pdev) stm32_dmamux->dma_requests = dma_req; stm32_dmamux->dma_reqs[0] = count; - stm32_dmamux->dma_inuse = devm_kcalloc(&pdev->dev, - BITS_TO_LONGS(dma_req), - sizeof(unsigned long), - GFP_KERNEL); - if (!stm32_dmamux->dma_inuse) - return -ENOMEM; if (device_property_read_u32(&pdev->dev, "dma-requests", &stm32_dmamux->dmamux_requests)) { From c0c269becf1f5b649b66a7f3eb60ff7e9aa8976d Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Tue, 2 Aug 2022 15:52:32 +0530 Subject: [PATCH 27/60] dmaengine: pl330: Remove unused flags txd.flags is unused and need not be updated. Signed-off-by: Harini Katakam Link: https://lore.kernel.org/r/20220802102232.17653-1-harini.katakam@amd.com Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 09915a5cba3e..0d9257fbdfb0 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2752,7 +2752,6 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( return NULL; pch->cyclic = true; - desc->txd.flags = flags; return &desc->txd; } @@ -2804,8 +2803,6 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, desc->bytes_requested = len; - desc->txd.flags = flags; - return &desc->txd; } @@ -2889,7 +2886,6 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } /* Return the last desc in the chain */ - desc->txd.flags = flg; return &desc->txd; } From 64787536ccfc072dca9b2c7c06c84dcc3f139b10 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 2 Aug 2022 17:06:30 +0300 Subject: [PATCH 28/60] dmaengine: at_xdmac: Replace two if statements with only one with two conditions Add a cosmetic change and replace two if statements with a single if statement with two conditions. In case the optional txstate parameter is NULL, we return the dma_cookie_status, which is fine, no functional change required. 
Signed-off-by: Tudor Ambarus
Link: https://lore.kernel.org/r/20220802140630.243550-1-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul
---
 drivers/dma/at_xdmac.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index b102d8eb5d83..d6c9781cd46a 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1470,10 +1470,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	bool initd;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_COMPLETE)
-		return ret;
-
-	if (!txstate)
+	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
 
 	spin_lock_irqsave(&atchan->lock, flags);

From 84dd3b2b95ef8a33a0c2d7c2c21d4d5edb9f7f08 Mon Sep 17 00:00:00 2001
From: Jean Delvare
Date: Wed, 3 Aug 2022 22:34:48 +0200
Subject: [PATCH 29/60] dmaengine: dw-axi-dmac: Drop obsolete dependency on
 COMPILE_TEST

Since commit 0166dc11be91 ("of: make CONFIG_OF user selectable"), it is
possible to test-build any driver which depends on OF on any architecture
by explicitly selecting OF. Therefore depending on COMPILE_TEST as an
alternative is no longer needed.

It is actually better to always build such drivers with OF enabled, so
that the test builds are closer to how each driver will actually be built
on its intended target. Building them without OF may not test much, as the
compiler will optimize out potentially large parts of the code. In the
worst case, this could even pop false positive warnings. Dropping
COMPILE_TEST here improves the quality of our testing and avoids wasting
time on non-existent issues.

Signed-off-by: Jean Delvare
Cc: Eugeniy Paltsev
Cc: Vinod Koul
Link: https://lore.kernel.org/r/20220803223448.6f08095b@endymion.delvare
Signed-off-by: Vinod Koul
---
 drivers/dma/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index a06d2a7627aa..7524b62a8870 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -180,7 +180,7 @@ config DMA_SUN6I
 
 config DW_AXI_DMAC
 	tristate "Synopsys DesignWare AXI DMA support"
-	depends on OF || COMPILE_TEST
+	depends on OF
 	depends on HAS_IOMEM
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS

From 26696d4657167112a1079f86cba1739765c1360e Mon Sep 17 00:00:00 2001
From: Dario Binacchi
Date: Wed, 21 Sep 2022 19:05:56 +0200
Subject: [PATCH 30/60] dmaengine: mxs: use platform_driver_register

Driver registration fails on the i.MX8MN SoC because its supplier, the
clock control module, is probed later than subsys initcall level. This
driver uses platform_driver_probe, which is not compatible with deferred
probing and won't be probed again later if the probe function fails
because the clock is not available at that time.

This patch replaces the use of platform_driver_probe with
platform_driver_register, which will allow probing the driver later again
when the clock control module becomes available.

The __init annotation has been dropped because it is not compatible with
deferred probing: the code is no longer guaranteed to run only once, so
its memory cannot be freed.
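For context, platform_driver_probe(&drv, probe_fn) binds exactly once and
never retries. The registered form keeps .probe in the driver structure,
so the core can retry it on -EPROBE_DEFER. A sketch (the foo_* names are
hypothetical):

	static struct platform_driver foo_driver = {
		.driver = {
			.name = "foo",
		},
		.probe = foo_probe,	/* may defer until the clock shows up */
	};
	builtin_platform_driver(foo_driver);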
Fixes: a580b8c5429a ("dmaengine: mxs-dma: add dma support for i.MX23/28")
Co-developed-by: Michael Trimarchi
Signed-off-by: Michael Trimarchi
Signed-off-by: Dario Binacchi
Acked-by: Sascha Hauer
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20220921170556.1055962-1-dario.binacchi@amarulasolutions.com
Signed-off-by: Vinod Koul
---
 drivers/dma/mxs-dma.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 994fc4d2aca4..dc147cc2436e 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -670,7 +670,7 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
 	return mxs_chan->status;
 }
 
-static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma)
+static int mxs_dma_init(struct mxs_dma_engine *mxs_dma)
 {
 	int ret;
 
@@ -741,7 +741,7 @@ static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec,
 			ofdma->of_node);
 }
 
-static int __init mxs_dma_probe(struct platform_device *pdev)
+static int mxs_dma_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	const struct mxs_dma_type *dma_type;
@@ -839,10 +839,7 @@ static struct platform_driver mxs_dma_driver = {
 		.name	= "mxs-dma",
 		.of_match_table = mxs_dma_dt_ids,
 	},
+	.probe = mxs_dma_probe,
 };
 
-static int __init mxs_dma_module_init(void)
-{
-	return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe);
-}
-subsys_initcall(mxs_dma_module_init);
+builtin_platform_driver(mxs_dma_driver);

From c10a7777dd30e36a0105055cc393aad7c35a9713 Mon Sep 17 00:00:00 2001
From: Tuo Cao <91tuocao@gmail.com>
Date: Sun, 14 Aug 2022 21:13:23 +0800
Subject: [PATCH 31/60] dmaengine: qcom: gpi: move read_lock_bh to read_lock
 in tasklet

It is unnecessary to call read_lock_bh() in a tasklet: tasklets run in
softirq context, where bottom halves are already disabled.
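A minimal sketch of the locking pattern (names hypothetical):

	static void foo_tasklet(unsigned long data)
	{
		struct foo *f = (struct foo *)data;

		/* softirq context: BHs already off, plain read_lock suffices */
		read_lock(&f->lock);
		/* ... process events ... */
		read_unlock(&f->lock);
	}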
Signed-off-by: Shaomin Deng Link: https://lore.kernel.org/r/20220825144545.3528-1-dengshaomin@cdjrlc.com Signed-off-by: Vinod Koul --- drivers/dma/s3c24xx-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c index f6ed7e889781..a09eeb545f7d 100644 --- a/drivers/dma/s3c24xx-dma.c +++ b/drivers/dma/s3c24xx-dma.c @@ -1094,7 +1094,7 @@ static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma, INIT_LIST_HEAD(&dmadev->channels); /* - * Register as many many memcpy as we have physical channels, + * Register as many memcpy as we have physical channels, * we won't always be able to use all but the code will have * to cope with that situation. */ From 5c43442fee2d37881eb4c3781409ad9508685df8 Mon Sep 17 00:00:00 2001 From: Shaomin Deng Date: Tue, 30 Aug 2022 11:07:08 -0400 Subject: [PATCH 33/60] dmaengine: pl08x: Fix double word Fix the double word "many" in comments. Signed-off-by: Shaomin Deng Link: https://lore.kernel.org/r/20220830150708.24507-1-dengshaomin@cdjrlc.com Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 487a01aa207d..eea8bd33b4b7 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -2367,7 +2367,7 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, INIT_LIST_HEAD(&dmadev->channels); /* - * Register as many many memcpy as we have physical channels, + * Register as many memcpy as we have physical channels, * we won't always be able to use all but the code will have * to cope with that situation. */ From 8e527aac055557897d43a3d74d7970ef5cf6a8bb Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Wed, 28 Sep 2022 08:48:55 -0700 Subject: [PATCH 34/60] dmaengine: idxd: Set wq state to disabled in idxd_wq_disable_cleanup() If we are calling idxd_wq_disable_cleanup(), the workqueue should be in a disabled state. So set the workqueue state to IDXD_WQ_DISABLED so that the state reflects that. Currently if there is a device failure, and a software reset is attempted the workqueues will not be re-enabled due to idxd_wq_enable() seeing that state as already being IDXD_WQ_ENABLED. 
Cc: Fenghua Yu Cc: Dave Jiang Cc: Vinod Koul Signed-off-by: Jerry Snitselaar Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20220928154856.623545-2-jsnitsel@redhat.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5a8cc52c1abf..31911e255ac1 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -258,7 +258,6 @@ void idxd_wq_reset(struct idxd_wq *wq) operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL); idxd_wq_disable_cleanup(wq); - wq->state = IDXD_WQ_DISABLED; } int idxd_wq_map_portal(struct idxd_wq *wq) @@ -378,6 +377,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; lockdep_assert_held(&wq->wq_lock); + wq->state = IDXD_WQ_DISABLED; memset(wq->wqcfg, 0, idxd->wqcfg_size); wq->type = IDXD_WQT_NONE; wq->threshold = 0; From de5819b994893197c71c86d21af10f85f50d6499 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Wed, 28 Sep 2022 08:48:56 -0700 Subject: [PATCH 35/60] dmaengine: idxd: track enabled workqueues in bitmap Now that idxd_wq_disable_cleanup() sets the workqueue state to IDXD_WQ_DISABLED, use a bitmap to track which workqueues have been enabled. This will then be used to determine which workqueues should be re-enabled when attempting a software reset to recover from a device halt state. Cc: Fenghua Yu Cc: Dave Jiang Cc: Vinod Koul Signed-off-by: Jerry Snitselaar Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20220928154856.623545-3-jsnitsel@redhat.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 2 ++ drivers/dma/idxd/idxd.h | 2 ++ drivers/dma/idxd/init.c | 6 ++++++ drivers/dma/idxd/irq.c | 5 +++-- drivers/dma/idxd/sysfs.c | 1 + 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 31911e255ac1..f0c7d6d348e3 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -196,6 +196,7 @@ int idxd_wq_enable(struct idxd_wq *wq) } wq->state = IDXD_WQ_ENABLED; + set_bit(wq->id, idxd->wq_enable_map); dev_dbg(dev, "WQ %d enabled\n", wq->id); return 0; } @@ -223,6 +224,7 @@ int idxd_wq_disable(struct idxd_wq *wq, bool reset_config) if (reset_config) idxd_wq_disable_cleanup(wq); + clear_bit(wq->id, idxd->wq_enable_map); wq->state = IDXD_WQ_DISABLED; dev_dbg(dev, "WQ %d disabled\n", wq->id); return 0; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index fed0dfc1eaa8..f527a7f88b92 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include "registers.h" @@ -299,6 +300,7 @@ struct idxd_device { int rdbuf_limit; int nr_rdbufs; /* non-reserved read buffers */ unsigned int wqcfg_size; + unsigned long *wq_enable_map; union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index aa3478257ddb..7e27e69ff741 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -151,6 +151,12 @@ static int idxd_setup_wqs(struct idxd_device *idxd) if (!idxd->wqs) return -ENOMEM; + idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev)); + if (!idxd->wq_enable_map) { + kfree(idxd->wqs); + return -ENOMEM; + } + for (i = 0; i < idxd->max_wqs; i++) { wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev)); if (!wq) { diff --git 
a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 5b9921475be6..5927d371493c 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -49,11 +49,12 @@ static void idxd_device_reinit(struct work_struct *work) goto out; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = idxd->wqs[i]; + if (test_bit(i, idxd->wq_enable_map)) { + struct idxd_wq *wq = idxd->wqs[i]; - if (wq->state == IDXD_WQ_ENABLED) { rc = idxd_wq_enable(wq); if (rc < 0) { + clear_bit(i, idxd->wq_enable_map); dev_warn(dev, "Unable to re-enable wq %s\n", dev_name(wq_confdev(wq))); } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 3f262a57441b..3325b16ed959 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1405,6 +1405,7 @@ static void idxd_conf_device_release(struct device *dev) struct idxd_device *idxd = confdev_to_idxd(dev); kfree(idxd->groups); + bitmap_free(idxd->wq_enable_map); kfree(idxd->wqs); kfree(idxd->engines); ida_free(&idxd_ida, idxd->id); From 612fcfdd1a7ccb1968052250f2622de0bdcd513b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 26 Sep 2022 17:03:24 +0200 Subject: [PATCH 36/60] dt-bindings: renesas,rcar-dmac: Add r8a779g0 support Document support for the Direct Memory Access Controllers (DMAC) in the Renesas R-Car V4H (R8A779G0) SoC. Signed-off-by: Geert Uytterhoeven Acked-by: Krzysztof Kozlowski Reviewed-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/0a4d40092a51345003742725aea512a815d27e89.1664204526.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml index 7202cd68e759..89b591a05bce 100644 --- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml +++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml @@ -45,6 +45,7 @@ properties: - enum: - renesas,dmac-r8a779a0 # R-Car V3U - renesas,dmac-r8a779f0 # R-Car S4-8 + - renesas,dmac-r8a779g0 # R-Car V4H - const: renesas,rcar-gen4-dmac # R-Car Gen4 reg: true From d1083fd04302a95bc4dcf1c059537da87b39bd9a Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 28 Sep 2022 01:47:47 +0000 Subject: [PATCH 37/60] dmaengine: idxd: Remove unused struct idxd_fault Since fault processing code has been removed, struct idxd_fault is not used any more and can be removed as well. Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20220928014747.106808-1-yuancan@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 5927d371493c..aa314ebec587 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -17,12 +17,6 @@ enum irq_work_type { IRQ_WORK_PROCESS_FAULT, }; -struct idxd_fault { - struct work_struct work; - u64 addr; - struct idxd_device *idxd; -}; - struct idxd_resubmit { struct work_struct work; struct idxd_desc *desc; From 45ecf27f300765d135f98d444957675ff6bb9837 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 26 Sep 2022 16:46:24 -0500 Subject: [PATCH 38/60] dmaengine: sh: rcar-dmac: Replace zero-length arrays with DECLARE_FLEX_ARRAY() helper Zero-length arrays are deprecated and we are moving towards adopting C99 flexible-array members, instead. So, replace zero-length arrays declarations in anonymous union with the new DECLARE_FLEX_ARRAY() helper macro. 
This helper allows for flexible-array members in unions. Link: https://github.com/KSPP/linux/issues/193 Link: https://github.com/KSPP/linux/issues/217 Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/YzIdsJqsR3LH2qEK@work Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 13d12d660cc2..641d689d17ff 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -103,8 +103,8 @@ struct rcar_dmac_desc_page { struct list_head node; union { - struct rcar_dmac_desc descs[0]; - struct rcar_dmac_xfer_chunk chunks[0]; + DECLARE_FLEX_ARRAY(struct rcar_dmac_desc, descs); + DECLARE_FLEX_ARRAY(struct rcar_dmac_xfer_chunk, chunks); }; }; From 19ea810e88e08f87d07aafcf82f45084c360ed03 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Mon, 19 Sep 2022 22:07:21 -0400 Subject: [PATCH 39/60] Documentation: devicetree: dma: update the comments Remove the doubled word 'to'. Signed-off-by: Deming Wang Acked-by: Rob Herring Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20220920020721.2190-1-wangdeming@inspur.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt index b849a1ed389d..47e477cce6d2 100644 --- a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt +++ b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt @@ -4,7 +4,7 @@ Required properties: - compatible: "ti,dra7-dma-crossbar" for DRA7xx DMA crossbar "ti,am335x-edma-crossbar" for AM335x and AM437x - reg: Memory map for accessing module -- #dma-cells: Should be set to to match with the DMA controller's dma-cells +- #dma-cells: Should be set to match with the DMA controller's dma-cells for ti,dra7-dma-crossbar and <3> for ti,am335x-edma-crossbar. - dma-requests: Number of DMA requests the crossbar can receive - dma-masters: phandle pointing to the DMA controller From 65add05cfd6e44ea45b2b4e6135745564595cf4f Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Mon, 26 Sep 2022 16:52:00 +0530 Subject: [PATCH 40/60] dt-bindings: dma: Make minor fixes to qcom,bam-dma binding doc As a user recently noted, the qcom,bam-dma binding document describes the msm8974 BAM DMA node in the 'example section' incorrectly. Fix the same by making it consistent with the node present inside the 'qcom-msm8974' dts file, namely the 'reg' and 'interrupt' values which are incorrect in the 'example section'. While at it, also make two additional minor cleanups: - mention Bjorn's new email ID in the document, and - add SDM845 in the comment line for the SoCs on which the qcom,bam-v1.7.0 version is supported.
Signed-off-by: Bhupesh Sharma Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220926112200.1948080-1-bhupesh.sharma@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml b/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml index 9bf3a1b164f1..003098caf709 100644 --- a/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml @@ -8,7 +8,7 @@ title: Qualcomm Technologies Inc BAM DMA controller maintainers: - Andy Gross - - Bjorn Andersson + - Bjorn Andersson allOf: - $ref: "dma-controller.yaml#" @@ -20,7 +20,7 @@ properties: - qcom,bam-v1.3.0 # MSM8974, APQ8074 and APQ8084 - qcom,bam-v1.4.0 - # MSM8916 + # MSM8916 and SDM845 - qcom,bam-v1.7.0 clocks: @@ -90,8 +90,8 @@ examples: dma-controller@f9944000 { compatible = "qcom,bam-v1.4.0"; - reg = <0xf9944000 0x15000>; - interrupts = ; + reg = <0xf9944000 0x19000>; + interrupts = ; clocks = <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "bam_clk"; #dma-cells = <1>; From 0f4c5b29e3337c2718b812ae2dade5fc55a8321b Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Sep 2022 18:19:43 +0800 Subject: [PATCH 41/60] dmaengine: ti: edma: Remove some unused functions These functions are defined in the edma.c file, but not called elsewhere, so delete these unused functions. drivers/dma/ti/edma.c:746:31: warning: unused function 'to_edma_cc'. drivers/dma/ti/edma.c:420:20: warning: unused function 'edma_param_or'. drivers/dma/ti/edma.c:414:20: warning: unused function 'edma_param_and'. drivers/dma/ti/edma.c:402:20: warning: unused function 'edma_param_write'. drivers/dma/ti/edma.c:373:28: warning: unused function 'edma_shadow0_read'. drivers/dma/ti/edma.c:396:28: warning: unused function 'edma_param_read'. drivers/dma/ti/edma.c:355:20: warning: unused function 'edma_or_array'. 
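The warnings quoted above are the class a clang build emits for file-local helpers with no remaining callers; a trivial reproduction, assuming clang with -Wunused-function (gcc stays silent for unused static inline functions, which is why such leftovers tend to linger):

    /* foo.c -- hypothetical example, not driver code */
    static inline int unused_helper(int x)
    {
    	return x * 2;	/* no caller in this translation unit */
    }
    /* clang: warning: unused function 'unused_helper' [-Wunused-function] */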
Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2152 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20220914101943.83929-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/ti/edma.c | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 4cbca80ee16e..fa06d7e6d8e3 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -352,12 +352,6 @@ static inline void edma_modify_array(struct edma_cc *ecc, int offset, int i, edma_modify(ecc, offset + (i << 2), and, or); } -static inline void edma_or_array(struct edma_cc *ecc, int offset, int i, - unsigned or) -{ - edma_or(ecc, offset + (i << 2), or); -} - static inline void edma_or_array2(struct edma_cc *ecc, int offset, int i, int j, unsigned or) { @@ -370,11 +364,6 @@ static inline void edma_write_array2(struct edma_cc *ecc, int offset, int i, edma_write(ecc, offset + ((i * 2 + j) << 2), val); } -static inline unsigned int edma_shadow0_read(struct edma_cc *ecc, int offset) -{ - return edma_read(ecc, EDMA_SHADOW0 + offset); -} - static inline unsigned int edma_shadow0_read_array(struct edma_cc *ecc, int offset, int i) { @@ -393,36 +382,12 @@ static inline void edma_shadow0_write_array(struct edma_cc *ecc, int offset, edma_write(ecc, EDMA_SHADOW0 + offset + (i << 2), val); } -static inline unsigned int edma_param_read(struct edma_cc *ecc, int offset, - int param_no) -{ - return edma_read(ecc, EDMA_PARM + offset + (param_no << 5)); -} - -static inline void edma_param_write(struct edma_cc *ecc, int offset, - int param_no, unsigned val) -{ - edma_write(ecc, EDMA_PARM + offset + (param_no << 5), val); -} - static inline void edma_param_modify(struct edma_cc *ecc, int offset, int param_no, unsigned and, unsigned or) { edma_modify(ecc, EDMA_PARM + offset + (param_no << 5), and, or); } -static inline void edma_param_and(struct edma_cc *ecc, int offset, int param_no, - unsigned and) -{ - edma_and(ecc, EDMA_PARM + offset + (param_no << 5), and); -} - -static inline void edma_param_or(struct edma_cc *ecc, int offset, int param_no, - unsigned or) -{ - edma_or(ecc, EDMA_PARM + offset + (param_no << 5), or); -} - static void edma_assign_priority_to_queue(struct edma_cc *ecc, int queue_no, int priority) { @@ -743,11 +708,6 @@ static void edma_free_channel(struct edma_chan *echan) edma_setup_interrupt(echan, false); } -static inline struct edma_cc *to_edma_cc(struct dma_device *d) -{ - return container_of(d, struct edma_cc, dma_slave); -} - static inline struct edma_chan *to_edma_chan(struct dma_chan *c) { return container_of(c, struct edma_chan, vchan.chan); From 072431595a57bc6605c29724afce5f9ef8114915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Sun, 18 Sep 2022 11:58:44 +0200 Subject: [PATCH 42/60] dmaengine: apple-admac: Do not use devres for IRQs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is in advance of adding support for triggering the reset signal to the peripheral, since registering the IRQ handler will have to be sequenced with it. 
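The ordering hazard being removed can be sketched from the remove path (illustrative; the actual change is in the diff below): with devm_request_irq(), devres releases the IRQ only after the remove callback has returned, so once a reset is sequenced into teardown the handler could still fire against hardware that is already in reset. Taking the IRQ by hand pins the order explicitly:

    static int admac_remove(struct platform_device *pdev)
    {
    	struct admac_data *ad = platform_get_drvdata(pdev);

    	of_dma_controller_free(pdev->dev.of_node);
    	dma_async_device_unregister(&ad->dma);
    	free_irq(ad->irq, ad);	/* handler gone before any later reset handling */
    	return 0;
    }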
Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220918095845.68860-4-povik+lin@cutebit.org Signed-off-by: Vinod Koul --- drivers/dma/apple-admac.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index d1f74a3aa999..7d1b76678032 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -96,6 +96,7 @@ struct admac_data { struct device *dev; __iomem void *base; + int irq; int irq_index; int nchannels; struct admac_chan channels[]; @@ -724,12 +725,7 @@ static int admac_probe(struct platform_device *pdev) if (irq < 0) return dev_err_probe(&pdev->dev, irq, "no usable interrupt\n"); - - err = devm_request_irq(&pdev->dev, irq, admac_interrupt, - 0, dev_name(&pdev->dev), ad); - if (err) - return dev_err_probe(&pdev->dev, err, - "unable to register interrupt\n"); + ad->irq = irq; ad->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ad->base)) @@ -774,17 +770,29 @@ static int admac_probe(struct platform_device *pdev) tasklet_setup(&adchan->tasklet, admac_chan_tasklet); } - err = dma_async_device_register(&ad->dma); + err = request_irq(irq, admac_interrupt, 0, dev_name(&pdev->dev), ad); if (err) - return dev_err_probe(&pdev->dev, err, "failed to register DMA device\n"); + return dev_err_probe(&pdev->dev, err, + "unable to register interrupt\n"); + + err = dma_async_device_register(&ad->dma); + if (err) { + dev_err_probe(&pdev->dev, err, "failed to register DMA device\n"); + goto free_irq; + } err = of_dma_controller_register(pdev->dev.of_node, admac_dma_of_xlate, ad); if (err) { dma_async_device_unregister(&ad->dma); - return dev_err_probe(&pdev->dev, err, "failed to register with OF\n"); + dev_err_probe(&pdev->dev, err, "failed to register with OF\n"); + goto free_irq; } return 0; + +free_irq: + free_irq(ad->irq, ad); + return err; } static int admac_remove(struct platform_device *pdev) @@ -793,6 +801,7 @@ static int admac_remove(struct platform_device *pdev) of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&ad->dma); + free_irq(ad->irq, ad); return 0; } From 6aed75d7ccb3288029287c50fc594a4314698be0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Sun, 18 Sep 2022 11:58:45 +0200 Subject: [PATCH 43/60] dmaengine: apple-admac: Trigger shared reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a reset domain is attached to the device, obtain a shared reference to it and trigger it. Typically on a chip the ADMAC controller will share a reset domain with the MCA peripheral. 
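A sketch of the shared-reset contract this relies on (a summary of the reset framework semantics as used here, not new driver code): on a shared reset control, reset_control_reset() pulses the line only for the first consumer -- later callers merely take a reference -- and each successful call must eventually be balanced by reset_control_rearm() before the line may be pulsed again.

    err = reset_control_reset(ad->rstc);	/* trigger, or join an earlier trigger */
    if (err)
    	return err;
    /* ... on teardown or probe failure: */
    reset_control_rearm(ad->rstc);		/* drop the triggered-count reference */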
Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220918095845.68860-5-povik+lin@cutebit.org Signed-off-by: Vinod Koul --- drivers/dma/apple-admac.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 7d1b76678032..317ca76ccafd 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -12,8 +12,9 @@ #include #include #include -#include +#include #include +#include #include "dmaengine.h" @@ -95,6 +96,7 @@ struct admac_data { struct dma_device dma; struct device *dev; __iomem void *base; + struct reset_control *rstc; int irq; int irq_index; @@ -732,6 +734,10 @@ static int admac_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(ad->base), "unable to obtain MMIO resource\n"); + ad->rstc = devm_reset_control_get_optional_shared(&pdev->dev, NULL); + if (IS_ERR(ad->rstc)) + return PTR_ERR(ad->rstc); + dma = &ad->dma; dma_cap_set(DMA_PRIVATE, dma->cap_mask); @@ -770,10 +776,17 @@ static int admac_probe(struct platform_device *pdev) tasklet_setup(&adchan->tasklet, admac_chan_tasklet); } - err = request_irq(irq, admac_interrupt, 0, dev_name(&pdev->dev), ad); + err = reset_control_reset(ad->rstc); if (err) return dev_err_probe(&pdev->dev, err, - "unable to register interrupt\n"); + "unable to trigger reset\n"); + + err = request_irq(irq, admac_interrupt, 0, dev_name(&pdev->dev), ad); + if (err) { + dev_err_probe(&pdev->dev, err, + "unable to register interrupt\n"); + goto free_reset; + } err = dma_async_device_register(&ad->dma); if (err) { @@ -792,6 +805,8 @@ static int admac_probe(struct platform_device *pdev) free_irq: free_irq(ad->irq, ad); +free_reset: + reset_control_rearm(ad->rstc); return err; } @@ -802,6 +817,7 @@ static int admac_remove(struct platform_device *pdev) of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&ad->dma); free_irq(ad->irq, ad); + reset_control_rearm(ad->rstc); return 0; } From 5cfeaf7cc5d2a349ba7f2cc1942e106c972e0a1c Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Mon, 19 Sep 2022 13:59:31 -0700 Subject: [PATCH 44/60] dmaengine: ti: k3-psil: add additional TX threads for j7200 Add matching PSI-L threads mapping for transmission DMA channels on the J7200 platform. 
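For reference, the entries added below are plain thread-ID-to-endpoint-configuration records; PSIL_PDMA_XY_PKT() expands to approximately the following (a sketch modeled on the TI PSIL map sources; exact field names may differ):

    #define PSIL_PDMA_XY_PKT(x)				\
    	{						\
    		.thread_id = x,				\
    		.ep_config = {				\
    			.ep_type = PSIL_EP_PDMA_XY,	\
    			.pkt_mode = 1,			\
    		},					\
    	}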
Signed-off-by: Matt Ranostay Link: https://lore.kernel.org/r/20220919205931.8397-3-mranostay@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-psil-j7200.c | 67 ++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/drivers/dma/ti/k3-psil-j7200.c b/drivers/dma/ti/k3-psil-j7200.c index 5ea63ea74822..e3feff869991 100644 --- a/drivers/dma/ti/k3-psil-j7200.c +++ b/drivers/dma/ti/k3-psil-j7200.c @@ -143,6 +143,57 @@ static struct psil_ep j7200_src_ep_map[] = { /* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ static struct psil_ep j7200_dst_ep_map[] = { + /* PDMA_MCASP - McASP0-2 */ + PSIL_PDMA_MCASP(0xc400), + PSIL_PDMA_MCASP(0xc401), + PSIL_PDMA_MCASP(0xc402), + /* PDMA_SPI_G0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0xc600), + PSIL_PDMA_XY_PKT(0xc601), + PSIL_PDMA_XY_PKT(0xc602), + PSIL_PDMA_XY_PKT(0xc603), + PSIL_PDMA_XY_PKT(0xc604), + PSIL_PDMA_XY_PKT(0xc605), + PSIL_PDMA_XY_PKT(0xc606), + PSIL_PDMA_XY_PKT(0xc607), + PSIL_PDMA_XY_PKT(0xc608), + PSIL_PDMA_XY_PKT(0xc609), + PSIL_PDMA_XY_PKT(0xc60a), + PSIL_PDMA_XY_PKT(0xc60b), + PSIL_PDMA_XY_PKT(0xc60c), + PSIL_PDMA_XY_PKT(0xc60d), + PSIL_PDMA_XY_PKT(0xc60e), + PSIL_PDMA_XY_PKT(0xc60f), + /* PDMA_SPI_G1 - SPI4-7 */ + PSIL_PDMA_XY_PKT(0xc610), + PSIL_PDMA_XY_PKT(0xc611), + PSIL_PDMA_XY_PKT(0xc612), + PSIL_PDMA_XY_PKT(0xc613), + PSIL_PDMA_XY_PKT(0xc614), + PSIL_PDMA_XY_PKT(0xc615), + PSIL_PDMA_XY_PKT(0xc616), + PSIL_PDMA_XY_PKT(0xc617), + PSIL_PDMA_XY_PKT(0xc618), + PSIL_PDMA_XY_PKT(0xc619), + PSIL_PDMA_XY_PKT(0xc61a), + PSIL_PDMA_XY_PKT(0xc61b), + PSIL_PDMA_XY_PKT(0xc61c), + PSIL_PDMA_XY_PKT(0xc61d), + PSIL_PDMA_XY_PKT(0xc61e), + PSIL_PDMA_XY_PKT(0xc61f), + /* PDMA_USART_G0 - UART0-1 */ + PSIL_PDMA_XY_PKT(0xc700), + PSIL_PDMA_XY_PKT(0xc701), + /* PDMA_USART_G1 - UART2-3 */ + PSIL_PDMA_XY_PKT(0xc702), + PSIL_PDMA_XY_PKT(0xc703), + /* PDMA_USART_G2 - UART4-9 */ + PSIL_PDMA_XY_PKT(0xc704), + PSIL_PDMA_XY_PKT(0xc705), + PSIL_PDMA_XY_PKT(0xc706), + PSIL_PDMA_XY_PKT(0xc707), + PSIL_PDMA_XY_PKT(0xc708), + PSIL_PDMA_XY_PKT(0xc709), /* CPSW5 */ PSIL_ETHERNET(0xca00), PSIL_ETHERNET(0xca01), @@ -161,6 +212,22 @@ static struct psil_ep j7200_dst_ep_map[] = { PSIL_ETHERNET(0xf005), PSIL_ETHERNET(0xf006), PSIL_ETHERNET(0xf007), + /* MCU_PDMA_MISC_G0 - SPI0 */ + PSIL_PDMA_XY_PKT(0xf100), + PSIL_PDMA_XY_PKT(0xf101), + PSIL_PDMA_XY_PKT(0xf102), + PSIL_PDMA_XY_PKT(0xf103), + /* MCU_PDMA_MISC_G1 - SPI1-2 */ + PSIL_PDMA_XY_PKT(0xf200), + PSIL_PDMA_XY_PKT(0xf201), + PSIL_PDMA_XY_PKT(0xf202), + PSIL_PDMA_XY_PKT(0xf203), + PSIL_PDMA_XY_PKT(0xf204), + PSIL_PDMA_XY_PKT(0xf205), + PSIL_PDMA_XY_PKT(0xf206), + PSIL_PDMA_XY_PKT(0xf207), + /* MCU_PDMA_MISC_G2 - UART0 */ + PSIL_PDMA_XY_PKT(0xf300), /* SA2UL */ PSIL_SA2UL(0xf500, 1), PSIL_SA2UL(0xf501, 1), From 693e9c269e8e8fb16f1d7fac38bd774402722e87 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Mon, 19 Sep 2022 13:59:30 -0700 Subject: [PATCH 45/60] dmaengine: ti: k3-psil: add additional TX threads for j721e Add matching PSI-L threads mapping for transmission DMA channels on the J721E platform. 
Signed-off-by: Matt Ranostay Link: https://lore.kernel.org/r/20220919205931.8397-2-mranostay@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-psil-j721e.c | 79 ++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/drivers/dma/ti/k3-psil-j721e.c b/drivers/dma/ti/k3-psil-j721e.c index 34e3fc565a37..e7c83d668bb6 100644 --- a/drivers/dma/ti/k3-psil-j721e.c +++ b/drivers/dma/ti/k3-psil-j721e.c @@ -266,6 +266,69 @@ static struct psil_ep j721e_dst_ep_map[] = { PSIL_ETHERNET(0xc205), PSIL_ETHERNET(0xc206), PSIL_ETHERNET(0xc207), + /* PDMA6 (PSIL_PDMA_MCASP_G0) - McASP0-2 */ + PSIL_PDMA_MCASP(0xc400), + PSIL_PDMA_MCASP(0xc401), + PSIL_PDMA_MCASP(0xc402), + /* PDMA7 (PSIL_PDMA_MCASP_G1) - McASP3-11 */ + PSIL_PDMA_MCASP(0xc500), + PSIL_PDMA_MCASP(0xc501), + PSIL_PDMA_MCASP(0xc502), + PSIL_PDMA_MCASP(0xc503), + PSIL_PDMA_MCASP(0xc504), + PSIL_PDMA_MCASP(0xc505), + PSIL_PDMA_MCASP(0xc506), + PSIL_PDMA_MCASP(0xc507), + PSIL_PDMA_MCASP(0xc508), + /* PDMA8 (PDMA_MISC_G0) - SPI0-1 */ + PSIL_PDMA_XY_PKT(0xc600), + PSIL_PDMA_XY_PKT(0xc601), + PSIL_PDMA_XY_PKT(0xc602), + PSIL_PDMA_XY_PKT(0xc603), + PSIL_PDMA_XY_PKT(0xc604), + PSIL_PDMA_XY_PKT(0xc605), + PSIL_PDMA_XY_PKT(0xc606), + PSIL_PDMA_XY_PKT(0xc607), + /* PDMA9 (PDMA_MISC_G1) - SPI2-3 */ + PSIL_PDMA_XY_PKT(0xc60c), + PSIL_PDMA_XY_PKT(0xc60d), + PSIL_PDMA_XY_PKT(0xc60e), + PSIL_PDMA_XY_PKT(0xc60f), + PSIL_PDMA_XY_PKT(0xc610), + PSIL_PDMA_XY_PKT(0xc611), + PSIL_PDMA_XY_PKT(0xc612), + PSIL_PDMA_XY_PKT(0xc613), + /* PDMA10 (PDMA_MISC_G2) - SPI4-5 */ + PSIL_PDMA_XY_PKT(0xc618), + PSIL_PDMA_XY_PKT(0xc619), + PSIL_PDMA_XY_PKT(0xc61a), + PSIL_PDMA_XY_PKT(0xc61b), + PSIL_PDMA_XY_PKT(0xc61c), + PSIL_PDMA_XY_PKT(0xc61d), + PSIL_PDMA_XY_PKT(0xc61e), + PSIL_PDMA_XY_PKT(0xc61f), + /* PDMA11 (PDMA_MISC_G3) */ + PSIL_PDMA_XY_PKT(0xc624), + PSIL_PDMA_XY_PKT(0xc625), + PSIL_PDMA_XY_PKT(0xc626), + PSIL_PDMA_XY_PKT(0xc627), + PSIL_PDMA_XY_PKT(0xc628), + PSIL_PDMA_XY_PKT(0xc629), + PSIL_PDMA_XY_PKT(0xc630), + PSIL_PDMA_XY_PKT(0xc63a), + /* PDMA13 (PDMA_USART_G0) - UART0-1 */ + PSIL_PDMA_XY_PKT(0xc700), + PSIL_PDMA_XY_PKT(0xc701), + /* PDMA14 (PDMA_USART_G1) - UART2-3 */ + PSIL_PDMA_XY_PKT(0xc702), + PSIL_PDMA_XY_PKT(0xc703), + /* PDMA15 (PDMA_USART_G2) - UART4-9 */ + PSIL_PDMA_XY_PKT(0xc704), + PSIL_PDMA_XY_PKT(0xc705), + PSIL_PDMA_XY_PKT(0xc706), + PSIL_PDMA_XY_PKT(0xc707), + PSIL_PDMA_XY_PKT(0xc708), + PSIL_PDMA_XY_PKT(0xc709), /* CPSW9 */ PSIL_ETHERNET(0xca00), PSIL_ETHERNET(0xca01), @@ -284,6 +347,22 @@ static struct psil_ep j721e_dst_ep_map[] = { PSIL_ETHERNET(0xf005), PSIL_ETHERNET(0xf006), PSIL_ETHERNET(0xf007), + /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */ + PSIL_PDMA_XY_PKT(0xf100), + PSIL_PDMA_XY_PKT(0xf101), + PSIL_PDMA_XY_PKT(0xf102), + PSIL_PDMA_XY_PKT(0xf103), + /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */ + PSIL_PDMA_XY_PKT(0xf200), + PSIL_PDMA_XY_PKT(0xf201), + PSIL_PDMA_XY_PKT(0xf202), + PSIL_PDMA_XY_PKT(0xf203), + PSIL_PDMA_XY_PKT(0xf204), + PSIL_PDMA_XY_PKT(0xf205), + PSIL_PDMA_XY_PKT(0xf206), + PSIL_PDMA_XY_PKT(0xf207), + /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */ + PSIL_PDMA_XY_PKT(0xf300), /* SA2UL */ PSIL_SA2UL(0xf500, 1), PSIL_SA2UL(0xf501, 1), From 7c8765308371be30f50c1b5b97618b731514b207 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 15 Sep 2022 22:48:44 +0200 Subject: [PATCH 46/60] dmaengine: qcom-adm: fix wrong sizeof config in slave_config Fix the broken slave_config function, which incorrectly compares the peripheral_size with the size of the config pointer instead of the size of the config struct.
This causes the crci value to be ignored and causes a kernel panic on any slave that uses the ADM driver. To fix this, compare to the size of the struct and NOT the size of the pointer. Fixes: 03de6b273805 ("dmaengine: qcom-adm: stop abusing slave_id config") Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org # v5.17+ Reviewed-by: Arnd Bergmann Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220915204844.3838-1-ansuelsmth@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/qcom_adm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/qcom/qcom_adm.c b/drivers/dma/qcom/qcom_adm.c index facdacf8aede..c77d9de853de 100644 --- a/drivers/dma/qcom/qcom_adm.c +++ b/drivers/dma/qcom/qcom_adm.c @@ -494,7 +494,7 @@ static int adm_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg) spin_lock_irqsave(&achan->vc.lock, flag); memcpy(&achan->slave, cfg, sizeof(struct dma_slave_config)); - if (cfg->peripheral_size == sizeof(config)) + if (cfg->peripheral_size == sizeof(*config)) achan->crci = config->crci; spin_unlock_irqrestore(&achan->vc.lock, flag); From b9d2140c3badf4107973ad77c5a0ec3075705c85 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Fri, 16 Sep 2022 06:12:56 +0200 Subject: [PATCH 47/60] dmaengine: qcom-adm: fix wrong calling convention for prep_slave_sg The calling convention for prep_slave_sg is to return NULL on error and provide an error log to the system. qcom-adm instead provides an error pointer when an error occurs. This indirectly causes a kernel panic, for example for the nandc driver, which checks only whether the pointer returned by device_prep_slave_sg is not NULL. Returning an error pointer makes nandc think the device_prep_slave_sg function completed correctly, and makes the kernel panic later in the code. While nandc is the one that makes the kernel crash, it was pointed out that the real problem is qcom-adm not following the calling convention for that function. To fix this, drop returning an error pointer and return NULL with an error log. Fixes: 03de6b273805 ("dmaengine: qcom-adm: stop abusing slave_id config") Fixes: 5c9f8c2dbdbe ("dmaengine: qcom: Add ADM driver") Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org # v5.11+ Link: https://lore.kernel.org/r/20220916041256.7104-1-ansuelsmth@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/qcom_adm.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/dma/qcom/qcom_adm.c b/drivers/dma/qcom/qcom_adm.c index c77d9de853de..d56caf1681ff 100644 --- a/drivers/dma/qcom/qcom_adm.c +++ b/drivers/dma/qcom/qcom_adm.c @@ -379,13 +379,13 @@ static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan, if (blk_size < 0) { dev_err(adev->dev, "invalid burst value: %d\n", burst); - return ERR_PTR(-EINVAL); + return NULL; } crci = achan->crci & 0xf; if (!crci || achan->crci > 0x1f) { dev_err(adev->dev, "invalid crci value\n"); - return ERR_PTR(-EINVAL); + return NULL; } } @@ -403,8 +403,10 @@ static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan, } async_desc = kzalloc(sizeof(*async_desc), GFP_NOWAIT); - if (!async_desc) - return ERR_PTR(-ENOMEM); + if (!async_desc) { + dev_err(adev->dev, "not enough memory for async_desc struct\n"); + return NULL; + } async_desc->mux = achan->mux ?
ADM_CRCI_CTL_MUX_SEL : 0; async_desc->crci = crci; @@ -414,8 +416,10 @@ static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan, sizeof(*cple) + 2 * ADM_DESC_ALIGN; async_desc->cpl = kzalloc(async_desc->dma_len, GFP_NOWAIT); - if (!async_desc->cpl) + if (!async_desc->cpl) { + dev_err(adev->dev, "not enough memory for cpl struct\n"); goto free; + } async_desc->adev = adev; @@ -437,8 +441,10 @@ static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan, async_desc->dma_addr = dma_map_single(adev->dev, async_desc->cpl, async_desc->dma_len, DMA_TO_DEVICE); - if (dma_mapping_error(adev->dev, async_desc->dma_addr)) + if (dma_mapping_error(adev->dev, async_desc->dma_addr)) { + dev_err(adev->dev, "dma mapping error for cpl\n"); goto free; + } cple_addr = async_desc->dma_addr + ((void *)cple - async_desc->cpl); @@ -454,7 +460,7 @@ static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan, free: kfree(async_desc); - return ERR_PTR(-ENOMEM); + return NULL; } /** From 898ec89dbb55b8294695ad71694a0684e62b2a73 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 19 Sep 2022 09:58:42 -0700 Subject: [PATCH 48/60] dmaengine: ioat: stop mod_timer from resurrecting deleted timer in __cleanup() Users report observing the timer event reporting a halted channel, but with no error observed in the CHANERR register. The driver had finished its self-test and released the channel resources. Debug shows that __cleanup() can call mod_timer() after the timer has been deleted and thus resurrect the timer. While harmless, it causes a spurious error message to be emitted. Use the mod_timer_pending() call to prevent a deleted timer from being resurrected. Fixes: 3372de5813e4 ("dmaengine: ioatdma: removal of dma_v3.c and relevant ioat3 references") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/166360672197.3851724.17040290563764838369.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 37ff4ec7db76..e2070df6cad2 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -656,7 +656,7 @@ static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) if (active - i == 0) { dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n", __func__); - mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); } /* microsecond delay by sysfs variable per pending descriptor */ @@ -682,7 +682,7 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan) if (chanerr & (IOAT_CHANERR_HANDLE_MASK | IOAT_CHANERR_RECOVER_MASK)) { - mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); ioat_eh(ioat_chan); } } @@ -879,7 +879,7 @@ static void check_active(struct ioatdma_chan *ioat_chan) } if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) - mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); } static void ioat_reboot_chan(struct ioatdma_chan *ioat_chan) From 22bd0df846ca1388ce9f5d54fb6e9f597c932ba9 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:18 -0700 Subject: [PATCH 49/60] dmaengine: idxd: convert ats_dis to a wq flag Make wq attribute access consistent. Convert ats_dis to the wq flag WQ_FLAG_ATS_DISABLE.
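In outline, the conversion replaces a dedicated bool with one bit in wq->flags (the full change is in the diff below); a sketch of the resulting access pattern, where set_bit()/clear_bit()/test_bit() are the same atomic bitops already used for the other WQ toggles:

    if (ats_dis)
    	set_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
    else
    	clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
    /* ... and at config-write time: */
    wq->wqcfg->wq_ats_disable = test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);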
Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-2-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 4 ++-- drivers/dma/idxd/idxd.h | 2 +- drivers/dma/idxd/sysfs.c | 7 +++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index f0c7d6d348e3..88986db57743 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -384,10 +384,10 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->type = IDXD_WQT_NONE; wq->threshold = 0; wq->priority = 0; - wq->ats_dis = 0; wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; clear_bit(WQ_FLAG_DEDICATED, &wq->flags); clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; @@ -861,7 +861,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->bof = 1; if (idxd->hw.wq_cap.wq_ats_support) - wq->wqcfg->wq_ats_disable = wq->ats_dis; + wq->wqcfg->wq_ats_disable = test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); /* bytes 12-15 */ wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index f527a7f88b92..4e7e21264be7 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -133,6 +133,7 @@ enum idxd_wq_state { enum idxd_wq_flag { WQ_FLAG_DEDICATED = 0, WQ_FLAG_BLOCK_ON_FAULT, + WQ_FLAG_ATS_DISABLE, }; enum idxd_wq_type { @@ -209,7 +210,6 @@ struct idxd_wq { char name[WQ_NAME_SIZE + 1]; u64 max_xfer_bytes; u32 max_batch_size; - bool ats_dis; }; struct idxd_engine { diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 3325b16ed959..8ff599ea48a4 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -973,7 +973,7 @@ static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute * { struct idxd_wq *wq = confdev_to_wq(dev); - return sysfs_emit(buf, "%u\n", wq->ats_dis); + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags)); } static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr, @@ -994,7 +994,10 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute if (rc < 0) return rc; - wq->ats_dis = ats_dis; + if (ats_dis) + set_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); + else + clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); return count; } From a8563a33a5e26064061f2fb34215c97f0e2995f4 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:19 -0700 Subject: [PATCH 50/60] dmaengine: idxd: reformat opcap output to match bitmap_parse() input To make input and output consistent, and to prep for the per WQ operation configuration support, change the output of the opcap display to match the input that is expected by the bitmap_parse() helper function. The output will be a bitmap with field width as the number of bits, using the %*pb format specifier for the printk() family.
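A sketch of the round-trip this enables (hypothetical buffer handling; %*pb prints a bitmap as comma-separated 32-bit hex words, which is the same format bitmap_parse() consumes):

    DECLARE_BITMAP(opcap, 256);

    /* show side */
    sysfs_emit(buf, "%*pb\n", 256, opcap);
    /* store side accepts the same string back */
    bitmap_parse(buf, strlen(buf), opcap, 256);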
Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-3-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/idxd.h | 2 ++ drivers/dma/idxd/init.c | 20 ++++++++++++++++++++ drivers/dma/idxd/registers.h | 2 ++ drivers/dma/idxd/sysfs.c | 9 ++------- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 4e7e21264be7..fd0642b8cadc 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -310,6 +310,8 @@ struct idxd_device { struct work_struct work; struct idxd_pmu *idxd_pmu; + + unsigned long *opcap_bmap; }; /* IDXD software descriptor */ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 7e27e69ff741..f6f0654a79b8 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -375,6 +375,19 @@ static void idxd_read_table_offsets(struct idxd_device *idxd) dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset); } +static void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count) +{ + int i, j, nr; + + for (i = 0, nr = 0; i < count; i++) { + for (j = 0; j < BITS_PER_LONG_LONG; j++) { + if (val[i] & BIT(j)) + set_bit(nr, bmap); + nr++; + } + } +} + static void idxd_read_caps(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -433,6 +446,7 @@ static void idxd_read_caps(struct idxd_device *idxd) IDXD_OPCAP_OFFSET + i * sizeof(u64)); dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]); } + multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4); } static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) @@ -454,6 +468,12 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d if (idxd->id < 0) return NULL; + idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev)); + if (!idxd->opcap_bmap) { + ida_free(&idxd_ida, idxd->id); + return NULL; + } + device_initialize(conf_dev); conf_dev->parent = dev; conf_dev->bus = &dsa_bus_type; diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 02449aa9c454..4c96ea85f843 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -90,6 +90,8 @@ struct opcap { u64 bits[4]; }; +#define IDXD_MAX_OPCAP_BITS 256U + #define IDXD_OPCAP_OFFSET 0x40 #define IDXD_TABLE_OFFSET 0x60 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 8ff599ea48a4..57aeeee835ab 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1180,14 +1180,8 @@ static ssize_t op_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_device *idxd = confdev_to_idxd(dev); - int i, rc = 0; - for (i = 0; i < 4; i++) - rc += sysfs_emit_at(buf, rc, "%#llx ", idxd->hw.opcap.bits[i]); - - rc--; - rc += sysfs_emit_at(buf, rc, "\n"); - return rc; + return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, idxd->opcap_bmap); } static DEVICE_ATTR_RO(op_cap); @@ -1412,6 +1406,7 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->wqs); kfree(idxd->engines); ida_free(&idxd_ida, idxd->id); + bitmap_free(idxd->opcap_bmap); kfree(idxd); } From b0325aefd398d8b536ba46ee2e5d24252c1b2258 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:20 -0700 Subject: [PATCH 51/60] dmaengine: idxd: add WQ operation cap restriction support DSA 2.0 adds the capability of configuring DMA ops on a per-workqueue basis.
This means that certain ops can be disabled by the system administrator for certain wqs. By default, all ops are available. A bitmap is used to store the ops due to the total op size of 256 bits, and it is more convenient to use a range list to specify which bits are enabled. One usage this supports is VM migration between different iterations of devices. The newer ops are disabled in order to allow the guest to migrate to a host that only supports older ops. Another usage is to restrict the WQ to certain operations for performance QoS. An op_config sysfs attribute is added per wq. It is only usable when the op_config bit is set under the WQ_CAP register. This means that this attribute will return -EOPNOTSUPP on DSA 1.x devices. The expected input is a range list for the bits per operation the WQ supports. Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-4-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 11 +++ drivers/dma/idxd/device.c | 15 +++- drivers/dma/idxd/idxd.h | 2 + drivers/dma/idxd/init.c | 10 +++ drivers/dma/idxd/registers.h | 8 +- drivers/dma/idxd/sysfs.c | 85 +++++++++++++++++++ 6 files changed, 128 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 0c2b613f2373..3f9f93b5e48c 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -227,6 +227,17 @@ Contact: dmaengine@vger.kernel.org Description: Indicate the number of retires for an enqcmds submission on a sharedwq. A max value to set attribute is capped at 64. +What: /sys/bus/dsa/devices/wq./op_config +Date: Sept 14, 2022 +KernelVersion: 6.0.0 +Contact: dmaengine@vger.kernel.org +Description: Shows the operation capability bits displayed in bitmap format + presented by %*pb printk() output format specifier. + The attribute can be configured when the WQ is disabled in + order to configure the WQ to accept specific bits that + correlates to the operations allowed. It's visible only
+ What: /sys/bus/dsa/devices/engine./group_id Date: Oct 25, 2019 KernelVersion: 5.6.0 diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 88986db57743..df1c108e4bb3 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -391,6 +391,8 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) memset(wq->name, 0, WQ_NAME_SIZE); wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; + if (wq->opcap_bmap) + bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq) @@ -809,7 +811,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; u32 wq_offset; - int i; + int i, n; if (!wq->group) return 0; @@ -867,6 +869,17 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size); + /* bytes 32-63 */ + if (idxd->hw.wq_cap.op_config && wq->opcap_bmap) { + memset(wq->wqcfg->op_config, 0, IDXD_MAX_OPCAP_BITS / 8); + for_each_set_bit(n, wq->opcap_bmap, IDXD_MAX_OPCAP_BITS) { + int pos = n % BITS_PER_LONG_LONG; + int idx = n / BITS_PER_LONG_LONG; + + wq->wqcfg->op_config[idx] |= BIT(pos); + } + } + dev_dbg(dev, "WQ %d CFGs\n", wq->id); for (i = 0; i < WQCFG_STRIDES(idxd); i++) { wq_offset = WQCFG_OFFSET(idxd, wq->id, i); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index fd0642b8cadc..ba6e94f0cd6e 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -196,6 +196,8 @@ struct idxd_wq { enum idxd_wq_state state; unsigned long flags; union wqcfg *wqcfg; + unsigned long *opcap_bmap; + struct dsa_hw_desc **hw_descs; int num_descs; union { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index f6f0654a79b8..2b18d512cbfc 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -191,6 +191,16 @@ static int idxd_setup_wqs(struct idxd_device *idxd) rc = -ENOMEM; goto err; } + + if (idxd->hw.wq_cap.op_config) { + wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); + if (!wq->opcap_bmap) { + put_device(conf_dev); + rc = -ENOMEM; + goto err; + } + bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); + } idxd->wqs[i] = wq; } diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 4c96ea85f843..7b95be8f0f64 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -54,7 +54,8 @@ union wq_cap_reg { u64 priority:1; u64 occupancy:1; u64 occupancy_int:1; - u64 rsvd3:10; + u64 op_config:1; + u64 rsvd3:9; }; u64 bits; } __packed; @@ -350,8 +351,11 @@ union wqcfg { /* bytes 28-31 */ u32 rsvd8; + + /* bytes 32-63 */ + u64 op_config[4]; }; - u32 bits[8]; + u32 bits[16]; } __packed; #define WQCFG_PASID_IDX 2 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 57aeeee835ab..6dbdd74e5bae 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1058,6 +1058,68 @@ static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attrib static struct device_attribute dev_attr_wq_enqcmds_retries = __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store); +static ssize_t wq_op_config_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, wq->opcap_bmap); +} + +static int idxd_verify_supported_opcap(struct 
idxd_device *idxd, unsigned long *opmask) +{ + int bit; + + /* + * The OPCAP is defined as 256 bits that represents each operation the device + * supports per bit. Iterate through all the bits and check if the input mask + * is set for bits that are not set in the OPCAP for the device. If no OPCAP + * bit is set and input mask has the bit set, then return error. + */ + for_each_set_bit(bit, opmask, IDXD_MAX_OPCAP_BITS) { + if (!test_bit(bit, idxd->opcap_bmap)) + return -EINVAL; + } + + return 0; +} + +static ssize_t wq_op_config_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + unsigned long *opmask; + int rc; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + opmask = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); + if (!opmask) + return -ENOMEM; + + rc = bitmap_parse(buf, count, opmask, IDXD_MAX_OPCAP_BITS); + if (rc < 0) + goto err; + + rc = idxd_verify_supported_opcap(idxd, opmask); + if (rc < 0) + goto err; + + bitmap_copy(wq->opcap_bmap, opmask, IDXD_MAX_OPCAP_BITS); + + bitmap_free(opmask); + return count; + +err: + bitmap_free(opmask); + return rc; +} + +static struct device_attribute dev_attr_wq_op_config = + __ATTR(op_config, 0644, wq_op_config_show, wq_op_config_store); + static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_clients.attr, &dev_attr_wq_state.attr, @@ -1075,11 +1137,33 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_ats_disable.attr, &dev_attr_wq_occupancy.attr, &dev_attr_wq_enqcmds_retries.attr, + &dev_attr_wq_op_config.attr, NULL, }; +static bool idxd_wq_attr_op_config_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_wq_op_config.attr && + !idxd->hw.wq_cap.op_config; +} + +static umode_t idxd_wq_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + + if (idxd_wq_attr_op_config_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + static const struct attribute_group idxd_wq_attribute_group = { .attrs = idxd_wq_attributes, + .is_visible = idxd_wq_attr_visible, }; static const struct attribute_group *idxd_wq_attribute_groups[] = { @@ -1091,6 +1175,7 @@ static void idxd_conf_wq_release(struct device *dev) { struct idxd_wq *wq = confdev_to_wq(dev); + bitmap_free(wq->opcap_bmap); kfree(wq->wqcfg); kfree(wq); } From 1f2737521af2b7d018971f1d873856fff02d2b33 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:21 -0700 Subject: [PATCH 52/60] dmaengine: idxd: add configuration for concurrent work descriptor processing Add sysfs knob to allow control of the number of work descriptors that can be concurrently processed by an engine in the group as a fraction of the Maximum Work Descriptors in Progress value specified in ENGCAP register. This control knob is part of toggle for QoS control. 
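The 2-bit value encodes a power-of-two divisor; a sketch of the intended interpretation (illustrative arithmetic, with the maximum drawn from the ENGCAP register):

    /* 0 -> max, 1 -> max/2, 2 -> max/4, 3 -> max/8 */
    u32 effective_limit = max_desc_in_progress >> group->desc_progress_limit;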
Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-5-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 12 +++++ drivers/dma/idxd/device.c | 13 +++-- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/registers.h | 23 ++++---- drivers/dma/idxd/sysfs.c | 53 +++++++++++++++++++ 5 files changed, 87 insertions(+), 15 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 3f9f93b5e48c..02a721a8ea68 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -266,3 +266,15 @@ Contact: dmaengine@vger.kernel.org Description: Indicates the number of Read Buffers reserved for the use of engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers Reserved. + +What: /sys/bus/dsa/devices/group./desc_progress_limit +Date: Sept 14, 2022 +KernelVersion: 6.0.0 +Contact: dmaengine@vger.kernel.org +Description: Allows control of the number of work descriptors that can be + concurrently processed by an engine in the group as a fraction + of the Maximum Work Descriptors in Progress value specified in + the ENGCAP register. The acceptable values are 0 (default), + 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of + the max value). It's visible only on platforms that support + the capability. diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index df1c108e4bb3..05a982e143fe 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -709,6 +709,7 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) group->tc_a = -1; group->tc_b = -1; } + group->desc_progress_limit = 0; } } @@ -765,10 +766,10 @@ static void idxd_group_config_write(struct idxd_group *group) /* setup GRPFLAGS */ grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); - iowrite32(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset); - dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n", + iowrite64(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n", group->id, grpcfg_offset, - ioread32(idxd->reg_base + grpcfg_offset)); + ioread64(idxd->reg_base + grpcfg_offset)); } static int idxd_groups_config_write(struct idxd_device *idxd) @@ -929,6 +930,8 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed; else group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs; + + group->grpcfg.flags.desc_progress_limit = group->desc_progress_limit; } } @@ -1111,8 +1114,8 @@ static void idxd_group_load_config(struct idxd_group *group) } grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); - group->grpcfg.flags.bits = ioread32(idxd->reg_base + grpcfg_offset); - dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n", + group->grpcfg.flags.bits = ioread64(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n", group->id, grpcfg_offset, group->grpcfg.flags.bits); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index ba6e94f0cd6e..7ee870e5ca67 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -96,6 +96,7 @@ struct idxd_group { u8 rdbufs_reserved; int tc_a; int tc_b; + int desc_progress_limit; }; struct idxd_pmu { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 7b95be8f0f64..2cc2543edd58 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ 
-68,7 +68,8 @@ union group_cap_reg { u64 total_rdbufs:8; /* formerly total_tokens */ u64 rdbuf_ctrl:1; /* formerly token_en */ u64 rdbuf_limit:1; /* formerly token_limit */ - u64 rsvd:46; + u64 progress_limit:1; /* descriptor and batch descriptor */ + u64 rsvd:45; }; u64 bits; } __packed; @@ -288,16 +289,18 @@ union msix_perm { union group_flags { struct { - u32 tc_a:3; - u32 tc_b:3; - u32 rsvd:1; - u32 use_rdbuf_limit:1; - u32 rdbufs_reserved:8; - u32 rsvd2:4; - u32 rdbufs_allowed:8; - u32 rsvd3:4; + u64 tc_a:3; + u64 tc_b:3; + u64 rsvd:1; + u64 use_rdbuf_limit:1; + u64 rdbufs_reserved:8; + u64 rsvd2:4; + u64 rdbufs_allowed:8; + u64 rsvd3:4; + u64 desc_progress_limit:2; + u64 rsvd4:30; }; - u32 bits; + u64 bits; } __packed; struct grpcfg { diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 6dbdd74e5bae..3624bdeb71f6 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -443,6 +443,37 @@ static struct device_attribute dev_attr_group_traffic_class_b = __ATTR(traffic_class_b, 0644, group_traffic_class_b_show, group_traffic_class_b_store); +static ssize_t group_desc_progress_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%d\n", group->desc_progress_limit); +} + +static ssize_t group_desc_progress_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + int val, rc; + + rc = kstrtoint(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (val & ~GENMASK(1, 0)) + return -EINVAL; + + group->desc_progress_limit = val; + return count; +} + +static struct device_attribute dev_attr_group_desc_progress_limit = + __ATTR(desc_progress_limit, 0644, group_desc_progress_limit_show, + group_desc_progress_limit_store); + static struct attribute *idxd_group_attributes[] = { &dev_attr_group_work_queues.attr, &dev_attr_group_engines.attr, @@ -454,11 +485,33 @@ static struct attribute *idxd_group_attributes[] = { &dev_attr_group_read_buffers_reserved.attr, &dev_attr_group_traffic_class_a.attr, &dev_attr_group_traffic_class_b.attr, + &dev_attr_group_desc_progress_limit.attr, NULL, }; +static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_group_desc_progress_limit.attr && + !idxd->hw.group_cap.progress_limit; +} + +static umode_t idxd_group_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_group *group = confdev_to_group(dev); + struct idxd_device *idxd = group->idxd; + + if (idxd_group_attr_progress_limit_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + static const struct attribute_group idxd_group_attribute_group = { .attrs = idxd_group_attributes, + .is_visible = idxd_group_attr_visible, }; static const struct attribute_group *idxd_group_attribute_groups[] = { From 7ca68fa3c8ab83dfa539f16c5b4b1aec2e33320d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:22 -0700 Subject: [PATCH 53/60] dmaengine: idxd: add configuration for concurrent batch descriptor processing Add sysfs knob to allow control of the number of batch descriptors that can be concurrently processed by an engine in the group as a fraction of the Maximum Batch Descriptors in Progress value specified in the ENGCAP register. This control knob is part of toggle for QoS control.
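With this second field, the GRPFLAGS layout reads schematically as follows (bit positions derived from the unions in the diffs; the 64-bit accessors were switched in alongside because the new fields sit above bit 31):

    /*
     *  bits  0..31  tc_a, tc_b, use_rdbuf_limit, rdbufs_reserved/allowed
     *  bits 32..33  desc_progress_limit
     *  bits 36..37  batch_progress_limit
     */
    iowrite64(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset);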
Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-6-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 12 +++++++ drivers/dma/idxd/device.c | 2 ++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/registers.h | 4 ++- drivers/dma/idxd/sysfs.c | 36 +++++++++++++++++-- 5 files changed, 52 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 02a721a8ea68..8e2c2c405db2 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -278,3 +278,15 @@ Description: Allows control of the number of work descriptors that can be 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of the max value). It's visible only on platforms that support the capability. + +What: /sys/bus/dsa/devices/group./batch_progress_limit +Date: Sept 14, 2022 +KernelVersion: 6.0.0 +Contact: dmaengine@vger.kernel.org +Description: Allows control of the number of batch descriptors that can be + concurrently processed by an engine in the group as a fraction + of the Maximum Batch Descriptors in Progress value specified in + the ENGCAP register. The acceptable values are 0 (default), + 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of + the max value). It's visible only on platforms that support + the capability. diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 05a982e143fe..2c1e6f6daa62 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -710,6 +710,7 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) group->tc_b = -1; } group->desc_progress_limit = 0; + group->batch_progress_limit = 0; } } @@ -932,6 +933,7 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs; group->grpcfg.flags.desc_progress_limit = group->desc_progress_limit; + group->grpcfg.flags.batch_progress_limit = group->batch_progress_limit; } } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 7ee870e5ca67..1196ab342f01 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -97,6 +97,7 @@ struct idxd_group { int tc_a; int tc_b; int desc_progress_limit; + int batch_progress_limit; }; struct idxd_pmu { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 2cc2543edd58..fe3b8d04f9db 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -298,7 +298,9 @@ union group_flags { u64 rdbufs_allowed:8; u64 rsvd3:4; u64 desc_progress_limit:2; - u64 rsvd4:30; + u64 rsvd4:2; + u64 batch_progress_limit:2; + u64 rsvd5:26; }; u64 bits; } __packed; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 3624bdeb71f6..bdaccf9e0436 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -474,6 +474,36 @@ static struct device_attribute dev_attr_group_desc_progress_limit = __ATTR(desc_progress_limit, 0644, group_desc_progress_limit_show, group_desc_progress_limit_store); +static ssize_t group_batch_progress_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%d\n", group->batch_progress_limit); +} + +static ssize_t group_batch_progress_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = 
confdev_to_group(dev); + int val, rc; + + rc = kstrtoint(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (val & ~GENMASK(1, 0)) + return -EINVAL; + + group->batch_progress_limit = val; + return count; +} + +static struct device_attribute dev_attr_group_batch_progress_limit = + __ATTR(batch_progress_limit, 0644, group_batch_progress_limit_show, + group_batch_progress_limit_store); static struct attribute *idxd_group_attributes[] = { &dev_attr_group_work_queues.attr, &dev_attr_group_engines.attr, @@ -486,14 +516,16 @@ static struct attribute *idxd_group_attributes[] = { &dev_attr_group_traffic_class_a.attr, &dev_attr_group_traffic_class_b.attr, &dev_attr_group_desc_progress_limit.attr, + &dev_attr_group_batch_progress_limit.attr, NULL, }; static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr, struct idxd_device *idxd) { - return attr == &dev_attr_group_desc_progress_limit.attr && - !idxd->hw.group_cap.progress_limit; + return (attr == &dev_attr_group_desc_progress_limit.attr || + attr == &dev_attr_group_batch_progress_limit.attr) && + !idxd->hw.group_cap.progress_limit; } static umode_t idxd_group_attr_visible(struct kobject *kobj, From a0188eb6e71c93ab7dd9bfa4305fac43c70db309 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Sat, 10 Sep 2022 11:17:00 +0530 Subject: [PATCH 54/60] dmaengine: dw-edma: Remove runtime PM support Currently, the dw-edma driver enables the runtime_pm for parent device (chip->dev) and increments/decrements the refcount during alloc/free chan resources callbacks. This leads to a problem when the eDMA driver has been probed, but the channels were not used. This scenario can happen when the DW PCIe driver probes eDMA driver successfully, but the PCI EPF driver decides not to use eDMA channels and use iATU instead for PCI transfers. In this case, the underlying device would be runtime suspended due to pm_runtime_enable() in dw_edma_probe() and the PCI EPF driver would have no knowledge of it. Ideally, the eDMA driver should not be the one doing the runtime PM of the parent device. The responsibility should instead belong to the client drivers like PCI EPF. So let's remove the runtime PM support from eDMA driver. 
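Where the PM references are expected to live after this change, sketched as hypothetical client code (names illustrative; e.g. a PCI EPF driver that actually uses the channels):

    pm_runtime_get_sync(chip->dev);	/* client keeps the parent powered */
    /* ... issue and wait on eDMA transfers ... */
    pm_runtime_put(chip->dev);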
Cc: Serge Semin Cc: Frank Li Reviewed-by: Serge Semin Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20220910054700.12205-1-manivannan.sadhasivam@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-core.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index 07f756479663..c54b24ff5206 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -9,7 +9,6 @@ #include <linux/module.h> #include <linux/device.h> #include <linux/kernel.h> -#include <linux/pm_runtime.h> #include <linux/dmaengine.h> #include <linux/err.h> #include <linux/interrupt.h> @@ -682,15 +681,12 @@ static int dw_edma_alloc_chan_resources(struct dma_chan *dchan) if (chan->status != EDMA_ST_IDLE) return -EBUSY; - pm_runtime_get(chan->dw->chip->dev); - return 0; } static void dw_edma_free_chan_resources(struct dma_chan *dchan) { unsigned long timeout = jiffies + msecs_to_jiffies(5000); - struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); int ret; while (time_before(jiffies, timeout)) { @@ -703,8 +699,6 @@ static void dw_edma_free_chan_resources(struct dma_chan *dchan) cpu_relax(); } - - pm_runtime_put(chan->dw->chip->dev); } static int dw_edma_channel_setup(struct dw_edma *dw, bool write, @@ -977,9 +971,6 @@ int dw_edma_probe(struct dw_edma_chip *chip) if (err) goto err_irq_free; - /* Power management */ - pm_runtime_enable(dev); - /* Turn debugfs on */ dw_edma_v0_core_debugfs_on(dw); @@ -1009,9 +1000,6 @@ int dw_edma_remove(struct dw_edma_chip *chip) for (i = (dw->nr_irqs - 1); i >= 0; i--) free_irq(chip->ops->irq_vector(dev, i), &dw->irq[i]); - /* Power management */ - pm_runtime_disable(dev); - /* Deregister eDMA device */ dma_async_device_unregister(&dw->wr_edma); list_for_each_entry_safe(chan, _chan, &dw->wr_edma.channels, From 41742afd34b7ac354ec354a3efa3651e486d8c54 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Fri, 16 Sep 2022 16:25:41 +0200 Subject: [PATCH 55/60] dt-bindings: dma: apple,admac: Add iommus and power-domains properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apple's ADMAC sits behind an IOMMU and has its own power domain on all supported Apple silicon SoCs. Signed-off-by: Janne Grunau Acked-by: Martin Povišer Acked-by: Krzysztof Kozlowski Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220916142550.269905-2-j@jannau.net Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/apple,admac.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/apple,admac.yaml b/Documentation/devicetree/bindings/dma/apple,admac.yaml index bdc8c129c4f5..3b1e667f7ea0 100644 --- a/Documentation/devicetree/bindings/dma/apple,admac.yaml +++ b/Documentation/devicetree/bindings/dma/apple,admac.yaml @@ -49,6 +49,13 @@ properties: in an interrupts-extended list the disconnected positions will contain an empty phandle reference <0>. + iommus: + minItems: 1 + maxItems: 2 + + power-domains: + maxItems: 1 + required: - compatible - reg From 84641a1e32cbbabfe9a808b4df79f75ed4c88576 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Wed, 14 Sep 2022 16:04:25 +0200 Subject: [PATCH 56/60] dt-bindings: dma: rework qcom,adm Documentation to yaml schema Rework the qcom,adm documentation into a yaml schema. This is not a pure conversion: the driver has since changed its #dma-cells implementation, and the original documentation was wrong from the start. The driver now follows the common DMA client binding, with a single cell that denotes the channel number and nothing else, since the client has to provide the CRCI information via other means, as the sketch below illustrates.
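In current mainline the CRCI typically reaches the ADM driver through the peripheral_config mechanism of the DMA slave configuration. A minimal client-side sketch, assuming the qcom_adm_peripheral_config layout from include/linux/dma/qcom_adm.h (illustrative only, not part of this patch):

#include <linux/dmaengine.h>
#include <linux/dma/qcom_adm.h>

/*
 * Sketch: a client (e.g. NAND or SPI) passes the CRCI out-of-band via
 * dmaengine_slave_config() now that the dma cells carry only the channel
 * number. The struct layout is assumed from include/linux/dma/qcom_adm.h.
 */
static int client_set_adm_crci(struct dma_chan *chan, u32 crci)
{
	struct qcom_adm_peripheral_config periph = {
		.crci = crci,	/* flow-control id, formerly the second dma cell */
	};
	struct dma_slave_config cfg = {
		.peripheral_config = &periph,
		.peripheral_size = sizeof(periph),
	};

	return dmaengine_slave_config(chan, &cfg);
}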
Signed-off-by: Christian Marangi Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20220914140426.7609-1-ansuelsmth@gmail.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/qcom,adm.yaml | 96 +++++++++++++++++++ .../devicetree/bindings/dma/qcom_adm.txt | 61 ------------ 2 files changed, 96 insertions(+), 61 deletions(-) create mode 100644 Documentation/devicetree/bindings/dma/qcom,adm.yaml delete mode 100644 Documentation/devicetree/bindings/dma/qcom_adm.txt diff --git a/Documentation/devicetree/bindings/dma/qcom,adm.yaml b/Documentation/devicetree/bindings/dma/qcom,adm.yaml new file mode 100644 index 000000000000..6c08245bf5d5 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/qcom,adm.yaml @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/qcom,adm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm ADM DMA Controller + +maintainers: + - Christian Marangi + - Bjorn Andersson + +description: | + QCOM ADM DMA controller provides DMA capabilities for + peripheral buses such as NAND and SPI. + +properties: + compatible: + const: qcom,adm + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + "#dma-cells": + const: 1 + + clocks: + items: + - description: phandle to the core clock + - description: phandle to the iface clock + + clock-names: + items: + - const: core + - const: iface + + resets: + items: + - description: phandle to the clk reset + - description: phandle to the c0 reset + - description: phandle to the c1 reset + - description: phandle to the c2 reset + + reset-names: + items: + - const: clk + - const: c0 + - const: c1 + - const: c2 + + qcom,ee: + $ref: /schemas/types.yaml#/definitions/uint32 + description: indicates the security domain identifier used in the secure world. + minimum: 0 + maximum: 255 + +required: + - compatible + - reg + - interrupts + - "#dma-cells" + - clocks + - clock-names + - resets + - reset-names + - qcom,ee + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/qcom,gcc-ipq806x.h> + #include <dt-bindings/reset/qcom,gcc-ipq806x.h> + + adm_dma: dma-controller@18300000 { + compatible = "qcom,adm"; + reg = <0x18300000 0x100000>; + interrupts = <0 170 0>; + #dma-cells = <1>; + + clocks = <&gcc ADM0_CLK>, + <&gcc ADM0_PBUS_CLK>; + clock-names = "core", "iface"; + + resets = <&gcc ADM0_RESET>, + <&gcc ADM0_C0_RESET>, + <&gcc ADM0_C1_RESET>, + <&gcc ADM0_C2_RESET>; + reset-names = "clk", "c0", "c1", "c2"; + qcom,ee = <0>; + }; + +... diff --git a/Documentation/devicetree/bindings/dma/qcom_adm.txt b/Documentation/devicetree/bindings/dma/qcom_adm.txt deleted file mode 100644 index 9d3b2f917b7b..000000000000 --- a/Documentation/devicetree/bindings/dma/qcom_adm.txt +++ /dev/null @@ -1,61 +0,0 @@ -QCOM ADM DMA Controller - -Required properties: -- compatible: must contain "qcom,adm" for IPQ/APQ8064 and MSM8960 -- reg: Address range for DMA registers -- interrupts: Should contain one interrupt shared by all channels -- #dma-cells: must be <2>. First cell denotes the channel number. Second cell - denotes CRCI (client rate control interface) flow control assignment. -- clocks: Should contain the core clock and interface clock. -- clock-names: Must contain "core" for the core clock and "iface" for the - interface clock. -- resets: Must contain an entry for each entry in reset names.
-- reset-names: Must include the following entries: - - clk - - c0 - - c1 - - c2 -- qcom,ee: indicates the security domain identifier used in the secure world. - -Example: - adm_dma: dma@18300000 { - compatible = "qcom,adm"; - reg = <0x18300000 0x100000>; - interrupts = <0 170 0>; - #dma-cells = <2>; - - clocks = <&gcc ADM0_CLK>, <&gcc ADM0_PBUS_CLK>; - clock-names = "core", "iface"; - - resets = <&gcc ADM0_RESET>, - <&gcc ADM0_C0_RESET>, - <&gcc ADM0_C1_RESET>, - <&gcc ADM0_C2_RESET>; - reset-names = "clk", "c0", "c1", "c2"; - qcom,ee = <0>; - }; - -DMA clients must use the format described in the dma.txt file, using a three -cell specifier for each channel. - -Each dmas request consists of 3 cells: - 1. phandle pointing to the DMA controller - 2. channel number - 3. CRCI assignment, if applicable. If no CRCI flow control is required, use 0. - The CRCI is used for flow control. It identifies the peripheral device that - is the source/destination for the transferred data. - -Example: - - spi4: spi@1a280000 { - spi-max-frequency = <50000000>; - - pinctrl-0 = <&spi_pins>; - pinctrl-names = "default"; - - cs-gpios = <&qcom_pinmux 20 0>; - - dmas = <&adm_dma 6 9>, - <&adm_dma 5 10>; - dma-names = "rx", "tx"; - }; From 41d8ffd7cb2add394a2626f12357770846abcf1e Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Wed, 14 Sep 2022 16:04:26 +0200 Subject: [PATCH 57/60] dt-bindings: dma: add additional pbus reset to qcom,adm qcom,adm requires an additional reset for the pbus line. Add this missing reset to match the current implementation in ipq806x.dtsi. Signed-off-by: Christian Marangi Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20220914140426.7609-2-ansuelsmth@gmail.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,adm.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/dma/qcom,adm.yaml b/Documentation/devicetree/bindings/dma/qcom,adm.yaml index 6c08245bf5d5..6a9d7bc74aff 100644 --- a/Documentation/devicetree/bindings/dma/qcom,adm.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,adm.yaml @@ -40,6 +40,7 @@ properties: resets: items: - description: phandle to the clk reset + - description: phandle to the pbus reset - description: phandle to the c0 reset - description: phandle to the c1 reset - description: phandle to the c2 reset @@ -47,6 +48,7 @@ reset-names: items: - const: clk + - const: pbus - const: c0 - const: c1 - const: c2 @@ -86,10 +88,11 @@ examples: clock-names = "core", "iface"; resets = <&gcc ADM0_RESET>, + <&gcc ADM0_PBUS_RESET>, <&gcc ADM0_C0_RESET>, <&gcc ADM0_C1_RESET>, <&gcc ADM0_C2_RESET>; - reset-names = "clk", "c0", "c1", "c2"; + reset-names = "clk", "pbus", "c0", "c1", "c2"; qcom,ee = <0>; }; From f2b816a1dfb8b4bbbecd1603e6c17c3d457e2c0a Mon Sep 17 00:00:00 2001 From: Swati Agarwal Date: Thu, 15 Sep 2022 14:35:16 +0530 Subject: [PATCH 58/60] dmaengine: zynqmp_dma: Add device_synchronize support A device_synchronize implementation is required to properly synchronize the termination of in-flight transfers, so that memory resources are not freed while still in use. Implement this callback in the driver so that the framework can use it (via dmaengine_terminate_sync()/dmaengine_synchronize()).
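For context, the callback slots into the framework's standard teardown sequence. A minimal consumer-side sketch of what device_synchronize enables, using only the generic dmaengine API (not taken from this patch):

#include <linux/dmaengine.h>

/*
 * Generic consumer teardown made safe by device_synchronize: once
 * dmaengine_synchronize() returns, no completion callback (run from the
 * driver's tasklet) can still be executing, so backing memory may be freed.
 */
static void consumer_shutdown(struct dma_chan *chan)
{
	dmaengine_terminate_async(chan);	/* request termination */
	dmaengine_synchronize(chan);		/* waits via device_synchronize() */

	/* safe to free the transfers' buffers from here on */
}

dmaengine_terminate_sync() wraps both calls; either path works once the driver can kill its tasklet through the new callback.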
Signed-off-by: Swati Agarwal Link: https://lore.kernel.org/r/20220915090516.5812-1-swati.agarwal@amd.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/zynqmp_dma.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c index dc299ab36818..2fc675327ec9 100644 --- a/drivers/dma/xilinx/zynqmp_dma.c +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -795,6 +795,17 @@ static int zynqmp_dma_device_terminate_all(struct dma_chan *dchan) return 0; } +/** + * zynqmp_dma_synchronize - Synchronizes the termination of transfers to the current context. + * @dchan: DMA channel pointer + */ +static void zynqmp_dma_synchronize(struct dma_chan *dchan) +{ + struct zynqmp_dma_chan *chan = to_chan(dchan); + + tasklet_kill(&chan->tasklet); +} + /** * zynqmp_dma_prep_memcpy - prepare descriptors for memcpy transaction * @dchan: DMA channel @@ -1057,6 +1068,7 @@ static int zynqmp_dma_probe(struct platform_device *pdev) p = &zdev->common; p->device_prep_dma_memcpy = zynqmp_dma_prep_memcpy; p->device_terminate_all = zynqmp_dma_device_terminate_all; + p->device_synchronize = zynqmp_dma_synchronize; p->device_issue_pending = zynqmp_dma_issue_pending; p->device_alloc_chan_resources = zynqmp_dma_alloc_chan_resources; p->device_free_chan_resources = zynqmp_dma_free_chan_resources; From e8e2f92b1553b977aef8bb4fa4e4c5b69c8d9d54 Mon Sep 17 00:00:00 2001 From: Vaishnav Achath Date: Wed, 14 Sep 2022 16:30:49 +0530 Subject: [PATCH 59/60] dmaengine: ti: k3-udma: Respond TX done if DMA_PREP_INTERRUPT is not requested If the DMA consumer driver does not expect the TX-done callback, then we need not perform the channel RT byte counter calculations to estimate completion, and can instead report the transfer complete on the first attempt. This assumes that a consumer which did not request DMA_PREP_INTERRUPT has its own mechanism for detecting TX completion; for example, the MCSPI EOW interrupt can be used as the TX completion signal for a SPI transaction. (A consumer-side sketch of this usage appears after the final patch in this series.) Signed-off-by: Vaishnav Achath Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20220914110049.5842-1-vaishnav.a@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index fcfcde947b30..7b5081989b3d 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -263,6 +263,7 @@ struct udma_chan_config { enum udma_tp_level channel_tpl; /* Channel Throughput Level */ u32 tr_trigger_type; + unsigned long tx_flags; /* PKDMA mapped channel */ int mapped_channel_id; @@ -1055,9 +1056,14 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d) { u32 peer_bcnt, bcnt; - /* Only TX towards PDMA is affected */ + /* + * Only TX towards PDMA is affected. + * If DMA_PREP_INTERRUPT is not set by the consumer then skip the + * transfer completion calculation; the consumer must ensure that + * there is no stale data in the DMA fabric in this case.
+ */ if (uc->config.ep_type == PSIL_EP_NATIVE || - uc->config.dir != DMA_MEM_TO_DEV) + uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT)) return true; peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); @@ -3418,6 +3424,8 @@ udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (!burst) burst = 1; + uc->config.tx_flags = tx_flags; + if (uc->config.pkt_mode) d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags, context); From b957df98469240d459bcfae6904b36d6ecea9bee Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sun, 11 Sep 2022 17:18:17 +0800 Subject: [PATCH 60/60] dmaengine: ioat: remove unused declarations in dma.h The ioat_ring_alloc_order and ioat_ring_max_alloc_order variables were removed in commit cd60cd96137f ("dmaengine: IOATDMA: Removing descriptor ring reshape"), so remove their leftover extern declarations. Signed-off-by: Gaosheng Cui Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20220911091817.3214271-1-cuigaosheng1@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 140cfe3782fb..35e06b382603 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -196,10 +196,8 @@ extern const struct sysfs_ops ioat_sysfs_ops; extern struct ioat_sysfs_entry ioat_version_attr; extern struct ioat_sysfs_entry ioat_cap_attr; extern int ioat_pending_level; -extern int ioat_ring_alloc_order; extern struct kobj_type ioat_ktype; extern struct kmem_cache *ioat_cache; -extern int ioat_ring_max_alloc_order; extern struct kmem_cache *ioat_sed_cache; static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c)
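Tying back to the k3-udma change in patch 59: a consumer opts out of the TX-done callback by clearing DMA_PREP_INTERRUPT from the prep flags, and must then detect completion through its own mechanism (such as the MCSPI EOW interrupt). A minimal sketch using only the generic slave-DMA API; the consumer specifics are assumed, not taken from the patch:

#include <linux/dmaengine.h>

/*
 * Sketch: submit a MEM_TO_DEV transfer without DMA_PREP_INTERRUPT. With
 * patch 59, udma_is_desc_really_done() then skips the RT byte counter
 * check and reports the descriptor complete immediately; the consumer is
 * responsible for knowing when the peripheral has drained the data.
 */
static int consumer_tx_no_callback(struct dma_chan *chan,
				   struct scatterlist *sgl, unsigned int nents)
{
	struct dma_async_tx_descriptor *desc;

	desc = dmaengine_prep_slave_sg(chan, sgl, nents, DMA_MEM_TO_DEV,
				       0 /* no DMA_PREP_INTERRUPT */);
	if (!desc)
		return -EINVAL;

	dmaengine_submit(desc);
	dma_async_issue_pending(chan);
	return 0;	/* completion is signalled by the peripheral, not the DMA */
}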