From 3e01d5254698ea3d18e09d96b974c762328352cd Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 17 Jul 2023 21:42:19 +0200 Subject: [PATCH 001/212] mtd: rawnand: marvell: Ensure program page operations are successful The NAND core complies with the ONFI specification, which itself mentions that after any program or erase operation, a status check should be performed to see whether the operation was finished *and* successful. The NAND core offers helpers to finish a page write (sending the "PAGE PROG" command, waiting for the NAND chip to be ready again, and checking the operation status). But in some cases, advanced controller drivers might want to optimize this and craft their own page write helper to leverage additional hardware capabilities, thus not always using the core facilities. Some drivers, like this one, do not use the core helper to finish a page write because the final cycles are automatically managed by the hardware. In this case, the additional care must be taken to manually perform the final status check. Let's read the NAND chip status at the end of the page write helper and return -EIO upon error. Cc: stable@vger.kernel.org Fixes: 02f26ecf8c77 ("mtd: nand: add reworked Marvell NAND controller driver") Reported-by: Aviram Dali Signed-off-by: Miquel Raynal Tested-by: Ravi Chandra Minnikanti Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-1-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/marvell_nand.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 2c94da7a3b3a..b841a81cb128 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -1165,6 +1165,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip, .ndcb[2] = NDCB2_ADDR5_PAGE(page), }; unsigned int oob_bytes = lt->spare_bytes + (raw ? lt->ecc_bytes : 0); + u8 status; int ret; /* NFCv2 needs more information about the operation being executed */ @@ -1198,7 +1199,18 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip, ret = marvell_nfc_wait_op(chip, PSEC_TO_MSEC(sdr->tPROG_max)); - return ret; + if (ret) + return ret; + + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + if (status & NAND_STATUS_FAIL) + return -EIO; + + return 0; } static int marvell_nfc_hw_ecc_hmg_write_page_raw(struct nand_chip *chip, @@ -1627,6 +1639,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip, int data_len = lt->data_bytes; int spare_len = lt->spare_bytes; int chunk, ret; + u8 status; marvell_nfc_select_target(chip, chip->cur_cs); @@ -1663,6 +1676,14 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip, if (ret) return ret; + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + if (status & NAND_STATUS_FAIL) + return -EIO; + return 0; } From 6792b7fce610bcd1cf3e07af3607fe7e2c38c1d8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Aug 2023 17:00:34 +0200 Subject: [PATCH 002/212] mtd: physmap-core: Restore map_rom fallback When the exact mapping type driver was not available, the old physmap_of_core driver fell back to mapping the region as ROM. Unfortunately this feature was lost when the DT and pdata cases were merged. Revive this useful feature. Fixes: 642b1e8dbed7bbbf ("mtd: maps: Merge physmap_of.c into physmap-core.c") Signed-off-by: Geert Uytterhoeven Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/550e8c8c1da4c4baeb3d71ff79b14a18d4194f9e.1693407371.git.geert+renesas@glider.be --- drivers/mtd/maps/physmap-core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/mtd/maps/physmap-core.c b/drivers/mtd/maps/physmap-core.c index 78710fbc8e7f..fc8721339282 100644 --- a/drivers/mtd/maps/physmap-core.c +++ b/drivers/mtd/maps/physmap-core.c @@ -551,6 +551,17 @@ static int physmap_flash_probe(struct platform_device *dev) if (info->probe_type) { info->mtds[i] = do_map_probe(info->probe_type, &info->maps[i]); + + /* Fall back to mapping region as ROM */ + if (!info->mtds[i] && IS_ENABLED(CONFIG_MTD_ROM) && + strcmp(info->probe_type, "map_rom")) { + dev_warn(&dev->dev, + "map_probe() failed for type %s\n", + info->probe_type); + + info->mtds[i] = do_map_probe("map_rom", + &info->maps[i]); + } } else { int j; From 9836a987860e33943945d4b257729a4f94eae576 Mon Sep 17 00:00:00 2001 From: Martin Kurbanov Date: Tue, 5 Sep 2023 17:56:37 +0300 Subject: [PATCH 003/212] mtd: spinand: micron: correct bitmask for ecc status Valid bitmask is 0x70 in the status register. Fixes: a508e8875e13 ("mtd: spinand: Add initial support for Micron MT29F2G01ABAGD") Signed-off-by: Martin Kurbanov Reviewed-by: Frieder Schrempf Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230905145637.139068-1-mmkurbanov@sberdevices.ru --- drivers/mtd/nand/spi/micron.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/spi/micron.c b/drivers/mtd/nand/spi/micron.c index 50b7295bc922..12601bc4227a 100644 --- a/drivers/mtd/nand/spi/micron.c +++ b/drivers/mtd/nand/spi/micron.c @@ -12,7 +12,7 @@ #define SPINAND_MFR_MICRON 0x2c -#define MICRON_STATUS_ECC_MASK GENMASK(7, 4) +#define MICRON_STATUS_ECC_MASK GENMASK(6, 4) #define MICRON_STATUS_ECC_NO_BITFLIPS (0 << 4) #define MICRON_STATUS_ECC_1TO3_BITFLIPS (1 << 4) #define MICRON_STATUS_ECC_4TO6_BITFLIPS (3 << 4) From 719606154c7033c068a5d4c1dc5f9163b814b3c8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 13 Sep 2023 09:04:27 +0300 Subject: [PATCH 004/212] phy: mapphone-mdm6600: Fix runtime disable on probe Commit d644e0d79829 ("phy: mapphone-mdm6600: Fix PM error handling in phy_mdm6600_probe") caused a regression where we now unconditionally disable runtime PM at the end of the probe while it is only needed on errors. Cc: Ivaylo Dimitrov Cc: Merlijn Wajer Cc: Miaoqian Lin Cc: Pavel Machek Reviewed-by: Sebastian Reichel Fixes: d644e0d79829 ("phy: mapphone-mdm6600: Fix PM error handling in phy_mdm6600_probe") Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20230913060433.48373-1-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-mapphone-mdm6600.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index 1d567604b650..0147112f77b1 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -627,10 +627,12 @@ static int phy_mdm6600_probe(struct platform_device *pdev) pm_runtime_put_autosuspend(ddata->dev); cleanup: - if (error < 0) + if (error < 0) { phy_mdm6600_device_power_off(ddata); - pm_runtime_disable(ddata->dev); - pm_runtime_dont_use_autosuspend(ddata->dev); + pm_runtime_disable(ddata->dev); + pm_runtime_dont_use_autosuspend(ddata->dev); + } + return error; } From b99e0ba9633af51638e5ee1668da2e33620c134f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 13 Sep 2023 09:04:28 +0300 Subject: [PATCH 005/212] phy: mapphone-mdm6600: Fix runtime PM for remove Otherwise we will get an underflow on remove. Cc: Ivaylo Dimitrov Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Fixes: f7f50b2a7b05 ("phy: mapphone-mdm6600: Add runtime PM support for n_gsm on USB suspend") Signed-off-by: Tony Lindgren Reviewed-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230913060433.48373-2-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-mapphone-mdm6600.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index 0147112f77b1..df48b1becebe 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -641,6 +641,7 @@ static void phy_mdm6600_remove(struct platform_device *pdev) struct phy_mdm6600 *ddata = platform_get_drvdata(pdev); struct gpio_desc *reset_gpio = ddata->ctrl_gpios[PHY_MDM6600_RESET]; + pm_runtime_get_noresume(ddata->dev); pm_runtime_dont_use_autosuspend(ddata->dev); pm_runtime_put_sync(ddata->dev); pm_runtime_disable(ddata->dev); From 3b384cc74b00b5ac21d18e4c1efc3c1da5300971 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 13 Sep 2023 09:04:29 +0300 Subject: [PATCH 006/212] phy: mapphone-mdm6600: Fix pinctrl_pm handling for sleep pins Looks like the driver sleep pins configuration is unusable. Adding the sleep pins causes the usb phy to not respond. We need to use the default pins in probe, and only set sleep pins at phy_mdm6600_device_power_off(). As the modem can also be booted to a serial port mode for firmware flashing, let's make the pin changes limited to probe and remove. For probe, we get the default pins automatically. We only need to set the sleep pins in phy_mdm6600_device_power_off() to prevent the modem from waking up because the gpio line glitches. If it turns out that we need a separate state for phy_mdm6600_power_on() and phy_mdm6600_power_off(), we can use the pinctrl idle state. Cc: Ivaylo Dimitrov Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Fixes: 2ad2af081622 ("phy: mapphone-mdm6600: Improve phy related runtime PM calls") Signed-off-by: Tony Lindgren Reviewed-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230913060433.48373-3-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-mapphone-mdm6600.c | 29 +++++++++------------ 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index df48b1becebe..376d023a0aa9 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -122,16 +122,10 @@ static int phy_mdm6600_power_on(struct phy *x) { struct phy_mdm6600 *ddata = phy_get_drvdata(x); struct gpio_desc *enable_gpio = ddata->ctrl_gpios[PHY_MDM6600_ENABLE]; - int error; if (!ddata->enabled) return -ENODEV; - error = pinctrl_pm_select_default_state(ddata->dev); - if (error) - dev_warn(ddata->dev, "%s: error with default_state: %i\n", - __func__, error); - gpiod_set_value_cansleep(enable_gpio, 1); /* Allow aggressive PM for USB, it's only needed for n_gsm port */ @@ -160,11 +154,6 @@ static int phy_mdm6600_power_off(struct phy *x) gpiod_set_value_cansleep(enable_gpio, 0); - error = pinctrl_pm_select_sleep_state(ddata->dev); - if (error) - dev_warn(ddata->dev, "%s: error with sleep_state: %i\n", - __func__, error); - return 0; } @@ -456,6 +445,7 @@ static void phy_mdm6600_device_power_off(struct phy_mdm6600 *ddata) { struct gpio_desc *reset_gpio = ddata->ctrl_gpios[PHY_MDM6600_RESET]; + int error; ddata->enabled = false; phy_mdm6600_cmd(ddata, PHY_MDM6600_CMD_BP_SHUTDOWN_REQ); @@ -471,6 +461,17 @@ static void phy_mdm6600_device_power_off(struct phy_mdm6600 *ddata) } else { dev_err(ddata->dev, "Timed out powering down\n"); } + + /* + * Keep reset gpio high with padconf internal pull-up resistor to + * prevent modem from waking up during deeper SoC idle states. The + * gpio bank lines can have glitches if not in the always-on wkup + * domain. + */ + error = pinctrl_pm_select_sleep_state(ddata->dev); + if (error) + dev_warn(ddata->dev, "%s: error with sleep_state: %i\n", + __func__, error); } static void phy_mdm6600_deferred_power_on(struct work_struct *work) @@ -571,12 +572,6 @@ static int phy_mdm6600_probe(struct platform_device *pdev) ddata->dev = &pdev->dev; platform_set_drvdata(pdev, ddata); - /* Active state selected in phy_mdm6600_power_on() */ - error = pinctrl_pm_select_sleep_state(ddata->dev); - if (error) - dev_warn(ddata->dev, "%s: error with sleep_state: %i\n", - __func__, error); - error = phy_mdm6600_init_lines(ddata); if (error) return error; From 2d3465a75c9f83684d17da6807423824bf260524 Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Mon, 28 Aug 2023 11:23:50 -0400 Subject: [PATCH 007/212] phy: qcom-qmp-usb: initialize PCS_USB registers Currently, PCS_USB registers that have their initialization data in a pcs_usb_tbl table are never initialized. Fix that. Fixes: fc64623637da ("phy: qcom-qmp-combo,usb: add support for separate PCS_USB region") Signed-off-by: Adrien Thierry Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20230828152353.16529-2-athierry@redhat.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 0130bb8e809a..e49262ce6d91 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1703,6 +1703,7 @@ static int qmp_usb_power_on(struct phy *phy) void __iomem *tx = qmp->tx; void __iomem *rx = qmp->rx; void __iomem *pcs = qmp->pcs; + void __iomem *pcs_usb = qmp->pcs_usb; void __iomem *status; unsigned int val; int ret; @@ -1726,6 +1727,9 @@ static int qmp_usb_power_on(struct phy *phy) qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); + if (pcs_usb) + qmp_usb_configure(pcs_usb, cfg->pcs_usb_tbl, cfg->pcs_usb_tbl_num); + if (cfg->has_pwrdn_delay) usleep_range(10, 20); From c599dc5cca4dd6a5c664e4a8837246e68a96cb4c Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Mon, 28 Aug 2023 11:23:51 -0400 Subject: [PATCH 008/212] phy: qcom-qmp-usb: split PCS_USB init table for sc8280xp and sa8775p For sc8280xp and sa8775p, PCS and PCS_USB initialization data is described in the same table, thus the pcs_usb offset is not being applied during initialization of PCS_USB registers. Fix this by adding the appropriate pcs_usb_tbl tables. Fixes: 8bd2d6e11c99 ("phy: qcom-qmp: Add SA8775P USB3 UNI phy") Fixes: c0c7769cdae2 ("phy: qcom-qmp: Add SC8280XP USB3 UNI phy") Reviewed-by: Dmitry Baryshkov Signed-off-by: Adrien Thierry Link: https://lore.kernel.org/r/20230828152353.16529-3-athierry@redhat.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index e49262ce6d91..c69577601ae0 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1112,8 +1112,6 @@ static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0xaa), QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCS_TX_RX_CONFIG, 0x0c), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), QMP_PHY_INIT_CFG(QPHY_V5_PCS_CDR_RESET_TIME, 0x0a), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG1, 0x88), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG2, 0x13), @@ -1122,6 +1120,11 @@ static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x21), }; +static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), +}; + static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_LOCK_DETECT_CONFIG1, 0xc4), QMP_PHY_INIT_CFG(QPHY_V5_PCS_LOCK_DETECT_CONFIG2, 0x89), @@ -1131,9 +1134,6 @@ static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0xaa), QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCS_TX_RX_CONFIG, 0x0c), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_POWER_STATE_CONFIG1, 0x6f), QMP_PHY_INIT_CFG(QPHY_V5_PCS_CDR_RESET_TIME, 0x0a), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG1, 0x88), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG2, 0x13), @@ -1142,6 +1142,12 @@ static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x21), }; +static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_POWER_STATE_CONFIG1, 0x6f), +}; + struct qmp_usb_offsets { u16 serdes; u16 pcs; @@ -1383,6 +1389,8 @@ static const struct qmp_phy_cfg sa8775p_usb3_uniphy_cfg = { .rx_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_rx_tbl), .pcs_tbl = sa8775p_usb3_uniphy_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(sa8775p_usb3_uniphy_pcs_tbl), + .pcs_usb_tbl = sa8775p_usb3_uniphy_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(sa8775p_usb3_uniphy_pcs_usb_tbl), .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), .reset_list = qcm2290_usb3phy_reset_l, @@ -1405,6 +1413,8 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .rx_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_rx_tbl), .pcs_tbl = sc8280xp_usb3_uniphy_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_pcs_tbl), + .pcs_usb_tbl = sc8280xp_usb3_uniphy_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_pcs_usb_tbl), .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), .reset_list = qcm2290_usb3phy_reset_l, From 211de9681195b92709b5d68e07af948b01c8bb9a Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 23 Aug 2023 21:17:28 +0300 Subject: [PATCH 009/212] dt-bindings: phy: qcom,ipq8074-qmp-pcie: fix warning regarding reg size Fix the 'reg is too long' warning caused by me adding 64-bit address and size to the example, while default being 32-bit (cell size equal to 1). Reported-by: Rob Herring Fixes: 505fb2541678 ("dt-bindings: phy: migrate QMP PCIe PHY bindings to qcom,sc8280xp-qmp-pcie-phy.yaml") Signed-off-by: Dmitry Baryshkov Acked-by: Rob Herring Link: https://lore.kernel.org/r/20230823181728.3082946-1-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml index 5073007267ad..634cec5d57ea 100644 --- a/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml @@ -70,7 +70,7 @@ examples: phy@84000 { compatible = "qcom,ipq6018-qmp-pcie-phy"; - reg = <0x0 0x00084000 0x0 0x1000>; + reg = <0x00084000 0x1000>; clocks = <&gcc GCC_PCIE0_AUX_CLK>, <&gcc GCC_PCIE0_AHB_CLK>, From 5f7cd740a6b657fba775bde744496e5ed21851ca Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Aug 2023 17:13:45 +0800 Subject: [PATCH 010/212] phy: qcom: phy-qcom-m31: fix wrong pointer pass to PTR_ERR() It should be 'qphy->vreg' passed to PTR_ERR() when devm_regulator_get() fails. Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Signed-off-by: Yang Yingliang Reviewed-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20230824091345.1072650-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index ed08072ca032..99d570f4142a 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -256,7 +256,7 @@ static int m31usb_phy_probe(struct platform_device *pdev) qphy->vreg = devm_regulator_get(dev, "vdda-phy"); if (IS_ERR(qphy->vreg)) - return dev_err_probe(dev, PTR_ERR(qphy->phy), + return dev_err_probe(dev, PTR_ERR(qphy->vreg), "failed to get vreg\n"); phy_set_drvdata(qphy->phy, qphy); From 426e05ce126e8febc21fae643139a1072d2670ad Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Aug 2023 17:23:56 +0800 Subject: [PATCH 011/212] phy: qcom: phy-qcom-m31: change m31_ipq5332_regs to static m31_ipq5332_regs is only used in phy-qcom-m31.c now, change it to static. Signed-off-by: Yang Yingliang Reviewed-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20230824092356.1154839-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index 99d570f4142a..014278e5428c 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -82,7 +82,7 @@ struct m31_priv_data { unsigned int nregs; }; -struct m31_phy_regs m31_ipq5332_regs[] = { +static struct m31_phy_regs m31_ipq5332_regs[] = { { USB_PHY_CFG0, UTMI_PHY_OVERRIDE_EN, From 6ee8a9a772b5abd9a9791123ca7aee2dbf126d5f Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 1 Sep 2023 15:52:31 +0800 Subject: [PATCH 012/212] phy: realtek: usb: Drop unnecessary error check for debugfs_create_dir() Both debugfs_create_dir() and debugfs_create_file() return ERR_PTR and never return NULL. As Greg suggested, this patch removes the error checking for debugfs_create_dir in phy-rtk-usb2.c and phy-rtk-usb3.c. This is because the DebugFS kernel API is developed in a way that the caller can safely ignore the errors that occur during the creation of DebugFS nodes. The debugfs APIs have a IS_ERR() judge in start_creating() which can handle it gracefully. So these checks are unnecessary. Fixes: 134e6d25f6bd ("phy: realtek: usb: Add driver for the Realtek SoC USB 2.0 PHY") Fixes: adda6e82a7de ("phy: realtek: usb: Add driver for the Realtek SoC USB 3.0 PHY") Signed-off-by: Jinjie Ruan Suggested-by: Greg Kroah-Hartman Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20230901075231.1368947-1-ruanjinjie@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/realtek/phy-rtk-usb2.c | 10 ++-------- drivers/phy/realtek/phy-rtk-usb3.c | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/phy/realtek/phy-rtk-usb2.c b/drivers/phy/realtek/phy-rtk-usb2.c index 5e7ee060b404..aedc78bd37f7 100644 --- a/drivers/phy/realtek/phy-rtk-usb2.c +++ b/drivers/phy/realtek/phy-rtk-usb2.c @@ -853,17 +853,11 @@ static inline void create_debug_files(struct rtk_phy *rtk_phy) rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), phy_debug_root); - if (!rtk_phy->debug_dir) - return; - if (!debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, - &rtk_usb2_parameter_fops)) - goto file_error; + debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, + &rtk_usb2_parameter_fops); return; - -file_error: - debugfs_remove_recursive(rtk_phy->debug_dir); } static inline void remove_debug_files(struct rtk_phy *rtk_phy) diff --git a/drivers/phy/realtek/phy-rtk-usb3.c b/drivers/phy/realtek/phy-rtk-usb3.c index 7881f908aade..dfb3122f3f11 100644 --- a/drivers/phy/realtek/phy-rtk-usb3.c +++ b/drivers/phy/realtek/phy-rtk-usb3.c @@ -416,17 +416,11 @@ static inline void create_debug_files(struct rtk_phy *rtk_phy) return; rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), phy_debug_root); - if (!rtk_phy->debug_dir) - return; - if (!debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, - &rtk_usb3_parameter_fops)) - goto file_error; + debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, + &rtk_usb3_parameter_fops); return; - -file_error: - debugfs_remove_recursive(rtk_phy->debug_dir); } static inline void remove_debug_files(struct rtk_phy *rtk_phy) From ecec1de5c58f8f3ab6959fcf8d68752eeb65311d Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Thu, 7 Sep 2023 12:20:52 +0530 Subject: [PATCH 013/212] phy: qcom: m31: Remove unwanted qphy->vreg is NULL check Fix the following Smatch complaint: drivers/phy/qualcomm/phy-qcom-m31.c:175 m31usb_phy_init() warn: variable dereferenced before check 'qphy->vreg' (see line 167) drivers/phy/qualcomm/phy-qcom-m31.c 166 167 ret = regulator_enable(qphy->vreg); ^^^^^^^^^^ Unchecked dereference 168 if (ret) { 169 dev_err(&phy->dev, "failed to enable regulator, %d\n", ret); 170 return ret; 171 } 172 173 ret = clk_prepare_enable(qphy->clk); 174 if (ret) { 175 if (qphy->vreg) ^^^^^^^^^^ Checked too late 176 regulator_disable(qphy->vreg); 177 dev_err(&phy->dev, "failed to enable cfg ahb clock, %d\n", ret); Since the phy will not get registered if qphy->vreg is NULL, this check is not needed. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-phy/cbd26132-c624-44b7-a073-73222b287338@moroto.mountain/T/#u Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/1694069452-3794-1-git-send-email-quic_varada@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index 014278e5428c..5cb7e79b99b3 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -172,8 +172,7 @@ static int m31usb_phy_init(struct phy *phy) ret = clk_prepare_enable(qphy->clk); if (ret) { - if (qphy->vreg) - regulator_disable(qphy->vreg); + regulator_disable(qphy->vreg); dev_err(&phy->dev, "failed to enable cfg ahb clock, %d\n", ret); return ret; } From 112c23705c6dc59a05290c8e3e597e1b4e9c23fc Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 11 Sep 2023 22:07:14 +0200 Subject: [PATCH 014/212] phy: qcom-qmp-combo: Square out 8550 POWER_STATE_CONFIG1 There are two instances of the POWER_STATE_CONFIG1 register: one in the PCS space and another one in PCS_USB. The downstream init sequence pokes the latter one while we've been poking the former one (and misnamed it as the latter one, impostor!). Fix that up to avoid UB. Fixes: 49742e9edab3 ("phy: qcom-qmp-combo: Add support for SM8550") Reviewed-by: Abel Vesa Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230829-topic-8550_usbphy-v3-1-34ec434194c5@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index cbb28afce135..8fd240dd5127 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -859,10 +859,10 @@ static const struct qmp_phy_init_tbl sm8550_usb3_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_PCS_TX_RX_CONFIG, 0x0c), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_EQ_CONFIG1, 0x4b), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_EQ_CONFIG5, 0x10), - QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1, 0x68), }; static const struct qmp_phy_init_tbl sm8550_usb3_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1, 0x68), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_RCVR_DTCT_DLY_U3_L, 0x40), diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h index 9510e63ba9d8..c38530d6776b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h @@ -12,7 +12,7 @@ #define QPHY_USB_V6_PCS_LOCK_DETECT_CONFIG3 0xcc #define QPHY_USB_V6_PCS_LOCK_DETECT_CONFIG6 0xd8 #define QPHY_USB_V6_PCS_REFGEN_REQ_CONFIG1 0xdc -#define QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1 0x90 +#define QPHY_USB_V6_PCS_POWER_STATE_CONFIG1 0x90 #define QPHY_USB_V6_PCS_RX_SIGDET_LVL 0x188 #define QPHY_USB_V6_PCS_RCVR_DTCT_DLY_P1U2_L 0x190 #define QPHY_USB_V6_PCS_RCVR_DTCT_DLY_P1U2_H 0x194 @@ -23,6 +23,7 @@ #define QPHY_USB_V6_PCS_EQ_CONFIG1 0x1dc #define QPHY_USB_V6_PCS_EQ_CONFIG5 0x1ec +#define QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1 0x00 #define QPHY_USB_V6_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL 0x18 #define QPHY_USB_V6_PCS_USB3_RXEQTRAINING_DFE_TIME_S2 0x3c #define QPHY_USB_V6_PCS_USB3_RCVR_DTCT_DLY_U3_L 0x40 From 76d20290d0c66a84a7a40c6231e73d1ab25994e5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 11 Sep 2023 22:07:15 +0200 Subject: [PATCH 015/212] phy: qcom-qmp-combo: initialize PCS_USB registers Currently, PCS_USB registers that have their initialization data in a pcs_usb_tbl table are never initialized. Fix that. Fixes: fc64623637da ("phy: qcom-qmp-combo,usb: add support for separate PCS_USB region") Reported-by: Adrien Thierry Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230829-topic-8550_usbphy-v3-2-34ec434194c5@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 8fd240dd5127..5e6fc8103e9d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2555,6 +2555,7 @@ static int qmp_combo_usb_power_on(struct phy *phy) void __iomem *tx2 = qmp->tx2; void __iomem *rx2 = qmp->rx2; void __iomem *pcs = qmp->pcs; + void __iomem *pcs_usb = qmp->pcs_usb; void __iomem *status; unsigned int val; int ret; @@ -2576,6 +2577,9 @@ static int qmp_combo_usb_power_on(struct phy *phy) qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); + if (pcs_usb) + qmp_combo_configure(pcs_usb, cfg->pcs_usb_tbl, cfg->pcs_usb_tbl_num); + if (cfg->has_pwrdn_delay) usleep_range(10, 20); From 11395c32f9e9e26f2f6281bd916a1161ba42ee6c Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Tue, 12 Sep 2023 07:46:46 -0400 Subject: [PATCH 016/212] phy: qualcomm: Fix typos in comments Fix typo in the description of the 'succesfully'. Signed-off-by: Bo Liu Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230912114646.8452-1-liubo03@inspur.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-apq8064-sata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c index 8814f4322adf..3642a5d4f2f3 100644 --- a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c +++ b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c @@ -152,7 +152,7 @@ static int qcom_apq8064_sata_phy_init(struct phy *generic_phy) return ret; } - /* SATA phy calibrated succesfully, power up to functional mode */ + /* SATA phy calibrated successfully, power up to functional mode */ writel_relaxed(0x3E, base + SATA_PHY_POW_DWN_CTRL1); writel_relaxed(0x01, base + SATA_PHY_RX_IMCAL0); writel_relaxed(0x01, base + SATA_PHY_TX_IMCAL0); From 3a4a893dbb19e229db3b753f0462520b561dee98 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 17 Jul 2023 21:42:20 +0200 Subject: [PATCH 017/212] mtd: rawnand: arasan: Ensure program page operations are successful The NAND core complies with the ONFI specification, which itself mentions that after any program or erase operation, a status check should be performed to see whether the operation was finished *and* successful. The NAND core offers helpers to finish a page write (sending the "PAGE PROG" command, waiting for the NAND chip to be ready again, and checking the operation status). But in some cases, advanced controller drivers might want to optimize this and craft their own page write helper to leverage additional hardware capabilities, thus not always using the core facilities. Some drivers, like this one, do not use the core helper to finish a page write because the final cycles are automatically managed by the hardware. In this case, the additional care must be taken to manually perform the final status check. Let's read the NAND chip status at the end of the page write helper and return -EIO upon error. Cc: Michal Simek Cc: stable@vger.kernel.org Fixes: 88ffef1b65cf ("mtd: rawnand: arasan: Support the hardware BCH ECC engine") Signed-off-by: Miquel Raynal Acked-by: Michal Simek Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-2-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/arasan-nand-controller.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c index 4621ec549cc7..a492051c46f5 100644 --- a/drivers/mtd/nand/raw/arasan-nand-controller.c +++ b/drivers/mtd/nand/raw/arasan-nand-controller.c @@ -515,6 +515,7 @@ static int anfc_write_page_hw_ecc(struct nand_chip *chip, const u8 *buf, struct mtd_info *mtd = nand_to_mtd(chip); unsigned int len = mtd->writesize + (oob_required ? mtd->oobsize : 0); dma_addr_t dma_addr; + u8 status; int ret; struct anfc_op nfc_op = { .pkt_reg = @@ -561,10 +562,21 @@ static int anfc_write_page_hw_ecc(struct nand_chip *chip, const u8 *buf, } /* Spare data is not protected */ - if (oob_required) + if (oob_required) { ret = nand_write_oob_std(chip, page); + if (ret) + return ret; + } - return ret; + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + if (status & NAND_STATUS_FAIL) + return -EIO; + + return 0; } static int anfc_sel_write_page_hw_ecc(struct nand_chip *chip, const u8 *buf, From 9777cc13fd2c3212618904636354be60835e10bb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 17 Jul 2023 21:42:21 +0200 Subject: [PATCH 018/212] mtd: rawnand: pl353: Ensure program page operations are successful The NAND core complies with the ONFI specification, which itself mentions that after any program or erase operation, a status check should be performed to see whether the operation was finished *and* successful. The NAND core offers helpers to finish a page write (sending the "PAGE PROG" command, waiting for the NAND chip to be ready again, and checking the operation status). But in some cases, advanced controller drivers might want to optimize this and craft their own page write helper to leverage additional hardware capabilities, thus not always using the core facilities. Some drivers, like this one, do not use the core helper to finish a page write because the final cycles are automatically managed by the hardware. In this case, the additional care must be taken to manually perform the final status check. Let's read the NAND chip status at the end of the page write helper and return -EIO upon error. Cc: Michal Simek Cc: stable@vger.kernel.org Fixes: 08d8c62164a3 ("mtd: rawnand: pl353: Add support for the ARM PL353 SMC NAND controller") Signed-off-by: Miquel Raynal Tested-by: Michal Simek Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-3-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/pl35x-nand-controller.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/mtd/nand/raw/pl35x-nand-controller.c b/drivers/mtd/nand/raw/pl35x-nand-controller.c index 8da5fee321b5..c506e92a3e45 100644 --- a/drivers/mtd/nand/raw/pl35x-nand-controller.c +++ b/drivers/mtd/nand/raw/pl35x-nand-controller.c @@ -511,6 +511,7 @@ static int pl35x_nand_write_page_hwecc(struct nand_chip *chip, u32 addr1 = 0, addr2 = 0, row; u32 cmd_addr; int i, ret; + u8 status; ret = pl35x_smc_set_ecc_mode(nfc, chip, PL35X_SMC_ECC_CFG_MODE_APB); if (ret) @@ -563,6 +564,14 @@ static int pl35x_nand_write_page_hwecc(struct nand_chip *chip, if (ret) goto disable_ecc_engine; + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + goto disable_ecc_engine; + + if (status & NAND_STATUS_FAIL) + ret = -EIO; + disable_ecc_engine: pl35x_smc_set_ecc_mode(nfc, chip, PL35X_SMC_ECC_CFG_MODE_BYPASS); From 5279f4a9eed3ee7d222b76511ea7a22c89e7eefd Mon Sep 17 00:00:00 2001 From: Bibek Kumar Patro Date: Wed, 13 Sep 2023 12:37:02 +0530 Subject: [PATCH 019/212] mtd: rawnand: qcom: Unmap the right resource upon probe failure We currently provide the physical address of the DMA region rather than the output of dma_map_resource() which is obviously wrong. Fixes: 7330fc505af4 ("mtd: rawnand: qcom: stop using phys_to_dma()") Cc: stable@vger.kernel.org Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bibek Kumar Patro Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230913070702.12707-1-quic_bibekkum@quicinc.com --- drivers/mtd/nand/raw/qcom_nandc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 64499c1b3603..b079605c84d3 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -3444,7 +3444,7 @@ static int qcom_nandc_probe(struct platform_device *pdev) err_aon_clk: clk_disable_unprepare(nandc->core_clk); err_core_clk: - dma_unmap_resource(dev, res->start, resource_size(res), + dma_unmap_resource(dev, nandc->base_dma, resource_size(res), DMA_BIDIRECTIONAL, 0); return ret; } From 1bbac8d6af085408885675c1e29b2581250be124 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 25 Aug 2023 15:55:02 +0200 Subject: [PATCH 020/212] dt-bindings: mmc: sdhci-msm: correct minimum number of clocks In the TXT binding before conversion, the "xo" clock was listed as optional. Conversion kept it optional in "clock-names", but not in "clocks". This fixes dbts_check warnings like: qcom-sdx65-mtp.dtb: mmc@8804000: clocks: [[13, 59], [13, 58]] is too short Cc: Fixes: a45537723f4b ("dt-bindings: mmc: sdhci-msm: Convert bindings to yaml") Signed-off-by: Krzysztof Kozlowski Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20230825135503.282135-1-krzysztof.kozlowski@linaro.org Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/sdhci-msm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml index 80141eb7fc6b..10f34aa8ba8a 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml +++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml @@ -69,7 +69,7 @@ properties: maxItems: 4 clocks: - minItems: 3 + minItems: 2 items: - description: Main peripheral bus clock, PCLK/HCLK - AHB Bus clock - description: SDC MMC clock, MCLK From 32a9cdb8869dc111a0c96cf8e1762be9684af15b Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 30 Aug 2023 17:39:22 +0800 Subject: [PATCH 021/212] mmc: core: sdio: hold retuning if sdio in 1-bit mode tuning only support in 4-bit mode or 8 bit mode, so in 1-bit mode, need to hold retuning. Find this issue when use manual tuning method on imx93. When system resume back, SDIO WIFI try to switch back to 4 bit mode, first will trigger retuning, and all tuning command failed. Signed-off-by: Haibo Chen Acked-by: Adrian Hunter Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230830093922.3095850-1-haibo.chen@nxp.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/sdio.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index f64b9ac76a5c..5914516df2f7 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -1089,8 +1089,14 @@ static int mmc_sdio_resume(struct mmc_host *host) } err = mmc_sdio_reinit_card(host); } else if (mmc_card_wake_sdio_irq(host)) { - /* We may have switched to 1-bit mode during suspend */ + /* + * We may have switched to 1-bit mode during suspend, + * need to hold retuning, because tuning only supprt + * 4-bit mode or 8 bit mode. + */ + mmc_retune_hold_now(host); err = sdio_enable_4bit_bus(host->card); + mmc_retune_release(host); } if (err) From 1202d617e3d04c8d27a14ef30784a698c48170b3 Mon Sep 17 00:00:00 2001 From: Sven van Ashbrook Date: Thu, 31 Aug 2023 16:00:56 +0000 Subject: [PATCH 022/212] mmc: sdhci-pci-gli: fix LPM negotiation so x86/S0ix SoCs can suspend To improve the r/w performance of GL9763E, the current driver inhibits LPM negotiation while the device is active. This prevents a large number of SoCs from suspending, notably x86 systems which commonly use S0ix as the suspend mechanism - for example, Intel Alder Lake and Raptor Lake processors. Failure description: 1. Userspace initiates s2idle suspend (e.g. via writing to /sys/power/state) 2. This switches the runtime_pm device state to active, which disables LPM negotiation, then calls the "regular" suspend callback 3. With LPM negotiation disabled, the bus cannot enter low-power state 4. On a large number of SoCs, if the bus not in a low-power state, S0ix cannot be entered, which in turn prevents the SoC from entering suspend. Fix by re-enabling LPM negotiation in the device's suspend callback. Suggested-by: Stanislaw Kardach Fixes: f9e5b33934ce ("mmc: host: Improve I/O read/write performance for GL9763E") Cc: stable@vger.kernel.org Signed-off-by: Sven van Ashbrook Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20230831160055.v3.1.I7ed1ca09797be2dd76ca914c57d88b32d24dac88@changeid Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 104 ++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 38 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index ae8c307b7aa7..109d4b010f97 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -1144,42 +1144,6 @@ static u32 sdhci_gl9750_readl(struct sdhci_host *host, int reg) return value; } -#ifdef CONFIG_PM_SLEEP -static int sdhci_pci_gli_resume(struct sdhci_pci_chip *chip) -{ - struct sdhci_pci_slot *slot = chip->slots[0]; - - pci_free_irq_vectors(slot->chip->pdev); - gli_pcie_enable_msi(slot); - - return sdhci_pci_resume_host(chip); -} - -static int sdhci_cqhci_gli_resume(struct sdhci_pci_chip *chip) -{ - struct sdhci_pci_slot *slot = chip->slots[0]; - int ret; - - ret = sdhci_pci_gli_resume(chip); - if (ret) - return ret; - - return cqhci_resume(slot->host->mmc); -} - -static int sdhci_cqhci_gli_suspend(struct sdhci_pci_chip *chip) -{ - struct sdhci_pci_slot *slot = chip->slots[0]; - int ret; - - ret = cqhci_suspend(slot->host->mmc); - if (ret) - return ret; - - return sdhci_suspend_host(slot->host); -} -#endif - static void gl9763e_hs400_enhanced_strobe(struct mmc_host *mmc, struct mmc_ios *ios) { @@ -1420,6 +1384,70 @@ static int gl9763e_runtime_resume(struct sdhci_pci_chip *chip) } #endif +#ifdef CONFIG_PM_SLEEP +static int sdhci_pci_gli_resume(struct sdhci_pci_chip *chip) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + + pci_free_irq_vectors(slot->chip->pdev); + gli_pcie_enable_msi(slot); + + return sdhci_pci_resume_host(chip); +} + +static int gl9763e_resume(struct sdhci_pci_chip *chip) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + int ret; + + ret = sdhci_pci_gli_resume(chip); + if (ret) + return ret; + + ret = cqhci_resume(slot->host->mmc); + if (ret) + return ret; + + /* + * Disable LPM negotiation to bring device back in sync + * with its runtime_pm state. + */ + gl9763e_set_low_power_negotiation(slot, false); + + return 0; +} + +static int gl9763e_suspend(struct sdhci_pci_chip *chip) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + int ret; + + /* + * Certain SoCs can suspend only with the bus in low- + * power state, notably x86 SoCs when using S0ix. + * Re-enable LPM negotiation to allow entering L1 state + * and entering system suspend. + */ + gl9763e_set_low_power_negotiation(slot, true); + + ret = cqhci_suspend(slot->host->mmc); + if (ret) + goto err_suspend; + + ret = sdhci_suspend_host(slot->host); + if (ret) + goto err_suspend_host; + + return 0; + +err_suspend_host: + cqhci_resume(slot->host->mmc); +err_suspend: + gl9763e_set_low_power_negotiation(slot, false); + return ret; +} +#endif + static int gli_probe_slot_gl9763e(struct sdhci_pci_slot *slot) { struct pci_dev *pdev = slot->chip->pdev; @@ -1527,8 +1555,8 @@ const struct sdhci_pci_fixes sdhci_gl9763e = { .probe_slot = gli_probe_slot_gl9763e, .ops = &sdhci_gl9763e_ops, #ifdef CONFIG_PM_SLEEP - .resume = sdhci_cqhci_gli_resume, - .suspend = sdhci_cqhci_gli_suspend, + .resume = gl9763e_resume, + .suspend = gl9763e_suspend, #endif #ifdef CONFIG_PM .runtime_suspend = gl9763e_runtime_suspend, From 168054ca5cf783e07518681fad0904de8dcc6b17 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Sep 2023 12:54:50 +0300 Subject: [PATCH 023/212] mmc: sdhci-sprd: Fix error code in sdhci_sprd_tuning() Return an error code if sdhci_sprd_get_best_clk_sample() fails. Currently, it returns success. Fixes: d83d251bf3c2 ("mmc: sdhci-sprd: Add SD HS mode online tuning") Signed-off-by: Dan Carpenter Reviewed-by: Wenchao Chen Reviewed-by: Baolin Wang Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/a8af0a08-8405-43cc-bd83-85ff25f572ca@moroto.mountain Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 649ae075e229..6b84ba27e6ab 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -644,6 +644,7 @@ static int sdhci_sprd_tuning(struct mmc_host *mmc, struct mmc_card *card, best_clk_sample = sdhci_sprd_get_best_clk_sample(mmc, value); if (best_clk_sample < 0) { dev_err(mmc_dev(host->mmc), "all tuning phase fail!\n"); + err = best_clk_sample; goto out; } From f19c5a73e6f78d69efce66cfdce31148c76a61a6 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 13 Sep 2023 13:29:21 +0200 Subject: [PATCH 024/212] mmc: core: Fix error propagation for some ioctl commands Userspace has currently no way of checking the internal R1 response error bits for some commands. This is a problem for some commands, like RPMB for example. Typically, we may detect that the busy completion has successfully ended, while in fact the card did not complete the requested operation. To fix the problem, let's always poll with CMD13 for these commands and during the polling, let's also aggregate the R1 response bits. Before completing the ioctl request, let's propagate the R1 response bits too. Reviewed-by: Avri Altman Co-developed-by: Christian Loehle Signed-off-by: Christian Loehle Signed-off-by: Ulf Hansson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230913112921.553019-1-ulf.hansson@linaro.org --- drivers/mmc/core/block.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index b5b414a71e0b..3a8f27c3e310 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -179,6 +179,7 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_queue *mq); static void mmc_blk_hsq_req_done(struct mmc_request *mrq); static int mmc_spi_err_check(struct mmc_card *card); +static int mmc_blk_busy_cb(void *cb_data, bool *busy); static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk) { @@ -470,7 +471,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, struct mmc_data data = {}; struct mmc_request mrq = {}; struct scatterlist sg; - bool r1b_resp, use_r1b_resp = false; + bool r1b_resp; unsigned int busy_timeout_ms; int err; unsigned int target_part; @@ -551,8 +552,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, busy_timeout_ms = idata->ic.cmd_timeout_ms ? : MMC_BLK_TIMEOUT_MS; r1b_resp = (cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B; if (r1b_resp) - use_r1b_resp = mmc_prepare_busy_cmd(card->host, &cmd, - busy_timeout_ms); + mmc_prepare_busy_cmd(card->host, &cmd, busy_timeout_ms); mmc_wait_for_req(card->host, &mrq); memcpy(&idata->ic.response, cmd.resp, sizeof(cmd.resp)); @@ -605,19 +605,28 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, if (idata->ic.postsleep_min_us) usleep_range(idata->ic.postsleep_min_us, idata->ic.postsleep_max_us); - /* No need to poll when using HW busy detection. */ - if ((card->host->caps & MMC_CAP_WAIT_WHILE_BUSY) && use_r1b_resp) - return 0; - if (mmc_host_is_spi(card->host)) { if (idata->ic.write_flag || r1b_resp || cmd.flags & MMC_RSP_SPI_BUSY) return mmc_spi_err_check(card); return err; } - /* Ensure RPMB/R1B command has completed by polling with CMD13. */ - if (idata->rpmb || r1b_resp) - err = mmc_poll_for_busy(card, busy_timeout_ms, false, - MMC_BUSY_IO); + + /* + * Ensure RPMB, writes and R1B responses are completed by polling with + * CMD13. Note that, usually we don't need to poll when using HW busy + * detection, but here it's needed since some commands may indicate the + * error through the R1 status bits. + */ + if (idata->rpmb || idata->ic.write_flag || r1b_resp) { + struct mmc_blk_busy_data cb_data = { + .card = card, + }; + + err = __mmc_poll_for_busy(card->host, 0, busy_timeout_ms, + &mmc_blk_busy_cb, &cb_data); + + idata->ic.response[0] = cb_data.status; + } return err; } From c7bb120c1c66672b657e95d0942c989b8275aeb3 Mon Sep 17 00:00:00 2001 From: Pablo Sun Date: Fri, 22 Sep 2023 17:53:48 +0800 Subject: [PATCH 025/212] mmc: mtk-sd: Use readl_poll_timeout_atomic in msdc_reset_hw Use atomic readl_poll_timeout_atomic, because msdc_reset_hw may be invoked in IRQ handler in the following context: msdc_irq() -> msdc_cmd_done() -> msdc_reset_hw() The following kernel BUG stack trace can be observed on Genio 1200 EVK after initializing MSDC1 hardware during kernel boot: [ 1.187441] BUG: scheduling while atomic: swapper/0/0/0x00010002 [ 1.189157] Modules linked in: [ 1.204633] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 5.15.42-mtk+modified #1 [ 1.205713] Hardware name: MediaTek Genio 1200 EVK-P1V2-EMMC (DT) [ 1.206484] Call trace: [ 1.206796] dump_backtrace+0x0/0x1ac [ 1.207266] show_stack+0x24/0x30 [ 1.207692] dump_stack_lvl+0x68/0x84 [ 1.208162] dump_stack+0x1c/0x38 [ 1.208587] __schedule_bug+0x68/0x80 [ 1.209056] __schedule+0x6ec/0x7c0 [ 1.209502] schedule+0x7c/0x110 [ 1.209915] schedule_hrtimeout_range_clock+0xc4/0x1f0 [ 1.210569] schedule_hrtimeout_range+0x20/0x30 [ 1.211148] usleep_range_state+0x84/0xc0 [ 1.211661] msdc_reset_hw+0xc8/0x1b0 [ 1.212134] msdc_cmd_done.isra.0+0x4ac/0x5f0 [ 1.212693] msdc_irq+0x104/0x2d4 [ 1.213121] __handle_irq_event_percpu+0x68/0x280 [ 1.213725] handle_irq_event+0x70/0x15c [ 1.214230] handle_fasteoi_irq+0xb0/0x1a4 [ 1.214755] handle_domain_irq+0x6c/0x9c [ 1.215260] gic_handle_irq+0xc4/0x180 [ 1.215741] call_on_irq_stack+0x2c/0x54 [ 1.216245] do_interrupt_handler+0x5c/0x70 [ 1.216782] el1_interrupt+0x30/0x80 [ 1.217242] el1h_64_irq_handler+0x1c/0x2c [ 1.217769] el1h_64_irq+0x78/0x7c [ 1.218206] cpuidle_enter_state+0xc8/0x600 [ 1.218744] cpuidle_enter+0x44/0x5c [ 1.219205] do_idle+0x224/0x2d0 [ 1.219624] cpu_startup_entry+0x30/0x80 [ 1.220129] rest_init+0x108/0x134 [ 1.220568] arch_call_rest_init+0x1c/0x28 [ 1.221094] start_kernel+0x6c0/0x700 [ 1.221564] __primary_switched+0xc0/0xc8 Fixes: ffaea6ebfe9c ("mmc: mtk-sd: Use readl_poll_timeout instead of open-coded polling") Signed-off-by: Pablo Sun Reviewed-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230922095348.22182-1-pablo.sun@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 5392200cfdf7..97f7c3d4be6e 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -669,11 +669,11 @@ static void msdc_reset_hw(struct msdc_host *host) u32 val; sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_RST); - readl_poll_timeout(host->base + MSDC_CFG, val, !(val & MSDC_CFG_RST), 0, 0); + readl_poll_timeout_atomic(host->base + MSDC_CFG, val, !(val & MSDC_CFG_RST), 0, 0); sdr_set_bits(host->base + MSDC_FIFOCS, MSDC_FIFOCS_CLR); - readl_poll_timeout(host->base + MSDC_FIFOCS, val, - !(val & MSDC_FIFOCS_CLR), 0, 0); + readl_poll_timeout_atomic(host->base + MSDC_FIFOCS, val, + !(val & MSDC_FIFOCS_CLR), 0, 0); val = readl(host->base + MSDC_INT); writel(val, host->base + MSDC_INT); From 48a3adcf47888019f953c57a7290036744635912 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Wed, 20 Sep 2023 12:23:49 +0000 Subject: [PATCH 026/212] perf pmu: Fix perf stat output with correct scale and unit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The perf_pmu__parse_* functions for the sysfs files of pmu event’s scale, unit, per-pkg and snapshot were updated in commit 7b723dbb96e8 ("perf pmu: Be lazy about loading event info files from sysfs"). However, the paths for these sysfs files were incorrect. This resulted in perf stat reporting values with wrong scaling and missing units. This is fixed by correcting the paths for these sysfs files. Before this fix: $sudo perf stat -e power/energy-pkg/ -- sleep 2 Performance counter stats for 'system wide': 351,217,188,864 power/energy-pkg/ 2.004127961 seconds time elapsed After this fix: $sudo perf stat -e power/energy-pkg/ -- sleep 2 Performance counter stats for 'system wide': 80.58 Joules power/energy-pkg/ 2.004009749 seconds time elapsed Fixes: 7b723dbb96e8 ("perf pmu: Be lazy about loading event info files from sysfs") Signed-off-by: Wyes Karny Reviewed-by: Ian Rogers Cc: ravi.bangoria@amd.com Cc: sandipan.das@amd.com Cc: james.clark@arm.com Cc: kan.liang@linux.intel.com Link: https://lore.kernel.org/r/20230920122349.418673-1-wyes.karny@amd.com Signed-off-by: Namhyung Kim --- tools/perf/util/pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8de6f39abd1b..d515ba8a0e16 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -295,7 +295,7 @@ static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *al len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.scale", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.scale", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -330,7 +330,7 @@ static int perf_pmu__parse_unit(struct perf_pmu *pmu, struct perf_pmu_alias *ali len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.unit", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.unit", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -364,7 +364,7 @@ perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias) len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.per-pkg", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.per-pkg", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -385,7 +385,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.snapshot", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.snapshot", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) From 84ee19bffc9306128cd0f1c650e89767079efeff Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Wed, 27 Sep 2023 10:15:00 +0300 Subject: [PATCH 027/212] mmc: core: Capture correct oemid-bits for eMMC cards The OEMID is an 8-bit binary number rather than 16-bit as the current code parses for. The OEMID occupies bits [111:104] in the CID register, see the eMMC spec JESD84-B51 paragraph 7.2.3. It seems that the 16-bit comes from the legacy MMC specs (v3.31 and before). Let's fix the parsing by simply move to use 8-bit instead of 16-bit. This means we ignore the impact on some of those old MMC cards that may be out there, but on the other hand this shouldn't be a problem as the OEMID seems not be an important feature for these cards. Signed-off-by: Avri Altman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230927071500.1791882-1-avri.altman@wdc.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 89cd48fcec79..4a4bab9aa726 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -104,7 +104,7 @@ static int mmc_decode_cid(struct mmc_card *card) case 3: /* MMC v3.1 - v3.3 */ case 4: /* MMC v4 */ card->cid.manfid = UNSTUFF_BITS(resp, 120, 8); - card->cid.oemid = UNSTUFF_BITS(resp, 104, 16); + card->cid.oemid = UNSTUFF_BITS(resp, 104, 8); card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); From 089667aaaa6aa33715d22b21a72216b43af3e896 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 15 Aug 2023 17:28:13 +0200 Subject: [PATCH 028/212] phy: realtek: Realtek PHYs should depend on ARCH_REALTEK The Realtek SoC USB2 and USB3 PHY Transceivers are only present on Realtek Digital Home Center (DHC) RTD series SoCs. Hence add a dependency on ARCH_REALTEK, to prevent asking the user about these drivers when configuring a kernel without Realtek SoC support. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/2892527cac9af6fa8f5e7b8daeffd7d4351fde68.1692113167.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- drivers/phy/realtek/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/phy/realtek/Kconfig b/drivers/phy/realtek/Kconfig index 650e20ed69af..75ac7e7c31ae 100644 --- a/drivers/phy/realtek/Kconfig +++ b/drivers/phy/realtek/Kconfig @@ -2,6 +2,9 @@ # # Phy drivers for Realtek platforms # + +if ARCH_REALTEK || COMPILE_TEST + config PHY_RTK_RTD_USB2PHY tristate "Realtek RTD USB2 PHY Transceiver Driver" depends on USB_SUPPORT @@ -25,3 +28,5 @@ config PHY_RTK_RTD_USB3PHY The DHC (digital home center) RTD series SoCs used the Synopsys DWC3 USB IP. This driver will do the PHY initialization of the parameters. + +endif # ARCH_REALTEK || COMPILE_TEST From 7a48b58eb5ff3798f0480d2da16bf27df9654fc7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 28 Sep 2023 10:16:05 +0300 Subject: [PATCH 029/212] perf dlfilter: Fix use of addr_location__exit() in dlfilter__object_code() Stop calling addr_location__exit() when addr_location__init() was not called. Fixes: 0dd5041c9a0e ("perf addr_location: Add init/exit/copy functions") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20230928071605.17624-1-adrian.hunter@intel.com Signed-off-by: Namhyung Kim --- tools/perf/util/dlfilter.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index 1dbf27822ee2..4a1dc21b0450 100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -282,13 +282,21 @@ static struct perf_event_attr *dlfilter__attr(void *ctx) return &d->evsel->core.attr; } +static __s32 code_read(__u64 ip, struct map *map, struct machine *machine, void *buf, __u32 len) +{ + u64 offset = map__map_ip(map, ip); + + if (ip + len >= map__end(map)) + len = map__end(map) - ip; + + return dso__data_read_offset(map__dso(map), machine, offset, buf, len); +} + static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) { struct dlfilter *d = (struct dlfilter *)ctx; struct addr_location *al; struct addr_location a; - struct map *map; - u64 offset; __s32 ret; if (!d->ctx_valid) @@ -298,27 +306,17 @@ static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) if (!al) return -1; - map = al->map; - - if (map && ip >= map__start(map) && ip < map__end(map) && + if (al->map && ip >= map__start(al->map) && ip < map__end(al->map) && machine__kernel_ip(d->machine, ip) == machine__kernel_ip(d->machine, d->sample->ip)) - goto have_map; + return code_read(ip, al->map, d->machine, buf, len); addr_location__init(&a); - thread__find_map_fb(al->thread, d->sample->cpumode, ip, &a); - if (!a.map) { - ret = -1; - goto out; - } - map = a.map; -have_map: - offset = map__map_ip(map, ip); - if (ip + len >= map__end(map)) - len = map__end(map) - ip; - ret = dso__data_read_offset(map__dso(map), d->machine, offset, buf, len); -out: + thread__find_map_fb(al->thread, d->sample->cpumode, ip, &a); + ret = a.map ? code_read(ip, a.map, d->machine, buf, len) : -1; + addr_location__exit(&a); + return ret; } From f38f547314b858b94d36aeb9a4401f0aade7d1af Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 28 Sep 2023 12:10:33 +0300 Subject: [PATCH 030/212] perf dlfilter: Add a test for object_code() Extend the "dlfilter C API" test to test perf_dlfilter_fns.object_code(). Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20230928091033.33998-1-adrian.hunter@intel.com Signed-off-by: Namhyung Kim --- tools/perf/dlfilters/dlfilter-test-api-v0.c | 12 +++++++++++- tools/perf/dlfilters/dlfilter-test-api-v2.c | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index 72f263d49121..4083b1abeaab 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -289,6 +289,15 @@ static int check_attr(void *ctx) return 0; } +static int check_object_code(void *ctx, const struct perf_dlfilter_sample *sample) +{ + __u8 buf[15]; + + CHECK(perf_dlfilter_fns.object_code(ctx, sample->ip, buf, sizeof(buf)) > 0); + + return 0; +} + static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early) { struct filter_data *d = data; @@ -314,7 +323,8 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void if (early && !d->do_early) return 0; - if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample) || + check_object_code(ctx, sample)) return -1; if (early) diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c index 38e593d92920..32ff619e881c 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v2.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c @@ -308,6 +308,15 @@ static int check_attr(void *ctx) return 0; } +static int check_object_code(void *ctx, const struct perf_dlfilter_sample *sample) +{ + __u8 buf[15]; + + CHECK(perf_dlfilter_fns.object_code(ctx, sample->ip, buf, sizeof(buf)) > 0); + + return 0; +} + static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early) { struct filter_data *d = data; @@ -333,7 +342,8 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void if (early && !d->do_early) return 0; - if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample) || + check_object_code(ctx, sample)) return -1; if (early) From ec4405ed92036f5bb487b5c2f9a28f9e36a3e3d5 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Thu, 10 Aug 2023 23:18:25 +0300 Subject: [PATCH 031/212] thunderbolt: Call tb_switch_put() once DisplayPort bandwidth request is finished When handling DisplayPort bandwidth request tb_switch_find_by_route() is called and it returns a router structure with reference count increased. In order to avoid resource leak call tb_switch_put() when finished. Fixes: 6ce3563520be ("thunderbolt: Add support for DisplayPort bandwidth allocation mode") Cc: stable@vger.kernel.org Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index dd0a1ef8cf12..27bd6ca6f99e 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1907,14 +1907,14 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) in = &sw->ports[ev->port]; if (!tb_port_is_dpin(in)) { tb_port_warn(in, "bandwidth request to non-DP IN adapter\n"); - goto unlock; + goto put_sw; } tb_port_dbg(in, "handling bandwidth allocation request\n"); if (!usb4_dp_port_bandwidth_mode_enabled(in)) { tb_port_warn(in, "bandwidth allocation mode not enabled\n"); - goto unlock; + goto put_sw; } ret = usb4_dp_port_requested_bandwidth(in); @@ -1923,7 +1923,7 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) tb_port_dbg(in, "no bandwidth request active\n"); else tb_port_warn(in, "failed to read requested bandwidth\n"); - goto unlock; + goto put_sw; } requested_bw = ret; @@ -1932,7 +1932,7 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) tunnel = tb_find_tunnel(tb, TB_TUNNEL_DP, in, NULL); if (!tunnel) { tb_port_warn(in, "failed to find tunnel\n"); - goto unlock; + goto put_sw; } out = tunnel->dst_port; @@ -1959,6 +1959,8 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) tb_recalc_estimated_bandwidth(tb); } +put_sw: + tb_switch_put(sw); unlock: mutex_unlock(&tb->lock); From 3820c4fdc247b6f0a4162733bdb8ddf8f2e8a1e4 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 31 Jul 2023 12:37:58 +0200 Subject: [PATCH 032/212] nvme-rdma: do not try to stop unallocated queues Trying to stop a queue which hasn't been allocated will result in a warning due to calling mutex_lock() against an uninitialized mutex. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 4 PID: 104150 at kernel/locking/mutex.c:579 Call trace: RIP: 0010:__mutex_lock+0x1173/0x14a0 nvme_rdma_stop_queue+0x1b/0xa0 [nvme_rdma] nvme_rdma_teardown_io_queues.part.0+0xb0/0x1d0 [nvme_rdma] nvme_rdma_delete_ctrl+0x50/0x100 [nvme_rdma] nvme_do_delete_ctrl+0x149/0x158 [nvme_core] Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Tested-by: Yi Zhang Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 337a624a537c..a7fea4cbacd7 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -638,6 +638,9 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) { + if (!test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) + return; + mutex_lock(&queue->queue_lock); if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) __nvme_rdma_stop_queue(queue); From 7ec4cd3c1a12dc08c60d5e376c2c05aae23f1e41 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Thu, 5 Oct 2023 07:56:14 +0000 Subject: [PATCH 033/212] platform: mellanox: Fix a resource leak in an error handling path in probing flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix missed resource deallocation in rollback flows. Currently if an error occurs after a successful mlxplat_i2c_main_init(), mlxplat_i2c_main_exit() call is missed in rollback flow. Thus, some resources are not de-allocated. Move mlxplat_pre_exit() call from mlxplat_remove() into mlxplat_i2c_main_exit(). Call mlxplat_i2c_main_exit() instead of calling mlxplat_pre_exit() in mlxplat_probe() error handling flow. Unregister 'priv->pdev_i2c' device in mlxplat_i2c_main_init() cleanup flow if this device was successfully registered. Fixes: 158cd8320776 ("platform: mellanox: Split logic in init and exit flow") Reported-by: Christophe JAILLET Closes: https://lore.kernel.org/lkml/70165032-796e-6f5c-6748-f514e3b9d08c@linux.intel.com/T/ Signed-off-by: Vadim Pasternak Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231005075616.42777-2-vadimp@nvidia.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/mlx-platform.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 3d96dbf79a72..a2ffe4157df1 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -6514,6 +6514,7 @@ static int mlxplat_i2c_main_init(struct mlxplat_priv *priv) return 0; fail_mlxplat_i2c_mux_topology_init: + platform_device_unregister(priv->pdev_i2c); fail_platform_i2c_register: fail_mlxplat_mlxcpld_verify_bus_topology: return err; @@ -6521,6 +6522,7 @@ static int mlxplat_i2c_main_init(struct mlxplat_priv *priv) static void mlxplat_i2c_main_exit(struct mlxplat_priv *priv) { + mlxplat_pre_exit(priv); mlxplat_i2c_mux_topology_exit(priv); if (priv->pdev_i2c) platform_device_unregister(priv->pdev_i2c); @@ -6597,7 +6599,7 @@ static int mlxplat_probe(struct platform_device *pdev) fail_register_reboot_notifier: fail_regcache_sync: - mlxplat_pre_exit(priv); + mlxplat_i2c_main_exit(priv); fail_mlxplat_i2c_main_init: fail_regmap_write: fail_alloc: @@ -6614,7 +6616,6 @@ static int mlxplat_remove(struct platform_device *pdev) pm_power_off = NULL; if (mlxplat_reboot_nb) unregister_reboot_notifier(mlxplat_reboot_nb); - mlxplat_pre_exit(priv); mlxplat_i2c_main_exit(priv); mlxplat_post_exit(); return 0; From 61e21cf2d2c3cc5e60e8d0a62a77e250fccda62c Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Wed, 27 Sep 2023 17:44:01 +0800 Subject: [PATCH 034/212] mm/page_alloc: correct start page when guard page debug is enabled When guard page debug is enabled and set_page_guard returns success, we miss to forward page to point to start of next split range and we will do split unexpectedly in page range without target page. Move start page update before set_page_guard to fix this. As we split to wrong target page, then splited pages are not able to merge back to original order when target page is put back and splited pages except target page is not usable. To be specific: Consider target page is the third page in buddy page with order 2. | buddy-2 | Page | Target | Page | After break down to target page, we will only set first page to Guard because of bug. | Guard | Page | Target | Page | When we try put_page_back_buddy with target page, the buddy page of target if neither guard nor buddy, Then it's not able to construct original page with order 2 | Guard | Page | buddy-0 | Page | All pages except target page is not in free list and is not usable. Link: https://lkml.kernel.org/r/20230927094401.68205-1-shikemeng@huaweicloud.com Fixes: 06be6ff3d2ec ("mm,hwpoison: rework soft offline for free pages") Signed-off-by: Kemeng Shi Acked-by: Naoya Horiguchi Cc: Matthew Wilcox (Oracle) Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 95546f376302..85741403948f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6475,6 +6475,7 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page, next_page = page; current_buddy = page + size; } + page = next_page; if (set_page_guard(zone, current_buddy, high, migratetype)) continue; @@ -6482,7 +6483,6 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page, if (current_buddy != target) { add_to_free_list(current_buddy, zone, high, migratetype); set_buddy_order(current_buddy, high); - page = next_page; } } } From 51f625377561e5b167da2db5aafb7ee268f691c5 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 28 Sep 2023 13:24:32 -0400 Subject: [PATCH 035/212] mm/mempolicy: fix set_mempolicy_home_node() previous VMA pointer The two users of mbind_range() are expecting that mbind_range() will update the pointer to the previous VMA, or return an error. However, set_mempolicy_home_node() does not call mbind_range() if there is no VMA policy. The fix is to update the pointer to the previous VMA prior to continuing iterating the VMAs when there is no policy. Users may experience a WARN_ON() during VMA policy updates when updating a range of VMAs on the home node. Link: https://lkml.kernel.org/r/20230928172432.2246534-1-Liam.Howlett@oracle.com Link: https://lore.kernel.org/linux-mm/CALcu4rbT+fMVNaO_F2izaCT+e7jzcAciFkOvk21HGJsmLcUuwQ@mail.gmail.com/ Fixes: f4e9e0e69468 ("mm/mempolicy: fix use-after-free of VMA iterator") Signed-off-by: Liam R. Howlett Reported-by: Yikebaer Aizezi Closes: https://lore.kernel.org/linux-mm/CALcu4rbT+fMVNaO_F2izaCT+e7jzcAciFkOvk21HGJsmLcUuwQ@mail.gmail.com/ Reviewed-by: Lorenzo Stoakes Cc: Signed-off-by: Andrew Morton --- mm/mempolicy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index f1b00d6ac7ee..29ebf1e7898c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1543,8 +1543,10 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le * the home node for vmas we already updated before. */ old = vma_policy(vma); - if (!old) + if (!old) { + prev = vma; continue; + } if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) { err = -EOPNOTSUPP; break; From e0f81ab1e4f42ffece6440dc78f583eb352b9a71 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 29 Sep 2023 10:19:41 -0700 Subject: [PATCH 036/212] mm: fix vm_brk_flags() to not bail out while holding lock Calling vm_brk_flags() with flags set other than VM_EXEC will exit the function without releasing the mmap_write_lock. Just do the sanity check before the lock is acquired. This doesn't fix an actual issue since no caller sets a flag other than VM_EXEC. Link: https://lkml.kernel.org/r/20230929171937.work.697-kees@kernel.org Fixes: 2e7ce7d354f2 ("mm/mmap: change do_brk_flags() to expand existing VMA and add do_brk_munmap()") Signed-off-by: Sebastian Ott Signed-off-by: Kees Cook Reviewed-by: Liam R. Howlett Cc: Yu Zhao Signed-off-by: Andrew Morton --- mm/mmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index b56a7f0c9f85..7ed286662839 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3143,13 +3143,13 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) if (!len) return 0; - if (mmap_write_lock_killable(mm)) - return -EINTR; - /* Until we need other flags, refuse anything except VM_EXEC. */ if ((flags & (~VM_EXEC)) != 0) return -EINVAL; + if (mmap_write_lock_killable(mm)) + return -EINTR; + ret = check_brk_limits(addr, len); if (ret) goto limits_failed; From 1419430c8abb5a00590169068590dd54d86590ba Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 29 Sep 2023 14:30:39 -0400 Subject: [PATCH 037/212] mmap: fix vma_iterator in error path of vma_merge() During the error path, the vma iterator may not be correctly positioned or set to the correct range. Undo the vma_prev() call by resetting to the passed in address. Re-walking to the same range will fix the range to the area previously passed in. Users would notice increased cycles as vma_merge() would be called an extra time with vma == prev, and thus would fail to merge and return. Link: https://lore.kernel.org/linux-mm/CAG48ez12VN1JAOtTNMY+Y2YnsU45yL5giS-Qn=ejtiHpgJAbdQ@mail.gmail.com/ Link: https://lkml.kernel.org/r/20230929183041.2835469-2-Liam.Howlett@oracle.com Fixes: 18b098af2890 ("vma_merge: set vma iterator to correct position.") Signed-off-by: Liam R. Howlett Reported-by: Jann Horn Closes: https://lore.kernel.org/linux-mm/CAG48ez12VN1JAOtTNMY+Y2YnsU45yL5giS-Qn=ejtiHpgJAbdQ@mail.gmail.com/ Reviewed-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Cc: Matthew Wilcox (Oracle) Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- mm/mmap.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 7ed286662839..a0917ed26057 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -975,7 +975,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, /* Error in anon_vma clone. */ if (err) - return NULL; + goto anon_vma_fail; if (vma_start < vma->vm_start || vma_end > vma->vm_end) vma_expanded = true; @@ -988,7 +988,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, } if (vma_iter_prealloc(vmi, vma)) - return NULL; + goto prealloc_fail; init_multi_vma_prep(&vp, vma, adjust, remove, remove2); VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma && @@ -1016,6 +1016,12 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, vma_complete(&vp, vmi, mm); khugepaged_enter_vma(res, vm_flags); return res; + +prealloc_fail: +anon_vma_fail: + vma_iter_set(vmi, addr); + vma_iter_load(vmi); + return NULL; } /* From 824135c46b00df7fb369ec7f1f8607427bbebeb0 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 29 Sep 2023 14:30:40 -0400 Subject: [PATCH 038/212] mmap: fix error paths with dup_anon_vma() When the calling function fails after the dup_anon_vma(), the duplication of the anon_vma is not being undone. Add the necessary unlink_anon_vma() call to the error paths that are missing them. This issue showed up during inspection of the error path in vma_merge() for an unrelated vma iterator issue. Users may experience increased memory usage, which may be problematic as the failure would likely be caused by a low memory situation. Link: https://lkml.kernel.org/r/20230929183041.2835469-3-Liam.Howlett@oracle.com Fixes: d4af56c5c7c6 ("mm: start tracking VMAs with maple tree") Signed-off-by: Liam R. Howlett Reviewed-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Cc: Jann Horn Cc: Matthew Wilcox (Oracle) Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- mm/mmap.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index a0917ed26057..9e018d8dd7d6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -583,11 +583,12 @@ static inline void vma_complete(struct vma_prepare *vp, * dup_anon_vma() - Helper function to duplicate anon_vma * @dst: The destination VMA * @src: The source VMA + * @dup: Pointer to the destination VMA when successful. * * Returns: 0 on success. */ static inline int dup_anon_vma(struct vm_area_struct *dst, - struct vm_area_struct *src) + struct vm_area_struct *src, struct vm_area_struct **dup) { /* * Easily overlooked: when mprotect shifts the boundary, make sure the @@ -595,9 +596,15 @@ static inline int dup_anon_vma(struct vm_area_struct *dst, * anon pages imported. */ if (src->anon_vma && !dst->anon_vma) { + int ret; + vma_assert_write_locked(dst); dst->anon_vma = src->anon_vma; - return anon_vma_clone(dst, src); + ret = anon_vma_clone(dst, src); + if (ret) + return ret; + + *dup = dst; } return 0; @@ -624,6 +631,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *next) { + struct vm_area_struct *anon_dup = NULL; bool remove_next = false; struct vma_prepare vp; @@ -633,7 +641,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, remove_next = true; vma_start_write(next); - ret = dup_anon_vma(vma, next); + ret = dup_anon_vma(vma, next, &anon_dup); if (ret) return ret; } @@ -661,6 +669,8 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, return 0; nomem: + if (anon_dup) + unlink_anon_vmas(anon_dup); return -ENOMEM; } @@ -860,6 +870,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, { struct vm_area_struct *curr, *next, *res; struct vm_area_struct *vma, *adjust, *remove, *remove2; + struct vm_area_struct *anon_dup = NULL; struct vma_prepare vp; pgoff_t vma_pgoff; int err = 0; @@ -927,18 +938,18 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, vma_start_write(next); remove = next; /* case 1 */ vma_end = next->vm_end; - err = dup_anon_vma(prev, next); + err = dup_anon_vma(prev, next, &anon_dup); if (curr) { /* case 6 */ vma_start_write(curr); remove = curr; remove2 = next; if (!next->anon_vma) - err = dup_anon_vma(prev, curr); + err = dup_anon_vma(prev, curr, &anon_dup); } } else if (merge_prev) { /* case 2 */ if (curr) { vma_start_write(curr); - err = dup_anon_vma(prev, curr); + err = dup_anon_vma(prev, curr, &anon_dup); if (end == curr->vm_end) { /* case 7 */ remove = curr; } else { /* case 5 */ @@ -954,7 +965,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, vma_end = addr; adjust = next; adj_start = -(prev->vm_end - addr); - err = dup_anon_vma(next, prev); + err = dup_anon_vma(next, prev, &anon_dup); } else { /* * Note that cases 3 and 8 are the ONLY ones where prev @@ -968,7 +979,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, vma_pgoff = curr->vm_pgoff; vma_start_write(curr); remove = curr; - err = dup_anon_vma(next, curr); + err = dup_anon_vma(next, curr, &anon_dup); } } } @@ -1018,6 +1029,9 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, return res; prealloc_fail: + if (anon_dup) + unlink_anon_vmas(anon_dup); + anon_vma_fail: vma_iter_set(vmi, addr); vma_iter_load(vmi); From 117b1bb0cbc7f5feab4fd251737869958987808c Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 28 Sep 2023 17:18:45 +0200 Subject: [PATCH 039/212] riscv: handle VM_FAULT_[HWPOISON|HWPOISON_LARGE] faults instead of panicking Patch series "Fix set_huge_pte_at()". A recent report [1] from Ryan for arm64 revealed that we do not handle swap entries when setting a hugepage backed by a NAPOT region (the contpte riscv equivalent). As explained in [1], the issue was discovered by a new test in kselftest which uses poison entries, but the symptoms are different from arm64 though: - the riscv kernel bugs because we do not handle VM_FAULT_HWPOISON*, this is fixed by patch 1, - after that, the test passes because the first pte_napot() fails (the poison entry does not have the N bit set), and then we only set the first page table entry covering the NAPOT hugepage, which is enough for hugetlb_fault() to correctly raise a VM_FAULT_HWPOISON wherever we write in this mapping since only this first page table entry is checked (see https://elixir.bootlin.com/linux/v6.6-rc3/source/mm/hugetlb.c#L6071). But this seems fragile so patch 2 sets all page table entries of a NAPOT mapping. [1]: https://lore.kernel.org/linux-arm-kernel/20230922115804.2043771-1-ryan.roberts@arm.com/ This patch (of 2): We used to panic when such faults were encountered but we should handle those faults gracefully for userspace by sending a SIGBUS to the process, like most architectures do. Link: https://lkml.kernel.org/r/20230928151846.8229-1-alexghiti@rivosinc.com Link: https://lkml.kernel.org/r/20230928151846.8229-2-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti Acked-by: Palmer Dabbelt Cc: Albert Ou Cc: Andrew Jones Cc: Conor Dooley Cc: Paul Walmsley Cc: Qinglin Pan Signed-off-by: Andrew Morton --- arch/riscv/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 6115d7514972..90d4ba36d1d0 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -72,7 +72,7 @@ static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_f } pagefault_out_of_memory(); return; - } else if (fault & VM_FAULT_SIGBUS) { + } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) { no_context(regs, addr); From 1de195dd0e05d9cba43dec16f83d4ee32af94dd2 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 28 Sep 2023 17:18:46 +0200 Subject: [PATCH 040/212] riscv: fix set_huge_pte_at() for NAPOT mappings when a swap entry is set We used to determine the number of page table entries to set for a NAPOT hugepage by using the pte value which actually fails when the pte to set is a swap entry. So take advantage of a recent fix for arm64 reported in [1] which introduces the size of the mapping as an argument of set_huge_pte_at(): we can then use this size to compute the number of page table entries to set for a NAPOT region. Link: https://lkml.kernel.org/r/20230928151846.8229-3-alexghiti@rivosinc.com Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Signed-off-by: Alexandre Ghiti Reported-by: Ryan Roberts Closes: https://lore.kernel.org/linux-arm-kernel/20230922115804.2043771-1-ryan.roberts@arm.com/ [1] Reviewed-by: Andrew Jones Cc: Albert Ou Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Qinglin Pan Cc: Conor Dooley Signed-off-by: Andrew Morton --- arch/riscv/mm/hugetlbpage.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index e4a2ace92dbe..b52f0210481f 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -183,15 +183,22 @@ void set_huge_pte_at(struct mm_struct *mm, pte_t pte, unsigned long sz) { + unsigned long hugepage_shift; int i, pte_num; - if (!pte_napot(pte)) { - set_pte_at(mm, addr, ptep, pte); - return; - } + if (sz >= PGDIR_SIZE) + hugepage_shift = PGDIR_SHIFT; + else if (sz >= P4D_SIZE) + hugepage_shift = P4D_SHIFT; + else if (sz >= PUD_SIZE) + hugepage_shift = PUD_SHIFT; + else if (sz >= PMD_SIZE) + hugepage_shift = PMD_SHIFT; + else + hugepage_shift = PAGE_SHIFT; - pte_num = napot_pte_num(napot_cont_order(pte)); - for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE) + pte_num = sz >> hugepage_shift; + for (i = 0; i < pte_num; i++, ptep++, addr += (1 << hugepage_shift)) set_pte_at(mm, addr, ptep, pte); } From 229e2253766c7cdfe024f1fe280020cc4711087c Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Tue, 3 Oct 2023 10:48:56 -0400 Subject: [PATCH 041/212] mm/migrate: fix do_pages_move for compat pointers do_pages_move does not handle compat pointers for the page list. correctly. Add in_compat_syscall check and appropriate get_user fetch when iterating the page list. It makes the syscall in compat mode (32-bit userspace, 64-bit kernel) work the same way as the native 32-bit syscall again, restoring the behavior before my broken commit 5b1b561ba73c ("mm: simplify compat_sys_move_pages"). More specifically, my patch moved the parsing of the 'pages' array from the main entry point into do_pages_stat(), which left the syscall working correctly for the 'stat' operation (nodes = NULL), while the 'move' operation (nodes != NULL) is now missing the conversion and interprets 'pages' as an array of 64-bit pointers instead of the intended 32-bit userspace pointers. It is possible that nobody noticed this bug because the few applications that actually call move_pages are unlikely to run in compat mode because of their large memory requirements, but this clearly fixes a user-visible regression and should have been caught by ltp. Link: https://lkml.kernel.org/r/20231003144857.752952-1-gregory.price@memverge.com Fixes: 5b1b561ba73c ("mm: simplify compat_sys_move_pages") Signed-off-by: Gregory Price Reported-by: Arnd Bergmann Co-developed-by: Arnd Bergmann Cc: Jonathan Cameron Cc: Signed-off-by: Andrew Morton --- mm/migrate.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 2053b54556ca..06086dc9da28 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2162,6 +2162,7 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, const int __user *nodes, int __user *status, int flags) { + compat_uptr_t __user *compat_pages = (void __user *)pages; int current_node = NUMA_NO_NODE; LIST_HEAD(pagelist); int start, i; @@ -2174,8 +2175,17 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, int node; err = -EFAULT; - if (get_user(p, pages + i)) - goto out_flush; + if (in_compat_syscall()) { + compat_uptr_t cp; + + if (get_user(cp, compat_pages + i)) + goto out_flush; + + p = compat_ptr(cp); + } else { + if (get_user(p, pages + i)) + goto out_flush; + } if (get_user(node, nodes + i)) goto out_flush; From 76aca10ccb7c23a7b7a0d56e0bfde2c8cdddfe24 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Oct 2023 17:57:09 +0200 Subject: [PATCH 042/212] ASoC: soc-dapm: Add helper for comparing widget name Some drivers use one event callback for multiple widgets but still need to perform a bit different actions based on actual widget. This is done by comparing widget name, however drivers tend to miss possible name prefix. Add a helper to solve common mistakes. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231003155710.821315-2-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 1 + sound/soc/soc-component.c | 1 + sound/soc/soc-dapm.c | 12 ++++++++++++ 3 files changed, 14 insertions(+) diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index d2faec9a323e..433543eb82b9 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -469,6 +469,7 @@ void snd_soc_dapm_connect_dai_link_widgets(struct snd_soc_card *card); int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai); +int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s); /* dapm path setup */ int snd_soc_dapm_new_widgets(struct snd_soc_card *card); diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index ba7c0ae82e00..566033f7dd2e 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -242,6 +242,7 @@ int snd_soc_component_notify_control(struct snd_soc_component *component, char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; struct snd_kcontrol *kctl; + /* When updating, change also snd_soc_dapm_widget_name_cmp() */ if (component->name_prefix) snprintf(name, ARRAY_SIZE(name), "%s %s", component->name_prefix, ctl); else diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index f07e83678373..312e55579831 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2728,6 +2728,18 @@ int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, } EXPORT_SYMBOL_GPL(snd_soc_dapm_update_dai); +int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(widget->dapm); + const char *wname = widget->name; + + if (component->name_prefix) + wname += strlen(component->name_prefix) + 1; /* plus space */ + + return strcmp(wname, s); +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_widget_name_cmp); + /* * dapm_update_widget_flags() - Re-compute widget sink and source flags * @w: The widget for which to update the flags From c29e5263d32a6d0ec094d425ae7fef3fa8d4da1c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Oct 2023 17:57:10 +0200 Subject: [PATCH 043/212] ASoC: codecs: wsa-macro: handle component name prefix When comparing widget names in wsa_macro_spk_boost_event(), consider also the component's name prefix. Otherwise the WSA codec won't have proper mixer setup resulting in no sound playback through speakers. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231003155710.821315-3-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-wsa-macro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index ec6859ec0d38..fff4a8b862a7 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1675,12 +1675,12 @@ static int wsa_macro_spk_boost_event(struct snd_soc_dapm_widget *w, u16 boost_path_ctl, boost_path_cfg1; u16 reg, reg_mix; - if (!strcmp(w->name, "WSA_RX INT0 CHAIN")) { + if (!snd_soc_dapm_widget_name_cmp(w, "WSA_RX INT0 CHAIN")) { boost_path_ctl = CDC_WSA_BOOST0_BOOST_PATH_CTL; boost_path_cfg1 = CDC_WSA_RX0_RX_PATH_CFG1; reg = CDC_WSA_RX0_RX_PATH_CTL; reg_mix = CDC_WSA_RX0_RX_PATH_MIX_CTL; - } else if (!strcmp(w->name, "WSA_RX INT1 CHAIN")) { + } else if (!snd_soc_dapm_widget_name_cmp(w, "WSA_RX INT1 CHAIN")) { boost_path_ctl = CDC_WSA_BOOST1_BOOST_PATH_CTL; boost_path_cfg1 = CDC_WSA_RX1_RX_PATH_CFG1; reg = CDC_WSA_RX1_RX_PATH_CTL; From bfbc79de60c53e5fed505390440b87ef59ee268c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:52 +0200 Subject: [PATCH 044/212] ASoC: codecs: wcd938x: drop bogus bind error handling Drop the bogus error handling for a soundwire device backcast during bind() that cannot fail. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-2-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index a3c680661377..cf1eaf678fc2 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3448,10 +3448,6 @@ static int wcd938x_bind(struct device *dev) wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; wcd938x->tx_sdw_dev = dev_to_sdw_dev(wcd938x->txdev); - if (!wcd938x->tx_sdw_dev) { - dev_err(dev, "could not get txslave with matching of dev\n"); - return -EINVAL; - } /* As TX is main CSR reg interface, which should not be suspended first. * expicilty add the dependency link */ From fa2f8a991ba4aa733ac1c3b1be0c86148aa4c52c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:53 +0200 Subject: [PATCH 045/212] ASoC: codecs: wcd938x: fix unbind tear down order Make sure to deregister the component before tearing down the resources it depends on during unbind(). Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-3-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index cf1eaf678fc2..c617fc3ce489 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3504,10 +3504,10 @@ static void wcd938x_unbind(struct device *dev) { struct wcd938x_priv *wcd938x = dev_get_drvdata(dev); + snd_soc_unregister_component(dev); device_link_remove(dev, wcd938x->txdev); device_link_remove(dev, wcd938x->rxdev); device_link_remove(wcd938x->rxdev, wcd938x->txdev); - snd_soc_unregister_component(dev); component_unbind_all(dev, wcd938x); } From da29b94ed3547cee9d510d02eca4009f2de476cf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:54 +0200 Subject: [PATCH 046/212] ASoC: codecs: wcd938x: fix resource leaks on bind errors Add the missing code to release resources on bind errors, including the references taken by wcd938x_sdw_device_get() which also need to be dropped on unbind(). Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-4-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 44 +++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index c617fc3ce489..7e0b07eeed77 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3435,7 +3435,8 @@ static int wcd938x_bind(struct device *dev) wcd938x->rxdev = wcd938x_sdw_device_get(wcd938x->rxnode); if (!wcd938x->rxdev) { dev_err(dev, "could not find slave with matching of node\n"); - return -EINVAL; + ret = -EINVAL; + goto err_unbind; } wcd938x->sdw_priv[AIF1_PB] = dev_get_drvdata(wcd938x->rxdev); wcd938x->sdw_priv[AIF1_PB]->wcd938x = wcd938x; @@ -3443,7 +3444,8 @@ static int wcd938x_bind(struct device *dev) wcd938x->txdev = wcd938x_sdw_device_get(wcd938x->txnode); if (!wcd938x->txdev) { dev_err(dev, "could not find txslave with matching of node\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_rxdev; } wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; @@ -3454,31 +3456,35 @@ static int wcd938x_bind(struct device *dev) if (!device_link_add(wcd938x->rxdev, wcd938x->txdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink tx and rx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_txdev; } if (!device_link_add(dev, wcd938x->txdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink wcd and tx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_rxtx_link; } if (!device_link_add(dev, wcd938x->rxdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink wcd and rx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_tx_link; } wcd938x->regmap = dev_get_regmap(&wcd938x->tx_sdw_dev->dev, NULL); if (!wcd938x->regmap) { dev_err(dev, "could not get TX device regmap\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_rx_link; } ret = wcd938x_irq_init(wcd938x, dev); if (ret) { dev_err(dev, "%s: IRQ init failed: %d\n", __func__, ret); - return ret; + goto err_remove_rx_link; } wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq; @@ -3487,17 +3493,33 @@ static int wcd938x_bind(struct device *dev) ret = wcd938x_set_micbias_data(wcd938x); if (ret < 0) { dev_err(dev, "%s: bad micbias pdata\n", __func__); - return ret; + goto err_remove_rx_link; } ret = snd_soc_register_component(dev, &soc_codec_dev_wcd938x, wcd938x_dais, ARRAY_SIZE(wcd938x_dais)); - if (ret) + if (ret) { dev_err(dev, "%s: Codec registration failed\n", __func__); + goto err_remove_rx_link; + } + + return 0; + +err_remove_rx_link: + device_link_remove(dev, wcd938x->rxdev); +err_remove_tx_link: + device_link_remove(dev, wcd938x->txdev); +err_remove_rxtx_link: + device_link_remove(wcd938x->rxdev, wcd938x->txdev); +err_put_txdev: + put_device(wcd938x->txdev); +err_put_rxdev: + put_device(wcd938x->rxdev); +err_unbind: + component_unbind_all(dev, wcd938x); return ret; - } static void wcd938x_unbind(struct device *dev) @@ -3508,6 +3530,8 @@ static void wcd938x_unbind(struct device *dev) device_link_remove(dev, wcd938x->txdev); device_link_remove(dev, wcd938x->rxdev); device_link_remove(wcd938x->rxdev, wcd938x->txdev); + put_device(wcd938x->txdev); + put_device(wcd938x->rxdev); component_unbind_all(dev, wcd938x); } From 69a026a2357ee69983690d07976de44ef26ee38a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:55 +0200 Subject: [PATCH 047/212] ASoC: codecs: wcd938x: fix regulator leaks on probe errors Make sure to disable and free the regulators on probe errors and on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-5-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 7e0b07eeed77..679c627f7eaa 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3325,8 +3325,10 @@ static int wcd938x_populate_dt_data(struct wcd938x_priv *wcd938x, struct device return dev_err_probe(dev, ret, "Failed to get supplies\n"); ret = regulator_bulk_enable(WCD938X_MAX_SUPPLY, wcd938x->supplies); - if (ret) + if (ret) { + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); return dev_err_probe(dev, ret, "Failed to enable supplies\n"); + } wcd938x_dt_parse_micbias_info(dev, wcd938x); @@ -3592,13 +3594,13 @@ static int wcd938x_probe(struct platform_device *pdev) ret = wcd938x_add_slave_components(wcd938x, dev, &match); if (ret) - return ret; + goto err_disable_regulators; wcd938x_reset(wcd938x); ret = component_master_add_with_match(dev, &wcd938x_comp_ops, match); if (ret) - return ret; + goto err_disable_regulators; pm_runtime_set_autosuspend_delay(dev, 1000); pm_runtime_use_autosuspend(dev); @@ -3608,11 +3610,21 @@ static int wcd938x_probe(struct platform_device *pdev) pm_runtime_idle(dev); return 0; + +err_disable_regulators: + regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); + + return ret; } static void wcd938x_remove(struct platform_device *pdev) { + struct wcd938x_priv *wcd938x = dev_get_drvdata(&pdev->dev); + component_master_del(&pdev->dev, &wcd938x_comp_ops); + regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); } #if defined(CONFIG_OF) From 3ebebb2c1eca92a15107b2d7aeff34196fd9e217 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:56 +0200 Subject: [PATCH 048/212] ASoC: codecs: wcd938x: fix runtime PM imbalance on remove Make sure to balance the runtime PM operations, including the disable count, on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-6-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 679c627f7eaa..d27b919c63b4 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3620,9 +3620,15 @@ static int wcd938x_probe(struct platform_device *pdev) static void wcd938x_remove(struct platform_device *pdev) { - struct wcd938x_priv *wcd938x = dev_get_drvdata(&pdev->dev); + struct device *dev = &pdev->dev; + struct wcd938x_priv *wcd938x = dev_get_drvdata(dev); + + component_master_del(dev, &wcd938x_comp_ops); + + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); - component_master_del(&pdev->dev, &wcd938x_comp_ops); regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); } From f0dfdcbe706462495d47982eecd13a61aabd644d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:57 +0200 Subject: [PATCH 049/212] ASoC: codecs: wcd938x-sdw: fix use after free on driver unbind Make sure to deregister the component when the driver is being unbound and before the underlying device-managed resources are freed. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-7-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x-sdw.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index 6951120057e5..1baea04480e2 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1281,6 +1281,15 @@ static int wcd9380_probe(struct sdw_slave *pdev, return component_add(dev, &wcd938x_sdw_component_ops); } +static int wcd9380_remove(struct sdw_slave *pdev) +{ + struct device *dev = &pdev->dev; + + component_del(dev, &wcd938x_sdw_component_ops); + + return 0; +} + static const struct sdw_device_id wcd9380_slave_id[] = { SDW_SLAVE_ENTRY(0x0217, 0x10d, 0), {}, @@ -1320,6 +1329,7 @@ static const struct dev_pm_ops wcd938x_sdw_pm_ops = { static struct sdw_driver wcd9380_codec_driver = { .probe = wcd9380_probe, + .remove = wcd9380_remove, .ops = &wcd9380_slave_ops, .id_table = wcd9380_slave_id, .driver = { From c5c0383082eace13da2ffceeea154db2780165e7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:58 +0200 Subject: [PATCH 050/212] ASoC: codecs: wcd938x-sdw: fix runtime PM imbalance on probe errors Make sure to balance the runtime PM operations, including the disable count, on probe errors and on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-8-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x-sdw.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index 1baea04480e2..a1f04010da95 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1278,7 +1278,18 @@ static int wcd9380_probe(struct sdw_slave *pdev, pm_runtime_set_active(dev); pm_runtime_enable(dev); - return component_add(dev, &wcd938x_sdw_component_ops); + ret = component_add(dev, &wcd938x_sdw_component_ops); + if (ret) + goto err_disable_rpm; + + return 0; + +err_disable_rpm: + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); + + return ret; } static int wcd9380_remove(struct sdw_slave *pdev) @@ -1287,6 +1298,10 @@ static int wcd9380_remove(struct sdw_slave *pdev) component_del(dev, &wcd938x_sdw_component_ops); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); + return 0; } From af5fd122d7bd739a2b66405f6e8ab92557279325 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 6 Oct 2023 17:44:05 +0100 Subject: [PATCH 051/212] ASoC: cs35l56: Fix illegal use of init_completion() Fix cs35l56_patch() to call reinit_completion() to reinitialize the completion object. It was incorrectly using init_completion(). Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://lore.kernel.org/r/20231006164405.253796-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index f2e7c6d0be46..9c2d9cbc63d3 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -706,7 +706,7 @@ static void cs35l56_patch(struct cs35l56_private *cs35l56) mutex_lock(&cs35l56->base.irq_lock); - init_completion(&cs35l56->init_completion); + reinit_completion(&cs35l56->init_completion); cs35l56->soft_resetting = true; cs35l56_system_reset(&cs35l56->base, !!cs35l56->sdw_peripheral); From aa6464edbd51af4a2f8db43df866a7642b244b5f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Oct 2023 17:00:24 +0300 Subject: [PATCH 052/212] ASoC: pxa: fix a memory leak in probe() Free the "priv" pointer before returning the error code. Fixes: 90eb6b59d311 ("ASoC: pxa-ssp: add support for an external clock in devicetree") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/84ac2313-1420-471a-b2cb-3269a2e12a7c@moroto.mountain Signed-off-by: Mark Brown --- sound/soc/pxa/pxa-ssp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index b70034c07eee..b8a3cb8b7597 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -773,7 +773,7 @@ static int pxa_ssp_probe(struct snd_soc_dai *dai) if (IS_ERR(priv->extclk)) { ret = PTR_ERR(priv->extclk); if (ret == -EPROBE_DEFER) - return ret; + goto err_priv; priv->extclk = NULL; } From a37cd2a59d0cb270b1bba568fd3a3b8668b9d3ba Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 5 Oct 2023 11:06:36 +0200 Subject: [PATCH 053/212] x86/sev: Disable MMIO emulation from user mode A virt scenario can be constructed where MMIO memory can be user memory. When that happens, a race condition opens between when the hardware raises the #VC and when the #VC handler gets to emulate the instruction. If the MOVS is replaced with a MOVS accessing kernel memory in that small race window, then write to kernel memory happens as the access checks are not done at emulation time. Disable MMIO emulation in user mode temporarily until a sensible use case appears and justifies properly handling the race window. Fixes: 0118b604c2c9 ("x86/sev-es: Handle MMIO String Instructions") Reported-by: Tom Dohrmann Signed-off-by: Borislav Petkov (AMD) Tested-by: Tom Dohrmann Cc: --- arch/x86/kernel/sev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 2787826d9f60..4ee14e647ae6 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1509,6 +1509,9 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ES_DECODE_FAILED; } + if (user_mode(ctxt->regs)) + return ES_UNSUPPORTED; + switch (mmio) { case INSN_MMIO_WRITE: memcpy(ghcb->shared_buffer, reg_data, bytes); From b9cb9c45583b911e0db71d09caa6b56469eb2bdf Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Jun 2023 17:42:42 +0200 Subject: [PATCH 054/212] x86/sev: Check IOBM for IOIO exceptions from user-space Check the IO permission bitmap (if present) before emulating IOIO #VC exceptions for user-space. These permissions are checked by hardware already before the #VC is raised, but due to the VC-handler decoding race it needs to be checked again in software. Fixes: 25189d08e516 ("x86/sev-es: Add support for handling IOIO exceptions") Reported-by: Tom Dohrmann Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov (AMD) Tested-by: Tom Dohrmann Cc: --- arch/x86/boot/compressed/sev.c | 5 +++++ arch/x86/kernel/sev-shared.c | 22 +++++++++++++++------- arch/x86/kernel/sev.c | 27 +++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index dc8c876fbd8f..afd91041ec3e 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -103,6 +103,11 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, return ES_OK; } +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + return ES_OK; +} + #undef __init #define __init diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 2eabccde94fb..bcd77c48be0a 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -656,6 +656,9 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) { struct insn *insn = &ctxt->insn; + size_t size; + u64 port; + *exitinfo = 0; switch (insn->opcode.bytes[0]) { @@ -664,7 +667,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0x6d: *exitinfo |= IOIO_TYPE_INS; *exitinfo |= IOIO_SEG_ES; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* OUTS opcodes */ @@ -672,41 +675,43 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0x6f: *exitinfo |= IOIO_TYPE_OUTS; *exitinfo |= IOIO_SEG_DS; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* IN immediate opcodes */ case 0xe4: case 0xe5: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (u8)insn->immediate.value << 16; + port = (u8)insn->immediate.value & 0xffff; break; /* OUT immediate opcodes */ case 0xe6: case 0xe7: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (u8)insn->immediate.value << 16; + port = (u8)insn->immediate.value & 0xffff; break; /* IN register opcodes */ case 0xec: case 0xed: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* OUT register opcodes */ case 0xee: case 0xef: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; default: return ES_DECODE_FAILED; } + *exitinfo |= port << 16; + switch (insn->opcode.bytes[0]) { case 0x6c: case 0x6e: @@ -716,12 +721,15 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0xee: /* Single byte opcodes */ *exitinfo |= IOIO_DATA_8; + size = 1; break; default: /* Length determined by instruction parsing */ *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16 : IOIO_DATA_32; + size = (insn->opnd_bytes == 2) ? 2 : 4; } + switch (insn->addr_bytes) { case 2: *exitinfo |= IOIO_ADDR_16; @@ -737,7 +745,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) if (insn_has_rep_prefix(insn)) *exitinfo |= IOIO_REP; - return ES_OK; + return vc_ioio_check(ctxt, (u16)port, size); } static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 4ee14e647ae6..0f218932e844 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -524,6 +524,33 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt return ES_OK; } +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + BUG_ON(size > 4); + + if (user_mode(ctxt->regs)) { + struct thread_struct *t = ¤t->thread; + struct io_bitmap *iobm = t->io_bitmap; + size_t idx; + + if (!iobm) + goto fault; + + for (idx = port; idx < port + size; ++idx) { + if (test_bit(idx, iobm->bitmap)) + goto fault; + } + } + + return ES_OK; + +fault: + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + + return ES_EXCEPTION; +} + /* Include code shared with pre-decompression boot stage */ #include "sev-shared.c" From 1bba0badff0ede8dc51641cff4b153422baa3369 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 9 Oct 2023 16:34:12 +0100 Subject: [PATCH 055/212] ASoC: cs35l56: ASP1 DOUT must default to Hi-Z when not transmitting The ASP1 DOUT line must be defaulted to be high-impedance when it is not actually transmitting data for an active channel. In non-SoundWire modes ASP1 will usually be shared by multiple amps so each amp must only drive the line during the slot for an enabled TX channel. In SoundWire mode a custom firmware can use ASP1 as a secondary chip-to-chip audio link or as GPIO. It should be defaulted to high-impedance since by default the purpose of this pin is not known. Backport note: On kernel versions before 6.6 the cs35l56->base.regmap argument to regmap_set_bits() must be changed to cs35l56->regmap. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://lore.kernel.org/r/20231009153412.30380-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 9c2d9cbc63d3..f9059780b7a7 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1186,6 +1186,12 @@ int cs35l56_init(struct cs35l56_private *cs35l56) /* Registers could be dirty after soft reset or SoundWire enumeration */ regcache_sync(cs35l56->base.regmap); + /* Set ASP1 DOUT to high-impedance when it is not transmitting audio data. */ + ret = regmap_set_bits(cs35l56->base.regmap, CS35L56_ASP1_CONTROL3, + CS35L56_ASP1_DOUT_HIZ_CTRL_MASK); + if (ret) + return dev_err_probe(cs35l56->base.dev, ret, "Failed to write ASP1_CONTROL3\n"); + cs35l56->base.init_done = true; complete(&cs35l56->init_completion); From 53ba32acb5ab137ba333c20e0c987bdd6273a366 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 10 Oct 2023 11:24:24 +0100 Subject: [PATCH 056/212] ASoC: dt-bindings: cirrus,cs42l43: Update values for bias sense Due to an error in the datasheet the bias sense values currently don't match the hardware. Whilst this is a change to the binding no devices have yet shipped so updating the binding will not cause any issues. Acked-by: Krzysztof Kozlowski Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20231010102425.3662364-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml index 7a6de938b11d..4118aa54bbd5 100644 --- a/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml +++ b/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml @@ -82,7 +82,7 @@ properties: description: Current at which the headset micbias sense clamp will engage, 0 to disable. - enum: [ 0, 14, 23, 41, 50, 60, 68, 86, 95 ] + enum: [ 0, 14, 24, 43, 52, 61, 71, 90, 99 ] default: 0 cirrus,bias-ramp-ms: From 99d426c6dd2d6f9734617ec12def856ee35b9218 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 10 Oct 2023 11:24:25 +0100 Subject: [PATCH 057/212] ASoC: cs42l43: Update values for bias sense Due to an error in the datasheet the bias sense values currently don't match the hardware. Whilst this is a change to the binding no devices have yet shipped so updating the binding will not cause any issues. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20231010102425.3662364-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index 92e37bc1df9d..9f5f1a92561d 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -34,7 +34,7 @@ static const unsigned int cs42l43_accdet_db_ms[] = { static const unsigned int cs42l43_accdet_ramp_ms[] = { 10, 40, 90, 170 }; static const unsigned int cs42l43_accdet_bias_sense[] = { - 14, 23, 41, 50, 60, 68, 86, 95, 0, + 14, 24, 43, 52, 61, 71, 90, 99, 0, }; static int cs42l43_find_index(struct cs42l43_codec *priv, const char * const prop, From d920abd1e7c4884f9ecd0749d1921b7ab19ddfbd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 2 Oct 2023 13:54:28 +0300 Subject: [PATCH 058/212] nvmet-tcp: Fix a possible UAF in queue intialization setup From Alon: "Due to a logical bug in the NVMe-oF/TCP subsystem in the Linux kernel, a malicious user can cause a UAF and a double free, which may lead to RCE (may also lead to an LPE in case the attacker already has local privileges)." Hence, when a queue initialization fails after the ahash requests are allocated, it is guaranteed that the queue removal async work will be called, hence leave the deallocation to the queue removal. Also, be extra careful not to continue processing the socket, so set queue rcv_state to NVMET_TCP_RECV_ERR upon a socket error. Cc: stable@vger.kernel.org Reported-by: Alon Zahavi Tested-by: Alon Zahavi Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index cd92d7ddf5ed..197fc2ecb164 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -372,6 +372,7 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) { + queue->rcv_state = NVMET_TCP_RECV_ERR; if (status == -EPIPE || status == -ECONNRESET) kernel_sock_shutdown(queue->sock, SHUT_RDWR); else @@ -910,15 +911,11 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) iov.iov_len = sizeof(*icresp); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (ret < 0) - goto free_crypto; + return ret; /* queue removal will cleanup */ queue->state = NVMET_TCP_Q_LIVE; nvmet_prepare_receive_pdu(queue); return 0; -free_crypto: - if (queue->hdr_digest || queue->data_digest) - nvmet_tcp_free_crypto(queue); - return ret; } static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, From 4ae55a7dce04989f289d5c5c8c8e5c37adc36c71 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 4 Sep 2023 17:26:38 +0200 Subject: [PATCH 059/212] nvme-auth: use chap->s2 to indicate bidirectional authentication Commit 546dea18c999 ("nvme-auth: check chap ctrl_key once constructed") replaced the condition "if (ctrl->ctrl_key)" (indicating bidirectional auth) by "if (chap->ctrl_key)", because ctrl->ctrl_key is a resource shared with sysfs. But chap->ctrl_key is set in nvme_auth_process_dhchap_challenge() depending on the DHVLEN in the DH-HMAC-CHAP Challenge message received from the controller, and will thus be non-NULL for every DH-HMAC-CHAP exchange, even if unidirectional auth was requested. This will lead to a protocol violation by sending a Success2 message in the unidirectional case (per NVMe base spec 2.0, the authentication transaction ends after the Success1 message for unidirectional auth). Use chap->s2 instead, which is non-zero if and only if the host requested bi-directional authentication from the controller. Fixes: 546dea18c999 ("nvme-auth: check chap ctrl_key once constructed") Signed-off-by: Martin Wilck Reviewed-by: Daniel Wagner Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index daf5d144a8ea..064592a5d546 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -341,7 +341,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, struct nvmf_auth_dhchap_success1_data *data = chap->buf; size_t size = sizeof(*data); - if (chap->ctrl_key) + if (chap->s2) size += chap->hash_len; if (size > CHAP_BUF_SIZE) { @@ -825,7 +825,7 @@ static void nvme_queue_auth_work(struct work_struct *work) goto fail2; } - if (chap->ctrl_key) { + if (chap->s2) { /* DH-HMAC-CHAP Step 5: send success2 */ dev_dbg(ctrl->device, "%s: qid %d send success2\n", __func__, chap->qid); From 4d73c6772ab771cbbe7e46a73e7c78ba490350fa Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 4 Oct 2023 11:19:15 -0700 Subject: [PATCH 060/212] platform/x86: intel-uncore-freq: Conditionally create attribute for read frequency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the current uncore frequency can't be read, don't create attribute "current_freq_khz" as any read will fail later. Some user space applications like turbostat fail to continue with the failure. So, check error during attribute creation. Fixes: 414eef27283a ("platform/x86/intel/uncore-freq: Display uncore current frequency") Signed-off-by: Srinivas Pandruvada Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231004181915.1887913-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- .../x86/intel/uncore-frequency/uncore-frequency-common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 1152deaa0078..33ab207493e3 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -176,7 +176,7 @@ show_uncore_data(initial_max_freq_khz); static int create_attr_group(struct uncore_data *data, char *name) { - int ret, index = 0; + int ret, freq, index = 0; init_attribute_rw(max_freq_khz); init_attribute_rw(min_freq_khz); @@ -197,7 +197,11 @@ static int create_attr_group(struct uncore_data *data, char *name) data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr; data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr; data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + + ret = uncore_read_freq(data, &freq); + if (!ret) + data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + data->uncore_attrs[index] = NULL; data->uncore_attr_group.name = name; From 6284e67aa6cb3af870ed11dfcfafd80fd927777b Mon Sep 17 00:00:00 2001 From: Nikita Kravets Date: Fri, 6 Oct 2023 20:53:53 +0300 Subject: [PATCH 061/212] platform/x86: msi-ec: Fix the 3rd config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the charge control address of CONF3 and remove an incorrect firmware version which turned out to be a BIOS firmware and not an EC firmware. Fixes: 392cacf2aa10 ("platform/x86: Add new msi-ec driver") Cc: Aakash Singh Cc: Jose Angel Pastrana Signed-off-by: Nikita Kravets Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231006175352.1753017-5-teackot@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/msi-ec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/msi-ec.c b/drivers/platform/x86/msi-ec.c index f26a3121092f..492eb383ee7a 100644 --- a/drivers/platform/x86/msi-ec.c +++ b/drivers/platform/x86/msi-ec.c @@ -276,14 +276,13 @@ static struct msi_ec_conf CONF2 __initdata = { static const char * const ALLOWED_FW_3[] __initconst = { "1592EMS1.111", - "E1592IMS.10C", NULL }; static struct msi_ec_conf CONF3 __initdata = { .allowed_fw = ALLOWED_FW_3, .charge_control = { - .address = 0xef, + .address = 0xd7, .offset_start = 0x8a, .offset_end = 0x80, .range_min = 0x8a, From 51064b7acf665bfa2c929d7cafb7ad494b7d2783 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 8 Oct 2023 01:39:28 +0200 Subject: [PATCH 062/212] platform/x86: wmi: Update MAINTAINERS entry Since 2011, the WMI subsystem is marked as orphaned, which means that a important part of platform support for modern notebooks and even desktops is receiving not enough maintenance. So i decided to take over the maintenance of the WMI subsystem. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20231007233933.72121-2-W_Armin@gmx.de Acked-by: Hans de Goede Signed-off-by: Hans de Goede --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index dbf1668dcd84..b38fcbaa24f9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -378,8 +378,9 @@ F: drivers/acpi/viot.c F: include/linux/acpi_viot.h ACPI WMI DRIVER +M: Armin Wolf L: platform-driver-x86@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/driver-api/wmi.rst F: Documentation/wmi/ F: drivers/platform/x86/wmi.c From f588d72bd95f748849685412b1f0c7959ca228cf Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Mon, 18 Sep 2023 23:30:20 -0700 Subject: [PATCH 063/212] nfs42: client needs to strip file mode's suid/sgid bit after ALLOCATE op The Linux NFS server strips the SUID and SGID from the file mode on ALLOCATE op. Modify _nfs42_proc_fallocate to add NFS_INO_REVAL_FORCED to nfs_set_cache_invalid's argument to force update of the file mode suid/sgid bit. Suggested-by: Trond Myklebust Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Tested-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 063e00aff87e..28704f924612 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -81,7 +81,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, if (status == 0) { if (nfs_should_remove_suid(inode)) { spin_lock(&inode->i_lock); - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); + nfs_set_cache_invalid(inode, + NFS_INO_REVAL_FORCED | NFS_INO_INVALID_MODE); spin_unlock(&inode->i_lock); } status = nfs_post_op_update_inode_force_wcc(inode, From 6a6d4644ce935ddec4f76223ac0ca68da56bd2d3 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 11 Oct 2023 10:43:26 -0400 Subject: [PATCH 064/212] NFS: Fix potential oops in nfs_inode_remove_request() Once a folio's private data has been cleared, it's possible for another process to clear the folio->mapping (e.g. via invalidate_complete_folio2 or evict_mapping_folio), so it wouldn't be safe to call nfs_page_to_inode() after that. Fixes: 0c493b5cf16e ("NFS: Convert buffered writes to use folios") Signed-off-by: Scott Mayhew Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7720b5e43014..9d82d50ce0b1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -788,6 +788,8 @@ static void nfs_inode_add_request(struct nfs_page *req) */ static void nfs_inode_remove_request(struct nfs_page *req) { + struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req)); + if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); struct address_space *mapping = folio_file_mapping(folio); @@ -802,7 +804,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { - atomic_long_dec(&NFS_I(nfs_page_to_inode(req))->nrequests); + atomic_long_dec(&nfsi->nrequests); nfs_release_request(req); } } From d6cbc6a3a856a7d8047316d81e2e039e44432acb Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 11 Oct 2023 14:48:53 +0100 Subject: [PATCH 065/212] ASoC: cs42l42: Fix missing include of gpio/consumer.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to gpiod_set_value_cansleep() in cs42l42_sdw_update_status() needs the header file gpio/consumer.h to be included. This was introduced by commit 2d066c6a7865 ("ASoC: cs42l42: Avoid stale SoundWire ATTACH after hard reset") and caused error: sound/soc/codecs/cs42l42-sdw.c:374:4: error: implicit declaration of function ‘gpiod_set_value_cansleep’; did you mean gpio_set_value_cansleep’? Signed-off-by: Richard Fitzgerald Fixes: 2d066c6a7865 ("ASoC: cs42l42: Avoid stale SoundWire ATTACH after hard reset") Link: https://lore.kernel.org/r/20231011134853.20059-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42-sdw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs42l42-sdw.c b/sound/soc/codecs/cs42l42-sdw.c index 974bae4abfad..94a66a325303 100644 --- a/sound/soc/codecs/cs42l42-sdw.c +++ b/sound/soc/codecs/cs42l42-sdw.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include From 7b821db95140e2c118567aee22a78bf85f3617e0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 2 Oct 2023 16:54:06 -0700 Subject: [PATCH 066/212] drm/bridge: ti-sn65dsi86: Associate DSI device lifetime with auxiliary device The kernel produces a warning splat and the DSI device fails to register in this driver if the i2c driver probes, populates child auxiliary devices, and then somewhere in ti_sn_bridge_probe() a function call returns -EPROBE_DEFER. When the auxiliary driver probe defers, the dsi device created by devm_mipi_dsi_device_register_full() is left registered because the devm managed device used to manage the lifetime of the DSI device is the parent i2c device, not the auxiliary device that is being probed. Associate the DSI device created and managed by this driver to the lifetime of the auxiliary device, not the i2c device, so that the DSI device is removed when the auxiliary driver unbinds. Similarly change the device pointer used for dev_err_probe() so the deferred probe errors are associated with the auxiliary device instead of the parent i2c device so we can narrow down future problems faster. Cc: Douglas Anderson Cc: Maxime Ripard Fixes: c3b75d4734cb ("drm/bridge: sn65dsi86: Register and attach our DSI device at probe") Signed-off-by: Stephen Boyd Reviewed-by: Neil Armstrong Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231002235407.769399-1-swboyd@chromium.org --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index f448b903e190..84148a79414b 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -692,7 +692,7 @@ static struct ti_sn65dsi86 *bridge_to_ti_sn65dsi86(struct drm_bridge *bridge) return container_of(bridge, struct ti_sn65dsi86, bridge); } -static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) +static int ti_sn_attach_host(struct auxiliary_device *adev, struct ti_sn65dsi86 *pdata) { int val; struct mipi_dsi_host *host; @@ -707,7 +707,7 @@ static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) if (!host) return -EPROBE_DEFER; - dsi = devm_mipi_dsi_device_register_full(dev, host, &info); + dsi = devm_mipi_dsi_device_register_full(&adev->dev, host, &info); if (IS_ERR(dsi)) return PTR_ERR(dsi); @@ -725,7 +725,7 @@ static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) pdata->dsi = dsi; - return devm_mipi_dsi_attach(dev, dsi); + return devm_mipi_dsi_attach(&adev->dev, dsi); } static int ti_sn_bridge_attach(struct drm_bridge *bridge, @@ -1298,9 +1298,9 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, struct device_node *np = pdata->dev->of_node; int ret; - pdata->next_bridge = devm_drm_of_get_bridge(pdata->dev, np, 1, 0); + pdata->next_bridge = devm_drm_of_get_bridge(&adev->dev, np, 1, 0); if (IS_ERR(pdata->next_bridge)) - return dev_err_probe(pdata->dev, PTR_ERR(pdata->next_bridge), + return dev_err_probe(&adev->dev, PTR_ERR(pdata->next_bridge), "failed to create panel bridge\n"); ti_sn_bridge_parse_lanes(pdata, np); @@ -1319,9 +1319,9 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, drm_bridge_add(&pdata->bridge); - ret = ti_sn_attach_host(pdata); + ret = ti_sn_attach_host(adev, pdata); if (ret) { - dev_err_probe(pdata->dev, ret, "failed to attach dsi host\n"); + dev_err_probe(&adev->dev, ret, "failed to attach dsi host\n"); goto err_remove_bridge; } From ad3e33fe071dffea07279f96dab4f3773c430fe2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 25 Sep 2023 15:00:11 -0700 Subject: [PATCH 067/212] drm/panel: Move AUX B116XW03 out of panel-edp back to panel-simple In commit 5f04e7ce392d ("drm/panel-edp: Split eDP panels out of panel-simple") I moved a pile of panels out of panel-simple driver into the newly created panel-edp driver. One of those panels, however, shouldn't have been moved. As is clear from commit e35e305eff0f ("drm/panel: simple: Add AUO B116XW03 panel support"), AUX B116XW03 is an LVDS panel. It's used in exynos5250-snow and exynos5420-peach-pit where it's clear that the panel is hooked up with LVDS. Furthermore, searching for datasheets I found one that makes it clear that this panel is LVDS. As far as I can tell, I got confused because in commit 88d3457ceb82 ("drm/panel: auo,b116xw03: fix flash backlight when power on") Jitao Shi added "DRM_MODE_CONNECTOR_eDP". That seems wrong. Looking at the downstream ChromeOS trees, it seems like some Mediatek boards are using a panel that they call "auo,b116xw03" that's an eDP panel. The best I can guess is that they actually have a different panel that has similar timing. If so then the proper panel should be used or they should switch to the generic "edp-panel" compatible. When moving this back to panel-edp, I wasn't sure what to use for .bus_flags and .bus_format and whether to add the extra "enable" delay from commit 88d3457ceb82 ("drm/panel: auo,b116xw03: fix flash backlight when power on"). I've added formats/flags/delays based on my (inexpert) analysis of the datasheet. These are untested. NOTE: if/when this is backported to stable, we might run into some trouble. Specifically, before 474c162878ba ("arm64: dts: mt8183: jacuzzi: Move panel under aux-bus") this panel was used by "mt8183-kukui-jacuzzi", which assumed it was an eDP panel. I don't know what to suggest for that other than someone making up a bogus panel for jacuzzi that's just for the stable channel. Fixes: 88d3457ceb82 ("drm/panel: auo,b116xw03: fix flash backlight when power on") Fixes: 5f04e7ce392d ("drm/panel-edp: Split eDP panels out of panel-simple") Tested-by: Anton Bambura Acked-by: Hsin-Yi Wang Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20230925150010.1.Iff672233861bcc4cf25a7ad0a81308adc3bda8a4@changeid --- drivers/gpu/drm/panel/panel-edp.c | 29 ----------------------- drivers/gpu/drm/panel/panel-simple.c | 35 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index feb665df35a1..95c8472d878a 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -976,32 +976,6 @@ static const struct panel_desc auo_b116xak01 = { }, }; -static const struct drm_display_mode auo_b116xw03_mode = { - .clock = 70589, - .hdisplay = 1366, - .hsync_start = 1366 + 40, - .hsync_end = 1366 + 40 + 40, - .htotal = 1366 + 40 + 40 + 32, - .vdisplay = 768, - .vsync_start = 768 + 10, - .vsync_end = 768 + 10 + 12, - .vtotal = 768 + 10 + 12 + 6, - .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, -}; - -static const struct panel_desc auo_b116xw03 = { - .modes = &auo_b116xw03_mode, - .num_modes = 1, - .bpc = 6, - .size = { - .width = 256, - .height = 144, - }, - .delay = { - .enable = 400, - }, -}; - static const struct drm_display_mode auo_b133han05_mode = { .clock = 142600, .hdisplay = 1920, @@ -1725,9 +1699,6 @@ static const struct of_device_id platform_of_match[] = { }, { .compatible = "auo,b116xa01", .data = &auo_b116xak01, - }, { - .compatible = "auo,b116xw03", - .data = &auo_b116xw03, }, { .compatible = "auo,b133han05", .data = &auo_b133han05, diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 95959dcc6e0e..dd7928d9570f 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -919,6 +919,38 @@ static const struct panel_desc auo_b101xtn01 = { }, }; +static const struct drm_display_mode auo_b116xw03_mode = { + .clock = 70589, + .hdisplay = 1366, + .hsync_start = 1366 + 40, + .hsync_end = 1366 + 40 + 40, + .htotal = 1366 + 40 + 40 + 32, + .vdisplay = 768, + .vsync_start = 768 + 10, + .vsync_end = 768 + 10 + 12, + .vtotal = 768 + 10 + 12 + 6, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, +}; + +static const struct panel_desc auo_b116xw03 = { + .modes = &auo_b116xw03_mode, + .num_modes = 1, + .bpc = 6, + .size = { + .width = 256, + .height = 144, + }, + .delay = { + .prepare = 1, + .enable = 200, + .disable = 200, + .unprepare = 500, + }, + .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct display_timing auo_g070vvn01_timings = { .pixelclock = { 33300000, 34209000, 45000000 }, .hactive = { 800, 800, 800 }, @@ -4102,6 +4134,9 @@ static const struct of_device_id platform_of_match[] = { }, { .compatible = "auo,b101xtn01", .data = &auo_b101xtn01, + }, { + .compatible = "auo,b116xw03", + .data = &auo_b116xw03, }, { .compatible = "auo,g070vvn01", .data = &auo_g070vvn01, From f2cab4b318ee8023f4ad640b906ae268942a7db4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Oct 2023 07:02:31 -0700 Subject: [PATCH 068/212] drm/nouveau: exec: fix ioctl kernel-doc warning kernel-doc emits a warning: include/uapi/drm/nouveau_drm.h:49: warning: Cannot understand * @NOUVEAU_GETPARAM_EXEC_PUSH_MAX on line 49 - I thought it was a doc line We don't have a way to document a macro value via kernel-doc, so change the "/**" kernel-doc marker to a C comment and format the comment more like a kernel-doc comment for consistency. Fixes: d59e75eef52d ("drm/nouveau: exec: report max pushs through getparam") Signed-off-by: Randy Dunlap Cc: Dave Airlie Cc: Danilo Krummrich Cc: Karol Herbst Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: Bragatheswaran Manickavel Reviewed-by: Lyude Paul Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231008140231.17921-1-rdunlap@infradead.org --- include/uapi/drm/nouveau_drm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index eaf9f248619f..0bade1592f34 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -45,8 +45,8 @@ extern "C" { #define NOUVEAU_GETPARAM_HAS_BO_USAGE 15 #define NOUVEAU_GETPARAM_HAS_PAGEFLIP 16 -/** - * @NOUVEAU_GETPARAM_EXEC_PUSH_MAX +/* + * NOUVEAU_GETPARAM_EXEC_PUSH_MAX - query max pushes through getparam * * Query the maximum amount of IBs that can be pushed through a single * &drm_nouveau_exec structure and hence a single &DRM_IOCTL_NOUVEAU_EXEC From d873a364ef2182af40110869f9c62813ce6f9386 Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Wed, 30 Aug 2023 08:02:23 +0700 Subject: [PATCH 069/212] tools/nolibc: i386: Fix a stack misalign bug on _start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ABI mandates that the %esp register must be a multiple of 16 when executing a 'call' instruction. Commit 2ab446336b17 ("tools/nolibc: i386: shrink _start with _start_c") simplified the _start function, but it didn't take care of the %esp alignment, causing SIGSEGV on SSE and AVX programs that use aligned move instruction (e.g., movdqa, movaps, and vmovdqa). The 'and $-16, %esp' aligns the %esp at a multiple of 16. Then 'push %eax' will subtract the %esp by 4; thus, it breaks the 16-byte alignment. Make sure the %esp is correctly aligned after the push by subtracting 12 before the push. Extra: Add 'add $12, %esp' before the 'and $-16, %esp' to avoid over-estimating for particular cases as suggested by Willy. A test program to validate the %esp alignment on _start can be found at: https://lore.kernel.org/lkml/ZOoindMFj1UKqo+s@biznet-home.integral.gnuweeb.org [ Thomas: trim Fixes tag commit id ] Cc: Zhangjin Wu Fixes: 2ab446336b17 ("tools/nolibc: i386: shrink _start with _start_c") Reported-by: Nicholas Rosenberg Acked-by: Thomas Weißschuh Signed-off-by: Ammar Faizi Reviewed-by: Alviro Iskandar Setiawan Signed-off-by: Willy Tarreau Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/arch-i386.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h index 64415b9fac77..28c26a00a762 100644 --- a/tools/include/nolibc/arch-i386.h +++ b/tools/include/nolibc/arch-i386.h @@ -167,7 +167,9 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ - "and $-16, %esp\n" /* last pushed argument must be 16-byte aligned */ + "add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */ + "and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */ + "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ "push %eax\n" /* push arg1 on stack to support plain stack modes too */ "call _start_c\n" /* transfer to c runtime */ "hlt\n" /* ensure it does not return */ From 513bd2788e4994f6187d22b2fd0d25c3cfbeb87a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 16 Sep 2023 00:08:54 +0200 Subject: [PATCH 070/212] MAINTAINERS: nolibc: update tree location MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nolibc tree moved out of Willys user namespace into its own. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20230916-nolibc-tree-v1-1-06c9b59a5035@weissschuh.net --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d297..6c83087ea396 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15131,7 +15131,7 @@ NOLIBC HEADER FILE M: Willy Tarreau M: Thomas Weißschuh S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/wtarreau/nolibc.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/nolibc/linux-nolibc.git F: tools/include/nolibc/ F: tools/testing/selftests/nolibc/ From 921992229b1f06df6b649860e4a5f3def1489866 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 12 Oct 2023 00:37:38 +0200 Subject: [PATCH 071/212] tools/nolibc: mark start_c as weak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the different instances of _start_c from each compilation unit will lead to linker errors: /usr/bin/ld: /tmp/ccSNvRqs.o: in function `_start_c': nolibc-test-foo.c:(.text.nolibc_memset+0x9): multiple definition of `_start_c'; /tmp/ccG25101.o:nolibc-test.c:(.text+0x1ea3): first defined here Fixes: 17336755150b ("tools/nolibc: add new crt.h with _start_c") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/lkml/20231012-nolibc-start_c-multiple-v1-1-fbfc73e0283f@weissschuh.net/ Link: https://lore.kernel.org/lkml/20231012-nolibc-linkage-test-v1-1-315e682768b4@weissschuh.net/ Acked-by: Willy Tarreau --- tools/include/nolibc/crt.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h index a5f33fef1672..a05655b4ce1d 100644 --- a/tools/include/nolibc/crt.h +++ b/tools/include/nolibc/crt.h @@ -13,6 +13,7 @@ const unsigned long *_auxv __attribute__((weak)); static void __stack_chk_init(void); static void exit(int); +__attribute__((weak)) void _start_c(long *sp) { long argc; From 4366faf43308bd91c59a20c43a9f853a9c3bb6e4 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 11 Oct 2023 13:41:34 +0200 Subject: [PATCH 072/212] drm/nouveau/disp: fix DP capable DSM connectors Just special case DP DSM connectors until we properly figure out how to deal with this. This resolves user regressions on GPUs with such connectors without reverting the original fix. Cc: Lyude Paul Cc: stable@vger.kernel.org # 6.4+ Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/255 Fixes: 2b5d1c29f6c4 ("drm/nouveau/disp: PIOR DP uses GPIO for HPD, not PMGR AUX interrupts") Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20231011114134.861818-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c index 46b057fe1412..3249e5c1c893 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c @@ -62,6 +62,18 @@ nvkm_uconn_uevent_gpio(struct nvkm_object *object, u64 token, u32 bits) return object->client->event(token, &args, sizeof(args.v0)); } +static bool +nvkm_connector_is_dp_dms(u8 type) +{ + switch (type) { + case DCB_CONNECTOR_DMS59_DP0: + case DCB_CONNECTOR_DMS59_DP1: + return true; + default: + return false; + } +} + static int nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent) { @@ -101,7 +113,7 @@ nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_ if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_GPIO_LO; if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ) { /* TODO: support DP IRQ on ANX9805 and remove this hack. */ - if (!outp->info.location) + if (!outp->info.location && !nvkm_connector_is_dp_dms(conn->info.type)) return -EINVAL; } From 17bfcd6a81535d7d580ddafb6e54806890aaca6c Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sat, 30 Sep 2023 11:49:58 -0300 Subject: [PATCH 073/212] rust: error: fix the description for `ECHILD` A mistake was made and the description of `ECHILD` is wrong (it reuses the description of `ENOEXEC`). This fixes it to reflect what's in `errno-base.h`. Signed-off-by: Wedson Almeida Filho Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Trevor Gross Reviewed-by: Finn Behrens Reviewed-by: Alice Ryhl Fixes: 266def2a0f5b ("rust: error: add codes from `errno-base.h`") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230930144958.46051-1-wedsonaf@gmail.com [ Use the plural, as noticed by Benno. ] Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 05fcab6abfe6..0f29e7b2194c 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -37,7 +37,7 @@ macro_rules! declare_err { declare_err!(E2BIG, "Argument list too long."); declare_err!(ENOEXEC, "Exec format error."); declare_err!(EBADF, "Bad file number."); - declare_err!(ECHILD, "Exec format error."); + declare_err!(ECHILD, "No child processes."); declare_err!(EAGAIN, "Try again."); declare_err!(ENOMEM, "Out of memory."); declare_err!(EACCES, "Permission denied."); From 2a7e0a52ec98566a863aba42ea35690c65e3da27 Mon Sep 17 00:00:00 2001 From: Manmohan Shukla Date: Thu, 7 Sep 2023 02:18:57 +0530 Subject: [PATCH 074/212] rust: error: Markdown style nit This patch fixes a trivial markdown style nit in the `SAFETY` comment. Signed-off-by: Manmohan Shukla Reviewed-by: Alice Ryhl Reviewed-by: Jianguo Bao Reviewed-by: Vincenzo Palazzo Reviewed-by: Finn Behrens Reviewed-by: Benno Lossin Reviewed-by: Andreas Hindborg Fixes: c7e20faa5fca ("rust: error: Add Error::to_ptr()") Link: https://lore.kernel.org/r/20230906204857.85619-1-manmshuk@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 0f29e7b2194c..032b64543953 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -133,7 +133,7 @@ pub fn to_errno(self) -> core::ffi::c_int { /// Returns the error encoded as a pointer. #[allow(dead_code)] pub(crate) fn to_ptr(self) -> *mut T { - // SAFETY: self.0 is a valid error due to its invariant. + // SAFETY: `self.0` is a valid error due to its invariant. unsafe { bindings::ERR_PTR(self.0.into()) as *mut _ } } From 344b6c0a7514b044ed12b8ad3cdeecd262292f3e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 15 Aug 2023 08:53:46 +0200 Subject: [PATCH 075/212] rust: fix bindgen build error with fstrict-flex-arrays Commit df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") enabled '-fstrict-flex-arrays=3' globally, but bindgen does not recognized this compiler option, triggering the following build error: error: unknown argument: '-fstrict-flex-arrays=3', err: true [ Miguel: Commit df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") did it so only conditionally (i.e. only if the C compiler supports it). This explains what Andrea was seeing: he was compiling with a modern enough GCC, which enables the option, but with an old enough Clang. Andrea confirmed this was the case: he was using Clang 14 with GCC 13; and that Clang 15 worked for him. While it is possible to construct code (see mailing list for an example I came up with) where this could break, it is fairly contrived, and anyway GCC-built kernels with Rust enabled should only be used for experimentation until we get support for `rustc_codegen_gcc` and/or GCC Rust. So let's add this for the time being in case it helps somebody. ] Add '-fstrict-flex-arrays' to the list of cflags that should be ignored by bindgen. Fixes: df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") Signed-off-by: Andrea Righi Tested-by: Gary Guo Link: https://lore.kernel.org/r/20230815065346.131387-1-andrea.righi@canonical.com Signed-off-by: Miguel Ojeda --- rust/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/Makefile b/rust/Makefile index 87958e864be0..1e78c82a18a8 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -290,6 +290,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ + -fstrict-flex-arrays=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. From 6a7be48e9bd18d309ba25c223a27790ad1bf0fa3 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 5 Sep 2023 09:37:24 +0200 Subject: [PATCH 076/212] USB: serial: option: add Telit LE910C4-WWX 0x1035 composition Add support for the following Telit LE910C4-WWX composition: 0x1035: TTY, TTY, ECM T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1035 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=e1b117c7 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms I: If#= 3 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Fabio Porcedda Cc: stable@vger.kernel.org Reviewed-by: Daniele Palmas Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7994a4549a6c..f612bc9129bd 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1290,6 +1290,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1033, 0xff), /* Telit LE910C1-EUX (ECM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1035, 0xff) }, /* Telit LE910C4-WWX (ECM) */ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0), .driver_info = RSVD(0) | RSVD(1) | NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG1), From 064f6e2ba9eb59b2c87b866e1e968e79ccedf9dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Monin?= Date: Mon, 2 Oct 2023 17:51:40 +0200 Subject: [PATCH 077/212] USB: serial: option: add entry for Sierra EM9191 with new firmware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following a firmware update of the modem, the interface for the AT command port changed, so add it back. T: Bus=08 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=1199 ProdID=90d3 Rev=00.06 S: Manufacturer=Sierra Wireless, Incorporated S: Product=Sierra Wireless EM9191 S: SerialNumber=xxxxxxxxxxxxxxxx C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=(none) I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option Signed-off-by: Benoît Monin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f612bc9129bd..c0908e6d4921 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2263,6 +2263,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { } /* Terminating entry */ From ff07186b4d774ac22a5345d30763045af4569416 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 11 Oct 2023 22:25:28 +0300 Subject: [PATCH 078/212] x86/efistub: Don't try to print after ExitBootService() setup_e820() is executed after UEFI's ExitBootService has been called. This causes the firmware to throw an exception because the Console IO protocol is supposed to work only during boot service environment. As per UEFI 2.9, section 12.1: "This protocol is used to handle input and output of text-based information intended for the system user during the operation of code in the boot services environment." So drop the diagnostic warning from this function. We might add back a warning that is issued later when initializing the kernel itself. Signed-off-by: Nikolay Borisov Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/x86-stub.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 2fee52ed335d..3b8bccd7c216 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -605,11 +605,8 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_s break; case EFI_UNACCEPTED_MEMORY: - if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) { - efi_warn_once( -"The system has unaccepted memory, but kernel does not support it\nConsider enabling CONFIG_UNACCEPTED_MEMORY\n"); + if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) continue; - } e820_type = E820_TYPE_RAM; process_unaccepted_memory(d->phys_addr, d->phys_addr + PAGE_SIZE * d->num_pages); From 0d3ad1917996839a5042d18f04e41915cfa1b74a Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 24 Sep 2023 22:26:33 +0800 Subject: [PATCH 079/212] efi: fix memory leak in krealloc failure handling In the previous code, there was a memory leak issue where the previously allocated memory was not freed upon a failed krealloc operation. This patch addresses the problem by releasing the old memory before setting the pointer to NULL in case of a krealloc failure. This ensures that memory is properly managed and avoids potential memory leaks. Signed-off-by: Kuan-Wei Chiu Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 1599f1176842..9cfac61812f6 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -273,9 +273,13 @@ static __init int efivar_ssdt_load(void) if (status == EFI_NOT_FOUND) { break; } else if (status == EFI_BUFFER_TOO_SMALL) { - name = krealloc(name, name_size, GFP_KERNEL); - if (!name) + efi_char16_t *name_tmp = + krealloc(name, name_size, GFP_KERNEL); + if (!name_tmp) { + kfree(name); return -ENOMEM; + } + name = name_tmp; continue; } From 4eaf0932c69bdc56d2c2af30404f9c918b1f6295 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Tue, 3 Oct 2023 12:02:09 +0200 Subject: [PATCH 080/212] block: Fix regression in sed-opal for a saved key. The commit 3bfeb61256643281ac4be5b8a57e9d9da3db4335 introduced the use of keyring for sed-opal. Unfortunately, there is also a possibility to save the Opal key used in opal_lock_unlock(). This patch switches the order of operation, so the cached key is used instead of failure for opal_get_key. The problem was found by the cryptsetup Opal test recently added to the cryptsetup tree. Fixes: 3bfeb6125664 ("block: sed-opal: keyring support for SED keys") Tested-by: Ondrej Kozina Signed-off-by: Milan Broz Link: https://lore.kernel.org/r/20231003100209.380037-1-gmazyland@gmail.com Signed-off-by: Jens Axboe --- block/sed-opal.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/block/sed-opal.c b/block/sed-opal.c index 6d7f25d1711b..04f38a3f5d95 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -2888,12 +2888,11 @@ static int opal_lock_unlock(struct opal_dev *dev, if (lk_unlk->session.who > OPAL_USER9) return -EINVAL; - ret = opal_get_key(dev, &lk_unlk->session.opal_key); - if (ret) - return ret; mutex_lock(&dev->dev_lock); opal_lock_check_for_saved_key(dev, lk_unlk); - ret = __opal_lock_unlock(dev, lk_unlk); + ret = opal_get_key(dev, &lk_unlk->session.opal_key); + if (!ret) + ret = __opal_lock_unlock(dev, lk_unlk); mutex_unlock(&dev->dev_lock); return ret; From f88dfbf333b3661faff996bb03af2024d907b76a Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Fri, 13 Oct 2023 17:45:25 +0800 Subject: [PATCH 081/212] ASoC: rt5650: fix the wrong result of key button The RT5650 should enable a power setting for button detection to avoid the wrong result. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20231013094525.715518-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 1a137ca3f496..7938b52d741d 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3257,6 +3257,8 @@ int rt5645_set_jack_detect(struct snd_soc_component *component, RT5645_GP1_PIN_IRQ, RT5645_GP1_PIN_IRQ); regmap_update_bits(rt5645->regmap, RT5645_GEN_CTRL1, RT5645_DIG_GATE_CTRL, RT5645_DIG_GATE_CTRL); + regmap_update_bits(rt5645->regmap, RT5645_DEPOP_M1, + RT5645_HP_CB_MASK, RT5645_HP_CB_PU); } rt5645_irq(0, rt5645); From 4e9a429ae80657bdc502d3f5078e2073656ec5fd Mon Sep 17 00:00:00 2001 From: Roy Chateau Date: Fri, 13 Oct 2023 13:02:39 +0200 Subject: [PATCH 082/212] ASoC: codecs: tas2780: Fix log of failed reset via I2C. Correctly log failures of reset via I2C. Signed-off-by: Roy Chateau Link: https://lore.kernel.org/r/20231013110239.473123-1-roy.chateau@mep-info.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2780.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2780.c b/sound/soc/codecs/tas2780.c index 86bd6c18a944..41076be23854 100644 --- a/sound/soc/codecs/tas2780.c +++ b/sound/soc/codecs/tas2780.c @@ -39,7 +39,7 @@ static void tas2780_reset(struct tas2780_priv *tas2780) usleep_range(2000, 2050); } - snd_soc_component_write(tas2780->component, TAS2780_SW_RST, + ret = snd_soc_component_write(tas2780->component, TAS2780_SW_RST, TAS2780_RST); if (ret) dev_err(tas2780->dev, "%s:errCode:0x%x Reset error!\n", From 9c97790a07dc4f9bdc6e1701003dc9b86f749c71 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 13 Oct 2023 18:37:33 +0100 Subject: [PATCH 083/212] ASoC: dwc: Fix non-DT instantiation Commit d6d6c513f5d2 ("ASoC: dwc: Use ops to get platform data") converted the DesignWare I2S driver to use a DT specific function to obtain platform data but this breaks at least non-DT systems such as AMD. Revert it. Fixes: d6d6c513f5d2 ("ASoC: dwc: Use ops to get platform data") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20231013-asoc-fix-dwc-v1-1-63211bb746b9@kernel.org Signed-off-by: Mark Brown --- sound/soc/dwc/dwc-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c index 22c004179214..9ea4be56d3b7 100644 --- a/sound/soc/dwc/dwc-i2s.c +++ b/sound/soc/dwc/dwc-i2s.c @@ -917,7 +917,7 @@ static int jh7110_i2stx0_clk_cfg(struct i2s_clk_config_data *config) static int dw_i2s_probe(struct platform_device *pdev) { - const struct i2s_platform_data *pdata = of_device_get_match_data(&pdev->dev); + const struct i2s_platform_data *pdata = pdev->dev.platform_data; struct dw_i2s_dev *dev; struct resource *res; int ret, irq; From ff9e8f41513669e290f6e1904e1bc75950584491 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Aug 2023 22:28:49 +1000 Subject: [PATCH 084/212] powerpc/mm: Allow ARCH_FORCE_MAX_ORDER up to 12 Christophe reported that the change to ARCH_FORCE_MAX_ORDER to limit the range to 10 had broken his ability to configure hugepages: # echo 1 > /sys/kernel/mm/hugepages/hugepages-8192kB/nr_hugepages sh: write error: Invalid argument Several of the powerpc defconfigs previously set the ARCH_FORCE_MAX_ORDER value to 12, via the definition in arch/powerpc/configs/fsl-emb-nonhw.config, used by: mpc85xx_defconfig mpc85xx_smp_defconfig corenet32_smp_defconfig corenet64_smp_defconfig mpc86xx_defconfig mpc86xx_smp_defconfig Fix it by increasing the allowed range to 12 to restore the previous behaviour. Fixes: 358e526a1648 ("powerpc/mm: Reinstate ARCH_FORCE_MAX_ORDER ranges") Reported-by: Christophe Leroy Closes: https://lore.kernel.org/all/8011d806-5b30-bf26-2bfe-a08c39d57e20@csgroup.eu/ Tested-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20230824122849.942072-1-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3aaadfd2c8eb..d5d5388973ac 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -910,7 +910,7 @@ config ARCH_FORCE_MAX_ORDER default "6" if PPC32 && PPC_64K_PAGES range 4 10 if PPC32 && PPC_256K_PAGES default "4" if PPC32 && PPC_256K_PAGES - range 10 10 + range 10 12 default "10" help The kernel page allocator limits the size of maximal physically From 52480e1f1a259c93d749ba3961af0bffedfe7a7a Mon Sep 17 00:00:00 2001 From: Puliang Lu Date: Mon, 16 Oct 2023 15:36:16 +0800 Subject: [PATCH 085/212] USB: serial: option: add Fibocom to DELL custom modem FM101R-GL Update the USB serial option driver support for the Fibocom FM101R-GL LTE modules as there are actually several different variants. - VID:PID 413C:8213, FM101R-GL are laptop M.2 cards (with MBIM interfaces for Linux) - VID:PID 413C:8215, FM101R-GL ESIM are laptop M.2 cards (with MBIM interface for Linux) 0x8213: mbim, tty 0x8215: mbim, tty T: Bus=04 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=413c ProdID=8213 Rev= 5.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom FM101-GL Module S: SerialNumber=a3b7cbf0 C:* #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=896mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms T: Bus=04 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 3 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=413c ProdID=8215 Rev= 5.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom FM101-GL Module S: SerialNumber=a3b7cbf0 C:* #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=896mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: Puliang Lu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c0908e6d4921..45dcfaadaf98 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -203,6 +203,9 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5829E_ESIM 0x81e4 #define DELL_PRODUCT_5829E 0x81e6 +#define DELL_PRODUCT_FM101R 0x8213 +#define DELL_PRODUCT_FM101R_ESIM 0x8215 + #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da #define KYOCERA_PRODUCT_KPC680 0x180a @@ -1108,6 +1111,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(6) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E_ESIM), .driver_info = RSVD(0) | RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(DELL_VENDOR_ID, DELL_PRODUCT_FM101R, 0xff) }, + { USB_DEVICE_INTERFACE_CLASS(DELL_VENDOR_ID, DELL_PRODUCT_FM101R_ESIM, 0xff) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, From f6ca3fb6978f94d95ee79f95085fc22e71ca17cc Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Fri, 22 Sep 2023 16:17:16 +0200 Subject: [PATCH 086/212] mtd: rawnand: Ensure the nand chip supports cached reads Both the JEDEC and ONFI specification say that read cache sequential support is an optional command. This means that we not only need to check whether the individual controller supports the command, we also need to check the parameter pages for both ONFI and JEDEC NAND flashes before enabling sequential cache reads. This fixes support for NAND flashes which don't support enabling cache reads, i.e. Samsung K9F4G08U0F or Toshiba TC58NVG0S3HTA00. Sequential cache reads are now only available for ONFI and JEDEC devices, if individual vendors implement this, it needs to be enabled per vendor. Tested on i.MX6Q with a Samsung NAND flash chip that doesn't support sequential reads. Fixes: 003fe4b9545b ("mtd: rawnand: Support for sequential cache reads") Cc: stable@vger.kernel.org Signed-off-by: Rouven Czerwinski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230922141717.35977-1-r.czerwinski@pengutronix.de --- drivers/mtd/nand/raw/nand_base.c | 3 +++ drivers/mtd/nand/raw/nand_jedec.c | 3 +++ drivers/mtd/nand/raw/nand_onfi.c | 3 +++ include/linux/mtd/jedec.h | 3 +++ include/linux/mtd/onfi.h | 1 + include/linux/mtd/rawnand.h | 2 ++ 6 files changed, 15 insertions(+) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index d4b55155aeae..1fcac403cee6 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5110,6 +5110,9 @@ static void rawnand_check_cont_read_support(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); + if (!chip->parameters.supports_read_cache) + return; + if (chip->read_retries) return; diff --git a/drivers/mtd/nand/raw/nand_jedec.c b/drivers/mtd/nand/raw/nand_jedec.c index 836757717660..b3cc8f360529 100644 --- a/drivers/mtd/nand/raw/nand_jedec.c +++ b/drivers/mtd/nand/raw/nand_jedec.c @@ -94,6 +94,9 @@ int nand_jedec_detect(struct nand_chip *chip) goto free_jedec_param_page; } + if (p->opt_cmd[0] & JEDEC_OPT_CMD_READ_CACHE) + chip->parameters.supports_read_cache = true; + memorg->pagesize = le32_to_cpu(p->byte_per_page); mtd->writesize = memorg->pagesize; diff --git a/drivers/mtd/nand/raw/nand_onfi.c b/drivers/mtd/nand/raw/nand_onfi.c index f15ef90aec8c..861975e44b55 100644 --- a/drivers/mtd/nand/raw/nand_onfi.c +++ b/drivers/mtd/nand/raw/nand_onfi.c @@ -303,6 +303,9 @@ int nand_onfi_detect(struct nand_chip *chip) ONFI_FEATURE_ADDR_TIMING_MODE, 1); } + if (le16_to_cpu(p->opt_cmd) & ONFI_OPT_CMD_READ_CACHE) + chip->parameters.supports_read_cache = true; + onfi = kzalloc(sizeof(*onfi), GFP_KERNEL); if (!onfi) { ret = -ENOMEM; diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h index 0b6b59f7cfbd..56047a4e54c9 100644 --- a/include/linux/mtd/jedec.h +++ b/include/linux/mtd/jedec.h @@ -21,6 +21,9 @@ struct jedec_ecc_info { /* JEDEC features */ #define JEDEC_FEATURE_16_BIT_BUS (1 << 0) +/* JEDEC Optional Commands */ +#define JEDEC_OPT_CMD_READ_CACHE BIT(1) + struct nand_jedec_params { /* rev info and features block */ /* 'J' 'E' 'S' 'D' */ diff --git a/include/linux/mtd/onfi.h b/include/linux/mtd/onfi.h index a7376f9beddf..55ab2e4d62f9 100644 --- a/include/linux/mtd/onfi.h +++ b/include/linux/mtd/onfi.h @@ -55,6 +55,7 @@ #define ONFI_SUBFEATURE_PARAM_LEN 4 /* ONFI optional commands SET/GET FEATURES supported? */ +#define ONFI_OPT_CMD_READ_CACHE BIT(1) #define ONFI_OPT_CMD_SET_GET_FEATURES BIT(2) struct nand_onfi_params { diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 90a141ba2a5a..c29ace15a053 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -225,6 +225,7 @@ struct gpio_desc; * struct nand_parameters - NAND generic parameters from the parameter page * @model: Model name * @supports_set_get_features: The NAND chip supports setting/getting features + * @supports_read_cache: The NAND chip supports read cache operations * @set_feature_list: Bitmap of features that can be set * @get_feature_list: Bitmap of features that can be get * @onfi: ONFI specific parameters @@ -233,6 +234,7 @@ struct nand_parameters { /* Generic parameters */ const char *model; bool supports_set_get_features; + bool supports_read_cache; DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER); DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); From e07744b43d3ad10b040f0ec464b6323ca96903d6 Mon Sep 17 00:00:00 2001 From: Liming Wu Date: Sun, 8 Oct 2023 11:17:33 +0800 Subject: [PATCH 087/212] tools/virtio: Add dma sync api for virtio test Fixes: 8bd2f71054bd ("virtio_ring: introduce dma sync api for virtqueue") also add dma sync api for virtio test. Signed-off-by: Liming Wu Message-Id: <20231008031734.1095-1-liming.wu@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/dma-mapping.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index 834a90bd3270..822ecaa8e4df 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -24,11 +24,23 @@ enum dma_data_direction { #define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o)) #define dma_map_single(d, p, s, dir) (virt_to_phys(p)) +#define dma_map_single_attrs(d, p, s, dir, a) (virt_to_phys(p)) #define dma_mapping_error(...) (0) #define dma_unmap_single(d, a, s, r) do { (void)(d); (void)(a); (void)(s); (void)(r); } while (0) #define dma_unmap_page(d, a, s, r) do { (void)(d); (void)(a); (void)(s); (void)(r); } while (0) +#define sg_dma_address(sg) (0) +#define dma_need_sync(v, a) (0) +#define dma_unmap_single_attrs(d, a, s, r, t) do { \ + (void)(d); (void)(a); (void)(s); (void)(r); (void)(t); \ +} while (0) +#define dma_sync_single_range_for_cpu(d, a, o, s, r) do { \ + (void)(d); (void)(a); (void)(o); (void)(s); (void)(r); \ +} while (0) +#define dma_sync_single_range_for_device(d, a, o, s, r) do { \ + (void)(d); (void)(a); (void)(o); (void)(s); (void)(r); \ +} while (0) #define dma_max_mapping_size(...) SIZE_MAX #endif From 63e8b94ad1840f02462633abdb363397f56bc642 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 21 Sep 2023 15:14:12 +0800 Subject: [PATCH 088/212] s390/cio: fix a memleak in css_alloc_subchannel When dma_set_coherent_mask() fails, sch->lock has not been freed, which is allocated in css_sch_create_locks(), leading to a memleak. Fixes: 4520a91a976e ("s390/cio: use dma helpers for setting masks") Signed-off-by: Dinghao Liu Message-Id: <20230921071412.13806-1-dinghao.liu@zju.edu.cn> Link: https://lore.kernel.org/linux-s390/bd38baa8-7b9d-4d89-9422-7e943d626d6e@linux.ibm.com/ Reviewed-by: Halil Pasic Reviewed-by: Peter Oberparleiter Signed-off-by: Vasily Gorbik --- drivers/s390/cio/css.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 3ef636935a54..3ff46fc694f8 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -233,17 +233,19 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid, */ ret = dma_set_coherent_mask(&sch->dev, DMA_BIT_MASK(31)); if (ret) - goto err; + goto err_lock; /* * But we don't have such restrictions imposed on the stuff that * is handled by the streaming API. */ ret = dma_set_mask(&sch->dev, DMA_BIT_MASK(64)); if (ret) - goto err; + goto err_lock; return sch; +err_lock: + kfree(sch->lock); err: kfree(sch); return ERR_PTR(ret); From 327899674eef18f96644be87aa5510b7523fe4f6 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 12 Oct 2023 11:06:21 +0200 Subject: [PATCH 089/212] s390/kasan: handle DCSS mapping in memory holes When physical memory is defined under z/VM using DEF STOR CONFIG, there may be memory holes that are not hotpluggable memory. In such cases, DCSS mapping could be placed in one of these memory holes. Subsequently, attempting memory access to such DCSS mapping would result in a kasan failure because there is no shadow memory mapping for it. To maintain consistency with cases where DCSS mapping is positioned after the kernel identity mapping, which is then covered by kasan zero shadow mapping, handle the scenario above by populating zero shadow mapping for memory holes where DCSS mapping could potentially be placed. Reviewed-by: Heiko Carstens Reviewed-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/boot/vmem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 01257ce3b89c..442a74f113cb 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -57,6 +57,7 @@ static void kasan_populate_shadow(void) pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long memgap_start = 0; unsigned long untracked_end; unsigned long start, end; int i; @@ -101,8 +102,12 @@ static void kasan_populate_shadow(void) * +- shadow end ----+---------+- shadow end ---+ */ - for_each_physmem_usable_range(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) { kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); + if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) + kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW); + memgap_start = end; + } if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_end = VMALLOC_START; /* shallowly populate kasan shadow for vmalloc and modules */ From 3b401e30c249849d803de6c332dad2a595a58658 Mon Sep 17 00:00:00 2001 From: Karolina Stolarek Date: Mon, 16 Oct 2023 14:15:25 +0200 Subject: [PATCH 090/212] drm/ttm: Reorder sys manager cleanup step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the current cleanup flow, we could trigger a NULL pointer dereference if there is a delayed destruction of a BO with a system resource that gets executed on drain_workqueue() call, as we attempt to free a resource using an already released resource manager. Remove the device from the device list and drain its workqueue before releasing the system domain manager in ttm_device_fini(). Signed-off-by: Karolina Stolarek Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20231016121525.2237838-1-karolina.stolarek@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 7726a72befc5..d48b39132b32 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -232,10 +232,6 @@ void ttm_device_fini(struct ttm_device *bdev) struct ttm_resource_manager *man; unsigned i; - man = ttm_manager_type(bdev, TTM_PL_SYSTEM); - ttm_resource_manager_set_used(man, false); - ttm_set_driver_manager(bdev, TTM_PL_SYSTEM, NULL); - mutex_lock(&ttm_global_mutex); list_del(&bdev->device_list); mutex_unlock(&ttm_global_mutex); @@ -243,6 +239,10 @@ void ttm_device_fini(struct ttm_device *bdev) drain_workqueue(bdev->wq); destroy_workqueue(bdev->wq); + man = ttm_manager_type(bdev, TTM_PL_SYSTEM); + ttm_resource_manager_set_used(man, false); + ttm_set_driver_manager(bdev, TTM_PL_SYSTEM, NULL); + spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) if (list_empty(&man->lru[0])) From c8befdc411e5fd1bf95a13e8744c8ca79b412bee Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 13 Oct 2023 16:57:05 +0200 Subject: [PATCH 091/212] pinctrl: qcom: lpass-lpi: fix concurrent register updates The Qualcomm LPASS LPI pin controller driver uses one lock for guarding Read-Modify-Write code for slew rate registers. However the pin configuration and muxing registers have exactly the same RMW code but are not protected. Pin controller framework does not provide locking here, thus it is possible to trigger simultaneous change of pin configuration registers resulting in non-atomic changes. Protect from concurrent access by re-using the same lock used to cover the slew rate register. Using the same lock instead of adding second one will make more sense, once we add support for newer Qualcomm SoC, where slew rate is configured in the same register as pin configuration/muxing. Fixes: 6e261d1090d6 ("pinctrl: qcom: Add sm8250 lpass lpi pinctrl driver") Cc: stable@vger.kernel.org Reviewed-by: Linus Walleij Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231013145705.219954-1-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-lpass-lpi.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c index e5a418026ba3..0b2839d27fd6 100644 --- a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c +++ b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c @@ -32,7 +32,8 @@ struct lpi_pinctrl { char __iomem *tlmm_base; char __iomem *slew_base; struct clk_bulk_data clks[MAX_LPI_NUM_CLKS]; - struct mutex slew_access_lock; + /* Protects from concurrent register updates */ + struct mutex lock; DECLARE_BITMAP(ever_gpio, MAX_NR_GPIO); const struct lpi_pinctrl_variant_data *data; }; @@ -103,6 +104,7 @@ static int lpi_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned int function, if (WARN_ON(i == g->nfuncs)) return -EINVAL; + mutex_lock(&pctrl->lock); val = lpi_gpio_read(pctrl, pin, LPI_GPIO_CFG_REG); /* @@ -128,6 +130,7 @@ static int lpi_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned int function, u32p_replace_bits(&val, i, LPI_GPIO_FUNCTION_MASK); lpi_gpio_write(pctrl, pin, LPI_GPIO_CFG_REG, val); + mutex_unlock(&pctrl->lock); return 0; } @@ -233,14 +236,14 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group, if (slew_offset == LPI_NO_SLEW) break; - mutex_lock(&pctrl->slew_access_lock); + mutex_lock(&pctrl->lock); sval = ioread32(pctrl->slew_base + LPI_SLEW_RATE_CTL_REG); sval &= ~(LPI_SLEW_RATE_MASK << slew_offset); sval |= arg << slew_offset; iowrite32(sval, pctrl->slew_base + LPI_SLEW_RATE_CTL_REG); - mutex_unlock(&pctrl->slew_access_lock); + mutex_unlock(&pctrl->lock); break; default: return -EINVAL; @@ -256,6 +259,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group, lpi_gpio_write(pctrl, group, LPI_GPIO_VALUE_REG, val); } + mutex_lock(&pctrl->lock); val = lpi_gpio_read(pctrl, group, LPI_GPIO_CFG_REG); u32p_replace_bits(&val, pullup, LPI_GPIO_PULL_MASK); @@ -264,6 +268,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group, u32p_replace_bits(&val, output_enabled, LPI_GPIO_OE_MASK); lpi_gpio_write(pctrl, group, LPI_GPIO_CFG_REG, val); + mutex_unlock(&pctrl->lock); return 0; } @@ -461,7 +466,7 @@ int lpi_pinctrl_probe(struct platform_device *pdev) pctrl->chip.label = dev_name(dev); pctrl->chip.can_sleep = false; - mutex_init(&pctrl->slew_access_lock); + mutex_init(&pctrl->lock); pctrl->ctrl = devm_pinctrl_register(dev, &pctrl->desc, pctrl); if (IS_ERR(pctrl->ctrl)) { @@ -483,7 +488,7 @@ int lpi_pinctrl_probe(struct platform_device *pdev) return 0; err_pinctrl: - mutex_destroy(&pctrl->slew_access_lock); + mutex_destroy(&pctrl->lock); clk_bulk_disable_unprepare(MAX_LPI_NUM_CLKS, pctrl->clks); return ret; @@ -495,7 +500,7 @@ int lpi_pinctrl_remove(struct platform_device *pdev) struct lpi_pinctrl *pctrl = platform_get_drvdata(pdev); int i; - mutex_destroy(&pctrl->slew_access_lock); + mutex_destroy(&pctrl->lock); clk_bulk_disable_unprepare(MAX_LPI_NUM_CLKS, pctrl->clks); for (i = 0; i < pctrl->data->npins; i++) From 88630e91f12677848c0c4a5790ec0d691f8859fa Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Thu, 12 Oct 2023 14:49:27 -0400 Subject: [PATCH 092/212] drm/edid: add 8 bpc quirk to the BenQ GW2765 The BenQ GW2765 reports that it supports higher (> 8) bpc modes, but when trying to set them we end up with a black screen. So, limit it to 8 bpc modes. Cc: stable@vger.kernel.org # 6.5+ Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2610 Reviewed-by: Harry Wentland Signed-off-by: Hamza Mahfooz Link: https://patchwork.freedesktop.org/patch/msgid/20231012184927.133137-1-hamza.mahfooz@amd.com --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 340da8257b51..4b71040ae5be 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -123,6 +123,9 @@ static const struct edid_quirk { /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ EDID_QUIRK('A', 'E', 'O', 0, EDID_QUIRK_FORCE_6BPC), + /* BenQ GW2765 */ + EDID_QUIRK('B', 'N', 'Q', 0x78d6, EDID_QUIRK_FORCE_8BPC), + /* BOE model on HP Pavilion 15-n233sl reports 8 bpc, but is a 6 bpc panel */ EDID_QUIRK('B', 'O', 'E', 0x78b, EDID_QUIRK_FORCE_6BPC), From dcc583c225e659d5da34b4ad83914fd6b51e3dbf Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 4 Oct 2023 16:32:24 +0800 Subject: [PATCH 093/212] drm/mediatek: Correctly free sg_table in gem prime vmap The MediaTek DRM driver implements GEM PRIME vmap by fetching the sg_table for the object, iterating through the pages, and then vmapping them. In essence, unlike the GEM DMA helpers which vmap when the object is first created or imported, the MediaTek version does it on request. Unfortunately, the code never correctly frees the sg_table contents. This results in a kernel memory leak. On a Hayato device with a text console on the internal display, this results in the system running out of memory in a few days from all the console screen cursor updates. Add sg_free_table() to correctly free the contents of the sg_table. This was missing despite explicitly required by mtk_gem_prime_get_sg_table(). Also move the "out" shortcut label to after the kfree() call for the sg_table. Having sg_free_table() together with kfree() makes more sense. The shortcut is only used when the object already has a kernel address, in which case the pointer is NULL and kfree() does nothing. Hence this change causes no functional change. Fixes: 3df64d7b0a4f ("drm/mediatek: Implement gem prime vmap/vunmap function") Cc: Signed-off-by: Chen-Yu Tsai Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20231004083226.1940055-1-wenst@chromium.org/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_gem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index 9f364df52478..0e0a41b2f57f 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -239,6 +239,7 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) npages = obj->size >> PAGE_SHIFT; mtk_gem->pages = kcalloc(npages, sizeof(*mtk_gem->pages), GFP_KERNEL); if (!mtk_gem->pages) { + sg_free_table(sgt); kfree(sgt); return -ENOMEM; } @@ -248,12 +249,15 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); if (!mtk_gem->kvaddr) { + sg_free_table(sgt); kfree(sgt); kfree(mtk_gem->pages); return -ENOMEM; } -out: + sg_free_table(sgt); kfree(sgt); + +out: iosys_map_set_vaddr(map, mtk_gem->kvaddr); return 0; From e40c04ade0e2f3916b78211d747317843b11ce10 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Sun, 15 Oct 2023 13:45:29 +0200 Subject: [PATCH 094/212] scsi: mpt3sas: Fix in error path The driver should be deregistered as misc driver after PCI registration failure. Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20231015114529.10725-1-thenzl@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index c3c1f466fe01..605013d3ee83 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -12913,8 +12913,10 @@ _mpt3sas_init(void) mpt3sas_ctl_init(hbas_to_enumerate); error = pci_register_driver(&mpt3sas_driver); - if (error) + if (error) { + mpt3sas_ctl_exit(hbas_to_enumerate); scsih_exit(); + } return error; } From 097c06394c835be0cf21e23f9bd13ff771601b63 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Mon, 16 Oct 2023 15:47:49 +0530 Subject: [PATCH 095/212] scsi: qla2xxx: Fix double free of dsd_list during driver load On driver load, scsi_add_host() can fail. This triggers the free path to call qla2x00_mem_free() multiple times. This causes NULL pointer access of ha->base_qpair. Add check before access. BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 IP: [] qla2x00_mem_free+0x51c/0xcb0 [qla2xxx] PGD 8000001fcfe4a067 PUD 1fc8f0a067 PMD 0 Oops: 0000 [#1] SMP RIP: 0010:[] [] qla2x00_mem_free+0x51c/0xcb0 [qla2xxx] RSP: 0018:ffff8ace97a93a30 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8ace8efd0000 RCX: 000000000000488f RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff8ace97a93a60 R08: 000000000001f040 R09: ffffffff8678209b R10: ffff8acf7d6df040 R11: ffffc591c0fcc980 R12: ffffffff87034800 R13: ffff8acf0e3cc740 R14: ffff8ace8efd0000 R15: 00000000fffffff4 FS: 00007f4cf5449740(0000) GS:ffff8acf7d6c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000030 CR3: 0000001fc2f6c000 CR4: 00000000007607e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: [] ? kobject_put+0x28/0x60 [] qla2x00_probe_one+0x19fc/0x3040 [qla2xxx] Fixes: efeda3bf912f ("scsi: qla2xxx: Move resource to allow code reuse") Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20231016101749.5059-1-njavali@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 50db08265c51..dcae09a37d49 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4953,7 +4953,7 @@ qla2x00_mem_free(struct qla_hw_data *ha) ha->gid_list = NULL; ha->gid_list_dma = 0; - if (!list_empty(&ha->base_qpair->dsd_list)) { + if (ha->base_qpair && !list_empty(&ha->base_qpair->dsd_list)) { struct dsd_dma *dsd_ptr, *tdsd_ptr; /* clean up allocated prev pool */ From 1aee9158bc978f91701c5992e395efbc6da2de3c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 14 Oct 2023 21:34:40 -0400 Subject: [PATCH 096/212] nfsd: lock_rename() needs both directories to live on the same fs ... checking that after lock_rename() is too late. Incidentally, NFSv2 had no nfserr_xdev... Fixes: aa387d6ce153 "nfsd: fix EXDEV checking in rename" Cc: stable@vger.kernel.org # v3.9+ Reviewed-by: Jeff Layton Acked-by: Chuck Lever Tested-by: Jeff Layton Signed-off-by: Al Viro --- fs/nfsd/vfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 48260cf68fde..02f5fcaad03f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1788,6 +1788,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; + err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; + if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) + goto out; + if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) + goto out; + retry: host_err = fh_want_write(ffhp); if (host_err) { @@ -1823,12 +1829,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (ndentry == trap) goto out_dput_new; - host_err = -EXDEV; - if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) - goto out_dput_new; - if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) - goto out_dput_new; - if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) && nfsd_has_cached_files(ndentry)) { close_cached = true; From db7724134c26fdf16886a560646d02292563f5a4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 16 Oct 2023 17:24:31 +0200 Subject: [PATCH 097/212] x86/boot: efistub: Assign global boot_params variable Now that the x86 EFI stub calls into some APIs exposed by the decompressor (e.g., kaslr_get_random_long()), it is necessary to ensure that the global boot_params variable is set correctly before doing so. Note that the decompressor and the kernel proper carry conflicting declarations for the global variable 'boot_params' so refer to it via an alias to work around this. Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/x86-stub.c | 2 ++ drivers/firmware/efi/libstub/x86-stub.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 3b8bccd7c216..9d5df683f882 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -849,6 +849,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle, unsigned long kernel_entry; efi_status_t status; + boot_params_pointer = boot_params; + efi_system_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h index 37c5a36b9d8c..2748bca192df 100644 --- a/drivers/firmware/efi/libstub/x86-stub.h +++ b/drivers/firmware/efi/libstub/x86-stub.h @@ -2,6 +2,8 @@ #include +extern struct boot_params *boot_params_pointer asm("boot_params"); + extern void trampoline_32bit_src(void *, bool); extern const u16 trampoline_ljmp_imm_offset; From 56e85993896b914032d11e32ecbf8415e7b2f621 Mon Sep 17 00:00:00 2001 From: Luka Guzenko Date: Tue, 17 Oct 2023 00:13:28 +0200 Subject: [PATCH 098/212] ALSA: hda/relatek: Enable Mute LED on HP Laptop 15s-fq5xxx This HP Laptop uses ALC236 codec with COEF 0x07 controlling the mute LED. Enable existing quirk for this device. Signed-off-by: Luka Guzenko Cc: Link: https://lore.kernel.org/r/20231016221328.1521674-1-l.guzenko@web.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3eeecf67c17b..4b68d3df9473 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9722,6 +9722,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x89d3, "HP EliteBook 645 G9 (MB 89D2)", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8a20, "HP Laptop 15s-fq5xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8a25, "HP Victus 16-d1xxx (MB 8A25)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED), From 5dedc9f53eef7ec07b23686381100d03fb259f50 Mon Sep 17 00:00:00 2001 From: Artem Borisov Date: Sat, 14 Oct 2023 10:50:42 +0300 Subject: [PATCH 099/212] ALSA: hda/realtek: Add quirk for ASUS ROG GU603ZV Enables the SPI-connected Cirrus amp and the required pins for headset mic detection. As of BIOS version 313 it is still necessary to modify the ACPI table to add the related _DSD properties: https://gist.github.com/Flex1911/1bce378645fc95a5743671bd5deabfc8 Signed-off-by: Artem Borisov Cc: Link: https://lore.kernel.org/r/20231014075044.17474-1-dedsa2002@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4b68d3df9473..b75e28fb4c53 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9792,6 +9792,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301V", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), + SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZV", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1683, "ASUS UM3402YAR", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), From c8c0a03ec1be6b3f3ec1ce91685351235212db19 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 17 Oct 2023 15:30:24 +0800 Subject: [PATCH 100/212] ALSA: hda/realtek - Fixed ASUS platform headset Mic issue ASUS platform Headset Mic was disable by default. Assigned verb table for Mic pin will enable it. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/1155d914c20c40569f56d36c79254879@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b75e28fb4c53..9677c09cf7a9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7078,6 +7078,24 @@ static void alc287_fixup_bind_dacs(struct hda_codec *codec, 0x0); /* Make sure 0x14 was disable */ } } +/* Fix none verb table of Headset Mic pin */ +static void alc_fixup_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + static const struct hda_pintbl pincfgs[] = { + { 0x19, 0x03a1103c }, + { } + }; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_apply_pincfgs(codec, pincfgs); + alc_update_coef_idx(codec, 0x45, 0xf<<12 | 1<<10, 5<<12); + spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; + break; + } +} enum { @@ -7344,6 +7362,7 @@ enum { ALC245_FIXUP_HP_X360_MUTE_LEDS, ALC287_FIXUP_THINKPAD_I2S_SPK, ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD, + ALC2XX_FIXUP_HEADSET_MIC, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9448,6 +9467,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI, }, + [ALC2XX_FIXUP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_headset_mic, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -10752,6 +10775,8 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, {0x19, 0x40000000}, {0x1a, 0x40000000}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC2XX_FIXUP_HEADSET_MIC, + {0x19, 0x40000000}), {} }; From 63e44bc52047f182601e7817da969a105aa1f721 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 16 Oct 2023 14:42:50 +0200 Subject: [PATCH 101/212] x86/sev: Check for user-space IOIO pointing to kernel space Check the memory operand of INS/OUTS before emulating the instruction. The #VC exception can get raised from user-space, but the memory operand can be manipulated to access kernel memory before the emulation actually begins and after the exception handler has run. [ bp: Massage commit message. ] Fixes: 597cfe48212a ("x86/boot/compressed/64: Setup a GHCB-based VC Exception handler") Reported-by: Tom Dohrmann Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov (AMD) Cc: --- arch/x86/boot/compressed/sev.c | 5 +++++ arch/x86/kernel/sev-shared.c | 31 +++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index afd91041ec3e..80d76aea1f7b 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -108,6 +108,11 @@ static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t si return ES_OK; } +static bool fault_in_kernel_space(unsigned long address) +{ + return false; +} + #undef __init #define __init diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index bcd77c48be0a..4d729686f992 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -592,6 +592,23 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); } +static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt, + unsigned long address, + bool write) +{ + if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) { + ctxt->fi.vector = X86_TRAP_PF; + ctxt->fi.error_code = X86_PF_USER; + ctxt->fi.cr2 = address; + if (write) + ctxt->fi.error_code |= X86_PF_WRITE; + + return ES_EXCEPTION; + } + + return ES_OK; +} + static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, void *src, char *buf, unsigned int data_size, @@ -599,7 +616,12 @@ static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, bool backwards) { int i, b = backwards ? -1 : 1; - enum es_result ret = ES_OK; + unsigned long address = (unsigned long)src; + enum es_result ret; + + ret = vc_insn_string_check(ctxt, address, false); + if (ret != ES_OK) + return ret; for (i = 0; i < count; i++) { void *s = src + (i * data_size * b); @@ -620,7 +642,12 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, bool backwards) { int i, s = backwards ? -1 : 1; - enum es_result ret = ES_OK; + unsigned long address = (unsigned long)dst; + enum es_result ret; + + ret = vc_insn_string_check(ctxt, address, true); + if (ret != ES_OK) + return ret; for (i = 0; i < count; i++) { void *d = dst + (i * data_size * s); From e8ecffd9962fe051d53a0761921b26d653b3df6b Mon Sep 17 00:00:00 2001 From: David Rau Date: Tue, 17 Oct 2023 10:12:58 +0800 Subject: [PATCH 102/212] ASoC: da7219: Correct the process of setting up Gnd switch in AAD Enable Gnd switch to improve stability when Jack insert event occurs, and then disable Gnd switch after Jack type detection is finished. Signed-off-by: David Rau Link: https://lore.kernel.org/r/20231017021258.5929-1-David.Rau.opensource@dm.renesas.com Signed-off-by: Mark Brown --- sound/soc/codecs/da7219-aad.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c index 581b334a6631..3bbe85091649 100644 --- a/sound/soc/codecs/da7219-aad.c +++ b/sound/soc/codecs/da7219-aad.c @@ -59,9 +59,6 @@ static void da7219_aad_btn_det_work(struct work_struct *work) bool micbias_up = false; int retries = 0; - /* Disable ground switch */ - snd_soc_component_update_bits(component, 0xFB, 0x01, 0x00); - /* Drive headphones/lineout */ snd_soc_component_update_bits(component, DA7219_HP_L_CTRL, DA7219_HP_L_AMP_OE_MASK, @@ -155,9 +152,6 @@ static void da7219_aad_hptest_work(struct work_struct *work) tonegen_freq_hptest = cpu_to_le16(DA7219_AAD_HPTEST_RAMP_FREQ_INT_OSC); } - /* Disable ground switch */ - snd_soc_component_update_bits(component, 0xFB, 0x01, 0x00); - /* Ensure gain ramping at fastest rate */ gain_ramp_ctrl = snd_soc_component_read(component, DA7219_GAIN_RAMP_CTRL); snd_soc_component_write(component, DA7219_GAIN_RAMP_CTRL, DA7219_GAIN_RAMP_RATE_X8); @@ -421,6 +415,11 @@ static irqreturn_t da7219_aad_irq_thread(int irq, void *data) * handle a removal, and we can check at the end of * hptest if we have a valid result or not. */ + + cancel_delayed_work_sync(&da7219_aad->jack_det_work); + /* Disable ground switch */ + snd_soc_component_update_bits(component, 0xFB, 0x01, 0x00); + if (statusa & DA7219_JACK_TYPE_STS_MASK) { report |= SND_JACK_HEADSET; mask |= SND_JACK_HEADSET | SND_JACK_LINEOUT; From cf5a103c98a6fb9ee3164334cb5502df6360749b Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Thu, 5 Oct 2023 21:57:12 +0000 Subject: [PATCH 103/212] selftests/user_events: Fix abi_test for BE archs The abi_test currently uses a long sized test value for enablement checks. On LE this works fine, however, on BE this results in inaccurate assert checks due to a bit being used and assuming it's value is the same on both LE and BE. Use int type for 32-bit values and long type for 64-bit values to ensure appropriate behavior on both LE and BE. Fixes: 60b1af8de8c1 ("tracing/user_events: Add ABI self-test") Signed-off-by: Beau Belgrave Acked-by: Steven Rostedt (Google) Signed-off-by: Shuah Khan --- tools/testing/selftests/user_events/abi_test.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c index 8202f1327c39..f5575ef2007c 100644 --- a/tools/testing/selftests/user_events/abi_test.c +++ b/tools/testing/selftests/user_events/abi_test.c @@ -47,7 +47,7 @@ static int change_event(bool enable) return ret; } -static int reg_enable(long *enable, int size, int bit) +static int reg_enable(void *enable, int size, int bit) { struct user_reg reg = {0}; int fd = open(data_file, O_RDWR); @@ -69,7 +69,7 @@ static int reg_enable(long *enable, int size, int bit) return ret; } -static int reg_disable(long *enable, int bit) +static int reg_disable(void *enable, int bit) { struct user_unreg reg = {0}; int fd = open(data_file, O_RDWR); @@ -90,7 +90,8 @@ static int reg_disable(long *enable, int bit) } FIXTURE(user) { - long check; + int check; + long check_long; bool umount; }; @@ -99,6 +100,7 @@ FIXTURE_SETUP(user) { change_event(false); self->check = 0; + self->check_long = 0; } FIXTURE_TEARDOWN(user) { @@ -136,9 +138,9 @@ TEST_F(user, bit_sizes) { #if BITS_PER_LONG == 8 /* Allow 0-64 bits for 64-bit */ - ASSERT_EQ(0, reg_enable(&self->check, sizeof(long), 63)); - ASSERT_NE(0, reg_enable(&self->check, sizeof(long), 64)); - ASSERT_EQ(0, reg_disable(&self->check, 63)); + ASSERT_EQ(0, reg_enable(&self->check_long, sizeof(long), 63)); + ASSERT_NE(0, reg_enable(&self->check_long, sizeof(long), 64)); + ASSERT_EQ(0, reg_disable(&self->check_long, 63)); #endif /* Disallowed sizes (everything beside 4 and 8) */ @@ -200,7 +202,7 @@ static int clone_check(void *check) for (i = 0; i < 10; ++i) { usleep(100000); - if (*(long *)check) + if (*(int *)check) return 0; } From 20045f0155ab79f8beb840022ea86bff46167f79 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 17 Oct 2023 23:15:27 +1100 Subject: [PATCH 104/212] powerpc/64s/radix: Don't warn on copros in radix__tlb_flush() Sachin reported a warning when running the inject-ra-err selftest: # selftests: powerpc/mce: inject-ra-err Disabling lock debugging due to kernel taint MCE: CPU19: machine check (Severe) Real address Load/Store (foreign/control memory) [Not recovered] MCE: CPU19: PID: 5254 Comm: inject-ra-err NIP: [0000000010000e48] MCE: CPU19: Initiator CPU MCE: CPU19: Unknown ------------[ cut here ]------------ WARNING: CPU: 19 PID: 5254 at arch/powerpc/mm/book3s64/radix_tlb.c:1221 radix__tlb_flush+0x160/0x180 CPU: 19 PID: 5254 Comm: inject-ra-err Kdump: loaded Tainted: G M E 6.6.0-rc3-00055-g9ed22ae6be81 #4 Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1030.20 (NH1030_058) hv:phyp pSeries ... NIP radix__tlb_flush+0x160/0x180 LR radix__tlb_flush+0x104/0x180 Call Trace: radix__tlb_flush+0xf4/0x180 (unreliable) tlb_finish_mmu+0x15c/0x1e0 exit_mmap+0x1a0/0x510 __mmput+0x60/0x1e0 exit_mm+0xdc/0x170 do_exit+0x2bc/0x5a0 do_group_exit+0x4c/0xc0 sys_exit_group+0x28/0x30 system_call_exception+0x138/0x330 system_call_vectored_common+0x15c/0x2ec And bisected it to commit e43c0a0c3c28 ("powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs"), which added a warning in radix__tlb_flush() if mm->context.copros is still elevated. However it's possible for the copros count to be elevated if a process exits without first closing file descriptors that are associated with a copro, eg. VAS. If the process exits with a VAS file still open, the release callback is queued up for exit_task_work() via: exit_files() put_files_struct() close_files() filp_close() fput() And called via: exit_task_work() ____fput() __fput() file->f_op->release(inode, file) coproc_release() vas_user_win_ops->close_win() vas_deallocate_window() mm_context_remove_vas_window() mm_context_remove_copro() But that is after exit_mm() has been called from do_exit() and triggered the warning. Fix it by dropping the warning, and always calling __flush_all_mm(). In the normal case of no copros, that will result in a call to _tlbiel_pid(mm->context.id, RIC_FLUSH_ALL) just as the current code does. If the copros count is elevated then it will cause a global flush, which should flush translations from any copros. Note that the process table entry was cleared in arch_exit_mmap(), so copros should not be able to fetch any new translations. Fixes: e43c0a0c3c28 ("powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs") Reported-by: Sachin Sant Closes: https://lore.kernel.org/all/A8E52547-4BF1-47CE-8AEA-BC5A9D7E3567@linux.ibm.com/ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Tested-by: Sachin Sant Link: https://msgid.link/20231017121527.1574104-1-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/radix_tlb.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 39acc2cbab4c..9e1f6558d026 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -1212,14 +1212,7 @@ void radix__tlb_flush(struct mmu_gather *tlb) smp_mb(); /* see radix__flush_tlb_mm */ exit_flush_lazy_tlbs(mm); - _tlbiel_pid(mm->context.id, RIC_FLUSH_ALL); - - /* - * It should not be possible to have coprocessors still - * attached here. - */ - if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0)) - __flush_all_mm(mm, true); + __flush_all_mm(mm, true); preempt_enable(); } else { From eab0261967aeab528db4d0a51806df8209aec179 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Mon, 16 Oct 2023 22:24:39 -0400 Subject: [PATCH 105/212] drm/amdgpu: Unset context priority is now invalid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A context priority value of AMD_CTX_PRIORITY_UNSET is now invalid--instead of carrying it around and passing it to the Direct Rendering Manager--and it becomes AMD_CTX_PRIORITY_NORMAL in amdgpu_ctx_ioctl(), the gateway to context creation. Cc: Alex Deucher Cc: Christian König Signed-off-by: Luben Tuikov Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20231017035656.8211-1-luben.tuikov@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 0dc9c655c4fb..092962b93064 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -47,7 +47,6 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = { bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio) { switch (ctx_prio) { - case AMDGPU_CTX_PRIORITY_UNSET: case AMDGPU_CTX_PRIORITY_VERY_LOW: case AMDGPU_CTX_PRIORITY_LOW: case AMDGPU_CTX_PRIORITY_NORMAL: @@ -55,6 +54,7 @@ bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio) case AMDGPU_CTX_PRIORITY_VERY_HIGH: return true; default: + case AMDGPU_CTX_PRIORITY_UNSET: return false; } } From fa8391ad68c16716e2c06ada397e99ceed2fb647 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Mon, 16 Oct 2023 22:48:56 -0400 Subject: [PATCH 106/212] gpu/drm: Eliminate DRM_SCHED_PRIORITY_UNSET MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminate DRM_SCHED_PRIORITY_UNSET, value of -2, whose only user was amdgpu. Furthermore, eliminate an index bug, in that when amdgpu boots, it calls drm_sched_entity_init() with DRM_SCHED_PRIORITY_UNSET, which uses it to index sched->sched_rq[]. Cc: Alex Deucher Cc: Christian König Signed-off-by: Luben Tuikov Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20231017035656.8211-2-luben.tuikov@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 3 ++- include/drm/gpu_scheduler.h | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 092962b93064..aac52d9754e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -64,7 +64,8 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) { switch (ctx_prio) { case AMDGPU_CTX_PRIORITY_UNSET: - return DRM_SCHED_PRIORITY_UNSET; + pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL"); + return DRM_SCHED_PRIORITY_NORMAL; case AMDGPU_CTX_PRIORITY_VERY_LOW: return DRM_SCHED_PRIORITY_MIN; diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index f9544d9b670d..ac65f0626cfc 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -68,8 +68,7 @@ enum drm_sched_priority { DRM_SCHED_PRIORITY_HIGH, DRM_SCHED_PRIORITY_KERNEL, - DRM_SCHED_PRIORITY_COUNT, - DRM_SCHED_PRIORITY_UNSET = -2 + DRM_SCHED_PRIORITY_COUNT }; /* Used to chose between FIFO and RR jobs scheduling */ From 5e4c16fe08c8b894b258f4110349dc9b642669f9 Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Wed, 4 Oct 2023 17:13:10 -0700 Subject: [PATCH 107/212] drm/i915/cx0: Only clear/set the Pipe Reset bit of the PHY Lanes Owned Currently, with MFD/pin assignment D, the driver clears the pipe reset bit of lane 1 which is not owned by display. This causes the display to block S0iX. By not clearing this bit for lane 1 and keeping whatever default, S0ix started to work. This is already what the driver does at the end of the phy lane reset sequence (Step#8) Bspec: 65451 Fixes: 619a06dba6fa ("drm/i915/mtl: Reset only one lane in case of MFD") Cc: Mika Kahola Cc: Gustavo Sousa Signed-off-by: Khaled Almahallawy Reviewed-by: Gustavo Sousa Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20231005001310.154396-1-khaled.almahallawy@intel.com (cherry picked from commit 4a07f063d20c46524f00976f4537de72d9f31c4e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 1b00ef2c6185..80e4ec6ee403 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2553,8 +2553,7 @@ static void intel_cx0_phy_lane_reset(struct drm_i915_private *i915, drm_warn(&i915->drm, "PHY %c failed to bring out of SOC reset after %dus.\n", phy_name(phy), XELPDP_PORT_BUF_SOC_READY_TIMEOUT_US); - intel_de_rmw(i915, XELPDP_PORT_BUF_CTL2(port), - XELPDP_LANE_PIPE_RESET(0) | XELPDP_LANE_PIPE_RESET(1), + intel_de_rmw(i915, XELPDP_PORT_BUF_CTL2(port), lane_pipe_reset, lane_pipe_reset); if (__intel_de_wait_for_register(i915, XELPDP_PORT_BUF_CTL2(port), From e339c6d628fe66c9b64bf31040a55770952aec57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 12 Oct 2023 16:28:01 +0300 Subject: [PATCH 108/212] drm/i915: Retry gtt fault when out of fence registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we can't find a free fence register to handle a fault in the GMADR range just return VM_FAULT_NOPAGE without populating the PTE so that userspace will retry the access and trigger another fault. Eventually we should find a free fence and the fault will get properly handled. A further improvement idea might be to reserve a fence (or one per CPU?) for the express purpose of handling faults without having to retry. But that would require some additional work. Looks like this may have gotten broken originally by commit 39965b376601 ("drm/i915: don't trash the gtt when running out of fences") as that changed the errno to -EDEADLK which wasn't handle by the gtt fault code either. But later in commit 2feeb52859fc ("drm/i915/gt: Fix -EDEADLK handling regression") I changed it again to -ENOBUFS as -EDEADLK was now getting used for the ww mutex dance. So this fix only makes sense after that last commit. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9479 Fixes: 2feeb52859fc ("drm/i915/gt: Fix -EDEADLK handling regression") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231012132801.16292-1-ville.syrjala@linux.intel.com Reviewed-by: Andi Shyti (cherry picked from commit 7f403caabe811b88ab0de3811ff3f4782c415761) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index aa4d842d4c5a..310654542b42 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -235,6 +235,7 @@ static vm_fault_t i915_error_to_vmf_fault(int err) case 0: case -EAGAIN: case -ENOSPC: /* transient failure to evict? */ + case -ENOBUFS: /* temporarily out of fences? */ case -ERESTARTSYS: case -EINTR: case -EBUSY: From b11950356c4b416d2e87941f3aa7a8bf089db72b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 16 Oct 2023 16:35:36 +0800 Subject: [PATCH 109/212] KEYS: asymmetric: Fix sign/verify on pkcs1pad without a hash The new sign/verify code broke the case of pkcs1pad without a hash algorithm. Fix it by setting issig correctly for this case. Fixes: 63ba4d67594a ("KEYS: asymmetric: Use new crypto interface without scatterlists") Cc: stable@vger.kernel.org # v6.5 Reported-by: Denis Kenzior Signed-off-by: Herbert Xu Tested-by: Denis Kenzior Signed-off-by: Herbert Xu --- crypto/asymmetric_keys/public_key.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index abeecb8329b3..1dcab27986a6 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -81,14 +81,13 @@ software_key_determine_akcipher(const struct public_key *pkey, * RSA signatures usually use EMSA-PKCS1-1_5 [RFC3447 sec 8.2]. */ if (strcmp(encoding, "pkcs1") == 0) { + *sig = op == kernel_pkey_sign || + op == kernel_pkey_verify; if (!hash_algo) { - *sig = false; n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", pkey->pkey_algo); } else { - *sig = op == kernel_pkey_sign || - op == kernel_pkey_verify; n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)", pkey->pkey_algo, hash_algo); From d2929762cc3f85528b0ca12f6f63c2a714f24778 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Oct 2023 16:59:47 +0200 Subject: [PATCH 110/212] sched/eevdf: Fix heap corruption more Because someone is a flaming idiot... and forgot we have current as se->on_rq but not actually in the tree itself, and walking rb_parent() on an entry not in the tree is 'funky' and KASAN complains. Fixes: 8dafa9d0eb1a ("sched/eevdf: Fix min_deadline heap integrity") Reported-by: 0599jiangyc@gmail.com Reported-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Peter Zijlstra (Intel) Tested-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://bugzilla.kernel.org/show_bug.cgi?id=218020 Link: https://lkml.kernel.org/r/CAJwJo6ZGXO07%3DQvW4fgQfbsDzQPs9xj5sAQ1zp%3DmAyPMNbHYww%40mail.gmail.com --- kernel/sched/fair.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 061a30a8925a..df348aa55d3c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3657,7 +3657,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, */ deadline = div_s64(deadline * old_weight, weight); se->deadline = se->vruntime + deadline; - min_deadline_cb_propagate(&se->run_node, NULL); + if (se != cfs_rq->curr) + min_deadline_cb_propagate(&se->run_node, NULL); } #ifdef CONFIG_SMP From 430232619791e7de95191f2cd8ebaa4c380d17d0 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Tue, 17 Oct 2023 18:42:36 +0800 Subject: [PATCH 111/212] gpio: vf610: mask the gpio irq in system suspend and support wakeup Add flag IRQCHIP_MASK_ON_SUSPEND to make sure gpio irq is masked on suspend, if lack this flag, current irq arctitecture will not mask the irq, and these unmasked gpio irq will wrongly wakeup the system even they are not config as wakeup source. Also add flag IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND to make sure the gpio irq which is configed as wakeup source can work as expect. Fixes: 7f2691a19627 ("gpio: vf610: add gpiolib/IRQ chip driver for Vybrid") Signed-off-by: Haibo Chen Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-vf610.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index dbc7ba0ee72c..d1f05810340f 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -246,7 +246,8 @@ static const struct irq_chip vf610_irqchip = { .irq_unmask = vf610_gpio_irq_unmask, .irq_set_type = vf610_gpio_irq_set_type, .irq_set_wake = vf610_gpio_irq_set_wake, - .flags = IRQCHIP_IMMUTABLE, + .flags = IRQCHIP_IMMUTABLE | IRQCHIP_MASK_ON_SUSPEND + | IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND, GPIOCHIP_IRQ_RESOURCE_HELPERS, }; From fc363413ef8ea842ae7a99e3caf5465dafdd3a49 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 18 Oct 2023 11:00:17 +0200 Subject: [PATCH 112/212] gpio: vf610: set value before the direction to avoid a glitch We found a glitch when configuring the pad as output high. To avoid this glitch, move the data value setting before direction config in the function vf610_gpio_direction_output(). Fixes: 659d8a62311f ("gpio: vf610: add imx7ulp support") Signed-off-by: Haibo Chen [Bartosz: tweak the commit message] Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-vf610.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index d1f05810340f..656d6b1dddb5 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -126,14 +126,14 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, unsigned long mask = BIT(gpio); u32 val; + vf610_gpio_set(chip, gpio, value); + if (port->sdata && port->sdata->have_paddr) { val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR); val |= mask; vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); } - vf610_gpio_set(chip, gpio, value); - return pinctrl_gpio_direction_output(chip->base + gpio); } From f37cc2fc277b371fc491890afb7d8a26e36bb3a1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 17 Oct 2023 11:07:23 +0200 Subject: [PATCH 113/212] platform/x86: asus-wmi: Change ASUS_WMI_BRN_DOWN code from 0x20 to 0x2e Older Asus laptops change the backlight level themselves and then send WMI events with different codes for different backlight levels. The asus-wmi.c code maps the entire range of codes reported on brightness down keypresses to an internal ASUS_WMI_BRN_DOWN code: define NOTIFY_BRNUP_MIN 0x11 define NOTIFY_BRNUP_MAX 0x1f define NOTIFY_BRNDOWN_MIN 0x20 define NOTIFY_BRNDOWN_MAX 0x2e if (code >= NOTIFY_BRNUP_MIN && code <= NOTIFY_BRNUP_MAX) code = ASUS_WMI_BRN_UP; else if (code >= NOTIFY_BRNDOWN_MIN && code <= NOTIFY_BRNDOWN_MAX) code = ASUS_WMI_BRN_DOWN; Before this commit all the NOTIFY_BRNDOWN_MIN - NOTIFY_BRNDOWN_MAX aka 0x20 - 0x2e events were mapped to 0x20. This mapping is causing issues on new laptop models which actually send 0x2b events for printscreen presses and 0x2c events for capslock presses, which get translated into spurious brightness-down presses. The plan is disable the 0x11-0x2e special mapping on laptops where asus-wmi does not register a backlight-device to avoid the spurious brightness-down keypresses. New laptops always send 0x2e for brightness-down presses, change the special internal ASUS_WMI_BRN_DOWN value from 0x20 to 0x2e to match this in preparation for fixing the spurious brightness-down presses. This change does not have any functional impact since all of 0x20 - 0x2e is mapped to ASUS_WMI_BRN_DOWN first and only then checked against the keymap code and the new 0x2e value is still in the 0x20 - 0x2e range. Reported-by: James John Closes: https://lore.kernel.org/platform-driver-x86/a2c441fe-457e-44cf-a146-0ecd86b037cf@donjajo.com/ Closes: https://bbs.archlinux.org/viewtopic.php?pid=2123716 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231017090725.38163-2-hdegoede@redhat.com --- drivers/platform/x86/asus-wmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index a478ebfd34df..fc41d1b1bb7f 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -18,7 +18,7 @@ #include #define ASUS_WMI_KEY_IGNORE (-1) -#define ASUS_WMI_BRN_DOWN 0x20 +#define ASUS_WMI_BRN_DOWN 0x2e #define ASUS_WMI_BRN_UP 0x2f struct module; From a5b92be2482e5f9ef30be4e4cda12ed484381493 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 17 Oct 2023 11:07:24 +0200 Subject: [PATCH 114/212] platform/x86: asus-wmi: Only map brightness codes when using asus-wmi backlight control Older Asus laptops change the backlight level themselves and then send WMI events with different codes for different backlight levels. The asus-wmi.c code maps the entire range of codes reported on brightness down keypresses to an internal ASUS_WMI_BRN_DOWN code: define NOTIFY_BRNUP_MIN 0x11 define NOTIFY_BRNUP_MAX 0x1f define NOTIFY_BRNDOWN_MIN 0x20 define NOTIFY_BRNDOWN_MAX 0x2e if (code >= NOTIFY_BRNUP_MIN && code <= NOTIFY_BRNUP_MAX) code = ASUS_WMI_BRN_UP; else if (code >= NOTIFY_BRNDOWN_MIN && code <= NOTIFY_BRNDOWN_MAX) code = ASUS_WMI_BRN_DOWN; This mapping is causing issues on new laptop models which actually send 0x2b events for printscreen presses and 0x2c events for capslock presses, which get translated into spurious brightness-down presses. This mapping is really only necessary when asus-wmi has registered a backlight-device for backlight control. In this case the mapping was used to decide to filter out the keypresss since in this case the firmware has already modified the brightness itself and instead of reporting a keypress asus-wmi will just report the new brightness value to userspace. OTOH when the firmware does not adjust the brightness itself then it seems to always report 0x2e for brightness-down presses and 0x2f for brightness up presses independent of the actual brightness level. So in this case the mapping of the code is not necessary and this translation actually leads to spurious brightness-down presses being send to userspace when pressing printscreen or capslock. Modify asus_wmi_handle_event_code() to only do the mapping when using asus-wmi backlight control to fix the spurious brightness-down presses. Reported-by: James John Closes: https://lore.kernel.org/platform-driver-x86/a2c441fe-457e-44cf-a146-0ecd86b037cf@donjajo.com/ Closes: https://bbs.archlinux.org/viewtopic.php?pid=2123716 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231017090725.38163-3-hdegoede@redhat.com --- drivers/platform/x86/asus-wmi.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 9f8cea5f9615..19bfd30861aa 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -3826,7 +3826,6 @@ static void asus_wmi_handle_event_code(int code, struct asus_wmi *asus) { unsigned int key_value = 1; bool autorelease = 1; - int orig_code = code; if (asus->driver->key_filter) { asus->driver->key_filter(asus->driver, &code, &key_value, @@ -3835,16 +3834,10 @@ static void asus_wmi_handle_event_code(int code, struct asus_wmi *asus) return; } - if (code >= NOTIFY_BRNUP_MIN && code <= NOTIFY_BRNUP_MAX) - code = ASUS_WMI_BRN_UP; - else if (code >= NOTIFY_BRNDOWN_MIN && code <= NOTIFY_BRNDOWN_MAX) - code = ASUS_WMI_BRN_DOWN; - - if (code == ASUS_WMI_BRN_DOWN || code == ASUS_WMI_BRN_UP) { - if (acpi_video_get_backlight_type() == acpi_backlight_vendor) { - asus_wmi_backlight_notify(asus, orig_code); - return; - } + if (acpi_video_get_backlight_type() == acpi_backlight_vendor && + code >= NOTIFY_BRNUP_MIN && code <= NOTIFY_BRNDOWN_MAX) { + asus_wmi_backlight_notify(asus, code); + return; } if (code == NOTIFY_KBD_BRTUP) { From 235985d1763f7aba92c1c64e5f5aaec26c2c9b18 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 17 Oct 2023 11:07:25 +0200 Subject: [PATCH 115/212] platform/x86: asus-wmi: Map 0x2a code, Ignore 0x2b and 0x2c events Newer Asus laptops send the following new WMI event codes when some of the F1 - F12 "media" hotkeys are pressed: 0x2a Screen Capture 0x2b PrintScreen 0x2c CapsLock Map 0x2a to KEY_SELECTIVE_SCREENSHOT mirroring how similar hotkeys are mapped on other laptops. PrintScreem and CapsLock are also reported as normal PS/2 keyboard events, map these event codes to KE_IGNORE to avoid "Unknown key code 0x%x\n" log messages. Reported-by: James John Closes: https://lore.kernel.org/platform-driver-x86/a2c441fe-457e-44cf-a146-0ecd86b037cf@donjajo.com/ Closes: https://bbs.archlinux.org/viewtopic.php?pid=2123716 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231017090725.38163-4-hdegoede@redhat.com --- drivers/platform/x86/asus-nb-wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index d85d895fee89..df1db54d4e18 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -531,6 +531,9 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, ASUS_WMI_BRN_DOWN, { KEY_BRIGHTNESSDOWN } }, { KE_KEY, ASUS_WMI_BRN_UP, { KEY_BRIGHTNESSUP } }, + { KE_KEY, 0x2a, { KEY_SELECTIVE_SCREENSHOT } }, + { KE_IGNORE, 0x2b, }, /* PrintScreen (also send via PS/2) on newer models */ + { KE_IGNORE, 0x2c, }, /* CapsLock (also send via PS/2) on newer models */ { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, { KE_KEY, 0x32, { KEY_MUTE } }, From f9bc9bbe8afdf83412728f0b464979a72a3b9ec2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 16 Oct 2023 22:43:00 +1000 Subject: [PATCH 116/212] powerpc/qspinlock: Fix stale propagated yield_cpu yield_cpu is a sample of a preempted lock holder that gets propagated back through the queue. Queued waiters use this to yield to the preempted lock holder without continually sampling the lock word (which would defeat the purpose of MCS queueing by bouncing the cache line). The problem is that yield_cpu can become stale. It can take some time to be passed down the chain, and if any queued waiter gets preempted then it will cease to propagate the yield_cpu to later waiters. This can result in yielding to a CPU that no longer holds the lock, which is bad, but particularly if it is currently in H_CEDE (idle), then it appears to be preempted and some hypervisors (PowerVM) can cause very long H_CONFER latencies waiting for H_CEDE wakeup. This results in latency spikes and hard lockups on oversubscribed partitions with lock contention. This is a minimal fix. Before yielding to yield_cpu, sample the lock word to confirm yield_cpu is still the owner, and bail out of it is not. Thanks to a bunch of people who reported this and tracked down the exact problem using tracepoints and dispatch trace logs. Fixes: 28db61e207ea ("powerpc/qspinlock: allow propagation of yield CPU down the queue") Cc: stable@vger.kernel.org # v6.2+ Reported-by: Srikar Dronamraju Reported-by: Laurent Dufour Reported-by: Shrikanth Hegde Debugged-by: "Nysal Jan K.A" Signed-off-by: Nicholas Piggin Tested-by: Shrikanth Hegde Signed-off-by: Michael Ellerman Link: https://msgid.link/20231016124305.139923-2-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 253620979d0c..6dd2f46bd3ef 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -406,6 +406,9 @@ static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode * if ((yield_count & 1) == 0) goto yield_prev; /* owner vcpu is running */ + if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu) + goto yield_prev; /* re-sample lock owner */ + spin_end(); preempted = true; From 0c21a18d5d6c6a73d098fb9b4701572370942df9 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 16 Oct 2023 22:39:39 +0530 Subject: [PATCH 117/212] ACPI: irq: Fix incorrect return value in acpi_register_gsi() acpi_register_gsi() should return a negative value in case of failure. Currently, it returns the return value from irq_create_fwspec_mapping(). However, irq_create_fwspec_mapping() returns 0 for failure. Fix the issue by returning -EINVAL if irq_create_fwspec_mapping() returns zero. Fixes: d44fa3d46079 ("ACPI: Add support for ResourceSource/IRQ domain mapping") Cc: 4.11+ # 4.11+ Signed-off-by: Sunil V L [ rjw: Rename a new local variable ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/irq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c index c2c786eb95ab..1687483ff319 100644 --- a/drivers/acpi/irq.c +++ b/drivers/acpi/irq.c @@ -57,6 +57,7 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) { struct irq_fwspec fwspec; + unsigned int irq; fwspec.fwnode = acpi_get_gsi_domain_id(gsi); if (WARN_ON(!fwspec.fwnode)) { @@ -68,7 +69,11 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, fwspec.param[1] = acpi_dev_get_irq_type(trigger, polarity); fwspec.param_count = 2; - return irq_create_fwspec_mapping(&fwspec); + irq = irq_create_fwspec_mapping(&fwspec); + if (!irq) + return -EINVAL; + + return irq; } EXPORT_SYMBOL_GPL(acpi_register_gsi); From d5921c460e543228d100daf67dac7a03dfaaa40a Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Tue, 10 Oct 2023 16:21:23 +0800 Subject: [PATCH 118/212] ACPI: bus: Move acpi_arm_init() to the place of after acpi_ghes_init() acpi_agdi_init() in acpi_arm_init() will register a SDEI event, so it needs the SDEI subsystem to be initialized (which is done in acpi_ghes_init()) before the AGDI driver probing. In commit fcea0ccf4fd7 ("ACPI: bus: Consolidate all arm specific initialisation into acpi_arm_init()"), the acpi_agdi_init() was called before acpi_ghes_init() and it causes following failure: | [ 0.515864] sdei: Failed to create event 1073741825: -5 | [ 0.515866] agdi agdi.0: Failed to register for SDEI event 1073741825 | [ 0.515867] agdi: probe of agdi.0 failed with error -5 | ... | [ 0.516022] sdei: SDEIv1.0 (0x0) detected in firmware. Fix it by moving acpi_arm_init() to the place of after acpi_ghes_init(). Fixes: fcea0ccf4fd7 ("ACPI: bus: Consolidate all arm specific initialisation into acpi_arm_init()") Reported-by: D Scott Phillips Signed-off-by: Hanjun Guo Reviewed-by: Sudeep Holla Tested-by: D Scott Phillips Cc: 6.5+ # 6.5+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index f41dda2d3493..a4aa53b7e2bb 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1410,10 +1410,10 @@ static int __init acpi_init(void) acpi_init_ffh(); pci_mmcfg_late_init(); - acpi_arm_init(); acpi_viot_early_init(); acpi_hest_init(); acpi_ghes_init(); + acpi_arm_init(); acpi_scan_init(); acpi_ec_init(); acpi_debugfs_init(); From fe0e04cf66a12ffe6d1b43725ddaabd5599d024f Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 15 Oct 2023 01:54:49 +0200 Subject: [PATCH 119/212] platform/surface: platform_profile: Propagate error if profile registration fails If platform_profile_register() fails, the driver does not propagate the error, but instead probes successfully. This means when the driver unbinds, the a warning might be issued by platform_profile_remove(). Fix this by propagating the error back to the caller of surface_platform_profile_probe(). Compile-tested only. Fixes: b78b4982d763 ("platform/surface: Add platform profile driver") Signed-off-by: Armin Wolf Reviewed-by: Maximilian Luz Tested-by: Maximilian Luz Link: https://lore.kernel.org/r/20231014235449.288702-1-W_Armin@gmx.de Signed-off-by: Hans de Goede --- drivers/platform/surface/surface_platform_profile.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/surface/surface_platform_profile.c b/drivers/platform/surface/surface_platform_profile.c index f433a13c3689..a5a3941b3f43 100644 --- a/drivers/platform/surface/surface_platform_profile.c +++ b/drivers/platform/surface/surface_platform_profile.c @@ -159,8 +159,7 @@ static int surface_platform_profile_probe(struct ssam_device *sdev) set_bit(PLATFORM_PROFILE_BALANCED_PERFORMANCE, tpd->handler.choices); set_bit(PLATFORM_PROFILE_PERFORMANCE, tpd->handler.choices); - platform_profile_register(&tpd->handler); - return 0; + return platform_profile_register(&tpd->handler); } static void surface_platform_profile_remove(struct ssam_device *sdev) From 0e51cb42438b8754d8f4cee4c802a8c5bb2cd5e0 Mon Sep 17 00:00:00 2001 From: Orlando Chamberlain Date: Tue, 17 Oct 2023 22:14:45 +1100 Subject: [PATCH 120/212] apple-gmux: Hard Code max brightness for MMIO gmux The data in the max brightness port for iMacs with MMIO gmux incorrectly reports 0x03ff, but it should be 0xffff. As all other MMIO gmux models have 0xffff, hard code this for all MMIO gmux's so they all have the proper brightness range accessible. Fixes: 0c18184de990 ("platform/x86: apple-gmux: support MMIO gmux on T2 Macs") Reported-by: Karsten Leipold Signed-off-by: Orlando Chamberlain Link: https://lore.kernel.org/r/20231017111444.19304-2-orlandoch.dev@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/apple-gmux.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index cadbb557a108..1417e230edbd 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -105,6 +105,8 @@ struct apple_gmux_config { #define GMUX_BRIGHTNESS_MASK 0x00ffffff #define GMUX_MAX_BRIGHTNESS GMUX_BRIGHTNESS_MASK +# define MMIO_GMUX_MAX_BRIGHTNESS 0xffff + static u8 gmux_pio_read8(struct apple_gmux_data *gmux_data, int port) { return inb(gmux_data->iostart + port); @@ -857,7 +859,17 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) memset(&props, 0, sizeof(props)); props.type = BACKLIGHT_PLATFORM; - props.max_brightness = gmux_read32(gmux_data, GMUX_PORT_MAX_BRIGHTNESS); + + /* + * All MMIO gmux's have 0xffff as max brightness, but some iMacs incorrectly + * report 0x03ff, despite the firmware being happy to set 0xffff as the brightness + * at boot. Force 0xffff for all MMIO gmux's so they all have the correct brightness + * range. + */ + if (type == APPLE_GMUX_TYPE_MMIO) + props.max_brightness = MMIO_GMUX_MAX_BRIGHTNESS; + else + props.max_brightness = gmux_read32(gmux_data, GMUX_PORT_MAX_BRIGHTNESS); #if IS_REACHABLE(CONFIG_ACPI_VIDEO) register_bdev = acpi_video_get_backlight_type() == acpi_backlight_apple_gmux; From 99c09c985e5973c8f0ad976ebae069548dd86f12 Mon Sep 17 00:00:00 2001 From: Liming Sun Date: Thu, 12 Oct 2023 19:02:35 -0400 Subject: [PATCH 121/212] platform/mellanox: mlxbf-tmfifo: Fix a warning message This commit fixes the smatch static checker warning in function mlxbf_tmfifo_rxtx_word() which complains data not initialized at line 634 when IS_VRING_DROP() is TRUE. Signed-off-by: Liming Sun Link: https://lore.kernel.org/r/20231012230235.219861-1-limings@nvidia.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxbf-tmfifo.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index fd38d8c8371e..ab7d7a1235b8 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -609,24 +609,25 @@ static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring, if (vring->cur_len + sizeof(u64) <= len) { /* The whole word. */ - if (!IS_VRING_DROP(vring)) { - if (is_rx) + if (is_rx) { + if (!IS_VRING_DROP(vring)) memcpy(addr + vring->cur_len, &data, sizeof(u64)); - else - memcpy(&data, addr + vring->cur_len, - sizeof(u64)); + } else { + memcpy(&data, addr + vring->cur_len, + sizeof(u64)); } vring->cur_len += sizeof(u64); } else { /* Leftover bytes. */ - if (!IS_VRING_DROP(vring)) { - if (is_rx) + if (is_rx) { + if (!IS_VRING_DROP(vring)) memcpy(addr + vring->cur_len, &data, len - vring->cur_len); - else - memcpy(&data, addr + vring->cur_len, - len - vring->cur_len); + } else { + data = 0; + memcpy(&data, addr + vring->cur_len, + len - vring->cur_len); } vring->cur_len = len; } From 8b51a3956d44ea6ade962874ade14de9a7d16556 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 18 Oct 2023 08:09:27 -0600 Subject: [PATCH 122/212] io_uring: fix crash with IORING_SETUP_NO_MMAP and invalid SQ ring address If we specify a valid CQ ring address but an invalid SQ ring address, we'll correctly spot this and free the allocated pages and clear them to NULL. However, we don't clear the ring page count, and hence will attempt to free the pages again. We've already cleared the address of the page array when freeing them, but we don't check for that. This causes the following crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Oops [#1] Modules linked in: CPU: 0 PID: 20 Comm: kworker/u2:1 Not tainted 6.6.0-rc5-dirty #56 Hardware name: ucbbar,riscvemu-bare (DT) Workqueue: events_unbound io_ring_exit_work epc : io_pages_free+0x2a/0x58 ra : io_rings_free+0x3a/0x50 epc : ffffffff808811a2 ra : ffffffff80881406 sp : ffff8f80000c3cd0 status: 0000000200000121 badaddr: 0000000000000000 cause: 000000000000000d [] io_pages_free+0x2a/0x58 [] io_rings_free+0x3a/0x50 [] io_ring_exit_work+0x37e/0x424 [] process_one_work+0x10c/0x1f4 [] worker_thread+0x252/0x31c [] kthread+0xc4/0xe0 [] ret_from_fork+0xa/0x1c Check for a NULL array in io_pages_free(), but also clear the page counts when we free them to be on the safer side. Reported-by: rtm@csail.mit.edu Fixes: 03d89a2de25b ("io_uring: support for user allocated memory for rings/sqes") Cc: stable@vger.kernel.org Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index d839a80a6751..8d1bc6cdfe71 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2674,7 +2674,11 @@ static void io_pages_free(struct page ***pages, int npages) if (!pages) return; + page_array = *pages; + if (!page_array) + return; + for (i = 0; i < npages; i++) unpin_user_page(page_array[i]); kvfree(page_array); @@ -2758,7 +2762,9 @@ static void io_rings_free(struct io_ring_ctx *ctx) ctx->sq_sqes = NULL; } else { io_pages_free(&ctx->ring_pages, ctx->n_ring_pages); + ctx->n_ring_pages = 0; io_pages_free(&ctx->sqe_pages, ctx->n_sqe_pages); + ctx->n_sqe_pages = 0; } } From d121df789b159e9a8ee770666f210975a81e8111 Mon Sep 17 00:00:00 2001 From: "Shawn.Shao" Date: Mon, 21 Aug 2023 14:03:33 +0800 Subject: [PATCH 123/212] vdpa_sim_blk: Fix the potential leak of mgmt_dev If the shared_buffer allocation fails, need to unregister mgmt_dev first. Cc: stable@vger.kernel.org Fixes: abebb16254b36 ("vdpa_sim_blk: support shared backend") Signed-off-by: Shawn.Shao Acked-by: Jason Wang Message-Id: <20230821060333.1155-1-shawn.shao@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c index 00d7d72713be..b3a3cb165795 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -499,12 +499,13 @@ static int __init vdpasim_blk_init(void) GFP_KERNEL); if (!shared_buffer) { ret = -ENOMEM; - goto parent_err; + goto mgmt_dev_err; } } return 0; - +mgmt_dev_err: + vdpa_mgmtdev_unregister(&mgmt_dev); parent_err: device_unregister(&vdpasim_blk_mgmtdev); return ret; From fab7f259227b8f70aa6d54e1de1a1f5f4729041c Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Mon, 11 Sep 2023 09:03:29 +0000 Subject: [PATCH 124/212] virtio-mmio: fix memory leak of vm_dev With the recent removal of vm_dev from devres its memory is only freed via the callback virtio_mmio_release_dev. However, this only takes effect after device_add is called by register_virtio_device. Until then it's an unmanaged resource and must be explicitly freed on error exit. This bug was discovered and resolved using Coverity Static Analysis Security Testing (SAST) by Synopsys, Inc. Cc: stable@vger.kernel.org Fixes: 55c91fedd03d ("virtio-mmio: don't break lifecycle of vm_dev") Signed-off-by: Maximilian Heyne Reviewed-by: Catalin Marinas Tested-by: Catalin Marinas Reviewed-by: Xuan Zhuo Message-Id: <20230911090328.40538-1-mheyne@amazon.de> Signed-off-by: Michael S. Tsirkin Reviewed-by: Wolfram Sang --- drivers/virtio/virtio_mmio.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 97760f611295..59892a31cf76 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -631,14 +631,17 @@ static int virtio_mmio_probe(struct platform_device *pdev) spin_lock_init(&vm_dev->lock); vm_dev->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(vm_dev->base)) - return PTR_ERR(vm_dev->base); + if (IS_ERR(vm_dev->base)) { + rc = PTR_ERR(vm_dev->base); + goto free_vm_dev; + } /* Check magic value */ magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE); if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) { dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic); - return -ENODEV; + rc = -ENODEV; + goto free_vm_dev; } /* Check device version */ @@ -646,7 +649,8 @@ static int virtio_mmio_probe(struct platform_device *pdev) if (vm_dev->version < 1 || vm_dev->version > 2) { dev_err(&pdev->dev, "Version %ld not supported!\n", vm_dev->version); - return -ENXIO; + rc = -ENXIO; + goto free_vm_dev; } vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID); @@ -655,7 +659,8 @@ static int virtio_mmio_probe(struct platform_device *pdev) * virtio-mmio device with an ID 0 is a (dummy) placeholder * with no function. End probing now with no error reported. */ - return -ENODEV; + rc = -ENODEV; + goto free_vm_dev; } vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID); @@ -685,6 +690,10 @@ static int virtio_mmio_probe(struct platform_device *pdev) put_device(&vm_dev->vdev.dev); return rc; + +free_vm_dev: + kfree(vm_dev); + return rc; } static int virtio_mmio_remove(struct platform_device *pdev) From f8a3db47d944a33eac1f37358db560e5aabbfbca Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 29 Aug 2023 20:40:09 +0300 Subject: [PATCH 125/212] vdpa/mlx5: Fix double release of debugfs entry The error path in setup_driver deletes the debugfs entry but doesn't clear the pointer. During .dev_del the invalid pointer will be released again causing a crash. This patch fixes the issue by always clearing the debugfs entry in mlx5_vdpa_remove_debugfs. Also, stop removing the debugfs entry in .dev_del op: the debugfs entry is already handled within the setup_driver/teardown_driver scope. Cc: stable@vger.kernel.org Fixes: f0417e72add5 ("vdpa/mlx5: Add and remove debugfs in setup/teardown driver") Signed-off-by: Dragos Tatulea Reviewed-by: Gal Pressman Message-Id: <20230829174014.928189-2-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/debug.c | 5 +++-- drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++----- drivers/vdpa/mlx5/net/mlx5_vnet.h | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/mlx5/net/debug.c b/drivers/vdpa/mlx5/net/debug.c index 60d6ac68cdc4..9c85162c19fc 100644 --- a/drivers/vdpa/mlx5/net/debug.c +++ b/drivers/vdpa/mlx5/net/debug.c @@ -146,7 +146,8 @@ void mlx5_vdpa_add_debugfs(struct mlx5_vdpa_net *ndev) ndev->rx_dent = debugfs_create_dir("rx", ndev->debugfs); } -void mlx5_vdpa_remove_debugfs(struct dentry *dbg) +void mlx5_vdpa_remove_debugfs(struct mlx5_vdpa_net *ndev) { - debugfs_remove_recursive(dbg); + debugfs_remove_recursive(ndev->debugfs); + ndev->debugfs = NULL; } diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 40a03b08d7cf..2b7b9000f2d3 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2713,7 +2713,7 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) err_rqt: teardown_virtqueues(ndev); err_setup: - mlx5_vdpa_remove_debugfs(ndev->debugfs); + mlx5_vdpa_remove_debugfs(ndev); out: return err; } @@ -2727,8 +2727,7 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev) if (!ndev->setup) return; - mlx5_vdpa_remove_debugfs(ndev->debugfs); - ndev->debugfs = NULL; + mlx5_vdpa_remove_debugfs(ndev); teardown_steering(ndev); destroy_tir(ndev); destroy_rqt(ndev); @@ -3489,8 +3488,6 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct workqueue_struct *wq; - mlx5_vdpa_remove_debugfs(ndev->debugfs); - ndev->debugfs = NULL; unregister_link_notifier(ndev); _vdpa_unregister_device(dev); wq = mvdev->wq; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h index 36c44d9fdd16..60cdbc903037 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.h +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h @@ -88,7 +88,7 @@ struct macvlan_node { }; void mlx5_vdpa_add_debugfs(struct mlx5_vdpa_net *ndev); -void mlx5_vdpa_remove_debugfs(struct dentry *dbg); +void mlx5_vdpa_remove_debugfs(struct mlx5_vdpa_net *ndev); void mlx5_vdpa_add_rx_flow_table(struct mlx5_vdpa_net *ndev); void mlx5_vdpa_remove_rx_flow_table(struct mlx5_vdpa_net *ndev); void mlx5_vdpa_add_tirn(struct mlx5_vdpa_net *ndev); From 07622bd415639e9709579f400afd19e7e9866e5e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 31 Aug 2023 11:10:07 +1000 Subject: [PATCH 126/212] virtio_balloon: Fix endless deflation and inflation on arm64 The deflation request to the target, which isn't unaligned to the guest page size causes endless deflation and inflation actions. For example, we receive the flooding QMP events for the changes on memory balloon's size after a deflation request to the unaligned target is sent for the ARM64 guest, where we have 64KB base page size. /home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \ -accel kvm -machine virt,gic-version=host -cpu host \ -smp maxcpus=8,cpus=8,sockets=2,clusters=2,cores=2,threads=1 \ -m 1024M,slots=16,maxmem=64G \ -object memory-backend-ram,id=mem0,size=512M \ -object memory-backend-ram,id=mem1,size=512M \ -numa node,nodeid=0,memdev=mem0,cpus=0-3 \ -numa node,nodeid=1,memdev=mem1,cpus=4-7 \ : \ -device virtio-balloon-pci,id=balloon0,bus=pcie.10 { "execute" : "balloon", "arguments": { "value" : 1073672192 } } {"return": {}} {"timestamp": {"seconds": 1693272173, "microseconds": 88667}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073610752}} {"timestamp": {"seconds": 1693272174, "microseconds": 89704}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073610752}} {"timestamp": {"seconds": 1693272175, "microseconds": 90819}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073610752}} {"timestamp": {"seconds": 1693272176, "microseconds": 91961}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073610752}} {"timestamp": {"seconds": 1693272177, "microseconds": 93040}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073676288}} {"timestamp": {"seconds": 1693272178, "microseconds": 94117}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073676288}} {"timestamp": {"seconds": 1693272179, "microseconds": 95337}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073610752}} {"timestamp": {"seconds": 1693272180, "microseconds": 96615}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073676288}} {"timestamp": {"seconds": 1693272181, "microseconds": 97626}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073610752}} {"timestamp": {"seconds": 1693272182, "microseconds": 98693}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073676288}} {"timestamp": {"seconds": 1693272183, "microseconds": 99698}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073610752}} {"timestamp": {"seconds": 1693272184, "microseconds": 100727}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073610752}} {"timestamp": {"seconds": 1693272185, "microseconds": 90430}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073610752}} {"timestamp": {"seconds": 1693272186, "microseconds": 102999}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073676288}} : Fix it by aligning the target up to the guest page size, 64KB in this specific case. With this applied, no flooding QMP events are observed and the memory balloon's size can be stablizied to 0x3ffe0000 soon after the deflation request is sent. { "execute" : "balloon", "arguments": { "value" : 1073672192 } } {"return": {}} {"timestamp": {"seconds": 1693273328, "microseconds": 793075}, \ "event": "BALLOON_CHANGE", "data": {"actual": 1073610752}} { "execute" : "query-balloon" } {"return": {"actual": 1073610752}} Cc: stable@vger.kernel.org Signed-off-by: Gavin Shan Tested-by: Zhenyu Zhang Message-Id: <20230831011007.1032822-1-gshan@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: David Hildenbrand --- drivers/virtio/virtio_balloon.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 5b15936a5214..2d5d252ef419 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -395,7 +395,11 @@ static inline s64 towards_target(struct virtio_balloon *vb) virtio_cread_le(vb->vdev, struct virtio_balloon_config, num_pages, &num_pages); - target = num_pages; + /* + * Aligned up to guest page size to avoid inflating and deflating + * balloon endlessly. + */ + target = ALIGN(num_pages, VIRTIO_BALLOON_PAGES_PER_PAGE); return target - vb->num_pages; } From abb0dcf9938c93f765abf8cb45567cadef0af6b2 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 31 Aug 2023 18:50:56 +0300 Subject: [PATCH 127/212] vdpa/mlx5: Fix firmware error on creation of 1k VQs A firmware error is triggered when configuring a 9k MTU on the PF after switching to switchdev mode and then using a vdpa device with larger (1k) rings: mlx5_cmd_out_err: CREATE_GENERAL_OBJECT(0xa00) op_mod(0xd) failed, status bad resource(0x5), syndrome (0xf6db90), err(-22) This is due to the fact that the hw VQ size parameters are computed based on the umem_1/2/3_buffer_param_a/b capabilities and all device capabilities are read only when the driver is moved to switchdev mode. The problematic configuration flow looks like this: 1) Create VF 2) Unbind VF 3) Switch PF to switchdev mode. 4) Bind VF 5) Set PF MTU to 9k 6) create vDPA device 7) Start VM with vDPA device and 1K queue size Note that setting the MTU before step 3) doesn't trigger this issue. This patch reads the forementioned umem parameters at the latest point possible before the VQs of the device are created. v2: - Allocate output with kmalloc to reduce stack frame size. - Removed stable from cc. Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Dragos Tatulea Message-Id: <20230831155702.1080754-1-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 63 ++++++++++++++++++++++++++----- drivers/vdpa/mlx5/net/mlx5_vnet.h | 9 +++++ 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2b7b9000f2d3..946488b8989f 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -625,30 +625,70 @@ static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) mlx5_db_free(ndev->mvdev.mdev, &vcq->db); } +static int read_umem_params(struct mlx5_vdpa_net *ndev) +{ + u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; + u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01); + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + int out_size; + void *caps; + void *out; + int err; + + out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); + out = kzalloc(out_size, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, + "Failed reading vdpa umem capabilities with err %d\n", err); + goto out; + } + + caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); + + ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a); + ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b); + + ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a); + ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b); + + ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a); + ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b); + +out: + kfree(out); + return 0; +} + static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, struct mlx5_vdpa_umem **umemp) { - struct mlx5_core_dev *mdev = ndev->mvdev.mdev; - int p_a; - int p_b; + u32 p_a; + u32 p_b; switch (num) { case 1: - p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a); - p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b); + p_a = ndev->umem_1_buffer_param_a; + p_b = ndev->umem_1_buffer_param_b; *umemp = &mvq->umem1; break; case 2: - p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a); - p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b); + p_a = ndev->umem_2_buffer_param_a; + p_b = ndev->umem_2_buffer_param_b; *umemp = &mvq->umem2; break; case 3: - p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a); - p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b); + p_a = ndev->umem_3_buffer_param_a; + p_b = ndev->umem_3_buffer_param_b; *umemp = &mvq->umem3; break; } + (*umemp)->size = p_a * mvq->num_ent + p_b; } @@ -2679,6 +2719,11 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) goto out; } mlx5_vdpa_add_debugfs(ndev); + + err = read_umem_params(ndev); + if (err) + goto err_setup; + err = setup_virtqueues(mvdev); if (err) { mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h index 60cdbc903037..90b556a57971 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.h +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h @@ -65,6 +65,15 @@ struct mlx5_vdpa_net { struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE]; struct mlx5_vdpa_irq_pool irqp; struct dentry *debugfs; + + u32 umem_1_buffer_param_a; + u32 umem_1_buffer_param_b; + + u32 umem_2_buffer_param_a; + u32 umem_2_buffer_param_b; + + u32 umem_3_buffer_param_a; + u32 umem_3_buffer_param_b; }; struct mlx5_vdpa_counter { From ca50ec377c2e94b0a9f8735de2856cd0f13beab4 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Wed, 27 Sep 2023 16:05:44 +0200 Subject: [PATCH 128/212] vhost: Allow null msg.size on VHOST_IOTLB_INVALIDATE Commit e2ae38cf3d91 ("vhost: fix hung thread due to erroneous iotlb entries") Forbade vhost iotlb msg with null size to prevent entries with size = start = 0 and last = ULONG_MAX to end up in the iotlb. Then commit 95932ab2ea07 ("vhost: allow batching hint without size") only applied the check for VHOST_IOTLB_UPDATE and VHOST_IOTLB_INVALIDATE message types to fix a regression observed with batching hit. Still, the introduction of that check introduced a regression for some users attempting to invalidate the whole ULONG_MAX range by setting the size to 0. This is the case with qemu/smmuv3/vhost integration which does not work anymore. It Looks safe to partially revert the original commit and allow VHOST_IOTLB_INVALIDATE messages with null size. vhost_iotlb_del_range() will compute a correct end iova. Same for vhost_vdpa_iotlb_unmap(). Signed-off-by: Eric Auger Fixes: e2ae38cf3d91 ("vhost: fix hung thread due to erroneous iotlb entries") Cc: stable@vger.kernel.org # v5.17+ Acked-by: Jason Wang Message-Id: <20230927140544.205088-1-eric.auger@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c71d573f1c94..e0c181ad17e3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1458,9 +1458,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, goto done; } - if ((msg.type == VHOST_IOTLB_UPDATE || - msg.type == VHOST_IOTLB_INVALIDATE) && - msg.size == 0) { + if (msg.type == VHOST_IOTLB_UPDATE && msg.size == 0) { ret = -EINVAL; goto done; } From fa2e6947aa8844f25f5bad0d8cd1a541d9bc83eb Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Sat, 7 Oct 2023 14:43:09 +0800 Subject: [PATCH 129/212] virtio-crypto: handle config changed by work queue MST pointed out: config change callback is also handled incorrectly in this driver, it takes a mutex from interrupt context. Handle config changed by work queue instead. Cc: stable@vger.kernel.org Cc: Gonglei (Arei) Cc: Halil Pasic Cc: Michael S. Tsirkin Signed-off-by: zhenwei pi Message-Id: <20231007064309.844889-1-pizhenwei@bytedance.com> Signed-off-by: Michael S. Tsirkin --- drivers/crypto/virtio/virtio_crypto_common.h | 3 +++ drivers/crypto/virtio/virtio_crypto_core.c | 14 +++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index 59a4c0259456..154590e1f764 100644 --- a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -35,6 +35,9 @@ struct virtio_crypto { struct virtqueue *ctrl_vq; struct data_queue *data_vq; + /* Work struct for config space updates */ + struct work_struct config_work; + /* To protect the vq operations for the controlq */ spinlock_t ctrl_lock; diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index 94849fa3bd74..43a0838d31ff 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -335,6 +335,14 @@ static void virtcrypto_del_vqs(struct virtio_crypto *vcrypto) virtcrypto_free_queues(vcrypto); } +static void vcrypto_config_changed_work(struct work_struct *work) +{ + struct virtio_crypto *vcrypto = + container_of(work, struct virtio_crypto, config_work); + + virtcrypto_update_status(vcrypto); +} + static int virtcrypto_probe(struct virtio_device *vdev) { int err = -EFAULT; @@ -454,6 +462,8 @@ static int virtcrypto_probe(struct virtio_device *vdev) if (err) goto free_engines; + INIT_WORK(&vcrypto->config_work, vcrypto_config_changed_work); + return 0; free_engines: @@ -490,6 +500,7 @@ static void virtcrypto_remove(struct virtio_device *vdev) dev_info(&vdev->dev, "Start virtcrypto_remove.\n"); + flush_work(&vcrypto->config_work); if (virtcrypto_dev_started(vcrypto)) virtcrypto_dev_stop(vcrypto); virtio_reset_device(vdev); @@ -504,7 +515,7 @@ static void virtcrypto_config_changed(struct virtio_device *vdev) { struct virtio_crypto *vcrypto = vdev->priv; - virtcrypto_update_status(vcrypto); + schedule_work(&vcrypto->config_work); } #ifdef CONFIG_PM_SLEEP @@ -512,6 +523,7 @@ static int virtcrypto_freeze(struct virtio_device *vdev) { struct virtio_crypto *vcrypto = vdev->priv; + flush_work(&vcrypto->config_work); virtio_reset_device(vdev); virtcrypto_free_unused_reqs(vcrypto); if (virtcrypto_dev_started(vcrypto)) From 061b39fdfe7fd98946e67637213bcbb10a318cca Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 10 Oct 2023 11:11:18 +0800 Subject: [PATCH 130/212] virtio_pci: fix the common cfg map size The function vp_modern_map_capability() takes the size parameter, which corresponds to the size of virtio_pci_common_cfg. As a result, this indicates the size of memory area to map. Now the size is the size of virtio_pci_common_cfg, but some feature(such as the _F_RING_RESET) needs the virtio_pci_modern_common_cfg, so this commit changes the size to the size of virtio_pci_modern_common_cfg. Cc: stable@vger.kernel.org Fixes: 0b50cece0b78 ("virtio_pci: introduce helper to get/set queue reset") Signed-off-by: Xuan Zhuo Message-Id: <20231010031120.81272-3-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_pci_modern_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index aad7d9296e77..9cb601e16688 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -291,7 +291,7 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) err = -EINVAL; mdev->common = vp_modern_map_capability(mdev, common, sizeof(struct virtio_pci_common_cfg), 4, - 0, sizeof(struct virtio_pci_common_cfg), + 0, sizeof(struct virtio_pci_modern_common_cfg), NULL, NULL); if (!mdev->common) goto err_map_common; From f63955721a8020e979b99cc417dcb6da3106aa24 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Oct 2023 14:20:19 -0400 Subject: [PATCH 131/212] pNFS: Fix a hang in nfs4_evict_inode() We are not allowed to call pnfs_mark_matching_lsegs_return() without also holding a reference to the layout header, since doing so could lead to the reference count going to zero when we call pnfs_layout_remove_lseg(). This again can lead to a hang when we get to nfs4_evict_inode() and are unable to clear the layout pointer. pnfs_layout_return_unused_byserver() is guilty of this behaviour, and has been seen to trigger the refcount warning prior to a hang. Fixes: b6d49ecd1081 ("NFSv4: Fix a pNFS layout related use-after-free race when freeing the inode") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 306cba0b9e69..84343aefbbd6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2634,31 +2634,44 @@ pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo, return mode == 0; } -static int -pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data) +static int pnfs_layout_return_unused_byserver(struct nfs_server *server, + void *data) { const struct pnfs_layout_range *range = data; + const struct cred *cred; struct pnfs_layout_hdr *lo; struct inode *inode; + nfs4_stateid stateid; + enum pnfs_iomode iomode; + restart: rcu_read_lock(); list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) { - if (!pnfs_layout_can_be_returned(lo) || + inode = lo->plh_inode; + if (!inode || !pnfs_layout_can_be_returned(lo) || test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) continue; - inode = lo->plh_inode; spin_lock(&inode->i_lock); - if (!pnfs_should_return_unused_layout(lo, range)) { + if (!lo->plh_inode || + !pnfs_should_return_unused_layout(lo, range)) { spin_unlock(&inode->i_lock); continue; } + pnfs_get_layout_hdr(lo); + pnfs_set_plh_return_info(lo, range->iomode, 0); + if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, + range, 0) != 0 || + !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) { + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + pnfs_put_layout_hdr(lo); + cond_resched(); + goto restart; + } spin_unlock(&inode->i_lock); - inode = pnfs_grab_inode_layout_hdr(lo); - if (!inode) - continue; rcu_read_unlock(); - pnfs_mark_layout_for_return(inode, range); - iput(inode); + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); + pnfs_put_layout_hdr(lo); cond_resched(); goto restart; } From e1c6cfbb3bd1377e2ddcbe06cf8fb1ec323ea7d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Oct 2023 14:28:46 -0400 Subject: [PATCH 132/212] pNFS/flexfiles: Check the layout validity in ff_layout_mirror_prepare_stats Ensure that we check the layout pointer and validity after dereferencing it in ff_layout_mirror_prepare_stats. Fixes: 08e2e5bc6c9a ("pNFS/flexfiles: Clean up layoutstats") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index a1dc33864906..ef817a0475ff 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2520,9 +2520,9 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, return i; } -static int -ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) +static int ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) { + struct pnfs_layout_hdr *lo; struct nfs4_flexfile_layout *ff_layout; const int dev_count = PNFS_LAYOUTSTATS_MAXDEV; @@ -2533,11 +2533,14 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) return -ENOMEM; spin_lock(&args->inode->i_lock); - ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout); - args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, - &args->devinfo[0], - dev_count, - NFS4_FF_OP_LAYOUTSTATS); + lo = NFS_I(args->inode)->layout; + if (lo && pnfs_layout_is_valid(lo)) { + ff_layout = FF_LAYOUT_FROM_HDR(lo); + args->num_dev = ff_layout_mirror_prepare_stats( + &ff_layout->generic_hdr, &args->devinfo[0], dev_count, + NFS4_FF_OP_LAYOUTSTATS); + } else + args->num_dev = 0; spin_unlock(&args->inode->i_lock); if (!args->num_dev) { kfree(args->devinfo); From 969d63e1af3b3abe35a49b08218f3125131ac32f Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 6 Oct 2023 12:00:24 -0400 Subject: [PATCH 133/212] mm: zswap: fix pool refcount bug around shrink_worker() When a zswap store fails due to the limit, it acquires a pool reference and queues the shrinker. When the shrinker runs, it drops the reference. However, there can be multiple store attempts before the shrinker wakes up and runs once. This results in reference leaks and eventual saturation warnings for the pool refcount. Fix this by dropping the reference again when the shrinker is already queued. This ensures one reference per shrinker run. Link: https://lkml.kernel.org/r/20231006160024.170748-1-hannes@cmpxchg.org Fixes: 45190f01dd40 ("mm/zswap.c: add allocation hysteresis if pool limit is hit") Signed-off-by: Johannes Weiner Reported-by: Chris Mason Acked-by: Nhat Pham Cc: Vitaly Wool Cc: Domenico Cerasuolo Cc: [5.6+] Signed-off-by: Andrew Morton --- mm/zswap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 083c693602b8..37d2b1cb2ecb 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1383,8 +1383,8 @@ bool zswap_store(struct folio *folio) shrink: pool = zswap_pool_last_get(); - if (pool) - queue_work(shrink_wq, &pool->shrink_work); + if (pool && !queue_work(shrink_wq, &pool->shrink_work)) + zswap_pool_put(pool); goto reject; } From 92fe9dcbe4e109a7ce6bab3e452210a35b0ab493 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 5 Oct 2023 23:59:06 -0400 Subject: [PATCH 134/212] hugetlbfs: clear resv_map pointer if mmap fails Patch series "hugetlbfs: close race between MADV_DONTNEED and page fault", v7. Malloc libraries, like jemalloc and tcalloc, take decisions on when to call madvise independently from the code in the main application. This sometimes results in the application page faulting on an address, right after the malloc library has shot down the backing memory with MADV_DONTNEED. Usually this is harmless, because we always have some 4kB pages sitting around to satisfy a page fault. However, with hugetlbfs systems often allocate only the exact number of huge pages that the application wants. Due to TLB batching, hugetlbfs MADV_DONTNEED will free pages outside of any lock taken on the page fault path, which can open up the following race condition: CPU 1 CPU 2 MADV_DONTNEED unmap page shoot down TLB entry page fault fail to allocate a huge page killed with SIGBUS free page Fix that race by extending the hugetlb_vma_lock locking scheme to also cover private hugetlb mappings (with resv_map), and pulling the locking from __unmap_hugepage_final_range into helper functions called from zap_page_range_single. This ensures page faults stay locked out of the MADV_DONTNEED VMA until the huge pages have actually been freed. This patch (of 3): Hugetlbfs leaves a dangling pointer in the VMA if mmap fails. This has not been a problem so far, but other code in this patch series tries to follow that pointer. Link: https://lkml.kernel.org/r/20231006040020.3677377-1-riel@surriel.com Link: https://lkml.kernel.org/r/20231006040020.3677377-2-riel@surriel.com Fixes: 04ada095dcfc ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing") Signed-off-by: Mike Kravetz Signed-off-by: Rik van Riel Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 52d26072dfda..467fedd42453 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1138,8 +1138,7 @@ static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map) VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma); VM_BUG_ON_VMA(vma->vm_flags & VM_MAYSHARE, vma); - set_vma_private_data(vma, (get_vma_private_data(vma) & - HPAGE_RESV_MASK) | (unsigned long)map); + set_vma_private_data(vma, (unsigned long)map); } static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags) @@ -6811,8 +6810,10 @@ bool hugetlb_reserve_pages(struct inode *inode, */ if (chg >= 0 && add < 0) region_abort(resv_map, from, to, regions_needed); - if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) + if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { kref_put(&resv_map->refs, resv_map_release); + set_vma_resv_map(vma, NULL); + } return false; } From bf4916922c60f43efaa329744b3eef539aa6a2b2 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 5 Oct 2023 23:59:07 -0400 Subject: [PATCH 135/212] hugetlbfs: extend hugetlb_vma_lock to private VMAs Extend the locking scheme used to protect shared hugetlb mappings from truncate vs page fault races, in order to protect private hugetlb mappings (with resv_map) against MADV_DONTNEED. Add a read-write semaphore to the resv_map data structure, and use that from the hugetlb_vma_(un)lock_* functions, in preparation for closing the race between MADV_DONTNEED and page faults. Link: https://lkml.kernel.org/r/20231006040020.3677377-3-riel@surriel.com Fixes: 04ada095dcfc ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing") Signed-off-by: Rik van Riel Reviewed-by: Mike Kravetz Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 6 ++++++ mm/hugetlb.c | 41 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a30686e649f7..065ec022cbb0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -60,6 +60,7 @@ struct resv_map { long adds_in_progress; struct list_head region_cache; long region_cache_count; + struct rw_semaphore rw_sema; #ifdef CONFIG_CGROUP_HUGETLB /* * On private mappings, the counter to uncharge reservations is stored @@ -1233,6 +1234,11 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma) return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; } +static inline bool __vma_private_lock(struct vm_area_struct *vma) +{ + return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data; +} + /* * Safe version of huge_pte_offset() to check the locks. See comments * above huge_pte_offset(). diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 467fedd42453..d218ee2b6a99 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -97,6 +97,7 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); static void hugetlb_unshare_pmds(struct vm_area_struct *vma, unsigned long start, unsigned long end); +static struct resv_map *vma_resv_map(struct vm_area_struct *vma); static inline bool subpool_is_free(struct hugepage_subpool *spool) { @@ -267,6 +268,10 @@ void hugetlb_vma_lock_read(struct vm_area_struct *vma) struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; down_read(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + down_read(&resv_map->rw_sema); } } @@ -276,6 +281,10 @@ void hugetlb_vma_unlock_read(struct vm_area_struct *vma) struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; up_read(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + up_read(&resv_map->rw_sema); } } @@ -285,6 +294,10 @@ void hugetlb_vma_lock_write(struct vm_area_struct *vma) struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; down_write(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + down_write(&resv_map->rw_sema); } } @@ -294,17 +307,27 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma) struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; up_write(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + up_write(&resv_map->rw_sema); } } int hugetlb_vma_trylock_write(struct vm_area_struct *vma) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - if (!__vma_shareable_lock(vma)) - return 1; + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - return down_write_trylock(&vma_lock->rw_sema); + return down_write_trylock(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + return down_write_trylock(&resv_map->rw_sema); + } + + return 1; } void hugetlb_vma_assert_locked(struct vm_area_struct *vma) @@ -313,6 +336,10 @@ void hugetlb_vma_assert_locked(struct vm_area_struct *vma) struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; lockdep_assert_held(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + lockdep_assert_held(&resv_map->rw_sema); } } @@ -345,6 +372,11 @@ static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma) struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; __hugetlb_vma_unlock_write_put(vma_lock); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + /* no free for anon vmas, but still need to unlock */ + up_write(&resv_map->rw_sema); } } @@ -1068,6 +1100,7 @@ struct resv_map *resv_map_alloc(void) kref_init(&resv_map->refs); spin_lock_init(&resv_map->lock); INIT_LIST_HEAD(&resv_map->regions); + init_rwsem(&resv_map->rw_sema); resv_map->adds_in_progress = 0; /* From 2820b0f09be99f6406784b03a22dfc83e858449d Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 5 Oct 2023 23:59:08 -0400 Subject: [PATCH 136/212] hugetlbfs: close race between MADV_DONTNEED and page fault Malloc libraries, like jemalloc and tcalloc, take decisions on when to call madvise independently from the code in the main application. This sometimes results in the application page faulting on an address, right after the malloc library has shot down the backing memory with MADV_DONTNEED. Usually this is harmless, because we always have some 4kB pages sitting around to satisfy a page fault. However, with hugetlbfs systems often allocate only the exact number of huge pages that the application wants. Due to TLB batching, hugetlbfs MADV_DONTNEED will free pages outside of any lock taken on the page fault path, which can open up the following race condition: CPU 1 CPU 2 MADV_DONTNEED unmap page shoot down TLB entry page fault fail to allocate a huge page killed with SIGBUS free page Fix that race by pulling the locking from __unmap_hugepage_final_range into helper functions called from zap_page_range_single. This ensures page faults stay locked out of the MADV_DONTNEED VMA until the huge pages have actually been freed. Link: https://lkml.kernel.org/r/20231006040020.3677377-4-riel@surriel.com Fixes: 04ada095dcfc ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing") Signed-off-by: Rik van Riel Reviewed-by: Mike Kravetz Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 35 +++++++++++++++++++++++++++++++++-- mm/hugetlb.c | 36 +++++++++++++++++++++++------------- mm/memory.c | 13 ++++++++----- 3 files changed, 64 insertions(+), 20 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 065ec022cbb0..47d25a5e1933 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -139,7 +139,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *, zap_flags_t); -void __unmap_hugepage_range_final(struct mmu_gather *tlb, +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags); @@ -246,6 +246,25 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); +extern void __hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *begin, unsigned long *end); +extern void __hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details); + +static inline void hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_begin(vma, start, end); +} + +static inline void hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_end(vma, details); +} + void hugetlb_vma_lock_read(struct vm_area_struct *vma); void hugetlb_vma_unlock_read(struct vm_area_struct *vma); void hugetlb_vma_lock_write(struct vm_area_struct *vma); @@ -297,6 +316,18 @@ static inline void adjust_range_if_pmd_sharing_possible( { } +static inline void hugetlb_zap_begin( + struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} + +static inline void hugetlb_zap_end( + struct vm_area_struct *vma, + struct zap_details *details) +{ +} + static inline struct page *hugetlb_follow_page_mask( struct vm_area_struct *vma, unsigned long address, unsigned int flags, unsigned int *page_mask) @@ -442,7 +473,7 @@ static inline long hugetlb_change_protection( return 0; } -static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, +static inline void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d218ee2b6a99..1301ba7b2c9a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5306,9 +5306,9 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, return len + old_addr - old_end; } -static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct page *ref_page, zap_flags_t zap_flags) +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct page *ref_page, zap_flags_t zap_flags) { struct mm_struct *mm = vma->vm_mm; unsigned long address; @@ -5437,16 +5437,25 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct tlb_flush_mmu_tlbonly(tlb); } -void __unmap_hugepage_range_final(struct mmu_gather *tlb, - struct vm_area_struct *vma, unsigned long start, - unsigned long end, struct page *ref_page, - zap_flags_t zap_flags) +void __hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) { - hugetlb_vma_lock_write(vma); - i_mmap_lock_write(vma->vm_file->f_mapping); + if (!vma->vm_file) /* hugetlbfs_file_mmap error */ + return; - /* mmu notification performed in caller */ - __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); + adjust_range_if_pmd_sharing_possible(vma, start, end); + hugetlb_vma_lock_write(vma); + if (vma->vm_file) + i_mmap_lock_write(vma->vm_file->f_mapping); +} + +void __hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details) +{ + zap_flags_t zap_flags = details ? details->zap_flags : 0; + + if (!vma->vm_file) /* hugetlbfs_file_mmap error */ + return; if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ /* @@ -5459,11 +5468,12 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, * someone else. */ __hugetlb_vma_unlock_write_free(vma); - i_mmap_unlock_write(vma->vm_file->f_mapping); } else { - i_mmap_unlock_write(vma->vm_file->f_mapping); hugetlb_vma_unlock_write(vma); } + + if (vma->vm_file) + i_mmap_unlock_write(vma->vm_file->f_mapping); } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, diff --git a/mm/memory.c b/mm/memory.c index 6c264d2f969c..517221f01303 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1683,7 +1683,7 @@ static void unmap_single_vma(struct mmu_gather *tlb, if (vma->vm_file) { zap_flags_t zap_flags = details ? details->zap_flags : 0; - __unmap_hugepage_range_final(tlb, vma, start, end, + __unmap_hugepage_range(tlb, vma, start, end, NULL, zap_flags); } } else @@ -1728,8 +1728,12 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, start_addr, end_addr); mmu_notifier_invalidate_range_start(&range); do { - unmap_single_vma(tlb, vma, start_addr, end_addr, &details, + unsigned long start = start_addr; + unsigned long end = end_addr; + hugetlb_zap_begin(vma, &start, &end); + unmap_single_vma(tlb, vma, start, end, &details, mm_wr_locked); + hugetlb_zap_end(vma, &details); } while ((vma = mas_find(mas, tree_end - 1)) != NULL); mmu_notifier_invalidate_range_end(&range); } @@ -1753,9 +1757,7 @@ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, address, end); - if (is_vm_hugetlb_page(vma)) - adjust_range_if_pmd_sharing_possible(vma, &range.start, - &range.end); + hugetlb_zap_begin(vma, &range.start, &range.end); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); @@ -1766,6 +1768,7 @@ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unmap_single_vma(&tlb, vma, address, end, details, false); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); + hugetlb_zap_end(vma, details); } /** From babddbfb7d7d70ae7f10fedd75a45d8ad75fdddf Mon Sep 17 00:00:00 2001 From: Haibo Li Date: Mon, 9 Oct 2023 15:37:48 +0800 Subject: [PATCH 137/212] kasan: print the original fault addr when access invalid shadow when the checked address is illegal,the corresponding shadow address from kasan_mem_to_shadow may have no mapping in mmu table. Access such shadow address causes kernel oops. Here is a sample about oops on arm64(VA 39bit) with KASAN_SW_TAGS and KASAN_OUTLINE on: [ffffffb80aaaaaaa] pgd=000000005d3ce003, p4d=000000005d3ce003, pud=000000005d3ce003, pmd=0000000000000000 Internal error: Oops: 0000000096000006 [#1] PREEMPT SMP Modules linked in: CPU: 3 PID: 100 Comm: sh Not tainted 6.6.0-rc1-dirty #43 Hardware name: linux,dummy-virt (DT) pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __hwasan_load8_noabort+0x5c/0x90 lr : do_ib_ob+0xf4/0x110 ffffffb80aaaaaaa is the shadow address for efffff80aaaaaaaa. The problem is reading invalid shadow in kasan_check_range. The generic kasan also has similar oops. It only reports the shadow address which causes oops but not the original address. Commit 2f004eea0fc8("x86/kasan: Print original address on #GP") introduce to kasan_non_canonical_hook but limit it to KASAN_INLINE. This patch extends it to KASAN_OUTLINE mode. Link: https://lkml.kernel.org/r/20231009073748.159228-1-haibo.li@mediatek.com Fixes: 2f004eea0fc8("x86/kasan: Print original address on #GP") Signed-off-by: Haibo Li Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: AngeloGioacchino Del Regno Cc: Dmitry Vyukov Cc: Haibo Li Cc: Matthias Brugger Cc: Vincenzo Frascino Cc: Arnd Bergmann Cc: Kees Cook Signed-off-by: Andrew Morton --- include/linux/kasan.h | 6 +++--- mm/kasan/report.c | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 842623d708c2..6dbb5ded3cf9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -466,10 +466,10 @@ static inline void kasan_free_module_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -#ifdef CONFIG_KASAN_INLINE +#ifdef CONFIG_KASAN void kasan_non_canonical_hook(unsigned long addr); -#else /* CONFIG_KASAN_INLINE */ +#else /* CONFIG_KASAN */ static inline void kasan_non_canonical_hook(unsigned long addr) { } -#endif /* CONFIG_KASAN_INLINE */ +#endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff --git a/mm/kasan/report.c b/mm/kasan/report.c index ca4b6ff080a6..3974e4549c3e 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -621,9 +621,8 @@ void kasan_report_async(void) } #endif /* CONFIG_KASAN_HW_TAGS */ -#ifdef CONFIG_KASAN_INLINE /* - * With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high + * With CONFIG_KASAN, accesses to bogus pointers (outside the high * canonical half of the address space) cause out-of-bounds shadow memory reads * before the actual access. For addresses in the low canonical half of the * address space, as well as most non-canonical addresses, that out-of-bounds @@ -659,4 +658,3 @@ void kasan_non_canonical_hook(unsigned long addr) pr_alert("KASAN: %s in range [0x%016lx-0x%016lx]\n", bug_type, orig_addr, orig_addr + KASAN_GRANULE_SIZE - 1); } -#endif From 17c17567fe510857b18fe01b7a88027600e76ac6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Oct 2023 22:08:38 +0200 Subject: [PATCH 138/212] kasan: disable kasan_non_canonical_hook() for HW tags On arm64, building with CONFIG_KASAN_HW_TAGS now causes a compile-time error: mm/kasan/report.c: In function 'kasan_non_canonical_hook': mm/kasan/report.c:637:20: error: 'KASAN_SHADOW_OFFSET' undeclared (first use in this function) 637 | if (addr < KASAN_SHADOW_OFFSET) | ^~~~~~~~~~~~~~~~~~~ mm/kasan/report.c:637:20: note: each undeclared identifier is reported only once for each function it appears in mm/kasan/report.c:640:77: error: expected expression before ';' token 640 | orig_addr = (addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT; This was caused by removing the dependency on CONFIG_KASAN_INLINE that used to prevent this from happening. Use the more specific dependency on KASAN_SW_TAGS || KASAN_GENERIC to only ignore the function for hwasan mode. Link: https://lkml.kernel.org/r/20231016200925.984439-1-arnd@kernel.org Fixes: 12ec6a919b0f ("kasan: print the original fault addr when access invalid shadow") Signed-off-by: Arnd Bergmann Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Haibo Li Cc: Kees Cook Cc: Vincenzo Frascino Cc: AngeloGioacchino Del Regno Cc: Matthias Brugger Signed-off-by: Andrew Morton --- include/linux/kasan.h | 6 +++--- mm/kasan/report.c | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 6dbb5ded3cf9..71fa9a40fb5a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -466,10 +466,10 @@ static inline void kasan_free_module_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_non_canonical_hook(unsigned long addr); -#else /* CONFIG_KASAN */ +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline void kasan_non_canonical_hook(unsigned long addr) { } -#endif /* CONFIG_KASAN */ +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #endif /* LINUX_KASAN_H */ diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 3974e4549c3e..6e3cb118d20e 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -621,8 +621,9 @@ void kasan_report_async(void) } #endif /* CONFIG_KASAN_HW_TAGS */ +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) /* - * With CONFIG_KASAN, accesses to bogus pointers (outside the high + * With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high * canonical half of the address space) cause out-of-bounds shadow memory reads * before the actual access. For addresses in the low canonical half of the * address space, as well as most non-canonical addresses, that out-of-bounds @@ -658,3 +659,4 @@ void kasan_non_canonical_hook(unsigned long addr) pr_alert("KASAN: %s in range [0x%016lx-0x%016lx]\n", bug_type, orig_addr, orig_addr + KASAN_GRANULE_SIZE - 1); } +#endif From c5155d4ef4b2ac03cb5838b4060ab2ceb7dbda09 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sun, 8 Oct 2023 12:58:07 +0200 Subject: [PATCH 139/212] MAINTAINERS: Ondrej has moved Update my email-address in MAINTAINERS to . Also add .mailmap entries to map my old, now blocked, email address. Link: https://lkml.kernel.org/r/20231008105812.1084226-1-megi@xff.cz Signed-off-by: Ondrej Jirman Cc: Bjorn Andersson Cc: Heiko Stuebner Cc: Jakub Kicinski Cc: Jens Axboe Cc: Konrad Dybcio # qcom Cc: Mark Brown Cc: Qais Yousef Cc: Stephen Hemminger Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index a0a6efe87186..2084bf8ec2ab 100644 --- a/.mailmap +++ b/.mailmap @@ -452,6 +452,7 @@ Oleksij Rempel Oleksij Rempel Oleksij Rempel Oliver Upton +Ondřej Jirman Oza Pawandeep Pali Rohár Paolo 'Blaisorblade' Giarrusso diff --git a/MAINTAINERS b/MAINTAINERS index 35977b269d5e..68ff5b62a1d2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6748,7 +6748,7 @@ F: drivers/gpu/drm/panel/panel-sitronix-st7701.c DRM DRIVER FOR SITRONIX ST7703 PANELS M: Guido Günther R: Purism Kernel Team -R: Ondrej Jirman +R: Ondrej Jirman S: Maintained F: Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.yaml F: drivers/gpu/drm/panel/panel-sitronix-st7703.c From 76b7069bcc89dec33f03eb08abee165d0306b754 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 7 Oct 2023 20:04:32 +0000 Subject: [PATCH 140/212] mm/damon/sysfs: check DAMOS regions update progress from before_terminate() DAMON_SYSFS can receive DAMOS tried regions update request while kdamond is already out of the main loop and before_terminate callback (damon_sysfs_before_terminate() in this case) is not yet called. And damon_sysfs_handle_cmd() can further be finished before the callback is invoked. Then, damon_sysfs_before_terminate() unlocks damon_sysfs_lock, which is not locked by anyone. This happens because the callback function assumes damon_sysfs_cmd_request_callback() should be called before it. Check if the assumption was true before doing the unlock, to avoid this problem. Link: https://lkml.kernel.org/r/20231007200432.3110-1-sj@kernel.org Fixes: f1d13cacabe1 ("mm/damon/sysfs: implement DAMOS tried regions update command") Signed-off-by: SeongJae Park Cc: [6.2.x] Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index b86ba7b0a921..f60e56150feb 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1208,6 +1208,8 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx, return 0; } +static bool damon_sysfs_schemes_regions_updating; + static void damon_sysfs_before_terminate(struct damon_ctx *ctx) { struct damon_target *t, *next; @@ -1219,8 +1221,10 @@ static void damon_sysfs_before_terminate(struct damon_ctx *ctx) cmd = damon_sysfs_cmd_request.cmd; if (kdamond && ctx == kdamond->damon_ctx && (cmd == DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS || - cmd == DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES)) { + cmd == DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES) && + damon_sysfs_schemes_regions_updating) { damon_sysfs_schemes_update_regions_stop(ctx); + damon_sysfs_schemes_regions_updating = false; mutex_unlock(&damon_sysfs_lock); } @@ -1340,7 +1344,6 @@ static int damon_sysfs_commit_input(struct damon_sysfs_kdamond *kdamond) static int damon_sysfs_cmd_request_callback(struct damon_ctx *c) { struct damon_sysfs_kdamond *kdamond; - static bool damon_sysfs_schemes_regions_updating; bool total_bytes_only = false; int err = 0; From 002e39e9ece5589140236558a23c63ca4da45b68 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 11 Oct 2023 17:01:04 +0200 Subject: [PATCH 141/212] mailmap: map Bartosz's old address to the current one I no longer work for BayLibre but many DT bindings have my BL address in the maintainers entries. Map it to the email address I use for kernel development. Link: https://lkml.kernel.org/r/20231011150104.73863-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Suggested-by: Conor Dooley Cc: Bartosz Golaszewski Cc: Bjorn Andersson Cc: Heiko Stuebner Cc: Jakub Kicinski Cc: Jens Axboe Cc: Konrad Dybcio # qcom Cc: Qais Yousef Cc: Stephen Hemminger Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 2084bf8ec2ab..b0c07e7fbd2b 100644 --- a/.mailmap +++ b/.mailmap @@ -87,6 +87,7 @@ Baolin Wang Baolin Wang Bart Van Assche Bart Van Assche +Bartosz Golaszewski Ben Dooks Ben Dooks Ben Gardner From d2313c77592661e5ba259cdae3a120fb391054ff Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 11 Oct 2023 13:25:19 +0200 Subject: [PATCH 142/212] mailmap: correct email aliasing for Oleksij Rempel Ensure the current work email addresses for Oleksij Rempel are preserved and not overridden by private address. Alias the alternate work email to the primary work email address. Link: https://lkml.kernel.org/r/20231011112519.1427077-1-o.rempel@pengutronix.de Signed-off-by: Oleksij Rempel Cc: Jakub Kicinski Cc: Konrad Dybcio # qcom Cc: Mark Brown Cc: Qais Yousef Signed-off-by: Andrew Morton --- .mailmap | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index b0c07e7fbd2b..f56aafbc75be 100644 --- a/.mailmap +++ b/.mailmap @@ -450,8 +450,8 @@ Oleksandr Natalenko Oleksij Rempel Oleksij Rempel Oleksij Rempel -Oleksij Rempel -Oleksij Rempel +Oleksij Rempel +Oleksij Rempel Oliver Upton Ondřej Jirman Oza Pawandeep From e2de156b0d918b5ebe975577d25f9ef92379a756 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Thu, 12 Oct 2023 08:52:57 -0700 Subject: [PATCH 143/212] selftests/mm: include mman header to access MREMAP_DONTUNMAP identifier Definition for MREMAP_DONTUNMAP is not present in glibc older than 2.32 thus throwing an undeclared error when running make on mm. Including linux/mman.h solves the build error for people having older glibc. Link: https://lkml.kernel.org/r/20231012155257.891776-1-samasth.norway.ananda@oracle.com Fixes: 0183d777c29a ("selftests: mm: remove duplicate unneeded defines") Signed-off-by: Samasth Norway Ananda Reported-by: Linux Kernel Functional Testing Closes: https://lore.kernel.org/linux-mm/CA+G9fYvV-71XqpCr_jhdDfEtN701fBdG3q+=bafaZiGwUXy_aA@mail.gmail.com/ Tested-by: Muhammad Usama Anjum Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_dontunmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/mm/mremap_dontunmap.c b/tools/testing/selftests/mm/mremap_dontunmap.c index ca2359835e75..a06e73ec8568 100644 --- a/tools/testing/selftests/mm/mremap_dontunmap.c +++ b/tools/testing/selftests/mm/mremap_dontunmap.c @@ -7,6 +7,7 @@ */ #define _GNU_SOURCE #include +#include #include #include #include From 099d7439ce03d0e7bc8f0c3d7878b562f3a48d3d Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 12 Oct 2023 11:52:33 -0400 Subject: [PATCH 144/212] maple_tree: add GFP_KERNEL to allocations in mas_expected_entries() Users complained about OOM errors during fork without triggering compaction. This can be fixed by modifying the flags used in mas_expected_entries() so that the compaction will be triggered in low memory situations. Since mas_expected_entries() is only used during fork, the extra argument does not need to be passed through. Additionally, the two test_maple_tree test cases and one benchmark test were altered to use the correct locking type so that allocations would not trigger sleeping and thus fail. Testing was completed with lockdep atomic sleep detection. The additional locking change requires rwsem support additions to the tools/ directory through the use of pthreads pthread_rwlock_t. With this change test_maple_tree works in userspace, as a module, and in-kernel. Users may notice that the system gave up early on attempting to start new processes instead of attempting to reclaim memory. Link: https://lkml.kernel.org/r/20230915093243epcms1p46fa00bbac1ab7b7dca94acb66c44c456@epcms1p4 Link: https://lkml.kernel.org/r/20231012155233.2272446-1-Liam.Howlett@oracle.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Reviewed-by: Peng Zhang Cc: Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 2 +- lib/test_maple_tree.c | 35 ++++++++++++++++++++++---------- tools/include/linux/rwsem.h | 40 +++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 12 deletions(-) create mode 100644 tools/include/linux/rwsem.h diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 0e00a84e8e8f..bb24d84a4922 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5627,7 +5627,7 @@ int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) /* Internal nodes */ nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); /* Add working room for split (2 nodes) + new parents */ - mas_node_count(mas, nr_nodes + 3); + mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL); /* Detect if allocations run out */ mas->mas_flags |= MA_STATE_PREALLOC; diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 06959165e2f9..464eeb90d5ad 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -9,6 +9,7 @@ #include #include +#include #define MTREE_ALLOC_MAX 0x2000000000000Ul #define CONFIG_MAPLE_SEARCH @@ -1841,17 +1842,21 @@ static noinline void __init check_forking(struct maple_tree *mt) void *val; MA_STATE(mas, mt, 0, 0); MA_STATE(newmas, mt, 0, 0); + struct rw_semaphore newmt_lock; + + init_rwsem(&newmt_lock); for (i = 0; i <= nr_entries; i++) mtree_store_range(mt, i*10, i*10 + 5, xa_mk_value(i), GFP_KERNEL); mt_set_non_kernel(99999); - mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE); + mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&newmt, &newmt_lock); newmas.tree = &newmt; mas_reset(&newmas); mas_reset(&mas); - mas_lock(&newmas); + down_write(&newmt_lock); mas.index = 0; mas.last = 0; if (mas_expected_entries(&newmas, nr_entries)) { @@ -1866,10 +1871,10 @@ static noinline void __init check_forking(struct maple_tree *mt) } rcu_read_unlock(); mas_destroy(&newmas); - mas_unlock(&newmas); mt_validate(&newmt); mt_set_non_kernel(0); - mtree_destroy(&newmt); + __mt_destroy(&newmt); + up_write(&newmt_lock); } static noinline void __init check_iteration(struct maple_tree *mt) @@ -1980,6 +1985,10 @@ static noinline void __init bench_forking(struct maple_tree *mt) void *val; MA_STATE(mas, mt, 0, 0); MA_STATE(newmas, mt, 0, 0); + struct rw_semaphore newmt_lock; + + init_rwsem(&newmt_lock); + mt_set_external_lock(&newmt, &newmt_lock); for (i = 0; i <= nr_entries; i++) mtree_store_range(mt, i*10, i*10 + 5, @@ -1994,7 +2003,7 @@ static noinline void __init bench_forking(struct maple_tree *mt) mas.index = 0; mas.last = 0; rcu_read_lock(); - mas_lock(&newmas); + down_write(&newmt_lock); if (mas_expected_entries(&newmas, nr_entries)) { printk("OOM!"); BUG_ON(1); @@ -2005,11 +2014,11 @@ static noinline void __init bench_forking(struct maple_tree *mt) mas_store(&newmas, val); } mas_destroy(&newmas); - mas_unlock(&newmas); rcu_read_unlock(); mt_validate(&newmt); mt_set_non_kernel(0); - mtree_destroy(&newmt); + __mt_destroy(&newmt); + up_write(&newmt_lock); } } #endif @@ -2616,6 +2625,10 @@ static noinline void __init check_dup_gaps(struct maple_tree *mt, void *tmp; MA_STATE(mas, mt, 0, 0); MA_STATE(newmas, &newmt, 0, 0); + struct rw_semaphore newmt_lock; + + init_rwsem(&newmt_lock); + mt_set_external_lock(&newmt, &newmt_lock); if (!zero_start) i = 1; @@ -2625,9 +2638,9 @@ static noinline void __init check_dup_gaps(struct maple_tree *mt, mtree_store_range(mt, i*10, (i+1)*10 - gap, xa_mk_value(i), GFP_KERNEL); - mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE); + mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); mt_set_non_kernel(99999); - mas_lock(&newmas); + down_write(&newmt_lock); ret = mas_expected_entries(&newmas, nr_entries); mt_set_non_kernel(0); MT_BUG_ON(mt, ret != 0); @@ -2640,9 +2653,9 @@ static noinline void __init check_dup_gaps(struct maple_tree *mt, } rcu_read_unlock(); mas_destroy(&newmas); - mas_unlock(&newmas); - mtree_destroy(&newmt); + __mt_destroy(&newmt); + up_write(&newmt_lock); } /* Duplicate many sizes of trees. Mainly to test expected entry values */ diff --git a/tools/include/linux/rwsem.h b/tools/include/linux/rwsem.h new file mode 100644 index 000000000000..83971b3cbfce --- /dev/null +++ b/tools/include/linux/rwsem.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _TOOLS__RWSEM_H +#define _TOOLS__RWSEM_H + +#include + +struct rw_semaphore { + pthread_rwlock_t lock; +}; + +static inline int init_rwsem(struct rw_semaphore *sem) +{ + return pthread_rwlock_init(&sem->lock, NULL); +} + +static inline int exit_rwsem(struct rw_semaphore *sem) +{ + return pthread_rwlock_destroy(&sem->lock); +} + +static inline int down_read(struct rw_semaphore *sem) +{ + return pthread_rwlock_rdlock(&sem->lock); +} + +static inline int up_read(struct rw_semaphore *sem) +{ + return pthread_rwlock_unlock(&sem->lock); +} + +static inline int down_write(struct rw_semaphore *sem) +{ + return pthread_rwlock_wrlock(&sem->lock); +} + +static inline int up_write(struct rw_semaphore *sem) +{ + return pthread_rwlock_unlock(&sem->lock); +} +#endif /* _TOOLS_RWSEM_H */ From 379e4adfddd6a2f95a4f2029b8ddcbacf92b21f9 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 9 Oct 2023 10:59:01 -0400 Subject: [PATCH 145/212] NFSv4.1: fixup use EXCHGID4_FLAG_USE_PNFS_DS for DS server This patches fixes commit 51d674a5e488 "NFSv4.1: use EXCHGID4_FLAG_USE_PNFS_DS for DS server", purpose of that commit was to mark EXCHANGE_ID to the DS with the appropriate flag. However, connection to MDS can return both EXCHGID4_FLAG_USE_PNFS_DS and EXCHGID4_FLAG_USE_PNFS_MDS set but previous patch would only remember the USE_PNFS_DS and for the 2nd EXCHANGE_ID send that to the MDS. Instead, just mark the pnfs path exclusively. Fixes: 51d674a5e488 ("NFSv4.1: use EXCHGID4_FLAG_USE_PNFS_DS for DS server") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7016eaadf555..5ee283eb9660 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8870,8 +8870,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre /* Save the EXCHANGE_ID verifier session trunk tests */ memcpy(clp->cl_confirm.data, argp->verifier.data, sizeof(clp->cl_confirm.data)); - if (resp->flags & EXCHGID4_FLAG_USE_PNFS_DS) - set_bit(NFS_CS_DS, &clp->cl_flags); out: trace_nfs4_exchange_id(clp, status); rpc_put_task(task); From 2b32c76e2b0154b98b9322ae7546b8156cd703e6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 16 Oct 2023 13:12:47 -0700 Subject: [PATCH 146/212] nvme: sanitize metadata bounce buffer for reads User can request more metadata bytes than the device will write. Ensure kernel buffer is initialized so we're not leaking unsanitized memory on the copy-out. Fixes: 0b7f1f26f95a51a ("nvme: use the block layer for userspace passthrough metadata") Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index d8ff796fd5f2..747c879e8982 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -108,9 +108,13 @@ static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, if (!buf) goto out; - ret = -EFAULT; - if ((req_op(req) == REQ_OP_DRV_OUT) && copy_from_user(buf, ubuf, len)) - goto out_free_meta; + if (req_op(req) == REQ_OP_DRV_OUT) { + ret = -EFAULT; + if (copy_from_user(buf, ubuf, len)) + goto out_free_meta; + } else { + memset(buf, 0, len); + } bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); if (IS_ERR(bip)) { From f965b281fd872b2e18bd82dd97730db9834d0750 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Tue, 17 Oct 2023 10:28:45 +0200 Subject: [PATCH 147/212] nvmet-auth: complete a request only after freeing the dhchap pointers It may happen that the work to destroy a queue (for example nvmet_tcp_release_queue_work()) is started while an auth-send or auth-receive command is still completing. nvmet_sq_destroy() will block, waiting for all the references to the sq to be dropped, the last reference is then dropped when nvmet_req_complete() is called. When this happens, both nvmet_sq_destroy() and nvmet_execute_auth_send()/_receive() will free the dhchap pointers by calling nvmet_auth_sq_free(). Since there isn't any lock, the two threads may race against each other, causing double frees and memory corruptions, as reported by KASAN. Reproduced by stress blktests nvme/041 nvme/042 nvme/043 nvme nvme2: qid 0: authenticated with hash hmac(sha512) dhgroup ffdhe4096 ================================================================== BUG: KASAN: double-free in kfree+0xec/0x4b0 Call Trace: kfree+0xec/0x4b0 nvmet_auth_sq_free+0xe1/0x160 [nvmet] nvmet_execute_auth_send+0x482/0x16d0 [nvmet] process_one_work+0x8e5/0x1510 Allocated by task 191846: __kasan_kmalloc+0x81/0xa0 nvmet_auth_ctrl_sesskey+0xf6/0x380 [nvmet] nvmet_auth_reply+0x119/0x990 [nvmet] Freed by task 143270: kfree+0xec/0x4b0 nvmet_auth_sq_free+0xe1/0x160 [nvmet] process_one_work+0x8e5/0x1510 Fix this bug by calling nvmet_req_complete() only after freeing the pointers, so we will prevent the race by holding the sq reference. V2: remove redundant code Fixes: db1312dd9548 ("nvmet: implement basic In-Band Authentication") Signed-off-by: Maurizio Lombardi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/fabrics-cmd-auth.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 586458f765f1..1d9854484e2e 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -333,19 +333,21 @@ void nvmet_execute_auth_send(struct nvmet_req *req) __func__, ctrl->cntlid, req->sq->qid, status, req->error_loc); req->cqe->result.u64 = 0; - nvmet_req_complete(req, status); if (req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2 && req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) { unsigned long auth_expire_secs = ctrl->kato ? ctrl->kato : 120; mod_delayed_work(system_wq, &req->sq->auth_expired_work, auth_expire_secs * HZ); - return; + goto complete; } /* Final states, clear up variables */ nvmet_auth_sq_free(req->sq); if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) nvmet_ctrl_fatal_error(ctrl); + +complete: + nvmet_req_complete(req, status); } static int nvmet_auth_challenge(struct nvmet_req *req, void *d, int al) @@ -514,11 +516,12 @@ void nvmet_execute_auth_receive(struct nvmet_req *req) kfree(d); done: req->cqe->result.u64 = 0; - nvmet_req_complete(req, status); + if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2) nvmet_auth_sq_free(req->sq); else if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE1) { nvmet_auth_sq_free(req->sq); nvmet_ctrl_fatal_error(ctrl); } + nvmet_req_complete(req, status); } From 5c3f4066462a5f6cac04d3dd81c9f551fabbc6c7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 12 Oct 2023 11:13:51 -0700 Subject: [PATCH 148/212] nvme-pci: add BOGUS_NID for Intel 0a54 device These ones claim cmic and nmic capable, so need special consideration to ignore their duplicate identifiers. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217981 Reported-by: welsh@cassens.com Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 347cb5daebc3..3f0c9ee09a12 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3329,7 +3329,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES | - NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + NVME_QUIRK_IGNORE_DEV_SUBNQN | + NVME_QUIRK_BOGUS_NID, }, { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, From 828d63042aeca132a93938b98dc7f1a6c97bbc51 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 3 Oct 2023 08:42:13 +0200 Subject: [PATCH 149/212] accel/ivpu: Don't enter d0i3 during FLR Avoid HW bug on some platforms where we enter D0i3 state and CPU is in low power states (C8 or above). Fixes: 852be13f3bd3 ("accel/ivpu: Add PM support") Cc: stable@vger.kernel.org Signed-off-by: Jacek Lawrynowicz Reviewed-by: Stanislaw Gruszka Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20231003064213.1527327-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 11 ++++++++--- drivers/accel/ivpu/ivpu_drv.h | 1 + drivers/accel/ivpu/ivpu_hw.h | 8 ++++++++ drivers/accel/ivpu/ivpu_hw_37xx.c | 1 + drivers/accel/ivpu/ivpu_hw_40xx.c | 1 + drivers/accel/ivpu/ivpu_pm.c | 3 ++- 6 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 467a60235370..7e9359611d69 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -367,14 +367,19 @@ int ivpu_boot(struct ivpu_device *vdev) return 0; } -int ivpu_shutdown(struct ivpu_device *vdev) +void ivpu_prepare_for_reset(struct ivpu_device *vdev) { - int ret; - ivpu_hw_irq_disable(vdev); disable_irq(vdev->irq); ivpu_ipc_disable(vdev); ivpu_mmu_disable(vdev); +} + +int ivpu_shutdown(struct ivpu_device *vdev) +{ + int ret; + + ivpu_prepare_for_reset(vdev); ret = ivpu_hw_power_down(vdev); if (ret) diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 03b3d6532fb6..2adc349126bb 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -151,6 +151,7 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link); int ivpu_boot(struct ivpu_device *vdev); int ivpu_shutdown(struct ivpu_device *vdev); +void ivpu_prepare_for_reset(struct ivpu_device *vdev); static inline u8 ivpu_revision(struct ivpu_device *vdev) { diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index ab341237bcf9..1079e06255ba 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -13,6 +13,7 @@ struct ivpu_hw_ops { int (*power_up)(struct ivpu_device *vdev); int (*boot_fw)(struct ivpu_device *vdev); int (*power_down)(struct ivpu_device *vdev); + int (*reset)(struct ivpu_device *vdev); bool (*is_idle)(struct ivpu_device *vdev); void (*wdt_disable)(struct ivpu_device *vdev); void (*diagnose_failure)(struct ivpu_device *vdev); @@ -91,6 +92,13 @@ static inline int ivpu_hw_power_down(struct ivpu_device *vdev) return vdev->hw->ops->power_down(vdev); }; +static inline int ivpu_hw_reset(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, PM, "HW reset\n"); + + return vdev->hw->ops->reset(vdev); +}; + static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev) { vdev->hw->ops->wdt_disable(vdev); diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 9eae1c241bc0..976019429164 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -1029,6 +1029,7 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = { .power_up = ivpu_hw_37xx_power_up, .is_idle = ivpu_hw_37xx_is_idle, .power_down = ivpu_hw_37xx_power_down, + .reset = ivpu_hw_37xx_reset, .boot_fw = ivpu_hw_37xx_boot_fw, .wdt_disable = ivpu_hw_37xx_wdt_disable, .diagnose_failure = ivpu_hw_37xx_diagnose_failure, diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 8bdb59a45da6..85171a408363 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -1179,6 +1179,7 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = { .power_up = ivpu_hw_40xx_power_up, .is_idle = ivpu_hw_40xx_is_idle, .power_down = ivpu_hw_40xx_power_down, + .reset = ivpu_hw_40xx_reset, .boot_fw = ivpu_hw_40xx_boot_fw, .wdt_disable = ivpu_hw_40xx_wdt_disable, .diagnose_failure = ivpu_hw_40xx_diagnose_failure, diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index e6f27daf5560..ffff2496e8e8 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -261,7 +261,8 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) ivpu_dbg(vdev, PM, "Pre-reset..\n"); atomic_inc(&vdev->pm->reset_counter); atomic_set(&vdev->pm->in_reset, 1); - ivpu_shutdown(vdev); + ivpu_prepare_for_reset(vdev); + ivpu_hw_reset(vdev); ivpu_pm_prepare_cold_boot(vdev); ivpu_jobs_abort_all(vdev); ivpu_dbg(vdev, PM, "Pre-reset done.\n"); From 610b5d219d1ccac8064556310cc0e62e3c202389 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 17 Oct 2023 14:13:53 +0200 Subject: [PATCH 150/212] Revert "accel/ivpu: Use cached buffers for FW loading" This reverts commit 645d694559cab36fe6a57c717efcfa27d9321396. The commit cause issues with memory access from the device side. Switch back to write-combined memory mappings until the issues will be properly addressed. Add extra wmb() needed when boot_params->save_restore_ret_address() is modified. Reviewed-by: Karol Wachowski Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20231017121353.532466-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_fw.c | 9 ++++----- drivers/accel/ivpu/ivpu_gem.h | 5 ----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 0191cf8e5964..a277bbae78fc 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -220,8 +220,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev) if (ret) return ret; - fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, - DRM_IVPU_BO_CACHED | DRM_IVPU_BO_NOSNOOP); + fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC); if (!fw->mem) { ivpu_err(vdev, "Failed to allocate firmware runtime memory\n"); return -ENOMEM; @@ -331,7 +330,7 @@ int ivpu_fw_load(struct ivpu_device *vdev) memset(start, 0, size); } - clflush_cache_range(fw->mem->kvaddr, fw->mem->base.size); + wmb(); /* Flush WC buffers after writing fw->mem */ return 0; } @@ -433,7 +432,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params if (!ivpu_fw_is_cold_boot(vdev)) { boot_params->save_restore_ret_address = 0; vdev->pm->is_warmboot = true; - clflush_cache_range(vdev->fw->mem->kvaddr, SZ_4K); + wmb(); /* Flush WC buffers after writing save_restore_ret_address */ return; } @@ -495,7 +494,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev); boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev); - clflush_cache_range(vdev->fw->mem->kvaddr, SZ_4K); + wmb(); /* Flush WC buffers after writing bootparams */ ivpu_fw_boot_params_print(vdev, boot_params); } diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index f4130586ff1b..6b0ceda5f253 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -8,8 +8,6 @@ #include #include -#define DRM_IVPU_BO_NOSNOOP 0x10000000 - struct dma_buf; struct ivpu_bo_ops; struct ivpu_file_priv; @@ -85,9 +83,6 @@ static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo) static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) { - if (bo->flags & DRM_IVPU_BO_NOSNOOP) - return false; - return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; } From 8f5ad367e8b884772945c6c9fb622ac94b7d3e32 Mon Sep 17 00:00:00 2001 From: "Wludzik, Jozef" Date: Wed, 18 Oct 2023 13:01:13 +0200 Subject: [PATCH 151/212] accel/ivpu: Extend address range for MMU mmap Allow to use whole address range in MMU context mmap which is up to 48 bits. Return invalid argument from MMU context mmap in case address is not aligned to MMU page size, address is below MMU page size or address is greater then 47 bits. This fixes problem disallowing to run large models on VPU4 Signed-off-by: Wludzik, Jozef Reviewed-by: Stanislaw Gruszka Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20231018110113.547208-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_mmu_context.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index 1d2e554e2c4a..ce94f4029127 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -11,6 +11,7 @@ #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" +#define IVPU_MMU_VPU_ADDRESS_MASK GENMASK(47, 12) #define IVPU_MMU_PGD_INDEX_MASK GENMASK(47, 39) #define IVPU_MMU_PUD_INDEX_MASK GENMASK(38, 30) #define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21) @@ -328,12 +329,8 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) return -EINVAL; - /* - * VPU is only 32 bit, but DMA engine is 38 bit - * Ranges < 2 GB are reserved for VPU internal registers - * Limit range to 8 GB - */ - if (vpu_addr < SZ_2G || vpu_addr > SZ_8G) + + if (vpu_addr & ~IVPU_MMU_VPU_ADDRESS_MASK) return -EINVAL; prot = IVPU_MMU_ENTRY_MAPPED; From 32671e3799ca2e4590773fd0e63aaa4229e50c06 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 18 Oct 2023 13:56:54 +0200 Subject: [PATCH 152/212] perf: Disallow mis-matched inherited group reads Because group consistency is non-atomic between parent (filedesc) and children (inherited) events, it is possible for PERF_FORMAT_GROUP read() to try and sum non-matching counter groups -- with non-sensical results. Add group_generation to distinguish the case where a parent group removes and adds an event and thus has the same number, but a different configuration of events as inherited groups. This became a problem when commit fa8c269353d5 ("perf/core: Invert perf_read_group() loops") flipped the order of child_list and sibling_list. Previously it would iterate the group (sibling_list) first, and for each sibling traverse the child_list. In this order, only the group composition of the parent is relevant. By flipping the order the group composition of the child (inherited) events becomes an issue and the mis-match in group composition becomes evident. That said; even prior to this commit, while reading of a group that is not equally inherited was not broken, it still made no sense. (Ab)use ECHILD as error return to indicate issues with child process group composition. Fixes: fa8c269353d5 ("perf/core: Invert perf_read_group() loops") Reported-by: Budimir Markovic Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231018115654.GK33217@noisy.programming.kicks-ass.net --- include/linux/perf_event.h | 1 + kernel/events/core.c | 39 ++++++++++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e85cd1c0eaf3..7b5406e3288d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -704,6 +704,7 @@ struct perf_event { /* The cumulative AND of all event_caps for events in this group. */ int group_caps; + unsigned int group_generation; struct perf_event *group_leader; /* * event->pmu will always point to pmu in which this event belongs. diff --git a/kernel/events/core.c b/kernel/events/core.c index 4c72a41f11af..d0663b9324e7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1954,6 +1954,7 @@ static void perf_group_attach(struct perf_event *event) list_add_tail(&event->sibling_list, &group_leader->sibling_list); group_leader->nr_siblings++; + group_leader->group_generation++; perf_event__header_size(group_leader); @@ -2144,6 +2145,7 @@ static void perf_group_detach(struct perf_event *event) if (leader != event) { list_del_init(&event->sibling_list); event->group_leader->nr_siblings--; + event->group_leader->group_generation++; goto out; } @@ -5440,7 +5442,7 @@ static int __perf_read_group_add(struct perf_event *leader, u64 read_format, u64 *values) { struct perf_event_context *ctx = leader->ctx; - struct perf_event *sub; + struct perf_event *sub, *parent; unsigned long flags; int n = 1; /* skip @nr */ int ret; @@ -5450,6 +5452,33 @@ static int __perf_read_group_add(struct perf_event *leader, return ret; raw_spin_lock_irqsave(&ctx->lock, flags); + /* + * Verify the grouping between the parent and child (inherited) + * events is still in tact. + * + * Specifically: + * - leader->ctx->lock pins leader->sibling_list + * - parent->child_mutex pins parent->child_list + * - parent->ctx->mutex pins parent->sibling_list + * + * Because parent->ctx != leader->ctx (and child_list nests inside + * ctx->mutex), group destruction is not atomic between children, also + * see perf_event_release_kernel(). Additionally, parent can grow the + * group. + * + * Therefore it is possible to have parent and child groups in a + * different configuration and summing over such a beast makes no sense + * what so ever. + * + * Reject this. + */ + parent = leader->parent; + if (parent && + (parent->group_generation != leader->group_generation || + parent->nr_siblings != leader->nr_siblings)) { + ret = -ECHILD; + goto unlock; + } /* * Since we co-schedule groups, {enabled,running} times of siblings @@ -5483,8 +5512,9 @@ static int __perf_read_group_add(struct perf_event *leader, values[n++] = atomic64_read(&sub->lost_samples); } +unlock: raw_spin_unlock_irqrestore(&ctx->lock, flags); - return 0; + return ret; } static int perf_read_group(struct perf_event *event, @@ -5503,10 +5533,6 @@ static int perf_read_group(struct perf_event *event, values[0] = 1 + leader->nr_siblings; - /* - * By locking the child_mutex of the leader we effectively - * lock the child list of all siblings.. XXX explain how. - */ mutex_lock(&leader->child_mutex); ret = __perf_read_group_add(leader, read_format, values); @@ -13346,6 +13372,7 @@ static int inherit_group(struct perf_event *parent_event, !perf_get_aux_event(child_ctr, leader)) return -EINVAL; } + leader->group_generation = parent_event->group_generation; return 0; } From 62140a1e4dec4594d5d1e1d353747bf2ef434e8b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 17 Oct 2023 17:18:06 +0300 Subject: [PATCH 153/212] Revert "pinctrl: avoid unsafe code pattern in find_pinctrl()" The commit breaks MMC enumeration on the Intel Merrifield plaform. Before: [ 36.439057] mmc0: SDHCI controller on PCI [0000:00:01.0] using ADMA [ 36.450924] mmc2: SDHCI controller on PCI [0000:00:01.3] using ADMA [ 36.459355] mmc1: SDHCI controller on PCI [0000:00:01.2] using ADMA [ 36.706399] mmc0: new DDR MMC card at address 0001 [ 37.058972] mmc2: new ultra high speed DDR50 SDIO card at address 0001 [ 37.278977] mmcblk0: mmc0:0001 H4G1d 3.64 GiB [ 37.297300] mmcblk0: p1 p2 p3 p4 p5 p6 p7 p8 p9 p10 After: [ 36.436704] mmc2: SDHCI controller on PCI [0000:00:01.3] using ADMA [ 36.436720] mmc1: SDHCI controller on PCI [0000:00:01.0] using ADMA [ 36.463685] mmc0: SDHCI controller on PCI [0000:00:01.2] using ADMA [ 36.720627] mmc1: new DDR MMC card at address 0001 [ 37.068181] mmc2: new ultra high speed DDR50 SDIO card at address 0001 [ 37.279998] mmcblk1: mmc1:0001 H4G1d 3.64 GiB [ 37.302670] mmcblk1: p1 p2 p3 p4 p5 p6 p7 p8 p9 p10 This reverts commit c153a4edff6ab01370fcac8e46f9c89cca1060c2. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231017141806.535191-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index e2f7519bef04..e9dc9638120a 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1022,20 +1022,17 @@ static int add_setting(struct pinctrl *p, struct pinctrl_dev *pctldev, static struct pinctrl *find_pinctrl(struct device *dev) { - struct pinctrl *entry, *p = NULL; + struct pinctrl *p; mutex_lock(&pinctrl_list_mutex); - - list_for_each_entry(entry, &pinctrl_list, node) { - if (entry->dev == dev) { - p = entry; - kref_get(&p->users); - break; + list_for_each_entry(p, &pinctrl_list, node) + if (p->dev == dev) { + mutex_unlock(&pinctrl_list_mutex); + return p; } - } mutex_unlock(&pinctrl_list_mutex); - return p; + return NULL; } static void pinctrl_free(struct pinctrl *p, bool inlist); @@ -1143,6 +1140,7 @@ struct pinctrl *pinctrl_get(struct device *dev) p = find_pinctrl(dev); if (p) { dev_dbg(dev, "obtain a copy of previously claimed pinctrl\n"); + kref_get(&p->users); return p; } From 97ac489775f26acfd46a8a60c2f84ce7cc79fa4b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 18 Oct 2023 12:59:56 +0300 Subject: [PATCH 154/212] fanotify: limit reporting of event with non-decodeable file handles Commit a95aef69a740 ("fanotify: support reporting non-decodeable file handles") merged in v6.5-rc1, added the ability to use an fanotify group with FAN_REPORT_FID mode to watch filesystems that do not support nfs export, but do know how to encode non-decodeable file handles, with the newly introduced AT_HANDLE_FID flag. At the time that this commit was merged, there were no filesystems in-tree with those traits. Commit 16aac5ad1fa9 ("ovl: support encoding non-decodable file handles"), merged in v6.6-rc1, added this trait to overlayfs, thus allowing fanotify watching of overlayfs with FAN_REPORT_FID mode. In retrospect, allowing an fanotify filesystem/mount mark on such filesystem in FAN_REPORT_FID mode will result in getting events with file handles, without the ability to resolve the filesystem objects from those file handles (i.e. no open_by_handle_at() support). For v6.6, the safer option would be to allow this mode for inode marks only, where the caller has the opportunity to use name_to_handle_at() at the time of setting the mark. In the future we can revise this decision. Fixes: a95aef69a740 ("fanotify: support reporting non-decodeable file handles") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20231018100000.2453965-2-amir73il@gmail.com> --- fs/notify/fanotify/fanotify_user.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f69c451018e3..62fe0b679e58 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1585,16 +1585,25 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) } /* Check if filesystem can encode a unique fid */ -static int fanotify_test_fid(struct dentry *dentry) +static int fanotify_test_fid(struct dentry *dentry, unsigned int flags) { + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; + const struct export_operations *nop = dentry->d_sb->s_export_op; + /* - * We need to make sure that the file system supports at least - * encoding a file handle so user can use name_to_handle_at() to - * compare fid returned with event to the file handle of watched - * objects. However, even the relaxed AT_HANDLE_FID flag requires - * at least empty export_operations for ecoding unique file ids. + * We need to make sure that the filesystem supports encoding of + * file handles so user can use name_to_handle_at() to compare fids + * reported with events to the file handle of watched objects. */ - if (!dentry->d_sb->s_export_op) + if (!nop) + return -EOPNOTSUPP; + + /* + * For sb/mount mark, we also need to make sure that the filesystem + * supports decoding file handles, so user has a way to map back the + * reported fids to filesystem objects. + */ + if (mark_type != FAN_MARK_INODE && !nop->fh_to_dentry) return -EOPNOTSUPP; return 0; @@ -1812,7 +1821,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (ret) goto path_put_and_out; - ret = fanotify_test_fid(path.dentry); + ret = fanotify_test_fid(path.dentry, flags); if (ret) goto path_put_and_out; From c1ae1c59c8c6e0b66a718308c623e0cb394dab6b Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 17 Oct 2023 15:37:29 +0200 Subject: [PATCH 155/212] s390/pci: fix iommu bitmap allocation Since the fixed commits both zdev->iommu_bitmap and zdev->lazy_bitmap are allocated as vzalloc(zdev->iommu_pages / 8). The problem is that zdev->iommu_bitmap is a pointer to unsigned long but the above only yields an allocation that is a multiple of sizeof(unsigned long) which is 8 on s390x if the number of IOMMU pages is a multiple of 64. This in turn is the case only if the effective IOMMU aperture is a multiple of 64 * 4K = 256K. This is usually the case and so didn't cause visible issues since both the virt_to_phys(high_memory) reduced limit and hardware limits use nice numbers. Under KVM, and in particular with QEMU limiting the IOMMU aperture to the vfio DMA limit (default 65535), it is possible for the reported aperture not to be a multiple of 256K however. In this case we end up with an iommu_bitmap whose allocation is not a multiple of 8 causing bitmap operations to access it out of bounds. Sadly we can't just fix this in the obvious way and use bitmap_zalloc() because for large RAM systems (tested on 8 TiB) the zdev->iommu_bitmap grows too large for kmalloc(). So add our own bitmap_vzalloc() wrapper. This might be a candidate for common code, but this area of code will be replaced by the upcoming conversion to use the common code DMA API on s390 so just add a local routine. Fixes: 224593215525 ("s390/pci: use virtual memory for iommu bitmap") Fixes: 13954fd6913a ("s390/pci_dma: improve lazy flush for unmap") Cc: stable@vger.kernel.org Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_dma.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 2d9b01d7ca4c..99209085c75b 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -564,6 +564,17 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, s->dma_length = 0; } } + +static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags) +{ + size_t n = BITS_TO_LONGS(bits); + size_t bytes; + + if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes))) + return NULL; + + return vzalloc(bytes); +} int zpci_dma_init_device(struct zpci_dev *zdev) { @@ -604,13 +615,13 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->end_dma - zdev->start_dma + 1); zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); + zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); if (!zdev->iommu_bitmap) { rc = -ENOMEM; goto free_dma_table; } if (!s390_iommu_strict) { - zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8); + zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); if (!zdev->lazy_bitmap) { rc = -ENOMEM; goto free_bitmap; From bd9e54a42ce26026d67963c21b3fdfe8c7e68430 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 18 Oct 2023 18:01:44 +0200 Subject: [PATCH 156/212] docs: rust: update Rust docs output path The Rust code documentation output path moved from `rust/doc` to `Documentation/output/rust/rustdoc`, thus update the old reference. Fixes: 48fadf440075 ("docs: Move rustdoc output, cross-reference it") Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20231018160145.1017340-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- Documentation/rust/general-information.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/rust/general-information.rst b/Documentation/rust/general-information.rst index 49029ee82e55..081397827a7e 100644 --- a/Documentation/rust/general-information.rst +++ b/Documentation/rust/general-information.rst @@ -29,7 +29,7 @@ target with the same invocation used for compilation, e.g.:: To read the docs locally in your web browser, run e.g.:: - xdg-open rust/doc/kernel/index.html + xdg-open Documentation/output/rust/rustdoc/kernel/index.html To learn about how to write the documentation, please see coding-guidelines.rst. From 1db773da58df20772dcc037a47163ce472d39c4d Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 18 Oct 2023 18:01:45 +0200 Subject: [PATCH 157/212] kbuild: remove old Rust docs output path The Rust code documentation output path moved from `rust/doc` to `Documentation/output/rust/rustdoc`. The `make cleandocs` target takes care of cleaning it now since it is integrated with the rest of the documentation. Thus remove the old reference. Fixes: 48fadf440075 ("docs: Move rustdoc output, cross-reference it") Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20231018160145.1017340-2-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 373649c7374e..040018111f32 100644 --- a/Makefile +++ b/Makefile @@ -1474,7 +1474,7 @@ endif # CONFIG_MODULES # Directories & files removed with 'make clean' CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin modules.builtin.modinfo modules.nsdeps \ - compile_commands.json .thinlto-cache rust/test rust/doc \ + compile_commands.json .thinlto-cache rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c # Directories & files removed with 'make mrproper' From cfd96726e61136e68a168813cedc4084f626208b Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 18 Oct 2023 17:55:27 +0200 Subject: [PATCH 158/212] rust: docs: fix logo replacement The static files placement by `rustdoc` changed in Rust 1.67.0 [1], but the custom code we have to replace the logo in the generated HTML files did not get updated. Thus update it to have the Linux logo again in the output. Hopefully `rustdoc` will eventually support a custom logo from a local file [2], so that we do not need to maintain this hack on our side. Link: https://github.com/rust-lang/rust/pull/101702 [1] Link: https://github.com/rust-lang/rfcs/pull/3226 [2] Fixes: 3ed03f4da06e ("rust: upgrade to Rust 1.68.2") Cc: stable@vger.kernel.org Tested-by: Benno Lossin Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20231018155527.1015059-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/rust/Makefile b/rust/Makefile index 1e78c82a18a8..7dbf9abe0d01 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -93,15 +93,14 @@ quiet_cmd_rustdoc = RUSTDOC $(if $(rustdoc_host),H, ) $< # and then retouch the generated files. rustdoc: rustdoc-core rustdoc-macros rustdoc-compiler_builtins \ rustdoc-alloc rustdoc-kernel - $(Q)cp $(srctree)/Documentation/images/logo.svg $(rustdoc_output) - $(Q)cp $(srctree)/Documentation/images/COPYING-logo $(rustdoc_output) + $(Q)cp $(srctree)/Documentation/images/logo.svg $(rustdoc_output)/static.files/ + $(Q)cp $(srctree)/Documentation/images/COPYING-logo $(rustdoc_output)/static.files/ $(Q)find $(rustdoc_output) -name '*.html' -type f -print0 | xargs -0 sed -Ei \ - -e 's:rust-logo\.svg:logo.svg:g' \ - -e 's:rust-logo\.png:logo.svg:g' \ - -e 's:favicon\.svg:logo.svg:g' \ - -e 's:::g' - $(Q)echo '.logo-container > img { object-fit: contain; }' \ - >> $(rustdoc_output)/rustdoc.css + -e 's:rust-logo-[0-9a-f]+\.svg:logo.svg:g' \ + -e 's:favicon-[0-9a-f]+\.svg:logo.svg:g' \ + -e 's:::g' + $(Q)for f in $(rustdoc_output)/static.files/rustdoc-*.css; do \ + echo ".logo-container > img { object-fit: contain; }" >> $$f; done rustdoc-macros: private rustdoc_host = yes rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \ From 3ac974796e5d94509b85a403449132ea660127c2 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Thu, 19 Oct 2023 09:41:36 -0700 Subject: [PATCH 159/212] iomap: fix short copy in iomap_write_iter() Starting with commit 5d8edfb900d5 ("iomap: Copy larger chunks from userspace"), iomap_write_iter() can get into endless loop. This can be reproduced with LTP writev07 which uses partially valid iovecs: struct iovec wr_iovec[] = { { buffer, 64 }, { bad_addr, 64 }, { buffer + 64, 64 }, { buffer + 64 * 2, 64 }, }; commit bc1bb416bbb9 ("generic_perform_write()/iomap_write_actor(): saner logics for short copy") previously introduced the logic, which made short copy retry in next iteration with amount of "bytes" it managed to copy: if (unlikely(status == 0)) { /* * A short copy made iomap_write_end() reject the * thing entirely. Might be memory poisoning * halfway through, might be a race with munmap, * might be severe memory pressure. */ if (copied) bytes = copied; However, since 5d8edfb900d5 "bytes" is no longer carried into next iteration, because it is now always initialized at the beginning of the loop. And for iov_iter_count < PAGE_SIZE, "bytes" ends up with same value as previous iteration, making the loop retry same copy over and over, which leads to writev07 testcase hanging. Make next iteration retry with amount of bytes we managed to copy. Fixes: 5d8edfb900d5 ("iomap: Copy larger chunks from userspace") Signed-off-by: Jan Stancek Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 5db54ca29a35..2bc0aa23fde3 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -881,8 +881,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ + bytes = iov_iter_count(i); +retry: offset = pos & (chunk - 1); - bytes = min(chunk - offset, iov_iter_count(i)); + bytes = min(chunk - offset, bytes); status = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags); if (unlikely(status)) @@ -933,10 +935,12 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) * halfway through, might be a race with munmap, * might be severe memory pressure. */ - if (copied) - bytes = copied; if (chunk > PAGE_SIZE) chunk /= 2; + if (copied) { + bytes = copied; + goto retry; + } } else { pos += status; written += status; From 51b79f33817544e3b4df838d86e8e8e4388ff684 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 17 Oct 2023 16:51:03 -0400 Subject: [PATCH 160/212] drm/amdgpu: Fix possible null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit abo->tbo.resource may be NULL in amdgpu_vm_bo_update. Fixes: 180253782038 ("drm/ttm: stop allocating dummy resources during BO creation") Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f5daadcec865..82f25996ff5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1090,7 +1090,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct drm_gem_object *gobj = dma_buf->priv; struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); - if (abo->tbo.resource->mem_type == TTM_PL_VRAM) + if (abo->tbo.resource && + abo->tbo.resource->mem_type == TTM_PL_VRAM) bo = gem_to_amdgpu_bo(gobj); } mem = bo->tbo.resource; From 316baf09d355aec1179981b6dfe28eba50c5ee5b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 17 Jul 2023 15:28:52 -0400 Subject: [PATCH 161/212] drm/amdgpu: Reserve fences for VM update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In amdgpu_dma_buf_move_notify reserve fences for the page table updates in amdgpu_vm_clear_freed and amdgpu_vm_handle_moved. This fixes a BUG_ON in dma_resv_add_fence when using SDMA for page table updates. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 12210598e5b8..ba3a87cb88cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -403,7 +403,10 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) continue; } - r = amdgpu_vm_clear_freed(adev, vm, NULL); + /* Reserve fences for two SDMA page table updates */ + r = dma_resv_reserve_fences(resv, 2); + if (!r) + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (!r) r = amdgpu_vm_handle_moved(adev, vm); From 913eda2b08cc49d31f382579e2be34c2709eb789 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 18 Oct 2023 18:39:08 +0200 Subject: [PATCH 162/212] i40e: xsk: remove count_mask Cited commit introduced a neat way of updating next_to_clean that does not require boundary checks on each increment. This was done by masking the new value with (ring length - 1) mask. Problem is that this is applicable only for power of 2 ring sizes, for every other size this assumption can not be made. In turn, it leads to cleaning descriptors out of order as well as splats: [ 1388.411915] Workqueue: events xp_release_deferred [ 1388.411919] RIP: 0010:xp_free+0x1a/0x50 [ 1388.411921] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 8b 57 70 48 8d 47 70 48 89 e5 48 39 d0 74 06 <5d> c3 cc cc cc cc 48 8b 57 60 83 82 b8 00 00 00 01 48 8b 57 60 48 [ 1388.411922] RSP: 0018:ffa0000000a83cb0 EFLAGS: 00000206 [ 1388.411923] RAX: ff11000119aa5030 RBX: 000000000000001d RCX: ff110001129b6e50 [ 1388.411924] RDX: ff11000119aa4fa0 RSI: 0000000055555554 RDI: ff11000119aa4fc0 [ 1388.411925] RBP: ffa0000000a83cb0 R08: 0000000000000000 R09: 0000000000000000 [ 1388.411926] R10: 0000000000000001 R11: 0000000000000000 R12: ff11000115829b80 [ 1388.411927] R13: 000000000000005f R14: 0000000000000000 R15: ff11000119aa4fc0 [ 1388.411928] FS: 0000000000000000(0000) GS:ff11000277e00000(0000) knlGS:0000000000000000 [ 1388.411929] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1388.411930] CR2: 00007f1f564e6c14 CR3: 000000000783c005 CR4: 0000000000771ef0 [ 1388.411931] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1388.411931] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1388.411932] PKRU: 55555554 [ 1388.411933] Call Trace: [ 1388.411934] [ 1388.411935] ? show_regs+0x6e/0x80 [ 1388.411937] ? watchdog_timer_fn+0x1d2/0x240 [ 1388.411939] ? __pfx_watchdog_timer_fn+0x10/0x10 [ 1388.411941] ? __hrtimer_run_queues+0x10e/0x290 [ 1388.411945] ? clockevents_program_event+0xae/0x130 [ 1388.411947] ? hrtimer_interrupt+0x105/0x240 [ 1388.411949] ? __sysvec_apic_timer_interrupt+0x54/0x150 [ 1388.411952] ? sysvec_apic_timer_interrupt+0x7f/0x90 [ 1388.411955] [ 1388.411955] [ 1388.411956] ? asm_sysvec_apic_timer_interrupt+0x1f/0x30 [ 1388.411958] ? xp_free+0x1a/0x50 [ 1388.411960] i40e_xsk_clean_rx_ring+0x5d/0x100 [i40e] [ 1388.411968] i40e_clean_rx_ring+0x14c/0x170 [i40e] [ 1388.411977] i40e_queue_pair_disable+0xda/0x260 [i40e] [ 1388.411986] i40e_xsk_pool_setup+0x192/0x1d0 [i40e] [ 1388.411993] i40e_reconfig_rss_queues+0x1f0/0x1450 [i40e] [ 1388.412002] xp_disable_drv_zc+0x73/0xf0 [ 1388.412004] ? mutex_lock+0x17/0x50 [ 1388.412007] xp_release_deferred+0x2b/0xc0 [ 1388.412010] process_one_work+0x178/0x350 [ 1388.412011] ? __pfx_worker_thread+0x10/0x10 [ 1388.412012] worker_thread+0x2f7/0x420 [ 1388.412014] ? __pfx_worker_thread+0x10/0x10 [ 1388.412015] kthread+0xf8/0x130 [ 1388.412017] ? __pfx_kthread+0x10/0x10 [ 1388.412019] ret_from_fork+0x3d/0x60 [ 1388.412021] ? __pfx_kthread+0x10/0x10 [ 1388.412023] ret_from_fork_asm+0x1b/0x30 [ 1388.412026] It comes from picking wrong ring entries when cleaning xsk buffers during pool detach. Remove the count_mask logic and use they boundary check when updating next_to_process (which used to be a next_to_clean). Fixes: c8a8ca3408dc ("i40e: remove unnecessary memory writes of the next to clean pointer") Reported-by: Tushar Vyavahare Tested-by: Tushar Vyavahare Signed-off-by: Maciej Fijalkowski Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20231018163908.40841-1-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 37f41c8a682f..7d991e4d9b89 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -437,12 +437,12 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 next_to_process = rx_ring->next_to_process; u16 next_to_clean = rx_ring->next_to_clean; - u16 count_mask = rx_ring->count - 1; unsigned int xdp_res, xdp_xmit = 0; struct xdp_buff *first = NULL; + u32 count = rx_ring->count; struct bpf_prog *xdp_prog; + u32 entries_to_alloc; bool failure = false; - u16 cleaned_count; if (next_to_process != next_to_clean) first = *i40e_rx_bi(rx_ring, next_to_clean); @@ -475,7 +475,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) qword); bi = *i40e_rx_bi(rx_ring, next_to_process); xsk_buff_free(bi); - next_to_process = (next_to_process + 1) & count_mask; + if (++next_to_process == count) + next_to_process = 0; continue; } @@ -493,7 +494,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) else if (i40e_add_xsk_frag(rx_ring, first, bi, size)) break; - next_to_process = (next_to_process + 1) & count_mask; + if (++next_to_process == count) + next_to_process = 0; if (i40e_is_non_eop(rx_ring, rx_desc)) continue; @@ -513,10 +515,10 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) rx_ring->next_to_clean = next_to_clean; rx_ring->next_to_process = next_to_process; - cleaned_count = (next_to_clean - rx_ring->next_to_use - 1) & count_mask; - if (cleaned_count >= I40E_RX_BUFFER_WRITE) - failure |= !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count); + entries_to_alloc = I40E_DESC_UNUSED(rx_ring); + if (entries_to_alloc >= I40E_RX_BUFFER_WRITE) + failure |= !i40e_alloc_rx_buffers_zc(rx_ring, entries_to_alloc); i40e_finalize_xdp_rx(rx_ring, xdp_xmit); i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); @@ -752,14 +754,16 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring) { - u16 count_mask = rx_ring->count - 1; u16 ntc = rx_ring->next_to_clean; u16 ntu = rx_ring->next_to_use; - for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) { + while (ntc != ntu) { struct xdp_buff *rx_bi = *i40e_rx_bi(rx_ring, ntc); xsk_buff_free(rx_bi); + ntc++; + if (ntc >= rx_ring->count) + ntc = 0; } } From 4fa008a2db484024a5cb52676a1b1534dc82330c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Oct 2023 18:09:33 -0300 Subject: [PATCH 163/212] tools build: Fix llvm feature detection, still used by bpftool When removing the BPF event for perf a feature test that checks if the llvm devel files are availabe was removed but that is also used by bpftool. bpftool uses it to decide what kind of disassembly it will use: llvm or binutils based. Removing the tools/build/feature/test-llvm.cpp file made bpftool to always fallback to binutils disassembly, even with the llvm devel files installed, fix it by restoring just that small test-llvm.cpp test file. Fixes: 56b11a2126bf2f42 ("perf bpf: Remove support for embedding clang for compiling BPF events (-e foo.c)") Reported-by: Manu Bretelle Reviewed-by: Ian Rogers Reviewed-by: Manu Bretelle Acked-by: Quentin Monnet Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Anshuman Khandual Cc: Carsten Haitzler Cc: Eduard Zingerman Cc: Fangrui Song Cc: He Kuang Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Naveen N. Rao Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Tiezhu Yang Cc: Tom Rix Cc: Wang Nan Cc: Wang ShaoBo Cc: Yang Jihong Cc: Yonghong Song Cc: YueHaibing Link: https://lore.kernel.org/lkml/ZTGa0Ukt7QyxWcVy@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-llvm.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 tools/build/feature/test-llvm.cpp diff --git a/tools/build/feature/test-llvm.cpp b/tools/build/feature/test-llvm.cpp new file mode 100644 index 000000000000..88a3d1bdd9f6 --- /dev/null +++ b/tools/build/feature/test-llvm.cpp @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" +#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH) + +#if NUM_VERSION < 0x030900 +# error "LLVM version too low" +#endif +int main() +{ + llvm::errs() << "Hello World!\n"; + llvm::llvm_shutdown(); + return 0; +} From 479ac419206b5fe4ce4e40de61ac3210a36711aa Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Oct 2023 20:34:55 +0300 Subject: [PATCH 164/212] gpiolib: acpi: Add missing memset(0) to acpi_get_gpiod_from_data() When refactoring the acpi_get_gpiod_from_data() the change missed cleaning up the variable on stack. Add missing memset(). Reported-by: Ferry Toth Fixes: 16ba046e86e9 ("gpiolib: acpi: teach acpi_find_gpio() to handle data-only nodes") Signed-off-by: Andy Shevchenko Reviewed-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index fbda452fb4d6..51e41676de0b 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -951,6 +951,7 @@ static struct gpio_desc *acpi_get_gpiod_from_data(struct fwnode_handle *fwnode, if (!propname) return ERR_PTR(-EINVAL); + memset(&lookup, 0, sizeof(lookup)); lookup.index = index; ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup); From c1c0ce31b2420d5c173228a2132a492ede03d81f Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Wed, 18 Oct 2023 21:34:34 +0200 Subject: [PATCH 165/212] r8169: fix the KCSAN reported data-race in rtl_tx() while reading tp->cur_tx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KCSAN reported the following data-race: ================================================================== BUG: KCSAN: data-race in rtl8169_poll [r8169] / rtl8169_start_xmit [r8169] write (marked) to 0xffff888102474b74 of 4 bytes by task 5358 on cpu 29: rtl8169_start_xmit (drivers/net/ethernet/realtek/r8169_main.c:4254) r8169 dev_hard_start_xmit (./include/linux/netdevice.h:4889 ./include/linux/netdevice.h:4903 net/core/dev.c:3544 net/core/dev.c:3560) sch_direct_xmit (net/sched/sch_generic.c:342) __dev_queue_xmit (net/core/dev.c:3817 net/core/dev.c:4306) ip_finish_output2 (./include/linux/netdevice.h:3082 ./include/net/neighbour.h:526 ./include/net/neighbour.h:540 net/ipv4/ip_output.c:233) __ip_finish_output (net/ipv4/ip_output.c:311 net/ipv4/ip_output.c:293) ip_finish_output (net/ipv4/ip_output.c:328) ip_output (net/ipv4/ip_output.c:435) ip_send_skb (./include/net/dst.h:458 net/ipv4/ip_output.c:127 net/ipv4/ip_output.c:1486) udp_send_skb (net/ipv4/udp.c:963) udp_sendmsg (net/ipv4/udp.c:1246) inet_sendmsg (net/ipv4/af_inet.c:840 (discriminator 4)) sock_sendmsg (net/socket.c:730 net/socket.c:753) __sys_sendto (net/socket.c:2177) __x64_sys_sendto (net/socket.c:2185) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) read to 0xffff888102474b74 of 4 bytes by interrupt on cpu 21: rtl8169_poll (drivers/net/ethernet/realtek/r8169_main.c:4397 drivers/net/ethernet/realtek/r8169_main.c:4581) r8169 __napi_poll (net/core/dev.c:6527) net_rx_action (net/core/dev.c:6596 net/core/dev.c:6727) __do_softirq (kernel/softirq.c:553) __irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632) irq_exit_rcu (kernel/softirq.c:647) common_interrupt (arch/x86/kernel/irq.c:247 (discriminator 14)) asm_common_interrupt (./arch/x86/include/asm/idtentry.h:636) cpuidle_enter_state (drivers/cpuidle/cpuidle.c:291) cpuidle_enter (drivers/cpuidle/cpuidle.c:390) call_cpuidle (kernel/sched/idle.c:135) do_idle (kernel/sched/idle.c:219 kernel/sched/idle.c:282) cpu_startup_entry (kernel/sched/idle.c:378 (discriminator 1)) start_secondary (arch/x86/kernel/smpboot.c:210 arch/x86/kernel/smpboot.c:294) secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:433) value changed: 0x002f4815 -> 0x002f4816 Reported by Kernel Concurrency Sanitizer on: CPU: 21 PID: 0 Comm: swapper/21 Tainted: G L 6.6.0-rc2-kcsan-00143-gb5cbe7c00aa0 #41 Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023 ================================================================== The write side of drivers/net/ethernet/realtek/r8169_main.c is: ================== 4251 /* rtl_tx needs to see descriptor changes before updated tp->cur_tx */ 4252 smp_wmb(); 4253 → 4254 WRITE_ONCE(tp->cur_tx, tp->cur_tx + frags + 1); 4255 4256 stop_queue = !netif_subqueue_maybe_stop(dev, 0, rtl_tx_slots_avail(tp), 4257 R8169_TX_STOP_THRS, 4258 R8169_TX_START_THRS); The read side is the function rtl_tx(): 4355 static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, 4356 int budget) 4357 { 4358 unsigned int dirty_tx, bytes_compl = 0, pkts_compl = 0; 4359 struct sk_buff *skb; 4360 4361 dirty_tx = tp->dirty_tx; 4362 4363 while (READ_ONCE(tp->cur_tx) != dirty_tx) { 4364 unsigned int entry = dirty_tx % NUM_TX_DESC; 4365 u32 status; 4366 4367 status = le32_to_cpu(tp->TxDescArray[entry].opts1); 4368 if (status & DescOwn) 4369 break; 4370 4371 skb = tp->tx_skb[entry].skb; 4372 rtl8169_unmap_tx_skb(tp, entry); 4373 4374 if (skb) { 4375 pkts_compl++; 4376 bytes_compl += skb->len; 4377 napi_consume_skb(skb, budget); 4378 } 4379 dirty_tx++; 4380 } 4381 4382 if (tp->dirty_tx != dirty_tx) { 4383 dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl); 4384 WRITE_ONCE(tp->dirty_tx, dirty_tx); 4385 4386 netif_subqueue_completed_wake(dev, 0, pkts_compl, bytes_compl, 4387 rtl_tx_slots_avail(tp), 4388 R8169_TX_START_THRS); 4389 /* 4390 * 8168 hack: TxPoll requests are lost when the Tx packets are 4391 * too close. Let's kick an extra TxPoll request when a burst 4392 * of start_xmit activity is detected (if it is not detected, 4393 * it is slow enough). -- FR 4394 * If skb is NULL then we come here again once a tx irq is 4395 * triggered after the last fragment is marked transmitted. 4396 */ → 4397 if (tp->cur_tx != dirty_tx && skb) 4398 rtl8169_doorbell(tp); 4399 } 4400 } Obviously from the code, an earlier detected data-race for tp->cur_tx was fixed in the line 4363: 4363 while (READ_ONCE(tp->cur_tx) != dirty_tx) { but the same solution is required for protecting the other access to tp->cur_tx: → 4397 if (READ_ONCE(tp->cur_tx) != dirty_tx && skb) 4398 rtl8169_doorbell(tp); The write in the line 4254 is protected with WRITE_ONCE(), but the read in the line 4397 might have suffered read tearing under some compiler optimisations. The fix eliminated the KCSAN data-race report for this bug. It is yet to be evaluated what happens if tp->cur_tx changes between the test in line 4363 and line 4397. This test should certainly not be cached by the compiler in some register for such a long time, while asynchronous writes to tp->cur_tx might have occurred in line 4254 in the meantime. Fixes: 94d8a98e6235c ("r8169: reduce number of workaround doorbell rings") Cc: Heiner Kallweit Cc: nic_swsd@realtek.com Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Marco Elver Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/lkml/dc7fc8fa-4ea4-e9a9-30a6-7c83e6b53188@alu.unizg.hr/ Signed-off-by: Mirsad Goran Todorovac Acked-by: Marco Elver Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 6351a2dc13bc..281aaa851847 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4394,7 +4394,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, * If skb is NULL then we come here again once a tx irq is * triggered after the last fragment is marked transmitted. */ - if (tp->cur_tx != dirty_tx && skb) + if (READ_ONCE(tp->cur_tx) != dirty_tx && skb) rtl8169_doorbell(tp); } } From dcf75a0f6bc136de94e88178ae5f51b7f879abc9 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Wed, 18 Oct 2023 21:34:36 +0200 Subject: [PATCH 166/212] r8169: fix the KCSAN reported data-race in rtl_tx while reading TxDescArray[entry].opts1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KCSAN reported the following data-race: ================================================================== BUG: KCSAN: data-race in rtl8169_poll (drivers/net/ethernet/realtek/r8169_main.c:4368 drivers/net/ethernet/realtek/r8169_main.c:4581) r8169 race at unknown origin, with read to 0xffff888140d37570 of 4 bytes by interrupt on cpu 21: rtl8169_poll (drivers/net/ethernet/realtek/r8169_main.c:4368 drivers/net/ethernet/realtek/r8169_main.c:4581) r8169 __napi_poll (net/core/dev.c:6527) net_rx_action (net/core/dev.c:6596 net/core/dev.c:6727) __do_softirq (kernel/softirq.c:553) __irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632) irq_exit_rcu (kernel/softirq.c:647) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1074 (discriminator 14)) asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:645) cpuidle_enter_state (drivers/cpuidle/cpuidle.c:291) cpuidle_enter (drivers/cpuidle/cpuidle.c:390) call_cpuidle (kernel/sched/idle.c:135) do_idle (kernel/sched/idle.c:219 kernel/sched/idle.c:282) cpu_startup_entry (kernel/sched/idle.c:378 (discriminator 1)) start_secondary (arch/x86/kernel/smpboot.c:210 arch/x86/kernel/smpboot.c:294) secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:433) value changed: 0xb0000042 -> 0x00000000 Reported by Kernel Concurrency Sanitizer on: CPU: 21 PID: 0 Comm: swapper/21 Tainted: G L 6.6.0-rc2-kcsan-00143-gb5cbe7c00aa0 #41 Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023 ================================================================== The read side is in drivers/net/ethernet/realtek/r8169_main.c ========================================= 4355 static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, 4356 int budget) 4357 { 4358 unsigned int dirty_tx, bytes_compl = 0, pkts_compl = 0; 4359 struct sk_buff *skb; 4360 4361 dirty_tx = tp->dirty_tx; 4362 4363 while (READ_ONCE(tp->cur_tx) != dirty_tx) { 4364 unsigned int entry = dirty_tx % NUM_TX_DESC; 4365 u32 status; 4366 → 4367 status = le32_to_cpu(tp->TxDescArray[entry].opts1); 4368 if (status & DescOwn) 4369 break; 4370 4371 skb = tp->tx_skb[entry].skb; 4372 rtl8169_unmap_tx_skb(tp, entry); 4373 4374 if (skb) { 4375 pkts_compl++; 4376 bytes_compl += skb->len; 4377 napi_consume_skb(skb, budget); 4378 } 4379 dirty_tx++; 4380 } 4381 4382 if (tp->dirty_tx != dirty_tx) { 4383 dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl); 4384 WRITE_ONCE(tp->dirty_tx, dirty_tx); 4385 4386 netif_subqueue_completed_wake(dev, 0, pkts_compl, bytes_compl, 4387 rtl_tx_slots_avail(tp), 4388 R8169_TX_START_THRS); 4389 /* 4390 * 8168 hack: TxPoll requests are lost when the Tx packets are 4391 * too close. Let's kick an extra TxPoll request when a burst 4392 * of start_xmit activity is detected (if it is not detected, 4393 * it is slow enough). -- FR 4394 * If skb is NULL then we come here again once a tx irq is 4395 * triggered after the last fragment is marked transmitted. 4396 */ 4397 if (READ_ONCE(tp->cur_tx) != dirty_tx && skb) 4398 rtl8169_doorbell(tp); 4399 } 4400 } tp->TxDescArray[entry].opts1 is reported to have a data-race and READ_ONCE() fixes this KCSAN warning. 4366 → 4367 status = le32_to_cpu(READ_ONCE(tp->TxDescArray[entry].opts1)); 4368 if (status & DescOwn) 4369 break; 4370 Cc: Heiner Kallweit Cc: nic_swsd@realtek.com Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Marco Elver Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/lkml/dc7fc8fa-4ea4-e9a9-30a6-7c83e6b53188@alu.unizg.hr/ Signed-off-by: Mirsad Goran Todorovac Acked-by: Marco Elver Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 281aaa851847..7e14a1d958c8 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4364,7 +4364,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, unsigned int entry = dirty_tx % NUM_TX_DESC; u32 status; - status = le32_to_cpu(tp->TxDescArray[entry].opts1); + status = le32_to_cpu(READ_ONCE(tp->TxDescArray[entry].opts1)); if (status & DescOwn) break; From f97eee484e71890131f9c563c5cc6d5a69e4308d Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Wed, 18 Oct 2023 21:34:38 +0200 Subject: [PATCH 167/212] r8169: fix the KCSAN reported data race in rtl_rx while reading desc->opts1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KCSAN reported the following data-race bug: ================================================================== BUG: KCSAN: data-race in rtl8169_poll (drivers/net/ethernet/realtek/r8169_main.c:4430 drivers/net/ethernet/realtek/r8169_main.c:4583) r8169 race at unknown origin, with read to 0xffff888117e43510 of 4 bytes by interrupt on cpu 21: rtl8169_poll (drivers/net/ethernet/realtek/r8169_main.c:4430 drivers/net/ethernet/realtek/r8169_main.c:4583) r8169 __napi_poll (net/core/dev.c:6527) net_rx_action (net/core/dev.c:6596 net/core/dev.c:6727) __do_softirq (kernel/softirq.c:553) __irq_exit_rcu (kernel/softirq.c:427 kernel/softirq.c:632) irq_exit_rcu (kernel/softirq.c:647) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1074 (discriminator 14)) asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:645) cpuidle_enter_state (drivers/cpuidle/cpuidle.c:291) cpuidle_enter (drivers/cpuidle/cpuidle.c:390) call_cpuidle (kernel/sched/idle.c:135) do_idle (kernel/sched/idle.c:219 kernel/sched/idle.c:282) cpu_startup_entry (kernel/sched/idle.c:378 (discriminator 1)) start_secondary (arch/x86/kernel/smpboot.c:210 arch/x86/kernel/smpboot.c:294) secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:433) value changed: 0x80003fff -> 0x3402805f Reported by Kernel Concurrency Sanitizer on: CPU: 21 PID: 0 Comm: swapper/21 Tainted: G L 6.6.0-rc2-kcsan-00143-gb5cbe7c00aa0 #41 Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023 ================================================================== drivers/net/ethernet/realtek/r8169_main.c: ========================================== 4429 → 4430 status = le32_to_cpu(desc->opts1); 4431 if (status & DescOwn) 4432 break; 4433 4434 /* This barrier is needed to keep us from reading 4435 * any other fields out of the Rx descriptor until 4436 * we know the status of DescOwn 4437 */ 4438 dma_rmb(); 4439 4440 if (unlikely(status & RxRES)) { 4441 if (net_ratelimit()) 4442 netdev_warn(dev, "Rx ERROR. status = %08x\n", Marco Elver explained that dma_rmb() doesn't prevent the compiler to tear up the access to desc->opts1 which can be written to concurrently. READ_ONCE() should prevent that from happening: 4429 → 4430 status = le32_to_cpu(READ_ONCE(desc->opts1)); 4431 if (status & DescOwn) 4432 break; 4433 As the consequence of this fix, this KCSAN warning was eliminated. Fixes: 6202806e7c03a ("r8169: drop member opts1_mask from struct rtl8169_private") Suggested-by: Marco Elver Cc: Heiner Kallweit Cc: nic_swsd@realtek.com Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/lkml/dc7fc8fa-4ea4-e9a9-30a6-7c83e6b53188@alu.unizg.hr/ Signed-off-by: Mirsad Goran Todorovac Acked-by: Marco Elver Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 7e14a1d958c8..361b90007148 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4427,7 +4427,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget dma_addr_t addr; u32 status; - status = le32_to_cpu(desc->opts1); + status = le32_to_cpu(READ_ONCE(desc->opts1)); if (status & DescOwn) break; From 7db3111043885c146e795c199d39c3f9042d97c0 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 19 Oct 2023 09:13:46 +0200 Subject: [PATCH 168/212] iavf: initialize waitqueues before starting watchdog_task It is not safe to initialize the waitqueues after queueing the watchdog_task. It will be using them. The chance of this causing a real problem is very small, because there will be some sleeping before any of the waitqueues get used. I got a crash only after inserting an artificial sleep in iavf_probe. Queue the watchdog_task as the last step in iavf_probe. Add a comment to prevent repeating the mistake. Fixes: fe2647ab0c99 ("i40evf: prevent VF close returning before state transitions to DOWN") Signed-off-by: Michal Schmidt Reviewed-by: Paul Menzel Reviewed-by: Przemek Kitszel Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/iavf/iavf_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 6a2e6d64bc3a..5b5c0525aa13 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4982,8 +4982,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->finish_config, iavf_finish_config); INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(5 * (pdev->devfn & 0x07))); /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); @@ -4994,6 +4992,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup the wait queue for indicating virtchannel events */ init_waitqueue_head(&adapter->vc_waitqueue); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, + msecs_to_jiffies(5 * (pdev->devfn & 0x07))); + /* Initialization goes on in the work. Do not add more of it below. */ return 0; err_ioremap: From 665e7d83c5386f9abdc67b2e4b6e6d9579aadfcb Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 19 Oct 2023 18:37:20 +0200 Subject: [PATCH 169/212] i40e: Fix I40E_FLAG_VF_VLAN_PRUNING value Commit c87c938f62d8f1 ("i40e: Add VF VLAN pruning") added new PF flag I40E_FLAG_VF_VLAN_PRUNING but its value collides with existing I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED flag. Move the affected flag at the end of the flags and fix its value. Reproducer: [root@cnb-03 ~]# ethtool --set-priv-flags enp2s0f0np0 link-down-on-close on [root@cnb-03 ~]# ethtool --set-priv-flags enp2s0f0np0 vf-vlan-pruning on [root@cnb-03 ~]# ethtool --set-priv-flags enp2s0f0np0 link-down-on-close off [ 6323.142585] i40e 0000:02:00.0: Setting link-down-on-close not supported on this port (because total-port-shutdown is enabled) netlink error: Operation not supported [root@cnb-03 ~]# ethtool --set-priv-flags enp2s0f0np0 vf-vlan-pruning off [root@cnb-03 ~]# ethtool --set-priv-flags enp2s0f0np0 link-down-on-close off The link-down-on-close flag cannot be modified after setting vf-vlan-pruning because vf-vlan-pruning shares the same bit with total-port-shutdown flag that prevents any modification of link-down-on-close flag. Fixes: c87c938f62d8 ("i40e: Add VF VLAN pruning") Cc: Mateusz Palczewski Cc: Simon Horman Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 6e310a539467..55bb0b5310d5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -580,7 +580,6 @@ struct i40e_pf { #define I40E_FLAG_DISABLE_FW_LLDP BIT(24) #define I40E_FLAG_RS_FEC BIT(25) #define I40E_FLAG_BASE_R_FEC BIT(26) -#define I40E_FLAG_VF_VLAN_PRUNING BIT(27) /* TOTAL_PORT_SHUTDOWN * Allows to physically disable the link on the NIC's port. * If enabled, (after link down request from the OS) @@ -603,6 +602,7 @@ struct i40e_pf { * in abilities field of i40e_aq_set_phy_config structure */ #define I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED BIT(27) +#define I40E_FLAG_VF_VLAN_PRUNING BIT(28) struct i40e_client_instance *cinst; bool stat_offsets_loaded; From fb71ba0ed8be9534493c80ba00142a64d9972a72 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 20 Oct 2023 17:31:56 +0800 Subject: [PATCH 170/212] treewide: Spelling fix in comment reques -> request Fixes: 09dde54c6a69 ("PS3: gelic: Add wireless support for PS3") Signed-off-by: Kunwu Chan Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_wireless.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c index dc14a66583ff..44488c153ea2 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c @@ -1217,7 +1217,7 @@ static int gelic_wl_set_encodeext(struct net_device *netdev, key_index = wl->current_key; if (!enc->length && (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY)) { - /* reques to change default key index */ + /* request to change default key index */ pr_debug("%s: request to change default key to %d\n", __func__, key_index); wl->current_key = key_index; From 8c0b48e01daba5ca58f939a8425855d3f4f2ed14 Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Thu, 19 Oct 2023 13:40:35 -0700 Subject: [PATCH 171/212] igb: Fix potential memory leak in igb_add_ethtool_nfc_entry Add check for return of igb_update_ethtool_nfc_entry so that in case of any potential errors the memory alocated for input will be freed. Fixes: 0e71def25281 ("igb: add support of RX network flow classification") Reviewed-by: Wojciech Drewek Signed-off-by: Mateusz Palczewski Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Jacob Keller Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 319ed601eaa1..4ee849985e2b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2978,11 +2978,15 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, if (err) goto err_out_w_lock; - igb_update_ethtool_nfc_entry(adapter, input, input->sw_idx); + err = igb_update_ethtool_nfc_entry(adapter, input, input->sw_idx); + if (err) + goto err_out_input_filter; spin_unlock(&adapter->nfc_lock); return 0; +err_out_input_filter: + igb_erase_filter(adapter, input); err_out_w_lock: spin_unlock(&adapter->nfc_lock); err_out: From b022f0c7e404887a7c5229788fc99eff9f9a80d5 Mon Sep 17 00:00:00 2001 From: Francis Laniel Date: Fri, 20 Oct 2023 13:42:49 +0300 Subject: [PATCH 172/212] tracing/kprobes: Return EADDRNOTAVAIL when func matches several symbols When a kprobe is attached to a function that's name is not unique (is static and shares the name with other functions in the kernel), the kprobe is attached to the first function it finds. This is a bug as the function that it is attaching to is not necessarily the one that the user wants to attach to. Instead of blindly picking a function to attach to what is ambiguous, error with EADDRNOTAVAIL to let the user know that this function is not unique, and that the user must use another unique function with an address offset to get to the function they want to attach to. Link: https://lore.kernel.org/all/20231020104250.9537-2-flaniel@linux.microsoft.com/ Cc: stable@vger.kernel.org Fixes: 413d37d1eb69 ("tracing: Add kprobe-based event tracer") Suggested-by: Masami Hiramatsu Signed-off-by: Francis Laniel Link: https://lore.kernel.org/lkml/20230819101105.b0c104ae4494a7d1f2eea742@kernel.org/ Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_kprobe.c | 63 +++++++++++++++++++++++++++++++++++++ kernel/trace/trace_probe.h | 1 + 2 files changed, 64 insertions(+) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3d7a180a8427..a8fef6ab0872 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -705,6 +705,25 @@ static struct notifier_block trace_kprobe_module_nb = { .priority = 1 /* Invoked after kprobe module callback */ }; +static int count_symbols(void *data, unsigned long unused) +{ + unsigned int *count = data; + + (*count)++; + + return 0; +} + +static unsigned int number_of_same_symbols(char *func_name) +{ + unsigned int count; + + count = 0; + kallsyms_on_each_match_symbol(count_symbols, func_name, &count); + + return count; +} + static int __trace_kprobe_create(int argc, const char *argv[]) { /* @@ -836,6 +855,31 @@ static int __trace_kprobe_create(int argc, const char *argv[]) } } + if (symbol && !strchr(symbol, ':')) { + unsigned int count; + + count = number_of_same_symbols(symbol); + if (count > 1) { + /* + * Users should use ADDR to remove the ambiguity of + * using KSYM only. + */ + trace_probe_log_err(0, NON_UNIQ_SYMBOL); + ret = -EADDRNOTAVAIL; + + goto error; + } else if (count == 0) { + /* + * We can return ENOENT earlier than when register the + * kprobe. + */ + trace_probe_log_err(0, BAD_PROBE_ADDR); + ret = -ENOENT; + + goto error; + } + } + trace_probe_log_set_index(0); if (event) { ret = traceprobe_parse_event_name(&event, &group, gbuf, @@ -1695,6 +1739,7 @@ static int unregister_kprobe_event(struct trace_kprobe *tk) } #ifdef CONFIG_PERF_EVENTS + /* create a trace_kprobe, but don't add it to global lists */ struct trace_event_call * create_local_trace_kprobe(char *func, void *addr, unsigned long offs, @@ -1705,6 +1750,24 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs, int ret; char *event; + if (func) { + unsigned int count; + + count = number_of_same_symbols(func); + if (count > 1) + /* + * Users should use addr to remove the ambiguity of + * using func only. + */ + return ERR_PTR(-EADDRNOTAVAIL); + else if (count == 0) + /* + * We can return ENOENT earlier than when register the + * kprobe. + */ + return ERR_PTR(-ENOENT); + } + /* * local trace_kprobes are not added to dyn_event, so they are never * searched in find_trace_kprobe(). Therefore, there is no concern of diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 02b432ae7513..850d9ecb6765 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -450,6 +450,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_MAXACT, "Invalid maxactive number"), \ C(MAXACT_TOO_BIG, "Maxactive is too big"), \ C(BAD_PROBE_ADDR, "Invalid probed address or symbol"), \ + C(NON_UNIQ_SYMBOL, "The symbol is not unique"), \ C(BAD_RETPROBE, "Retprobe address must be an function entry"), \ C(NO_TRACEPOINT, "Tracepoint is not found"), \ C(BAD_ADDR_SUFFIX, "Invalid probed address suffix"), \ From 03b80ff8023adae6780e491f66e932df8165e3a0 Mon Sep 17 00:00:00 2001 From: Francis Laniel Date: Fri, 20 Oct 2023 13:42:50 +0300 Subject: [PATCH 173/212] selftests/ftrace: Add new test case which checks non unique symbol If name_show() is non unique, this test will try to install a kprobe on this function which should fail returning EADDRNOTAVAIL. On kernel where name_show() is not unique, this test is skipped. Link: https://lore.kernel.org/all/20231020104250.9537-3-flaniel@linux.microsoft.com/ Cc: stable@vger.kernel.org Signed-off-by: Francis Laniel Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- .../ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc new file mode 100644 index 000000000000..bc9514428dba --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc @@ -0,0 +1,13 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test failure of registering kprobe on non unique symbol +# requires: kprobe_events + +SYMBOL='name_show' + +# We skip this test on kernel where SYMBOL is unique or does not exist. +if [ "$(grep -c -E "[[:alnum:]]+ t ${SYMBOL}" /proc/kallsyms)" -le '1' ]; then + exit_unsupported +fi + +! echo "p:test_non_unique ${SYMBOL}" > kprobe_events From 50e782a86c980d4f8292ef82ed8139282ca07a98 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 16 Oct 2023 19:31:22 +0300 Subject: [PATCH 174/212] efi/unaccepted: Fix soft lockups caused by parallel memory acceptance Michael reported soft lockups on a system that has unaccepted memory. This occurs when a user attempts to allocate and accept memory on multiple CPUs simultaneously. The root cause of the issue is that memory acceptance is serialized with a spinlock, allowing only one CPU to accept memory at a time. The other CPUs spin and wait for their turn, leading to starvation and soft lockup reports. To address this, the code has been modified to release the spinlock while accepting memory. This allows for parallel memory acceptance on multiple CPUs. A newly introduced "accepting_list" keeps track of which memory is currently being accepted. This is necessary to prevent parallel acceptance of the same memory block. If a collision occurs, the lock is released and the process is retried. Such collisions should rarely occur. The main path for memory acceptance is the page allocator, which accepts memory in MAX_ORDER chunks. As long as MAX_ORDER is equal to or larger than the unit_size, collisions will never occur because the caller fully owns the memory block being accepted. Aside from the page allocator, only memblock and deferered_free_range() accept memory, but this only happens during boot. The code has been tested with unit_size == 128MiB to trigger collisions and validate the retry codepath. Fixes: 2053bc57f367 ("efi: Add unaccepted memory support") Signed-off-by: Kirill A. Shutemov Reported-by: Michael Roth Reviewed-by: Vlastimil Babka Tested-by: Michael Roth [ardb: drop unnecessary cpu_relax() call] Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/unaccepted_memory.c | 64 ++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/unaccepted_memory.c b/drivers/firmware/efi/unaccepted_memory.c index 853f7dc3c21d..135278ddaf62 100644 --- a/drivers/firmware/efi/unaccepted_memory.c +++ b/drivers/firmware/efi/unaccepted_memory.c @@ -5,9 +5,17 @@ #include #include -/* Protects unaccepted memory bitmap */ +/* Protects unaccepted memory bitmap and accepting_list */ static DEFINE_SPINLOCK(unaccepted_memory_lock); +struct accept_range { + struct list_head list; + unsigned long start; + unsigned long end; +}; + +static LIST_HEAD(accepting_list); + /* * accept_memory() -- Consult bitmap and accept the memory if needed. * @@ -24,6 +32,7 @@ void accept_memory(phys_addr_t start, phys_addr_t end) { struct efi_unaccepted_memory *unaccepted; unsigned long range_start, range_end; + struct accept_range range, *entry; unsigned long flags; u64 unit_size; @@ -78,20 +87,67 @@ void accept_memory(phys_addr_t start, phys_addr_t end) if (end > unaccepted->size * unit_size * BITS_PER_BYTE) end = unaccepted->size * unit_size * BITS_PER_BYTE; - range_start = start / unit_size; - + range.start = start / unit_size; + range.end = DIV_ROUND_UP(end, unit_size); +retry: spin_lock_irqsave(&unaccepted_memory_lock, flags); + + /* + * Check if anybody works on accepting the same range of the memory. + * + * The check is done with unit_size granularity. It is crucial to catch + * all accept requests to the same unit_size block, even if they don't + * overlap on physical address level. + */ + list_for_each_entry(entry, &accepting_list, list) { + if (entry->end < range.start) + continue; + if (entry->start >= range.end) + continue; + + /* + * Somebody else accepting the range. Or at least part of it. + * + * Drop the lock and retry until it is complete. + */ + spin_unlock_irqrestore(&unaccepted_memory_lock, flags); + goto retry; + } + + /* + * Register that the range is about to be accepted. + * Make sure nobody else will accept it. + */ + list_add(&range.list, &accepting_list); + + range_start = range.start; for_each_set_bitrange_from(range_start, range_end, unaccepted->bitmap, - DIV_ROUND_UP(end, unit_size)) { + range.end) { unsigned long phys_start, phys_end; unsigned long len = range_end - range_start; phys_start = range_start * unit_size + unaccepted->phys_base; phys_end = range_end * unit_size + unaccepted->phys_base; + /* + * Keep interrupts disabled until the accept operation is + * complete in order to prevent deadlocks. + * + * Enabling interrupts before calling arch_accept_memory() + * creates an opportunity for an interrupt handler to request + * acceptance for the same memory. The handler will continuously + * spin with interrupts disabled, preventing other task from + * making progress with the acceptance process. + */ + spin_unlock(&unaccepted_memory_lock); + arch_accept_memory(phys_start, phys_end); + + spin_lock(&unaccepted_memory_lock); bitmap_clear(unaccepted->bitmap, range_start, len); } + + list_del(&range.list); spin_unlock_irqrestore(&unaccepted_memory_lock, flags); } From 72bf4f1767f0386970dc04726dc5bc2e3991dc19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Oct 2023 11:24:57 +0000 Subject: [PATCH 175/212] net: do not leave an empty skb in write queue Under memory stress conditions, tcp_sendmsg_locked() might call sk_stream_wait_memory(), thus releasing the socket lock. If a fresh skb has been allocated prior to this, we should not leave it in the write queue otherwise tcp_write_xmit() could panic. This apparently does not happen often, but a future change in __sk_mem_raise_allocated() that Shakeel and others are considering would increase chances of being hurt. Under discussion is to remove this controversial part: /* Fail only if socket is _under_ its sndbuf. * In this case we cannot block, so that we have to fail. */ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { /* Force charge with __GFP_NOFAIL */ if (memcg_charge && !charged) { mem_cgroup_charge_skmem(sk->sk_memcg, amt, gfp_memcg_charge() | __GFP_NOFAIL); } return 1; } Fixes: fdfc5c8594c2 ("tcp: remove empty skb from write queue in error cases") Signed-off-by: Eric Dumazet Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20231019112457.1190114-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d3456cf840de..3d3a24f79573 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -927,10 +927,11 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } -/* In some cases, both sendmsg() could have added an skb to the write queue, - * but failed adding payload on it. We need to remove it to consume less +/* In some cases, sendmsg() could have added an skb to the write queue, + * but failed adding payload on it. We need to remove it to consume less * memory, but more importantly be able to generate EPOLLOUT for Edge Trigger - * epoll() users. + * epoll() users. Another reason is that tcp_write_xmit() does not like + * finding an empty skb in the write queue. */ void tcp_remove_empty_skb(struct sock *sk) { @@ -1289,6 +1290,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) wait_for_space: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + tcp_remove_empty_skb(sk); if (copied) tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, size_goal); From a9beb7e81bcb876615e1fbb3c07f3f9dba69831f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Oct 2023 12:21:04 +0000 Subject: [PATCH 176/212] neighbour: fix various data-races 1) tbl->gc_thresh1, tbl->gc_thresh2, tbl->gc_thresh3 and tbl->gc_interval can be written from sysfs. 2) tbl->last_flush is read locklessly from neigh_alloc() 3) tbl->proxy_queue.qlen is read locklessly from neightbl_fill_info() 4) neightbl_fill_info() reads cpu stats that can be changed concurrently. Fixes: c7fb64db001f ("[NETLINK]: Neighbour table configuration and statistics via rtnetlink") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20231019122104.1448310-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 67 +++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9c09f091cbff..df81c1f0a570 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -251,7 +251,8 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) static int neigh_forced_gc(struct neigh_table *tbl) { - int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2; + int max_clean = atomic_read(&tbl->gc_entries) - + READ_ONCE(tbl->gc_thresh2); unsigned long tref = jiffies - 5 * HZ; struct neighbour *n, *tmp; int shrunk = 0; @@ -280,7 +281,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) } } - tbl->last_flush = jiffies; + WRITE_ONCE(tbl->last_flush, jiffies); write_unlock_bh(&tbl->lock); @@ -464,17 +465,17 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, { struct neighbour *n = NULL; unsigned long now = jiffies; - int entries; + int entries, gc_thresh3; if (exempt_from_gc) goto do_alloc; entries = atomic_inc_return(&tbl->gc_entries) - 1; - if (entries >= tbl->gc_thresh3 || - (entries >= tbl->gc_thresh2 && - time_after(now, tbl->last_flush + 5 * HZ))) { - if (!neigh_forced_gc(tbl) && - entries >= tbl->gc_thresh3) { + gc_thresh3 = READ_ONCE(tbl->gc_thresh3); + if (entries >= gc_thresh3 || + (entries >= READ_ONCE(tbl->gc_thresh2) && + time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) { + if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) { net_info_ratelimited("%s: neighbor table overflow!\n", tbl->id); NEIGH_CACHE_STAT_INC(tbl, table_fulls); @@ -955,13 +956,14 @@ static void neigh_periodic_work(struct work_struct *work) if (time_after(jiffies, tbl->last_rand + 300 * HZ)) { struct neigh_parms *p; - tbl->last_rand = jiffies; + + WRITE_ONCE(tbl->last_rand, jiffies); list_for_each_entry(p, &tbl->parms_list, list) p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } - if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1)) goto out; for (i = 0 ; i < (1 << nht->hash_shift); i++) { @@ -2167,15 +2169,16 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndtmsg->ndtm_pad2 = 0; if (nla_put_string(skb, NDTA_NAME, tbl->id) || - nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) || - nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) || - nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) || - nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3)) + nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval), + NDTA_PAD) || + nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) || + nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) || + nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3))) goto nla_put_failure; { unsigned long now = jiffies; - long flush_delta = now - tbl->last_flush; - long rand_delta = now - tbl->last_rand; + long flush_delta = now - READ_ONCE(tbl->last_flush); + long rand_delta = now - READ_ONCE(tbl->last_rand); struct neigh_hash_table *nht; struct ndt_config ndc = { .ndtc_key_len = tbl->key_len, @@ -2183,7 +2186,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, .ndtc_entries = atomic_read(&tbl->entries), .ndtc_last_flush = jiffies_to_msecs(flush_delta), .ndtc_last_rand = jiffies_to_msecs(rand_delta), - .ndtc_proxy_qlen = tbl->proxy_queue.qlen, + .ndtc_proxy_qlen = READ_ONCE(tbl->proxy_queue.qlen), }; rcu_read_lock(); @@ -2206,17 +2209,17 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, struct neigh_statistics *st; st = per_cpu_ptr(tbl->stats, cpu); - ndst.ndts_allocs += st->allocs; - ndst.ndts_destroys += st->destroys; - ndst.ndts_hash_grows += st->hash_grows; - ndst.ndts_res_failed += st->res_failed; - ndst.ndts_lookups += st->lookups; - ndst.ndts_hits += st->hits; - ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast; - ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast; - ndst.ndts_periodic_gc_runs += st->periodic_gc_runs; - ndst.ndts_forced_gc_runs += st->forced_gc_runs; - ndst.ndts_table_fulls += st->table_fulls; + ndst.ndts_allocs += READ_ONCE(st->allocs); + ndst.ndts_destroys += READ_ONCE(st->destroys); + ndst.ndts_hash_grows += READ_ONCE(st->hash_grows); + ndst.ndts_res_failed += READ_ONCE(st->res_failed); + ndst.ndts_lookups += READ_ONCE(st->lookups); + ndst.ndts_hits += READ_ONCE(st->hits); + ndst.ndts_rcv_probes_mcast += READ_ONCE(st->rcv_probes_mcast); + ndst.ndts_rcv_probes_ucast += READ_ONCE(st->rcv_probes_ucast); + ndst.ndts_periodic_gc_runs += READ_ONCE(st->periodic_gc_runs); + ndst.ndts_forced_gc_runs += READ_ONCE(st->forced_gc_runs); + ndst.ndts_table_fulls += READ_ONCE(st->table_fulls); } if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst, @@ -2445,16 +2448,16 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout_tbl_lock; if (tb[NDTA_THRESH1]) - tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); + WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1])); if (tb[NDTA_THRESH2]) - tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); + WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2])); if (tb[NDTA_THRESH3]) - tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); + WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3])); if (tb[NDTA_GC_INTERVAL]) - tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]); + WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL])); err = 0; From e7684d29efdf37304c62bb337ea55b3428ca118e Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Thu, 19 Oct 2023 13:36:41 -0700 Subject: [PATCH 177/212] igc: Fix ambiguity in the ethtool advertising The 'ethtool_convert_link_mode_to_legacy_u32' method does not allow us to advertise 2500M speed support and TP (twisted pair) properly. Convert to 'ethtool_link_ksettings_test_link_mode' to advertise supported speed and eliminate ambiguity. Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Suggested-by: Dima Ruinskiy Suggested-by: Vitaly Lifshits Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20231019203641.3661960-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 35 ++++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 7ab6dd58e400..dd8a9d27a167 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1817,7 +1817,7 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev, struct igc_adapter *adapter = netdev_priv(netdev); struct net_device *dev = adapter->netdev; struct igc_hw *hw = &adapter->hw; - u32 advertising; + u16 advertised = 0; /* When adapter in resetting mode, autoneg/speed/duplex * cannot be changed @@ -1842,18 +1842,33 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev, while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) usleep_range(1000, 2000); - ethtool_convert_link_mode_to_legacy_u32(&advertising, - cmd->link_modes.advertising); - /* Converting to legacy u32 drops ETHTOOL_LINK_MODE_2500baseT_Full_BIT. - * We have to check this and convert it to ADVERTISE_2500_FULL - * (aka ETHTOOL_LINK_MODE_2500baseX_Full_BIT) explicitly. - */ - if (ethtool_link_ksettings_test_link_mode(cmd, advertising, 2500baseT_Full)) - advertising |= ADVERTISE_2500_FULL; + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 2500baseT_Full)) + advertised |= ADVERTISE_2500_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 1000baseT_Full)) + advertised |= ADVERTISE_1000_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 100baseT_Full)) + advertised |= ADVERTISE_100_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 100baseT_Half)) + advertised |= ADVERTISE_100_HALF; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10baseT_Full)) + advertised |= ADVERTISE_10_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10baseT_Half)) + advertised |= ADVERTISE_10_HALF; if (cmd->base.autoneg == AUTONEG_ENABLE) { hw->mac.autoneg = 1; - hw->phy.autoneg_advertised = advertising; + hw->phy.autoneg_advertised = advertised; if (adapter->fc_autoneg) hw->fc.requested_mode = igc_fc_default; } else { From 068d8b75c1aee153193522211ace6c13c21cd16b Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Thu, 19 Oct 2023 13:38:52 -0700 Subject: [PATCH 178/212] i40e: sync next_to_clean and next_to_process for programming status desc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a programming status desc is encountered on the rx_ring, next_to_process is bumped along with cleaned_count but next_to_clean is not. This causes I40E_DESC_UNUSED() macro to misbehave resulting in overwriting whole ring with new buffers. Update next_to_clean to point to next_to_process on seeing a programming status desc if not in the middle of handling a multi-frag packet. Also, bump cleaned_count only for such case as otherwise next_to_clean buffer may be returned to hardware on reaching clean_threshold. Fixes: e9031f2da1ae ("i40e: introduce next_to_process to i40e_ring") Suggested-by: Maciej Fijalkowski Reported-by: hq.dev+kernel@msdfc.xyz Reported by: Solomon Peachy Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217678 Tested-by: hq.dev+kernel@msdfc.xyz Tested by: Indrek Järve Signed-off-by: Tirthendu Sarkar Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Jacob Keller Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20231019203852.3663665-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 0b3a27f118fb..50c70a8e470a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2544,7 +2544,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, rx_buffer = i40e_rx_bi(rx_ring, ntp); i40e_inc_ntp(rx_ring); i40e_reuse_rx_page(rx_ring, rx_buffer); - cleaned_count++; + /* Update ntc and bump cleaned count if not in the + * middle of mb packet. + */ + if (rx_ring->next_to_clean == ntp) { + rx_ring->next_to_clean = + rx_ring->next_to_process; + cleaned_count++; + } continue; } From d2ca43f30611ac41bccfa8352c53f5432f0ecbb0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 19 Oct 2023 13:23:37 -0500 Subject: [PATCH 179/212] net: xgene: Fix unused xgene_enet_of_match warning for !CONFIG_OF Commit b0377116decd ("net: ethernet: Use device_get_match_data()") dropped the unconditional use of xgene_enet_of_match resulting in this warning: drivers/net/ethernet/apm/xgene/xgene_enet_main.c:2004:34: warning: unused variable 'xgene_enet_of_match' [-Wunused-const-variable] The fix is to drop of_match_ptr() which is not necessary because DT is always used for this driver (well, it could in theory support ACPI only, but CONFIG_OF is always enabled for arm64). Fixes: b0377116decd ("net: ethernet: Use device_get_match_data()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310170627.2Kvf6ZHY-lkp@intel.com/ Signed-off-by: Rob Herring Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 4d4140b7c450..f3305c434c95 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -2170,7 +2170,7 @@ static void xgene_enet_shutdown(struct platform_device *pdev) static struct platform_driver xgene_enet_driver = { .driver = { .name = "xgene-enet", - .of_match_table = of_match_ptr(xgene_enet_of_match), + .of_match_table = xgene_enet_of_match, .acpi_match_table = ACPI_PTR(xgene_enet_acpi_match), }, .probe = xgene_enet_probe, From 95201f36f395df34321fcddbce12103e8bbe4970 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Fri, 20 Oct 2023 11:25:35 +0800 Subject: [PATCH 180/212] net: stmmac: update MAC capabilities when tx queues are updated Upon boot up, the driver will configure the MAC capabilities based on the maximum number of tx and rx queues. When the user changes the tx queues to single queue, the MAC should be capable of supporting Half Duplex, but the driver does not update the MAC capabilities when it is configured so. Using the stmmac_reinit_queues() to check the number of tx queues and set the MAC capabilities accordingly. Fixes: 0366f7e06a6b ("net: stmmac: add ethtool support for get/set channels") Cc: # 5.17+ Signed-off-by: Michael Sit Wei Hong Signed-off-by: Gan, Yi Fang Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ed1a5a31a491..5801f4d50f95 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1197,6 +1197,17 @@ static int stmmac_init_phy(struct net_device *dev) return ret; } +static void stmmac_set_half_duplex(struct stmmac_priv *priv) +{ + /* Half-Duplex can only work with single tx queue */ + if (priv->plat->tx_queues_to_use > 1) + priv->phylink_config.mac_capabilities &= + ~(MAC_10HD | MAC_100HD | MAC_1000HD); + else + priv->phylink_config.mac_capabilities |= + (MAC_10HD | MAC_100HD | MAC_1000HD); +} + static int stmmac_phy_setup(struct stmmac_priv *priv) { struct stmmac_mdio_bus_data *mdio_bus_data; @@ -1228,10 +1239,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) MAC_10FD | MAC_100FD | MAC_1000FD; - /* Half-Duplex can only work with single queue */ - if (priv->plat->tx_queues_to_use <= 1) - priv->phylink_config.mac_capabilities |= MAC_10HD | MAC_100HD | - MAC_1000HD; + stmmac_set_half_duplex(priv); /* Get the MAC specific capabilities */ stmmac_mac_phylink_get_caps(priv); @@ -7172,6 +7180,7 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) priv->rss.table[i] = ethtool_rxfh_indir_default(i, rx_cnt); + stmmac_set_half_duplex(priv); stmmac_napi_add(dev); if (netif_running(dev)) From 965f9b8c0c1b37fa2a0e3ef56e40d5666d4cbb5c Mon Sep 17 00:00:00 2001 From: Dell Jin Date: Fri, 20 Oct 2023 09:20:53 +0300 Subject: [PATCH 181/212] net: ethernet: adi: adin1110: Fix uninitialized variable The spi_transfer struct has to have all it's fields initialized to 0 in this case, since not all of them are set before starting the transfer. Otherwise, spi_sync_transfer() will sometimes return an error. Fixes: a526a3cc9c8d ("net: ethernet: adi: adin1110: Fix SPI transfers") Signed-off-by: Dell Jin Signed-off-by: Ciprian Regus Signed-off-by: David S. Miller --- drivers/net/ethernet/adi/adin1110.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index ca66b747b7c5..d7c274af6d4d 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -294,7 +294,7 @@ static int adin1110_read_fifo(struct adin1110_port_priv *port_priv) { struct adin1110_priv *priv = port_priv->priv; u32 header_len = ADIN1110_RD_HEADER_LEN; - struct spi_transfer t; + struct spi_transfer t = {0}; u32 frame_size_no_fcs; struct sk_buff *rxb; u32 frame_size; From ca082f019d8fbb983f03080487946da714154bae Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 21 Oct 2023 20:03:53 +0200 Subject: [PATCH 182/212] net: ieee802154: adf7242: Fix some potential buffer overflow in adf7242_stats_show() strncat() usage in adf7242_debugfs_init() is wrong. The size given to strncat() is the maximum number of bytes that can be written, excluding the trailing NULL. Here, the size that is passed, DNAME_INLINE_LEN, does not take into account the size of "adf7242-" that is already in the array. In order to fix it, use snprintf() instead. Fixes: 7302b9d90117 ("ieee802154/adf7242: Driver for ADF7242 MAC IEEE802154") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ieee802154/adf7242.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index a03490ba2e5b..cc7ddc40020f 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1162,9 +1162,10 @@ static int adf7242_stats_show(struct seq_file *file, void *offset) static void adf7242_debugfs_init(struct adf7242_local *lp) { - char debugfs_dir_name[DNAME_INLINE_LEN + 1] = "adf7242-"; + char debugfs_dir_name[DNAME_INLINE_LEN + 1]; - strncat(debugfs_dir_name, dev_name(&lp->spi->dev), DNAME_INLINE_LEN); + snprintf(debugfs_dir_name, sizeof(debugfs_dir_name), + "adf7242-%s", dev_name(&lp->spi->dev)); lp->debugfs_root = debugfs_create_dir(debugfs_dir_name, NULL); From 9f771493da935299c6393ad3563b581255d01a37 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Fri, 20 Oct 2023 17:27:59 +0800 Subject: [PATCH 183/212] net: chelsio: cxgb4: add an error code check in t4_load_phy_fw t4_set_params_timeout() can return -EINVAL if failed, add check for this. Signed-off-by: Su Hui Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 8d719f82854a..76de55306c4d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3816,6 +3816,8 @@ int t4_load_phy_fw(struct adapter *adap, int win, FW_PARAMS_PARAM_Z_V(FW_PARAMS_PARAM_DEV_PHYFW_DOWNLOAD)); ret = t4_set_params_timeout(adap, adap->mbox, adap->pf, 0, 1, ¶m, &val, 30000); + if (ret) + return ret; /* If we have version number support, then check to see that the new * firmware got loaded properly. From 51a32e828109b4a209efde44505baa356b37a4ce Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sat, 21 Oct 2023 02:03:44 +0900 Subject: [PATCH 184/212] net: usb: smsc95xx: Fix uninit-value access in smsc95xx_read_reg syzbot reported the following uninit-value access issue [1]: smsc95xx 1-1:0.0 (unnamed net_device) (uninitialized): Failed to read reg index 0x00000030: -32 smsc95xx 1-1:0.0 (unnamed net_device) (uninitialized): Error reading E2P_CMD ===================================================== BUG: KMSAN: uninit-value in smsc95xx_reset+0x409/0x25f0 drivers/net/usb/smsc95xx.c:896 smsc95xx_reset+0x409/0x25f0 drivers/net/usb/smsc95xx.c:896 smsc95xx_bind+0x9bc/0x22e0 drivers/net/usb/smsc95xx.c:1131 usbnet_probe+0x100b/0x4060 drivers/net/usb/usbnet.c:1750 usb_probe_interface+0xc75/0x1210 drivers/usb/core/driver.c:396 really_probe+0x506/0xf40 drivers/base/dd.c:658 __driver_probe_device+0x2a7/0x5d0 drivers/base/dd.c:800 driver_probe_device+0x72/0x7b0 drivers/base/dd.c:830 __device_attach_driver+0x55a/0x8f0 drivers/base/dd.c:958 bus_for_each_drv+0x3ff/0x620 drivers/base/bus.c:457 __device_attach+0x3bd/0x640 drivers/base/dd.c:1030 device_initial_probe+0x32/0x40 drivers/base/dd.c:1079 bus_probe_device+0x3d8/0x5a0 drivers/base/bus.c:532 device_add+0x16ae/0x1f20 drivers/base/core.c:3622 usb_set_configuration+0x31c9/0x38c0 drivers/usb/core/message.c:2207 usb_generic_driver_probe+0x109/0x2a0 drivers/usb/core/generic.c:238 usb_probe_device+0x290/0x4a0 drivers/usb/core/driver.c:293 really_probe+0x506/0xf40 drivers/base/dd.c:658 __driver_probe_device+0x2a7/0x5d0 drivers/base/dd.c:800 driver_probe_device+0x72/0x7b0 drivers/base/dd.c:830 __device_attach_driver+0x55a/0x8f0 drivers/base/dd.c:958 bus_for_each_drv+0x3ff/0x620 drivers/base/bus.c:457 __device_attach+0x3bd/0x640 drivers/base/dd.c:1030 device_initial_probe+0x32/0x40 drivers/base/dd.c:1079 bus_probe_device+0x3d8/0x5a0 drivers/base/bus.c:532 device_add+0x16ae/0x1f20 drivers/base/core.c:3622 usb_new_device+0x15f6/0x22f0 drivers/usb/core/hub.c:2589 hub_port_connect drivers/usb/core/hub.c:5440 [inline] hub_port_connect_change drivers/usb/core/hub.c:5580 [inline] port_event drivers/usb/core/hub.c:5740 [inline] hub_event+0x53bc/0x7290 drivers/usb/core/hub.c:5822 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x104e/0x1e70 kernel/workqueue.c:2703 worker_thread+0xf45/0x1490 kernel/workqueue.c:2784 kthread+0x3e8/0x540 kernel/kthread.c:388 ret_from_fork+0x66/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 Local variable buf.i225 created at: smsc95xx_read_reg drivers/net/usb/smsc95xx.c:90 [inline] smsc95xx_reset+0x203/0x25f0 drivers/net/usb/smsc95xx.c:892 smsc95xx_bind+0x9bc/0x22e0 drivers/net/usb/smsc95xx.c:1131 CPU: 1 PID: 773 Comm: kworker/1:2 Not tainted 6.6.0-rc1-syzkaller-00125-ge42bebf6db29 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 Workqueue: usb_hub_wq hub_event ===================================================== Similar to e9c65989920f ("net: usb: smsc75xx: Fix uninit-value access in __smsc75xx_read_reg"), this issue is caused because usbnet_read_cmd() reads less bytes than requested (zero byte in the reproducer). In this case, 'buf' is not properly filled. This patch fixes the issue by returning -ENODATA if usbnet_read_cmd() reads less bytes than requested. sysbot reported similar uninit-value access issue [2]. The root cause is the same as mentioned above, and this patch addresses it as well. Fixes: 2f7ca802bdae ("net: Add SMSC LAN9500 USB2.0 10/100 ethernet adapter driver") Reported-and-tested-by: syzbot+c74c24b43c9ae534f0e0@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+2c97a98a5ba9ea9c23bd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c74c24b43c9ae534f0e0 [1] Closes: https://syzkaller.appspot.com/bug?extid=2c97a98a5ba9ea9c23bd [2] Signed-off-by: Shigeru Yoshida Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 17da42fe605c..a530f20ee257 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -95,7 +95,9 @@ static int __must_check smsc95xx_read_reg(struct usbnet *dev, u32 index, ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, index, &buf, 4); - if (ret < 0) { + if (ret < 4) { + ret = ret < 0 ? ret : -ENODATA; + if (ret != -ENODEV) netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n", index, ret); From a5feba71ec9c14a54c3babdc732c5b6866d8ee43 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:52 -0700 Subject: [PATCH 185/212] r8152: Increase USB control msg timeout to 5000ms as per spec According to the comment next to USB_CTRL_GET_TIMEOUT and USB_CTRL_SET_TIMEOUT, although sending/receiving control messages is usually quite fast, the spec allows them to take up to 5 seconds. Let's increase the timeout in the Realtek driver from 500ms to 5000ms (using the #defines) to account for this. This is not just a theoretical change. The need for the longer timeout was seen in testing. Specifically, if you drop a sc7180-trogdor based Chromebook into the kdb debugger and then "go" again after sitting in the debugger for a while, the next USB control message takes a long time. Out of ~40 tests the slowest USB control message was 4.5 seconds. While dropping into kdb is not exactly an end-user scenario, the above is similar to what could happen due to an temporary interrupt storm, what could happen if there was a host controller (HW or SW) issue, or what could happen if the Realtek device got into a confused state and needed time to recover. This change is fairly critical since the r8152 driver in Linux doesn't expect register reads/writes (which are backed by USB control messages) to fail. Fixes: ac718b69301c ("net/usb: new driver for RTL8152") Suggested-by: Hayes Wang Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0c13d9950cd8..482957beae66 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1212,7 +1212,7 @@ int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) ret = usb_control_msg(tp->udev, tp->pipe_ctrl_in, RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, - value, index, tmp, size, 500); + value, index, tmp, size, USB_CTRL_GET_TIMEOUT); if (ret < 0) memset(data, 0xff, size); else @@ -1235,7 +1235,7 @@ int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) ret = usb_control_msg(tp->udev, tp->pipe_ctrl_out, RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE, - value, index, tmp, size, 500); + value, index, tmp, size, USB_CTRL_SET_TIMEOUT); kfree(tmp); @@ -9494,7 +9494,8 @@ static u8 __rtl_get_hw_ver(struct usb_device *udev) ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, - PLA_TCR0, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500); + PLA_TCR0, MCU_TYPE_PLA, tmp, sizeof(*tmp), + USB_CTRL_GET_TIMEOUT); if (ret > 0) ocp_data = (__le32_to_cpu(*tmp) >> 16) & VERSION_MASK; From 5dd17689526971c5ae12bc8398f34bd68cd0499e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:53 -0700 Subject: [PATCH 186/212] r8152: Run the unload routine if we have errors during probe The rtl8152_probe() function lacks a call to the chip-specific unload() routine when it sees an error in probe. Add it in to match the cleanup code in rtl8152_disconnect(). Fixes: ac718b69301c ("net/usb: new driver for RTL8152") Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 482957beae66..201c688e3e3f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9783,6 +9783,8 @@ static int rtl8152_probe(struct usb_interface *intf, out1: tasklet_kill(&tp->tx_tl); + if (tp->rtl_ops.unload) + tp->rtl_ops.unload(tp); usb_set_intfdata(intf, NULL); out: free_netdev(netdev); From bb8adff9123e492598162ac1baad01a53891aef6 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:54 -0700 Subject: [PATCH 187/212] r8152: Cancel hw_phy_work if we have an error in probe The error handling in rtl8152_probe() is missing a call to cancel the hw_phy_work. Add it in to match what's in the cleanup code in rtl8152_disconnect(). Fixes: a028a9e003f2 ("r8152: move the settings of PHY to a work queue") Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 201c688e3e3f..d10b0886b652 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9783,6 +9783,7 @@ static int rtl8152_probe(struct usb_interface *intf, out1: tasklet_kill(&tp->tx_tl); + cancel_delayed_work_sync(&tp->hw_phy_work); if (tp->rtl_ops.unload) tp->rtl_ops.unload(tp); usb_set_intfdata(intf, NULL); From b8d35024d4059ca550cba11ac9ab23a6c238d929 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:55 -0700 Subject: [PATCH 188/212] r8152: Release firmware if we have an error in probe The error handling in rtl8152_probe() is missing a call to release firmware. Add it in to match what's in the cleanup code in rtl8152_disconnect(). Fixes: 9370f2d05a2a ("r8152: support request_firmware for RTL8153") Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d10b0886b652..656fe90734fc 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9786,6 +9786,7 @@ static int rtl8152_probe(struct usb_interface *intf, cancel_delayed_work_sync(&tp->hw_phy_work); if (tp->rtl_ops.unload) tp->rtl_ops.unload(tp); + rtl8152_release_firmware(tp); usb_set_intfdata(intf, NULL); out: free_netdev(netdev); From dc90ba37a8c37042407fa6970b9830890cfe6047 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:56 -0700 Subject: [PATCH 189/212] r8152: Check for unplug in rtl_phy_patch_request() If the adapter is unplugged while we're looping in rtl_phy_patch_request() we could end up looping for 10 seconds (2 ms * 5000 loops). Add code similar to what's done in other places in the driver to check for unplug and bail. Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 656fe90734fc..9888bc43e903 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4046,6 +4046,9 @@ static int rtl_phy_patch_request(struct r8152 *tp, bool request, bool wait) for (i = 0; wait && i < 5000; i++) { u32 ocp_data; + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; + usleep_range(1000, 2000); ocp_data = ocp_reg_read(tp, OCP_PHY_PATCH_STAT); if ((ocp_data & PATCH_READY) ^ check) From bc65cc42af737a5a35f83842408ef2c6c79ba025 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:57 -0700 Subject: [PATCH 190/212] r8152: Check for unplug in r8153b_ups_en() / r8153c_ups_en() If the adapter is unplugged while we're looping in r8153b_ups_en() / r8153c_ups_en() we could end up looping for 10 seconds (20 ms * 500 loops). Add code similar to what's done in other places in the driver to check for unplug and bail. Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 9888bc43e903..982f9ca03e7a 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3663,6 +3663,8 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable) int i; for (i = 0; i < 500; i++) { + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + return; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; @@ -3703,6 +3705,8 @@ static void r8153c_ups_en(struct r8152 *tp, bool enable) int i; for (i = 0; i < 500; i++) { + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + return; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; From 715f67f33af45ce2cc3a5b1ef133cc8c8e7787b0 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:58 -0700 Subject: [PATCH 191/212] r8152: Rename RTL8152_UNPLUG to RTL8152_INACCESSIBLE Whenever the RTL8152_UNPLUG is set that just tells the driver that all accesses will fail and we should just immediately bail. A future patch will use this same concept at a time when the driver hasn't actually been unplugged but is about to be reset. Rename the flag in preparation for the future patch. This is a no-op change and just a search and replace. Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 96 ++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 982f9ca03e7a..65232848b31d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -764,7 +764,7 @@ enum rtl_register_content { /* rtl8152 flags */ enum rtl8152_flags { - RTL8152_UNPLUG = 0, + RTL8152_INACCESSIBLE = 0, RTL8152_SET_RX_MODE, WORK_ENABLE, RTL8152_LINK_CHG, @@ -1245,7 +1245,7 @@ int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) static void rtl_set_unplug(struct r8152 *tp) { if (tp->udev->state == USB_STATE_NOTATTACHED) { - set_bit(RTL8152_UNPLUG, &tp->flags); + set_bit(RTL8152_INACCESSIBLE, &tp->flags); smp_mb__after_atomic(); } } @@ -1256,7 +1256,7 @@ static int generic_ocp_read(struct r8152 *tp, u16 index, u16 size, u16 limit = 64; int ret = 0; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; /* both size and indix must be 4 bytes align */ @@ -1300,7 +1300,7 @@ static int generic_ocp_write(struct r8152 *tp, u16 index, u16 byteen, u16 byteen_start, byteen_end, byen; u16 limit = 512; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; /* both size and indix must be 4 bytes align */ @@ -1537,7 +1537,7 @@ static int read_mii_word(struct net_device *netdev, int phy_id, int reg) struct r8152 *tp = netdev_priv(netdev); int ret; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; if (phy_id != R8152_PHY_ID) @@ -1553,7 +1553,7 @@ void write_mii_word(struct net_device *netdev, int phy_id, int reg, int val) { struct r8152 *tp = netdev_priv(netdev); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (phy_id != R8152_PHY_ID) @@ -1758,7 +1758,7 @@ static void read_bulk_callback(struct urb *urb) if (!tp) return; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (!test_bit(WORK_ENABLE, &tp->flags)) @@ -1850,7 +1850,7 @@ static void write_bulk_callback(struct urb *urb) if (!test_bit(WORK_ENABLE, &tp->flags)) return; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (!skb_queue_empty(&tp->tx_queue)) @@ -1871,7 +1871,7 @@ static void intr_callback(struct urb *urb) if (!test_bit(WORK_ENABLE, &tp->flags)) return; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; switch (status) { @@ -2615,7 +2615,7 @@ static void bottom_half(struct tasklet_struct *t) { struct r8152 *tp = from_tasklet(tp, t, tx_tl); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (!test_bit(WORK_ENABLE, &tp->flags)) @@ -2658,7 +2658,7 @@ int r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags) int ret; /* The rx would be stopped, so skip submitting */ - if (test_bit(RTL8152_UNPLUG, &tp->flags) || + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags) || !test_bit(WORK_ENABLE, &tp->flags) || !netif_carrier_ok(tp->netdev)) return 0; @@ -3058,7 +3058,7 @@ static int rtl_enable(struct r8152 *tp) static int rtl8152_enable(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; set_tx_qlen(tp); @@ -3145,7 +3145,7 @@ static int rtl8153_enable(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; set_tx_qlen(tp); @@ -3177,7 +3177,7 @@ static void rtl_disable(struct r8152 *tp) u32 ocp_data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } @@ -3631,7 +3631,7 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired) } msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) break; } @@ -3663,7 +3663,7 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable) int i; for (i = 0; i < 500; i++) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) @@ -3705,7 +3705,7 @@ static void r8153c_ups_en(struct r8152 *tp, bool enable) int i; for (i = 0; i < 500; i++) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) @@ -4050,8 +4050,8 @@ static int rtl_phy_patch_request(struct r8152 *tp, bool request, bool wait) for (i = 0; wait && i < 5000; i++) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) - break; + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + return -ENODEV; usleep_range(1000, 2000); ocp_data = ocp_reg_read(tp, OCP_PHY_PATCH_STAT); @@ -6009,7 +6009,7 @@ static int rtl8156_enable(struct r8152 *tp) u32 ocp_data; u16 speed; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; r8156_fc_parameter(tp); @@ -6067,7 +6067,7 @@ static int rtl8156b_enable(struct r8152 *tp) u32 ocp_data; u16 speed; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; set_tx_qlen(tp); @@ -6253,7 +6253,7 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, static void rtl8152_up(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8152_aldps_en(tp, false); @@ -6263,7 +6263,7 @@ static void rtl8152_up(struct r8152 *tp) static void rtl8152_down(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } @@ -6278,7 +6278,7 @@ static void rtl8153_up(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153_u1u2en(tp, false); @@ -6318,7 +6318,7 @@ static void rtl8153_down(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } @@ -6339,7 +6339,7 @@ static void rtl8153b_up(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_u1u2en(tp, false); @@ -6363,7 +6363,7 @@ static void rtl8153b_down(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } @@ -6400,7 +6400,7 @@ static void rtl8153c_up(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_u1u2en(tp, false); @@ -6481,7 +6481,7 @@ static void rtl8156_up(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_u1u2en(tp, false); @@ -6554,7 +6554,7 @@ static void rtl8156_down(struct r8152 *tp) { u32 ocp_data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } @@ -6692,7 +6692,7 @@ static void rtl_work_func_t(struct work_struct *work) /* If the device is unplugged or !netif_running(), the workqueue * doesn't need to wake the device, and could return directly. */ - if (test_bit(RTL8152_UNPLUG, &tp->flags) || !netif_running(tp->netdev)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags) || !netif_running(tp->netdev)) return; if (usb_autopm_get_interface(tp->intf) < 0) @@ -6731,7 +6731,7 @@ static void rtl_hw_phy_work_func_t(struct work_struct *work) { struct r8152 *tp = container_of(work, struct r8152, hw_phy_work.work); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (usb_autopm_get_interface(tp->intf) < 0) @@ -6858,7 +6858,7 @@ static int rtl8152_close(struct net_device *netdev) netif_stop_queue(netdev); res = usb_autopm_get_interface(tp->intf); - if (res < 0 || test_bit(RTL8152_UNPLUG, &tp->flags)) { + if (res < 0 || test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); rtl_stop_rx(tp); } else { @@ -6891,7 +6891,7 @@ static void r8152b_init(struct r8152 *tp) u32 ocp_data; u16 data; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; data = r8152_mdio_read(tp, MII_BMCR); @@ -6935,7 +6935,7 @@ static void r8153_init(struct r8152 *tp) u16 data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153_u1u2en(tp, false); @@ -6946,7 +6946,7 @@ static void r8153_init(struct r8152 *tp) break; msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) break; } @@ -7075,7 +7075,7 @@ static void r8153b_init(struct r8152 *tp) u16 data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_u1u2en(tp, false); @@ -7086,7 +7086,7 @@ static void r8153b_init(struct r8152 *tp) break; msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) break; } @@ -7157,7 +7157,7 @@ static void r8153c_init(struct r8152 *tp) u16 data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_u1u2en(tp, false); @@ -7177,7 +7177,7 @@ static void r8153c_init(struct r8152 *tp) break; msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; } @@ -8006,7 +8006,7 @@ static void r8156_init(struct r8152 *tp) u16 data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_ECM_OP); @@ -8027,7 +8027,7 @@ static void r8156_init(struct r8152 *tp) break; msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; } @@ -8102,7 +8102,7 @@ static void r8156b_init(struct r8152 *tp) u16 data; int i; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_ECM_OP); @@ -8136,7 +8136,7 @@ static void r8156b_init(struct r8152 *tp) break; msleep(20); - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; } @@ -9165,7 +9165,7 @@ static int rtl8152_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) struct mii_ioctl_data *data = if_mii(rq); int res; - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; res = usb_autopm_get_interface(tp->intf); @@ -9267,7 +9267,7 @@ static const struct net_device_ops rtl8152_netdev_ops = { static void rtl8152_unload(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; if (tp->version != RTL_VER_01) @@ -9276,7 +9276,7 @@ static void rtl8152_unload(struct r8152 *tp) static void rtl8153_unload(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153_power_cut_en(tp, false); @@ -9284,7 +9284,7 @@ static void rtl8153_unload(struct r8152 *tp) static void rtl8153b_unload(struct r8152 *tp) { - if (test_bit(RTL8152_UNPLUG, &tp->flags)) + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; r8153b_power_cut_en(tp, false); From d9962b0d42029bcb40fe3c38bce06d1870fa4df4 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 20 Oct 2023 14:06:59 -0700 Subject: [PATCH 192/212] r8152: Block future register access if register access fails Even though the functions to read/write registers can fail, most of the places in the r8152 driver that read/write register values don't check error codes. The lack of error code checking is problematic in at least two ways. The first problem is that the r8152 driver often uses code patterns similar to this: x = read_register() x = x | SOME_BIT; write_register(x); ...with the above pattern, if the read_register() fails and returns garbage then we'll end up trying to write modified garbage back to the Realtek adapter. If the write_register() succeeds that's bad. Note that as of commit f53a7ad18959 ("r8152: Set memory to all 0xFFs on failed reg reads") the "garbage" returned by read_register() will at least be consistent garbage, but it is still garbage. It turns out that this problem is very serious. Writing garbage to some of the hardware registers on the Ethernet adapter can put the adapter in such a bad state that it needs to be power cycled (fully unplugged and plugged in again) before it can enumerate again. The second problem is that the r8152 driver generally has functions that are long sequences of register writes. Assuming everything will be OK if a random register write fails in the middle isn't a great assumption. One might wonder if the above two problems are real. You could ask if we would really have a successful write after a failed read. It turns out that the answer appears to be "yes, this can happen". In fact, we've seen at least two distinct failure modes where this happens. On a sc7180-trogdor Chromebook if you drop into kdb for a while and then resume, you can see: 1. We get a "Tx timeout" 2. The "Tx timeout" queues up a USB reset. 3. In rtl8152_pre_reset() we try to reinit the hardware. 4. The first several (2-9) register accesses fail with a timeout, then things recover. The above test case was actually fixed by the patch ("r8152: Increase USB control msg timeout to 5000ms as per spec") but at least shows that we really can see successful calls after failed ones. On a different (AMD) based Chromebook with a particular adapter, we found that during reboot tests we'd also sometimes get a transitory failure. In this case we saw -EPIPE being returned sometimes. Retrying worked, but retrying is not always safe for all register accesses since reading/writing some registers might have side effects (like registers that clear on read). Let's fully lock out all register access if a register access fails. When we do this, we'll try to queue up a USB reset and try to unlock register access after the reset. This is slightly tricker than it sounds since the r8152 driver has an optimized reset sequence that only works reliably after probe happens. In order to handle this, we avoid the optimized reset if probe didn't finish. Instead, we simply retry the probe routine in this case. When locking out access, we'll use the existing infrastructure that the driver was using when it detected we were unplugged. This keeps us from getting stuck in delay loops in some parts of the driver. Signed-off-by: Douglas Anderson Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 207 ++++++++++++++++++++++++++++++++++------ 1 file changed, 176 insertions(+), 31 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 65232848b31d..afb20c0ed688 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -773,6 +773,9 @@ enum rtl8152_flags { SCHEDULE_TASKLET, GREEN_ETHERNET, RX_EPROTO, + IN_PRE_RESET, + PROBED_WITH_NO_ERRORS, + PROBE_SHOULD_RETRY, }; #define DEVICE_ID_LENOVO_USB_C_TRAVEL_HUB 0x721e @@ -953,6 +956,8 @@ struct r8152 { u8 version; u8 duplex; u8 autoneg; + + unsigned int reg_access_reset_count; }; /** @@ -1200,6 +1205,96 @@ static unsigned int agg_buf_sz = 16384; #define RTL_LIMITED_TSO_SIZE (size_to_mtu(agg_buf_sz) - sizeof(struct tx_desc)) +/* If register access fails then we block access and issue a reset. If this + * happens too many times in a row without a successful access then we stop + * trying to reset and just leave access blocked. + */ +#define REGISTER_ACCESS_MAX_RESETS 3 + +static void rtl_set_inaccessible(struct r8152 *tp) +{ + set_bit(RTL8152_INACCESSIBLE, &tp->flags); + smp_mb__after_atomic(); +} + +static void rtl_set_accessible(struct r8152 *tp) +{ + clear_bit(RTL8152_INACCESSIBLE, &tp->flags); + smp_mb__after_atomic(); +} + +static +int r8152_control_msg(struct r8152 *tp, unsigned int pipe, __u8 request, + __u8 requesttype, __u16 value, __u16 index, void *data, + __u16 size, const char *msg_tag) +{ + struct usb_device *udev = tp->udev; + int ret; + + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + return -ENODEV; + + ret = usb_control_msg(udev, pipe, request, requesttype, + value, index, data, size, + USB_CTRL_GET_TIMEOUT); + + /* No need to issue a reset to report an error if the USB device got + * unplugged; just return immediately. + */ + if (ret == -ENODEV) + return ret; + + /* If the write was successful then we're done */ + if (ret >= 0) { + tp->reg_access_reset_count = 0; + return ret; + } + + dev_err(&udev->dev, + "Failed to %s %d bytes at %#06x/%#06x (%d)\n", + msg_tag, size, value, index, ret); + + /* Block all future register access until we reset. Much of the code + * in the driver doesn't check for errors. Notably, many parts of the + * driver do a read/modify/write of a register value without + * confirming that the read succeeded. Writing back modified garbage + * like this can fully wedge the adapter, requiring a power cycle. + */ + rtl_set_inaccessible(tp); + + /* If probe hasn't yet finished, then we'll request a retry of the + * whole probe routine if we get any control transfer errors. We + * never have to clear this bit since we free/reallocate the whole "tp" + * structure if we retry probe. + */ + if (!test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) { + set_bit(PROBE_SHOULD_RETRY, &tp->flags); + return ret; + } + + /* Failing to access registers in pre-reset is not surprising since we + * wouldn't be resetting if things were behaving normally. The register + * access we do in pre-reset isn't truly mandatory--we're just reusing + * the disable() function and trying to be nice by powering the + * adapter down before resetting it. Thus, if we're in pre-reset, + * we'll return right away and not try to queue up yet another reset. + * We know the post-reset is already coming. + */ + if (test_bit(IN_PRE_RESET, &tp->flags)) + return ret; + + if (tp->reg_access_reset_count < REGISTER_ACCESS_MAX_RESETS) { + usb_queue_reset_device(tp->intf); + tp->reg_access_reset_count++; + } else if (tp->reg_access_reset_count == REGISTER_ACCESS_MAX_RESETS) { + dev_err(&udev->dev, + "Tried to reset %d times; giving up.\n", + REGISTER_ACCESS_MAX_RESETS); + } + + return ret; +} + static int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) { @@ -1210,9 +1305,10 @@ int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) if (!tmp) return -ENOMEM; - ret = usb_control_msg(tp->udev, tp->pipe_ctrl_in, - RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, - value, index, tmp, size, USB_CTRL_GET_TIMEOUT); + ret = r8152_control_msg(tp, tp->pipe_ctrl_in, + RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, + value, index, tmp, size, "read"); + if (ret < 0) memset(data, 0xff, size); else @@ -1233,9 +1329,9 @@ int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) if (!tmp) return -ENOMEM; - ret = usb_control_msg(tp->udev, tp->pipe_ctrl_out, - RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE, - value, index, tmp, size, USB_CTRL_SET_TIMEOUT); + ret = r8152_control_msg(tp, tp->pipe_ctrl_out, + RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE, + value, index, tmp, size, "write"); kfree(tmp); @@ -1244,10 +1340,8 @@ int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) static void rtl_set_unplug(struct r8152 *tp) { - if (tp->udev->state == USB_STATE_NOTATTACHED) { - set_bit(RTL8152_INACCESSIBLE, &tp->flags); - smp_mb__after_atomic(); - } + if (tp->udev->state == USB_STATE_NOTATTACHED) + rtl_set_inaccessible(tp); } static int generic_ocp_read(struct r8152 *tp, u16 index, u16 size, @@ -8262,7 +8356,7 @@ static int rtl8152_pre_reset(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); struct net_device *netdev; - if (!tp) + if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) return 0; netdev = tp->netdev; @@ -8277,7 +8371,9 @@ static int rtl8152_pre_reset(struct usb_interface *intf) napi_disable(&tp->napi); if (netif_carrier_ok(netdev)) { mutex_lock(&tp->control); + set_bit(IN_PRE_RESET, &tp->flags); tp->rtl_ops.disable(tp); + clear_bit(IN_PRE_RESET, &tp->flags); mutex_unlock(&tp->control); } @@ -8290,9 +8386,11 @@ static int rtl8152_post_reset(struct usb_interface *intf) struct net_device *netdev; struct sockaddr sa; - if (!tp) + if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) return 0; + rtl_set_accessible(tp); + /* reset the MAC address in case of policy change */ if (determine_ethernet_addr(tp, &sa) >= 0) { rtnl_lock(); @@ -9494,17 +9592,29 @@ static u8 __rtl_get_hw_ver(struct usb_device *udev) __le32 *tmp; u8 version; int ret; + int i; tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); if (!tmp) return 0; - ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), - RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, - PLA_TCR0, MCU_TYPE_PLA, tmp, sizeof(*tmp), - USB_CTRL_GET_TIMEOUT); - if (ret > 0) - ocp_data = (__le32_to_cpu(*tmp) >> 16) & VERSION_MASK; + /* Retry up to 3 times in case there is a transitory error. We do this + * since retrying a read of the version is always safe and this + * function doesn't take advantage of r8152_control_msg(). + */ + for (i = 0; i < 3; i++) { + ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), + RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, + PLA_TCR0, MCU_TYPE_PLA, tmp, sizeof(*tmp), + USB_CTRL_GET_TIMEOUT); + if (ret > 0) { + ocp_data = (__le32_to_cpu(*tmp) >> 16) & VERSION_MASK; + break; + } + } + + if (i != 0 && ret > 0) + dev_warn(&udev->dev, "Needed %d retries to read version\n", i); kfree(tmp); @@ -9603,25 +9713,14 @@ static bool rtl8152_supports_lenovo_macpassthru(struct usb_device *udev) return 0; } -static int rtl8152_probe(struct usb_interface *intf, - const struct usb_device_id *id) +static int rtl8152_probe_once(struct usb_interface *intf, + const struct usb_device_id *id, u8 version) { struct usb_device *udev = interface_to_usbdev(intf); struct r8152 *tp; struct net_device *netdev; - u8 version; int ret; - if (intf->cur_altsetting->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) - return -ENODEV; - - if (!rtl_check_vendor_ok(intf)) - return -ENODEV; - - version = rtl8152_get_version(intf); - if (version == RTL_VER_UNKNOWN) - return -ENODEV; - usb_reset_device(udev); netdev = alloc_etherdev(sizeof(struct r8152)); if (!netdev) { @@ -9784,10 +9883,20 @@ static int rtl8152_probe(struct usb_interface *intf, else device_set_wakeup_enable(&udev->dev, false); + /* If we saw a control transfer error while probing then we may + * want to try probe() again. Consider this an error. + */ + if (test_bit(PROBE_SHOULD_RETRY, &tp->flags)) + goto out2; + + set_bit(PROBED_WITH_NO_ERRORS, &tp->flags); netif_info(tp, probe, netdev, "%s\n", DRIVER_VERSION); return 0; +out2: + unregister_netdev(netdev); + out1: tasklet_kill(&tp->tx_tl); cancel_delayed_work_sync(&tp->hw_phy_work); @@ -9796,10 +9905,46 @@ static int rtl8152_probe(struct usb_interface *intf, rtl8152_release_firmware(tp); usb_set_intfdata(intf, NULL); out: + if (test_bit(PROBE_SHOULD_RETRY, &tp->flags)) + ret = -EAGAIN; + free_netdev(netdev); return ret; } +#define RTL8152_PROBE_TRIES 3 + +static int rtl8152_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + u8 version; + int ret; + int i; + + if (intf->cur_altsetting->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) + return -ENODEV; + + if (!rtl_check_vendor_ok(intf)) + return -ENODEV; + + version = rtl8152_get_version(intf); + if (version == RTL_VER_UNKNOWN) + return -ENODEV; + + for (i = 0; i < RTL8152_PROBE_TRIES; i++) { + ret = rtl8152_probe_once(intf, id, version); + if (ret != -EAGAIN) + break; + } + if (ret == -EAGAIN) { + dev_err(&intf->dev, + "r8152 failed probe after %d tries; giving up\n", i); + return -ENODEV; + } + + return ret; +} + static void rtl8152_disconnect(struct usb_interface *intf) { struct r8152 *tp = usb_get_intfdata(intf); From 9b311b7313d6c104dd4a2d43ab54536dce07f960 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Thu, 19 Oct 2023 21:01:21 +0800 Subject: [PATCH 193/212] ACPI: NFIT: Install Notify() handler before getting NFIT table If there is no NFIT at startup, it will return 0 immediately in function acpi_nfit_add() and will not install Notify() handler. If hotplugging a nvdimm device later, it will not be identified as there is no Notify() handler. Install the handler before getting NFI table in function acpi_nfit_add() to avoid above issue. Fixes: dcca12ab62a2 ("ACPI: NFIT: Install Notify() handler directly") Signed-off-by: Xiang Chen [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/nfit/core.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index f96bf32cd368..7d88db451cfb 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3339,6 +3339,16 @@ static int acpi_nfit_add(struct acpi_device *adev) acpi_size sz; int rc = 0; + rc = acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY, + acpi_nfit_notify); + if (rc) + return rc; + + rc = devm_add_action_or_reset(dev, acpi_nfit_remove_notify_handler, + adev); + if (rc) + return rc; + status = acpi_get_table(ACPI_SIG_NFIT, 0, &tbl); if (ACPI_FAILURE(status)) { /* The NVDIMM root device allows OS to trigger enumeration of @@ -3386,17 +3396,7 @@ static int acpi_nfit_add(struct acpi_device *adev) if (rc) return rc; - rc = devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc); - if (rc) - return rc; - - rc = acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY, - acpi_nfit_notify); - if (rc) - return rc; - - return devm_add_action_or_reset(dev, acpi_nfit_remove_notify_handler, - adev); + return devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc); } static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle) From d2a0fc372aca561556e765d0a9ec365c7c12f0ad Mon Sep 17 00:00:00 2001 From: Fred Chen Date: Sat, 21 Oct 2023 08:19:47 +0800 Subject: [PATCH 194/212] tcp: fix wrong RTO timeout when received SACK reneging This commit fix wrong RTO timeout when received SACK reneging. When an ACK arrived pointing to a SACK reneging, tcp_check_sack_reneging() will rearm the RTO timer for min(1/2*srtt, 10ms) into to the future. But since the commit 62d9f1a6945b ("tcp: fix TLP timer not set when CA_STATE changes from DISORDER to OPEN") merged, the tcp_set_xmit_timer() is moved after tcp_fastretrans_alert()(which do the SACK reneging check), so the RTO timeout will be overwrited by tcp_set_xmit_timer() with icsk_rto instead of 1/2*srtt. Here is a packetdrill script to check this bug: 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 // simulate srtt to 100ms +0 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 +.1 < . 1:1(0) ack 1 win 1024 +0 accept(3, ..., ...) = 4 +0 write(4, ..., 10000) = 10000 +0 > P. 1:10001(10000) ack 1 // inject sack +.1 < . 1:1(0) ack 1 win 257 +0 > . 1:1001(1000) ack 1 // inject sack reneging +.1 < . 1:1(0) ack 1001 win 257 // we expect rto fired in 1/2*srtt (50ms) +.05 > . 1001:2001(1000) ack 1 This fix remove the FLAG_SET_XMIT_TIMER from ack_flag when tcp_check_sack_reneging() set RTO timer with 1/2*srtt to avoid being overwrited later. Fixes: 62d9f1a6945b ("tcp: fix TLP timer not set when CA_STATE changes from DISORDER to OPEN") Signed-off-by: Fred Chen Reviewed-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8afb0950a697..804821d6bd4d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2207,16 +2207,17 @@ void tcp_enter_loss(struct sock *sk) * restore sanity to the SACK scoreboard. If the apparent reneging * persists until this RTO then we'll clear the SACK scoreboard. */ -static bool tcp_check_sack_reneging(struct sock *sk, int flag) +static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag) { - if (flag & FLAG_SACK_RENEGING && - flag & FLAG_SND_UNA_ADVANCED) { + if (*ack_flag & FLAG_SACK_RENEGING && + *ack_flag & FLAG_SND_UNA_ADVANCED) { struct tcp_sock *tp = tcp_sk(sk); unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), msecs_to_jiffies(10)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); + *ack_flag &= ~FLAG_SET_XMIT_TIMER; return true; } return false; @@ -2986,7 +2987,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, tp->prior_ssthresh = 0; /* B. In all the states check for reneging SACKs. */ - if (tcp_check_sack_reneging(sk, flag)) + if (tcp_check_sack_reneging(sk, ack_flag)) return; /* C. Check consistency of the current state. */ From 05d3ef8bba77c1b5f98d941d8b2d4aeab8118ef1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Oct 2023 12:11:21 -1000 Subject: [PATCH 195/212] Linux 6.6-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5faaaf0fde6c..5fc735c7fed1 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 13454e6e0df2ff37853596d546438ac84ca6a413 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Mon, 23 Oct 2023 14:37:58 +0800 Subject: [PATCH 196/212] isdn: mISDN: hfcsusb: Spelling fix in comment protocoll -> protocol Signed-off-by: Kunwu Chan Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcsusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 1efd17979f24..b82b89888a5e 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -678,7 +678,7 @@ ph_state(struct dchannel *dch) } /* - * disable/enable BChannel for desired protocoll + * disable/enable BChannel for desired protocol */ static int hfcsusb_setup_bch(struct bchannel *bch, int protocol) From 3e3929ef889e650dd585dc0f4f7f18240688811a Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Sat, 21 Oct 2023 08:48:27 -0700 Subject: [PATCH 197/212] wifi: cfg80211: pass correct pointer to rdev_inform_bss() Confusing struct member names here resulted in passing the wrong pointer, causing crashes. Pass the correct one. Fixes: eb142608e2c4 ("wifi: cfg80211: use a struct for inform_single_bss data") Signed-off-by: Ben Greear Link: https://lore.kernel.org/r/20231021154827.1142734-1-greearb@candelatech.com [rewrite commit message, add fixes] Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 939deecf0bbe..8210a6090ac1 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2125,7 +2125,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, if (!res) goto drop; - rdev_inform_bss(rdev, &res->pub, ies, data->drv_data); + rdev_inform_bss(rdev, &res->pub, ies, drv_data->drv_data); if (data->bss_source == BSS_SOURCE_MBSSID) { /* this is a nontransmitting bss, we need to add it to From c434b2be2d80d236bb090fdb493d4bd5ed589238 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Oct 2023 11:42:51 +0200 Subject: [PATCH 198/212] wifi: cfg80211: fix assoc response warning on failed links The warning here shouldn't be done before we even set the bss field (or should've used the input data). Move the assignment before the warning to fix it. We noticed this now because of Wen's bugfix, where the bug fixed there had previously hidden this other bug. Fixes: 53ad07e9823b ("wifi: cfg80211: support reporting failed links") Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 3e2c398abddc..55a1d3633853 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -43,10 +43,11 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, for (link_id = 0; link_id < ARRAY_SIZE(data->links); link_id++) { cr.links[link_id].status = data->links[link_id].status; + cr.links[link_id].bss = data->links[link_id].bss; + WARN_ON_ONCE(cr.links[link_id].status != WLAN_STATUS_SUCCESS && (!cr.ap_mld_addr || !cr.links[link_id].bss)); - cr.links[link_id].bss = data->links[link_id].bss; if (!cr.links[link_id].bss) continue; cr.links[link_id].bssid = data->links[link_id].bss->bssid; From 91535613b6090fc968c601d11d4e2f16b333713c Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 16 Oct 2023 14:52:48 +0300 Subject: [PATCH 199/212] wifi: mac80211: don't drop all unprotected public action frames Not all public action frames have a protected variant. When MFP is enabled drop only public action frames that have a dual protected variant. Fixes: 76a3059cf124 ("wifi: mac80211: drop some unprotected action frames") Signed-off-by: Avraham Stern Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20231016145213.2973e3c8d3bb.I6198b8d3b04cf4a97b06660d346caec3032f232a@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 29 +++++++++++++++++++++++++++++ net/mac80211/rx.c | 3 +-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index bd2f6e19c357..b24fb80782c5 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4355,6 +4355,35 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC; } +/** + * ieee80211_is_protected_dual_of_public_action - check if skb contains a + * protected dual of public action management frame + * @skb: the skb containing the frame, length will be checked + * + * Return: true if the skb contains a protected dual of public action + * management frame, false otherwise. + */ +static inline bool +ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) +{ + u8 action; + + if (!ieee80211_is_public_action((void *)skb->data, skb->len) || + skb->len < IEEE80211_MIN_ACTION_SIZE + 1) + return false; + + action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE); + + return action != WLAN_PUB_ACTION_20_40_BSS_COEX && + action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN && + action != WLAN_PUB_ACTION_MSMT_PILOT && + action != WLAN_PUB_ACTION_TDLS_DISCOVER_RES && + action != WLAN_PUB_ACTION_LOC_TRACK_NOTI && + action != WLAN_PUB_ACTION_FTM_REQUEST && + action != WLAN_PUB_ACTION_FTM_RESPONSE && + action != WLAN_PUB_ACTION_FILS_DISCOVERY; +} + /** * _ieee80211_is_group_privacy_action - check if frame is a group addressed * privacy action frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e751cda5eef6..8f6b6f56b65b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2468,8 +2468,7 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) /* drop unicast public action frames when using MPF */ if (is_unicast_ether_addr(mgmt->da) && - ieee80211_is_public_action((void *)rx->skb->data, - rx->skb->len)) + ieee80211_is_protected_dual_of_public_action(rx->skb)) return -EACCES; } From eb96e221937af3c7bb8a63208dbab813ca5d3d7e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 19 Oct 2023 13:19:28 +0100 Subject: [PATCH 200/212] btrfs: fix unwritten extent buffer after snapshotting a new subvolume When creating a snapshot of a subvolume that was created in the current transaction, we can end up not persisting a dirty extent buffer that is referenced by the snapshot, resulting in IO errors due to checksum failures when trying to read the extent buffer later from disk. A sequence of steps that leads to this is the following: 1) At ioctl.c:create_subvol() we allocate an extent buffer, with logical address 36007936, for the leaf/root of a new subvolume that has an ID of 291. We mark the extent buffer as dirty, and at this point the subvolume tree has a single node/leaf which is also its root (level 0); 2) We no longer commit the transaction used to create the subvolume at create_subvol(). We used to, but that was recently removed in commit 1b53e51a4a8f ("btrfs: don't commit transaction for every subvol create"); 3) The transaction used to create the subvolume has an ID of 33, so the extent buffer 36007936 has a generation of 33; 4) Several updates happen to subvolume 291 during transaction 33, several files created and its tree height changes from 0 to 1, so we end up with a new root at level 1 and the extent buffer 36007936 is now a leaf of that new root node, which is extent buffer 36048896. The commit root remains as 36007936, since we are still at transaction 33; 5) Creation of a snapshot of subvolume 291, with an ID of 292, starts at ioctl.c:create_snapshot(). This triggers a commit of transaction 33 and we end up at transaction.c:create_pending_snapshot(), in the critical section of a transaction commit. There we COW the root of subvolume 291, which is extent buffer 36048896. The COW operation returns extent buffer 36048896, since there's no need to COW because the extent buffer was created in this transaction and it was not written yet. The we call btrfs_copy_root() against the root node 36048896. During this operation we allocate a new extent buffer to turn into the root node of the snapshot, copy the contents of the root node 36048896 into this snapshot root extent buffer, set the owner to 292 (the ID of the snapshot), etc, and then we call btrfs_inc_ref(). This will create a delayed reference for each leaf pointed by the root node with a reference root of 292 - this includes a reference for the leaf 36007936. After that we set the bit BTRFS_ROOT_FORCE_COW in the root's state. Then we call btrfs_insert_dir_item(), to create the directory entry in in the tree of subvolume 291 that points to the snapshot. This ends up needing to modify leaf 36007936 to insert the respective directory items. Because the bit BTRFS_ROOT_FORCE_COW is set for the root's state, we need to COW the leaf. We end up at btrfs_force_cow_block() and then at update_ref_for_cow(). At update_ref_for_cow() we call btrfs_block_can_be_shared() which returns false, despite the fact the leaf 36007936 is shared - the subvolume's root and the snapshot's root point to that leaf. The reason that it incorrectly returns false is because the commit root of the subvolume is extent buffer 36007936 - it was the initial root of the subvolume when we created it. So btrfs_block_can_be_shared() which has the following logic: int btrfs_block_can_be_shared(struct btrfs_root *root, struct extent_buffer *buf) { if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && buf != root->node && buf != root->commit_root && (btrfs_header_generation(buf) <= btrfs_root_last_snapshot(&root->root_item) || btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) return 1; return 0; } Returns false (0) since 'buf' (extent buffer 36007936) matches the root's commit root. As a result, at update_ref_for_cow(), we don't check for the number of references for extent buffer 36007936, we just assume it's not shared and therefore that it has only 1 reference, so we set the local variable 'refs' to 1. Later on, in the final if-else statement at update_ref_for_cow(): static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *cow, int *last_ref) { (...) if (refs > 1) { (...) } else { (...) btrfs_clear_buffer_dirty(trans, buf); *last_ref = 1; } } So we mark the extent buffer 36007936 as not dirty, and as a result we don't write it to disk later in the transaction commit, despite the fact that the snapshot's root points to it. Attempting to access the leaf or dumping the tree for example shows that the extent buffer was not written: $ btrfs inspect-internal dump-tree -t 292 /dev/sdb btrfs-progs v6.2.2 file tree key (292 ROOT_ITEM 33) node 36110336 level 1 items 2 free space 119 generation 33 owner 292 node 36110336 flags 0x1(WRITTEN) backref revision 1 checksum stored a8103e3e checksum calced a8103e3e fs uuid 90c9a46f-ae9f-4626-9aff-0cbf3e2e3a79 chunk uuid e8c9c885-78f4-4d31-85fe-89e5f5fd4a07 key (256 INODE_ITEM 0) block 36007936 gen 33 key (257 EXTENT_DATA 0) block 36052992 gen 33 checksum verify failed on 36007936 wanted 0x00000000 found 0x86005f29 checksum verify failed on 36007936 wanted 0x00000000 found 0x86005f29 total bytes 107374182400 bytes used 38572032 uuid 90c9a46f-ae9f-4626-9aff-0cbf3e2e3a79 The respective on disk region is full of zeroes as the device was trimmed at mkfs time. Obviously 'btrfs check' also detects and complains about this: $ btrfs check /dev/sdb Opening filesystem to check... Checking filesystem on /dev/sdb UUID: 90c9a46f-ae9f-4626-9aff-0cbf3e2e3a79 generation: 33 (33) [1/7] checking root items [2/7] checking extents checksum verify failed on 36007936 wanted 0x00000000 found 0x86005f29 checksum verify failed on 36007936 wanted 0x00000000 found 0x86005f29 checksum verify failed on 36007936 wanted 0x00000000 found 0x86005f29 bad tree block 36007936, bytenr mismatch, want=36007936, have=0 owner ref check failed [36007936 4096] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space tree [4/7] checking fs roots checksum verify failed on 36007936 wanted 0x00000000 found 0x86005f29 checksum verify failed on 36007936 wanted 0x00000000 found 0x86005f29 checksum verify failed on 36007936 wanted 0x00000000 found 0x86005f29 bad tree block 36007936, bytenr mismatch, want=36007936, have=0 The following tree block(s) is corrupted in tree 292: tree block bytenr: 36110336, level: 1, node key: (256, 1, 0) root 292 root dir 256 not found ERROR: errors found in fs roots found 38572032 bytes used, error(s) found total csum bytes: 16048 total tree bytes: 1265664 total fs tree bytes: 1118208 total extent tree bytes: 65536 btree space waste bytes: 562598 file data blocks allocated: 65978368 referenced 36569088 Fix this by updating btrfs_block_can_be_shared() to consider that an extent buffer may be shared if it matches the commit root and if its generation matches the current transaction's generation. This can be reproduced with the following script: $ cat test.sh #!/bin/bash MNT=/mnt/sdi DEV=/dev/sdi # Use a filesystem with a 64K node size so that we have the same node # size on every machine regardless of its page size (on x86_64 default # node size is 16K due to the 4K page size, while on PPC it's 64K by # default). This way we can make sure we are able to create a btree for # the subvolume with a height of 2. mkfs.btrfs -f -n 64K $DEV mount $DEV $MNT btrfs subvolume create $MNT/subvol # Create a few empty files on the subvolume, this bumps its btree # height to 2 (root node at level 1 and 2 leaves). for ((i = 1; i <= 300; i++)); do echo -n > $MNT/subvol/file_$i done btrfs subvolume snapshot -r $MNT/subvol $MNT/subvol/snap umount $DEV btrfs check $DEV Running it on a 6.5 kernel (or any 6.6-rc kernel at the moment): $ ./test.sh Create subvolume '/mnt/sdi/subvol' Create a readonly snapshot of '/mnt/sdi/subvol' in '/mnt/sdi/subvol/snap' Opening filesystem to check... Checking filesystem on /dev/sdi UUID: bbdde2ff-7d02-45ca-8a73-3c36f23755a1 [1/7] checking root items [2/7] checking extents parent transid verify failed on 30539776 wanted 7 found 5 parent transid verify failed on 30539776 wanted 7 found 5 parent transid verify failed on 30539776 wanted 7 found 5 Ignoring transid failure owner ref check failed [30539776 65536] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space tree [4/7] checking fs roots parent transid verify failed on 30539776 wanted 7 found 5 Ignoring transid failure Wrong key of child node/leaf, wanted: (256, 1, 0), have: (2, 132, 0) Wrong generation of child node/leaf, wanted: 5, have: 7 root 257 root dir 256 not found ERROR: errors found in fs roots found 917504 bytes used, error(s) found total csum bytes: 0 total tree bytes: 851968 total fs tree bytes: 393216 total extent tree bytes: 65536 btree space waste bytes: 736550 file data blocks allocated: 0 referenced 0 A test case for fstests will follow soon. Fixes: 1b53e51a4a8f ("btrfs: don't commit transaction for every subvol create") CC: stable@vger.kernel.org # 6.5+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 14 +++++++++----- fs/btrfs/backref.h | 3 ++- fs/btrfs/ctree.c | 21 ++++++++++++++++----- fs/btrfs/ctree.h | 3 ++- fs/btrfs/relocation.c | 7 ++++--- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index b7d54efb4728..a4a809efc92f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -3196,12 +3196,14 @@ static int handle_direct_tree_backref(struct btrfs_backref_cache *cache, * We still need to do a tree search to find out the parents. This is for * TREE_BLOCK_REF backref (keyed or inlined). * + * @trans: Transaction handle. * @ref_key: The same as @ref_key in handle_direct_tree_backref() * @tree_key: The first key of this tree block. * @path: A clean (released) path, to avoid allocating path every time * the function get called. */ -static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache, +static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans, + struct btrfs_backref_cache *cache, struct btrfs_path *path, struct btrfs_key *ref_key, struct btrfs_key *tree_key, @@ -3315,7 +3317,7 @@ static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache, * If we know the block isn't shared we can avoid * checking its backrefs. */ - if (btrfs_block_can_be_shared(root, eb)) + if (btrfs_block_can_be_shared(trans, root, eb)) upper->checked = 0; else upper->checked = 1; @@ -3363,11 +3365,13 @@ static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache, * links aren't yet bi-directional. Needs to finish such links. * Use btrfs_backref_finish_upper_links() to finish such linkage. * + * @trans: Transaction handle. * @path: Released path for indirect tree backref lookup * @iter: Released backref iter for extent tree search * @node_key: The first key of the tree block */ -int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache, +int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans, + struct btrfs_backref_cache *cache, struct btrfs_path *path, struct btrfs_backref_iter *iter, struct btrfs_key *node_key, @@ -3467,8 +3471,8 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache, * offset means the root objectid. We need to search * the tree to get its parent bytenr. */ - ret = handle_indirect_tree_backref(cache, path, &key, node_key, - cur); + ret = handle_indirect_tree_backref(trans, cache, path, + &key, node_key, cur); if (ret < 0) goto out; } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 1616e3e3f1e4..71d535e03dca 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -540,7 +540,8 @@ static inline void btrfs_backref_panic(struct btrfs_fs_info *fs_info, bytenr); } -int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache, +int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans, + struct btrfs_backref_cache *cache, struct btrfs_path *path, struct btrfs_backref_iter *iter, struct btrfs_key *node_key, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index da519c1b6ad0..617d4827eec2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -367,7 +367,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, /* * check if the tree block can be shared by multiple trees */ -int btrfs_block_can_be_shared(struct btrfs_root *root, +int btrfs_block_can_be_shared(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf) { /* @@ -376,11 +377,21 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, * not allocated by tree relocation, we know the block is not shared. */ if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && - buf != root->node && buf != root->commit_root && + buf != root->node && (btrfs_header_generation(buf) <= btrfs_root_last_snapshot(&root->root_item) || - btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) - return 1; + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { + if (buf != root->commit_root) + return 1; + /* + * An extent buffer that used to be the commit root may still be + * shared because the tree height may have increased and it + * became a child of a higher level root. This can happen when + * snapshotting a subvolume created in the current transaction. + */ + if (btrfs_header_generation(buf) == trans->transid) + return 1; + } return 0; } @@ -415,7 +426,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, * are only allowed for blocks use full backrefs. */ - if (btrfs_block_can_be_shared(root, buf)) { + if (btrfs_block_can_be_shared(trans, root, buf)) { ret = btrfs_lookup_extent_info(trans, fs_info, buf->start, btrfs_header_level(buf), 1, &refs, &flags); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9419f4e37a58..ff40acd63a37 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -540,7 +540,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer **cow_ret, u64 new_root_objectid); -int btrfs_block_can_be_shared(struct btrfs_root *root, +int btrfs_block_can_be_shared(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf); int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 9951a0caf5bb..c6d4bb8cbe29 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -466,6 +466,7 @@ static bool handle_useless_nodes(struct reloc_control *rc, * cached. */ static noinline_for_stack struct btrfs_backref_node *build_backref_tree( + struct btrfs_trans_handle *trans, struct reloc_control *rc, struct btrfs_key *node_key, int level, u64 bytenr) { @@ -499,8 +500,8 @@ static noinline_for_stack struct btrfs_backref_node *build_backref_tree( /* Breadth-first search to build backref cache */ do { - ret = btrfs_backref_add_tree_node(cache, path, iter, node_key, - cur); + ret = btrfs_backref_add_tree_node(trans, cache, path, iter, + node_key, cur); if (ret < 0) { err = ret; goto out; @@ -2803,7 +2804,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, /* Do tree relocation */ rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) { - node = build_backref_tree(rc, &block->key, + node = build_backref_tree(trans, rc, &block->key, block->level, block->bytenr); if (IS_ERR(node)) { err = PTR_ERR(node); From 7798b59409c345d4a6034a4326bceb9f7e2e8b58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Wanzenb=C3=B6ck?= Date: Thu, 19 Oct 2023 14:58:47 +0200 Subject: [PATCH 201/212] net/handshake: fix file ref count in handshake_nl_accept_doit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If req->hr_proto->hp_accept() fail, we call fput() twice: Once in the error path, but also a second time because sock->file is at that point already associated with the file descriptor. Once the task exits, as it would probably do after receiving an error reading from netlink, the fd is closed, calling fput() a second time. To fix, we move installing the file after the error path for the hp_accept() call. In the case of errors we simply put the unused fd. In case of success we can use fd_install() to link the sock->file to the reserved fd. Fixes: 7ea9c1ec66bc ("net/handshake: Fix handshake_dup() ref counting") Signed-off-by: Moritz Wanzenböck Reviewed-by: Chuck Lever Link: https://lore.kernel.org/r/20231019125847.276443-1-moritz.wanzenboeck@linbit.com Signed-off-by: Jakub Kicinski --- net/handshake/netlink.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index d0bc1dd8e65a..80c7302692c7 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -87,29 +87,6 @@ struct nlmsghdr *handshake_genl_put(struct sk_buff *msg, } EXPORT_SYMBOL(handshake_genl_put); -/* - * dup() a kernel socket for use as a user space file descriptor - * in the current process. The kernel socket must have an - * instatiated struct file. - * - * Implicit argument: "current()" - */ -static int handshake_dup(struct socket *sock) -{ - struct file *file; - int newfd; - - file = get_file(sock->file); - newfd = get_unused_fd_flags(O_CLOEXEC); - if (newfd < 0) { - fput(file); - return newfd; - } - - fd_install(newfd, file); - return newfd; -} - int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); @@ -133,17 +110,20 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) goto out_status; sock = req->hr_sk->sk_socket; - fd = handshake_dup(sock); + fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { err = fd; goto out_complete; } + err = req->hr_proto->hp_accept(req, info, fd); if (err) { - fput(sock->file); + put_unused_fd(fd); goto out_complete; } + fd_install(fd, get_file(sock->file)); + trace_handshake_cmd_accept(net, req, req->hr_sk, fd); return 0; From d788c9338342a3146d115281922901c1e3e1cbff Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 20 Oct 2023 15:01:49 +0100 Subject: [PATCH 202/212] sfc: cleanup and reduce netlink error messages Reduce the length of netlink error messages as they are likely to be truncated anyway. Additionally, reword netlink error messages so they are more consistent with previous messages. Fixes: 9dbc8d2b9a02 ("sfc: add decrement ipv6 hop limit by offloading set hop limit actions") Fixes: 3c9561c0a5b9 ("sfc: support TC decap rules matching on enc_ip_tos") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310202136.4u7bv0hp-lkp@intel.com/ Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Edward Cree Link: https://lore.kernel.org/r/20231020140149.30490-1-pieter.jansen-van-vuuren@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc.c | 38 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 834f000ba1c4..30ebef88248d 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -629,14 +629,14 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, } if (child_ip_tos_mask != old->child_ip_tos_mask) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x", + "Pseudo encap match for TOS mask %#04x conflicts with existing mask %#04x", child_ip_tos_mask, old->child_ip_tos_mask); return -EEXIST; } if (child_udp_sport_mask != old->child_udp_sport_mask) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x", + "Pseudo encap match for UDP src port mask %#x conflicts with existing mask %#x", child_udp_sport_mask, old->child_udp_sport_mask); return -EEXIST; @@ -1081,7 +1081,7 @@ static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act, /* check that we do not decrement ttl twice */ if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DEC_TTL)) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl"); + NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported"); return -EOPNOTSUPP; } act->do_ttl_dec = 1; @@ -1106,7 +1106,7 @@ static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act, /* check that we do not decrement hoplimit twice */ if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DEC_TTL)) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl"); + NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported"); return -EOPNOTSUPP; } act->do_ttl_dec = 1; @@ -1120,7 +1120,7 @@ static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act, } NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: ttl add action type %x %x %x/%x", + "ttl add action type %x %x %x/%x is not supported", fa->mangle.htype, fa->mangle.offset, fa->mangle.val, fa->mangle.mask); return -EOPNOTSUPP; @@ -1164,7 +1164,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, case 0: if (fa->mangle.mask) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: mask (%#x) of eth.dst32 mangle", + "mask (%#x) of eth.dst32 mangle is not supported", fa->mangle.mask); return -EOPNOTSUPP; } @@ -1184,7 +1184,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, mung->dst_mac_16 = 1; } else { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: mask (%#x) of eth+4 mangle is not high or low 16b", + "mask (%#x) of eth+4 mangle is not high or low 16b", fa->mangle.mask); return -EOPNOTSUPP; } @@ -1192,7 +1192,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, case 8: if (fa->mangle.mask) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: mask (%#x) of eth.src32 mangle", + "mask (%#x) of eth.src32 mangle is not supported", fa->mangle.mask); return -EOPNOTSUPP; } @@ -1201,7 +1201,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, mung->src_mac_32 = 1; return efx_tc_complete_mac_mangle(efx, act, mung, extack); default: - NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported: mangle eth+%u %x/%x", + NL_SET_ERR_MSG_FMT_MOD(extack, "mangle eth+%u %x/%x is not supported", fa->mangle.offset, fa->mangle.val, fa->mangle.mask); return -EOPNOTSUPP; } @@ -1217,7 +1217,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, /* check that pedit applies to ttl only */ if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: mask (%#x) out of range, only support mangle action on ipv4.ttl", + "mask (%#x) out of range, only support mangle action on ipv4.ttl", fa->mangle.mask); return -EOPNOTSUPP; } @@ -1227,7 +1227,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, */ if (match->mask.ip_ttl != U8_MAX) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: only support mangle ipv4.ttl when we have an exact match on ttl, mask used for match (%#x)", + "only support mangle ttl when we have an exact match, current mask (%#x)", match->mask.ip_ttl); return -EOPNOTSUPP; } @@ -1237,7 +1237,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, */ if (match->value.ip_ttl == 0) { NL_SET_ERR_MSG_MOD(extack, - "Unsupported: we cannot decrement ttl past 0"); + "decrement ttl past 0 is not supported"); return -EOPNOTSUPP; } @@ -1245,7 +1245,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DEC_TTL)) { NL_SET_ERR_MSG_MOD(extack, - "Unsupported: multiple dec ttl"); + "multiple dec ttl is not supported"); return -EOPNOTSUPP; } @@ -1259,7 +1259,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, fallthrough; default: NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: only support mangle on the ttl field (offset is %u)", + "only support mangle on the ttl field (offset is %u)", fa->mangle.offset); return -EOPNOTSUPP; } @@ -1275,7 +1275,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, /* check that pedit applies to ttl only */ if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: mask (%#x) out of range, only support mangle action on ipv6.hop_limit", + "mask (%#x) out of range, only support mangle action on ipv6.hop_limit", fa->mangle.mask); return -EOPNOTSUPP; @@ -1286,7 +1286,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, */ if (match->mask.ip_ttl != U8_MAX) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: only support mangle ipv6.hop_limit when we have an exact match on ttl, mask used for match (%#x)", + "only support hop_limit when we have an exact match, current mask (%#x)", match->mask.ip_ttl); return -EOPNOTSUPP; } @@ -1296,7 +1296,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, */ if (match->value.ip_ttl == 0) { NL_SET_ERR_MSG_MOD(extack, - "Unsupported: we cannot decrement hop_limit past 0"); + "decrementing hop_limit past 0 is not supported"); return -EOPNOTSUPP; } @@ -1304,7 +1304,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DEC_TTL)) { NL_SET_ERR_MSG_MOD(extack, - "Unsupported: multiple dec ttl"); + "multiple dec ttl is not supported"); return -EOPNOTSUPP; } @@ -1318,7 +1318,7 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, fallthrough; default: NL_SET_ERR_MSG_FMT_MOD(extack, - "Unsupported: only support mangle on the hop_limit field"); + "only support mangle on the hop_limit field"); return -EOPNOTSUPP; } default: From 9644bc49705723bf7c69aa9bf542bb5161b91dba Mon Sep 17 00:00:00 2001 From: Anjali Kulkarni Date: Fri, 20 Oct 2023 16:40:58 -0700 Subject: [PATCH 203/212] Fix NULL pointer dereference in cn_filter() Check that sk_user_data is not NULL, else return from cn_filter(). Could not reproduce this issue, but Oliver Sang verified it has fixed the "Closes" problem below. Fixes: 2aa1f7a1f47c ("connector/cn_proc: Add filtering to fix some bugs") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202309201456.84c19e27-oliver.sang@intel.com/ Signed-off-by: Anjali Kulkarni Link: https://lore.kernel.org/r/20231020234058.2232347-1-anjali.k.kulkarni@oracle.com Signed-off-by: Paolo Abeni --- drivers/connector/cn_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 05d562e9c8b1..44b19e696176 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -54,7 +54,7 @@ static int cn_filter(struct sock *dsk, struct sk_buff *skb, void *data) enum proc_cn_mcast_op mc_op; uintptr_t val; - if (!dsk || !data) + if (!dsk || !dsk->sk_user_data || !data) return 0; ptr = (__u32 *)data; From adc8df12d91a2b8350b0cd4c7fec3e8546c9d1f8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 22 Oct 2023 22:25:17 +0200 Subject: [PATCH 204/212] gtp: uapi: fix GTPA_MAX Subtract one to __GTPA_MAX, otherwise GTPA_MAX is off by 2. Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Paolo Abeni --- include/uapi/linux/gtp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index 2f61298a7b77..3dcdb9e33cba 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -33,6 +33,6 @@ enum gtp_attrs { GTPA_PAD, __GTPA_MAX, }; -#define GTPA_MAX (__GTPA_MAX + 1) +#define GTPA_MAX (__GTPA_MAX - 1) #endif /* _UAPI_LINUX_GTP_H_ */ From 4530e5b8e2dad63dcad2206232dd86e4b1489b6c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 22 Oct 2023 22:25:18 +0200 Subject: [PATCH 205/212] gtp: fix fragmentation needed check with gso Call skb_gso_validate_network_len() to check if packet is over PMTU. Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Paolo Abeni --- drivers/net/gtp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 144ec626230d..b22596b18ee8 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -872,8 +872,9 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, skb_dst_update_pmtu_no_confirm(skb, mtu); - if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && - mtu < ntohs(iph->tot_len)) { + if (iph->frag_off & htons(IP_DF) && + ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)))) { netdev_dbg(dev, "packet too big, fragmentation needed\n"); icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); From 77a8c982ff0d4c3a14022c6fe9e3dbfb327552ec Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 23 Oct 2023 14:27:14 -0700 Subject: [PATCH 206/212] i40e: Fix wrong check for I40E_TXR_FLAGS_WB_ON_ITR The I40E_TXR_FLAGS_WB_ON_ITR is i40e_ring flag and not i40e_pf one. Fixes: 8e0764b4d6be42 ("i40e/i40evf: Add support for writeback on ITR feature for X722") Signed-off-by: Ivan Vecera Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20231023212714.178032-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 50c70a8e470a..b047c587629b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2854,7 +2854,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) return budget; } - if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR) + if (q_vector->tx.ring[0].flags & I40E_TXR_FLAGS_WB_ON_ITR) q_vector->arm_wb_state = false; /* Exit the polling mode, but don't re-enable interrupts if stack might From 735795f68b37e9bb49f642407a0d49b1631ea1c7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Oct 2023 21:09:47 +0200 Subject: [PATCH 207/212] netfilter: flowtable: GC pushes back packets to classic path Since 41f2c7c342d3 ("net/sched: act_ct: Fix promotion of offloaded unreplied tuple"), flowtable GC pushes back flows with IPS_SEEN_REPLY back to classic path in every run, ie. every second. This is because of a new check for NF_FLOW_HW_ESTABLISHED which is specific of sched/act_ct. In Netfilter's flowtable case, NF_FLOW_HW_ESTABLISHED never gets set on and IPS_SEEN_REPLY is unreliable since users decide when to offload the flow before, such bit might be set on at a later stage. Fix it by adding a custom .gc handler that sched/act_ct can use to deal with its NF_FLOW_HW_ESTABLISHED bit. Fixes: 41f2c7c342d3 ("net/sched: act_ct: Fix promotion of offloaded unreplied tuple") Reported-by: Vladimir Smelhaus Reviewed-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 + net/netfilter/nf_flow_table_core.c | 14 +++++++------- net/sched/act_ct.c | 7 +++++++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d466e1a3b0b1..fe1507c1db82 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -53,6 +53,7 @@ struct nf_flowtable_type { struct list_head list; int family; int (*init)(struct nf_flowtable *ft); + bool (*gc)(const struct flow_offload *flow); int (*setup)(struct nf_flowtable *ft, struct net_device *dev, enum flow_block_command cmd); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 1d34d700bd09..920a5a29ae1d 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -316,12 +316,6 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, } EXPORT_SYMBOL_GPL(flow_offload_refresh); -static bool nf_flow_is_outdated(const struct flow_offload *flow) -{ - return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) && - !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); -} - static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return nf_flow_timeout_delta(flow->timeout) <= 0; @@ -407,12 +401,18 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, return err; } +static bool nf_flow_custom_gc(struct nf_flowtable *flow_table, + const struct flow_offload *flow) +{ + return flow_table->type->gc && flow_table->type->gc(flow); +} + static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || - nf_flow_is_outdated(flow)) + nf_flow_custom_gc(flow_table, flow)) flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 7c652d14528b..0d44da4e8c8e 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -278,7 +278,14 @@ static int tcf_ct_flow_table_fill_actions(struct net *net, return err; } +static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow) +{ + return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) && + !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); +} + static struct nf_flowtable_type flowtable_ct = { + .gc = tcf_ct_flow_is_outdated, .action = tcf_ct_flow_table_fill_actions, .owner = THIS_MODULE, }; From a63b6622120cd03a304796dbccb80655b3a21798 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 24 Oct 2023 21:58:57 +0200 Subject: [PATCH 208/212] net/sched: act_ct: additional checks for outdated flows Current nf_flow_is_outdated() implementation considers any flow table flow which state diverged from its underlying CT connection status for teardown which can be problematic in the following cases: - Flow has never been offloaded to hardware in the first place either because flow table has hardware offload disabled (flag NF_FLOWTABLE_HW_OFFLOAD is not set) or because it is still pending on 'add' workqueue to be offloaded for the first time. The former is incorrect, the later generates excessive deletions and additions of flows. - Flow is already pending to be updated on the workqueue. Tearing down such flows will also generate excessive removals from the flow table, especially on highly loaded system where the latency to re-offload a flow via 'add' workqueue can be quite high. When considering a flow for teardown as outdated verify that it is both offloaded to hardware and doesn't have any pending updates. Fixes: 41f2c7c342d3 ("net/sched: act_ct: Fix promotion of offloaded unreplied tuple") Reviewed-by: Paul Blakey Signed-off-by: Vlad Buslov Signed-off-by: Pablo Neira Ayuso --- net/sched/act_ct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 0d44da4e8c8e..fb52d6f9aff9 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -281,6 +281,8 @@ static int tcf_ct_flow_table_fill_actions(struct net *net, static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow) { return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) && + test_bit(IPS_HW_OFFLOAD_BIT, &flow->ct->status) && + !test_bit(NF_FLOW_HW_PENDING, &flow->flags) && !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); } From 197f9fba9663e765f8f3ae3b2375c6cc32b2e2b3 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Wed, 25 Oct 2023 02:14:34 -0400 Subject: [PATCH 209/212] net: ipv4: fix typo in comments The word "advertize" should be replaced by "advertise". Signed-off-by: Deming Wang Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index d18f0f092fe7..4ccfc104f13a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -786,7 +786,7 @@ int esp_input_done2(struct sk_buff *skb, int err) /* * 1) if the NAT-T peer's IP or port changed then - * advertize the change to the keying daemon. + * advertise the change to the keying daemon. * This is an inbound SA, so just compare * SRC ports. */ From 1711435e3e67e079d6a2bce54d96d1af21c7ef2c Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Wed, 25 Oct 2023 02:16:56 -0400 Subject: [PATCH 210/212] net: ipv6: fix typo in comments The word "advertize" should be replaced by "advertise". Signed-off-by: Deming Wang Signed-off-by: David S. Miller --- net/ipv6/esp6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e023d29e919c..2cc1a45742d8 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -833,7 +833,7 @@ int esp6_input_done2(struct sk_buff *skb, int err) /* * 1) if the NAT-T peer's IP or port changed then - * advertize the change to the keying daemon. + * advertise the change to the keying daemon. * This is an inbound SA, so just compare * SRC ports. */ From 53b08c4985158430fd6d035fb49443bada535210 Mon Sep 17 00:00:00 2001 From: Alexandru Matei Date: Tue, 24 Oct 2023 22:17:42 +0300 Subject: [PATCH 211/212] vsock/virtio: initialize the_virtio_vsock before using VQs Once VQs are filled with empty buffers and we kick the host, it can send connection requests. If the_virtio_vsock is not initialized before, replies are silently dropped and do not reach the host. virtio_transport_send_pkt() can queue packets once the_virtio_vsock is set, but they won't be processed until vsock->tx_run is set to true. We queue vsock->send_pkt_work when initialization finishes to send those packets queued earlier. Fixes: 0deab087b16a ("vsock/virtio: use RCU to avoid use-after-free on the_virtio_vsock") Signed-off-by: Alexandru Matei Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20231024191742.14259-1-alexandru.matei@uipath.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index e95df847176b..b80bf681327b 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -555,6 +555,11 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) virtio_device_ready(vdev); + return 0; +} + +static void virtio_vsock_vqs_start(struct virtio_vsock *vsock) +{ mutex_lock(&vsock->tx_lock); vsock->tx_run = true; mutex_unlock(&vsock->tx_lock); @@ -569,7 +574,16 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) vsock->event_run = true; mutex_unlock(&vsock->event_lock); - return 0; + /* virtio_transport_send_pkt() can queue packets once + * the_virtio_vsock is set, but they won't be processed until + * vsock->tx_run is set to true. We queue vsock->send_pkt_work + * when initialization finishes to send those packets queued + * earlier. + * We don't need to queue the other workers (rx, event) because + * as long as we don't fill the queues with empty buffers, the + * host can't send us any notification. + */ + queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); } static void virtio_vsock_vqs_del(struct virtio_vsock *vsock) @@ -664,6 +678,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev) goto out; rcu_assign_pointer(the_virtio_vsock, vsock); + virtio_vsock_vqs_start(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -736,6 +751,7 @@ static int virtio_vsock_restore(struct virtio_device *vdev) goto out; rcu_assign_pointer(the_virtio_vsock, vsock); + virtio_vsock_vqs_start(vsock); out: mutex_unlock(&the_virtio_vsock_mutex); From 53798666648af3aa0dd512c2380576627237a800 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Wed, 25 Oct 2023 11:32:13 -0700 Subject: [PATCH 212/212] iavf: in iavf_down, disable queues when removing the driver In iavf_down, we're skipping the scheduling of certain operations if the driver is being removed. However, the IAVF_FLAG_AQ_DISABLE_QUEUES request must not be skipped in this case, because iavf_close waits for the transition to the __IAVF_DOWN state, which happens in iavf_virtchnl_completion after the queues are released. Without this fix, "rmmod iavf" takes half a second per interface that's up and prints the "Device resources not yet released" warning. Fixes: c8de44b577eb ("iavf: do not process adminq tasks when __IAVF_IN_REMOVE_TASK is set") Signed-off-by: Michal Schmidt Reviewed-by: Wojciech Drewek Tested-by: Rafal Romanowski Tested-by: Jacob Keller Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20231025183213.874283-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 5b5c0525aa13..b3434dbc90d6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1437,9 +1437,9 @@ void iavf_down(struct iavf_adapter *adapter) adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER; if (!list_empty(&adapter->adv_rss_list_head)) adapter->aq_required |= IAVF_FLAG_AQ_DEL_ADV_RSS_CFG; - adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES; } + adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES; mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); }