From 17a8b0b6dde67f8561cf2ccbe945d5089cd70e08 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Wed, 12 May 2021 13:41:22 +0530 Subject: [PATCH 01/21] cpufreq: blacklist SC7280 in cpufreq-dt-platdev Add SC7280 to cpufreq-dt-platdev blacklist since the actual scaling is handled by the 'qcom-cpufreq-hw' driver. Reviewed-by: Douglas Anderson Signed-off-by: Sibi Sankar Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 5e07065ec22f..345418b8250e 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -137,6 +137,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "qcom,msm8996", }, { .compatible = "qcom,qcs404", }, { .compatible = "qcom,sc7180", }, + { .compatible = "qcom,sc7280", }, { .compatible = "qcom,sdm845", }, { .compatible = "st,stih407", }, From 88bf5a85fe9840c9b49c5f6c625cdccd11233943 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 17 May 2021 16:54:58 +0100 Subject: [PATCH 02/21] dt-bindings: dvfs: Add support for generic performance domains The CLKSCREW attack [0] exposed security vulnerabilities in energy management implementations where untrusted software had direct access to clock and voltage hardware controls. In this attack, the malicious software was able to place the platform into unsafe overclocked or undervolted configurations. Such configurations then enabled the injection of predictable faults to reveal secrets. Many Arm-based systems used to or still use voltage regulator and clock frameworks in the kernel. These frameworks allow callers to independently manipulate frequency and voltage settings. Such implementations can render systems susceptible to this form of attack. Attacks such as CLKSCREW are now being mitigated by not having direct and independent control of clock and voltage in the kernel and moving that control to a trusted entity, such as the SCP firmware or secure world firmware/software which are to perform sanity checking on the requested performance levels, thereby preventing any attempted malicious programming. With the advent of such an abstraction, there is a need to replace the generic clock and regulator bindings used by such devices with a generic performance domains bindings. [0] https://www.usenix.org/conference/usenixsecurity17/technical-sessions/presentation/tang Cc: Rob Herring Acked-by: Viresh Kumar Signed-off-by: Sudeep Holla Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../devicetree/bindings/arm/cpus.yaml | 7 ++ .../bindings/dvfs/performance-domain.yaml | 74 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 Documentation/devicetree/bindings/dvfs/performance-domain.yaml diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index f3c7249c73d6..9a2432a88074 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -257,6 +257,13 @@ properties: where voltage is in V, frequency is in MHz. + performance-domains: + maxItems: 1 + description: + List of phandles and performance domain specifiers, as defined by + bindings of the performance domain provider. See also + dvfs/performance-domain.yaml. + power-domains: description: List of phandles and PM domain specifiers, as defined by bindings of the diff --git a/Documentation/devicetree/bindings/dvfs/performance-domain.yaml b/Documentation/devicetree/bindings/dvfs/performance-domain.yaml new file mode 100644 index 000000000000..c8b91207f34d --- /dev/null +++ b/Documentation/devicetree/bindings/dvfs/performance-domain.yaml @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dvfs/performance-domain.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Generic performance domains + +maintainers: + - Sudeep Holla + +description: |+ + This binding is intended for performance management of groups of devices or + CPUs that run in the same performance domain. Performance domains must not + be confused with power domains. A performance domain is defined by a set + of devices that always have to run at the same performance level. For a given + performance domain, there is a single point of control that affects all the + devices in the domain, making it impossible to set the performance level of + an individual device in the domain independently from other devices in + that domain. For example, a set of CPUs that share a voltage domain, and + have a common frequency control, is said to be in the same performance + domain. + + This device tree binding can be used to bind performance domain consumer + devices with their performance domains provided by performance domain + providers. A performance domain provider can be represented by any node in + the device tree and can provide one or more performance domains. A consumer + node can refer to the provider by a phandle and a set of phandle arguments + (so called performance domain specifiers) of length specified by the + \#performance-domain-cells property in the performance domain provider node. + +select: true + +properties: + "#performance-domain-cells": + description: + Number of cells in a performance domain specifier. Typically 0 for nodes + representing a single performance domain and 1 for nodes providing + multiple performance domains (e.g. performance controllers), but can be + any value as specified by device tree binding documentation of particular + provider. + enum: [ 0, 1 ] + + performance-domains: + $ref: '/schemas/types.yaml#/definitions/phandle-array' + maxItems: 1 + description: + A phandle and performance domain specifier as defined by bindings of the + performance controller/provider specified by phandle. + +additionalProperties: true + +examples: + - | + performance: performance-controller@12340000 { + compatible = "qcom,cpufreq-hw"; + reg = <0x12340000 0x1000>; + #performance-domain-cells = <1>; + }; + + // The node above defines a performance controller that is a performance + // domain provider and expects one cell as its phandle argument. + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x0 0x0>; + performance-domains = <&performance 1>; + }; + }; From 70d99a8f0442bbc5abfa34ea27ce1fcacff57f90 Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Wed, 19 May 2021 18:25:50 +0200 Subject: [PATCH 03/21] cpufreq: mediatek: add support for mt8365 Add compatible stirng for MediaTek MT8365 SoC. Add also the compatible in the blacklist of the cpufreq-dt-platdev driver. Signed-off-by: Fabien Parent Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + drivers/cpufreq/mediatek-cpufreq.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 345418b8250e..0bb10402f02c 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -126,6 +126,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "mediatek,mt8173", }, { .compatible = "mediatek,mt8176", }, { .compatible = "mediatek,mt8183", }, + { .compatible = "mediatek,mt8365", }, { .compatible = "mediatek,mt8516", }, { .compatible = "nvidia,tegra20", }, diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index f2e491b25b07..87019d5a9547 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -537,6 +537,7 @@ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { .compatible = "mediatek,mt8173", }, { .compatible = "mediatek,mt8176", }, { .compatible = "mediatek,mt8183", }, + { .compatible = "mediatek,mt8365", }, { .compatible = "mediatek,mt8516", }, { } From b791c7f94680ba9b60b0c0786b1d0eb4393053d6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 6 May 2021 21:09:48 +0200 Subject: [PATCH 04/21] cpufreq: scmi: Fix an error message 'ret' is known to be 0 here. The last error code is stored in 'nr_opp', so use it in the error message. Fixes: 71a37cd6a59d ("scmi-cpufreq: Remove deferred probe") Signed-off-by: Christophe JAILLET Reviewed-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index c8a4364ad3c2..ec9a87ca2dbb 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -174,7 +174,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) nr_opp = dev_pm_opp_get_opp_count(cpu_dev); if (nr_opp <= 0) { dev_err(cpu_dev, "%s: No OPPs for this device: %d\n", - __func__, ret); + __func__, nr_opp); ret = -ENODEV; goto out_free_opp; From 4814d9c5d3b956c5a8f47acbb6b98fdd4dfe334f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 20 May 2021 09:56:18 +0530 Subject: [PATCH 05/21] cpufreq: dt: Rename black/white-lists Rename them in accordance with the coding guidelines. Reviewed-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 0bb10402f02c..bef7528aecd3 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -15,7 +15,7 @@ * Machines for which the cpufreq device is *always* created, mostly used for * platforms using "operating-points" (V1) property. */ -static const struct of_device_id whitelist[] __initconst = { +static const struct of_device_id allowlist[] __initconst = { { .compatible = "allwinner,sun4i-a10", }, { .compatible = "allwinner,sun5i-a10s", }, { .compatible = "allwinner,sun5i-a13", }, @@ -100,7 +100,7 @@ static const struct of_device_id whitelist[] __initconst = { * Machines for which the cpufreq device is *not* created, mostly used for * platforms using "operating-points-v2" property. */ -static const struct of_device_id blacklist[] __initconst = { +static const struct of_device_id blocklist[] __initconst = { { .compatible = "allwinner,sun50i-h6", }, { .compatible = "arm,vexpress", }, @@ -179,13 +179,13 @@ static int __init cpufreq_dt_platdev_init(void) if (!np) return -ENODEV; - match = of_match_node(whitelist, np); + match = of_match_node(allowlist, np); if (match) { data = match->data; goto create_pdev; } - if (cpu0_node_has_opp_v2_prop() && !of_match_node(blacklist, np)) + if (cpu0_node_has_opp_v2_prop() && !of_match_node(blocklist, np)) goto create_pdev; of_node_put(np); From eed828895b2426a286717c1ddea8af45fa08bfc3 Mon Sep 17 00:00:00 2001 From: Seiya Wang Date: Tue, 1 Jun 2021 15:10:41 +0800 Subject: [PATCH 06/21] clk: mediatek: remove deprecated CLK_INFRA_CA57SEL for MT8173 SoC Remove CLK_INFRA_CA57SEL for MT8173 since it's no longer used. Acked-by: Rob Herring Reviewed-by: Matthias Brugger Signed-off-by: Seiya Wang Signed-off-by: Viresh Kumar --- include/dt-bindings/clock/mt8173-clk.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/dt-bindings/clock/mt8173-clk.h b/include/dt-bindings/clock/mt8173-clk.h index 3acebe937bfc..3d00c98b9654 100644 --- a/include/dt-bindings/clock/mt8173-clk.h +++ b/include/dt-bindings/clock/mt8173-clk.h @@ -186,7 +186,6 @@ #define CLK_INFRA_PMICWRAP 11 #define CLK_INFRA_CLK_13M 12 #define CLK_INFRA_CA53SEL 13 -#define CLK_INFRA_CA57SEL 14 /* Deprecated. Don't use it. */ #define CLK_INFRA_CA72SEL 14 #define CLK_INFRA_NR_CLK 15 From 9821a195d4e263801884b105554e801642c59f2a Mon Sep 17 00:00:00 2001 From: Seiya Wang Date: Tue, 1 Jun 2021 15:10:42 +0800 Subject: [PATCH 07/21] dt-bindings: cpufreq: update cpu type and clock name for MT8173 SoC Update the cpu type of cpu2 and cpu3 since MT8173 used Cortex-a72. Acked-by: Viresh Kumar Acked-by: Rob Herring Reviewed-by: Matthias Brugger Signed-off-by: Seiya Wang Signed-off-by: Viresh Kumar --- .../devicetree/bindings/cpufreq/cpufreq-mediatek.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt index ea4994b35207..ef68711716fb 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt @@ -202,11 +202,11 @@ Example 2 (MT8173 SoC): cpu2: cpu@100 { device_type = "cpu"; - compatible = "arm,cortex-a57"; + compatible = "arm,cortex-a72"; reg = <0x100>; enable-method = "psci"; cpu-idle-states = <&CPU_SLEEP_0>; - clocks = <&infracfg CLK_INFRA_CA57SEL>, + clocks = <&infracfg CLK_INFRA_CA72SEL>, <&apmixedsys CLK_APMIXED_MAINPLL>; clock-names = "cpu", "intermediate"; operating-points-v2 = <&cpu_opp_table_b>; @@ -214,11 +214,11 @@ Example 2 (MT8173 SoC): cpu3: cpu@101 { device_type = "cpu"; - compatible = "arm,cortex-a57"; + compatible = "arm,cortex-a72"; reg = <0x101>; enable-method = "psci"; cpu-idle-states = <&CPU_SLEEP_0>; - clocks = <&infracfg CLK_INFRA_CA57SEL>, + clocks = <&infracfg CLK_INFRA_CA72SEL>, <&apmixedsys CLK_APMIXED_MAINPLL>; clock-names = "cpu", "intermediate"; operating-points-v2 = <&cpu_opp_table_b>; From 797920a8498e420532ca6a63f9ac30fea477b3ff Mon Sep 17 00:00:00 2001 From: Bartosz Dudziak Date: Sat, 12 Jun 2021 22:53:34 +0200 Subject: [PATCH 08/21] dt-bindings: arm: msm: Add SAW2 for MSM8226 Add the dt-binding compatible in the SPM AVS Wrapper 2 (SAW2) for the MSM8226 SoC platform. Acked-by: Rob Herring Signed-off-by: Bartosz Dudziak Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210612205335.9730-2-bartosz.dudziak@snejp.pl --- Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt b/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt index ae4afc6dcfe0..94d50a949be1 100644 --- a/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt +++ b/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt @@ -25,6 +25,7 @@ PROPERTIES "qcom,saw2" A more specific value could be one of: "qcom,apq8064-saw2-v1.1-cpu" + "qcom,msm8226-saw2-v2.1-cpu" "qcom,msm8974-saw2-v2.1-cpu" "qcom,apq8084-saw2-v2.1-cpu" From 0f0ac1e4eef2753d4f9cd0117019da9501921fef Mon Sep 17 00:00:00 2001 From: Bartosz Dudziak Date: Sat, 12 Jun 2021 22:53:35 +0200 Subject: [PATCH 09/21] cpuidle: qcom: Add SPM register data for MSM8226 Add MSM8226 register data to SPM AVS Wrapper 2 (SAW2) power controller driver. Reviewed-by: Stephan Gerhold Signed-off-by: Bartosz Dudziak Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210612205335.9730-3-bartosz.dudziak@snejp.pl --- drivers/cpuidle/cpuidle-qcom-spm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/cpuidle/cpuidle-qcom-spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c index adf91a6e4d7d..c0e7971da2da 100644 --- a/drivers/cpuidle/cpuidle-qcom-spm.c +++ b/drivers/cpuidle/cpuidle-qcom-spm.c @@ -87,6 +87,18 @@ static const struct spm_reg_data spm_reg_8974_8084_cpu = { .start_index[PM_SLEEP_MODE_SPC] = 3, }; +/* SPM register data for 8226 */ +static const struct spm_reg_data spm_reg_8226_cpu = { + .reg_offset = spm_reg_offset_v2_1, + .spm_cfg = 0x0, + .spm_dly = 0x3C102800, + .seq = { 0x60, 0x03, 0x60, 0x0B, 0x0F, 0x20, 0x10, 0x80, 0x30, 0x90, + 0x5B, 0x60, 0x03, 0x60, 0x3B, 0x76, 0x76, 0x0B, 0x94, 0x5B, + 0x80, 0x10, 0x26, 0x30, 0x0F }, + .start_index[PM_SLEEP_MODE_STBY] = 0, + .start_index[PM_SLEEP_MODE_SPC] = 5, +}; + static const u8 spm_reg_offset_v1_1[SPM_REG_NR] = { [SPM_REG_CFG] = 0x08, [SPM_REG_SPM_CTL] = 0x20, @@ -259,6 +271,8 @@ static struct spm_driver_data *spm_get_drv(struct platform_device *pdev, } static const struct of_device_id spm_match_table[] = { + { .compatible = "qcom,msm8226-saw2-v2.1-cpu", + .data = &spm_reg_8226_cpu }, { .compatible = "qcom,msm8974-saw2-v2.1-cpu", .data = &spm_reg_8974_8084_cpu }, { .compatible = "qcom,apq8084-saw2-v2.1-cpu", From 49d6feef94c9f47ac4030563058f8a36267597b0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Jun 2021 18:44:46 +0200 Subject: [PATCH 10/21] cpufreq: intel_pstate: Combine ->stop_cpu() and ->offline() Combine the ->stop_cpu() and ->offline() callback routines for intel_pstate in the active mode so as to avoid setting the ->stop_cpu callback pointer which is going to be dropped from the framework. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/intel_pstate.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6012964df51b..bb4549959b11 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2532,7 +2532,7 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy) return 0; } -static int intel_pstate_cpu_offline(struct cpufreq_policy *policy) +static int intel_cpufreq_cpu_offline(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; @@ -2577,11 +2577,11 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy) return 0; } -static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) +static int intel_pstate_cpu_offline(struct cpufreq_policy *policy) { - pr_debug("CPU %d stopping\n", policy->cpu); - intel_pstate_clear_update_util_hook(policy->cpu); + + return intel_cpufreq_cpu_offline(policy); } static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) @@ -2654,7 +2654,6 @@ static struct cpufreq_driver intel_pstate = { .resume = intel_pstate_resume, .init = intel_pstate_cpu_init, .exit = intel_pstate_cpu_exit, - .stop_cpu = intel_pstate_stop_cpu, .offline = intel_pstate_cpu_offline, .online = intel_pstate_cpu_online, .update_limits = intel_pstate_update_limits, @@ -2956,7 +2955,7 @@ static struct cpufreq_driver intel_cpufreq = { .fast_switch = intel_cpufreq_fast_switch, .init = intel_cpufreq_cpu_init, .exit = intel_cpufreq_cpu_exit, - .offline = intel_pstate_cpu_offline, + .offline = intel_cpufreq_cpu_offline, .online = intel_pstate_cpu_online, .suspend = intel_pstate_suspend, .resume = intel_pstate_resume, From 9357a380f90a89a168d505561d11f68272e0e768 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 23 Jun 2021 09:54:39 +0530 Subject: [PATCH 11/21] cpufreq: CPPC: Migrate to ->exit() callback instead of ->stop_cpu() Commit 367dc4aa932b ("cpufreq: Add stop CPU callback to cpufreq_driver interface") added the ->stop_cpu() callback to allow the drivers to do clean up before the CPU is completely down and its state can't be modified. At that time the CPU hotplug framework used to call the cpufreq core's registered notifier for different events like CPU_DOWN_PREPARE and CPU_POST_DEAD. The ->stop_cpu() callback was called during the CPU_DOWN_PREPARE event. This is no longer the case, cpuhp_cpufreq_offline() is called only once by the CPU hotplug core now and we don't really need two separate callbacks for cpufreq drivers, i.e. ->stop_cpu() and -exit() callback itself. Migrate to using the ->exit() callback instead of ->stop_cpu(). Signed-off-by: Viresh Kumar [ rjw: Minor edits in the changelog and subject ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 46 ++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 2f769b1630c5..be4f62e2c5f1 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -182,27 +182,6 @@ static int cppc_verify_policy(struct cpufreq_policy_data *policy) return 0; } -static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) -{ - struct cppc_cpudata *cpu_data = policy->driver_data; - struct cppc_perf_caps *caps = &cpu_data->perf_caps; - unsigned int cpu = policy->cpu; - int ret; - - cpu_data->perf_ctrls.desired_perf = caps->lowest_perf; - - ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); - if (ret) - pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", - caps->lowest_perf, cpu, ret); - - /* Remove CPU node from list and free driver data for policy */ - free_cpumask_var(cpu_data->shared_cpu_map); - list_del(&cpu_data->node); - kfree(policy->driver_data); - policy->driver_data = NULL; -} - /* * The PCC subspace describes the rate at which platform can accept commands * on the shared PCC channel (including READs which do not count towards freq @@ -352,6 +331,29 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) return ret; } +static int cppc_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + struct cppc_perf_caps *caps = &cpu_data->perf_caps; + unsigned int cpu = policy->cpu; + int ret; + + cpu_data->perf_ctrls.desired_perf = caps->lowest_perf; + + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); + if (ret) + pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", + caps->lowest_perf, cpu, ret); + + /* Remove CPU node from list and free driver data for policy */ + free_cpumask_var(cpu_data->shared_cpu_map); + list_del(&cpu_data->node); + kfree(policy->driver_data); + policy->driver_data = NULL; + + return 0; +} + static inline u64 get_delta(u64 t1, u64 t0) { if (t1 > t0 || t0 > ~(u32)0) @@ -451,7 +453,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = { .target = cppc_cpufreq_set_target, .get = cppc_cpufreq_get_rate, .init = cppc_cpufreq_cpu_init, - .stop_cpu = cppc_cpufreq_stop_cpu, + .exit = cppc_cpufreq_cpu_exit, .set_boost = cppc_cpufreq_set_boost, .attr = cppc_cpufreq_attr, .name = "cppc_cpufreq", From 952da0c9ab5b047665442dc239cee36d5c9edb98 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 23 Jun 2021 11:31:14 +0530 Subject: [PATCH 12/21] cpufreq: powernv: Migrate to ->exit() callback instead of ->stop_cpu() Commit 367dc4aa932b ("cpufreq: Add stop CPU callback to cpufreq_driver interface") added the ->stop_cpu() callback to allow the drivers to do clean up before the CPU is completely down and its state can't be modified. At that time the CPU hotplug framework used to call the cpufreq core's registered notifier for different events like CPU_DOWN_PREPARE and CPU_POST_DEAD. The ->stop_cpu() callback was called during the CPU_DOWN_PREPARE event. This is no longer the case, cpuhp_cpufreq_offline() is called only once by the CPU hotplug core now and we don't really need two separate callbacks for cpufreq drivers, i.e. ->stop_cpu() and ->exit(), as everything can be done from the ->exit() callback itself. Migrate to using the ->exit() callback instead of ->stop_cpu(). Signed-off-by: Viresh Kumar [ rjw: Minor changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index e439b43c19eb..005600cef273 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -875,7 +875,15 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) static int powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - /* timer is deleted in cpufreq_cpu_stop() */ + struct powernv_smp_call_data freq_data; + struct global_pstate_info *gpstates = policy->driver_data; + + freq_data.pstate_id = idx_to_pstate(powernv_pstate_info.min); + freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); + smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); + if (gpstates) + del_timer_sync(&gpstates->timer); + kfree(policy->driver_data); return 0; @@ -1007,18 +1015,6 @@ static struct notifier_block powernv_cpufreq_opal_nb = { .priority = 0, }; -static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) -{ - struct powernv_smp_call_data freq_data; - struct global_pstate_info *gpstates = policy->driver_data; - - freq_data.pstate_id = idx_to_pstate(powernv_pstate_info.min); - freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); - smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); - if (gpstates) - del_timer_sync(&gpstates->timer); -} - static unsigned int powernv_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { @@ -1042,7 +1038,6 @@ static struct cpufreq_driver powernv_cpufreq_driver = { .target_index = powernv_cpufreq_target_index, .fast_switch = powernv_fast_switch, .get = powernv_cpufreq_get, - .stop_cpu = powernv_cpufreq_stop_cpu, .attr = powernv_cpu_freq_attr, }; From 3e0f897fd92662f0ff21ca1759d724a9ad574858 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 23 Jun 2021 09:54:42 +0530 Subject: [PATCH 13/21] cpufreq: Remove the ->stop_cpu() driver callback Now that all users of ->stop_cpu() have been migrated to using other callbacks, drop it from the core. Signed-off-by: Viresh Kumar [ rjw: Minor edits in the subject and changelog ] Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpu-drivers.rst | 3 --- Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst | 3 --- drivers/cpufreq/cpufreq.c | 3 --- include/linux/cpufreq.h | 1 - 4 files changed, 10 deletions(-) diff --git a/Documentation/cpu-freq/cpu-drivers.rst b/Documentation/cpu-freq/cpu-drivers.rst index a697278ce190..74fac797c396 100644 --- a/Documentation/cpu-freq/cpu-drivers.rst +++ b/Documentation/cpu-freq/cpu-drivers.rst @@ -71,9 +71,6 @@ And optionally .exit - A pointer to a per-policy cleanup function called during CPU_POST_DEAD phase of cpu hotplug process. - .stop_cpu - A pointer to a per-policy stop function called during - CPU_DOWN_PREPARE phase of cpu hotplug process. - .suspend - A pointer to a per-policy suspend function which is called with interrupts disabled and _after_ the governor is stopped for the policy. diff --git a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst index 0ca2cb646666..9570e9c9e939 100644 --- a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst +++ b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst @@ -76,9 +76,6 @@ CPUfreq核心层注册一个cpufreq_driver结构体。 .exit - 一个指向per-policy清理函数的指针,该函数在cpu热插拔过程的CPU_POST_DEAD 阶段被调用。 - .stop_cpu - 一个指向per-policy停止函数的指针,该函数在cpu热插拔过程的CPU_DOWN_PREPARE - 阶段被调用。 - .suspend - 一个指向per-policy暂停函数的指针,该函数在关中断且在该策略的调节器停止 后被调用。 diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cbab834c37a0..5e4b5316d254 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1606,9 +1606,6 @@ static int cpufreq_offline(unsigned int cpu) policy->cdev = NULL; } - if (cpufreq_driver->stop_cpu) - cpufreq_driver->stop_cpu(policy); - if (has_target()) cpufreq_exit_governor(policy); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 353969c7acd3..2e2267a36502 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -371,7 +371,6 @@ struct cpufreq_driver { int (*online)(struct cpufreq_policy *policy); int (*offline)(struct cpufreq_policy *policy); int (*exit)(struct cpufreq_policy *policy); - void (*stop_cpu)(struct cpufreq_policy *policy); int (*suspend)(struct cpufreq_policy *policy); int (*resume)(struct cpufreq_policy *policy); From f9ccdec24d91ffddf1c6f4173b0e191fc08c7d14 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 29 Jun 2021 11:57:07 +0530 Subject: [PATCH 14/21] cpufreq: Reuse cpufreq_driver_resolve_freq() in __cpufreq_driver_target() __cpufreq_driver_target() open codes cpufreq_driver_resolve_freq(), lets make the former reuse the later. Separate out __resolve_freq() to accept relation as well as an argument and use it at both the locations. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 46 ++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5e4b5316d254..8271ed1a4947 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -524,6 +524,27 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); +static unsigned int __resolve_freq(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + policy->cached_target_freq = target_freq; + + if (cpufreq_driver->target_index) { + unsigned int idx; + + idx = cpufreq_frequency_table_target(policy, target_freq, + relation); + policy->cached_resolved_idx = idx; + return policy->freq_table[idx].frequency; + } + + if (cpufreq_driver->resolve_freq) + return cpufreq_driver->resolve_freq(policy, target_freq); + + return target_freq; +} + /** * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported * one. @@ -538,22 +559,7 @@ EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq) { - target_freq = clamp_val(target_freq, policy->min, policy->max); - policy->cached_target_freq = target_freq; - - if (cpufreq_driver->target_index) { - unsigned int idx; - - idx = cpufreq_frequency_table_target(policy, target_freq, - CPUFREQ_RELATION_L); - policy->cached_resolved_idx = idx; - return policy->freq_table[idx].frequency; - } - - if (cpufreq_driver->resolve_freq) - return cpufreq_driver->resolve_freq(policy, target_freq); - - return target_freq; + return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_L); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2231,13 +2237,11 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation) { unsigned int old_target_freq = target_freq; - int index; if (cpufreq_disabled()) return -ENODEV; - /* Make sure that target_freq is within supported range */ - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = __resolve_freq(policy, target_freq, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -2258,9 +2262,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (!cpufreq_driver->target_index) return -EINVAL; - index = cpufreq_frequency_table_target(policy, target_freq, relation); - - return __target_index(policy, index); + return __target_index(policy, policy->cached_resolved_idx); } EXPORT_SYMBOL_GPL(__cpufreq_driver_target); From b3beca76181681fce9cf72f37d19c3030e3353c0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 29 Jun 2021 11:57:08 +0530 Subject: [PATCH 15/21] cpufreq: Remove ->resolve_freq() Commit e3c062360870 ("cpufreq: add cpufreq_driver_resolve_freq()") introduced this callback, back in 2016, for drivers that provide the ->target() callback. The kernel hasn't seen a single user of it in the past 5 years and it is not likely to be used any time soon. Remove it for now. Signed-off-by: Viresh Kumar [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpu-drivers.rst | 3 --- .../zh_CN/cpu-freq/cpu-drivers.rst | 2 -- drivers/cpufreq/cpufreq.c | 23 ++++++++----------- include/linux/cpufreq.h | 9 -------- 4 files changed, 9 insertions(+), 28 deletions(-) diff --git a/Documentation/cpu-freq/cpu-drivers.rst b/Documentation/cpu-freq/cpu-drivers.rst index 74fac797c396..d84ededb66f9 100644 --- a/Documentation/cpu-freq/cpu-drivers.rst +++ b/Documentation/cpu-freq/cpu-drivers.rst @@ -58,9 +58,6 @@ And optionally .driver_data - cpufreq driver specific data. - .resolve_freq - Returns the most appropriate frequency for a target - frequency. Doesn't change the frequency though. - .get_intermediate and target_intermediate - Used to switch to stable frequency while changing CPU frequency. diff --git a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst index 9570e9c9e939..5ae9cfa2ec55 100644 --- a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst +++ b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst @@ -64,8 +64,6 @@ CPUfreq核心层注册一个cpufreq_driver结构体。 .driver_data - cpufreq驱动程序的特定数据。 - .resolve_freq - 返回最适合目标频率的频率。不过并不能改变频率。 - .get_intermediate 和 target_intermediate - 用于在改变CPU频率时切换到稳定 的频率。 diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8271ed1a4947..45f3416988f1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -527,22 +527,17 @@ EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); static unsigned int __resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { + unsigned int idx; + target_freq = clamp_val(target_freq, policy->min, policy->max); + + if (!cpufreq_driver->target_index) + return target_freq; + + idx = cpufreq_frequency_table_target(policy, target_freq, relation); + policy->cached_resolved_idx = idx; policy->cached_target_freq = target_freq; - - if (cpufreq_driver->target_index) { - unsigned int idx; - - idx = cpufreq_frequency_table_target(policy, target_freq, - relation); - policy->cached_resolved_idx = idx; - return policy->freq_table[idx].frequency; - } - - if (cpufreq_driver->resolve_freq) - return cpufreq_driver->resolve_freq(policy, target_freq); - - return target_freq; + return policy->freq_table[idx].frequency; } /** diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2e2267a36502..9fd719475fcd 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -330,15 +330,6 @@ struct cpufreq_driver { unsigned long target_perf, unsigned long capacity); - /* - * Caches and returns the lowest driver-supported frequency greater than - * or equal to the target frequency, subject to any driver limitations. - * Does not set the frequency. Only to be implemented for drivers with - * target(). - */ - unsigned int (*resolve_freq)(struct cpufreq_policy *policy, - unsigned int target_freq); - /* * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION * unset. From fe2535a44904a77615a3af8e8fd7dafb98fb0e1b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 18 Jun 2021 13:31:27 +0530 Subject: [PATCH 16/21] cpufreq: CPPC: Fix potential memleak in cppc_cpufreq_cpu_init It's a classic example of memleak, we allocate something, we fail and never free the resources. Make sure we free all resources on policy ->init() failures. Fixes: a28b2bfc099c ("cppc_cpufreq: replace per-cpu data array with a list") Tested-by: Vincent Guittot Reviewed-by: Ionela Voinescu Tested-by: Qian Cai Signed-off-by: Viresh Kumar --- drivers/cpufreq/cppc_cpufreq.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index be4f62e2c5f1..945ab4942c1c 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -256,6 +256,16 @@ static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu) return NULL; } +static void cppc_cpufreq_put_cpu_data(struct cpufreq_policy *policy) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + + list_del(&cpu_data->node); + free_cpumask_var(cpu_data->shared_cpu_map); + kfree(cpu_data); + policy->driver_data = NULL; +} + static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) { unsigned int cpu = policy->cpu; @@ -309,7 +319,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) default: pr_debug("Unsupported CPU co-ord type: %d\n", policy->shared_type); - return -EFAULT; + ret = -EFAULT; + goto out; } /* @@ -324,10 +335,16 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu_data->perf_ctrls.desired_perf = caps->highest_perf; ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); - if (ret) + if (ret) { pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", caps->highest_perf, cpu, ret); + goto out; + } + return 0; + +out: + cppc_cpufreq_put_cpu_data(policy); return ret; } @@ -345,12 +362,7 @@ static int cppc_cpufreq_cpu_exit(struct cpufreq_policy *policy) pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", caps->lowest_perf, cpu, ret); - /* Remove CPU node from list and free driver data for policy */ - free_cpumask_var(cpu_data->shared_cpu_map); - list_del(&cpu_data->node); - kfree(policy->driver_data); - policy->driver_data = NULL; - + cppc_cpufreq_put_cpu_data(policy); return 0; } From eead1840cbd31e553bf8ccdefbd5b065bf596b71 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 18 Jun 2021 13:42:23 +0530 Subject: [PATCH 17/21] cpufreq: CPPC: Pass structure instance by reference Don't pass structure instance by value, pass it by reference instead. Tested-by: Vincent Guittot Reviewed-by: Ionela Voinescu Tested-by: Qian Cai Signed-off-by: Viresh Kumar --- drivers/cpufreq/cppc_cpufreq.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 945ab4942c1c..4a7f0f9b8c60 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -375,18 +375,18 @@ static inline u64 get_delta(u64 t1, u64 t0) } static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, - struct cppc_perf_fb_ctrs fb_ctrs_t0, - struct cppc_perf_fb_ctrs fb_ctrs_t1) + struct cppc_perf_fb_ctrs *fb_ctrs_t0, + struct cppc_perf_fb_ctrs *fb_ctrs_t1) { u64 delta_reference, delta_delivered; u64 reference_perf, delivered_perf; - reference_perf = fb_ctrs_t0.reference_perf; + reference_perf = fb_ctrs_t0->reference_perf; - delta_reference = get_delta(fb_ctrs_t1.reference, - fb_ctrs_t0.reference); - delta_delivered = get_delta(fb_ctrs_t1.delivered, - fb_ctrs_t0.delivered); + delta_reference = get_delta(fb_ctrs_t1->reference, + fb_ctrs_t0->reference); + delta_delivered = get_delta(fb_ctrs_t1->delivered, + fb_ctrs_t0->delivered); /* Check to avoid divide-by zero */ if (delta_reference || delta_delivered) @@ -417,7 +417,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) if (ret) return ret; - return cppc_get_rate_from_fbctrs(cpu_data, fb_ctrs_t0, fb_ctrs_t1); + return cppc_get_rate_from_fbctrs(cpu_data, &fb_ctrs_t0, &fb_ctrs_t1); } static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) From 83150f5d05f065fb5c12c612f119015cabdcc124 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 15 Jun 2021 14:27:50 +0530 Subject: [PATCH 18/21] arch_topology: Avoid use-after-free for scale_freq_data Currently topology_scale_freq_tick() (which gets called from scheduler_tick()) may end up using a pointer to "struct scale_freq_data", which was previously cleared by topology_clear_scale_freq_source(), as there is no protection in place here. The users of topology_clear_scale_freq_source() though needs a guarantee that the previously cleared scale_freq_data isn't used anymore, so they can free the related resources. Since topology_scale_freq_tick() is called from scheduler tick, we don't want to add locking in there. Use the RCU update mechanism instead (which is already used by the scheduler's utilization update path) to guarantee race free updates here. synchronize_rcu() makes sure that all RCU critical sections that started before it is called, will finish before it returns. And so the callers of topology_clear_scale_freq_source() don't need to worry about their callback getting called anymore. Cc: Paul E. McKenney Fixes: 01e055c120a4 ("arch_topology: Allow multiple entities to provide sched_freq_tick() callback") Tested-by: Vincent Guittot Reviewed-by: Ionela Voinescu Tested-by: Qian Cai Signed-off-by: Viresh Kumar --- drivers/base/arch_topology.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index c1179edc0f3b..921312a8d957 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -18,10 +18,11 @@ #include #include #include +#include #include #include -static DEFINE_PER_CPU(struct scale_freq_data *, sft_data); +static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; static bool scale_freq_invariant; @@ -66,16 +67,20 @@ void topology_set_scale_freq_source(struct scale_freq_data *data, if (cpumask_empty(&scale_freq_counters_mask)) scale_freq_invariant = topology_scale_freq_invariant(); + rcu_read_lock(); + for_each_cpu(cpu, cpus) { - sfd = per_cpu(sft_data, cpu); + sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu)); /* Use ARCH provided counters whenever possible */ if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) { - per_cpu(sft_data, cpu) = data; + rcu_assign_pointer(per_cpu(sft_data, cpu), data); cpumask_set_cpu(cpu, &scale_freq_counters_mask); } } + rcu_read_unlock(); + update_scale_freq_invariant(true); } EXPORT_SYMBOL_GPL(topology_set_scale_freq_source); @@ -86,22 +91,32 @@ void topology_clear_scale_freq_source(enum scale_freq_source source, struct scale_freq_data *sfd; int cpu; + rcu_read_lock(); + for_each_cpu(cpu, cpus) { - sfd = per_cpu(sft_data, cpu); + sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu)); if (sfd && sfd->source == source) { - per_cpu(sft_data, cpu) = NULL; + rcu_assign_pointer(per_cpu(sft_data, cpu), NULL); cpumask_clear_cpu(cpu, &scale_freq_counters_mask); } } + rcu_read_unlock(); + + /* + * Make sure all references to previous sft_data are dropped to avoid + * use-after-free races. + */ + synchronize_rcu(); + update_scale_freq_invariant(false); } EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source); void topology_scale_freq_tick(void) { - struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data); + struct scale_freq_data *sfd = rcu_dereference_sched(*this_cpu_ptr(&sft_data)); if (sfd) sfd->set_freq_scale(); From 1eb5dde674f57b1a1918dab33f09e35cdd64eb07 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Jun 2020 15:49:40 +0530 Subject: [PATCH 19/21] cpufreq: CPPC: Add support for frequency invariance The Frequency Invariance Engine (FIE) is providing a frequency scaling correction factor that helps achieve more accurate load-tracking. Normally, this scaling factor can be obtained directly with the help of the cpufreq drivers as they know the exact frequency the hardware is running at. But that isn't the case for CPPC cpufreq driver. Another way of obtaining that is using the arch specific counter support, which is already present in kernel, but that hardware is optional for platforms. This patch updates the CPPC driver to register itself with the topology core to provide its own implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which gets called by the scheduler on every tick. Note that the arch specific counters have higher priority than CPPC counters, if available, though the CPPC driver doesn't need to have any special handling for that. On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we reach here from hard-irq context), which then schedules a normal work item and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable based on the counter updates since the last tick. To allow platforms to disable this CPPC counter-based frequency invariance support, this is all done under CONFIG_ACPI_CPPC_CPUFREQ_FIE, which is enabled by default. This also exports sched_setattr_nocheck() as the CPPC driver can be built as a module. Cc: linux-acpi@vger.kernel.org Tested-by: Vincent Guittot Reviewed-by: Ionela Voinescu Tested-by: Qian Cai Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 10 ++ drivers/cpufreq/cppc_cpufreq.c | 252 +++++++++++++++++++++++++++++++-- include/linux/arch_topology.h | 1 + kernel/sched/core.c | 1 + 4 files changed, 251 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index e65e0a43be64..a5c5f70acfc9 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -19,6 +19,16 @@ config ACPI_CPPC_CPUFREQ If in doubt, say N. +config ACPI_CPPC_CPUFREQ_FIE + bool "Frequency Invariance support for CPPC cpufreq driver" + depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY + default y + help + This extends frequency invariance support in the CPPC cpufreq driver, + by using CPPC delivered and reference performance counters. + + If in doubt, say N. + config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM tristate "Allwinner nvmem based SUN50I CPUFreq driver" depends on ARCH_SUNXI diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 4a7f0f9b8c60..d4c27022b9c9 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -10,14 +10,18 @@ #define pr_fmt(fmt) "CPPC Cpufreq:" fmt +#include #include #include #include #include #include #include +#include +#include #include #include +#include #include @@ -57,6 +61,216 @@ static struct cppc_workaround_oem_info wa_info[] = { } }; +#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE + +/* Frequency invariance support */ +struct cppc_freq_invariance { + int cpu; + struct irq_work irq_work; + struct kthread_work work; + struct cppc_perf_fb_ctrs prev_perf_fb_ctrs; + struct cppc_cpudata *cpu_data; +}; + +static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); +static struct kthread_worker *kworker_fie; + +static struct cpufreq_driver cppc_cpufreq_driver; +static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu); +static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, + struct cppc_perf_fb_ctrs *fb_ctrs_t1); + +/** + * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance + * @work: The work item. + * + * The CPPC driver register itself with the topology core to provide its own + * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which + * gets called by the scheduler on every tick. + * + * Note that the arch specific counters have higher priority than CPPC counters, + * if available, though the CPPC driver doesn't need to have any special + * handling for that. + * + * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we + * reach here from hard-irq context), which then schedules a normal work item + * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable + * based on the counter updates since the last tick. + */ +static void cppc_scale_freq_workfn(struct kthread_work *work) +{ + struct cppc_freq_invariance *cppc_fi; + struct cppc_perf_fb_ctrs fb_ctrs = {0}; + struct cppc_cpudata *cpu_data; + unsigned long local_freq_scale; + u64 perf; + + cppc_fi = container_of(work, struct cppc_freq_invariance, work); + cpu_data = cppc_fi->cpu_data; + + if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) { + pr_warn("%s: failed to read perf counters\n", __func__); + return; + } + + perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs, + &fb_ctrs); + cppc_fi->prev_perf_fb_ctrs = fb_ctrs; + + perf <<= SCHED_CAPACITY_SHIFT; + local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf); + + /* This can happen due to counter's overflow */ + if (unlikely(local_freq_scale > 1024)) + local_freq_scale = 1024; + + per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale; +} + +static void cppc_irq_work(struct irq_work *irq_work) +{ + struct cppc_freq_invariance *cppc_fi; + + cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work); + kthread_queue_work(kworker_fie, &cppc_fi->work); +} + +static void cppc_scale_freq_tick(void) +{ + struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id()); + + /* + * cppc_get_perf_ctrs() can potentially sleep, call that from the right + * context. + */ + irq_work_queue(&cppc_fi->irq_work); +} + +static struct scale_freq_data cppc_sftd = { + .source = SCALE_FREQ_SOURCE_CPPC, + .set_freq_scale = cppc_scale_freq_tick, +}; + +static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy) +{ + struct cppc_freq_invariance *cppc_fi; + int cpu, ret; + + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + for_each_cpu(cpu, policy->cpus) { + cppc_fi = &per_cpu(cppc_freq_inv, cpu); + cppc_fi->cpu = cpu; + cppc_fi->cpu_data = policy->driver_data; + kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn); + init_irq_work(&cppc_fi->irq_work, cppc_irq_work); + + ret = cppc_get_perf_ctrs(cpu, &cppc_fi->prev_perf_fb_ctrs); + if (ret) { + pr_warn("%s: failed to read perf counters for cpu:%d: %d\n", + __func__, cpu, ret); + + /* + * Don't abort if the CPU was offline while the driver + * was getting registered. + */ + if (cpu_online(cpu)) + return; + } + } + + /* Register for freq-invariance */ + topology_set_scale_freq_source(&cppc_sftd, policy->cpus); +} + +/* + * We free all the resources on policy's removal and not on CPU removal as the + * irq-work are per-cpu and the hotplug core takes care of flushing the pending + * irq-works (hint: smpcfd_dying_cpu()) on CPU hotplug. Even if the kthread-work + * fires on another CPU after the concerned CPU is removed, it won't harm. + * + * We just need to make sure to remove them all on policy->exit(). + */ +static void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy) +{ + struct cppc_freq_invariance *cppc_fi; + int cpu; + + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + /* policy->cpus will be empty here, use related_cpus instead */ + topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, policy->related_cpus); + + for_each_cpu(cpu, policy->related_cpus) { + cppc_fi = &per_cpu(cppc_freq_inv, cpu); + irq_work_sync(&cppc_fi->irq_work); + kthread_cancel_work_sync(&cppc_fi->work); + } +} + +static void __init cppc_freq_invariance_init(void) +{ + struct sched_attr attr = { + .size = sizeof(struct sched_attr), + .sched_policy = SCHED_DEADLINE, + .sched_nice = 0, + .sched_priority = 0, + /* + * Fake (unused) bandwidth; workaround to "fix" + * priority inheritance. + */ + .sched_runtime = 1000000, + .sched_deadline = 10000000, + .sched_period = 10000000, + }; + int ret; + + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + kworker_fie = kthread_create_worker(0, "cppc_fie"); + if (IS_ERR(kworker_fie)) + return; + + ret = sched_setattr_nocheck(kworker_fie->task, &attr); + if (ret) { + pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__, + ret); + kthread_destroy_worker(kworker_fie); + return; + } +} + +static void cppc_freq_invariance_exit(void) +{ + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + kthread_destroy_worker(kworker_fie); + kworker_fie = NULL; +} + +#else +static inline void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy) +{ +} + +static inline void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy) +{ +} + +static inline void cppc_freq_invariance_init(void) +{ +} + +static inline void cppc_freq_invariance_exit(void) +{ +} +#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */ + /* Callback function used to retrieve the max frequency from DMI */ static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) { @@ -341,6 +555,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) goto out; } + cppc_cpufreq_cpu_fie_init(policy); return 0; out: @@ -355,6 +570,8 @@ static int cppc_cpufreq_cpu_exit(struct cpufreq_policy *policy) unsigned int cpu = policy->cpu; int ret; + cppc_cpufreq_cpu_fie_exit(policy); + cpu_data->perf_ctrls.desired_perf = caps->lowest_perf; ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); @@ -374,12 +591,12 @@ static inline u64 get_delta(u64 t1, u64 t0) return (u32)t1 - (u32)t0; } -static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, - struct cppc_perf_fb_ctrs *fb_ctrs_t0, - struct cppc_perf_fb_ctrs *fb_ctrs_t1) +static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, + struct cppc_perf_fb_ctrs *fb_ctrs_t1) { u64 delta_reference, delta_delivered; - u64 reference_perf, delivered_perf; + u64 reference_perf; reference_perf = fb_ctrs_t0->reference_perf; @@ -388,14 +605,11 @@ static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, delta_delivered = get_delta(fb_ctrs_t1->delivered, fb_ctrs_t0->delivered); - /* Check to avoid divide-by zero */ - if (delta_reference || delta_delivered) - delivered_perf = (reference_perf * delta_delivered) / - delta_reference; - else - delivered_perf = cpu_data->perf_ctrls.desired_perf; + /* Check to avoid divide-by zero and invalid delivered_perf */ + if (!delta_reference || !delta_delivered) + return cpu_data->perf_ctrls.desired_perf; - return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); + return (reference_perf * delta_delivered) / delta_reference; } static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) @@ -403,6 +617,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct cppc_cpudata *cpu_data = policy->driver_data; + u64 delivered_perf; int ret; cpufreq_cpu_put(policy); @@ -417,7 +632,10 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) if (ret) return ret; - return cppc_get_rate_from_fbctrs(cpu_data, &fb_ctrs_t0, &fb_ctrs_t1); + delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0, + &fb_ctrs_t1); + + return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); } static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) @@ -518,14 +736,21 @@ static void cppc_check_hisi_workaround(void) static int __init cppc_cpufreq_init(void) { + int ret; + if ((acpi_disabled) || !acpi_cpc_valid()) return -ENODEV; INIT_LIST_HEAD(&cpu_data_list); cppc_check_hisi_workaround(); + cppc_freq_invariance_init(); - return cpufreq_register_driver(&cppc_cpufreq_driver); + ret = cpufreq_register_driver(&cppc_cpufreq_driver); + if (ret) + cppc_freq_invariance_exit(); + + return ret; } static inline void free_cpu_data(void) @@ -543,6 +768,7 @@ static inline void free_cpu_data(void) static void __exit cppc_cpufreq_exit(void) { cpufreq_unregister_driver(&cppc_cpufreq_driver); + cppc_freq_invariance_exit(); free_cpu_data(); } diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 11e555cfaecb..f180240dc95f 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -37,6 +37,7 @@ bool topology_scale_freq_invariant(void); enum scale_freq_source { SCALE_FREQ_SOURCE_CPUFREQ = 0, SCALE_FREQ_SOURCE_ARCH, + SCALE_FREQ_SOURCE_CPPC, }; struct scale_freq_data { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cf16f8fda9a6..2d9ff40f4661 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7182,6 +7182,7 @@ int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) { return __sched_setscheduler(p, attr, false, true); } +EXPORT_SYMBOL_GPL(sched_setattr_nocheck); /** * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. From 75674eb06a28816af2a7331dcee4088cc1ab5f6d Mon Sep 17 00:00:00 2001 From: Mark-PK Tsai Date: Thu, 1 Jul 2021 08:45:38 +0800 Subject: [PATCH 20/21] PM: sleep: Use ktime_us_delta() in initcall_debug_report() Use ktime_us_delta() to make the debug log more precise instead of shifting the return value of ktime_to_ns() applied to a ktime_sub() result by 10 bit positions to the right. Signed-off-by: Mark-PK Tsai [ rjw: Changelog rewrite, subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f893c3c5af07..d568772152c2 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -220,16 +220,13 @@ static void initcall_debug_report(struct device *dev, ktime_t calltime, void *cb, int error) { ktime_t rettime; - s64 nsecs; if (!pm_print_times_enabled) return; rettime = ktime_get(); - nsecs = (s64) ktime_to_ns(ktime_sub(rettime, calltime)); - dev_info(dev, "%pS returned %d after %Ld usecs\n", cb, error, - (unsigned long long)nsecs >> 10); + (unsigned long long)ktime_us_delta(rettime, calltime)); } /** From 40ba55e40d0bd740fb1cb2b77c1630013536e440 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 24 Jun 2021 13:18:02 -0700 Subject: [PATCH 21/21] PM: domains: Shrink locking area of the gpd_list_lock On trogdor devices I see the following lockdep splat when stopping youtube with lockdep enabled in the kernel. ====================================================== WARNING: possible circular locking dependency detected 5.13.0-rc2 #71 Not tainted ------------------------------------------------------ ThreadPoolSingl/3969 is trying to acquire lock: ffffff80d4d5c080 (&inst->lock#3){+.+.}-{3:3}, at: vdec_buf_cleanup+0x3c/0x17c [venus_dec] but task is already holding lock: ffffff80d3c3c4f8 (&q->mmap_lock){+.+.}-{3:3}, at: vb2_core_reqbufs+0xe4/0x390 [videobuf2_common] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #5 (&q->mmap_lock){+.+.}-{3:3}: __mutex_lock_common+0xcc/0xb88 mutex_lock_nested+0x5c/0x68 vb2_mmap+0xf4/0x290 [videobuf2_common] v4l2_m2m_fop_mmap+0x44/0x50 [v4l2_mem2mem] v4l2_mmap+0x5c/0xa4 mmap_region+0x310/0x5a4 do_mmap+0x348/0x43c vm_mmap_pgoff+0xfc/0x178 ksys_mmap_pgoff+0x84/0xfc __arm64_compat_sys_aarch32_mmap2+0x2c/0x38 invoke_syscall+0x54/0x110 el0_svc_common+0x88/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 -> #4 (&mm->mmap_lock){++++}-{3:3}: __might_fault+0x60/0x88 filldir64+0x124/0x3a0 dcache_readdir+0x7c/0x1ec iterate_dir+0xc4/0x184 __arm64_sys_getdents64+0x78/0x170 invoke_syscall+0x54/0x110 el0_svc_common+0xa8/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 -> #3 (&sb->s_type->i_mutex_key#3){++++}-{3:3}: down_write+0x94/0x1f4 start_creating+0xb0/0x174 debugfs_create_dir+0x28/0x138 opp_debug_register+0x88/0xc0 _add_opp_dev+0x84/0x9c _add_opp_table_indexed+0x16c/0x310 _of_add_table_indexed+0x70/0xb5c dev_pm_opp_of_add_table_indexed+0x20/0x2c of_genpd_add_provider_onecell+0xc4/0x1c8 rpmhpd_probe+0x21c/0x278 platform_probe+0xb4/0xd4 really_probe+0x140/0x35c driver_probe_device+0x90/0xcc __device_attach_driver+0xa4/0xc0 bus_for_each_drv+0x8c/0xd8 __device_attach+0xc4/0x150 device_initial_probe+0x20/0x2c bus_probe_device+0x40/0xa4 device_add+0x22c/0x3fc of_device_add+0x44/0x54 of_platform_device_create_pdata+0xb0/0xf4 of_platform_bus_create+0x1d0/0x350 of_platform_populate+0x80/0xd4 devm_of_platform_populate+0x64/0xb0 rpmh_rsc_probe+0x378/0x3dc platform_probe+0xb4/0xd4 really_probe+0x140/0x35c driver_probe_device+0x90/0xcc __device_attach_driver+0xa4/0xc0 bus_for_each_drv+0x8c/0xd8 __device_attach+0xc4/0x150 device_initial_probe+0x20/0x2c bus_probe_device+0x40/0xa4 device_add+0x22c/0x3fc of_device_add+0x44/0x54 of_platform_device_create_pdata+0xb0/0xf4 of_platform_bus_create+0x1d0/0x350 of_platform_bus_create+0x21c/0x350 of_platform_populate+0x80/0xd4 of_platform_default_populate_init+0xb8/0xd4 do_one_initcall+0x1b4/0x400 do_initcall_level+0xa8/0xc8 do_initcalls+0x5c/0x9c do_basic_setup+0x2c/0x38 kernel_init_freeable+0x1a4/0x1ec kernel_init+0x20/0x118 ret_from_fork+0x10/0x30 -> #2 (gpd_list_lock){+.+.}-{3:3}: __mutex_lock_common+0xcc/0xb88 mutex_lock_nested+0x5c/0x68 __genpd_dev_pm_attach+0x70/0x18c genpd_dev_pm_attach_by_id+0xe4/0x158 genpd_dev_pm_attach_by_name+0x48/0x60 dev_pm_domain_attach_by_name+0x2c/0x38 dev_pm_opp_attach_genpd+0xac/0x160 vcodec_domains_get+0x94/0x14c [venus_core] core_get_v4+0x150/0x188 [venus_core] venus_probe+0x138/0x444 [venus_core] platform_probe+0xb4/0xd4 really_probe+0x140/0x35c driver_probe_device+0x90/0xcc device_driver_attach+0x58/0x7c __driver_attach+0xc8/0xe0 bus_for_each_dev+0x88/0xd4 driver_attach+0x30/0x3c bus_add_driver+0x10c/0x1e0 driver_register+0x70/0x108 __platform_driver_register+0x30/0x3c 0xffffffde113e1044 do_one_initcall+0x1b4/0x400 do_init_module+0x64/0x1fc load_module+0x17f4/0x1958 __arm64_sys_finit_module+0xb4/0xf0 invoke_syscall+0x54/0x110 el0_svc_common+0x88/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 -> #1 (&opp_table->genpd_virt_dev_lock){+.+.}-{3:3}: __mutex_lock_common+0xcc/0xb88 mutex_lock_nested+0x5c/0x68 _set_required_opps+0x74/0x120 _set_opp+0x94/0x37c dev_pm_opp_set_rate+0xa0/0x194 core_clks_set_rate+0x28/0x58 [venus_core] load_scale_v4+0x228/0x2b4 [venus_core] session_process_buf+0x160/0x198 [venus_core] venus_helper_vb2_buf_queue+0xcc/0x130 [venus_core] vdec_vb2_buf_queue+0xc4/0x140 [venus_dec] __enqueue_in_driver+0x164/0x188 [videobuf2_common] vb2_core_qbuf+0x13c/0x47c [videobuf2_common] vb2_qbuf+0x88/0xec [videobuf2_v4l2] v4l2_m2m_qbuf+0x84/0x15c [v4l2_mem2mem] v4l2_m2m_ioctl_qbuf+0x24/0x30 [v4l2_mem2mem] v4l_qbuf+0x54/0x68 __video_do_ioctl+0x2bc/0x3bc video_usercopy+0x558/0xb04 video_ioctl2+0x24/0x30 v4l2_ioctl+0x58/0x68 v4l2_compat_ioctl32+0x84/0xa0 __arm64_compat_sys_ioctl+0x12c/0x140 invoke_syscall+0x54/0x110 el0_svc_common+0x88/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 -> #0 (&inst->lock#3){+.+.}-{3:3}: __lock_acquire+0x248c/0x2d6c lock_acquire+0x240/0x314 __mutex_lock_common+0xcc/0xb88 mutex_lock_nested+0x5c/0x68 vdec_buf_cleanup+0x3c/0x17c [venus_dec] __vb2_queue_free+0x98/0x204 [videobuf2_common] vb2_core_reqbufs+0x14c/0x390 [videobuf2_common] vb2_reqbufs+0x58/0x74 [videobuf2_v4l2] v4l2_m2m_reqbufs+0x58/0x90 [v4l2_mem2mem] v4l2_m2m_ioctl_reqbufs+0x24/0x30 [v4l2_mem2mem] v4l_reqbufs+0x58/0x6c __video_do_ioctl+0x2bc/0x3bc video_usercopy+0x558/0xb04 video_ioctl2+0x24/0x30 v4l2_ioctl+0x58/0x68 v4l2_compat_ioctl32+0x84/0xa0 __arm64_compat_sys_ioctl+0x12c/0x140 invoke_syscall+0x54/0x110 el0_svc_common+0x88/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 other info that might help us debug this: Chain exists of: &inst->lock#3 --> &mm->mmap_lock --> &q->mmap_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&q->mmap_lock); lock(&mm->mmap_lock); lock(&q->mmap_lock); lock(&inst->lock#3); *** DEADLOCK *** 1 lock held by ThreadPoolSingl/3969: #0: ffffff80d3c3c4f8 (&q->mmap_lock){+.+.}-{3:3}, at: vb2_core_reqbufs+0xe4/0x390 [videobuf2_common] stack backtrace: CPU: 2 PID: 3969 Comm: ThreadPoolSingl Not tainted 5.13.0-rc2 #71 Hardware name: Google Lazor (rev3+) with KB Backlight (DT) Call trace: dump_backtrace+0x0/0x1b4 show_stack+0x24/0x30 dump_stack+0xe0/0x15c print_circular_bug+0x32c/0x388 check_noncircular+0x138/0x140 __lock_acquire+0x248c/0x2d6c lock_acquire+0x240/0x314 __mutex_lock_common+0xcc/0xb88 mutex_lock_nested+0x5c/0x68 vdec_buf_cleanup+0x3c/0x17c [venus_dec] __vb2_queue_free+0x98/0x204 [videobuf2_common] vb2_core_reqbufs+0x14c/0x390 [videobuf2_common] vb2_reqbufs+0x58/0x74 [videobuf2_v4l2] v4l2_m2m_reqbufs+0x58/0x90 [v4l2_mem2mem] v4l2_m2m_ioctl_reqbufs+0x24/0x30 [v4l2_mem2mem] v4l_reqbufs+0x58/0x6c __video_do_ioctl+0x2bc/0x3bc video_usercopy+0x558/0xb04 video_ioctl2+0x24/0x30 v4l2_ioctl+0x58/0x68 v4l2_compat_ioctl32+0x84/0xa0 __arm64_compat_sys_ioctl+0x12c/0x140 invoke_syscall+0x54/0x110 el0_svc_common+0x88/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 The 'gpd_list_lock' is nominally named as such to protect the 'gpd_list' from concurrent access and mutation. Unfortunately, holding that mutex around various OPP framework calls leads to lockdep splats because now we're doing various operations in OPP core such as registering with debugfs while holding the list lock. We don't need to hold any list mutex while we're calling into OPP, so let's shrink the locking area of the 'gpd_list_lock' so that lockdep isn't triggered. This also helps reduce contention on this lock, which probably doesn't matter much but at least is nice to have. Cc: Len Brown Cc: Pavel Machek Cc: Greg Kroah-Hartman Cc: Cc: Viresh Kumar Signed-off-by: Stephen Boyd Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 38 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index ab0b740cc0f1..a934c679e6ce 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2018,8 +2018,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); - genpd_debug_add(genpd); mutex_unlock(&gpd_list_lock); + genpd_debug_add(genpd); return 0; } @@ -2206,12 +2206,19 @@ static int genpd_add_provider(struct device_node *np, genpd_xlate_t xlate, static bool genpd_present(const struct generic_pm_domain *genpd) { + bool ret = false; const struct generic_pm_domain *gpd; - list_for_each_entry(gpd, &gpd_list, gpd_list_node) - if (gpd == genpd) - return true; - return false; + mutex_lock(&gpd_list_lock); + list_for_each_entry(gpd, &gpd_list, gpd_list_node) { + if (gpd == genpd) { + ret = true; + break; + } + } + mutex_unlock(&gpd_list_lock); + + return ret; } /** @@ -2222,15 +2229,13 @@ static bool genpd_present(const struct generic_pm_domain *genpd) int of_genpd_add_provider_simple(struct device_node *np, struct generic_pm_domain *genpd) { - int ret = -EINVAL; + int ret; if (!np || !genpd) return -EINVAL; - mutex_lock(&gpd_list_lock); - if (!genpd_present(genpd)) - goto unlock; + return -EINVAL; genpd->dev.of_node = np; @@ -2241,7 +2246,7 @@ int of_genpd_add_provider_simple(struct device_node *np, if (ret != -EPROBE_DEFER) dev_err(&genpd->dev, "Failed to add OPP table: %d\n", ret); - goto unlock; + return ret; } /* @@ -2259,16 +2264,13 @@ int of_genpd_add_provider_simple(struct device_node *np, dev_pm_opp_of_remove_table(&genpd->dev); } - goto unlock; + return ret; } genpd->provider = &np->fwnode; genpd->has_provider = true; -unlock: - mutex_unlock(&gpd_list_lock); - - return ret; + return 0; } EXPORT_SYMBOL_GPL(of_genpd_add_provider_simple); @@ -2287,8 +2289,6 @@ int of_genpd_add_provider_onecell(struct device_node *np, if (!np || !data) return -EINVAL; - mutex_lock(&gpd_list_lock); - if (!data->xlate) data->xlate = genpd_xlate_onecell; @@ -2328,8 +2328,6 @@ int of_genpd_add_provider_onecell(struct device_node *np, if (ret < 0) goto error; - mutex_unlock(&gpd_list_lock); - return 0; error: @@ -2348,8 +2346,6 @@ int of_genpd_add_provider_onecell(struct device_node *np, } } - mutex_unlock(&gpd_list_lock); - return ret; } EXPORT_SYMBOL_GPL(of_genpd_add_provider_onecell);