Merge branch 'cpufreq/arm/linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm

Pull ARM cpufreq updates for 5.17-rc1 from Viresh Kumar:

"- Qcom cpufreq driver updates improve irq support (Ard Biesheuvel,
   Stephen Boyd, and Vladimir Zapolskiy).

 - Fixes double devm_remap for mediatek driver (Hector Yuan).

 - Introduces thermal pressure helpers (Lukasz Luba)."

* 'cpufreq/arm/linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm:
  cpufreq: mediatek-hw: Fix double devm_remap in hotplug case
  cpufreq: qcom-hw: Use optional irq API
  cpufreq: qcom-hw: Set CPU affinity of dcvsh interrupts
  cpufreq: qcom-hw: Fix probable nested interrupt handling
  cpufreq: qcom-cpufreq-hw: Avoid stack buffer for IRQ name
  arch_topology: Remove unused topology_set_thermal_pressure() and related
  cpufreq: qcom-cpufreq-hw: Use new thermal pressure update function
  cpufreq: qcom-cpufreq-hw: Update offline CPUs per-cpu thermal pressure
  thermal: cpufreq_cooling: Use new thermal pressure update function
  arch_topology: Introduce thermal pressure update function
2024-07-23 03:29:48 +00:00 · 2021-12-30 15:49:54 +01:00 · 2021-12-30 15:49:54 +01:00 · 5ee22fa4a9
parent fe262d5c1f d776790a55
commit 5ee22fa4a9
9 changed files with 96 additions and 40 deletions
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@ -23,7 +23,7 @@

 /* Replace task scheduler's default thermal pressure API */
 #define arch_scale_thermal_pressure topology_get_thermal_pressure
-#define arch_set_thermal_pressure   topology_set_thermal_pressure
+#define arch_update_thermal_pressure	topology_update_thermal_pressure

 #else

--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@ -32,7 +32,7 @@ void update_freq_counters_refs(void);

 /* Replace task scheduler's default thermal pressure API */
 #define arch_scale_thermal_pressure topology_get_thermal_pressure
-#define arch_set_thermal_pressure   topology_set_thermal_pressure
+#define arch_update_thermal_pressure	topology_update_thermal_pressure

 #include <asm-generic/topology.h>

--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@ -22,6 +22,7 @@
 static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
 static struct cpumask scale_freq_counters_mask;
 static bool scale_freq_invariant;
+static DEFINE_PER_CPU(u32, freq_factor) = 1;

 static bool supports_scale_freq_counters(const struct cpumask *cpus)
 {
@ -155,15 +156,49 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)

 DEFINE_PER_CPU(unsigned long, thermal_pressure);

-void topology_set_thermal_pressure(const struct cpumask *cpus,
-			       unsigned long th_pressure)
+/**
+ * topology_update_thermal_pressure() - Update thermal pressure for CPUs
+ * @cpus        : The related CPUs for which capacity has been reduced
+ * @capped_freq : The maximum allowed frequency that CPUs can run at
+ *
+ * Update the value of thermal pressure for all @cpus in the mask. The
+ * cpumask should include all (online+offline) affected CPUs, to avoid
+ * operating on stale data when hot-plug is used for some CPUs. The
+ * @capped_freq reflects the maximum CPU frequency currently allowed by
+ * thermal capping. It may also be a boost frequency value, which is higher
+ * than the internal 'freq_factor' max frequency. In that case the pressure
+ * value is simply cleared, since this is an indication that there is
+ * no thermal throttling. The @capped_freq must be provided in kHz.
+ */
+void topology_update_thermal_pressure(const struct cpumask *cpus,
+				      unsigned long capped_freq)
 {
+	unsigned long max_capacity, capacity, th_pressure;
+	u32 max_freq;
 	int cpu;

+	cpu = cpumask_first(cpus);
+	max_capacity = arch_scale_cpu_capacity(cpu);
+	max_freq = per_cpu(freq_factor, cpu);
+
+	/* Convert to MHz scale which is used in 'freq_factor' */
+	capped_freq /= 1000;
+
+	/*
+	 * Handle boost frequencies properly: they should simply clear
+	 * the thermal pressure value.
+	 */
+	if (max_freq <= capped_freq)
+		capacity = max_capacity;
+	else
+		capacity = mult_frac(max_capacity, capped_freq, max_freq);
+
+	th_pressure = max_capacity - capacity;
+
 	for_each_cpu(cpu, cpus)
 		WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
 }
-EXPORT_SYMBOL_GPL(topology_set_thermal_pressure);
+EXPORT_SYMBOL_GPL(topology_update_thermal_pressure);

 static ssize_t cpu_capacity_show(struct device *dev,
 				 struct device_attribute *attr,
@ -217,7 +252,6 @@ static void update_topology_flags_workfn(struct work_struct *work)
 	update_topology = 0;
 }

-static DEFINE_PER_CPU(u32, freq_factor) = 1;
 static u32 *raw_capacity;

 static int free_raw_capacity(void)
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@ -36,6 +36,8 @@ enum {
 struct mtk_cpufreq_data {
 	struct cpufreq_frequency_table *table;
 	void __iomem *reg_bases[REG_ARRAY_SIZE];
+	struct resource *res;
+	void __iomem *base;
 	int nr_opp;
 };

@ -156,6 +158,7 @@ static int mtk_cpu_resources_init(struct platform_device *pdev,
 {
 	struct mtk_cpufreq_data *data;
 	struct device *dev = &pdev->dev;
+	struct resource *res;
 	void __iomem *base;
 	int ret, i;
 	int index;
@ -170,9 +173,26 @@ static int mtk_cpu_resources_init(struct platform_device *pdev,
 	if (index < 0)
 		return index;

-	base = devm_platform_ioremap_resource(pdev, index);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, index);
+	if (!res) {
+		dev_err(dev, "failed to get mem resource %d\n", index);
+		return -ENODEV;
+	}
+
+	if (!request_mem_region(res->start, resource_size(res), res->name)) {
+		dev_err(dev, "failed to request resource %pR\n", res);
+		return -EBUSY;
+	}
+
+	base = ioremap(res->start, resource_size(res));
+	if (!base) {
+		dev_err(dev, "failed to map resource %pR\n", res);
+		ret = -ENOMEM;
+		goto release_region;
+	}
+
+	data->base = base;
+	data->res = res;

 	for (i = REG_FREQ_LUT_TABLE; i < REG_ARRAY_SIZE; i++)
 		data->reg_bases[i] = base + offsets[i];
@ -187,6 +207,9 @@ static int mtk_cpu_resources_init(struct platform_device *pdev,
 	policy->driver_data = data;

 	return 0;
+release_region:
+	release_mem_region(res->start, resource_size(res));
+	return ret;
 }

 static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
@ -233,9 +256,13 @@ static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 static int mtk_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
 {
 	struct mtk_cpufreq_data *data = policy->driver_data;
+	struct resource *res = data->res;
+	void __iomem *base = data->base;

 	/* HW should be in paused state now */
 	writel_relaxed(0x0, data->reg_bases[REG_FREQ_ENABLE]);
+	iounmap(base);
+	release_mem_region(res->start, resource_size(res));

 	return 0;
 }
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@ -46,6 +46,7 @@ struct qcom_cpufreq_data {
 	 */
 	struct mutex throttle_lock;
 	int throttle_irq;
+	char irq_name[15];
 	bool cancel_throttle;
 	struct delayed_work throttle_work;
 	struct cpufreq_policy *policy;
@ -275,10 +276,10 @@ static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)

 static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
 {
-	unsigned long max_capacity, capacity, freq_hz, throttled_freq;
 	struct cpufreq_policy *policy = data->policy;
 	int cpu = cpumask_first(policy->cpus);
 	struct device *dev = get_cpu_device(cpu);
+	unsigned long freq_hz, throttled_freq;
 	struct dev_pm_opp *opp;
 	unsigned int freq;

@ -295,16 +296,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)

 	throttled_freq = freq_hz / HZ_PER_KHZ;

-	/* Update thermal pressure */
-
-	max_capacity = arch_scale_cpu_capacity(cpu);
-	capacity = mult_frac(max_capacity, throttled_freq, policy->cpuinfo.max_freq);
-
-	/* Don't pass boost capacity to scheduler */
-	if (capacity > max_capacity)
-		capacity = max_capacity;
-
-	arch_set_thermal_pressure(policy->cpus, max_capacity - capacity);
+	/* Update thermal pressure (the boost frequencies are accepted) */
+	arch_update_thermal_pressure(policy->related_cpus, throttled_freq);

 	/*
 	 * In the unlikely case policy is unregistered do not enable
@ -342,9 +335,9 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)

 	/* Disable interrupt and enable polling */
 	disable_irq_nosync(c_data->throttle_irq);
-	qcom_lmh_dcvs_notify(c_data);
+	schedule_delayed_work(&c_data->throttle_work, 0);

-	return 0;
+	return IRQ_HANDLED;
 }

 static const struct qcom_cpufreq_soc_data qcom_soc_data = {
@ -375,16 +368,17 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
 {
 	struct qcom_cpufreq_data *data = policy->driver_data;
 	struct platform_device *pdev = cpufreq_get_driver_data();
-	char irq_name[15];
 	int ret;

 	/*
 	 * Look for LMh interrupt. If no interrupt line is specified /
 	 * if there is an error, allow cpufreq to be enabled as usual.
 	 */
-	data->throttle_irq = platform_get_irq(pdev, index);
-	if (data->throttle_irq <= 0)
-		return data->throttle_irq == -EPROBE_DEFER ? -EPROBE_DEFER : 0;
+	data->throttle_irq = platform_get_irq_optional(pdev, index);
+	if (data->throttle_irq == -ENXIO)
+		return 0;
+	if (data->throttle_irq < 0)
+		return data->throttle_irq;

 	data->cancel_throttle = false;
 	data->policy = policy;
@ -392,14 +386,19 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
 	mutex_init(&data->throttle_lock);
 	INIT_DEFERRABLE_WORK(&data->throttle_work, qcom_lmh_dcvs_poll);

-	snprintf(irq_name, sizeof(irq_name), "dcvsh-irq-%u", policy->cpu);
+	snprintf(data->irq_name, sizeof(data->irq_name), "dcvsh-irq-%u", policy->cpu);
 	ret = request_threaded_irq(data->throttle_irq, NULL, qcom_lmh_dcvs_handle_irq,
-				   IRQF_ONESHOT, irq_name, data);
+				   IRQF_ONESHOT, data->irq_name, data);
 	if (ret) {
-		dev_err(&pdev->dev, "Error registering %s: %d\n", irq_name, ret);
+		dev_err(&pdev->dev, "Error registering %s: %d\n", data->irq_name, ret);
 		return 0;
 	}

+	ret = irq_set_affinity_hint(data->throttle_irq, policy->cpus);
+	if (ret)
+		dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n",
+			data->irq_name, data->throttle_irq);
+
 	return 0;
 }

--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@ -462,7 +462,6 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 	struct cpumask *cpus;
 	unsigned int frequency;
-	unsigned long max_capacity, capacity;
 	int ret;

 	/* Request state should be less than max_level */
@ -479,10 +478,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 	if (ret >= 0) {
 		cpufreq_cdev->cpufreq_state = state;
 		cpus = cpufreq_cdev->policy->related_cpus;
-		max_capacity = arch_scale_cpu_capacity(cpumask_first(cpus));
-		capacity = frequency * max_capacity;
-		capacity /= cpufreq_cdev->policy->cpuinfo.max_freq;
-		arch_set_thermal_pressure(cpus, max_capacity - capacity);
+		arch_update_thermal_pressure(cpus, frequency);
 		ret = 0;
 	}

--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@ -56,8 +56,8 @@ static inline unsigned long topology_get_thermal_pressure(int cpu)
 	return per_cpu(thermal_pressure, cpu);
 }

-void topology_set_thermal_pressure(const struct cpumask *cpus,
-				   unsigned long th_pressure);
+void topology_update_thermal_pressure(const struct cpumask *cpus,
+				      unsigned long capped_freq);

 struct cpu_topology {
 	int thread_id;
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@ -266,10 +266,10 @@ unsigned long arch_scale_thermal_pressure(int cpu)
 }
 #endif

-#ifndef arch_set_thermal_pressure
+#ifndef arch_update_thermal_pressure
 static __always_inline
-void arch_set_thermal_pressure(const struct cpumask *cpus,
-			       unsigned long th_pressure)
+void arch_update_thermal_pressure(const struct cpumask *cpus,
+				  unsigned long capped_frequency)
 { }
 #endif

--- a/init/Kconfig
+++ b/init/Kconfig
@ -550,7 +550,7 @@ config SCHED_THERMAL_PRESSURE
 	  i.e. put less load on throttled CPUs than on non/less throttled ones.

 	  This requires the architecture to implement
-	  arch_set_thermal_pressure() and arch_scale_thermal_pressure().
+	  arch_update_thermal_pressure() and arch_scale_thermal_pressure().

 config BSD_PROCESS_ACCT
 	bool "BSD Process Accounting"