sched/debug: Optimize sched_domain sysctl generation

Currently we unconditionally destroy all sysctl bits and regenerate
them after we've rebuilt the domains (even if that rebuild is a
no-op).

And since we unconditionally (re)build the sysctl for all possible
CPUs, onlining all CPUs gets us O(n^2) time. Instead change this to
only rebuild the bits for CPUs we've actually installed new domains
on.

Reported-by: Ofer Levi(SW) <oferle@mellanox.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Peter Zijlstra 2017-08-10 17:10:26 +02:00 committed by Ingo Molnar
parent 09e0dd8e0f
commit bbdacdfed2
3 changed files with 59 additions and 14 deletions

View file

@ -327,38 +327,78 @@ static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
return table;
}
/*
 * CPUs whose sysctl entry is stale: set by dirty_sched_domain_sysctl(),
 * consumed and cleared by register_sched_domain_sysctl().
 */
static cpumask_var_t sd_sysctl_cpus;
/* Handle returned by register_sysctl_table(); reset to NULL on unregister. */
static struct ctl_table_header *sd_sysctl_header;
void register_sched_domain_sysctl(void)
{
int i, cpu_num = num_possible_cpus();
struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
static struct ctl_table *cpu_entries;
static struct ctl_table **cpu_idx;
char buf[32];
int i;
WARN_ON(sd_ctl_dir[0].child);
sd_ctl_dir[0].child = entry;
if (!cpu_entries) {
cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
if (!cpu_entries)
return;
if (entry == NULL)
return;
WARN_ON(sd_ctl_dir[0].child);
sd_ctl_dir[0].child = cpu_entries;
}
for_each_possible_cpu(i) {
snprintf(buf, 32, "cpu%d", i);
entry->procname = kstrdup(buf, GFP_KERNEL);
entry->mode = 0555;
entry->child = sd_alloc_ctl_cpu_table(i);
entry++;
if (!cpu_idx) {
struct ctl_table *e = cpu_entries;
cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
if (!cpu_idx)
return;
/* deal with sparse possible map */
for_each_possible_cpu(i) {
cpu_idx[i] = e;
e++;
}
}
if (!cpumask_available(sd_sysctl_cpus)) {
if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
return;
/* init to possible to not have holes in @cpu_entries */
cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
}
for_each_cpu(i, sd_sysctl_cpus) {
struct ctl_table *e = cpu_idx[i];
if (e->child)
sd_free_ctl_entry(&e->child);
if (!e->procname) {
snprintf(buf, 32, "cpu%d", i);
e->procname = kstrdup(buf, GFP_KERNEL);
}
e->mode = 0555;
e->child = sd_alloc_ctl_cpu_table(i);
__cpumask_clear_cpu(i, sd_sysctl_cpus);
}
WARN_ON(sd_sysctl_header);
sd_sysctl_header = register_sysctl_table(sd_ctl_root);
}
/*
 * Flag @cpu in sd_sysctl_cpus. Does nothing while the mask has not been
 * allocated yet.
 */
void dirty_sched_domain_sysctl(int cpu)
{
	if (!cpumask_available(sd_sysctl_cpus))
		return;

	__cpumask_set_cpu(cpu, sd_sysctl_cpus);
}
/*
 * Drop the sched_domain sysctl registration.
 *
 * May be called multiple times per register.
 *
 * Only the registration handle is released. The table hanging off
 * sd_ctl_dir[0].child is intentionally left allocated:
 * register_sched_domain_sysctl() keeps pointers to it (and into it) in
 * static storage and reuses it on the next registration, so freeing it
 * here would leave those pointers dangling.
 */
void unregister_sched_domain_sysctl(void)
{
	unregister_sysctl_table(sd_sysctl_header);
	sd_sysctl_header = NULL;
}
#endif /* CONFIG_SYSCTL */
#endif /* CONFIG_SMP */

View file

@ -1120,11 +1120,15 @@ extern int group_balance_cpu(struct sched_group *sg);
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
void register_sched_domain_sysctl(void);
void dirty_sched_domain_sysctl(int cpu);
void unregister_sched_domain_sysctl(void);
#else
/* No-op stand-in for builds without CONFIG_SCHED_DEBUG and CONFIG_SYSCTL. */
static inline void register_sched_domain_sysctl(void) { }
/* No-op stand-in for builds without CONFIG_SCHED_DEBUG and CONFIG_SYSCTL. */
static inline void dirty_sched_domain_sysctl(int cpu) { }
/* No-op stand-in for builds without CONFIG_SCHED_DEBUG and CONFIG_SYSCTL. */
static inline void unregister_sched_domain_sysctl(void) { }

View file

@ -459,6 +459,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
rq_attach_root(rq, rd);
tmp = rq->sd;
rcu_assign_pointer(rq->sd, sd);
dirty_sched_domain_sysctl(cpu);
destroy_sched_domains(tmp);
update_top_cache_domain(cpu);