sched: group scheduling, change how cpu load is calculated

This patch changes how the cpu load exerted by fair_sched_class tasks
is calculated. The load exerted by fair_sched_class tasks on a cpu is
now the sum of the group weights, rather than the sum of the individual
task weights. The weight a group exerts on a cpu depends on the shares
allocated to it.

This version of the patch has a minor impact on code size, but should
have no runtime/functional impact for !CONFIG_FAIR_GROUP_SCHED.
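
As an illustration only (not part of the patch), here is a minimal
user-space sketch of the accounting difference on one cpu; the
group_weight() helper and the numbers are assumptions chosen for the
example:

#include <stdio.h>

#define NICE_0_LOAD 1024UL

/* A group's weight on a cpu is derived from the shares allocated to it;
 * for this sketch we simply use the shares value itself. */
static unsigned long group_weight(unsigned long shares)
{
	return shares;
}

int main(void)
{
	/* One cpu, two groups: group A runs 3 nice-0 tasks,
	 * group B runs 1 nice-0 task. */
	unsigned long old_load, new_load;

	/* Old scheme: rq load is the sum of the task weights. */
	old_load = 3 * NICE_0_LOAD + 1 * NICE_0_LOAD;		/* 4096 */

	/* New scheme: rq load is the sum of the group weights,
	 * independent of how many tasks each group runs. */
	new_load = group_weight(1024) + group_weight(1024);	/* 2048 */

	printf("old rq load: %lu, new rq load: %lu\n", old_load, new_load);
	return 0;
}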

Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Author:    Srivatsa Vaddagiri
Date:      2008-01-25 21:08:00 +01:00
Committer: Ingo Molnar
Commit:    58e2d4ca58
Parent:    ec2c507fe8
3 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -886,6 +886,16 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
 static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
 #endif
 
+static inline void inc_cpu_load(struct rq *rq, unsigned long load)
+{
+	update_load_add(&rq->load, load);
+}
+
+static inline void dec_cpu_load(struct rq *rq, unsigned long load)
+{
+	update_load_sub(&rq->load, load);
+}
+
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -896,26 +906,14 @@ static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
 
 #define sched_class_highest (&rt_sched_class)
 
-static inline void inc_load(struct rq *rq, const struct task_struct *p)
-{
-	update_load_add(&rq->load, p->se.load.weight);
-}
-
-static inline void dec_load(struct rq *rq, const struct task_struct *p)
-{
-	update_load_sub(&rq->load, p->se.load.weight);
-}
-
 static void inc_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running++;
-	inc_load(rq, p);
 }
 
 static void dec_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running--;
-	dec_load(rq, p);
 }
 
 static void set_load_weight(struct task_struct *p)
@@ -4087,10 +4085,8 @@ void set_user_nice(struct task_struct *p, long nice)
 		goto out_unlock;
 	}
 	on_rq = p->se.on_rq;
-	if (on_rq) {
+	if (on_rq)
 		dequeue_task(rq, p, 0);
-		dec_load(rq, p);
-	}
 
 	p->static_prio = NICE_TO_PRIO(nice);
 	set_load_weight(p);
@@ -4100,7 +4096,6 @@ void set_user_nice(struct task_struct *p, long nice)
 
 	if (on_rq) {
 		enqueue_task(rq, p, 0);
-		inc_load(rq, p);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
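
The kernel/sched.c hunks above decouple the two counters:
inc_nr_running()/dec_nr_running() now only track the number of runnable
tasks, while the rq load is charged by each scheduling class through the
new inc_cpu_load()/dec_cpu_load() helpers. A standalone sketch of that
split, using deliberately simplified stand-in types rather than kernel
code:

#include <stdio.h>

/* Simplified stand-in for struct rq: just the two counters involved. */
struct rq {
	unsigned long nr_running;
	unsigned long load;
};

/* Generic side: only the runnable-task count (as in the patched
 * inc_nr_running()/dec_nr_running()). */
static void inc_nr_running(struct rq *rq) { rq->nr_running++; }

/* Class side: each scheduling class charges whatever weight it decides
 * on (a task weight for rt, a group weight for fair); the plain +=
 * stands in for update_load_add(). */
static void inc_cpu_load(struct rq *rq, unsigned long load) { rq->load += load; }

int main(void)
{
	struct rq rq = { 0, 0 };

	inc_nr_running(&rq); inc_cpu_load(&rq, 1024);	/* rt task, own weight */
	inc_nr_running(&rq); inc_cpu_load(&rq, 2048);	/* fair group entity */

	printf("nr_running=%lu load=%lu\n", rq.nr_running, rq.load);
	return 0;
}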

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -760,15 +760,26 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se;
+	struct sched_entity *se = &p->se,
+			    *topse = NULL;	/* Highest schedulable entity */
+	int incload = 1;
 
 	for_each_sched_entity(se) {
-		if (se->on_rq)
+		topse = se;
+		if (se->on_rq) {
+			incload = 0;
 			break;
+		}
 		cfs_rq = cfs_rq_of(se);
 		enqueue_entity(cfs_rq, se, wakeup);
 		wakeup = 1;
 	}
+	/* Increment cpu load if we just enqueued the first task of a group on
+	 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
+	 * at the highest grouping level.
+	 */
+	if (incload)
+		inc_cpu_load(rq, topse->load.weight);
 }
 
 /*
@@ -779,16 +790,28 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se;
+	struct sched_entity *se = &p->se,
+			    *topse = NULL;	/* Highest schedulable entity */
+	int decload = 1;
 
 	for_each_sched_entity(se) {
+		topse = se;
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, sleep);
 		/* Don't dequeue parent if it has other entities besides us */
-		if (cfs_rq->load.weight)
+		if (cfs_rq->load.weight) {
+			if (parent_entity(se))
+				decload = 0;
 			break;
+		}
 		sleep = 1;
 	}
+	/* Decrement cpu load if we just dequeued the last task of a group on
+	 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
+	 * at the highest grouping level.
+	 */
+	if (decload)
+		dec_cpu_load(rq, topse->load.weight);
 }
 
 /*
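
To spell out the enqueue-side decision above: the cpu is charged the
weight of the task's highest-level entity only when that entity was not
already on the runqueue, i.e. when this is the first runnable task of
the group on this cpu. A standalone sketch of that walk, with simplified
stand-in types rather than the real sched_entity/for_each_sched_entity
machinery:

#include <stdio.h>

/* Simplified stand-in for struct sched_entity. */
struct entity {
	struct entity *parent;		/* NULL for a top-level entity */
	int on_rq;			/* already enqueued on this cpu? */
	unsigned long weight;
};

/* Return the load to add to the cpu for enqueueing 'se': the weight of
 * the highest-level entity, or 0 if some level was already on the
 * runqueue. Mirrors the topse/incload logic in enqueue_task_fair(). */
static unsigned long enqueue_walk(struct entity *se)
{
	struct entity *topse = NULL;
	int incload = 1;

	for (; se; se = se->parent) {
		topse = se;
		if (se->on_rq) {
			incload = 0;
			break;
		}
		se->on_rq = 1;		/* enqueue this level */
	}
	return incload ? topse->weight : 0;
}

int main(void)
{
	/* One group (weight 2048, derived from its shares) with two tasks. */
	struct entity group = { NULL, 0, 2048 };
	struct entity task1 = { &group, 0, 1024 };
	struct entity task2 = { &group, 0, 1024 };

	/* The first task of the group charges the group's weight; the
	 * second finds the group already on the runqueue and charges 0. */
	printf("task1 adds %lu\n", enqueue_walk(&task1));	/* 2048 */
	printf("task2 adds %lu\n", enqueue_walk(&task2));	/* 0 */
	return 0;
}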

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -32,6 +32,7 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 
 	list_add_tail(&p->run_list, array->queue + p->prio);
 	__set_bit(p->prio, array->bitmap);
+	inc_cpu_load(rq, p->se.load.weight);
 }
 
 /*
@@ -46,6 +47,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 	list_del(&p->run_list);
 	if (list_empty(array->queue + p->prio))
 		__clear_bit(p->prio, array->bitmap);
+	dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*