Pad tdq_lock to avoid false sharing with tdq_load and tdq_cpu_idle.

This enables CPU searches (which read tdq_load) to operate independently
of any contention on the spinlock.  Some scheduler-intensive workloads
running on an 8-core, single-socket Sandy Bridge Xeon show a considerable
improvement with this change: a 2-3% performance gain and a 5-6% decrease
in CPU utilization.

Sponsored by:	Intel
Reviewed by:	jeff
Commit:	39f819e2fc (parent da1fc67f8a)
Author:	Jim Harris
Date:	2012-10-24 18:36:41 +00:00
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=242014

@@ -223,8 +223,13 @@ static int sched_idlespinthresh = -1;
  * locking in sched_pickcpu();
  */
 struct tdq {
-	/* Ordered to improve efficiency of cpu_search() and switch(). */
+	/*
+	 * Ordered to improve efficiency of cpu_search() and switch().
+	 * tdq_lock is padded to avoid false sharing with tdq_load and
+	 * tdq_cpu_idle.
+	 */
 	struct mtx	tdq_lock;		/* run queue lock. */
+	char		pad[64 - sizeof(struct mtx)];
 	struct cpu_group *tdq_cg;		/* Pointer to cpu topology. */
 	volatile int	tdq_load;		/* Aggregate load. */
 	volatile int	tdq_cpu_idle;		/* cpu_idle() is active. */
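
For illustration, a minimal userspace sketch of the same layout trick.  The
names (struct run_stats, load, cpu_idle) and the use of pthread_mutex_t in
place of struct mtx are hypothetical stand-ins, not the kernel code; like
the patch above, it assumes a 64-byte cache line.

/*
 * Hypothetical userspace analogue: keep a contended lock on its own cache
 * line so that lock-free readers of the adjacent counters on other CPUs do
 * not take false-sharing misses whenever the lock cache line bounces.
 */
#include <pthread.h>
#include <stdalign.h>
#include <stdatomic.h>
#include <stddef.h>

#define CACHE_LINE	64	/* Assumed cache line size, as in the patch. */

struct run_stats {
	/* Contended by writers; aligned so it starts its own cache line. */
	alignas(CACHE_LINE) pthread_mutex_t lock;
	/* Padding pushes the read-mostly fields onto the next line. */
	char pad[CACHE_LINE - (sizeof(pthread_mutex_t) % CACHE_LINE)];
	/* Read lock-free by remote CPUs, e.g. when choosing where to run. */
	_Atomic int load;
	_Atomic int cpu_idle;
};

/* Compile-time check that the lock and the counters never share a line. */
_Static_assert(offsetof(struct run_stats, load) >= CACHE_LINE,
    "lock and load share a cache line");

The kernel change gets the same separation with an explicit
char pad[64 - sizeof(struct mtx)] member rather than alignment attributes,
since all it needs is to push tdq_load and tdq_cpu_idle past the cache line
holding tdq_lock.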