mirror of
https://github.com/torvalds/linux
synced 2024-11-03 01:56:01 +00:00
rcu: limit rcu_node leaf-level fanout
Some recent benchmarks have indicated possible lock contention on the leaf-level rcu_node locks. This commit therefore limits the number of CPUs per leaf-level rcu_node structure to 16; in other words, at most 16 rcu_data structures can fan into a given rcu_node structure. Prior to this, the limit was 32 on 32-bit systems and 64 on 64-bit systems. Note that the fanout of non-leaf rcu_node structures is unchanged. The organization of accesses to the rcu_node tree is such that references to non-leaf rcu_node structures are much less frequent than references to the leaf structures. Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
parent
121dfc4b3e
commit
0209f6490b
2 changed files with 26 additions and 20 deletions
|
@@ -1869,8 +1869,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
|
|||
{
|
||||
int i;
|
||||
|
||||
for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
|
||||
for (i = NUM_RCU_LVLS - 1; i > 0; i--)
|
||||
rsp->levelspread[i] = CONFIG_RCU_FANOUT;
|
||||
rsp->levelspread[0] = RCU_FANOUT_LEAF;
|
||||
}
|
||||
#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
|
||||
static void __init rcu_init_levelspread(struct rcu_state *rsp)
|
||||
|
|
|
@@ -31,46 +31,51 @@
|
|||
/*
|
||||
* Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
|
||||
* In theory, it should be possible to add more levels straightforwardly.
|
||||
* In practice, this has not been tested, so there is probably some
|
||||
* bug somewhere.
|
||||
* In practice, this did work well going from three levels to four.
|
||||
* Of course, your mileage may vary.
|
||||
*/
|
||||
#define MAX_RCU_LVLS 4
|
||||
#define RCU_FANOUT (CONFIG_RCU_FANOUT)
|
||||
#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT)
|
||||
#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT)
|
||||
#define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT)
|
||||
#if CONFIG_RCU_FANOUT > 16
|
||||
#define RCU_FANOUT_LEAF 16
|
||||
#else /* #if CONFIG_RCU_FANOUT > 16 */
|
||||
#define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT)
|
||||
#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
|
||||
#define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
|
||||
#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
|
||||
#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
|
||||
#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
|
||||
|
||||
#if NR_CPUS <= RCU_FANOUT
|
||||
#if NR_CPUS <= RCU_FANOUT_1
|
||||
# define NUM_RCU_LVLS 1
|
||||
# define NUM_RCU_LVL_0 1
|
||||
# define NUM_RCU_LVL_1 (NR_CPUS)
|
||||
# define NUM_RCU_LVL_2 0
|
||||
# define NUM_RCU_LVL_3 0
|
||||
# define NUM_RCU_LVL_4 0
|
||||
#elif NR_CPUS <= RCU_FANOUT_SQ
|
||||
#elif NR_CPUS <= RCU_FANOUT_2
|
||||
# define NUM_RCU_LVLS 2
|
||||
# define NUM_RCU_LVL_0 1
|
||||
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
|
||||
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
|
||||
# define NUM_RCU_LVL_2 (NR_CPUS)
|
||||
# define NUM_RCU_LVL_3 0
|
||||
# define NUM_RCU_LVL_4 0
|
||||
#elif NR_CPUS <= RCU_FANOUT_CUBE
|
||||
#elif NR_CPUS <= RCU_FANOUT_3
|
||||
# define NUM_RCU_LVLS 3
|
||||
# define NUM_RCU_LVL_0 1
|
||||
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
|
||||
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
|
||||
# define NUM_RCU_LVL_3 NR_CPUS
|
||||
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
|
||||
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
|
||||
# define NUM_RCU_LVL_3 (NR_CPUS)
|
||||
# define NUM_RCU_LVL_4 0
|
||||
#elif NR_CPUS <= RCU_FANOUT_FOURTH
|
||||
#elif NR_CPUS <= RCU_FANOUT_4
|
||||
# define NUM_RCU_LVLS 4
|
||||
# define NUM_RCU_LVL_0 1
|
||||
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE)
|
||||
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
|
||||
# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
|
||||
# define NUM_RCU_LVL_4 NR_CPUS
|
||||
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
|
||||
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
|
||||
# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
|
||||
# define NUM_RCU_LVL_4 (NR_CPUS)
|
||||
#else
|
||||
# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
|
||||
#endif /* #if (NR_CPUS) <= RCU_FANOUT */
|
||||
#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
|
||||
|
||||
#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
|
||||
#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
|
||||
|
|
Loading…
Reference in a new issue