From 467b171af881282fc627328e6c164f044a6df888 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 18 Aug 2022 18:40:41 +0530 Subject: [PATCH] mm/demotion: update node_is_toptier to work with memory tiers With memory tier support we can have memory only NUMA nodes in the top tier from which we want to avoid promotion tracking NUMA faults. Update node_is_toptier to work with memory tiers. All NUMA nodes are by default top tier nodes. With lower(slower) memory tiers added we consider all memory tiers above a memory tier having CPU NUMA nodes as a top memory tier [sj@kernel.org: include missed header file, memory-tiers.h] Link: https://lkml.kernel.org/r/20220820190720.248704-1-sj@kernel.org [akpm@linux-foundation.org: mm/memory.c needs linux/memory-tiers.h] [aneesh.kumar@linux.ibm.com: make toptier_distance inclusive upper bound of toptiers] Link: https://lkml.kernel.org/r/20220830081457.118960-1-aneesh.kumar@linux.ibm.com Link: https://lkml.kernel.org/r/20220818131042.113280-10-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Reviewed-by: "Huang, Ying" Acked-by: Wei Xu Cc: Alistair Popple Cc: Bharata B Rao Cc: Dan Williams Cc: Dave Hansen Cc: Davidlohr Bueso Cc: Hesham Almatary Cc: Jagdish Gediya Cc: Johannes Weiner Cc: Jonathan Cameron Cc: Michal Hocko Cc: Tim Chen Cc: Yang Shi Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/memory-tiers.h | 11 +++++++++ include/linux/node.h | 5 ---- kernel/sched/fair.c | 1 + mm/huge_memory.c | 1 + mm/memory-tiers.c | 47 ++++++++++++++++++++++++++++++++++++ mm/memory.c | 1 + mm/migrate.c | 1 + mm/mprotect.c | 1 + 8 files changed, 63 insertions(+), 5 deletions(-) diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 42791554b9b9..965009aa01d7 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -40,6 +40,7 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype); #ifdef CONFIG_MIGRATION int next_demotion_node(int node); void 
node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); +bool node_is_toptier(int node); #else static inline int next_demotion_node(int node) { @@ -50,6 +51,11 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target { *targets = NODE_MASK_NONE; } + +static inline bool node_is_toptier(int node) +{ + return true; +} #endif #else @@ -87,5 +93,10 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target { *targets = NODE_MASK_NONE; } + +static inline bool node_is_toptier(int node) +{ + return true; +} #endif /* CONFIG_NUMA */ #endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/include/linux/node.h b/include/linux/node.h index 40d641a8bfb0..9ec680dd607f 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -185,9 +185,4 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg, #define to_node(device) container_of(device, struct node, dev) -static inline bool node_is_toptier(int node) -{ - return node_state(node, N_CPU); -} - #endif /* _LINUX_NODE_H_ */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d642e9ff2829..0e3e08a093d4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 949d7c325133..534d30cff9d7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 45dd6fa4e2d1..c82eb0111383 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -37,6 +37,7 @@ static LIST_HEAD(memory_tiers); static struct node_memory_type_map node_memory_types[MAX_NUMNODES]; static struct memory_dev_type *default_dram_type; #ifdef CONFIG_MIGRATION +static int top_tier_adistance; /* * node_demotion[] examples: * @@ -162,6 +163,31 @@ static struct memory_tier *__node_get_memory_tier(int node) } #ifdef CONFIG_MIGRATION +bool 
node_is_toptier(int node) +{ + bool toptier; + pg_data_t *pgdat; + struct memory_tier *memtier; + + pgdat = NODE_DATA(node); + if (!pgdat) + return false; + + rcu_read_lock(); + memtier = rcu_dereference(pgdat->memtier); + if (!memtier) { + toptier = true; + goto out; + } + if (memtier->adistance_start <= top_tier_adistance) + toptier = true; + else + toptier = false; +out: + rcu_read_unlock(); + return toptier; +} + void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets) { struct memory_tier *memtier; @@ -319,6 +345,27 @@ static void establish_demotion_targets(void) } } while (1); } + /* + * Promotion is allowed from a memory tier to a higher + * memory tier only if the memory tier doesn't include + * compute. We want to skip promotion from a memory tier, + * if any node that is part of the memory tier has CPUs. + * Once we detect such a memory tier, we consider that tier + * as the top tier from which promotion is not allowed. + */ + list_for_each_entry_reverse(memtier, &memory_tiers, list) { + tier_nodes = get_memtier_nodemask(memtier); + nodes_and(tier_nodes, node_states[N_CPU], tier_nodes); + if (!nodes_empty(tier_nodes)) { + /* + * abstract distance below the max value of this memtier + * is considered toptier. + */ + top_tier_adistance = memtier->adistance_start + + MEMTIER_CHUNK_SIZE - 1; + break; + } + } /* * Now build the lower_tier mask for each node collecting node mask from * all memory tier below it. 
This allows us to fallback demotion page diff --git a/mm/memory.c b/mm/memory.c index 63832dab15d3..cb955c0b7738 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/migrate.c b/mm/migrate.c index 2a2329bf7c1a..d74573c36573 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -50,6 +50,7 @@ #include #include #include +#include #include diff --git a/mm/mprotect.c b/mm/mprotect.c index ed013f836b4a..55ed4a889990 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include