mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages

Every slab page charged to a non-root memory cgroup has a pointer to the
memory cgroup and holds a reference to it, which protects a non-empty
memory cgroup from being released.  At the same time the page has a
pointer to the corresponding kmem_cache, and also holds a reference to the
kmem_cache.  And the kmem_cache itself holds a reference to the cgroup.

So there is clearly some redundancy, which allows us to stop setting the
page->mem_cgroup pointer and rely on getting the memcg pointer indirectly
via kmem_cache.  Furthermore, it will make it easier to change this
pointer, without the need to go over all charged pages.

So let's stop setting page->mem_cgroup pointer for slab pages, and stop
using the css refcounter directly for protecting the memory cgroup from
going away.  Instead rely on kmem_cache as an intermediate object.

Make sure that vmstats and shrinker lists keep working as before, as
well as the /proc/kpagecgroup interface.

Link: http://lkml.kernel.org/r/20190611231813.3148843-10-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Waiman Long <longman@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Roman Gushchin 2019-07-11 20:56:31 -07:00 committed by Linus Torvalds
parent f0a3a24b53
commit 4d96ba3530
3 changed files with 70 additions and 19 deletions

View file

@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
@ -63,7 +64,7 @@ static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
if (!memcg_kmem_enabled())
return NULL;
page = virt_to_head_page(ptr);
return page->mem_cgroup;
return memcg_from_slab_page(page);
}
static inline struct list_lru_one *

View file

@ -486,7 +486,10 @@ ino_t page_cgroup_ino(struct page *page)
unsigned long ino = 0;
rcu_read_lock();
memcg = READ_ONCE(page->mem_cgroup);
if (PageHead(page) && PageSlab(page))
memcg = memcg_from_slab_page(page);
else
memcg = READ_ONCE(page->mem_cgroup);
while (memcg && !(memcg->css.flags & CSS_ONLINE))
memcg = parent_mem_cgroup(memcg);
if (memcg)
@ -2802,9 +2805,6 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
cancel_charge(memcg, nr_pages);
return -ENOMEM;
}
page->mem_cgroup = memcg;
return 0;
}
@ -2827,8 +2827,10 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
memcg = get_mem_cgroup_from_current();
if (!mem_cgroup_is_root(memcg)) {
ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg);
if (!ret)
if (!ret) {
page->mem_cgroup = memcg;
__SetPageKmemcg(page);
}
}
css_put(&memcg->css);
return ret;

View file

@ -255,30 +255,67 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
return s->memcg_params.root_cache;
}
/*
 * Look up the memory cgroup that owns a slab page via its kmem_cache.
 *
 * The caller must pass a slab page that satisfies both PageHead() and
 * PageSlab(): PageSlab() alone also holds for tail pages of a compound
 * slab page, and those do not have the slab_cache pointer set.
 *
 * Returns the memcg recorded in the cache's memcg_params, or NULL when
 * the page has no slab_cache or the cache is a root cache.
 */
static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
{
	struct kmem_cache *cache = READ_ONCE(page->slab_cache);

	if (!cache || is_root_cache(cache))
		return NULL;

	return cache->memcg_params.memcg;
}
/*
* Charge the slab page belonging to the non-root kmem_cache.
* Can be called for non-root kmem_caches only.
*/
static __always_inline int memcg_charge_slab(struct page *page,
gfp_t gfp, int order,
struct kmem_cache *s)
{
struct mem_cgroup *memcg;
struct lruvec *lruvec;
int ret;
if (is_root_cache(s))
return 0;
ret = memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg);
memcg = s->memcg_params.memcg;
ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
if (ret)
return ret;
lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
mod_lruvec_state(lruvec, cache_vmstat_idx(s), 1 << order);
/* transfer try_charge() page references to kmem_cache */
percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
css_put_many(&memcg->css, 1 << order);
return 0;
}
/*
* Uncharge a slab page belonging to a non-root kmem_cache.
* Can be called for non-root kmem_caches only.
*/
static __always_inline void memcg_uncharge_slab(struct page *page, int order,
struct kmem_cache *s)
{
if (!is_root_cache(s))
percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
memcg_kmem_uncharge(page, order);
struct mem_cgroup *memcg;
struct lruvec *lruvec;
memcg = s->memcg_params.memcg;
lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
mod_lruvec_state(lruvec, cache_vmstat_idx(s), -(1 << order));
memcg_kmem_uncharge_memcg(page, order, memcg);
percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
}
extern void slab_init_memcg_params(struct kmem_cache *);
@ -314,6 +351,11 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
return s;
}
/*
 * Fallback variant: ignores @page and always returns NULL, i.e. reports
 * that no memory cgroup is associated with the slab page.
 * NOTE(review): presumably this is the definition used when memcg kmem
 * accounting is compiled out; the enclosing preprocessor condition is
 * not visible here, so confirm against the full header.
 */
static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
{
return NULL;
}
static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order,
struct kmem_cache *s)
{
@ -351,18 +393,24 @@ static __always_inline int charge_slab_page(struct page *page,
gfp_t gfp, int order,
struct kmem_cache *s)
{
int ret = memcg_charge_slab(page, gfp, order, s);
if (is_root_cache(s)) {
mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
1 << order);
return 0;
}
if (!ret)
mod_lruvec_page_state(page, cache_vmstat_idx(s), 1 << order);
return ret;
return memcg_charge_slab(page, gfp, order, s);
}
static __always_inline void uncharge_slab_page(struct page *page, int order,
struct kmem_cache *s)
{
mod_lruvec_page_state(page, cache_vmstat_idx(s), -(1 << order));
if (is_root_cache(s)) {
mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-(1 << order));
return;
}
memcg_uncharge_slab(page, order, s);
}