lib: add allocation tagging support for memory allocation profiling

Introduce CONFIG_MEM_ALLOC_PROFILING which provides definitions to easily
instrument memory allocators.  It registers an "alloc_tags" codetag type
with /proc/allocinfo interface to output allocation tag information when
the feature is enabled.

CONFIG_MEM_ALLOC_PROFILING_DEBUG is provided for debugging the memory
allocation profiling instrumentation.

Memory allocation profiling can be enabled or disabled at runtime using
/proc/sys/vm/mem_profiling sysctl when CONFIG_MEM_ALLOC_PROFILING_DEBUG=n.
CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT enables memory allocation
profiling by default.

[surenb@google.com: Documentation/filesystems/proc.rst: fix allocinfo title]
  Link: https://lkml.kernel.org/r/20240326073813.727090-1-surenb@google.com
[surenb@google.com: do limited memory accounting for modules with ARCH_NEEDS_WEAK_PER_CPU]
  Link: https://lkml.kernel.org/r/20240402180933.1663992-2-surenb@google.com
[klarasmodin@gmail.com: explicitly include irqflags.h in alloc_tag.h]
  Link: https://lkml.kernel.org/r/20240407133252.173636-1-klarasmodin@gmail.com
[surenb@google.com: fix alloc_tag_init() to prevent passing NULL to PTR_ERR()]
  Link: https://lkml.kernel.org/r/20240417003349.2520094-1-surenb@google.com
Link: https://lkml.kernel.org/r/20240321163705.3067592-14-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Co-developed-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Klara Modin <klarasmodin@gmail.com>
Tested-by: Kees Cook <keescook@chromium.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alex Gaynor <alex.gaynor@gmail.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andreas Hindborg <a.hindborg@samsung.com>
Cc: Benno Lossin <benno.lossin@proton.me>
Cc: "Björn Roy Baron" <bjorn3_gh@protonmail.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Gary Guo <gary@garyguo.net>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wedson Almeida Filho <wedsonaf@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Suren Baghdasaryan 2024-03-21 09:36:35 -07:00 committed by Andrew Morton
parent 47a92dfbe0
commit 22d407b164
10 changed files with 428 additions and 0 deletions

View file

@ -43,6 +43,7 @@ Currently, these files are in /proc/sys/vm:
- legacy_va_layout
- lowmem_reserve_ratio
- max_map_count
- mem_profiling (only if CONFIG_MEM_ALLOC_PROFILING=y)
- memory_failure_early_kill
- memory_failure_recovery
- min_free_kbytes
@ -425,6 +426,21 @@ e.g., up to one or two maps per allocation.
The default value is 65530.
mem_profiling
==============
Enable memory profiling (when CONFIG_MEM_ALLOC_PROFILING=y)
1: Enable memory profiling.
0: Disable memory profiling.
Enabling memory profiling introduces a small performance overhead for all
memory allocations.
The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
memory_failure_early_kill:
==========================

View file

@ -688,6 +688,7 @@ files are there, and which are missing.
============ ===============================================================
File Content
============ ===============================================================
allocinfo Memory allocations profiling information
apm Advanced power management info
bootconfig Kernel command line obtained from boot config,
and, if there were kernel parameters from the
@ -953,6 +954,34 @@ also be allocatable although a lot of filesystem metadata may have to be
reclaimed to achieve this.
allocinfo
~~~~~~~~~
Provides information about memory allocations at all locations in the code
base. Each allocation in the code is identified by its source file, line
number, module (if it originates from a loadable module) and the function calling
the allocation. The number of bytes allocated and number of calls at each
location are reported.
Example output.
::
> sort -rn /proc/allocinfo
127664128 31168 mm/page_ext.c:270 func:alloc_page_ext
56373248 4737 mm/slub.c:2259 func:alloc_slab_page
14880768 3633 mm/readahead.c:247 func:page_cache_ra_unbounded
14417920 3520 mm/mm_init.c:2530 func:alloc_large_system_hash
13377536 234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs
11718656 2861 mm/filemap.c:1919 func:__filemap_get_folio
9192960 2800 kernel/fork.c:307 func:alloc_thread_stack_node
4206592 4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable
4136960 1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start
3940352 962 mm/memory.c:4214 func:alloc_anon_folio
2894464 22613 fs/kernfs/dir.c:615 func:__kernfs_new_node
...
meminfo
~~~~~~~

View file

@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ASM_GENERIC_CODETAG_LDS_H
#define __ASM_GENERIC_CODETAG_LDS_H
/*
 * Linker-script helper: place the input section named _name at an 8-byte
 * aligned address and bracket it with the symbols __start_<_name> and
 * __stop_<_name>. The input section is wrapped in KEEP().
 */
#define SECTION_WITH_BOUNDARIES(_name) \
. = ALIGN(8); \
__start_##_name = .; \
KEEP(*(_name)) \
__stop_##_name = .;
/*
 * Every codetag section that linker scripts should emit. Only the
 * "alloc_tags" section is listed here.
 */
#define CODETAG_SECTIONS() \
SECTION_WITH_BOUNDARIES(alloc_tags)
#endif /* __ASM_GENERIC_CODETAG_LDS_H */

View file

@ -50,6 +50,8 @@
* [__nosave_begin, __nosave_end] for the nosave data
*/
#include <asm-generic/codetag.lds.h>
#ifndef LOAD_OFFSET
#define LOAD_OFFSET 0
#endif
@ -366,6 +368,7 @@
. = ALIGN(8); \
BOUNDED_SECTION_BY(__dyndbg_classes, ___dyndbg_classes) \
BOUNDED_SECTION_BY(__dyndbg, ___dyndbg) \
CODETAG_SECTIONS() \
LIKELY_PROFILE() \
BRANCH_PROFILE() \
TRACE_PRINTKS() \

156
include/linux/alloc_tag.h Normal file
View file

@ -0,0 +1,156 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* allocation tagging
*/
#ifndef _LINUX_ALLOC_TAG_H
#define _LINUX_ALLOC_TAG_H
#include <linux/bug.h>
#include <linux/codetag.h>
#include <linux/container_of.h>
#include <linux/preempt.h>
#include <asm/percpu.h>
#include <linux/cpumask.h>
#include <linux/static_key.h>
#include <linux/irqflags.h>
/* Counters kept for one allocation tag. */
struct alloc_tag_counters {
u64 bytes; /* raised by alloc_tag_add(), lowered by alloc_tag_sub(), by the size passed in */
u64 calls; /* +1 in __alloc_tag_ref_set(), -1 in alloc_tag_sub() */
};
/*
 * An instance of this structure is created in a special ELF section at every
 * allocation callsite. At runtime, the special section is treated as
 * an array of these. The embedded codetag ties the tag into the codetag
 * framework.
 */
struct alloc_tag {
struct codetag ct; /* embedded codetag; ct_to_alloc_tag() maps it back to this struct */
struct alloc_tag_counters __percpu *counters; /* per-CPU counters, summed by alloc_tag_read() */
} __aligned(8);
#ifdef CONFIG_MEM_ALLOC_PROFILING
/* Map an embedded codetag back to the alloc_tag that contains it. */
static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct)
{
	struct alloc_tag *tag = container_of(ct, struct alloc_tag, ct);

	return tag;
}
#ifdef ARCH_NEEDS_WEAK_PER_CPU
/*
* When percpu variables are required to be defined as weak, static percpu
* variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION).
* Instead we will account all module allocations to a single counter.
*/
DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
/*
 * ARCH_NEEDS_WEAK_PER_CPU variant: define a static alloc_tag named _alloc_tag
 * in the "alloc_tags" section. Its counters point at the single per-CPU
 * variable _shared_alloc_tag, so every tag defined through this variant
 * shares one set of counters.
 */
#define DEFINE_ALLOC_TAG(_alloc_tag) \
static struct alloc_tag _alloc_tag __used __aligned(8) \
__section("alloc_tags") = { \
.ct = CODE_TAG_INIT, \
.counters = &_shared_alloc_tag };
#else /* ARCH_NEEDS_WEAK_PER_CPU */
/*
 * Define a static alloc_tag named _alloc_tag in the "alloc_tags" section,
 * together with its own static per-CPU counters (_alloc_tag_cntr) that the
 * tag's counters pointer refers to.
 */
#define DEFINE_ALLOC_TAG(_alloc_tag) \
static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \
static struct alloc_tag _alloc_tag __used __aligned(8) \
__section("alloc_tags") = { \
.ct = CODE_TAG_INIT, \
.counters = &_alloc_tag_cntr };
#endif /* ARCH_NEEDS_WEAK_PER_CPU */
DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
mem_alloc_profiling_key);
/*
 * Report whether memory allocation profiling is currently switched on, as
 * recorded by the mem_alloc_profiling_key static key.
 */
static inline bool mem_alloc_profiling_enabled(void)
{
	bool enabled;

	enabled = static_branch_maybe(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
				      &mem_alloc_profiling_key);
	return enabled;
}
static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
{
struct alloc_tag_counters v = { 0, 0 };
struct alloc_tag_counters *counter;
int cpu;
for_each_possible_cpu(cpu) {
counter = per_cpu_ptr(tag->counters, cpu);
v.bytes += counter->bytes;
v.calls += counter->calls;
}
return v;
}
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
/*
 * Debug-only sanity checks run before a tag is attached to @ref:
 *  - warn once if @ref still points at a codetag (it was not cleared), and
 *  - warn once if no @tag was supplied.
 *
 * The checks only warn; they do not alter @ref or @tag.
 */
static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag)
{
	WARN_ONCE(ref && ref->ct,
		  "alloc_tag was not cleared (got tag for %s:%u)\n",
		  ref->ct->filename, ref->ct->lineno);

	/* Terminate the message with a newline like the other warnings here. */
	WARN_ONCE(!tag, "current->alloc_tag not set\n");
}
/*
 * Debug-only sanity check run before a tag is detached from @ref: warn once
 * if @ref is non-NULL but does not point at any codetag.
 */
static inline void alloc_tag_sub_check(union codetag_ref *ref)
{
	if (!ref)
		return;

	WARN_ONCE(!ref->ct, "alloc_tag was not set\n");
}
#else
/* Without CONFIG_MEM_ALLOC_PROFILING_DEBUG the sanity checks are empty stubs. */
static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
#endif
/*
 * Point @ref at @tag's codetag and count one call against @tag.
 * Caller should verify both ref and tag to be valid; neither is checked here.
 */
static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
{
ref->ct = &tag->ct;
/*
 * We need to increment the call counter every time we have a new
 * allocation or when we split a large allocation into smaller ones.
 * Each new reference for every sub-allocation needs to increment call
 * counter because when we free each part the counter will be decremented.
 */
this_cpu_inc(tag->counters->calls);
}
/*
 * Account an allocation of @bytes to @tag and make @ref refer to it.
 *
 * The debug checks always run first. If either @ref or @tag is NULL nothing
 * is accounted.
 */
static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
{
	alloc_tag_add_check(ref, tag);

	if (ref && tag) {
		__alloc_tag_ref_set(ref, tag);
		this_cpu_add(tag->counters->bytes, bytes);
	}
}
/*
 * Undo the accounting for @bytes on the tag that @ref refers to, drop one
 * call from it, and clear @ref.
 *
 * The debug check always runs first. Nothing is done when @ref is NULL or
 * does not refer to a codetag.
 */
static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
{
	alloc_tag_sub_check(ref);

	if (ref && ref->ct) {
		struct alloc_tag *tag = ct_to_alloc_tag(ref->ct);

		this_cpu_sub(tag->counters->bytes, bytes);
		this_cpu_dec(tag->counters->calls);
		ref->ct = NULL;
	}
}
#else /* CONFIG_MEM_ALLOC_PROFILING */
/*
 * Stubs used when CONFIG_MEM_ALLOC_PROFILING is not set: tags expand to
 * nothing, profiling always reports disabled, and accounting is a no-op.
 */
#define DEFINE_ALLOC_TAG(_alloc_tag)
static inline bool mem_alloc_profiling_enabled(void) { return false; }
static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
size_t bytes) {}
static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {}
#endif /* CONFIG_MEM_ALLOC_PROFILING */
#endif /* _LINUX_ALLOC_TAG_H */

View file

@ -770,6 +770,10 @@ struct task_struct {
unsigned int flags;
unsigned int ptrace;
#ifdef CONFIG_MEM_ALLOC_PROFILING
struct alloc_tag *alloc_tag;
#endif
#ifdef CONFIG_SMP
int on_cpu;
struct __call_single_node wake_entry;
@ -810,6 +814,7 @@ struct task_struct {
struct task_group *sched_task_group;
#endif
#ifdef CONFIG_UCLAMP_TASK
/*
* Clamp values requested for a scheduling entity.
@ -2187,4 +2192,23 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
#ifdef CONFIG_MEM_ALLOC_PROFILING
/*
 * Install @tag as current->alloc_tag and return the tag that was installed
 * before, so the caller can hand it back to alloc_tag_restore().
 */
static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
{
	struct alloc_tag *prev = current->alloc_tag;

	current->alloc_tag = tag;
	return prev;
}
/*
 * Set current->alloc_tag back to @old.
 *
 * @tag is the tag the caller expects to be installed at this point. It is
 * only examined with CONFIG_MEM_ALLOC_PROFILING_DEBUG, where a mismatch with
 * current->alloc_tag triggers a warning before @old is restored.
 */
static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
{
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
#endif
current->alloc_tag = old;
}
#else
/*
 * Without CONFIG_MEM_ALLOC_PROFILING: alloc_tag_save() evaluates to NULL and
 * alloc_tag_restore() does nothing. Neither macro evaluates its arguments.
 */
#define alloc_tag_save(_tag) NULL
#define alloc_tag_restore(_tag, _old) do {} while (0)
#endif
#endif

View file

@ -972,6 +972,31 @@ config CODE_TAGGING
bool
select KALLSYMS
# Main switch for memory allocation profiling; builds lib/alloc_tag.o and
# pulls in the generic code tagging support.
config MEM_ALLOC_PROFILING
bool "Enable memory allocation profiling"
default n
depends on PROC_FS
depends on !DEBUG_FORCE_WEAK_PER_CPU
select CODE_TAGGING
help
Track allocation source code and record total allocation size
initiated at that code location. The mechanism can be used to track
memory leaks with a low performance and memory impact.
# Chooses the initial state of the mem_alloc_profiling_key static key.
config MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
bool "Enable memory allocation profiling by default"
default y
depends on MEM_ALLOC_PROFILING
# Selecting this forces MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT on; the
# vm.mem_profiling sysctl is registered read-only (0444) in this configuration.
config MEM_ALLOC_PROFILING_DEBUG
bool "Memory allocation profiler debugging"
default n
depends on MEM_ALLOC_PROFILING
select MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
help
Adds warnings with helpful error messages for memory allocation
profiling.
source "lib/Kconfig.kasan"
source "lib/Kconfig.kfence"
source "lib/Kconfig.kmsan"

View file

@ -234,6 +234,8 @@ obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_CODE_TAGGING) += codetag.o
obj-$(CONFIG_MEM_ALLOC_PROFILING) += alloc_tag.o
lib-$(CONFIG_GENERIC_BUG) += bug.o
obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o

152
lib/alloc_tag.c Normal file
View file

@ -0,0 +1,152 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/alloc_tag.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
/* Codetag type handle returned by codetag_register_type() in alloc_tag_init(). */
static struct codetag_type *alloc_tag_cttype;
/*
 * Shared per-CPU counters referenced by the ARCH_NEEDS_WEAK_PER_CPU variant of
 * DEFINE_ALLOC_TAG(); exported so that modules can reference it.
 */
DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
EXPORT_SYMBOL(_shared_alloc_tag);
/* Static key read by mem_alloc_profiling_enabled() and exposed through the mem_profiling sysctl. */
DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
mem_alloc_profiling_key);
/*
 * seq_file ->start() for /proc/allocinfo.
 *
 * Allocates a codetag iterator, calls codetag_lock_module_list(..., true),
 * and steps the iterator forward until it sits on the entry at index *pos.
 *
 * The iterator is stored in m->private so that allocinfo_stop() can undo the
 * lock call and free it. This also covers the case where NULL is returned
 * because *pos lies past the last tag.
 *
 * Returns the iterator, NULL when there is no entry at *pos, or
 * ERR_PTR(-ENOMEM) when the iterator cannot be allocated. The error pointer
 * lets the seq_file core report the failure to the reader; a plain NULL
 * would be indistinguishable from an empty file.
 */
static void *allocinfo_start(struct seq_file *m, loff_t *pos)
{
	struct codetag_iterator *iter;
	struct codetag *ct;
	loff_t node = *pos;

	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	/* Stored even when NULL: allocinfo_stop() then has nothing to undo. */
	m->private = iter;
	if (!iter)
		return ERR_PTR(-ENOMEM);

	codetag_lock_module_list(alloc_tag_cttype, true);
	*iter = codetag_get_ct_iter(alloc_tag_cttype);
	while ((ct = codetag_next_ct(iter)) != NULL && node)
		node--;

	return ct ? iter : NULL;
}
/*
 * seq_file ->next(): bump *pos and step the iterator passed in @arg.
 * Returns the iterator while codetag_next_ct() yields a tag, NULL otherwise.
 */
static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
{
	struct codetag_iterator *iter = arg;

	++*pos;

	return codetag_next_ct(iter) ? iter : NULL;
}
/*
 * seq_file ->stop(): undo allocinfo_start(). The iterator is taken from
 * m->private rather than @arg; if none was allocated there is nothing to do.
 */
static void allocinfo_stop(struct seq_file *m, void *arg)
{
	struct codetag_iterator *iter = m->private;

	if (!iter)
		return;

	codetag_lock_module_list(alloc_tag_cttype, false);
	kfree(iter);
}
static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
{
struct alloc_tag *tag = ct_to_alloc_tag(ct);
struct alloc_tag_counters counter = alloc_tag_read(tag);
s64 bytes = counter.bytes;
seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
codetag_to_text(out, ct);
seq_buf_putc(out, ' ');
seq_buf_putc(out, '\n');
}
static int allocinfo_show(struct seq_file *m, void *arg)
{
struct codetag_iterator *iter = (struct codetag_iterator *)arg;
char *bufp;
size_t n = seq_get_buf(m, &bufp);
struct seq_buf buf;
seq_buf_init(&buf, bufp, n);
alloc_tag_to_text(&buf, iter->ct);
seq_commit(m, seq_buf_used(&buf));
return 0;
}
/* seq_file callbacks backing the "allocinfo" proc entry created in procfs_init(). */
static const struct seq_operations allocinfo_seq_op = {
.start = allocinfo_start,
.next = allocinfo_next,
.stop = allocinfo_stop,
.show = allocinfo_show,
};
/*
 * Create the "allocinfo" proc entry (mode 0444) served by allocinfo_seq_op.
 * NOTE(review): the return value of proc_create_seq() is not checked, so a
 * failure to create the entry goes unreported.
 */
static void __init procfs_init(void)
{
proc_create_seq("allocinfo", 0444, NULL, &allocinfo_seq_op);
}
/*
 * codetag module_unload callback for the alloc_tags type.
 *
 * Walks every tag of @cttype, looks only at those that belong to @cmod, and
 * warns about each one whose summed byte counter is still non-zero.
 *
 * Returns true when no tag of @cmod has bytes outstanding, false otherwise.
 */
static bool alloc_tag_module_unload(struct codetag_type *cttype,
				    struct codetag_module *cmod)
{
	struct codetag_iterator iter = codetag_get_ct_iter(cttype);
	struct alloc_tag_counters counter;
	bool module_unused = true;
	struct alloc_tag *tag;
	struct codetag *ct;

	for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) {
		/* The iterator covers all modules; skip tags of other ones. */
		if (iter.cmod != cmod)
			continue;

		tag = ct_to_alloc_tag(ct);
		counter = alloc_tag_read(tag);

		/* Keep scanning after a hit so every offending tag is reported. */
		if (WARN(counter.bytes,
			 "%s:%u module %s func:%s has %llu allocated at module unload\n",
			 ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes))
			module_unused = false;
	}

	return module_unused;
}
/*
 * sysctl table registered under "vm" by alloc_tag_init(). The single
 * "mem_profiling" entry exposes mem_alloc_profiling_key through
 * proc_do_static_key. It is read-only (0444) with
 * CONFIG_MEM_ALLOC_PROFILING_DEBUG and writable by the owner (0644) otherwise.
 */
static struct ctl_table memory_allocation_profiling_sysctls[] = {
{
.procname = "mem_profiling",
.data = &mem_alloc_profiling_key,
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
.mode = 0444,
#else
.mode = 0644,
#endif
.proc_handler = proc_do_static_key,
},
{ }
};
/*
 * Register the "alloc_tags" codetag type, then the vm sysctl table and the
 * allocinfo proc entry.
 *
 * Returns 0 on success, or the error carried by codetag_register_type() when
 * registration fails. In that case the sysctl and proc entry are not created.
 */
static int __init alloc_tag_init(void)
{
	const struct codetag_type_desc alloc_tag_desc = {
		.section	= "alloc_tags",
		.tag_size	= sizeof(struct alloc_tag),
		.module_unload	= alloc_tag_module_unload,
	};
	struct codetag_type *cttype = codetag_register_type(&alloc_tag_desc);

	/* Published before the error check, so it may hold an error pointer. */
	alloc_tag_cttype = cttype;
	if (IS_ERR(cttype))
		return PTR_ERR(cttype);

	register_sysctl_init("vm", memory_allocation_profiling_sysctls);
	procfs_init();

	return 0;
}
module_init(alloc_tag_init);

View file

@ -9,6 +9,8 @@
#define DISCARD_EH_FRAME *(.eh_frame)
#endif
#include <asm-generic/codetag.lds.h>
SECTIONS {
/DISCARD/ : {
*(.discard)
@ -47,12 +49,17 @@ SECTIONS {
.data : {
*(.data .data.[0-9a-zA-Z_]*)
*(.data..L*)
CODETAG_SECTIONS()
}
.rodata : {
*(.rodata .rodata.[0-9a-zA-Z_]*)
*(.rodata..L*)
}
#else
.data : {
CODETAG_SECTIONS()
}
#endif
}