diff --git a/mm/Kconfig b/mm/Kconfig index beb7a455915d..8fae42606d56 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -706,3 +706,11 @@ config ARCH_USES_HIGH_VMA_FLAGS bool config ARCH_HAS_PKEYS bool + +config PERCPU_STATS + bool "Collect percpu memory statistics" + default n + help + This feature collects and exposes statistics via debugfs. The + information includes global and per chunk statistics, which can + be used to help understand percpu memory usage. diff --git a/mm/Makefile b/mm/Makefile index 026f6a828a50..411bd24d4a7c 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -103,3 +103,4 @@ obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o +obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 68bf18522a6d..d030fce745a2 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -5,6 +5,11 @@ #include struct pcpu_chunk { +#ifdef CONFIG_PERCPU_STATS + int nr_alloc; /* # of allocations */ + size_t max_alloc_size; /* largest allocation size */ +#endif + struct list_head list; /* linked to pcpu_slot lists */ int free_size; /* free bytes in the chunk */ int contig_hint; /* max contiguous size hint */ @@ -18,6 +23,11 @@ struct pcpu_chunk { void *data; /* chunk data */ int first_free; /* no free below this */ bool immutable; /* no [de]population allowed */ + bool has_reserved; /* Indicates if chunk has reserved space + at the beginning. Reserved chunk will + contain reservation for static chunk. + Dynamic chunk will contain reservation + for static and reserved chunks. */ int nr_populated; /* # of populated pages */ unsigned long populated[]; /* populated bitmap */ }; @@ -30,4 +40,125 @@ extern int pcpu_nr_slots; extern struct pcpu_chunk *pcpu_first_chunk; extern struct pcpu_chunk *pcpu_reserved_chunk; +#ifdef CONFIG_PERCPU_STATS + +#include + +struct percpu_stats { + u64 nr_alloc; /* lifetime # of allocations */ + u64 nr_dealloc; /* lifetime # of deallocations */ + u64 nr_cur_alloc; /* current # of allocations */ + u64 nr_max_alloc; /* max # of live allocations */ + u32 nr_chunks; /* current # of live chunks */ + u32 nr_max_chunks; /* max # of live chunks */ + size_t min_alloc_size; /* min allocaiton size */ + size_t max_alloc_size; /* max allocation size */ +}; + +extern struct percpu_stats pcpu_stats; +extern struct pcpu_alloc_info pcpu_stats_ai; + +/* + * For debug purposes. We don't care about the flexible array. + */ +static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai) +{ + memcpy(&pcpu_stats_ai, ai, sizeof(struct pcpu_alloc_info)); + + /* initialize min_alloc_size to unit_size */ + pcpu_stats.min_alloc_size = pcpu_stats_ai.unit_size; +} + +/* + * pcpu_stats_area_alloc - increment area allocation stats + * @chunk: the location of the area being allocated + * @size: size of area to allocate in bytes + * + * CONTEXT: + * pcpu_lock. + */ +static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size) +{ + lockdep_assert_held(&pcpu_lock); + + pcpu_stats.nr_alloc++; + pcpu_stats.nr_cur_alloc++; + pcpu_stats.nr_max_alloc = + max(pcpu_stats.nr_max_alloc, pcpu_stats.nr_cur_alloc); + pcpu_stats.min_alloc_size = + min(pcpu_stats.min_alloc_size, size); + pcpu_stats.max_alloc_size = + max(pcpu_stats.max_alloc_size, size); + + chunk->nr_alloc++; + chunk->max_alloc_size = max(chunk->max_alloc_size, size); +} + +/* + * pcpu_stats_area_dealloc - decrement allocation stats + * @chunk: the location of the area being deallocated + * + * CONTEXT: + * pcpu_lock. + */ +static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) +{ + lockdep_assert_held(&pcpu_lock); + + pcpu_stats.nr_dealloc++; + pcpu_stats.nr_cur_alloc--; + + chunk->nr_alloc--; +} + +/* + * pcpu_stats_chunk_alloc - increment chunk stats + */ +static inline void pcpu_stats_chunk_alloc(void) +{ + spin_lock_irq(&pcpu_lock); + + pcpu_stats.nr_chunks++; + pcpu_stats.nr_max_chunks = + max(pcpu_stats.nr_max_chunks, pcpu_stats.nr_chunks); + + spin_unlock_irq(&pcpu_lock); +} + +/* + * pcpu_stats_chunk_dealloc - decrement chunk stats + */ +static inline void pcpu_stats_chunk_dealloc(void) +{ + spin_lock_irq(&pcpu_lock); + + pcpu_stats.nr_chunks--; + + spin_unlock_irq(&pcpu_lock); +} + +#else + +static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai) +{ +} + +static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size) +{ +} + +static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) +{ +} + +static inline void pcpu_stats_chunk_alloc(void) +{ +} + +static inline void pcpu_stats_chunk_dealloc(void) +{ +} + +#endif /* !CONFIG_PERCPU_STATS */ + #endif diff --git a/mm/percpu-km.c b/mm/percpu-km.c index d66911ff42d9..3bbfa0c9d069 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -72,6 +72,8 @@ static struct pcpu_chunk *pcpu_create_chunk(void) pcpu_chunk_populated(chunk, 0, nr_pages); spin_unlock_irq(&pcpu_lock); + pcpu_stats_chunk_alloc(); + return chunk; } @@ -79,6 +81,8 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) { const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; + pcpu_stats_chunk_dealloc(); + if (chunk && chunk->data) __free_pages(chunk->data, order_base_2(nr_pages)); pcpu_free_chunk(chunk); diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c new file mode 100644 index 000000000000..03524a56eeff --- /dev/null +++ b/mm/percpu-stats.c @@ -0,0 +1,222 @@ +/* + * mm/percpu-debug.c + * + * Copyright (C) 2017 Facebook Inc. + * Copyright (C) 2017 Dennis Zhou + * + * This file is released under the GPLv2. + * + * Prints statistics about the percpu allocator and backing chunks. + */ +#include +#include +#include +#include +#include +#include + +#include "percpu-internal.h" + +#define P(X, Y) \ + seq_printf(m, " %-24s: %8lld\n", X, (long long int)Y) + +struct percpu_stats pcpu_stats; +struct pcpu_alloc_info pcpu_stats_ai; + +static int cmpint(const void *a, const void *b) +{ + return *(int *)a - *(int *)b; +} + +/* + * Iterates over all chunks to find the max # of map entries used. + */ +static int find_max_map_used(void) +{ + struct pcpu_chunk *chunk; + int slot, max_map_used; + + max_map_used = 0; + for (slot = 0; slot < pcpu_nr_slots; slot++) + list_for_each_entry(chunk, &pcpu_slot[slot], list) + max_map_used = max(max_map_used, chunk->map_used); + + return max_map_used; +} + +/* + * Prints out chunk state. Fragmentation is considered between + * the beginning of the chunk to the last allocation. + */ +static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, + void *buffer) +{ + int i, s_index, last_alloc, alloc_sign, as_len; + int *alloc_sizes, *p; + /* statistics */ + int sum_frag = 0, max_frag = 0; + int cur_min_alloc = 0, cur_med_alloc = 0, cur_max_alloc = 0; + + alloc_sizes = buffer; + s_index = chunk->has_reserved ? 1 : 0; + + /* find last allocation */ + last_alloc = -1; + for (i = chunk->map_used - 1; i >= s_index; i--) { + if (chunk->map[i] & 1) { + last_alloc = i; + break; + } + } + + /* if the chunk is not empty - ignoring reserve */ + if (last_alloc >= s_index) { + as_len = last_alloc + 1 - s_index; + + /* + * Iterate through chunk map computing size info. + * The first bit is overloaded to be a used flag. + * negative = free space, positive = allocated + */ + for (i = 0, p = chunk->map + s_index; i < as_len; i++, p++) { + alloc_sign = (*p & 1) ? 1 : -1; + alloc_sizes[i] = alloc_sign * + ((p[1] & ~1) - (p[0] & ~1)); + } + + sort(alloc_sizes, as_len, sizeof(chunk->map[0]), cmpint, NULL); + + /* Iterate through the unallocated fragements. */ + for (i = 0, p = alloc_sizes; *p < 0 && i < as_len; i++, p++) { + sum_frag -= *p; + max_frag = max(max_frag, -1 * (*p)); + } + + cur_min_alloc = alloc_sizes[i]; + cur_med_alloc = alloc_sizes[(i + as_len - 1) / 2]; + cur_max_alloc = alloc_sizes[as_len - 1]; + } + + P("nr_alloc", chunk->nr_alloc); + P("max_alloc_size", chunk->max_alloc_size); + P("free_size", chunk->free_size); + P("contig_hint", chunk->contig_hint); + P("sum_frag", sum_frag); + P("max_frag", max_frag); + P("cur_min_alloc", cur_min_alloc); + P("cur_med_alloc", cur_med_alloc); + P("cur_max_alloc", cur_max_alloc); + seq_putc(m, '\n'); +} + +static int percpu_stats_show(struct seq_file *m, void *v) +{ + struct pcpu_chunk *chunk; + int slot, max_map_used; + void *buffer; + +alloc_buffer: + spin_lock_irq(&pcpu_lock); + max_map_used = find_max_map_used(); + spin_unlock_irq(&pcpu_lock); + + buffer = vmalloc(max_map_used * sizeof(pcpu_first_chunk->map[0])); + if (!buffer) + return -ENOMEM; + + spin_lock_irq(&pcpu_lock); + + /* if the buffer allocated earlier is too small */ + if (max_map_used < find_max_map_used()) { + spin_unlock_irq(&pcpu_lock); + vfree(buffer); + goto alloc_buffer; + } + +#define PL(X) \ + seq_printf(m, " %-24s: %8lld\n", #X, (long long int)pcpu_stats_ai.X) + + seq_printf(m, + "Percpu Memory Statistics\n" + "Allocation Info:\n" + "----------------------------------------\n"); + PL(unit_size); + PL(static_size); + PL(reserved_size); + PL(dyn_size); + PL(atom_size); + PL(alloc_size); + seq_putc(m, '\n'); + +#undef PL + +#define PU(X) \ + seq_printf(m, " %-18s: %14llu\n", #X, (unsigned long long)pcpu_stats.X) + + seq_printf(m, + "Global Stats:\n" + "----------------------------------------\n"); + PU(nr_alloc); + PU(nr_dealloc); + PU(nr_cur_alloc); + PU(nr_max_alloc); + PU(nr_chunks); + PU(nr_max_chunks); + PU(min_alloc_size); + PU(max_alloc_size); + seq_putc(m, '\n'); + +#undef PU + + seq_printf(m, + "Per Chunk Stats:\n" + "----------------------------------------\n"); + + if (pcpu_reserved_chunk) { + seq_puts(m, "Chunk: <- Reserved Chunk\n"); + chunk_map_stats(m, pcpu_reserved_chunk, buffer); + } + + for (slot = 0; slot < pcpu_nr_slots; slot++) { + list_for_each_entry(chunk, &pcpu_slot[slot], list) { + if (chunk == pcpu_first_chunk) { + seq_puts(m, "Chunk: <- First Chunk\n"); + chunk_map_stats(m, chunk, buffer); + + + } else { + seq_puts(m, "Chunk:\n"); + chunk_map_stats(m, chunk, buffer); + } + + } + } + + spin_unlock_irq(&pcpu_lock); + + vfree(buffer); + + return 0; +} + +static int percpu_stats_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, percpu_stats_show, NULL); +} + +static const struct file_operations percpu_stats_fops = { + .open = percpu_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init init_percpu_stats_debugfs(void) +{ + debugfs_create_file("percpu_stats", 0444, NULL, NULL, + &percpu_stats_fops); + + return 0; +} + +late_initcall(init_percpu_stats_debugfs); diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 9ac639499bd1..5915a224da52 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -343,11 +343,16 @@ static struct pcpu_chunk *pcpu_create_chunk(void) chunk->data = vms; chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; + + pcpu_stats_chunk_alloc(); + return chunk; } static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) { + pcpu_stats_chunk_dealloc(); + if (chunk && chunk->data) pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); pcpu_free_chunk(chunk); diff --git a/mm/percpu.c b/mm/percpu.c index 75ac982c19df..44a1cadf74a7 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -657,6 +657,7 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme, int *p; lockdep_assert_held(&pcpu_lock); + pcpu_stats_area_dealloc(chunk); freeme |= 1; /* we are searching for pair */ @@ -721,6 +722,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) chunk->map[0] = 0; chunk->map[1] = pcpu_unit_size | 1; chunk->map_used = 1; + chunk->has_reserved = false; INIT_LIST_HEAD(&chunk->list); INIT_LIST_HEAD(&chunk->map_extend_list); @@ -970,6 +972,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, goto restart; area_found: + pcpu_stats_area_alloc(chunk, size); spin_unlock_irqrestore(&pcpu_lock, flags); /* populate if not all pages are already there */ @@ -1642,6 +1645,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); + pcpu_stats_save_ai(ai); + /* * Allocate chunk slots. The additional last slot is for * empty chunks. @@ -1685,6 +1690,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, if (schunk->free_size) schunk->map[++schunk->map_used] = ai->static_size + schunk->free_size; schunk->map[schunk->map_used] |= 1; + schunk->has_reserved = true; /* init dynamic chunk if necessary */ if (dyn_size) { @@ -1703,6 +1709,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, dchunk->map[1] = pcpu_reserved_chunk_limit; dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1; dchunk->map_used = 2; + dchunk->has_reserved = true; } /* link the first chunk in */ @@ -1711,6 +1718,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_count_occupied_pages(pcpu_first_chunk, 1); pcpu_chunk_relocate(pcpu_first_chunk, -1); + pcpu_stats_chunk_alloc(); + /* we're done */ pcpu_base_addr = base_addr; return 0;