eventfs: Remove eventfs_file and just use eventfs_inode

Instead of having a descriptor for every file represented in the eventfs
directory, only have the directory itself represented. Change the API to
send in a list of entries that represent all the files in the directory
(but not other directories). The entry list contains a name and a callback
function that will be used to create the files when they are accessed.

struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
						const struct eventfs_entry *entries,
						int size, void *data);

is used for the top level eventfs directory, and returns an eventfs_inode
that will be used by:

struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
					 const struct eventfs_entry *entries,
					 int size, void *data);

where both of the above take an array of struct eventfs_entry entries for
every file that is in the directory.

The entries are defined by:

typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data,
				const struct file_operations **fops);

struct eventfs_entry {
	const char			*name;
	eventfs_callback		callback;
};

Where the name is the name of the file and the callback gets called when
the file is being created. The callback passes in the name (in case the
same callback is used for multiple files), a pointer to the mode, data and
fops. The data will be pointing to the data that was passed in
eventfs_create_dir() or eventfs_create_events_dir() but may be overridden
to point to something else, as it will be used to point to the
inode->i_private that is created. The information passed back from the
callback is used to create the dentry/inode.

If the callback fills the data and the file should be created, it must
return a positive number. On zero or negative, the file is ignored.

This logic may also be used as a prototype to convert entire pseudo file
systems into just-in-time allocation.

The "show_events_dentry" file has been updated to show the directories,
and any files they have.

With just the eventfs_file allocations:

 Before after deltas for meminfo (in kB):

   MemFree:		-14360
   MemAvailable:	-14260
   Buffers:		40
   Cached:		24
   Active:		44
   Inactive:		48
   Inactive(anon):	28
   Active(file):	44
   Inactive(file):	20
   Dirty:		-4
   AnonPages:		28
   Mapped:		4
   KReclaimable:	132
   Slab:		1604
   SReclaimable:	132
   SUnreclaim:		1472
   Committed_AS:	12

 Before after deltas for slabinfo:

   <slab>:		<objects>	[ * <size> = <total>]

   ext4_inode_cache	27		[* 1184 = 31968 ]
   extent_status	102		[*   40 = 4080 ]
   tracefs_inode_cache	144		[*  656 = 94464 ]
   buffer_head		39		[*  104 = 4056 ]
   shmem_inode_cache	49		[*  800 = 39200 ]
   filp			-53		[*  256 = -13568 ]
   dentry		251		[*  192 = 48192 ]
   lsm_file_cache	277		[*   32 = 8864 ]
   vm_area_struct	-14		[*  184 = -2576 ]
   trace_event_file	1748		[*   88 = 153824 ]
   kmalloc-1k		35		[* 1024 = 35840 ]
   kmalloc-256		49		[*  256 = 12544 ]
   kmalloc-192		-28		[*  192 = -5376 ]
   kmalloc-128		-30		[*  128 = -3840 ]
   kmalloc-96		10581		[*   96 = 1015776 ]
   kmalloc-64		3056		[*   64 = 195584 ]
   kmalloc-32		1291		[*   32 = 41312 ]
   kmalloc-16		2310		[*   16 = 36960 ]
   kmalloc-8		9216		[*    8 = 73728 ]

 Free memory dropped by 14,360 kB
 Available memory dropped by 14,260 kB
 Total slab additions in size: 1,771,032 bytes

With this change:

 Before after deltas for meminfo (in kB):

   MemFree:		-12084
   MemAvailable:	-11976
   Buffers:		32
   Cached:		32
   Active:		72
   Inactive:		168
   Inactive(anon):	176
   Active(file):	72
   Inactive(file):	-8
   Dirty:		24
   AnonPages:		196
   Mapped:		8
   KReclaimable:	148
   Slab:		836
   SReclaimable:	148
   SUnreclaim:		688
   Committed_AS:	324

 Before after deltas for slabinfo:

   <slab>:		<objects>	[ * <size> = <total>]

   tracefs_inode_cache	144		[* 656 = 94464 ]
   shmem_inode_cache	-23		[* 800 = -18400 ]
   filp			-92		[* 256 = -23552 ]
   dentry		179		[* 192 = 34368 ]
   lsm_file_cache	-3		[* 32 = -96 ]
   vm_area_struct	-13		[* 184 = -2392 ]
   trace_event_file	1748		[* 88 = 153824 ]
   kmalloc-1k		-49		[* 1024 = -50176 ]
   kmalloc-256		-27		[* 256 = -6912 ]
   kmalloc-128		1864		[* 128 = 238592 ]
   kmalloc-64		4685		[* 64 = 299840 ]
   kmalloc-32		-72		[* 32 = -2304 ]
   kmalloc-16		256		[* 16 = 4096 ]
   total = 721352

 Free memory dropped by 12,084 kB
 Available memory dropped by 11,976 kB
 Total slab additions in size:  721,352 bytes

That's over 2 MB in savings per instance for free and available memory,
and over 1 MB in savings per instance of slab memory.

Link: https://lore.kernel.org/linux-trace-kernel/20231003184059.4924468e@gandalf.local.home
Link: https://lore.kernel.org/linux-trace-kernel/20231004165007.43d79161@gandalf.local.home

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ajay Kaher <akaher@vmware.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
This commit is contained in:
Steven Rostedt (Google) 2023-10-04 16:50:07 -04:00
parent 2c6d0950f6
commit 5790b1fb3d
8 changed files with 711 additions and 542 deletions

File diff suppressed because it is too large Load diff

View file

@ -385,7 +385,7 @@ static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
ti = get_tracefs(inode);
if (ti && ti->flags & TRACEFS_EVENT_INODE)
eventfs_set_ef_status_free(ti, dentry);
eventfs_set_ei_status_free(ti, dentry);
iput(inode);
}

View file

@ -13,6 +13,41 @@ struct tracefs_inode {
struct inode vfs_inode;
};
/*
* struct eventfs_inode - hold the properties of the eventfs directories.
* @list: link list into the parent directory
* @entries: the array of entries representing the files in the directory
* @name: the name of the directory to create
* @children: link list into the child eventfs_inode
* @dentry: the dentry of the directory
* @d_parent: pointer to the parent's dentry
* @d_children: The array of dentries to represent the files when created
* @data: The private data to pass to the callbacks
* @nr_entries: The number of items in @entries
*/
struct eventfs_inode {
struct list_head list;
const struct eventfs_entry *entries;
const char *name;
struct list_head children;
struct dentry *dentry;
struct dentry *d_parent;
struct dentry **d_children;
void *data;
/*
* Union - used for deletion
* @del_list: list of eventfs_inode to delete
* @rcu: eventfs_indoe to delete in RCU
* @is_freed: node is freed if one of the above is set
*/
union {
struct list_head del_list;
struct rcu_head rcu;
unsigned long is_freed;
};
int nr_entries;
};
static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
{
return container_of(inode, struct tracefs_inode, vfs_inode);
@ -25,6 +60,6 @@ struct inode *tracefs_get_inode(struct super_block *sb);
struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
struct dentry *eventfs_failed_creating(struct dentry *dentry);
struct dentry *eventfs_end_creating(struct dentry *dentry);
void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry);
void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
#endif /* _TRACEFS_INTERNAL_H */

View file

@ -649,7 +649,7 @@ struct trace_event_file {
struct list_head list;
struct trace_event_call *event_call;
struct event_filter __rcu *filter;
struct eventfs_file *ef;
struct eventfs_inode *ei;
struct trace_array *tr;
struct trace_subsystem_dir *system;
struct list_head triggers;

View file

@ -23,26 +23,25 @@ struct file_operations;
struct eventfs_file;
struct dentry *eventfs_create_events_dir(const char *name,
struct dentry *parent);
typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data,
const struct file_operations **fops);
struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
struct dentry *parent);
struct eventfs_entry {
const char *name;
eventfs_callback callback;
};
struct eventfs_file *eventfs_add_dir(const char *name,
struct eventfs_file *ef_parent);
struct eventfs_inode;
int eventfs_add_file(const char *name, umode_t mode,
struct eventfs_file *ef_parent, void *data,
const struct file_operations *fops);
struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
const struct eventfs_entry *entries,
int size, void *data);
int eventfs_add_events_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops);
struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
const struct eventfs_entry *entries,
int size, void *data);
void eventfs_remove(struct eventfs_file *ef);
void eventfs_remove_events_dir(struct dentry *dentry);
void eventfs_remove_dir(struct eventfs_inode *ei);
struct dentry *tracefs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,

View file

@ -9764,7 +9764,6 @@ static __init void create_trace_instances(struct dentry *d_tracer)
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
struct trace_event_file *file;
int cpu;
trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
@ -9797,11 +9796,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("trace_marker", 0220, d_tracer,
tr, &tracing_mark_fops);
file = __find_event_file(tr, "ftrace", "print");
if (file && file->ef)
eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
file, &event_trigger_fops);
tr->trace_marker_file = file;
tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
trace_create_file("trace_marker_raw", 0220, d_tracer,
tr, &tracing_mark_raw_fops);

View file

@ -381,7 +381,7 @@ struct trace_array {
struct dentry *dir;
struct dentry *options;
struct dentry *percpu_dir;
struct dentry *event_dir;
struct eventfs_inode *event_dir;
struct trace_options *topts;
struct list_head systems;
struct list_head events;
@ -1349,7 +1349,7 @@ struct trace_subsystem_dir {
struct list_head list;
struct event_subsystem *subsystem;
struct trace_array *tr;
struct eventfs_file *ef;
struct eventfs_inode *ei;
int ref_count;
int nr_events;
};

View file

@ -984,7 +984,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
return;
if (!--dir->nr_events) {
eventfs_remove(dir->ef);
eventfs_remove_dir(dir->ei);
list_del(&dir->list);
__put_system_dir(dir);
}
@ -992,7 +992,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
static void remove_event_file_dir(struct trace_event_file *file)
{
eventfs_remove(file->ef);
eventfs_remove_dir(file->ei);
list_del(&file->list);
remove_subsystem(file->system);
free_event_filter(file->filter);
@ -2282,14 +2282,40 @@ create_new_subsystem(const char *name)
return NULL;
}
static struct eventfs_file *
int system_callback(const char *name, umode_t *mode, void **data,
const struct file_operations **fops)
{
if (strcmp(name, "filter") == 0)
*fops = &ftrace_subsystem_filter_fops;
else if (strcmp(name, "enable") == 0)
*fops = &ftrace_system_enable_fops;
else
return 0;
*mode = TRACE_MODE_WRITE;
return 1;
}
static struct eventfs_inode *
event_subsystem_dir(struct trace_array *tr, const char *name,
struct trace_event_file *file, struct dentry *parent)
struct trace_event_file *file, struct eventfs_inode *parent)
{
struct event_subsystem *system, *iter;
struct trace_subsystem_dir *dir;
struct eventfs_file *ef;
int res;
struct eventfs_inode *ei;
int nr_entries;
static struct eventfs_entry system_entries[] = {
{
.name = "filter",
.callback = system_callback,
},
{
.name = "enable",
.callback = system_callback,
}
};
/* First see if we did not already create this dir */
list_for_each_entry(dir, &tr->systems, list) {
@ -2297,7 +2323,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
if (strcmp(system->name, name) == 0) {
dir->nr_events++;
file->system = dir;
return dir->ef;
return dir->ei;
}
}
@ -2321,39 +2347,29 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
} else
__get_system(system);
ef = eventfs_add_subsystem_dir(name, parent);
if (IS_ERR(ef)) {
/* ftrace only has directories no files */
if (strcmp(name, "ftrace") == 0)
nr_entries = 0;
else
nr_entries = ARRAY_SIZE(system_entries);
ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir);
if (!ei) {
pr_warn("Failed to create system directory %s\n", name);
__put_system(system);
goto out_free;
}
dir->ef = ef;
dir->ei = ei;
dir->tr = tr;
dir->ref_count = 1;
dir->nr_events = 1;
dir->subsystem = system;
file->system = dir;
/* the ftrace system is special, do not create enable or filter files */
if (strcmp(name, "ftrace") != 0) {
res = eventfs_add_file("filter", TRACE_MODE_WRITE,
dir->ef, dir,
&ftrace_subsystem_filter_fops);
if (res) {
kfree(system->filter);
system->filter = NULL;
pr_warn("Could not create tracefs '%s/filter' entry\n", name);
}
eventfs_add_file("enable", TRACE_MODE_WRITE, dir->ef, dir,
&ftrace_system_enable_fops);
}
list_add(&dir->list, &tr->systems);
return dir->ef;
return dir->ei;
out_free:
kfree(dir);
@ -2402,15 +2418,134 @@ event_define_fields(struct trace_event_call *call)
return ret;
}
static int event_callback(const char *name, umode_t *mode, void **data,
const struct file_operations **fops)
{
struct trace_event_file *file = *data;
struct trace_event_call *call = file->event_call;
if (strcmp(name, "format") == 0) {
*mode = TRACE_MODE_READ;
*fops = &ftrace_event_format_fops;
*data = call;
return 1;
}
/*
* Only event directories that can be enabled should have
* triggers or filters, with the exception of the "print"
* event that can have a "trigger" file.
*/
if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
if (call->class->reg && strcmp(name, "enable") == 0) {
*mode = TRACE_MODE_WRITE;
*fops = &ftrace_enable_fops;
return 1;
}
if (strcmp(name, "filter") == 0) {
*mode = TRACE_MODE_WRITE;
*fops = &ftrace_event_filter_fops;
return 1;
}
}
if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
strcmp(trace_event_name(call), "print") == 0) {
if (strcmp(name, "trigger") == 0) {
*mode = TRACE_MODE_WRITE;
*fops = &event_trigger_fops;
return 1;
}
}
#ifdef CONFIG_PERF_EVENTS
if (call->event.type && call->class->reg &&
strcmp(name, "id") == 0) {
*mode = TRACE_MODE_READ;
*data = (void *)(long)call->event.type;
*fops = &ftrace_event_id_fops;
return 1;
}
#endif
#ifdef CONFIG_HIST_TRIGGERS
if (strcmp(name, "hist") == 0) {
*mode = TRACE_MODE_READ;
*fops = &event_hist_fops;
return 1;
}
#endif
#ifdef CONFIG_HIST_TRIGGERS_DEBUG
if (strcmp(name, "hist_debug") == 0) {
*mode = TRACE_MODE_READ;
*fops = &event_hist_debug_fops;
return 1;
}
#endif
#ifdef CONFIG_TRACE_EVENT_INJECT
if (call->event.type && call->class->reg &&
strcmp(name, "inject") == 0) {
*mode = 0200;
*fops = &event_inject_fops;
return 1;
}
#endif
return 0;
}
static int
event_create_dir(struct dentry *parent, struct trace_event_file *file)
event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
{
struct trace_event_call *call = file->event_call;
struct eventfs_file *ef_subsystem = NULL;
struct trace_array *tr = file->tr;
struct eventfs_file *ef;
struct eventfs_inode *e_events;
struct eventfs_inode *ei;
const char *name;
int nr_entries;
int ret;
static struct eventfs_entry event_entries[] = {
{
.name = "enable",
.callback = event_callback,
},
{
.name = "filter",
.callback = event_callback,
},
{
.name = "trigger",
.callback = event_callback,
},
{
.name = "format",
.callback = event_callback,
},
#ifdef CONFIG_PERF_EVENTS
{
.name = "id",
.callback = event_callback,
},
#endif
#ifdef CONFIG_HIST_TRIGGERS
{
.name = "hist",
.callback = event_callback,
},
#endif
#ifdef CONFIG_HIST_TRIGGERS_DEBUG
{
.name = "hist_debug",
.callback = event_callback,
},
#endif
#ifdef CONFIG_TRACE_EVENT_INJECT
{
.name = "inject",
.callback = event_callback,
},
#endif
};
/*
* If the trace point header did not define TRACE_SYSTEM
@ -2420,29 +2555,20 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0))
return -ENODEV;
ef_subsystem = event_subsystem_dir(tr, call->class->system, file, parent);
if (!ef_subsystem)
e_events = event_subsystem_dir(tr, call->class->system, file, parent);
if (!e_events)
return -ENOMEM;
nr_entries = ARRAY_SIZE(event_entries);
name = trace_event_name(call);
ef = eventfs_add_dir(name, ef_subsystem);
if (IS_ERR(ef)) {
ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file);
if (IS_ERR(ei)) {
pr_warn("Could not create tracefs '%s' directory\n", name);
return -1;
}
file->ef = ef;
if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
eventfs_add_file("enable", TRACE_MODE_WRITE, file->ef, file,
&ftrace_enable_fops);
#ifdef CONFIG_PERF_EVENTS
if (call->event.type && call->class->reg)
eventfs_add_file("id", TRACE_MODE_READ, file->ef,
(void *)(long)call->event.type,
&ftrace_event_id_fops);
#endif
file->ei = ei;
ret = event_define_fields(call);
if (ret < 0) {
@ -2450,35 +2576,6 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
return ret;
}
/*
* Only event directories that can be enabled should have
* triggers or filters.
*/
if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
eventfs_add_file("filter", TRACE_MODE_WRITE, file->ef,
file, &ftrace_event_filter_fops);
eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
file, &event_trigger_fops);
}
#ifdef CONFIG_HIST_TRIGGERS
eventfs_add_file("hist", TRACE_MODE_READ, file->ef, file,
&event_hist_fops);
#endif
#ifdef CONFIG_HIST_TRIGGERS_DEBUG
eventfs_add_file("hist_debug", TRACE_MODE_READ, file->ef, file,
&event_hist_debug_fops);
#endif
eventfs_add_file("format", TRACE_MODE_READ, file->ef, call,
&ftrace_event_format_fops);
#ifdef CONFIG_TRACE_EVENT_INJECT
if (call->event.type && call->class->reg)
eventfs_add_file("inject", 0200, file->ef, file,
&event_inject_fops);
#endif
return 0;
}
@ -3623,30 +3720,65 @@ static __init int setup_trace_event(char *str)
}
__setup("trace_event=", setup_trace_event);
static int events_callback(const char *name, umode_t *mode, void **data,
const struct file_operations **fops)
{
if (strcmp(name, "enable") == 0) {
*mode = TRACE_MODE_WRITE;
*fops = &ftrace_tr_enable_fops;
return 1;
}
if (strcmp(name, "header_page") == 0)
*data = ring_buffer_print_page_header;
else if (strcmp(name, "header_event") == 0)
*data = ring_buffer_print_entry_header;
else
return 0;
*mode = TRACE_MODE_READ;
*fops = &ftrace_show_header_fops;
return 1;
}
/* Expects to have event_mutex held when called */
static int
create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
{
struct dentry *d_events;
struct eventfs_inode *e_events;
struct dentry *entry;
int error = 0;
int nr_entries;
static struct eventfs_entry events_entries[] = {
{
.name = "enable",
.callback = events_callback,
},
{
.name = "header_page",
.callback = events_callback,
},
{
.name = "header_event",
.callback = events_callback,
},
};
entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
tr, &ftrace_set_event_fops);
if (!entry)
return -ENOMEM;
d_events = eventfs_create_events_dir("events", parent);
if (IS_ERR(d_events)) {
nr_entries = ARRAY_SIZE(events_entries);
e_events = eventfs_create_events_dir("events", parent, events_entries,
nr_entries, tr);
if (IS_ERR(e_events)) {
pr_warn("Could not create tracefs 'events' directory\n");
return -ENOMEM;
}
error = eventfs_add_events_file("enable", TRACE_MODE_WRITE, d_events,
tr, &ftrace_tr_enable_fops);
if (error)
return -ENOMEM;
/* There are not as crucial, just warn if they are not created */
trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
@ -3656,16 +3788,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
TRACE_MODE_WRITE, parent, tr,
&ftrace_set_event_notrace_pid_fops);
/* ring buffer internal formats */
eventfs_add_events_file("header_page", TRACE_MODE_READ, d_events,
ring_buffer_print_page_header,
&ftrace_show_header_fops);
eventfs_add_events_file("header_event", TRACE_MODE_READ, d_events,
ring_buffer_print_entry_header,
&ftrace_show_header_fops);
tr->event_dir = d_events;
tr->event_dir = e_events;
return 0;
}
@ -3749,7 +3872,7 @@ int event_trace_del_tracer(struct trace_array *tr)
down_write(&trace_event_sem);
__trace_remove_event_dirs(tr);
eventfs_remove_events_dir(tr->event_dir);
eventfs_remove_dir(tr->event_dir);
up_write(&trace_event_sem);
tr->event_dir = NULL;