linux/kernel/trace/trace_events_filter.c
Steven Rostedt (Google) bb32500fb9 tracing: Have trace_event_file have ref counters
The following can crash the kernel:

 # cd /sys/kernel/tracing
 # echo 'p:sched schedule' > kprobe_events
 # exec 5>>events/kprobes/sched/enable
 # > kprobe_events
 # exec 5>&-

The above commands:

 1. Change directory to the tracefs directory
 2. Create a kprobe event (doesn't matter what one)
 3. Open bash file descriptor 5 on the enable file of the kprobe event
 4. Delete the kprobe event (removes the files too)
 5. Close the bash file descriptor 5

The above causes a crash!

 BUG: kernel NULL pointer dereference, address: 0000000000000028
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 PGD 0 P4D 0
 Oops: 0000 [#1] PREEMPT SMP PTI
 CPU: 6 PID: 877 Comm: bash Not tainted 6.5.0-rc4-test-00008-g2c6b6b1029d4-dirty #186
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
 RIP: 0010:tracing_release_file_tr+0xc/0x50

What happens here is that the kprobe event creates a trace_event_file
"file" descriptor that represents the file in tracefs to the event. It
maintains state of the event (is it enabled for the given instance?).
Opening the "enable" file gets a reference to the event "file" descriptor
via the open file descriptor. When the kprobe event is deleted, the file is
also deleted from the tracefs system which also frees the event "file"
descriptor.

But as the tracefs file is still opened by user space, it will not be
totally removed until the final dput() is called on it. But this is not
true with the event "file" descriptor that is already freed. If the user
does a write to or simply closes the file descriptor it will reference the
event "file" descriptor that was just freed, causing a use-after-free bug.

To solve this, add a ref count to the event "file" descriptor as well as a
new flag called "FREED". The "file" will not be freed until the last
reference is released. But the FREE flag will be set when the event is
removed to prevent any more modifications to that event from happening,
even if there's still a reference to the event "file" descriptor.

Link: https://lore.kernel.org/linux-trace-kernel/20231031000031.1e705592@gandalf.local.home/
Link: https://lore.kernel.org/linux-trace-kernel/20231031122453.7a48b923@gandalf.local.home

Cc: stable@vger.kernel.org
Cc: Mark Rutland <mark.rutland@arm.com>
Fixes: f5ca233e2e ("tracing: Increase trace array ref count on enable and filter files")
Reported-by: Beau Belgrave <beaub@linux.microsoft.com>
Tested-by: Beau Belgrave <beaub@linux.microsoft.com>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2023-11-01 23:44:44 -04:00

2828 lines
69 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* trace_events_filter - generic event filtering
*
* Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
*/
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/perf_event.h>
#include <linux/slab.h>
#include "trace.h"
#include "trace_output.h"
#define DEFAULT_SYS_FILTER_MESSAGE \
"### global filter ###\n" \
"# Use this to set filters for multiple events.\n" \
"# Only events with the given fields will be affected.\n" \
"# If no events are modified, an error message will be displayed here"
/* Due to token parsing '<=' must be before '<' and '>=' must be before '>' */
#define OPS \
C( OP_GLOB, "~" ), \
C( OP_NE, "!=" ), \
C( OP_EQ, "==" ), \
C( OP_LE, "<=" ), \
C( OP_LT, "<" ), \
C( OP_GE, ">=" ), \
C( OP_GT, ">" ), \
C( OP_BAND, "&" ), \
C( OP_MAX, NULL )
#undef C
#define C(a, b) a
enum filter_op_ids { OPS };
#undef C
#define C(a, b) b
static const char * ops[] = { OPS };
enum filter_pred_fn {
FILTER_PRED_FN_NOP,
FILTER_PRED_FN_64,
FILTER_PRED_FN_64_CPUMASK,
FILTER_PRED_FN_S64,
FILTER_PRED_FN_U64,
FILTER_PRED_FN_32,
FILTER_PRED_FN_32_CPUMASK,
FILTER_PRED_FN_S32,
FILTER_PRED_FN_U32,
FILTER_PRED_FN_16,
FILTER_PRED_FN_16_CPUMASK,
FILTER_PRED_FN_S16,
FILTER_PRED_FN_U16,
FILTER_PRED_FN_8,
FILTER_PRED_FN_8_CPUMASK,
FILTER_PRED_FN_S8,
FILTER_PRED_FN_U8,
FILTER_PRED_FN_COMM,
FILTER_PRED_FN_STRING,
FILTER_PRED_FN_STRLOC,
FILTER_PRED_FN_STRRELLOC,
FILTER_PRED_FN_PCHAR_USER,
FILTER_PRED_FN_PCHAR,
FILTER_PRED_FN_CPU,
FILTER_PRED_FN_CPU_CPUMASK,
FILTER_PRED_FN_CPUMASK,
FILTER_PRED_FN_CPUMASK_CPU,
FILTER_PRED_FN_FUNCTION,
FILTER_PRED_FN_,
FILTER_PRED_TEST_VISITED,
};
struct filter_pred {
struct regex *regex;
struct cpumask *mask;
unsigned short *ops;
struct ftrace_event_field *field;
u64 val;
u64 val2;
enum filter_pred_fn fn_num;
int offset;
int not;
int op;
};
/*
* pred functions are OP_LE, OP_LT, OP_GE, OP_GT, and OP_BAND
* pred_funcs_##type below must match the order of them above.
*/
#define PRED_FUNC_START OP_LE
#define PRED_FUNC_MAX (OP_BAND - PRED_FUNC_START)
#define ERRORS \
C(NONE, "No error"), \
C(INVALID_OP, "Invalid operator"), \
C(TOO_MANY_OPEN, "Too many '('"), \
C(TOO_MANY_CLOSE, "Too few '('"), \
C(MISSING_QUOTE, "Missing matching quote"), \
C(MISSING_BRACE_OPEN, "Missing '{'"), \
C(MISSING_BRACE_CLOSE, "Missing '}'"), \
C(OPERAND_TOO_LONG, "Operand too long"), \
C(EXPECT_STRING, "Expecting string field"), \
C(EXPECT_DIGIT, "Expecting numeric field"), \
C(ILLEGAL_FIELD_OP, "Illegal operation for field type"), \
C(FIELD_NOT_FOUND, "Field not found"), \
C(ILLEGAL_INTVAL, "Illegal integer value"), \
C(BAD_SUBSYS_FILTER, "Couldn't find or set field in one of a subsystem's events"), \
C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \
C(INVALID_FILTER, "Meaningless filter expression"), \
C(INVALID_CPULIST, "Invalid cpulist"), \
C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \
C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \
C(NO_FUNCTION, "Function not found"), \
C(ERRNO, "Error"), \
C(NO_FILTER, "No filter found")
#undef C
#define C(a, b) FILT_ERR_##a
enum { ERRORS };
#undef C
#define C(a, b) b
static const char *err_text[] = { ERRORS };
/* Called after a '!' character but "!=" and "!~" are not "not"s */
static bool is_not(const char *str)
{
switch (str[1]) {
case '=':
case '~':
return false;
}
return true;
}
/**
* struct prog_entry - a singe entry in the filter program
* @target: Index to jump to on a branch (actually one minus the index)
* @when_to_branch: The value of the result of the predicate to do a branch
* @pred: The predicate to execute.
*/
struct prog_entry {
int target;
int when_to_branch;
struct filter_pred *pred;
};
/**
* update_preds - assign a program entry a label target
* @prog: The program array
* @N: The index of the current entry in @prog
* @invert: What to assign a program entry for its branch condition
*
* The program entry at @N has a target that points to the index of a program
* entry that can have its target and when_to_branch fields updated.
* Update the current program entry denoted by index @N target field to be
* that of the updated entry. This will denote the entry to update if
* we are processing an "||" after an "&&".
*/
static void update_preds(struct prog_entry *prog, int N, int invert)
{
int t, s;
t = prog[N].target;
s = prog[t].target;
prog[t].when_to_branch = invert;
prog[t].target = N;
prog[N].target = s;
}
struct filter_parse_error {
int lasterr;
int lasterr_pos;
};
static void parse_error(struct filter_parse_error *pe, int err, int pos)
{
pe->lasterr = err;
pe->lasterr_pos = pos;
}
typedef int (*parse_pred_fn)(const char *str, void *data, int pos,
struct filter_parse_error *pe,
struct filter_pred **pred);
enum {
INVERT = 1,
PROCESS_AND = 2,
PROCESS_OR = 4,
};
static void free_predicate(struct filter_pred *pred)
{
if (pred) {
kfree(pred->regex);
kfree(pred->mask);
kfree(pred);
}
}
/*
* Without going into a formal proof, this explains the method that is used in
* parsing the logical expressions.
*
* For example, if we have: "a && !(!b || (c && g)) || d || e && !f"
* The first pass will convert it into the following program:
*
* n1: r=a; l1: if (!r) goto l4;
* n2: r=b; l2: if (!r) goto l4;
* n3: r=c; r=!r; l3: if (r) goto l4;
* n4: r=g; r=!r; l4: if (r) goto l5;
* n5: r=d; l5: if (r) goto T
* n6: r=e; l6: if (!r) goto l7;
* n7: r=f; r=!r; l7: if (!r) goto F
* T: return TRUE
* F: return FALSE
*
* To do this, we use a data structure to represent each of the above
* predicate and conditions that has:
*
* predicate, when_to_branch, invert, target
*
* The "predicate" will hold the function to determine the result "r".
* The "when_to_branch" denotes what "r" should be if a branch is to be taken
* "&&" would contain "!r" or (0) and "||" would contain "r" or (1).
* The "invert" holds whether the value should be reversed before testing.
* The "target" contains the label "l#" to jump to.
*
* A stack is created to hold values when parentheses are used.
*
* To simplify the logic, the labels will start at 0 and not 1.
*
* The possible invert values are 1 and 0. The number of "!"s that are in scope
* before the predicate determines the invert value, if the number is odd then
* the invert value is 1 and 0 otherwise. This means the invert value only
* needs to be toggled when a new "!" is introduced compared to what is stored
* on the stack, where parentheses were used.
*
* The top of the stack and "invert" are initialized to zero.
*
* ** FIRST PASS **
*
* #1 A loop through all the tokens is done:
*
* #2 If the token is an "(", the stack is push, and the current stack value
* gets the current invert value, and the loop continues to the next token.
* The top of the stack saves the "invert" value to keep track of what
* the current inversion is. As "!(a && !b || c)" would require all
* predicates being affected separately by the "!" before the parentheses.
* And that would end up being equivalent to "(!a || b) && !c"
*
* #3 If the token is an "!", the current "invert" value gets inverted, and
* the loop continues. Note, if the next token is a predicate, then
* this "invert" value is only valid for the current program entry,
* and does not affect other predicates later on.
*
* The only other acceptable token is the predicate string.
*
* #4 A new entry into the program is added saving: the predicate and the
* current value of "invert". The target is currently assigned to the
* previous program index (this will not be its final value).
*
* #5 We now enter another loop and look at the next token. The only valid
* tokens are ")", "&&", "||" or end of the input string "\0".
*
* #6 The invert variable is reset to the current value saved on the top of
* the stack.
*
* #7 The top of the stack holds not only the current invert value, but also
* if a "&&" or "||" needs to be processed. Note, the "&&" takes higher
* precedence than "||". That is "a && b || c && d" is equivalent to
* "(a && b) || (c && d)". Thus the first thing to do is to see if "&&" needs
* to be processed. This is the case if an "&&" was the last token. If it was
* then we call update_preds(). This takes the program, the current index in
* the program, and the current value of "invert". More will be described
* below about this function.
*
* #8 If the next token is "&&" then we set a flag in the top of the stack
* that denotes that "&&" needs to be processed, break out of this loop
* and continue with the outer loop.
*
* #9 Otherwise, if a "||" needs to be processed then update_preds() is called.
* This is called with the program, the current index in the program, but
* this time with an inverted value of "invert" (that is !invert). This is
* because the value taken will become the "when_to_branch" value of the
* program.
* Note, this is called when the next token is not an "&&". As stated before,
* "&&" takes higher precedence, and "||" should not be processed yet if the
* next logical operation is "&&".
*
* #10 If the next token is "||" then we set a flag in the top of the stack
* that denotes that "||" needs to be processed, break out of this loop
* and continue with the outer loop.
*
* #11 If this is the end of the input string "\0" then we break out of both
* loops.
*
* #12 Otherwise, the next token is ")", where we pop the stack and continue
* this inner loop.
*
* Now to discuss the update_pred() function, as that is key to the setting up
* of the program. Remember the "target" of the program is initialized to the
* previous index and not the "l" label. The target holds the index into the
* program that gets affected by the operand. Thus if we have something like
* "a || b && c", when we process "a" the target will be "-1" (undefined).
* When we process "b", its target is "0", which is the index of "a", as that's
* the predicate that is affected by "||". But because the next token after "b"
* is "&&" we don't call update_preds(). Instead continue to "c". As the
* next token after "c" is not "&&" but the end of input, we first process the
* "&&" by calling update_preds() for the "&&" then we process the "||" by
* calling updates_preds() with the values for processing "||".
*
* What does that mean? What update_preds() does is to first save the "target"
* of the program entry indexed by the current program entry's "target"
* (remember the "target" is initialized to previous program entry), and then
* sets that "target" to the current index which represents the label "l#".
* That entry's "when_to_branch" is set to the value passed in (the "invert"
* or "!invert"). Then it sets the current program entry's target to the saved
* "target" value (the old value of the program that had its "target" updated
* to the label).
*
* Looking back at "a || b && c", we have the following steps:
* "a" - prog[0] = { "a", X, -1 } // pred, when_to_branch, target
* "||" - flag that we need to process "||"; continue outer loop
* "b" - prog[1] = { "b", X, 0 }
* "&&" - flag that we need to process "&&"; continue outer loop
* (Notice we did not process "||")
* "c" - prog[2] = { "c", X, 1 }
* update_preds(prog, 2, 0); // invert = 0 as we are processing "&&"
* t = prog[2].target; // t = 1
* s = prog[t].target; // s = 0
* prog[t].target = 2; // Set target to "l2"
* prog[t].when_to_branch = 0;
* prog[2].target = s;
* update_preds(prog, 2, 1); // invert = 1 as we are now processing "||"
* t = prog[2].target; // t = 0
* s = prog[t].target; // s = -1
* prog[t].target = 2; // Set target to "l2"
* prog[t].when_to_branch = 1;
* prog[2].target = s;
*
* #13 Which brings us to the final step of the first pass, which is to set
* the last program entry's when_to_branch and target, which will be
* when_to_branch = 0; target = N; ( the label after the program entry after
* the last program entry processed above).
*
* If we denote "TRUE" to be the entry after the last program entry processed,
* and "FALSE" the program entry after that, we are now done with the first
* pass.
*
* Making the above "a || b && c" have a program of:
* prog[0] = { "a", 1, 2 }
* prog[1] = { "b", 0, 2 }
* prog[2] = { "c", 0, 3 }
*
* Which translates into:
* n0: r = a; l0: if (r) goto l2;
* n1: r = b; l1: if (!r) goto l2;
* n2: r = c; l2: if (!r) goto l3; // Which is the same as "goto F;"
* T: return TRUE; l3:
* F: return FALSE
*
* Although, after the first pass, the program is correct, it is
* inefficient. The simple sample of "a || b && c" could be easily been
* converted into:
* n0: r = a; if (r) goto T
* n1: r = b; if (!r) goto F
* n2: r = c; if (!r) goto F
* T: return TRUE;
* F: return FALSE;
*
* The First Pass is over the input string. The next too passes are over
* the program itself.
*
* ** SECOND PASS **
*
* Which brings us to the second pass. If a jump to a label has the
* same condition as that label, it can instead jump to its target.
* The original example of "a && !(!b || (c && g)) || d || e && !f"
* where the first pass gives us:
*
* n1: r=a; l1: if (!r) goto l4;
* n2: r=b; l2: if (!r) goto l4;
* n3: r=c; r=!r; l3: if (r) goto l4;
* n4: r=g; r=!r; l4: if (r) goto l5;
* n5: r=d; l5: if (r) goto T
* n6: r=e; l6: if (!r) goto l7;
* n7: r=f; r=!r; l7: if (!r) goto F:
* T: return TRUE;
* F: return FALSE
*
* We can see that "l3: if (r) goto l4;" and at l4, we have "if (r) goto l5;".
* And "l5: if (r) goto T", we could optimize this by converting l3 and l4
* to go directly to T. To accomplish this, we start from the last
* entry in the program and work our way back. If the target of the entry
* has the same "when_to_branch" then we could use that entry's target.
* Doing this, the above would end up as:
*
* n1: r=a; l1: if (!r) goto l4;
* n2: r=b; l2: if (!r) goto l4;
* n3: r=c; r=!r; l3: if (r) goto T;
* n4: r=g; r=!r; l4: if (r) goto T;
* n5: r=d; l5: if (r) goto T;
* n6: r=e; l6: if (!r) goto F;
* n7: r=f; r=!r; l7: if (!r) goto F;
* T: return TRUE
* F: return FALSE
*
* In that same pass, if the "when_to_branch" doesn't match, we can simply
* go to the program entry after the label. That is, "l2: if (!r) goto l4;"
* where "l4: if (r) goto T;", then we can convert l2 to be:
* "l2: if (!r) goto n5;".
*
* This will have the second pass give us:
* n1: r=a; l1: if (!r) goto n5;
* n2: r=b; l2: if (!r) goto n5;
* n3: r=c; r=!r; l3: if (r) goto T;
* n4: r=g; r=!r; l4: if (r) goto T;
* n5: r=d; l5: if (r) goto T
* n6: r=e; l6: if (!r) goto F;
* n7: r=f; r=!r; l7: if (!r) goto F
* T: return TRUE
* F: return FALSE
*
* Notice, all the "l#" labels are no longer used, and they can now
* be discarded.
*
* ** THIRD PASS **
*
* For the third pass we deal with the inverts. As they simply just
* make the "when_to_branch" get inverted, a simple loop over the
* program to that does: "when_to_branch ^= invert;" will do the
* job, leaving us with:
* n1: r=a; if (!r) goto n5;
* n2: r=b; if (!r) goto n5;
* n3: r=c: if (!r) goto T;
* n4: r=g; if (!r) goto T;
* n5: r=d; if (r) goto T
* n6: r=e; if (!r) goto F;
* n7: r=f; if (r) goto F
* T: return TRUE
* F: return FALSE
*
* As "r = a; if (!r) goto n5;" is obviously the same as
* "if (!a) goto n5;" without doing anything we can interpret the
* program as:
* n1: if (!a) goto n5;
* n2: if (!b) goto n5;
* n3: if (!c) goto T;
* n4: if (!g) goto T;
* n5: if (d) goto T
* n6: if (!e) goto F;
* n7: if (f) goto F
* T: return TRUE
* F: return FALSE
*
* Since the inverts are discarded at the end, there's no reason to store
* them in the program array (and waste memory). A separate array to hold
* the inverts is used and freed at the end.
*/
static struct prog_entry *
predicate_parse(const char *str, int nr_parens, int nr_preds,
parse_pred_fn parse_pred, void *data,
struct filter_parse_error *pe)
{
struct prog_entry *prog_stack;
struct prog_entry *prog;
const char *ptr = str;
char *inverts = NULL;
int *op_stack;
int *top;
int invert = 0;
int ret = -ENOMEM;
int len;
int N = 0;
int i;
nr_preds += 2; /* For TRUE and FALSE */
op_stack = kmalloc_array(nr_parens, sizeof(*op_stack), GFP_KERNEL);
if (!op_stack)
return ERR_PTR(-ENOMEM);
prog_stack = kcalloc(nr_preds, sizeof(*prog_stack), GFP_KERNEL);
if (!prog_stack) {
parse_error(pe, -ENOMEM, 0);
goto out_free;
}
inverts = kmalloc_array(nr_preds, sizeof(*inverts), GFP_KERNEL);
if (!inverts) {
parse_error(pe, -ENOMEM, 0);
goto out_free;
}
top = op_stack;
prog = prog_stack;
*top = 0;
/* First pass */
while (*ptr) { /* #1 */
const char *next = ptr++;
if (isspace(*next))
continue;
switch (*next) {
case '(': /* #2 */
if (top - op_stack > nr_parens) {
ret = -EINVAL;
goto out_free;
}
*(++top) = invert;
continue;
case '!': /* #3 */
if (!is_not(next))
break;
invert = !invert;
continue;
}
if (N >= nr_preds) {
parse_error(pe, FILT_ERR_TOO_MANY_PREDS, next - str);
goto out_free;
}
inverts[N] = invert; /* #4 */
prog[N].target = N-1;
len = parse_pred(next, data, ptr - str, pe, &prog[N].pred);
if (len < 0) {
ret = len;
goto out_free;
}
ptr = next + len;
N++;
ret = -1;
while (1) { /* #5 */
next = ptr++;
if (isspace(*next))
continue;
switch (*next) {
case ')':
case '\0':
break;
case '&':
case '|':
/* accepting only "&&" or "||" */
if (next[1] == next[0]) {
ptr++;
break;
}
fallthrough;
default:
parse_error(pe, FILT_ERR_TOO_MANY_PREDS,
next - str);
goto out_free;
}
invert = *top & INVERT;
if (*top & PROCESS_AND) { /* #7 */
update_preds(prog, N - 1, invert);
*top &= ~PROCESS_AND;
}
if (*next == '&') { /* #8 */
*top |= PROCESS_AND;
break;
}
if (*top & PROCESS_OR) { /* #9 */
update_preds(prog, N - 1, !invert);
*top &= ~PROCESS_OR;
}
if (*next == '|') { /* #10 */
*top |= PROCESS_OR;
break;
}
if (!*next) /* #11 */
goto out;
if (top == op_stack) {
ret = -1;
/* Too few '(' */
parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, ptr - str);
goto out_free;
}
top--; /* #12 */
}
}
out:
if (top != op_stack) {
/* Too many '(' */
parse_error(pe, FILT_ERR_TOO_MANY_OPEN, ptr - str);
goto out_free;
}
if (!N) {
/* No program? */
ret = -EINVAL;
parse_error(pe, FILT_ERR_NO_FILTER, ptr - str);
goto out_free;
}
prog[N].pred = NULL; /* #13 */
prog[N].target = 1; /* TRUE */
prog[N+1].pred = NULL;
prog[N+1].target = 0; /* FALSE */
prog[N-1].target = N;
prog[N-1].when_to_branch = false;
/* Second Pass */
for (i = N-1 ; i--; ) {
int target = prog[i].target;
if (prog[i].when_to_branch == prog[target].when_to_branch)
prog[i].target = prog[target].target;
}
/* Third Pass */
for (i = 0; i < N; i++) {
invert = inverts[i] ^ prog[i].when_to_branch;
prog[i].when_to_branch = invert;
/* Make sure the program always moves forward */
if (WARN_ON(prog[i].target <= i)) {
ret = -EINVAL;
goto out_free;
}
}
kfree(op_stack);
kfree(inverts);
return prog;
out_free:
kfree(op_stack);
kfree(inverts);
if (prog_stack) {
for (i = 0; prog_stack[i].pred; i++)
free_predicate(prog_stack[i].pred);
kfree(prog_stack);
}
return ERR_PTR(ret);
}
static inline int
do_filter_cpumask(int op, const struct cpumask *mask, const struct cpumask *cmp)
{
switch (op) {
case OP_EQ:
return cpumask_equal(mask, cmp);
case OP_NE:
return !cpumask_equal(mask, cmp);
case OP_BAND:
return cpumask_intersects(mask, cmp);
default:
return 0;
}
}
/* Optimisation of do_filter_cpumask() for scalar fields */
static inline int
do_filter_scalar_cpumask(int op, unsigned int cpu, const struct cpumask *mask)
{
/*
* Per the weight-of-one cpumask optimisations, the mask passed in this
* function has a weight >= 2, so it is never equal to a single scalar.
*/
switch (op) {
case OP_EQ:
return false;
case OP_NE:
return true;
case OP_BAND:
return cpumask_test_cpu(cpu, mask);
default:
return 0;
}
}
static inline int
do_filter_cpumask_scalar(int op, const struct cpumask *mask, unsigned int cpu)
{
switch (op) {
case OP_EQ:
return cpumask_test_cpu(cpu, mask) &&
cpumask_nth(1, mask) >= nr_cpu_ids;
case OP_NE:
return !cpumask_test_cpu(cpu, mask) ||
cpumask_nth(1, mask) < nr_cpu_ids;
case OP_BAND:
return cpumask_test_cpu(cpu, mask);
default:
return 0;
}
}
enum pred_cmp_types {
PRED_CMP_TYPE_NOP,
PRED_CMP_TYPE_LT,
PRED_CMP_TYPE_LE,
PRED_CMP_TYPE_GT,
PRED_CMP_TYPE_GE,
PRED_CMP_TYPE_BAND,
};
#define DEFINE_COMPARISON_PRED(type) \
static int filter_pred_##type(struct filter_pred *pred, void *event) \
{ \
switch (pred->op) { \
case OP_LT: { \
type *addr = (type *)(event + pred->offset); \
type val = (type)pred->val; \
return *addr < val; \
} \
case OP_LE: { \
type *addr = (type *)(event + pred->offset); \
type val = (type)pred->val; \
return *addr <= val; \
} \
case OP_GT: { \
type *addr = (type *)(event + pred->offset); \
type val = (type)pred->val; \
return *addr > val; \
} \
case OP_GE: { \
type *addr = (type *)(event + pred->offset); \
type val = (type)pred->val; \
return *addr >= val; \
} \
case OP_BAND: { \
type *addr = (type *)(event + pred->offset); \
type val = (type)pred->val; \
return !!(*addr & val); \
} \
default: \
return 0; \
} \
}
#define DEFINE_CPUMASK_COMPARISON_PRED(size) \
static int filter_pred_##size##_cpumask(struct filter_pred *pred, void *event) \
{ \
u##size *addr = (u##size *)(event + pred->offset); \
unsigned int cpu = *addr; \
\
if (cpu >= nr_cpu_ids) \
return 0; \
\
return do_filter_scalar_cpumask(pred->op, cpu, pred->mask); \
}
#define DEFINE_EQUALITY_PRED(size) \
static int filter_pred_##size(struct filter_pred *pred, void *event) \
{ \
u##size *addr = (u##size *)(event + pred->offset); \
u##size val = (u##size)pred->val; \
int match; \
\
match = (val == *addr) ^ pred->not; \
\
return match; \
}
DEFINE_COMPARISON_PRED(s64);
DEFINE_COMPARISON_PRED(u64);
DEFINE_COMPARISON_PRED(s32);
DEFINE_COMPARISON_PRED(u32);
DEFINE_COMPARISON_PRED(s16);
DEFINE_COMPARISON_PRED(u16);
DEFINE_COMPARISON_PRED(s8);
DEFINE_COMPARISON_PRED(u8);
DEFINE_CPUMASK_COMPARISON_PRED(64);
DEFINE_CPUMASK_COMPARISON_PRED(32);
DEFINE_CPUMASK_COMPARISON_PRED(16);
DEFINE_CPUMASK_COMPARISON_PRED(8);
DEFINE_EQUALITY_PRED(64);
DEFINE_EQUALITY_PRED(32);
DEFINE_EQUALITY_PRED(16);
DEFINE_EQUALITY_PRED(8);
/* user space strings temp buffer */
#define USTRING_BUF_SIZE 1024
struct ustring_buffer {
char buffer[USTRING_BUF_SIZE];
};
static __percpu struct ustring_buffer *ustring_per_cpu;
static __always_inline char *test_string(char *str)
{
struct ustring_buffer *ubuf;
char *kstr;
if (!ustring_per_cpu)
return NULL;
ubuf = this_cpu_ptr(ustring_per_cpu);
kstr = ubuf->buffer;
/* For safety, do not trust the string pointer */
if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
return NULL;
return kstr;
}
static __always_inline char *test_ustring(char *str)
{
struct ustring_buffer *ubuf;
char __user *ustr;
char *kstr;
if (!ustring_per_cpu)
return NULL;
ubuf = this_cpu_ptr(ustring_per_cpu);
kstr = ubuf->buffer;
/* user space address? */
ustr = (char __user *)str;
if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
return NULL;
return kstr;
}
/* Filter predicate for fixed sized arrays of characters */
static int filter_pred_string(struct filter_pred *pred, void *event)
{
char *addr = (char *)(event + pred->offset);
int cmp, match;
cmp = pred->regex->match(addr, pred->regex, pred->regex->field_len);
match = cmp ^ pred->not;
return match;
}
static __always_inline int filter_pchar(struct filter_pred *pred, char *str)
{
int cmp, match;
int len;
len = strlen(str) + 1; /* including tailing '\0' */
cmp = pred->regex->match(str, pred->regex, len);
match = cmp ^ pred->not;
return match;
}
/* Filter predicate for char * pointers */
static int filter_pred_pchar(struct filter_pred *pred, void *event)
{
char **addr = (char **)(event + pred->offset);
char *str;
str = test_string(*addr);
if (!str)
return 0;
return filter_pchar(pred, str);
}
/* Filter predicate for char * pointers in user space*/
static int filter_pred_pchar_user(struct filter_pred *pred, void *event)
{
char **addr = (char **)(event + pred->offset);
char *str;
str = test_ustring(*addr);
if (!str)
return 0;
return filter_pchar(pred, str);
}
/*
* Filter predicate for dynamic sized arrays of characters.
* These are implemented through a list of strings at the end
* of the entry.
* Also each of these strings have a field in the entry which
* contains its offset from the beginning of the entry.
* We have then first to get this field, dereference it
* and add it to the address of the entry, and at last we have
* the address of the string.
*/
static int filter_pred_strloc(struct filter_pred *pred, void *event)
{
u32 str_item = *(u32 *)(event + pred->offset);
int str_loc = str_item & 0xffff;
int str_len = str_item >> 16;
char *addr = (char *)(event + str_loc);
int cmp, match;
cmp = pred->regex->match(addr, pred->regex, str_len);
match = cmp ^ pred->not;
return match;
}
/*
* Filter predicate for relative dynamic sized arrays of characters.
* These are implemented through a list of strings at the end
* of the entry as same as dynamic string.
* The difference is that the relative one records the location offset
* from the field itself, not the event entry.
*/
static int filter_pred_strrelloc(struct filter_pred *pred, void *event)
{
u32 *item = (u32 *)(event + pred->offset);
u32 str_item = *item;
int str_loc = str_item & 0xffff;
int str_len = str_item >> 16;
char *addr = (char *)(&item[1]) + str_loc;
int cmp, match;
cmp = pred->regex->match(addr, pred->regex, str_len);
match = cmp ^ pred->not;
return match;
}
/* Filter predicate for CPUs. */
static int filter_pred_cpu(struct filter_pred *pred, void *event)
{
int cpu, cmp;
cpu = raw_smp_processor_id();
cmp = pred->val;
switch (pred->op) {
case OP_EQ:
return cpu == cmp;
case OP_NE:
return cpu != cmp;
case OP_LT:
return cpu < cmp;
case OP_LE:
return cpu <= cmp;
case OP_GT:
return cpu > cmp;
case OP_GE:
return cpu >= cmp;
default:
return 0;
}
}
/* Filter predicate for current CPU vs user-provided cpumask */
static int filter_pred_cpu_cpumask(struct filter_pred *pred, void *event)
{
int cpu = raw_smp_processor_id();
return do_filter_scalar_cpumask(pred->op, cpu, pred->mask);
}
/* Filter predicate for cpumask field vs user-provided cpumask */
static int filter_pred_cpumask(struct filter_pred *pred, void *event)
{
u32 item = *(u32 *)(event + pred->offset);
int loc = item & 0xffff;
const struct cpumask *mask = (event + loc);
const struct cpumask *cmp = pred->mask;
return do_filter_cpumask(pred->op, mask, cmp);
}
/* Filter predicate for cpumask field vs user-provided scalar */
static int filter_pred_cpumask_cpu(struct filter_pred *pred, void *event)
{
u32 item = *(u32 *)(event + pred->offset);
int loc = item & 0xffff;
const struct cpumask *mask = (event + loc);
unsigned int cpu = pred->val;
return do_filter_cpumask_scalar(pred->op, mask, cpu);
}
/* Filter predicate for COMM. */
static int filter_pred_comm(struct filter_pred *pred, void *event)
{
int cmp;
cmp = pred->regex->match(current->comm, pred->regex,
TASK_COMM_LEN);
return cmp ^ pred->not;
}
/* Filter predicate for functions. */
static int filter_pred_function(struct filter_pred *pred, void *event)
{
unsigned long *addr = (unsigned long *)(event + pred->offset);
unsigned long start = (unsigned long)pred->val;
unsigned long end = (unsigned long)pred->val2;
int ret = *addr >= start && *addr < end;
return pred->op == OP_EQ ? ret : !ret;
}
/*
* regex_match_foo - Basic regex callbacks
*
* @str: the string to be searched
* @r: the regex structure containing the pattern string
* @len: the length of the string to be searched (including '\0')
*
* Note:
* - @str might not be NULL-terminated if it's of type DYN_STRING
* RDYN_STRING, or STATIC_STRING, unless @len is zero.
*/
static int regex_match_full(char *str, struct regex *r, int len)
{
/* len of zero means str is dynamic and ends with '\0' */
if (!len)
return strcmp(str, r->pattern) == 0;
return strncmp(str, r->pattern, len) == 0;
}
static int regex_match_front(char *str, struct regex *r, int len)
{
if (len && len < r->len)
return 0;
return strncmp(str, r->pattern, r->len) == 0;
}
static int regex_match_middle(char *str, struct regex *r, int len)
{
if (!len)
return strstr(str, r->pattern) != NULL;
return strnstr(str, r->pattern, len) != NULL;
}
static int regex_match_end(char *str, struct regex *r, int len)
{
int strlen = len - 1;
if (strlen >= r->len &&
memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
return 1;
return 0;
}
static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused)
{
if (glob_match(r->pattern, str))
return 1;
return 0;
}
/**
* filter_parse_regex - parse a basic regex
* @buff: the raw regex
* @len: length of the regex
* @search: will point to the beginning of the string to compare
* @not: tell whether the match will have to be inverted
*
* This passes in a buffer containing a regex and this function will
* set search to point to the search part of the buffer and
* return the type of search it is (see enum above).
* This does modify buff.
*
* Returns enum type.
* search returns the pointer to use for comparison.
* not returns 1 if buff started with a '!'
* 0 otherwise.
*/
enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
{
int type = MATCH_FULL;
int i;
if (buff[0] == '!') {
*not = 1;
buff++;
len--;
} else
*not = 0;
*search = buff;
if (isdigit(buff[0]))
return MATCH_INDEX;
for (i = 0; i < len; i++) {
if (buff[i] == '*') {
if (!i) {
type = MATCH_END_ONLY;
} else if (i == len - 1) {
if (type == MATCH_END_ONLY)
type = MATCH_MIDDLE_ONLY;
else
type = MATCH_FRONT_ONLY;
buff[i] = 0;
break;
} else { /* pattern continues, use full glob */
return MATCH_GLOB;
}
} else if (strchr("[?\\", buff[i])) {
return MATCH_GLOB;
}
}
if (buff[0] == '*')
*search = buff + 1;
return type;
}
static void filter_build_regex(struct filter_pred *pred)
{
struct regex *r = pred->regex;
char *search;
enum regex_type type = MATCH_FULL;
if (pred->op == OP_GLOB) {
type = filter_parse_regex(r->pattern, r->len, &search, &pred->not);
r->len = strlen(search);
memmove(r->pattern, search, r->len+1);
}
switch (type) {
/* MATCH_INDEX should not happen, but if it does, match full */
case MATCH_INDEX:
case MATCH_FULL:
r->match = regex_match_full;
break;
case MATCH_FRONT_ONLY:
r->match = regex_match_front;
break;
case MATCH_MIDDLE_ONLY:
r->match = regex_match_middle;
break;
case MATCH_END_ONLY:
r->match = regex_match_end;
break;
case MATCH_GLOB:
r->match = regex_match_glob;
break;
}
}
#ifdef CONFIG_FTRACE_STARTUP_TEST
static int test_pred_visited_fn(struct filter_pred *pred, void *event);
#else
static int test_pred_visited_fn(struct filter_pred *pred, void *event)
{
return 0;
}
#endif
static int filter_pred_fn_call(struct filter_pred *pred, void *event);
/* return 1 if event matches, 0 otherwise (discard) */
int filter_match_preds(struct event_filter *filter, void *rec)
{
struct prog_entry *prog;
int i;
/* no filter is considered a match */
if (!filter)
return 1;
/* Protected by either SRCU(tracepoint_srcu) or preempt_disable */
prog = rcu_dereference_raw(filter->prog);
if (!prog)
return 1;
for (i = 0; prog[i].pred; i++) {
struct filter_pred *pred = prog[i].pred;
int match = filter_pred_fn_call(pred, rec);
if (match == prog[i].when_to_branch)
i = prog[i].target;
}
return prog[i].target;
}
EXPORT_SYMBOL_GPL(filter_match_preds);
static void remove_filter_string(struct event_filter *filter)
{
if (!filter)
return;
kfree(filter->filter_string);
filter->filter_string = NULL;
}
static void append_filter_err(struct trace_array *tr,
struct filter_parse_error *pe,
struct event_filter *filter)
{
struct trace_seq *s;
int pos = pe->lasterr_pos;
char *buf;
int len;
if (WARN_ON(!filter->filter_string))
return;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return;
trace_seq_init(s);
len = strlen(filter->filter_string);
if (pos > len)
pos = len;
/* indexing is off by one */
if (pos)
pos++;
trace_seq_puts(s, filter->filter_string);
if (pe->lasterr > 0) {
trace_seq_printf(s, "\n%*s", pos, "^");
trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]);
tracing_log_err(tr, "event filter parse error",
filter->filter_string, err_text,
pe->lasterr, pe->lasterr_pos);
} else {
trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr);
tracing_log_err(tr, "event filter parse error",
filter->filter_string, err_text,
FILT_ERR_ERRNO, 0);
}
trace_seq_putc(s, 0);
buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL);
if (buf) {
kfree(filter->filter_string);
filter->filter_string = buf;
}
kfree(s);
}
static inline struct event_filter *event_filter(struct trace_event_file *file)
{
return file->filter;
}
/* caller must hold event_mutex */
void print_event_filter(struct trace_event_file *file, struct trace_seq *s)
{
struct event_filter *filter = event_filter(file);
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
trace_seq_puts(s, "none\n");
}
void print_subsystem_event_filter(struct event_subsystem *system,
struct trace_seq *s)
{
struct event_filter *filter;
mutex_lock(&event_mutex);
filter = system->filter;
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
trace_seq_puts(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
mutex_unlock(&event_mutex);
}
static void free_prog(struct event_filter *filter)
{
struct prog_entry *prog;
int i;
prog = rcu_access_pointer(filter->prog);
if (!prog)
return;
for (i = 0; prog[i].pred; i++)
free_predicate(prog[i].pred);
kfree(prog);
}
static void filter_disable(struct trace_event_file *file)
{
unsigned long old_flags = file->flags;
file->flags &= ~EVENT_FILE_FL_FILTERED;
if (old_flags != file->flags)
trace_buffered_event_disable();
}
static void __free_filter(struct event_filter *filter)
{
if (!filter)
return;
free_prog(filter);
kfree(filter->filter_string);
kfree(filter);
}
void free_event_filter(struct event_filter *filter)
{
__free_filter(filter);
}
static inline void __remove_filter(struct trace_event_file *file)
{
filter_disable(file);
remove_filter_string(file->filter);
}
static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir,
struct trace_array *tr)
{
struct trace_event_file *file;
list_for_each_entry(file, &tr->events, list) {
if (file->system != dir)
continue;
__remove_filter(file);
}
}
static inline void __free_subsystem_filter(struct trace_event_file *file)
{
__free_filter(file->filter);
file->filter = NULL;
}
static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
struct trace_array *tr)
{
struct trace_event_file *file;
list_for_each_entry(file, &tr->events, list) {
if (file->system != dir)
continue;
__free_subsystem_filter(file);
}
}
int filter_assign_type(const char *type)
{
if (strstr(type, "__data_loc")) {
if (strstr(type, "char"))
return FILTER_DYN_STRING;
if (strstr(type, "cpumask_t"))
return FILTER_CPUMASK;
}
if (strstr(type, "__rel_loc") && strstr(type, "char"))
return FILTER_RDYN_STRING;
if (strchr(type, '[') && strstr(type, "char"))
return FILTER_STATIC_STRING;
if (strcmp(type, "char *") == 0 || strcmp(type, "const char *") == 0)
return FILTER_PTR_STRING;
return FILTER_OTHER;
}
static enum filter_pred_fn select_comparison_fn(enum filter_op_ids op,
int field_size, int field_is_signed)
{
enum filter_pred_fn fn = FILTER_PRED_FN_NOP;
int pred_func_index = -1;
switch (op) {
case OP_EQ:
case OP_NE:
break;
default:
if (WARN_ON_ONCE(op < PRED_FUNC_START))
return fn;
pred_func_index = op - PRED_FUNC_START;
if (WARN_ON_ONCE(pred_func_index > PRED_FUNC_MAX))
return fn;
}
switch (field_size) {
case 8:
if (pred_func_index < 0)
fn = FILTER_PRED_FN_64;
else if (field_is_signed)
fn = FILTER_PRED_FN_S64;
else
fn = FILTER_PRED_FN_U64;
break;
case 4:
if (pred_func_index < 0)
fn = FILTER_PRED_FN_32;
else if (field_is_signed)
fn = FILTER_PRED_FN_S32;
else
fn = FILTER_PRED_FN_U32;
break;
case 2:
if (pred_func_index < 0)
fn = FILTER_PRED_FN_16;
else if (field_is_signed)
fn = FILTER_PRED_FN_S16;
else
fn = FILTER_PRED_FN_U16;
break;
case 1:
if (pred_func_index < 0)
fn = FILTER_PRED_FN_8;
else if (field_is_signed)
fn = FILTER_PRED_FN_S8;
else
fn = FILTER_PRED_FN_U8;
break;
}
return fn;
}
static int filter_pred_fn_call(struct filter_pred *pred, void *event)
{
switch (pred->fn_num) {
case FILTER_PRED_FN_64:
return filter_pred_64(pred, event);
case FILTER_PRED_FN_64_CPUMASK:
return filter_pred_64_cpumask(pred, event);
case FILTER_PRED_FN_S64:
return filter_pred_s64(pred, event);
case FILTER_PRED_FN_U64:
return filter_pred_u64(pred, event);
case FILTER_PRED_FN_32:
return filter_pred_32(pred, event);
case FILTER_PRED_FN_32_CPUMASK:
return filter_pred_32_cpumask(pred, event);
case FILTER_PRED_FN_S32:
return filter_pred_s32(pred, event);
case FILTER_PRED_FN_U32:
return filter_pred_u32(pred, event);
case FILTER_PRED_FN_16:
return filter_pred_16(pred, event);
case FILTER_PRED_FN_16_CPUMASK:
return filter_pred_16_cpumask(pred, event);
case FILTER_PRED_FN_S16:
return filter_pred_s16(pred, event);
case FILTER_PRED_FN_U16:
return filter_pred_u16(pred, event);
case FILTER_PRED_FN_8:
return filter_pred_8(pred, event);
case FILTER_PRED_FN_8_CPUMASK:
return filter_pred_8_cpumask(pred, event);
case FILTER_PRED_FN_S8:
return filter_pred_s8(pred, event);
case FILTER_PRED_FN_U8:
return filter_pred_u8(pred, event);
case FILTER_PRED_FN_COMM:
return filter_pred_comm(pred, event);
case FILTER_PRED_FN_STRING:
return filter_pred_string(pred, event);
case FILTER_PRED_FN_STRLOC:
return filter_pred_strloc(pred, event);
case FILTER_PRED_FN_STRRELLOC:
return filter_pred_strrelloc(pred, event);
case FILTER_PRED_FN_PCHAR_USER:
return filter_pred_pchar_user(pred, event);
case FILTER_PRED_FN_PCHAR:
return filter_pred_pchar(pred, event);
case FILTER_PRED_FN_CPU:
return filter_pred_cpu(pred, event);
case FILTER_PRED_FN_CPU_CPUMASK:
return filter_pred_cpu_cpumask(pred, event);
case FILTER_PRED_FN_CPUMASK:
return filter_pred_cpumask(pred, event);
case FILTER_PRED_FN_CPUMASK_CPU:
return filter_pred_cpumask_cpu(pred, event);
case FILTER_PRED_FN_FUNCTION:
return filter_pred_function(pred, event);
case FILTER_PRED_TEST_VISITED:
return test_pred_visited_fn(pred, event);
default:
return 0;
}
}
/* Called when a predicate is encountered by predicate_parse() */
static int parse_pred(const char *str, void *data,
int pos, struct filter_parse_error *pe,
struct filter_pred **pred_ptr)
{
struct trace_event_call *call = data;
struct ftrace_event_field *field;
struct filter_pred *pred = NULL;
unsigned long offset;
unsigned long size;
unsigned long ip;
char num_buf[24]; /* Big enough to hold an address */
char *field_name;
char *name;
bool function = false;
bool ustring = false;
char q;
u64 val;
int len;
int ret;
int op;
int s;
int i = 0;
/* First find the field to associate to */
while (isspace(str[i]))
i++;
s = i;
while (isalnum(str[i]) || str[i] == '_')
i++;
len = i - s;
if (!len)
return -1;
field_name = kmemdup_nul(str + s, len, GFP_KERNEL);
if (!field_name)
return -ENOMEM;
/* Make sure that the field exists */
field = trace_find_event_field(call, field_name);
kfree(field_name);
if (!field) {
parse_error(pe, FILT_ERR_FIELD_NOT_FOUND, pos + i);
return -EINVAL;
}
/* See if the field is a user space string */
if ((len = str_has_prefix(str + i, ".ustring"))) {
ustring = true;
i += len;
}
/* See if the field is a kernel function name */
if ((len = str_has_prefix(str + i, ".function"))) {
function = true;
i += len;
}
while (isspace(str[i]))
i++;
/* Make sure this op is supported */
for (op = 0; ops[op]; op++) {
/* This is why '<=' must come before '<' in ops[] */
if (strncmp(str + i, ops[op], strlen(ops[op])) == 0)
break;
}
if (!ops[op]) {
parse_error(pe, FILT_ERR_INVALID_OP, pos + i);
goto err_free;
}
i += strlen(ops[op]);
while (isspace(str[i]))
i++;
s = i;
pred = kzalloc(sizeof(*pred), GFP_KERNEL);
if (!pred)
return -ENOMEM;
pred->field = field;
pred->offset = field->offset;
pred->op = op;
if (function) {
/* The field must be the same size as long */
if (field->size != sizeof(long)) {
parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
goto err_free;
}
/* Function only works with '==' or '!=' and an unquoted string */
switch (op) {
case OP_NE:
case OP_EQ:
break;
default:
parse_error(pe, FILT_ERR_INVALID_OP, pos + i);
goto err_free;
}
if (isdigit(str[i])) {
/* We allow 0xDEADBEEF */
while (isalnum(str[i]))
i++;
len = i - s;
/* 0xfeedfacedeadbeef is 18 chars max */
if (len >= sizeof(num_buf)) {
parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
goto err_free;
}
strncpy(num_buf, str + s, len);
num_buf[len] = 0;
ret = kstrtoul(num_buf, 0, &ip);
if (ret) {
parse_error(pe, FILT_ERR_INVALID_VALUE, pos + i);
goto err_free;
}
} else {
s = i;
for (; str[i] && !isspace(str[i]); i++)
;
len = i - s;
name = kmemdup_nul(str + s, len, GFP_KERNEL);
if (!name)
goto err_mem;
ip = kallsyms_lookup_name(name);
kfree(name);
if (!ip) {
parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i);
goto err_free;
}
}
/* Now find the function start and end address */
if (!kallsyms_lookup_size_offset(ip, &size, &offset)) {
parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i);
goto err_free;
}
pred->fn_num = FILTER_PRED_FN_FUNCTION;
pred->val = ip - offset;
pred->val2 = pred->val + size;
} else if (ftrace_event_is_function(call)) {
/*
* Perf does things different with function events.
* It only allows an "ip" field, and expects a string.
* But the string does not need to be surrounded by quotes.
* If it is a string, the assigned function as a nop,
* (perf doesn't use it) and grab everything.
*/
if (strcmp(field->name, "ip") != 0) {
parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i);
goto err_free;
}
pred->fn_num = FILTER_PRED_FN_NOP;
/*
* Quotes are not required, but if they exist then we need
* to read them till we hit a matching one.
*/
if (str[i] == '\'' || str[i] == '"')
q = str[i];
else
q = 0;
for (i++; str[i]; i++) {
if (q && str[i] == q)
break;
if (!q && (str[i] == ')' || str[i] == '&' ||
str[i] == '|'))
break;
}
/* Skip quotes */
if (q)
s++;
len = i - s;
if (len >= MAX_FILTER_STR_VAL) {
parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
goto err_free;
}
pred->regex = kzalloc(sizeof(*pred->regex), GFP_KERNEL);
if (!pred->regex)
goto err_mem;
pred->regex->len = len;
strncpy(pred->regex->pattern, str + s, len);
pred->regex->pattern[len] = 0;
} else if (!strncmp(str + i, "CPUS", 4)) {
unsigned int maskstart;
bool single;
char *tmp;
switch (field->filter_type) {
case FILTER_CPUMASK:
case FILTER_CPU:
case FILTER_OTHER:
break;
default:
parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
goto err_free;
}
switch (op) {
case OP_EQ:
case OP_NE:
case OP_BAND:
break;
default:
parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
goto err_free;
}
/* Skip CPUS */
i += 4;
if (str[i++] != '{') {
parse_error(pe, FILT_ERR_MISSING_BRACE_OPEN, pos + i);
goto err_free;
}
maskstart = i;
/* Walk the cpulist until closing } */
for (; str[i] && str[i] != '}'; i++)
;
if (str[i] != '}') {
parse_error(pe, FILT_ERR_MISSING_BRACE_CLOSE, pos + i);
goto err_free;
}
if (maskstart == i) {
parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i);
goto err_free;
}
/* Copy the cpulist between { and } */
tmp = kmalloc((i - maskstart) + 1, GFP_KERNEL);
if (!tmp)
goto err_mem;
strscpy(tmp, str + maskstart, (i - maskstart) + 1);
pred->mask = kzalloc(cpumask_size(), GFP_KERNEL);
if (!pred->mask) {
kfree(tmp);
goto err_mem;
}
/* Now parse it */
if (cpulist_parse(tmp, pred->mask)) {
kfree(tmp);
parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i);
goto err_free;
}
kfree(tmp);
/* Move along */
i++;
/*
* Optimisation: if the user-provided mask has a weight of one
* then we can treat it as a scalar input.
*/
single = cpumask_weight(pred->mask) == 1;
if (single) {
pred->val = cpumask_first(pred->mask);
kfree(pred->mask);
pred->mask = NULL;
}
if (field->filter_type == FILTER_CPUMASK) {
pred->fn_num = single ?
FILTER_PRED_FN_CPUMASK_CPU :
FILTER_PRED_FN_CPUMASK;
} else if (field->filter_type == FILTER_CPU) {
if (single) {
if (pred->op == OP_BAND)
pred->op = OP_EQ;
pred->fn_num = FILTER_PRED_FN_CPU;
} else {
pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK;
}
} else if (single) {
if (pred->op == OP_BAND)
pred->op = OP_EQ;
pred->fn_num = select_comparison_fn(pred->op, field->size, false);
if (pred->op == OP_NE)
pred->not = 1;
} else {
switch (field->size) {
case 8:
pred->fn_num = FILTER_PRED_FN_64_CPUMASK;
break;
case 4:
pred->fn_num = FILTER_PRED_FN_32_CPUMASK;
break;
case 2:
pred->fn_num = FILTER_PRED_FN_16_CPUMASK;
break;
case 1:
pred->fn_num = FILTER_PRED_FN_8_CPUMASK;
break;
}
}
/* This is either a string, or an integer */
} else if (str[i] == '\'' || str[i] == '"') {
char q = str[i];
/* Make sure the op is OK for strings */
switch (op) {
case OP_NE:
pred->not = 1;
fallthrough;
case OP_GLOB:
case OP_EQ:
break;
default:
parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
goto err_free;
}
/* Make sure the field is OK for strings */
if (!is_string_field(field)) {
parse_error(pe, FILT_ERR_EXPECT_DIGIT, pos + i);
goto err_free;
}
for (i++; str[i]; i++) {
if (str[i] == q)
break;
}
if (!str[i]) {
parse_error(pe, FILT_ERR_MISSING_QUOTE, pos + i);
goto err_free;
}
/* Skip quotes */
s++;
len = i - s;
if (len >= MAX_FILTER_STR_VAL) {
parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
goto err_free;
}
pred->regex = kzalloc(sizeof(*pred->regex), GFP_KERNEL);
if (!pred->regex)
goto err_mem;
pred->regex->len = len;
strncpy(pred->regex->pattern, str + s, len);
pred->regex->pattern[len] = 0;
filter_build_regex(pred);
if (field->filter_type == FILTER_COMM) {
pred->fn_num = FILTER_PRED_FN_COMM;
} else if (field->filter_type == FILTER_STATIC_STRING) {
pred->fn_num = FILTER_PRED_FN_STRING;
pred->regex->field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING) {
pred->fn_num = FILTER_PRED_FN_STRLOC;
} else if (field->filter_type == FILTER_RDYN_STRING)
pred->fn_num = FILTER_PRED_FN_STRRELLOC;
else {
if (!ustring_per_cpu) {
/* Once allocated, keep it around for good */
ustring_per_cpu = alloc_percpu(struct ustring_buffer);
if (!ustring_per_cpu)
goto err_mem;
}
if (ustring)
pred->fn_num = FILTER_PRED_FN_PCHAR_USER;
else
pred->fn_num = FILTER_PRED_FN_PCHAR;
}
/* go past the last quote */
i++;
} else if (isdigit(str[i]) || str[i] == '-') {
/* Make sure the field is not a string */
if (is_string_field(field)) {
parse_error(pe, FILT_ERR_EXPECT_STRING, pos + i);
goto err_free;
}
if (op == OP_GLOB) {
parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
goto err_free;
}
if (str[i] == '-')
i++;
/* We allow 0xDEADBEEF */
while (isalnum(str[i]))
i++;
len = i - s;
/* 0xfeedfacedeadbeef is 18 chars max */
if (len >= sizeof(num_buf)) {
parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
goto err_free;
}
strncpy(num_buf, str + s, len);
num_buf[len] = 0;
/* Make sure it is a value */
if (field->is_signed)
ret = kstrtoll(num_buf, 0, &val);
else
ret = kstrtoull(num_buf, 0, &val);
if (ret) {
parse_error(pe, FILT_ERR_ILLEGAL_INTVAL, pos + s);
goto err_free;
}
pred->val = val;
if (field->filter_type == FILTER_CPU)
pred->fn_num = FILTER_PRED_FN_CPU;
else {
pred->fn_num = select_comparison_fn(pred->op, field->size,
field->is_signed);
if (pred->op == OP_NE)
pred->not = 1;
}
} else {
parse_error(pe, FILT_ERR_INVALID_VALUE, pos + i);
goto err_free;
}
*pred_ptr = pred;
return i;
err_free:
free_predicate(pred);
return -EINVAL;
err_mem:
free_predicate(pred);
return -ENOMEM;
}
enum {
TOO_MANY_CLOSE = -1,
TOO_MANY_OPEN = -2,
MISSING_QUOTE = -3,
};
/*
* Read the filter string once to calculate the number of predicates
* as well as how deep the parentheses go.
*
* Returns:
* 0 - everything is fine (err is undefined)
* -1 - too many ')'
* -2 - too many '('
* -3 - No matching quote
*/
static int calc_stack(const char *str, int *parens, int *preds, int *err)
{
bool is_pred = false;
int nr_preds = 0;
int open = 1; /* Count the expression as "(E)" */
int last_quote = 0;
int max_open = 1;
int quote = 0;
int i;
*err = 0;
for (i = 0; str[i]; i++) {
if (isspace(str[i]))
continue;
if (quote) {
if (str[i] == quote)
quote = 0;
continue;
}
switch (str[i]) {
case '\'':
case '"':
quote = str[i];
last_quote = i;
break;
case '|':
case '&':
if (str[i+1] != str[i])
break;
is_pred = false;
continue;
case '(':
is_pred = false;
open++;
if (open > max_open)
max_open = open;
continue;
case ')':
is_pred = false;
if (open == 1) {
*err = i;
return TOO_MANY_CLOSE;
}
open--;
continue;
}
if (!is_pred) {
nr_preds++;
is_pred = true;
}
}
if (quote) {
*err = last_quote;
return MISSING_QUOTE;
}
if (open != 1) {
int level = open;
/* find the bad open */
for (i--; i; i--) {
if (quote) {
if (str[i] == quote)
quote = 0;
continue;
}
switch (str[i]) {
case '(':
if (level == open) {
*err = i;
return TOO_MANY_OPEN;
}
level--;
break;
case ')':
level++;
break;
case '\'':
case '"':
quote = str[i];
break;
}
}
/* First character is the '(' with missing ')' */
*err = 0;
return TOO_MANY_OPEN;
}
/* Set the size of the required stacks */
*parens = max_open;
*preds = nr_preds;
return 0;
}
static int process_preds(struct trace_event_call *call,
const char *filter_string,
struct event_filter *filter,
struct filter_parse_error *pe)
{
struct prog_entry *prog;
int nr_parens;
int nr_preds;
int index;
int ret;
ret = calc_stack(filter_string, &nr_parens, &nr_preds, &index);
if (ret < 0) {
switch (ret) {
case MISSING_QUOTE:
parse_error(pe, FILT_ERR_MISSING_QUOTE, index);
break;
case TOO_MANY_OPEN:
parse_error(pe, FILT_ERR_TOO_MANY_OPEN, index);
break;
default:
parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, index);
}
return ret;
}
if (!nr_preds)
return -EINVAL;
prog = predicate_parse(filter_string, nr_parens, nr_preds,
parse_pred, call, pe);
if (IS_ERR(prog))
return PTR_ERR(prog);
rcu_assign_pointer(filter->prog, prog);
return 0;
}
static inline void event_set_filtered_flag(struct trace_event_file *file)
{
unsigned long old_flags = file->flags;
file->flags |= EVENT_FILE_FL_FILTERED;
if (old_flags != file->flags)
trace_buffered_event_enable();
}
static inline void event_set_filter(struct trace_event_file *file,
struct event_filter *filter)
{
rcu_assign_pointer(file->filter, filter);
}
static inline void event_clear_filter(struct trace_event_file *file)
{
RCU_INIT_POINTER(file->filter, NULL);
}
struct filter_list {
struct list_head list;
struct event_filter *filter;
};
static int process_system_preds(struct trace_subsystem_dir *dir,
struct trace_array *tr,
struct filter_parse_error *pe,
char *filter_string)
{
struct trace_event_file *file;
struct filter_list *filter_item;
struct event_filter *filter = NULL;
struct filter_list *tmp;
LIST_HEAD(filter_list);
bool fail = true;
int err;
list_for_each_entry(file, &tr->events, list) {
if (file->system != dir)
continue;
filter = kzalloc(sizeof(*filter), GFP_KERNEL);
if (!filter)
goto fail_mem;
filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
if (!filter->filter_string)
goto fail_mem;
err = process_preds(file->event_call, filter_string, filter, pe);
if (err) {
filter_disable(file);
parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
append_filter_err(tr, pe, filter);
} else
event_set_filtered_flag(file);
filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
if (!filter_item)
goto fail_mem;
list_add_tail(&filter_item->list, &filter_list);
/*
* Regardless of if this returned an error, we still
* replace the filter for the call.
*/
filter_item->filter = event_filter(file);
event_set_filter(file, filter);
filter = NULL;
fail = false;
}
if (fail)
goto fail;
/*
* The calls can still be using the old filters.
* Do a synchronize_rcu() and to ensure all calls are
* done with them before we free them.
*/
tracepoint_synchronize_unregister();
list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
__free_filter(filter_item->filter);
list_del(&filter_item->list);
kfree(filter_item);
}
return 0;
fail:
/* No call succeeded */
list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
list_del(&filter_item->list);
kfree(filter_item);
}
parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
return -EINVAL;
fail_mem:
__free_filter(filter);
/* If any call succeeded, we still need to sync */
if (!fail)
tracepoint_synchronize_unregister();
list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
__free_filter(filter_item->filter);
list_del(&filter_item->list);
kfree(filter_item);
}
return -ENOMEM;
}
static int create_filter_start(char *filter_string, bool set_str,
struct filter_parse_error **pse,
struct event_filter **filterp)
{
struct event_filter *filter;
struct filter_parse_error *pe = NULL;
int err = 0;
if (WARN_ON_ONCE(*pse || *filterp))
return -EINVAL;
filter = kzalloc(sizeof(*filter), GFP_KERNEL);
if (filter && set_str) {
filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
if (!filter->filter_string)
err = -ENOMEM;
}
pe = kzalloc(sizeof(*pe), GFP_KERNEL);
if (!filter || !pe || err) {
kfree(pe);
__free_filter(filter);
return -ENOMEM;
}
/* we're committed to creating a new filter */
*filterp = filter;
*pse = pe;
return 0;
}
static void create_filter_finish(struct filter_parse_error *pe)
{
kfree(pe);
}
/**
* create_filter - create a filter for a trace_event_call
* @tr: the trace array associated with these events
* @call: trace_event_call to create a filter for
* @filter_string: filter string
* @set_str: remember @filter_str and enable detailed error in filter
* @filterp: out param for created filter (always updated on return)
* Must be a pointer that references a NULL pointer.
*
* Creates a filter for @call with @filter_str. If @set_str is %true,
* @filter_str is copied and recorded in the new filter.
*
* On success, returns 0 and *@filterp points to the new filter. On
* failure, returns -errno and *@filterp may point to %NULL or to a new
* filter. In the latter case, the returned filter contains error
* information if @set_str is %true and the caller is responsible for
* freeing it.
*/
static int create_filter(struct trace_array *tr,
struct trace_event_call *call,
char *filter_string, bool set_str,
struct event_filter **filterp)
{
struct filter_parse_error *pe = NULL;
int err;
/* filterp must point to NULL */
if (WARN_ON(*filterp))
*filterp = NULL;
err = create_filter_start(filter_string, set_str, &pe, filterp);
if (err)
return err;
err = process_preds(call, filter_string, *filterp, pe);
if (err && set_str)
append_filter_err(tr, pe, *filterp);
create_filter_finish(pe);
return err;
}
int create_event_filter(struct trace_array *tr,
struct trace_event_call *call,
char *filter_str, bool set_str,
struct event_filter **filterp)
{
return create_filter(tr, call, filter_str, set_str, filterp);
}
/**
* create_system_filter - create a filter for an event subsystem
* @dir: the descriptor for the subsystem directory
* @filter_str: filter string
* @filterp: out param for created filter (always updated on return)
*
* Identical to create_filter() except that it creates a subsystem filter
* and always remembers @filter_str.
*/
static int create_system_filter(struct trace_subsystem_dir *dir,
char *filter_str, struct event_filter **filterp)
{
struct filter_parse_error *pe = NULL;
int err;
err = create_filter_start(filter_str, true, &pe, filterp);
if (!err) {
err = process_system_preds(dir, dir->tr, pe, filter_str);
if (!err) {
/* System filters just show a default message */
kfree((*filterp)->filter_string);
(*filterp)->filter_string = NULL;
} else {
append_filter_err(dir->tr, pe, *filterp);
}
}
create_filter_finish(pe);
return err;
}
/* caller must hold event_mutex */
int apply_event_filter(struct trace_event_file *file, char *filter_string)
{
struct trace_event_call *call = file->event_call;
struct event_filter *filter = NULL;
int err;
if (file->flags & EVENT_FILE_FL_FREED)
return -ENODEV;
if (!strcmp(strstrip(filter_string), "0")) {
filter_disable(file);
filter = event_filter(file);
if (!filter)
return 0;
event_clear_filter(file);
/* Make sure the filter is not being used */
tracepoint_synchronize_unregister();
__free_filter(filter);
return 0;
}
err = create_filter(file->tr, call, filter_string, true, &filter);
/*
* Always swap the call filter with the new filter
* even if there was an error. If there was an error
* in the filter, we disable the filter and show the error
* string
*/
if (filter) {
struct event_filter *tmp;
tmp = event_filter(file);
if (!err)
event_set_filtered_flag(file);
else
filter_disable(file);
event_set_filter(file, filter);
if (tmp) {
/* Make sure the call is done with the filter */
tracepoint_synchronize_unregister();
__free_filter(tmp);
}
}
return err;
}
int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
char *filter_string)
{
struct event_subsystem *system = dir->subsystem;
struct trace_array *tr = dir->tr;
struct event_filter *filter = NULL;
int err = 0;
mutex_lock(&event_mutex);
/* Make sure the system still has events */
if (!dir->nr_events) {
err = -ENODEV;
goto out_unlock;
}
if (!strcmp(strstrip(filter_string), "0")) {
filter_free_subsystem_preds(dir, tr);
remove_filter_string(system->filter);
filter = system->filter;
system->filter = NULL;
/* Ensure all filters are no longer used */
tracepoint_synchronize_unregister();
filter_free_subsystem_filters(dir, tr);
__free_filter(filter);
goto out_unlock;
}
err = create_system_filter(dir, filter_string, &filter);
if (filter) {
/*
* No event actually uses the system filter
* we can free it without synchronize_rcu().
*/
__free_filter(system->filter);
system->filter = filter;
}
out_unlock:
mutex_unlock(&event_mutex);
return err;
}
#ifdef CONFIG_PERF_EVENTS
void ftrace_profile_free_filter(struct perf_event *event)
{
struct event_filter *filter = event->filter;
event->filter = NULL;
__free_filter(filter);
}
struct function_filter_data {
struct ftrace_ops *ops;
int first_filter;
int first_notrace;
};
#ifdef CONFIG_FUNCTION_TRACER
static char **
ftrace_function_filter_re(char *buf, int len, int *count)
{
char *str, **re;
str = kstrndup(buf, len, GFP_KERNEL);
if (!str)
return NULL;
/*
* The argv_split function takes white space
* as a separator, so convert ',' into spaces.
*/
strreplace(str, ',', ' ');
re = argv_split(GFP_KERNEL, str, count);
kfree(str);
return re;
}
static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter,
int reset, char *re, int len)
{
int ret;
if (filter)
ret = ftrace_set_filter(ops, re, len, reset);
else
ret = ftrace_set_notrace(ops, re, len, reset);
return ret;
}
static int __ftrace_function_set_filter(int filter, char *buf, int len,
struct function_filter_data *data)
{
int i, re_cnt, ret = -EINVAL;
int *reset;
char **re;
reset = filter ? &data->first_filter : &data->first_notrace;
/*
* The 'ip' field could have multiple filters set, separated
* either by space or comma. We first cut the filter and apply
* all pieces separately.
*/
re = ftrace_function_filter_re(buf, len, &re_cnt);
if (!re)
return -EINVAL;
for (i = 0; i < re_cnt; i++) {
ret = ftrace_function_set_regexp(data->ops, filter, *reset,
re[i], strlen(re[i]));
if (ret)
break;
if (*reset)
*reset = 0;
}
argv_free(re);
return ret;
}
static int ftrace_function_check_pred(struct filter_pred *pred)
{
struct ftrace_event_field *field = pred->field;
/*
* Check the predicate for function trace, verify:
* - only '==' and '!=' is used
* - the 'ip' field is used
*/
if ((pred->op != OP_EQ) && (pred->op != OP_NE))
return -EINVAL;
if (strcmp(field->name, "ip"))
return -EINVAL;
return 0;
}
static int ftrace_function_set_filter_pred(struct filter_pred *pred,
struct function_filter_data *data)
{
int ret;
/* Checking the node is valid for function trace. */
ret = ftrace_function_check_pred(pred);
if (ret)
return ret;
return __ftrace_function_set_filter(pred->op == OP_EQ,
pred->regex->pattern,
pred->regex->len,
data);
}
static bool is_or(struct prog_entry *prog, int i)
{
int target;
/*
* Only "||" is allowed for function events, thus,
* all true branches should jump to true, and any
* false branch should jump to false.
*/
target = prog[i].target + 1;
/* True and false have NULL preds (all prog entries should jump to one */
if (prog[target].pred)
return false;
/* prog[target].target is 1 for TRUE, 0 for FALSE */
return prog[i].when_to_branch == prog[target].target;
}
static int ftrace_function_set_filter(struct perf_event *event,
struct event_filter *filter)
{
struct prog_entry *prog = rcu_dereference_protected(filter->prog,
lockdep_is_held(&event_mutex));
struct function_filter_data data = {
.first_filter = 1,
.first_notrace = 1,
.ops = &event->ftrace_ops,
};
int i;
for (i = 0; prog[i].pred; i++) {
struct filter_pred *pred = prog[i].pred;
if (!is_or(prog, i))
return -EINVAL;
if (ftrace_function_set_filter_pred(pred, &data) < 0)
return -EINVAL;
}
return 0;
}
#else
static int ftrace_function_set_filter(struct perf_event *event,
struct event_filter *filter)
{
return -ENODEV;
}
#endif /* CONFIG_FUNCTION_TRACER */
int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str)
{
int err;
struct event_filter *filter = NULL;
struct trace_event_call *call;
mutex_lock(&event_mutex);
call = event->tp_event;
err = -EINVAL;
if (!call)
goto out_unlock;
err = -EEXIST;
if (event->filter)
goto out_unlock;
err = create_filter(NULL, call, filter_str, false, &filter);
if (err)
goto free_filter;
if (ftrace_event_is_function(call))
err = ftrace_function_set_filter(event, filter);
else
event->filter = filter;
free_filter:
if (err || ftrace_event_is_function(call))
__free_filter(filter);
out_unlock:
mutex_unlock(&event_mutex);
return err;
}
#endif /* CONFIG_PERF_EVENTS */
#ifdef CONFIG_FTRACE_STARTUP_TEST
#include <linux/types.h>
#include <linux/tracepoint.h>
#define CREATE_TRACE_POINTS
#include "trace_events_filter_test.h"
#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
{ \
.filter = FILTER, \
.rec = { .a = va, .b = vb, .c = vc, .d = vd, \
.e = ve, .f = vf, .g = vg, .h = vh }, \
.match = m, \
.not_visited = nvisit, \
}
#define YES 1
#define NO 0
static struct test_filter_data_t {
char *filter;
struct trace_event_raw_ftrace_test_filter rec;
int match;
char *not_visited;
} test_filter_data[] = {
#define FILTER "a == 1 && b == 1 && c == 1 && d == 1 && " \
"e == 1 && f == 1 && g == 1 && h == 1"
DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, ""),
DATA_REC(NO, 0, 1, 1, 1, 1, 1, 1, 1, "bcdefgh"),
DATA_REC(NO, 1, 1, 1, 1, 1, 1, 1, 0, ""),
#undef FILTER
#define FILTER "a == 1 || b == 1 || c == 1 || d == 1 || " \
"e == 1 || f == 1 || g == 1 || h == 1"
DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""),
DATA_REC(YES, 0, 0, 0, 0, 0, 0, 0, 1, ""),
DATA_REC(YES, 1, 0, 0, 0, 0, 0, 0, 0, "bcdefgh"),
#undef FILTER
#define FILTER "(a == 1 || b == 1) && (c == 1 || d == 1) && " \
"(e == 1 || f == 1) && (g == 1 || h == 1)"
DATA_REC(NO, 0, 0, 1, 1, 1, 1, 1, 1, "dfh"),
DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""),
DATA_REC(YES, 1, 0, 1, 0, 0, 1, 0, 1, "bd"),
DATA_REC(NO, 1, 0, 1, 0, 0, 1, 0, 0, "bd"),
#undef FILTER
#define FILTER "(a == 1 && b == 1) || (c == 1 && d == 1) || " \
"(e == 1 && f == 1) || (g == 1 && h == 1)"
DATA_REC(YES, 1, 0, 1, 1, 1, 1, 1, 1, "efgh"),
DATA_REC(YES, 0, 0, 0, 0, 0, 0, 1, 1, ""),
DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""),
#undef FILTER
#define FILTER "(a == 1 && b == 1) && (c == 1 && d == 1) && " \
"(e == 1 && f == 1) || (g == 1 && h == 1)"
DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 0, "gh"),
DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""),
DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, ""),
#undef FILTER
#define FILTER "((a == 1 || b == 1) || (c == 1 || d == 1) || " \
"(e == 1 || f == 1)) && (g == 1 || h == 1)"
DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 1, "bcdef"),
DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""),
DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, "h"),
#undef FILTER
#define FILTER "((((((((a == 1) && (b == 1)) || (c == 1)) && (d == 1)) || " \
"(e == 1)) && (f == 1)) || (g == 1)) && (h == 1))"
DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "ceg"),
DATA_REC(NO, 0, 1, 0, 1, 0, 1, 0, 1, ""),
DATA_REC(NO, 1, 0, 1, 0, 1, 0, 1, 0, ""),
#undef FILTER
#define FILTER "((((((((a == 1) || (b == 1)) && (c == 1)) || (d == 1)) && " \
"(e == 1)) || (f == 1)) && (g == 1)) || (h == 1))"
DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "bdfh"),
DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""),
DATA_REC(YES, 1, 0, 1, 0, 1, 0, 1, 0, "bdfh"),
};
#undef DATA_REC
#undef FILTER
#undef YES
#undef NO
#define DATA_CNT ARRAY_SIZE(test_filter_data)
static int test_pred_visited;
static int test_pred_visited_fn(struct filter_pred *pred, void *event)
{
struct ftrace_event_field *field = pred->field;
test_pred_visited = 1;
printk(KERN_INFO "\npred visited %s\n", field->name);
return 1;
}
static void update_pred_fn(struct event_filter *filter, char *fields)
{
struct prog_entry *prog = rcu_dereference_protected(filter->prog,
lockdep_is_held(&event_mutex));
int i;
for (i = 0; prog[i].pred; i++) {
struct filter_pred *pred = prog[i].pred;
struct ftrace_event_field *field = pred->field;
WARN_ON_ONCE(pred->fn_num == FILTER_PRED_FN_NOP);
if (!field) {
WARN_ONCE(1, "all leafs should have field defined %d", i);
continue;
}
if (!strchr(fields, *field->name))
continue;
pred->fn_num = FILTER_PRED_TEST_VISITED;
}
}
static __init int ftrace_test_event_filter(void)
{
int i;
printk(KERN_INFO "Testing ftrace filter: ");
for (i = 0; i < DATA_CNT; i++) {
struct event_filter *filter = NULL;
struct test_filter_data_t *d = &test_filter_data[i];
int err;
err = create_filter(NULL, &event_ftrace_test_filter,
d->filter, false, &filter);
if (err) {
printk(KERN_INFO
"Failed to get filter for '%s', err %d\n",
d->filter, err);
__free_filter(filter);
break;
}
/* Needed to dereference filter->prog */
mutex_lock(&event_mutex);
/*
* The preemption disabling is not really needed for self
* tests, but the rcu dereference will complain without it.
*/
preempt_disable();
if (*d->not_visited)
update_pred_fn(filter, d->not_visited);
test_pred_visited = 0;
err = filter_match_preds(filter, &d->rec);
preempt_enable();
mutex_unlock(&event_mutex);
__free_filter(filter);
if (test_pred_visited) {
printk(KERN_INFO
"Failed, unwanted pred visited for filter %s\n",
d->filter);
break;
}
if (err != d->match) {
printk(KERN_INFO
"Failed to match filter '%s', expected %d\n",
d->filter, d->match);
break;
}
}
if (i == DATA_CNT)
printk(KERN_CONT "OK\n");
return 0;
}
late_initcall(ftrace_test_event_filter);
#endif /* CONFIG_FTRACE_STARTUP_TEST */