fail(9) support:

Add support for kernel fault injection using KFAIL_POINT_* macros and
fail_point_* infrastructure. Add example fail point in vfs_bio.c to
simulate VM buf pressure.

Approved by:        dfr (mentor)
This commit is contained in:
Zachary Loafman 2009-05-27 16:36:54 +00:00
parent 84c5da4c4e
commit cfeb7489c2
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=192908
7 changed files with 1088 additions and 3 deletions

View file

@ -99,6 +99,7 @@ MAN= accept_filter.9 \
DRIVER_MODULE.9 \
EVENTHANDLER.9 \
extattr.9 \
fail.9 \
fetch.9 \
firmware.9 \
g_access.9 \

197
share/man/man9/fail.9 Normal file
View file

@ -0,0 +1,197 @@
.\"
.\" Copyright (c) 2009 Isilon Inc http://www.isilon.com/
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice(s), this list of conditions and the following disclaimer as
.\" the first lines of this file unmodified other than the possible
.\" addition of one or more copyright notices.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice(s), this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
.\" DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
.\" DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd May 10, 2009
.Dt FAIL 9
.Os
.Sh NAME
.Nm KFAIL_POINT_CODE ,
.Nm KFAIL_POINT_RETURN ,
.Nm KFAIL_POINT_RETURN_VOID ,
.Nm KFAIL_POINT_ERROR ,
.Nm KFAIL_POINT_GOTO ,
.Nm fail_point ,
.Nm DEBUG_FP
.
.Nd fail points
.Sh SYNOPSIS
.In sys/fail.h
.Fn KFAIL_POINT_CODE "parent" "name" "code"
.Fn KFAIL_POINT_RETURN "parent" "name"
.Fn KFAIL_POINT_RETURN_VOID "parent" "name"
.Fn KFAIL_POINT_ERROR "parent" "name" "error_var"
.Fn KFAIL_POINT_GOTO "parent" "name" "error_var" "label"
.Sh DESCRIPTION
Fail points are used to add code points where errors may be injected
in a user controlled fashion. Fail points provide a convenient wrapper
around user provided error injection code, providing a
.Xr sysctl 9 MIB , and a parser for that MIB that describes how the error
injection code should fire.
.Pp
The base fail point macro is
.Fn KFAIL_POINT_CODE
where
.Fa parent
is a sysctl tree (frequently
.Sy DEBUG_FP
for kernel fail points, but various subsystems may wish to provide
their own fail point trees), and
.Fa name
is the name of the MIB in that tree, and
.Fa code
is the error injection code. The
.Fa code
argument does not require braces, but it is considered good style to
use braces for any multi-line code arguments. Inside the
.Fa code
argument, the evaluation of
.Sy RETURN_VALUE
is derived from the
.Fn return
value set in the sysctl MIB. See
.Sx SYSCTL VARIABLES
below.
.Pp
The remaining
.Fn KFAIL_POINT_*
macros are wrappers around common error injection paths:
.Bl -tag -width 8
.It Fn KFAIL_POINT_RETURN parent name
is the equivalent of
.Sy KFAIL_POINT_CODE(..., return RETURN_VALUE)
.It Fn KFAIL_POINT_RETURN_VOID parent name
is the equivalent of
.Sy KFAIL_POINT_CODE(..., return)
.It Fn KFAIL_POINT_ERROR parent name error_var
is the equivalent of
.Sy KFAIL_POINT_CODE(..., error_var = RETURN_VALUE)
.It Fn KFAIL_POINT_GOTO parent name error_var label
is the equivalent of
.Sy KFAIL_POINT_CODE(...,
{ error_var = RETURN_VALUE; goto label;})
.El
.Pp
.Sh SYSCTL VARIABLES
The
.Fn KFAIL_POINT_*
macros add sysctl MIBs where specified. Many base kernel MIBs can be
found in the
.Sy debug.fail_point
tree (referenced in code by
.Sy DEBUG_FP
).
.Pp
The sysctl setting recognizes the following grammar:
.Pp
<fail_point> ::
<term> ( "->" <term> )*
.Pp
<term> ::
( (<float> "%") | (<integer> "*" ) )*
<type>
[ "(" <integer> ")" ]
.Pp
<float> ::
<integer> [ "." <integer> ] |
"." <integer>
.Pp
<type> ::
"off" | "return" | "sleep" | "panic" | "break" | "print"
.Pp
The <type>
argument specifies which action to take:
.Bl -tag -width ".Dv return"
.It Sy off
Take no action (does not trigger fail point code)
.It Sy return
Trigger fail point code with specified argument
.It Sy sleep
Sleep the specified number of milliseconds
.It Sy panic
Panic
.It Sy break
Break into the debugger.
.It Sy print
Print that the fail point executed
.El
.Pp
The <float>% and <integer>* modifiers prior to <type> control when
<type> is executed. The <float>% form (e.g. "1.2%") can be used to
specify a probability that <type> will execute. The <integer>* form
(e.g. "5*") can be used to specify the number of times <type> should
be executed before this <term> is disabled. Only the last probability
and the last count are used if multiple are specified, i.e. "1.2%2%"
is the same as "2%". When both a probability and a count are
specified, the probability is evaluated before the count, i.e. "2%5*"
means "2% of the time, but only execute it 5 times total".
.Pp
The operator -> can be used to express cascading terms. If you specify
<term1>-><term2>, it means that if <term1> doesn't 'execute', <term2>
is evaluated. For the purpose of this operator, the return() and
print() operators are the only types that cascade. A return() term
only cascades if the code executes, and a print() term only cascades
when passed a non-zero argument.
.Pp
.Sh EXAMPLES
.Bl -tag
.It Sy sysctl debug.fail_point.foobar="2.1%return(5)"
21/1000ths of the time, execute
.Fa code
with RETURN_VALUE set to 5.
.It Sy sysctl debug.fail_point.foobar="2%return(5)->5%return(22)"
2/100ths of the time, execute
.Fa code
with RETURN_VALUE set to 5. If that doesn't happen, 5% of the time
execute
.Fa code
with RETURN_VALUE set to 22.
.It Sy sysctl debug.fail_point.foobar="5*return(5)->0.1%return(22)"
For the first 5 executions, return 5. After that, 1/1000th of the time, return 22.
.It Sy sysctl debug.fail_point.foobar="0.1%5*return(5)"
Return 5 for 1 in 1000 executions, but only execute 5 times total.
.It Sy sysctl debug.fail_point.foobar="1%sleep(50)"
1/100th of the time, sleep 50ms.
.El
.Pp
.Sh CAVEATS
It's easy to shoot yourself in the foot by setting fail points too
aggressively or setting too many in combination. For example, forcing
.Fn malloc
to fail consistently is potentially harmful to uptime.
.Pp
The
.Fn sleep
sysctl setting may not be appropriate in all situations. Currently,
.Fn fail_point_eval
does not verify whether the context is appropriate for calling
.Fn msleep .
.Pp
.Sh AUTHORS
.An -nosplit
This manual page was written by
.An Zach Loafman Aq zml@FreeBSD.org .

View file

@ -1860,6 +1860,7 @@ kern/kern_environment.c standard
kern/kern_event.c standard
kern/kern_exec.c standard
kern/kern_exit.c standard
kern/kern_fail.c standard
kern/kern_fork.c standard
kern/kern_idle.c standard
kern/kern_intr.c standard

575
sys/kern/kern_fail.c Normal file
View file

@ -0,0 +1,575 @@
/*-
* Copyright (c) 2009 Isilon Inc http://www.isilon.com/
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/**
* @file
*
* fail(9) Facility.
*
* @ingroup failpoint_private
*/
/**
* @defgroup failpoint fail(9) Facility
*
* Failpoints allow for injecting fake errors into running code on the fly,
* without modifying code or recompiling with flags. Failpoints are always
* present, and are very efficient when disabled. Failpoints are described
* in man fail(9).
*/
/**
* @defgroup failpoint_private Private fail(9) Implementation functions
*
* Private implementations for the actual failpoint code.
*
* @ingroup failpoint
*/
/**
* @addtogroup failpoint_private
* @{
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/errno.h>
#include <sys/fail.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <machine/stdarg.h>
#ifdef ILOG_DEFINE_FOR_FILE
ILOG_DEFINE_FOR_FILE(L_ISI_FAIL_POINT, L_ILOG, fail_point);
#endif
/* Malloc type that every fail point allocation in this file is charged to. */
MALLOC_DEFINE(M_FAIL_POINT, "Fail Points", "fail points system");
/* Allocation wrappers that pin the malloc type to M_FAIL_POINT. */
#define fp_free(ptr) free(ptr, M_FAIL_POINT)
#define fp_malloc(size, flags) malloc((size), M_FAIL_POINT, (flags))
/*
* One global mutex for all fail points. It is held (via FP_LOCK/FP_UNLOCK)
* while an entry list is evaluated, formatted, or replaced.
*/
static struct mtx g_fp_mtx;
MTX_SYSINIT(g_fp_mtx, &g_fp_mtx, "fail point mtx", MTX_DEF);
#define FP_LOCK() mtx_lock(&g_fp_mtx)
#define FP_UNLOCK() mtx_unlock(&g_fp_mtx)
/**
 * Carry out the "sleep" action of a fail point.
 *
 * If the fail point has no sleep callback, the calling thread sleeps on
 * the fail point for the requested time via msleep() on g_fp_mtx.
 * Otherwise the callback is scheduled with timeout() and *pret is set to
 * FAIL_POINT_RC_QUEUED.
 *
 * @param fp    fail point being evaluated
 * @param ent   entry that triggered the sleep; unused here and may be NULL
 *              (the caller frees an exhausted entry before calling us)
 * @param msecs requested delay in milliseconds; values <= 0 are ignored
 * @param pret  return code of the evaluation, updated when the sleep is
 *              queued rather than performed inline
 */
static inline void
fail_point_sleep(struct fail_point *fp, struct fail_point_entry *ent,
    int msecs, enum fail_point_return_code *pret)
{
	int64_t ticks64;
	int timo;

	/*
	 * The delay comes straight from the user's sysctl string, so it may
	 * be negative ("sleep(-5)") or huge.  A non-positive delay means
	 * there is nothing to wait for.
	 */
	if (msecs <= 0)
		return;

	/*
	 * Convert from millisecs to ticks, rounding up.  Do the arithmetic
	 * in 64 bits so that msecs * hz cannot overflow an int, then clamp
	 * to the largest positive int (0x7fffffff) for the sleep API.
	 */
	ticks64 = ((int64_t)msecs * hz + 999) / 1000;
	if (ticks64 <= 0)
		return;
	timo = (ticks64 > 0x7fffffff) ? 0x7fffffff : (int)ticks64;

	if (fp->fp_sleep_fn == NULL) {
		msleep(fp, &g_fp_mtx, PWAIT, "failpt", timo);
	} else {
		timeout(fp->fp_sleep_fn, fp->fp_sleep_arg, timo);
		*pret = FAIL_POINT_RC_QUEUED;
	}
}
/**
* Fixed-point scale used for probabilities: PROB_MAX is the equivalent of
* probability one (100%).
*/
enum {
PROB_MAX = 1000000, /* probability between zero and this number */
PROB_DIGITS = 6, /* number of zeros in above number */
};
/*
* Textual names of the fail point types. The table is indexed by
* enum fail_point_t values (see fail_point_get() and parse_type()), so the
* order here has to follow the order of that enum.
*/
static const char *fail_type_strings[] = {
"off",
"panic",
"return",
"break",
"print",
"sleep",
};
/*
* Forward declarations for the sysctl-string parser and the entry-list
* helpers that are defined further down in this file.
*/
static char *parse_fail_point(struct fail_point_entries *, char *);
static char *parse_term(struct fail_point_entries *, char *);
static char *parse_number(int *out_units, int *out_decimal, char *);
static char *parse_type(struct fail_point_entry *, char *);
static void free_entry(struct fail_point_entries *, struct fail_point_entry *);
static void clear_entries(struct fail_point_entries *);
/**
 * Initialize a fail_point. The name is formed in a printf-like fashion
 * from "fmt" and subsequent arguments. This function is generally used
 * for custom failpoints located at odd places in the sysctl tree, and is
 * not explicitly needed for standard in-line-declared failpoints.
 *
 * Every field of the structure is initialized: the entry list is empty,
 * the name is heap-allocated (and released by fail_point_destroy()), the
 * location is an empty string, and no sleep callback is set.
 *
 * @param fp  fail point to initialize
 * @param fmt printf-style format for the fail point name
 *
 * @ingroup failpoint
 */
void
fail_point_init(struct fail_point *fp, const char *fmt, ...)
{
	va_list ap;
	char *name;
	int n;

	TAILQ_INIT(&fp->fp_entries);
	fp->fp_flags = 0;

	/*
	 * Dynamically initialized fail points have no file:line location.
	 * Give fp_location a valid string anyway, because the logging in
	 * fail_point_set() formats it with "%s".
	 */
	fp->fp_location = "";

	/* Figure out the size of the name. */
	va_start(ap, fmt);
	n = vsnprintf(NULL, 0, fmt, ap);
	va_end(ap);

	/* Allocate the name and fill it in. */
	name = fp_malloc(n + 1, M_WAITOK);
	if (name != NULL) {
		va_start(ap, fmt);
		vsnprintf(name, n + 1, fmt, ap);
		va_end(ap);
	}
	fp->fp_name = name;
	fp->fp_flags |= FAIL_POINT_DYNAMIC_NAME;

	fp->fp_sleep_fn = NULL;
	fp->fp_sleep_arg = NULL;
}
/**
 * Release everything a fail_point owns: the name, when it was allocated
 * by fail_point_init() (FAIL_POINT_DYNAMIC_NAME), and every entry still
 * linked on its entry list.
 *
 * @ingroup failpoint
 */
void
fail_point_destroy(struct fail_point *fp)
{
	struct fail_point_entry *victim;

	if ((fp->fp_flags & FAIL_POINT_DYNAMIC_NAME) != 0 &&
	    fp->fp_name != NULL) {
		/* fp_name is const-qualified; strip the qualifier to free it. */
		fp_free((void *)(intptr_t)fp->fp_name);
		fp->fp_name = NULL;
	}
	fp->fp_flags = 0;

	/* Pop entries off the head until the list is empty. */
	for (;;) {
		victim = TAILQ_FIRST(&fp->fp_entries);
		if (victim == NULL)
			break;
		TAILQ_REMOVE(&fp->fp_entries, victim, fe_entries);
		fp_free(victim);
	}
}
/**
* This does the real work of evaluating a fail point. The entry list is
* walked from the head under the global fail point mutex:
*
* - If a "return" entry fires, FAIL_POINT_RC_RETURN is returned and the
*   entry's argument is stored in 'return_value' (return_value is allowed
*   to be null).
* - If a "sleep" entry fires on a fail point that has a sleep callback,
*   fail_point_sleep() changes the result to FAIL_POINT_RC_QUEUED.
* - If a "panic" entry fires, we never return.
* - Otherwise FAIL_POINT_RC_CONTINUE is returned after doing some work,
*   which means "keep going".
*
* Evaluation moves on to the next entry only when the current entry's
* probability check misses, or when a "print" entry has a non-zero
* argument; in every other case the walk stops at the current entry.
*/
enum fail_point_return_code
fail_point_eval_nontrivial(struct fail_point *fp, int *return_value)
{
enum fail_point_return_code ret = FAIL_POINT_RC_CONTINUE;
struct fail_point_entry *ent, *next;
int msecs;
FP_LOCK();
ent = TAILQ_FIRST(&fp->fp_entries);
while (ent) {
int cont = 0; /* don't continue by default */
/* Fetch the successor now; ent may be freed before loop_end. */
next = TAILQ_NEXT(ent, fe_entries);
/*
* Probability check: entries at PROB_MAX always fire; otherwise the
* entry is skipped (and the next one tried) when the random draw
* exceeds its probability.
*/
if (ent->fe_prob < PROB_MAX &&
ent->fe_prob < random() % PROB_MAX) {
cont = 1;
goto loop_end;
}
switch (ent->fe_type) {
case FAIL_POINT_PANIC:
panic("fail point %s panicking", fp->fp_name);
/* NOTREACHED */
case FAIL_POINT_RETURN:
if (return_value)
*return_value = ent->fe_arg;
ret = FAIL_POINT_RC_RETURN;
break;
case FAIL_POINT_BREAK:
printf("fail point %s breaking to debugger\n", fp->fp_name);
breakpoint();
break;
case FAIL_POINT_PRINT:
printf("fail point %s executing\n", fp->fp_name);
/* A non-zero argument lets evaluation fall through to the next term. */
cont = ent->fe_arg;
break;
case FAIL_POINT_SLEEP:
/*
* Free the entry now if necessary, since
* we're about to drop the mutex and sleep.
*/
msecs = ent->fe_arg;
if (ent->fe_count > 0 && --ent->fe_count == 0) {
free_entry(&fp->fp_entries, ent);
/* NULL also tells the count handling below to skip this entry. */
ent = NULL;
}
if (msecs)
fail_point_sleep(fp, ent, msecs, &ret);
break;
default:
break;
}
/* Count-limited entries are removed once their count reaches zero. */
if (ent && ent->fe_count > 0 && --ent->fe_count == 0)
free_entry(&fp->fp_entries, ent);
loop_end:
if (cont)
ent = next;
else
break;
}
/* Get rid of "off"s at the end. */
while ((ent = TAILQ_LAST(&fp->fp_entries, fail_point_entries)) &&
ent->fe_type == FAIL_POINT_OFF)
free_entry(&fp->fp_entries, ent);
FP_UNLOCK();
return ret;
}
/**
* Translate internal fail_point structure into human-readable text.
*/
static void
fail_point_get(struct fail_point *fp, struct sbuf *sb)
{
struct fail_point_entry *ent;
FP_LOCK();
TAILQ_FOREACH(ent, &fp->fp_entries, fe_entries) {
if (ent->fe_prob < PROB_MAX) {
int decimal = ent->fe_prob % (PROB_MAX / 100);
int units = ent->fe_prob / (PROB_MAX / 100);
sbuf_printf(sb, "%d", units);
if (decimal) {
int digits = PROB_DIGITS - 2;
while (!(decimal % 10)) {
digits--;
decimal /= 10;
}
sbuf_printf(sb, ".%0*d", digits, decimal);
}
sbuf_printf(sb, "%%");
}
if (ent->fe_count > 0)
sbuf_printf(sb, "%d*", ent->fe_count);
sbuf_printf(sb, "%s", fail_type_strings[ent->fe_type]);
if (ent->fe_arg)
sbuf_printf(sb, "(%d)", ent->fe_arg);
if (TAILQ_NEXT(ent, fe_entries))
sbuf_printf(sb, "->");
}
if (TAILQ_EMPTY(&fp->fp_entries))
sbuf_printf(sb, "off");
FP_UNLOCK();
}
/**
* Set an internal fail_point structure from a human-readable failpoint string
* in a lock-safe manner.
*/
static int
fail_point_set(struct fail_point *fp, char *buf)
{
int error = 0;
struct fail_point_entry *ent, *ent_next;
struct fail_point_entries new_entries;
/* Parse new entries. */
TAILQ_INIT(&new_entries);
if (!parse_fail_point(&new_entries, buf)) {
clear_entries(&new_entries);
error = EINVAL;
goto end;
}
FP_LOCK();
/* Move new entries in. */
TAILQ_SWAP(&fp->fp_entries, &new_entries, fail_point_entry, fe_entries);
clear_entries(&new_entries);
/* Get rid of useless zero probability entries. */
TAILQ_FOREACH_SAFE(ent, &fp->fp_entries, fe_entries, ent_next) {
if (ent->fe_prob == 0)
free_entry(&fp->fp_entries, ent);
}
/* Get rid of "off"s at the end. */
while ((ent = TAILQ_LAST(&fp->fp_entries, fail_point_entries)) &&
ent->fe_type == FAIL_POINT_OFF)
free_entry(&fp->fp_entries, ent);
FP_UNLOCK();
end:
#ifdef IWARNING
if (error)
IWARNING("Failed to set %s (%s) to %s",
fp->fp_name, fp->fp_location, buf);
else
INOTICE("Set %s (%s) to %s",
fp->fp_name, fp->fp_location, buf);
#endif /* IWARNING */
return error;
}
#define MAX_FAIL_POINT_BUF 1023
/**
* Handle kernel failpoint set/get.
*/
int
fail_point_sysctl(SYSCTL_HANDLER_ARGS)
{
struct fail_point *fp = arg1;
char *buf = NULL;
struct sbuf sb;
int error;
/* Retrieving */
sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND);
fail_point_get(fp, &sb);
sbuf_trim(&sb);
sbuf_finish(&sb);
error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
sbuf_delete(&sb);
/* Setting */
if (!error && req->newptr) {
if (req->newlen > MAX_FAIL_POINT_BUF) {
error = EINVAL;
goto out;
}
buf = fp_malloc(req->newlen + 1, M_WAITOK);
error = SYSCTL_IN(req, buf, req->newlen);
if (error)
goto out;
buf[req->newlen] = '\0';
error = fail_point_set(fp, buf);
}
out:
if (buf)
fp_free(buf);
return error;
}
/**
 * Parse a complete human-readable failpoint string into a list of
 * entries appended to 'ents'.
 *
 * <fail_point> ::
 *      <term> ( "->" <term> )*
 *
 * Returns a pointer to the terminating NUL on success, or NULL on a
 * syntax error (entries parsed so far are left on 'ents' for the caller
 * to release).
 */
static char *
parse_fail_point(struct fail_point_entries *ents, char *p)
{
	p = parse_term(ents, p);
	if (p == NULL)
		return (NULL);

	/* Every further term must be introduced by "->". */
	while (*p != '\0') {
		if (p[0] != '-' || p[1] != '>')
			return (NULL);
		p = parse_term(ents, p + 2);
		if (p == NULL)
			return (NULL);
	}
	return (p);
}
/**
 * Parse one term of a failpoint string. A fresh entry is allocated and
 * appended to 'ents' before parsing starts, so it stays on the list even
 * when parsing fails.
 *
 * <term> ::
 *      ( (<float> "%") | (<integer> "*" ) )*
 *      <type>
 *      [ "(" <integer> ")" ]
 *
 * Returns a pointer just past the term, or NULL on a syntax error.
 */
static char *
parse_term(struct fail_point_entries *ents, char *p)
{
	struct fail_point_entry *term;
	int whole, frac;

	term = fp_malloc(sizeof(*term), M_WAITOK | M_ZERO);
	term->fe_prob = PROB_MAX;
	TAILQ_INSERT_TAIL(ents, term, fe_entries);

	/* ( (<float> "%") | (<integer> "*" ) )* */
	while ((*p >= '0' && *p <= '9') || *p == '.') {
		p = parse_number(&whole, &frac, p);
		if (p == NULL)
			return (NULL);

		switch (*p) {
		case '%':
			if (whole > 100)	/* prevent overflow early */
				whole = 100;
			term->fe_prob = whole * (PROB_MAX / 100) + frac;
			if (term->fe_prob > PROB_MAX)
				term->fe_prob = PROB_MAX;
			break;
		case '*':
			/* A count must be a non-zero integer. */
			if (whole == 0 || frac != 0)
				return (NULL);
			term->fe_count = whole;
			break;
		default:
			return (NULL);
		}
		p++;
	}

	/* <type> */
	p = parse_type(term, p);
	if (p == NULL)
		return (NULL);

	/* [ "(" <integer> ")" ] -- absent at end of string or before "->" */
	if (*p != '(')
		return (p);
	p++;
	if (!((*p >= '0' && *p <= '9') || *p == '-'))
		return (NULL);
	term->fe_arg = strtol(p, &p, 0);
	if (*p != ')')
		return (NULL);
	return (p + 1);
}
/**
 * Parse the numeric prefix of a failpoint term.
 *
 * <number> ::
 *      <integer> [ "." <integer> ] |
 *      "." <integer>
 *
 * The integer part is stored in *out_units. The fraction is stored in
 * *out_decimal scaled to PROB_DIGITS - 2 digits; the first digit beyond
 * that precision is used for rounding and any further digits are ignored.
 *
 * Returns a pointer just past the number, or NULL if no number is there.
 */
static char *
parse_number(int *out_units, int *out_decimal, char *p)
{
	char *start = p;
	int ndigits, d, frac;

	/* whole part */
	*out_units = strtol(p, &p, 10);
	if (p == start && *p != '.')
		return (NULL);

	/* fractional part */
	*out_decimal = 0;
	if (*p != '.')
		return (p);	/* success, no fraction */
	p++;

	frac = 0;
	for (ndigits = 0; *p >= '0' && *p <= '9'; ndigits++, p++) {
		d = *p - '0';
		if (ndigits < PROB_DIGITS - 2)
			frac = frac * 10 + d;
		else if (ndigits == PROB_DIGITS - 2 && d >= 5)
			frac++;		/* round on first excess digit */
	}
	if (ndigits == 0)	/* need at least one digit after '.' */
		return (NULL);

	/* add implicit zeros */
	for (; ndigits < PROB_DIGITS - 2; ndigits++)
		frac *= 10;
	*out_decimal = frac;

	return (p);	/* success */
}
/**
 * Parse the type keyword of a failpoint term.
 *
 * The keyword is the run of lower-case letters starting at 'beg'; it must
 * match one of fail_type_strings[] exactly. On success the matching
 * enum fail_point_t value is stored in ent->fe_type.
 *
 * @param ent entry whose fe_type is filled in
 * @param beg start of the keyword
 * @return pointer just past the keyword, or NULL if it is empty or does
 *         not name a known type
 */
static char *
parse_type(struct fail_point_entry *ent, char *beg)
{
	enum fail_point_t type;
	const char *name, *q;
	char *end = beg;

	while ('a' <= *end && *end <= 'z')
		end++;
	if (beg == end)
		return (NULL);

	for (type = FAIL_POINT_OFF; type != FAIL_POINT_INVALID; type++) {
		name = fail_type_strings[type];
		q = beg;
		/*
		 * Advance only while the characters really match and stop at
		 * the end of the type name.  Stepping past a mismatch would
		 * accept words that differ in their last letter ("panix" for
		 * "panic") and could read beyond the name's terminating NUL.
		 */
		while (q < end && *name != '\0' && *name == *q) {
			name++;
			q++;
		}
		if (q == end && *name == '\0') {
			ent->fe_type = type;
			return (end);
		}
	}
	return (NULL);
}
/**
 * Unlink a single failpoint term from the list 'ents' and release its
 * memory.
 */
static void
free_entry(struct fail_point_entries *ents, struct fail_point_entry *ent)
{

	/* Unlink first so the list never points at freed memory. */
	TAILQ_REMOVE(ents, ent, fe_entries);
	fp_free(ent);
}
/**
 * Release every failpoint term on the list 'ents' and leave the list
 * empty and ready for reuse.
 */
static void
clear_entries(struct fail_point_entries *ents)
{
	struct fail_point_entry *cur, *following;

	/* Read the successor before freeing the current element. */
	for (cur = TAILQ_FIRST(ents); cur != NULL; cur = following) {
		following = TAILQ_NEXT(cur, fe_entries);
		fp_free(cur);
	}
	/* Nothing was unlinked above, so reset the head in one go. */
	TAILQ_INIT(ents);
}
/* The fail point sysctl tree. */
SYSCTL_NODE(_debug, OID_AUTO, fail_point, CTLFLAG_RW, 0, "fail points");

View file

@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
#include <sys/buf.h>
#include <sys/devicestat.h>
#include <sys/eventhandler.h>
#include <sys/fail.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@ -1167,6 +1168,15 @@ buf_dirty_count_severe(void)
return(numdirtybuffers >= hidirtybuffers);
}
/*
 * Wrapper around vm_page_count_severe() that adds the
 * debug.fail_point.buf_pressure fail point: when that fail point fires,
 * 1 is returned without consulting the VM system.
 */
static __noinline int
buf_vm_page_count_severe(void)
{
	int severe;

	KFAIL_POINT_CODE(DEBUG_FP, buf_pressure, return (1));

	severe = vm_page_count_severe();
	return (severe);
}
/*
* brelse:
*
@ -1233,7 +1243,7 @@ brelse(struct buf *bp)
*/
if (bp->b_flags & B_DELWRI)
bp->b_flags &= ~B_RELBUF;
else if (vm_page_count_severe()) {
else if (buf_vm_page_count_severe()) {
/*
* The locking of the BO_LOCK is not necessary since
* BKGRDINPROG cannot be set while we hold the buf
@ -1502,7 +1512,7 @@ bqrelse(struct buf *bp)
* lock, it can only be cleared if it is already
* pending.
*/
if (!vm_page_count_severe() || (bp->b_vflags & BV_BKGRDINPROG)) {
if (!buf_vm_page_count_severe() || (bp->b_vflags & BV_BKGRDINPROG)) {
bp->b_qindex = QUEUE_CLEAN;
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp,
b_freelist);
@ -1571,7 +1581,7 @@ vfs_vmio_release(struct buf *bp)
vm_page_free(m);
} else if (bp->b_flags & B_DIRECT) {
vm_page_try_to_free(m);
} else if (vm_page_count_severe()) {
} else if (buf_vm_page_count_severe()) {
vm_page_try_to_cache(m);
}
}

260
sys/sys/fail.h Normal file
View file

@ -0,0 +1,260 @@
/*-
* Copyright (c) 2009 Isilon Inc http://www.isilon.com/
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/**
* @file
*
* Main header for failpoint facility.
*/
#ifndef _SYS_FAIL_H_
#define _SYS_FAIL_H_
#include <sys/types.h>
#include <sys/linker_set.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
/**
* Failpoint types.
* The enumerator values are used as indexes into fail_type_strings in
* sys/kern/kern_fail.c, so don't change these without changing that table.
* FAIL_POINT_INVALID must stay last; it terminates the type lookup loop.
* @ingroup failpoint_private
*/
enum fail_point_t {
FAIL_POINT_OFF, /**< don't fail */
FAIL_POINT_PANIC, /**< panic */
FAIL_POINT_RETURN, /**< return an errorcode */
FAIL_POINT_BREAK, /**< break into the debugger */
FAIL_POINT_PRINT, /**< print a message */
FAIL_POINT_SLEEP, /**< sleep for some msecs */
FAIL_POINT_INVALID, /**< placeholder */
};
/**
* Failpoint return codes, used internally.
* FAIL_POINT_RC_CONTINUE is zero, so the result of fail_point_eval() can be
* tested as a boolean: any non-zero code means the fail point fired.
* @ingroup failpoint_private
*/
enum fail_point_return_code {
FAIL_POINT_RC_CONTINUE = 0, /**< Continue with normal execution */
FAIL_POINT_RC_RETURN, /**< FP evaluated to 'return' */
FAIL_POINT_RC_QUEUED, /**< sleep_fn will be called */
};
TAILQ_HEAD(fail_point_entries, fail_point_entry);
/**
* Internal failpoint structure, tracking all the current details of the
* failpoint. This structure is the core component shared between the
* failure-injection code and the user-interface.
* An empty fp_entries list means the fail point is off.
* @ingroup failpoint_private
*/
struct fail_point {
const char *fp_name; /**< name of fail point */
const char *fp_location; /**< file:line of fail point */
struct fail_point_entries fp_entries; /**< list of entries */
int fp_flags; /**< FAIL_POINT_* flag bits (see below) */
void (*fp_sleep_fn)(void *); /**< Function to call at end of
* sleep for sleep failpoints */
void *fp_sleep_arg; /**< Arg for sleep_fn */
};
/* Flag bit for fp_flags. */
#define FAIL_POINT_DYNAMIC_NAME 0x01 /**< Must free name on destroy */
/**
* Internal structure tracking a single term of a complete failpoint.
* fe_prob is a fixed-point value in which 1000000 stands for 100%.
* @ingroup failpoint_private
*/
struct fail_point_entry {
enum fail_point_t fe_type; /**< type of entry */
int fe_arg; /**< argument to type (e.g. return value) */
int fe_prob; /**< likelihood of firing in millionths */
int fe_count; /**< number of times to fire, 0 means always */
TAILQ_ENTRY(fail_point_entry) fe_entries; /**< next entry in fail point */
};
/* Private failpoint eval function -- use fail_point_eval() instead. */
enum fail_point_return_code fail_point_eval_nontrivial(struct fail_point *,
int *ret);
/**
* @addtogroup failpoint
* @{
*/
/*
* Initialize a fail-point. The name is formed in printf-like fashion
* from "fmt" and the subsequent arguments.
* Pair with fail_point_destroy().
*/
void fail_point_init(struct fail_point *, const char *fmt, ...)
__printflike(2, 3);
/**
 * Install the sleep callback of a fail point.
 * With a callback installed, a FAIL_POINT_SLEEP term schedules a
 * (*fp->fp_sleep_fn)(fp->fp_sleep_arg) call after the sleep time instead
 * of putting the caller of fail_point_eval() to sleep. Passing NULL (the
 * default) restores the in-line sleep.
 */
static __inline void
fail_point_sleep_set_func(struct fail_point *fp, void (*sleep_fn)(void *))
{

	fp->fp_sleep_fn = sleep_fn;
}
/**
 * Record the argument that is handed to the fail point's sleep callback
 * (see fail_point_sleep_set_func()).
 */
static __inline void
fail_point_sleep_set_arg(struct fail_point *fp, void *sleep_arg)
{

	fp->fp_sleep_arg = sleep_arg;
}
/**
* Free the resources used by a fail-point. Pair with fail_point_init().
*/
void fail_point_destroy(struct fail_point *);
/**
 * Evaluate a failpoint.
 * The common case -- no entries configured -- is answered in-line with
 * FAIL_POINT_RC_CONTINUE; only a fail point that has entries pays for the
 * call to fail_point_eval_nontrivial().
 */
static __inline enum fail_point_return_code
fail_point_eval(struct fail_point *fp, int *ret)
{

	if (!TAILQ_EMPTY(&fp->fp_entries))
		return (fail_point_eval_nontrivial(fp, ret));
	return (FAIL_POINT_RC_CONTINUE);
}
/*
* Helpers used to declare a fail_point and its sysctl in a function.
* _FAIL_POINT_NAME() builds the identifier of the static fail_point object;
* _FAIL_POINT_LOCATION() builds its "file:line" string. The two-level
* _STRINGIFY is needed so that __LINE__ is expanded before it is quoted.
*/
#define _FAIL_POINT_NAME(name) _fail_point_##name
#define _STRINGIFY_HELPER(x) #x
#define _STRINGIFY(x) _STRINGIFY_HELPER(x)
#define _FAIL_POINT_LOCATION() __FILE__ ":" _STRINGIFY(__LINE__)
/**
* Instantiate a failpoint which returns "value" from the function when triggered.
* RETURN_VALUE is an int, so the enclosing function must be able to return one.
* @param parent The parent sysctl under which to locate the sysctl
* @param name The name of the failpoint in the sysctl tree (and printouts)
* @return Instantly returns the return("value") specified in the
* failpoint, if triggered.
*/
#define KFAIL_POINT_RETURN(parent, name) \
KFAIL_POINT_CODE(parent, name, return RETURN_VALUE)
/**
* Instantiate a failpoint which returns (void) from the function when triggered.
* The value given to return() in the sysctl setting is ignored.
* @param parent The parent sysctl under which to locate the sysctl
* @param name The name of the failpoint in the sysctl tree (and printouts)
* @return Instantly returns void, if triggered in the failpoint.
*/
#define KFAIL_POINT_RETURN_VOID(parent, name) \
KFAIL_POINT_CODE(parent, name, return)
/**
* Instantiate a failpoint which sets an error when triggered.
* Execution continues after the failpoint; only the variable is changed.
* @param parent The parent sysctl under which to locate the sysctl
* @param name The name of the failpoint in the sysctl tree (and printouts)
* @param error_var A variable to set to the failpoint's specified
* return-value when triggered
*/
#define KFAIL_POINT_ERROR(parent, name, error_var) \
KFAIL_POINT_CODE(parent, name, (error_var) = RETURN_VALUE)
/**
* Instantiate a failpoint which sets an error and then goes to a
* specified label in the function when triggered.
* @param parent The parent sysctl under which to locate the sysctl
* @param name The name of the failpoint in the sysctl tree (and printouts)
* @param error_var A variable to set to the failpoint's specified
* return-value when triggered
* @param label The location to goto when triggered; it must be a label
* of the function that contains the failpoint.
*/
#define KFAIL_POINT_GOTO(parent, name, error_var, label) \
KFAIL_POINT_CODE(parent, name, (error_var) = RETURN_VALUE; goto label)
/**
* Instantiate a failpoint which runs arbitrary code when triggered.
* This is the base macro; the other KFAIL_POINT_* macros expand to it.
* The code is placed between KFAIL_POINT_START and FAIL_POINT_END, inside
* its own brace block, and is followed by a semicolon supplied here.
* @param parent The parent sysctl under which to locate the sysctl
* @param name The name of the failpoint in the sysctl tree (and printouts)
* @param code The arbitrary code to run when triggered. Can reference
* "RETURN_VALUE" if desired to extract the specified user
* return-value when triggered
*/
#define KFAIL_POINT_CODE(parent, name, code) \
KFAIL_POINT_START(parent, name) { \
code; \
} FAIL_POINT_END
/**
* @}
* (end group failpoint)
*/
/**
* Internal macro to implement above #defines -- should not be used directly.
*
* Opens a do { ... } while (0) statement that declares the local int
* RETURN_VALUE, a function-static struct fail_point named after "name"
* (statically initialized with its name, its file:line location and an
* empty entry list), and the string sysctl "name" under "parent" handled
* by fail_point_sysctl. It then evaluates the fail point and opens the
* body of an if statement that is entered whenever fail_point_eval()
* returns a non-zero code. FAIL_POINT_END closes what is opened here.
* @ingroup failpoint_private
*/
#define KFAIL_POINT_START(parent, name) \
do { \
int RETURN_VALUE; \
static struct fail_point _FAIL_POINT_NAME(name) = { \
#name, \
_FAIL_POINT_LOCATION(), \
TAILQ_HEAD_INITIALIZER( \
_FAIL_POINT_NAME(name).fp_entries), \
0, \
NULL, NULL, \
}; \
SYSCTL_OID(parent, OID_AUTO, name, \
CTLTYPE_STRING | CTLFLAG_RW, \
&_FAIL_POINT_NAME(name), 0, fail_point_sysctl, \
"A", ""); \
\
if (__predict_false( \
fail_point_eval(&_FAIL_POINT_NAME(name), \
&RETURN_VALUE))) {
/**
* Internal macro to implement above #defines -- should not be used directly.
*
* Closes the if body and the do { ... } while (0) statement opened by
* KFAIL_POINT_START; the caller supplies the trailing semicolon.
* @ingroup failpoint_private
*/
#define FAIL_POINT_END \
} \
} while (0)
#ifdef _KERNEL
int fail_point_sysctl(SYSCTL_HANDLER_ARGS);
/* The fail point sysctl tree. */
SYSCTL_DECL(_debug_fail_point);
#endif
#define DEBUG_FP _debug_fail_point
#endif /* _SYS_FAIL_H_ */

View file

@ -309,6 +309,20 @@ struct { \
(head)->stqh_last = &STAILQ_NEXT((elm), field); \
} while (0)
/*
* Exchange the contents of two singly-linked tail queues. "type" is the
* struct tag of the elements. After the first/last pointers are swapped,
* a head that ended up empty gets its stqh_last pointed back at its own
* first pointer, because the swapped-in value referred to the other head.
* The head arguments are evaluated more than once.
*/
#define STAILQ_SWAP(head1, head2, type) do { \
struct type *swap_first = STAILQ_FIRST(head1); \
struct type **swap_last = (head1)->stqh_last; \
STAILQ_FIRST(head1) = STAILQ_FIRST(head2); \
(head1)->stqh_last = (head2)->stqh_last; \
STAILQ_FIRST(head2) = swap_first; \
(head2)->stqh_last = swap_last; \
if (STAILQ_EMPTY(head1)) \
(head1)->stqh_last = &STAILQ_FIRST(head1); \
if (STAILQ_EMPTY(head2)) \
(head2)->stqh_last = &STAILQ_FIRST(head2); \
} while (0)
/*
* List declarations.
*/
@ -411,6 +425,16 @@ struct { \
TRASHIT((elm)->field.le_prev); \
} while (0)
/*
* Exchange the contents of two lists. "type" is the struct tag of the
* elements and "field" the name of their LIST_ENTRY member. After the
* first pointers are swapped, the le_prev back-pointer of each new first
* element (if any) is redirected to the head it now belongs to.
* The head arguments are evaluated more than once.
*/
#define LIST_SWAP(head1, head2, type, field) do { \
struct type *swap_tmp = LIST_FIRST((head1)); \
LIST_FIRST((head1)) = LIST_FIRST((head2)); \
LIST_FIRST((head2)) = swap_tmp; \
if ((swap_tmp = LIST_FIRST((head1))) != NULL) \
swap_tmp->field.le_prev = &LIST_FIRST((head1)); \
if ((swap_tmp = LIST_FIRST((head2))) != NULL) \
swap_tmp->field.le_prev = &LIST_FIRST((head2)); \
} while (0)
/*
* Tail queue declarations.
*/
@ -578,4 +602,21 @@ struct { \
QMD_TRACE_ELEM(&(elm)->field); \
} while (0)
/*
* Exchange the contents of two tail queues. "type" is the struct tag of
* the elements and "field" the name of their TAILQ_ENTRY member. After
* the first/last pointers are swapped, each head is repaired: a non-empty
* queue gets the tqe_prev of its first element pointed at the new head,
* and an empty queue gets its tqh_last pointed back at its own tqh_first.
* The head arguments are evaluated more than once.
*/
#define TAILQ_SWAP(head1, head2, type, field) do { \
struct type *swap_first = (head1)->tqh_first; \
struct type **swap_last = (head1)->tqh_last; \
(head1)->tqh_first = (head2)->tqh_first; \
(head1)->tqh_last = (head2)->tqh_last; \
(head2)->tqh_first = swap_first; \
(head2)->tqh_last = swap_last; \
if ((swap_first = (head1)->tqh_first) != NULL) \
swap_first->field.tqe_prev = &(head1)->tqh_first; \
else \
(head1)->tqh_last = &(head1)->tqh_first; \
if ((swap_first = (head2)->tqh_first) != NULL) \
swap_first->field.tqe_prev = &(head2)->tqh_first; \
else \
(head2)->tqh_last = &(head2)->tqh_first; \
} while (0)
#endif /* !_SYS_QUEUE_H_ */