Add new vnode dumper to support live minidumps

This dumper can instantiate and write the dump's contents to a
file-backed vnode.

Unlike existing disk or network dumpers, the vnode dumper should not be
invoked during a system panic, and therefore is not added to the global
dumper_configs list. Instead, the vnode dumper is constructed ad-hoc
when a live dump is requested using the new ioctl on /dev/mem. This is
similar in spirit to a kgdb session against the live system via
/dev/mem.

As described briefly in the mem(4) man page, live dumps are not
guaranteed to result in a usable output file, but offer some debugging
value where forcefully panicking a system to dump its memory is not
desirable/feasible.

A future change to savecore(8) will add an option to save a live dump.

Reviewed by:	markj, Pau Amma <pauamma@gundo.com> (manpages)
Discussed with:	kib
MFC after:	3 weeks
Sponsored by:	Juniper Networks, Inc.
Sponsored by:	Klara, Inc.
Differential Revision:	https://reviews.freebsd.org/D33813
This commit is contained in:
Mitchell Horne 2021-03-23 17:47:14 -03:00
parent 59c27ea18c
commit c9114f9f86
8 changed files with 296 additions and 2 deletions

View file

@ -202,6 +202,50 @@ to update an existing or establish a new range, or to
.Dv MEMRANGE_SET_REMOVE
to remove a range.
.El
.Ss Live Kernel Dumps
.Pp
The
.Dv MEM_KERNELDUMP
ioctl will initiate a kernel dump against the running system, the contents of
which will be written to a process-owned file descriptor.
The resulting dump output will be in minidump format.
The request is described by
.Bd -literal
struct mem_livedump_arg {
int fd; /* input */
int flags; /* input */
uint8_t compression; /* input */
};
.Ed
.Pp
The
.Va fd
field is used to pass the file descriptor.
.Pp
The
.Va flags
field is currently unused and must be set to zero.
.Pp
The
.Va compression
field can be used to specify the desired compression to
be applied to the dump output.
The supported values are defined in
.In sys/kerneldump.h ;
that is,
.Dv KERNELDUMP_COMP_NONE ,
.Dv KERNELDUMP_COMP_GZIP ,
or
.Dv KERNELDUMP_COMP_ZSTD .
.Pp
Kernel dumps taken against the running system may have inconsistent kernel data
structures due to allocation, deallocation, or modification of memory
concurrent to the dump procedure.
Thus, the resulting core dump is not guaranteed to be usable.
A system under load is more likely to produce an inconsistent result.
Despite this, live kernel dumps can be useful for offline debugging of certain
types of kernel bugs, such as deadlocks, or in inspecting a particular part of
the system's state.
.Sh RETURN VALUES
.Ss MEM_EXTRACT_PADDR
The
@ -229,6 +273,24 @@ base/length supplied.
An attempt to remove a range failed because the range is permanently
enabled.
.El
.Ss MEM_KERNELDUMP
.Bl -tag -width Er
.It Bq Er EOPNOTSUPP
Kernel minidumps are not supported on this architecture.
.It Bq Er EPERM
An attempt to begin the kernel dump failed because the calling thread lacks the
.Dv PRIV_KMEM_READ
privilege.
.It Bq Er EBADF
The supplied file descriptor was invalid, or does not have write permission.
.It Bq Er EBUSY
An attempt to begin the kernel dump failed because one is already in progress.
.It Bq Er EINVAL
An invalid or unsupported value was specified in
.Va flags .
.It Bq Er EINVAL
An invalid or unsupported compression type was specified.
.El
.Sh FILES
.Bl -tag -width /dev/kmem -compact
.It Pa /dev/mem

View file

@ -3839,6 +3839,7 @@ kern/kern_tslog.c optional tslog
kern/kern_ubsan.c optional kubsan
kern/kern_umtx.c standard
kern/kern_uuid.c standard
kern/kern_vnodedumper.c standard
kern/kern_xxx.c standard
kern/link_elf.c standard
kern/linker_if.m standard

View file

@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
@ -96,6 +97,7 @@ memioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags,
{
vm_map_t map;
vm_map_entry_t entry;
const struct mem_livedump_arg *marg;
struct mem_extract *me;
int error;
@ -120,6 +122,10 @@ memioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags,
}
vm_map_unlock_read(map);
break;
case MEM_KERNELDUMP:
marg = (const struct mem_livedump_arg *)data;
error = livedump_start(marg->fd, marg->flags, marg->compression);
break;
default:
error = memioctl_md(dev, cmd, data, flags, td);
break;

View file

@ -390,6 +390,17 @@ print_uptime(void)
printf("%lds\n", (long)ts.tv_sec);
}
/*
 * Set up a context that can be extracted from the dump.
 *
 * Calls savectx() on the global dumppcb and records the calling thread's
 * ID in dumptid.  Both doadump() and livedump_start() call this before
 * starting the dump so that the two code paths record the same state.
 *
 * NOTE(review): savectx() presumably stores the caller's register state in
 * dumppcb; confirm against the machine-dependent implementation.
 */
void
dump_savectx(void)
{
savectx(&dumppcb);
dumptid = curthread->td_tid;
}
int
doadump(boolean_t textdump)
{
@ -402,8 +413,7 @@ doadump(boolean_t textdump)
if (TAILQ_EMPTY(&dumper_configs))
return (ENXIO);
savectx(&dumppcb);
dumptid = curthread->td_tid;
dump_savectx();
dumping++;
coredump = TRUE;

202
sys/kern/kern_vnodedumper.c Normal file
View file

@ -0,0 +1,202 @@
/*-
* Copyright (c) 2021-2022 Juniper Networks
*
* This software was developed by Mitchell Horne <mhorne@FreeBSD.org>
* under sponsorship from Juniper Networks and Klara Systems.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/caprights.h>
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/kerneldump.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <machine/vmparam.h>
/*
 * Forward declarations of the dumper callbacks defined later in this file.
 * livedump_start() installs them in the dumperinfo template it builds.
 */
static dumper_start_t vnode_dumper_start;
static dumper_t vnode_dump;
static dumper_hdr_t vnode_write_headers;
/*
 * Taken exclusively (try-lock) around a live dump so that only one live dump
 * proceeds at a time; a contending request fails with EBUSY.
 */
static struct sx livedump_sx;
SX_SYSINIT(livedump, &livedump_sx, "Livedump sx");
/*
 * Invoke a live minidump on the system.
 *
 * The calling thread must pass the PRIV_KMEM_READ privilege check.  'fd'
 * names a descriptor open for writing whose vnode receives the dump, 'flags'
 * must be zero, and 'compression' is forwarded to dumper_create() as
 * kda_compression.
 *
 * Returns 0 on success or an errno value.  When MINIDUMP_PAGE_TRACKING is
 * not enabled the function always returns EOPNOTSUPP.
 */
int
livedump_start(int fd, int flags, uint8_t compression)
{
#if MINIDUMP_PAGE_TRACKING == 1
	struct diocskerneldump_arg dumpargs;
	struct dumperinfo tmpl, *dumper;
	struct vnode *dumpvp;
	struct file *dumpfp;
	void *rangecookie;
	int rv;

	rv = priv_check(curthread, PRIV_KMEM_READ);
	if (rv != 0)
		return (rv);

	/* No flag bits are accepted; anything non-zero is rejected. */
	if (flags != 0)
		return (EINVAL);

	rv = getvnode(curthread, fd, &cap_write_rights, &dumpfp);
	if (rv != 0)
		return (rv);
	dumpvp = dumpfp->f_vnode;

	/* The descriptor must have been opened for writing. */
	if ((dumpfp->f_flag & FWRITE) == 0) {
		rv = EBADF;
		goto out_fdrop;
	}

	/* Describe a one-off dumper wired to the vnode callbacks. */
	bzero(&tmpl, sizeof(tmpl));
	tmpl.dumper_start = vnode_dumper_start;
	tmpl.dumper = vnode_dump;
	tmpl.dumper_hdr = vnode_write_headers;
	tmpl.blocksize = PAGE_SIZE;	/* Arbitrary. */
	tmpl.maxiosize = MAXDUMPPGS * PAGE_SIZE;

	bzero(&dumpargs, sizeof(dumpargs));
	dumpargs.kda_compression = compression;
	rv = dumper_create(&tmpl, "livedump", &dumpargs, &dumper);
	if (rv != 0)
		goto out_fdrop;

	/* A concurrent live dump makes this request fail instead of wait. */
	if (sx_try_xlock(&livedump_sx) == 0) {
		rv = EBUSY;
		goto out_destroy;
	}

	/* The callbacks locate the target vnode through priv. */
	dumper->priv = dumpvp;

	/* Take the whole-file range lock first, then the vnode lock. */
	rangecookie = vn_rangelock_wlock(dumpvp, 0, OFF_MAX);
	vn_lock(dumpvp, LK_EXCLUSIVE | LK_RETRY);

	dump_savectx();
	rv = minidumpsys(dumper, true);

	/* Release in the reverse order of acquisition. */
	VOP_UNLOCK(dumpvp);
	vn_rangelock_unlock(dumpvp, rangecookie);
	sx_xunlock(&livedump_sx);
out_destroy:
	dumper_destroy(dumper);
out_fdrop:
	fdrop(dumpfp, curthread);
	return (rv);
#else
	return (EOPNOTSUPP);
#endif /* MINIDUMP_PAGE_TRACKING == 1 */
}
/*
 * Dumper start callback: reset the dump offset before any data is written.
 *
 * Live dumps do not support encryption, so a non-zero key size trips the
 * assertion.  Always returns 0.
 */
int
vnode_dumper_start(struct dumperinfo *di, void *key, uint32_t keysize)
{

	KASSERT(keysize == 0, ("encryption not supported for livedumps"));

	/* Output always starts at the beginning of the file. */
	di->dumpoff = 0;

	return (0);
}
/*
 * Dumper callback: write one chunk of memory to the dump vnode.
 *
 * Parameters:
 *	arg		Opaque private pointer; the vnode to write to
 *	virtual		Virtual address to read the data from, or NULL when
 *			there is nothing to write
 *	physical	Physical memory address (unused)
 *	offset		Offset from start of core file
 *	length		Data length
 *
 * The vnode must already be locked by the caller; the write is issued with
 * IO_NODELOCKED.
 *
 * Return value:
 *	0 on success
 *	errno on error
 */
int
vnode_dump(void *arg, void *virtual, vm_offset_t physical __unused,
    off_t offset, size_t length)
{
	struct vnode *dumpvp = arg;
	int rv;

	MPASS(dumpvp != NULL);
	ASSERT_VOP_LOCKED(dumpvp, __func__);

	/* A NULL source address carries no data; report success. */
	if (virtual == NULL)
		return (0);

	rv = vn_rdwr(UIO_WRITE, dumpvp, virtual, length, offset, UIO_SYSSPACE,
	    IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL, curthread);
	if (rv == 0)
		return (0);

	uprintf("%s: error writing livedump block at offset %jx: %d\n",
	    __func__, (uintmax_t)offset, rv);
	return (rv);
}
/*
* Callback from dumpsys() to write out the dump header, placed at the end.
*/
int
vnode_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh)
{
struct vnode *vp;
int error;
off_t offset;
vp = di->priv;
MPASS(vp != NULL);
ASSERT_VOP_LOCKED(vp, __func__);
/* Compensate for compression/encryption adjustment of dumpoff. */
offset = roundup2(di->dumpoff, di->blocksize);
/* Write the kernel dump header to the end of the file. */
error = vn_rdwr(UIO_WRITE, vp, kdh, sizeof(*kdh), offset,
UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL,
curthread);
if (error != 0)
uprintf("%s: error writing livedump header: %d\n", __func__,
error);
return (error);
}

View file

@ -362,6 +362,7 @@ struct dumperinfo {
extern int dumping; /* system is dumping */
void dump_savectx(void);
int doadump(boolean_t);
struct diocskerneldump_arg;
int dumper_create(const struct dumperinfo *di_template, const char *devname,

View file

@ -162,6 +162,8 @@ void dumpsys_pb_progress(size_t);
extern int do_minidump;
int livedump_start(int, int, uint8_t);
#endif
#endif /* _SYS_KERNELDUMP_H */

View file

@ -59,6 +59,16 @@ struct mem_extract {
#define MEM_EXTRACT_PADDR _IOWR('m', 52, struct mem_extract)
/*
 * Argument for the MEM_KERNELDUMP ioctl, which requests a kernel dump of the
 * running system.  The kernel passes fd, flags and compression straight to
 * livedump_start().
 */
struct mem_livedump_arg {
int fd; /* descriptor the dump is written to; must be writable */
int flags; /* must be zero; any other value yields EINVAL */
uint8_t compression; /* a KERNELDUMP_COMP_* value */
uint8_t pad1[7]; /* padding; presumably reserved for future use -- confirm */
uint64_t pad2[2]; /* padding; presumably reserved for future use -- confirm */
};
/* Command 53 in the 'm' ioctl group; takes a struct mem_livedump_arg. */
#define MEM_KERNELDUMP _IOW('m', 53, struct mem_livedump_arg)
#ifdef _KERNEL
MALLOC_DECLARE(M_MEMDESC);