Notable upstream pull request merges:
 #15769 082338875 Add 'zpool status -e' flag to see unhealthy vdevs
 #15804 a0d3fe72b libzdb: Initial breakout of libzdb
 #15847 229b9f4ed LUA: Backport CVE-2020-24370's patch

Obtained from:	OpenZFS
OpenZFS commit:	229b9f4ed0
This commit is contained in:
Martin Matuska 2024-02-08 13:39:04 +01:00
commit a4e5e0106a
37 changed files with 737 additions and 182 deletions

View file

@ -20,6 +20,7 @@ SUBDIR.${MK_ZFS}+= \
libtpool \
libumem \
libuutil \
libzdb \
libzfs \
libzfs_core \
libzfsbootenv \

25
cddl/lib/libzdb/Makefile Normal file
View file

@ -0,0 +1,25 @@
# Builds the zdb library (LIB=zdb) from the OpenZFS sources kept under
# sys/contrib/openzfs; libzdb.c and libzdb.h are located through .PATH.
.PATH: ${SRCTOP}/sys/contrib/openzfs/lib/libzdb
.PATH: ${SRCTOP}/sys/contrib/openzfs/include

LIB=	zdb
PACKAGE=	zfs

INCS=	libzdb.h
SRCS=	libzdb.c

WARNS?=	2
CSTD=	c99

# Header search path; the order of the directories is significant.
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include \
	-I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include \
	-I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/os/freebsd \
	-I${SRCTOP}/sys/contrib/openzfs/include/os/freebsd/zfs \
	-I${SRCTOP}/sys \
	-I${SRCTOP}/cddl/compat/opensolaris/include
# Force-include ccompile.h ahead of every source file.
CFLAGS+= -include ${SRCTOP}/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libzutil
CFLAGS+= -DHAVE_ISSETUGID -DIN_BASE
# Force-include the ZFS module configuration header as well.
CFLAGS+= -include ${SRCTOP}/sys/modules/zfs/zfs_config.h

.include <bsd.lib.mk>

View file

@ -0,0 +1,14 @@
# Autogenerated - do NOT edit!
DIRDEPS = \
include \
lib/${CSU_DIR} \
lib/libc \
lib/libcompiler_rt \
.include <dirdeps.mk>
.if ${DEP_RELDIR} == ${_DEP_RELDIR}
# local dependencies - needed for -jN in clean tree
.endif

View file

@ -23,7 +23,7 @@ CFLAGS+= \
-include ${ZFSTOP}/include/os/freebsd/spl/sys/ccompile.h \
-DHAVE_ISSETUGID
LIBADD= nvpair umem uutil zfs spl avl zutil zpool crypto
LIBADD= nvpair umem uutil zdb zfs spl avl zutil zpool crypto
CFLAGS.gcc+= -fms-extensions
# Since there are many asserts in this program, it makes no sense to compile

View file

@ -153,7 +153,7 @@ CRUNCH_LIBS_zfs+= ${LIBBE} \
${LIBNVPAIR}
CRUNCH_LIBS_bectl+= ${CRUNCH_LIBS_zfs}
CRUNCH_LIBS_zpool+= ${CRUNCH_LIBS_zfs}
CRUNCH_LIBS_zdb+= ${CRUNCH_LIBS_zfs}
CRUNCH_LIBS_zdb+= ${CRUNCH_LIBS_zfs} ${LIBZDB}
.else
# liblzma needs pthread
CRUNCH_LIBS+= -lpthread

View file

@ -167,6 +167,7 @@ LIBXPG4?= ${LIBDESTDIR}${LIBDIR_BASE}/libxpg4.a
LIBY?= ${LIBDESTDIR}${LIBDIR_BASE}/liby.a
LIBYPCLNT?= ${LIBDESTDIR}${LIBDIR_BASE}/libypclnt.a
LIBZ?= ${LIBDESTDIR}${LIBDIR_BASE}/libz.a
LIBZDB?= ${LIBDESTDIR}${LIBDIR_BASE}/libzdb.a
LIBZFS?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfs.a
LIBZFS_CORE?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfs_core.a
LIBZFSBOOTENV?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfsbootenv.a

View file

@ -223,6 +223,7 @@ _LIBRARIES= \
y \
ypclnt \
z \
zdb \
zfs_core \
zfs \
zfsbootenv \
@ -683,6 +684,8 @@ LIBNVPAIRDIR= ${_LIB_OBJTOP}/cddl/lib/libnvpair
LIBNVPAIR?= ${LIBNVPAIRDIR}/libnvpair${PIE_SUFFIX}.a
LIBUMEMDIR= ${_LIB_OBJTOP}/cddl/lib/libumem
LIBUUTILDIR= ${_LIB_OBJTOP}/cddl/lib/libuutil
LIBZDBDIR= ${_LIB_OBJTOP}/cddl/lib/libzdb
LIBZDB?= ${LIBZDBDIR}/libzdb${PIE_SUFFIX}.a
LIBZFSDIR= ${_LIB_OBJTOP}/cddl/lib/libzfs
LIBZFS?= ${LIBZFSDIR}/libzfs${PIE_SUFFIX}.a
LIBZFS_COREDIR= ${_LIB_OBJTOP}/cddl/lib/libzfs_core

View file

@ -10,6 +10,7 @@ zdb_SOURCES = \
%D%/zdb_il.c
zdb_LDADD = \
libzdb.la \
libzpool.la \
libzfs_core.la \
libnvpair.la

View file

@ -88,36 +88,10 @@
#include <libnvpair.h>
#include <libzutil.h>
#include <libzdb.h>
#include "zdb.h"
#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
zio_checksum_table[(idx)].ci_name : "UNKNOWN")
#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \
(idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ? \
DMU_OT_ZAP_OTHER : \
(idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)
/* Some platforms require part of inode IDs to be remapped */
#ifdef __APPLE__
#define ZDB_MAP_OBJECT_ID(obj) INO_XNUTOZFS(obj, 2)
#else
#define ZDB_MAP_OBJECT_ID(obj) (obj)
#endif
/*
 * Return a printable name for a DMU object type.
 *
 * Types below DMU_OT_NUMTYPES are named from dmu_ot[].  Types with the
 * DMU_OT_NEWTYPE bit set are named from dmu_ot_byteswap[], indexed by
 * their byteswap bits, provided that index is in range.  Everything else
 * is reported as "UNKNOWN".
 */
static const char *
zdb_ot_name(dmu_object_type_t type)
{
	if (type < DMU_OT_NUMTYPES)
		return (dmu_ot[type].ot_name);

	if (!(type & DMU_OT_NEWTYPE))
		return ("UNKNOWN");

	/* Never index dmu_ot_byteswap[] with an out-of-range value. */
	if ((type & DMU_OT_BYTESWAP_MASK) >= DMU_BSWAP_NUMFUNCS)
		return ("UNKNOWN");

	return (dmu_ot_byteswap[type & DMU_OT_BYTESWAP_MASK].ob_name);
}
extern int reference_tracking_enable;
extern int zfs_recover;
@ -135,35 +109,12 @@ typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
static uint64_t *zopt_metaslab = NULL;
static unsigned zopt_metaslab_args = 0;
typedef struct zopt_object_range {
uint64_t zor_obj_start;
uint64_t zor_obj_end;
uint64_t zor_flags;
} zopt_object_range_t;
static zopt_object_range_t *zopt_object_ranges = NULL;
static unsigned zopt_object_args = 0;
static int flagbits[256];
#define ZOR_FLAG_PLAIN_FILE 0x0001
#define ZOR_FLAG_DIRECTORY 0x0002
#define ZOR_FLAG_SPACE_MAP 0x0004
#define ZOR_FLAG_ZAP 0x0008
#define ZOR_FLAG_ALL_TYPES -1
#define ZOR_SUPPORTED_FLAGS (ZOR_FLAG_PLAIN_FILE | \
ZOR_FLAG_DIRECTORY | \
ZOR_FLAG_SPACE_MAP | \
ZOR_FLAG_ZAP)
#define ZDB_FLAG_CHECKSUM 0x0001
#define ZDB_FLAG_DECOMPRESS 0x0002
#define ZDB_FLAG_BSWAP 0x0004
#define ZDB_FLAG_GBH 0x0008
#define ZDB_FLAG_INDIRECT 0x0010
#define ZDB_FLAG_RAW 0x0020
#define ZDB_FLAG_PRINT_BLKPTR 0x0040
#define ZDB_FLAG_VERBOSE 0x0080
static uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
static int leaked_objects = 0;
@ -176,62 +127,7 @@ static void mos_obj_refd_multiple(uint64_t);
static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
dmu_tx_t *tx);
typedef struct sublivelist_verify {
/* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
zfs_btree_t sv_pair;
/* ALLOC's without a matching FREE, accumulates across sub-livelists */
zfs_btree_t sv_leftover;
} sublivelist_verify_t;
/*
 * Three-way comparison of two block pointers, usable as a sort/tree
 * comparator.  Keys, in order of precedence:
 *   1. vdev of dva[0]
 *   2. offset of dva[0]
 *   3. blk_birth
 * Returns -1, 0 or +1.
 */
static int
livelist_compare(const void *larg, const void *rarg)
{
	const blkptr_t *lbp = larg;
	const blkptr_t *rbp = rarg;

	/* Primary key: the vdev of the first DVA. */
	uint64_t lvdev = DVA_GET_VDEV(&lbp->blk_dva[0]);
	uint64_t rvdev = DVA_GET_VDEV(&rbp->blk_dva[0]);
	if (lvdev != rvdev)
		return (lvdev < rvdev ? -1 : +1);

	/* Same vdev: order by the offset of the first DVA. */
	uint64_t loff = DVA_GET_OFFSET(&lbp->blk_dva[0]);
	uint64_t roff = DVA_GET_OFFSET(&rbp->blk_dva[0]);
	if (loff != roff)
		return (loff < roff ? -1 : +1);

	/*
	 * blkptrs are stored without cancelling FREE/ALLOC pairs, so two
	 * entries may share an offset; break the tie on the birth txg.
	 */
	if (lbp->blk_birth != rbp->blk_birth)
		return (lbp->blk_birth < rbp->blk_birth ? -1 : +1);

	return (0);
}
typedef struct sublivelist_verify_block {
dva_t svb_dva;
/*
* We need this to check if the block marked as allocated
* in the livelist was freed (and potentially reallocated)
* in the metaslab spacemaps at a later TXG.
*/
uint64_t svb_allocated_txg;
} sublivelist_verify_block_t;
static void zdb_print_blkptr(const blkptr_t *bp, int flags);

View file

@ -2161,6 +2161,7 @@ typedef struct status_cbdata {
boolean_t cb_explain;
boolean_t cb_first;
boolean_t cb_dedup_stats;
boolean_t cb_print_unhealthy;
boolean_t cb_print_status;
boolean_t cb_print_slow_ios;
boolean_t cb_print_vdev_init;
@ -2357,6 +2358,35 @@ health_str_to_color(const char *health)
return (NULL);
}
/*
 * Per-vdev health predicate, invoked for each leaf vdev.
 *
 * Returns 0 when the vdev is healthy and 1 otherwise.  A vdev counts as
 * unhealthy when:
 *   - its vdev stats cannot be looked up,
 *   - its state is not VDEV_STATE_HEALTHY (i.e. not ONLINE),
 *   - it has read, write, or checksum errors, or
 *   - slow IO reporting was requested (-s) and it has slow IOs.
 */
static int
vdev_health_check_cb(void *hdl_data, nvlist_t *nv, void *data)
{
	(void) hdl_data;

	status_cbdata_t *cb = data;
	vdev_stat_t *vs;
	uint_t vsc;

	if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &vsc) != 0)
		return (1);

	if (vs->vs_state != VDEV_STATE_HEALTHY)
		return (1);

	if (vs->vs_read_errors != 0 || vs->vs_write_errors != 0 ||
	    vs->vs_checksum_errors != 0)
		return (1);

	/* Slow IOs only matter when the caller asked to see them. */
	return ((cb->cb_print_slow_ios && vs->vs_slow_ios != 0) ? 1 : 0);
}
/*
* Print out configuration state as requested by status_callback.
*/
@ -2375,7 +2405,8 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
const char *state;
const char *type;
const char *path = NULL;
const char *rcolor = NULL, *wcolor = NULL, *ccolor = NULL;
const char *rcolor = NULL, *wcolor = NULL, *ccolor = NULL,
*scolor = NULL;
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
@ -2402,6 +2433,15 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
state = gettext("AVAIL");
}
/*
* If '-e' is specified then top-level vdevs and their children
* can be pruned if all of their leaves are healthy.
*/
if (cb->cb_print_unhealthy && depth > 0 &&
for_each_vdev_in_nvlist(nv, vdev_health_check_cb, cb) == 0) {
return;
}
printf_color(health_str_to_color(state),
"\t%*s%-*s %-8s", depth, "", cb->cb_namewidth - depth,
name, state);
@ -2416,6 +2456,9 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
if (vs->vs_checksum_errors)
ccolor = ANSI_RED;
if (vs->vs_slow_ios)
scolor = ANSI_BLUE;
if (cb->cb_literal) {
fputc(' ', stdout);
printf_color(rcolor, "%5llu",
@ -2448,9 +2491,10 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
}
if (cb->cb_literal)
printf(" %5llu", (u_longlong_t)vs->vs_slow_ios);
printf_color(scolor, " %5llu",
(u_longlong_t)vs->vs_slow_ios);
else
printf(" %5s", rbuf);
printf_color(scolor, " %5s", rbuf);
}
if (cb->cb_print_power) {
if (children == 0) {
@ -9106,9 +9150,11 @@ status_callback(zpool_handle_t *zhp, void *data)
(void) printf(gettext(
"errors: No known data errors\n"));
} else if (!cbp->cb_verbose) {
color_start(ANSI_RED);
(void) printf(gettext("errors: %llu data "
"errors, use '-v' for a list\n"),
(u_longlong_t)nerr);
color_end();
} else {
print_error_log(zhp);
}
@ -9129,6 +9175,7 @@ status_callback(zpool_handle_t *zhp, void *data)
* [pool] [interval [count]]
*
* -c CMD For each vdev, run command CMD
* -e Display only unhealthy vdevs
* -i Display vdev initialization status.
* -g Display guid for individual vdev name.
* -L Follow links when resolving vdev path name.
@ -9160,7 +9207,7 @@ zpool_do_status(int argc, char **argv)
};
/* check options */
while ((c = getopt_long(argc, argv, "c:igLpPsvxDtT:", long_options,
while ((c = getopt_long(argc, argv, "c:eigLpPsvxDtT:", long_options,
NULL)) != -1) {
switch (c) {
case 'c':
@ -9187,6 +9234,9 @@ zpool_do_status(int argc, char **argv)
}
cmd = optarg;
break;
case 'e':
cb.cb_print_unhealthy = B_TRUE;
break;
case 'i':
cb.cb_print_vdev_init = B_TRUE;
break;

View file

@ -524,6 +524,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEVNAME], [
dnl #
dnl # 5.19 API: blkdev_issue_secure_erase()
dnl # 4.7 API: __blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
@ -539,6 +540,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
sector, nr_sects, GFP_KERNEL);
])
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_flags], [
#include <linux/blkdev.h>
],[
struct block_device *bdev = NULL;
sector_t sector = 0;
sector_t nr_sects = 0;
unsigned long flags = 0;
struct bio *biop = NULL;
int error __attribute__ ((unused));
error = __blkdev_issue_discard(bdev,
sector, nr_sects, GFP_KERNEL, flags, &biop);
])
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [
#include <linux/blkdev.h>
],[
@ -562,13 +577,22 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [
],[
AC_MSG_RESULT(no)
AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
AC_MSG_CHECKING([whether __blkdev_issue_discard() is available])
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_flags], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1,
[blkdev_issue_discard() is available])
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC, 1,
[__blkdev_issue_discard() is available])
],[
ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()])
AC_MSG_RESULT(no)
AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1,
[blkdev_issue_discard() is available])
],[
ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()])
])
])
])
])

View file

@ -186,6 +186,7 @@ USER_H = \
libuutil.h \
libuutil_common.h \
libuutil_impl.h \
libzdb.h \
libzfs.h \
libzfs_core.h \
libzfsbootenv.h \

View file

@ -0,0 +1,68 @@
/*
 * libzdb.h - definitions shared between zdb and libzdb.
 *
 * This header defines complete struct types, so it is wrapped in an
 * include guard: including it twice in one translation unit would
 * otherwise be a redefinition error.
 */
#ifndef	_LIBZDB_H
#define	_LIBZDB_H

/* Bounds-checked name lookups; out-of-range indexes yield "UNKNOWN". */
#define	ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
	zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
	zio_checksum_table[(idx)].ci_name : "UNKNOWN")

/*
 * Fold an object type into an index below or equal to DMU_OT_NUMTYPES:
 * values below DMU_OT_NUMTYPES map to themselves, the DMU_OTN_ZAP_* types
 * map to DMU_OT_ZAP_OTHER, the DMU_OTN_UINT64_* types map to
 * DMU_OT_UINT64_OTHER, and anything else maps to DMU_OT_NUMTYPES.
 */
#define	ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \
	(idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ? \
	DMU_OT_ZAP_OTHER : \
	(idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
	DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)

/* Some platforms require part of inode IDs to be remapped */
#ifdef __APPLE__
#define	ZDB_MAP_OBJECT_ID(obj) INO_XNUTOZFS(obj, 2)
#else
#define	ZDB_MAP_OBJECT_ID(obj) (obj)
#endif

/* Object-type selection bits (presumably stored in zor_flags below). */
#define	ZOR_FLAG_PLAIN_FILE	0x0001
#define	ZOR_FLAG_DIRECTORY	0x0002
#define	ZOR_FLAG_SPACE_MAP	0x0004
#define	ZOR_FLAG_ZAP		0x0008
/* Parenthesized so the negative literal is safe in any expression. */
#define	ZOR_FLAG_ALL_TYPES	(-1)
#define	ZOR_SUPPORTED_FLAGS	(ZOR_FLAG_PLAIN_FILE | \
				ZOR_FLAG_DIRECTORY | \
				ZOR_FLAG_SPACE_MAP | \
				ZOR_FLAG_ZAP)

#define	ZDB_FLAG_CHECKSUM	0x0001
#define	ZDB_FLAG_DECOMPRESS	0x0002
#define	ZDB_FLAG_BSWAP		0x0004
#define	ZDB_FLAG_GBH		0x0008
#define	ZDB_FLAG_INDIRECT	0x0010
#define	ZDB_FLAG_RAW		0x0020
#define	ZDB_FLAG_PRINT_BLKPTR	0x0040
#define	ZDB_FLAG_VERBOSE	0x0080

/*
 * NOTE(review): a struct with no members is a GNU C extension, not
 * ISO C.  Left as-is so existing users of zdb_ctx_t are unaffected.
 */
typedef struct zdb_ctx {
} zdb_ctx_t;

/* An object ID range [zor_obj_start, zor_obj_end] plus its flag bits. */
typedef struct zopt_object_range {
	uint64_t zor_obj_start;
	uint64_t zor_obj_end;
	uint64_t zor_flags;
} zopt_object_range_t;

typedef struct sublivelist_verify {
	/* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
	zfs_btree_t sv_pair;

	/* ALLOC's without a matching FREE, accumulates across sub-livelists */
	zfs_btree_t sv_leftover;
} sublivelist_verify_t;

typedef struct sublivelist_verify_block {
	dva_t svb_dva;

	/*
	 * We need this to check if the block marked as allocated
	 * in the livelist was freed (and potentially reallocated)
	 * in the metaslab spacemaps at a later TXG.
	 */
	uint64_t svb_allocated_txg;
} sublivelist_verify_block_t;

/* Printable name of a DMU object type, or "UNKNOWN". */
const char *zdb_ot_name(dmu_object_type_t type);

/* Three-way blkptr comparator: dva[0] vdev, then offset, then birth txg. */
int livelist_compare(const void *larg, const void *rarg);

#endif	/* _LIBZDB_H */

View file

@ -285,7 +285,6 @@ typedef struct zfid_long {
#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t))
extern int zfs_super_owner;
extern int zfs_bclone_enabled;
extern void zfs_init(void);
extern void zfs_fini(void);

View file

@ -45,8 +45,6 @@ extern "C" {
typedef struct zfsvfs zfsvfs_t;
struct znode;
extern int zfs_bclone_enabled;
/*
* This structure emulates the vfs_t from other platforms. Its purpose
* is to facilitate the handling of mount options and minimize structural

View file

@ -24,8 +24,11 @@
#ifndef _SYS_FS_ZFS_VNOPS_H
#define _SYS_FS_ZFS_VNOPS_H
#include <sys/zfs_vnops_os.h>
extern int zfs_bclone_enabled;
extern int zfs_fsync(znode_t *, int, cred_t *);
extern int zfs_read(znode_t *, zfs_uio_t *, int, cred_t *);
extern int zfs_write(znode_t *, zfs_uio_t *, int, cred_t *);

View file

@ -9,11 +9,11 @@
# These library interfaces are subject to change at any time.
#
#
# CMDS: zhack/ztest/zdb/ zfs/zpool/zed/
# CMDS: zhack/ztest/ zfs/zpool/zed/
# raidz_{test,bench} zinject/zstream
# | |
# LIBS: | | libzfsbootenv*
# | | |
# |--libzdb--zdb | |
# | | |
# libzpool libzfs* ----------------+
# | | | \ / | | |
@ -62,6 +62,7 @@ include $(srcdir)/%D%/libspl/Makefile.am
include $(srcdir)/%D%/libtpool/Makefile.am
include $(srcdir)/%D%/libunicode/Makefile.am
include $(srcdir)/%D%/libuutil/Makefile.am
include $(srcdir)/%D%/libzdb/Makefile.am
include $(srcdir)/%D%/libzfs_core/Makefile.am
include $(srcdir)/%D%/libzfs/Makefile.am
include $(srcdir)/%D%/libzfsbootenv/Makefile.am

View file

@ -0,0 +1,7 @@
libzdb_la_CFLAGS = $(AM_CFLAGS) $(LIBRARY_CFLAGS)
libzdb_la_CFLAGS += -fvisibility=hidden
noinst_LTLIBRARIES += libzdb.la
libzdb_la_SOURCES = \
%D%/libzdb.c

View file

@ -0,0 +1,102 @@
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <getopt.h>
#include <openssl/evp.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_sa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_bookmark.h>
#include <sys/dbuf.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <sys/dmu_send.h>
#include <sys/dmu_traverse.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
#include <sys/arc_impl.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/blkptr.h>
#include <sys/dsl_crypt.h>
#include <sys/dsl_scan.h>
#include <sys/btree.h>
#include <sys/brt.h>
#include <sys/brt_impl.h>
#include <zfs_comutil.h>
#include <sys/zstd/zstd.h>
#include <libnvpair.h>
#include <libzutil.h>
#include <libzdb.h>
/*
 * Return a printable name for a DMU object type.
 *
 * Types below DMU_OT_NUMTYPES are named from dmu_ot[].  Types with the
 * DMU_OT_NEWTYPE bit set are named from dmu_ot_byteswap[], indexed by
 * their byteswap bits, provided that index is in range.  Everything else
 * is reported as "UNKNOWN".
 */
const char *
zdb_ot_name(dmu_object_type_t type)
{
	if (type < DMU_OT_NUMTYPES)
		return (dmu_ot[type].ot_name);

	if (!(type & DMU_OT_NEWTYPE))
		return ("UNKNOWN");

	/* Never index dmu_ot_byteswap[] with an out-of-range value. */
	if ((type & DMU_OT_BYTESWAP_MASK) >= DMU_BSWAP_NUMFUNCS)
		return ("UNKNOWN");

	return (dmu_ot_byteswap[type & DMU_OT_BYTESWAP_MASK].ob_name);
}
/*
 * Three-way comparison of two block pointers, usable as a sort/tree
 * comparator.  Keys, in order of precedence:
 *   1. vdev of dva[0]
 *   2. offset of dva[0]
 *   3. blk_birth
 * Returns -1, 0 or +1.
 */
int
livelist_compare(const void *larg, const void *rarg)
{
	const blkptr_t *lbp = larg;
	const blkptr_t *rbp = rarg;

	/* Primary key: the vdev of the first DVA. */
	uint64_t lvdev = DVA_GET_VDEV(&lbp->blk_dva[0]);
	uint64_t rvdev = DVA_GET_VDEV(&rbp->blk_dva[0]);
	if (lvdev != rvdev)
		return (lvdev < rvdev ? -1 : +1);

	/* Same vdev: order by the offset of the first DVA. */
	uint64_t loff = DVA_GET_OFFSET(&lbp->blk_dva[0]);
	uint64_t roff = DVA_GET_OFFSET(&rbp->blk_dva[0]);
	if (loff != roff)
		return (loff < roff ? -1 : +1);

	/*
	 * blkptrs are stored without cancelling FREE/ALLOC pairs, so two
	 * entries may share an offset; break the tie on the birth txg.
	 */
	if (lbp->blk_birth != rbp->blk_birth)
		return (lbp->blk_birth < rbp->blk_birth ? -1 : +1);

	return (0);
}

View file

@ -1159,6 +1159,15 @@ Enable the experimental block cloning feature.
If this setting is 0, then even if feature@block_cloning is enabled,
attempts to clone blocks will act as though the feature is disabled.
.
.It Sy zfs_bclone_wait_dirty Ns = Ns Sy 0 Ns | Ns 1 Pq int
When set to 1, the FICLONE and FICLONERANGE ioctls wait for dirty data to be
written to disk.
This allows the clone operation to reliably succeed when a file is
modified and then immediately cloned.
For small files this may be slower than making a copy of the file.
Therefore, this setting defaults to 0 which causes a clone operation to
immediately fail when encountering a dirty block.
.
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
Select a BLAKE3 implementation.
.Pp

View file

@ -36,7 +36,7 @@
.Sh SYNOPSIS
.Nm zpool
.Cm status
.Op Fl DigLpPstvx
.Op Fl DeigLpPstvx
.Op Fl T Sy u Ns | Ns Sy d
.Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns
.Oo Ar pool Oc Ns
@ -69,6 +69,8 @@ See the
option of
.Nm zpool Cm iostat
for complete details.
.It Fl e
Only show unhealthy vdevs (not-ONLINE or with errors).
.It Fl i
Display vdev initialization status.
.It Fl g

View file

@ -111,10 +111,11 @@ static const char *upvalname (Proto *p, int uv) {
static const char *findvararg (CallInfo *ci, int n, StkId *pos) {
int nparams = clLvalue(ci->func)->p->numparams;
if (n >= ci->u.l.base - ci->func - nparams)
int nvararg = cast_int(ci->u.l.base - ci->func) - nparams;
if (n <= -nvararg)
return NULL; /* no such vararg */
else {
*pos = ci->func + nparams + n;
*pos = ci->func + nparams - n;
return "(*vararg)"; /* generic name for any vararg */
}
}
@ -126,7 +127,7 @@ static const char *findlocal (lua_State *L, CallInfo *ci, int n,
StkId base;
if (isLua(ci)) {
if (n < 0) /* access to vararg values? */
return findvararg(ci, -n, pos);
return findvararg(ci, n, pos);
else {
base = ci->u.l.base;
name = luaF_getlocalname(ci_func(ci)->p, n, currentpc(ci));

View file

@ -89,10 +89,6 @@ int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
"Debug level");
int zfs_bclone_enabled = 1;
SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
&zfs_bclone_enabled, 0, "Enable block cloning");
struct zfs_jailparam {
int mount_snapshot;
};

View file

@ -862,27 +862,66 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
return (0);
}
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \
defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC)
/*
 * bio completion callback for discard requests; it is the bi_end_io
 * handler that vdev_issue_discard_trim() installs on the bio it submits.
 *
 * Copies the bio's completion status into zio->io_error, releases the bio
 * with bio_put(), reports a non-zero error through vdev_disk_error(), and
 * finally passes the zio to zio_interrupt().
 */
BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
{
/* The submitter stored the owning zio in bi_private. */
zio_t *zio = bio->bi_private;
#ifdef HAVE_1ARG_BIO_END_IO_T
/* One-argument callback form: the status is read from the bio itself. */
zio->io_error = BIO_END_IO_ERROR(bio);
#else
/*
 * The status arrives as the 'error' argument and is negated before
 * being stored (presumably a negative errno -- confirm).
 */
zio->io_error = -error;
#endif
/* The status has been captured above; the bio is not used after this. */
bio_put(bio);
if (zio->io_error)
vdev_disk_error(zio);
/* Last step: the zio is handed off only after io_error is final. */
zio_interrupt(zio);
}
/*
 * Build a discard request for the zio's range with
 * __blkdev_issue_discard() and, if a bio was produced, submit it with
 * vdev_disk_discard_end_io() as its completion handler.
 *
 * The zio's byte offset and size are shifted right by 9, i.e. expressed
 * in 512-byte units.  'flags' is forwarded only when
 * BLKDEV_DISCARD_SECURE is defined; otherwise the flag-less form of the
 * call is used and 'flags' is ignored.
 *
 * Returns the negated return value of __blkdev_issue_discard().  When
 * that value is non-zero, or no bio was returned, nothing is submitted.
 */
static int
vdev_issue_discard_trim(zio_t *zio, unsigned long flags)
{
	vdev_disk_t *vd = zio->io_vd->vdev_tsd;
	struct bio *bio = NULL;
	int err;

#if defined(BLKDEV_DISCARD_SECURE)
	err = - __blkdev_issue_discard(BDH_BDEV(vd->vd_bdh),
	    zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, flags, &bio);
#else
	(void) flags;
	err = - __blkdev_issue_discard(BDH_BDEV(vd->vd_bdh),
	    zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, &bio);
#endif

	/* Nothing to submit on failure or when no bio was built. */
	if (err != 0 || bio == NULL)
		return (err);

	bio->bi_private = zio;
	bio->bi_end_io = vdev_disk_discard_end_io;
	vdev_submit_bio(bio);

	return (err);
}
#endif
static int
vdev_disk_io_trim(zio_t *zio)
{
vdev_t *v = zio->io_vd;
vdev_disk_t *vd = v->vdev_tsd;
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
if (zio->io_trim_flags & ZIO_TRIM_SECURE) {
return (-blkdev_issue_secure_erase(BDH_BDEV(vd->vd_bdh),
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
} else {
return (-blkdev_issue_discard(BDH_BDEV(vd->vd_bdh),
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
}
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
unsigned long trim_flags = 0;
#if defined(BLKDEV_DISCARD_SECURE)
if (zio->io_trim_flags & ZIO_TRIM_SECURE)
if (zio->io_trim_flags & ZIO_TRIM_SECURE) {
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
return (-blkdev_issue_secure_erase(
BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
#elif defined(BLKDEV_DISCARD_SECURE)
trim_flags |= BLKDEV_DISCARD_SECURE;
#endif
return (-blkdev_issue_discard(BDH_BDEV(vd->vd_bdh),
}
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \
defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC)
return (vdev_issue_discard_trim(zio, trim_flags));
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
return (-blkdev_issue_discard(
BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags));
#else
#error "Unsupported kernel"
@ -968,7 +1007,12 @@ vdev_disk_io_start(zio_t *zio)
case ZIO_TYPE_TRIM:
zio->io_error = vdev_disk_io_trim(zio);
rw_exit(&vd->vd_lock);
#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
if (zio->io_trim_flags & ZIO_TRIM_SECURE)
zio_interrupt(zio);
#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
zio_interrupt(zio);
#endif
return;
default:

View file

@ -4255,9 +4255,4 @@ EXPORT_SYMBOL(zfs_map);
/* CSTYLED */
module_param(zfs_delete_blocks, ulong, 0644);
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
/* CSTYLED */
module_param(zfs_bclone_enabled, uint, 0644);
MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
#endif

View file

@ -31,8 +31,6 @@
#include <sys/zfs_vnops.h>
#include <sys/zfeature.h>
int zfs_bclone_enabled = 1;
/*
* Clone part of a file via block cloning.
*
@ -40,7 +38,7 @@ int zfs_bclone_enabled = 1;
* care of that depending on how it was called.
*/
static ssize_t
__zpl_clone_file_range(struct file *src_file, loff_t src_off,
zpl_clone_file_range_impl(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, size_t len)
{
struct inode *src_i = file_inode(src_file);
@ -96,11 +94,12 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
{
ssize_t ret;
/* Flags is reserved for future extensions and must be zero. */
if (flags != 0)
return (-EINVAL);
/* Try to do it via zfs_clone_range() */
ret = __zpl_clone_file_range(src_file, src_off,
/* Try to do it via zfs_clone_range() and allow shortening. */
ret = zpl_clone_file_range_impl(src_file, src_off,
dst_file, dst_off, len);
#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
@ -137,6 +136,11 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
* FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
* range in both files and if they're the same, arrange for them to be backed
* by the same storage.
*
* REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range
* if we want. It's designed for filesystems that may need to shorten the
* length for alignment, EOF, or any other requirement. ZFS may shorten the
* request when there is outstanding dirty data which hasn't been written.
*/
loff_t
zpl_remap_file_range(struct file *src_file, loff_t src_off,
@ -145,24 +149,21 @@ zpl_remap_file_range(struct file *src_file, loff_t src_off,
if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
return (-EINVAL);
/*
* REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given
* range if we want. Its designed for filesystems that make data past
* EOF available, and don't want it to be visible in both files. ZFS
* doesn't do that, so we just turn the flag off.
*/
flags &= ~REMAP_FILE_CAN_SHORTEN;
/* No support for dedup yet */
if (flags & REMAP_FILE_DEDUP)
/* No support for dedup yet */
return (-EOPNOTSUPP);
/* Zero length means to clone everything to the end of the file */
if (len == 0)
len = i_size_read(file_inode(src_file)) - src_off;
return (__zpl_clone_file_range(src_file, src_off,
dst_file, dst_off, len));
ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
dst_file, dst_off, len);
if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len)
ret = -EINVAL;
return (ret);
}
#endif /* HAVE_VFS_REMAP_FILE_RANGE */
@ -179,8 +180,14 @@ zpl_clone_file_range(struct file *src_file, loff_t src_off,
if (len == 0)
len = i_size_read(file_inode(src_file)) - src_off;
return (__zpl_clone_file_range(src_file, src_off,
dst_file, dst_off, len));
/* The entire length must be cloned or this is an error. */
ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
dst_file, dst_off, len);
if (ret >= 0 && ret != len)
ret = -EINVAL;
return (ret);
}
#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
@ -214,8 +221,7 @@ zpl_ioctl_ficlone(struct file *dst_file, void *arg)
size_t len = i_size_read(file_inode(src_file));
ssize_t ret =
__zpl_clone_file_range(src_file, 0, dst_file, 0, len);
ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len);
fput(src_file);
@ -253,7 +259,7 @@ zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
if (len == 0)
len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset,
ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset,
dst_file, fcr.fcr_dest_offset, len);
fput(src_file);

View file

@ -58,6 +58,26 @@
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
/*
* Enable the experimental block cloning feature. If this setting is 0, then
* even if feature@block_cloning is enabled, attempts to clone blocks will act
* as though the feature is disabled.
*/
int zfs_bclone_enabled = 1;
/*
* When set, zfs_clone_range() waits for dirty data to be written to disk.
* This allows the clone operation to reliably succeed when a file is modified
* and then immediately cloned. For small files this may be slower than making
* a copy of the file and is therefore not the default. However, in certain
* scenarios this behavior may be desirable so a tunable is provided.
*/
static int zfs_bclone_wait_dirty = 0;
/*
* Maximum bytes to read per chunk in zfs_read().
*/
static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024;
int
zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
@ -182,8 +202,6 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr)
return (error);
}
static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
/*
* Read bytes from specified file into supplied buffer.
*
@ -1049,6 +1067,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
size_t maxblocks, nbps;
uint_t inblksz;
uint64_t clear_setid_bits_txg = 0;
uint64_t last_synced_txg = 0;
inoff = *inoffp;
outoff = *outoffp;
@ -1287,15 +1306,23 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
}
nbps = maxblocks;
last_synced_txg = spa_last_synced_txg(dmu_objset_spa(inos));
error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, bps,
&nbps);
if (error != 0) {
/*
* If we are trying to clone a block that was created
* in the current transaction group, error will be
* EAGAIN here, which we can just return to the caller
* so it can fallback if it likes.
* in the current transaction group, the error will be
* EAGAIN here. Based on zfs_bclone_wait_dirty either
* return a shortened range to the caller so it can
* fallback, or wait for the next TXG and check again.
*/
if (error == EAGAIN && zfs_bclone_wait_dirty) {
txg_wait_synced(dmu_objset_pool(inos),
last_synced_txg + 1);
continue;
}
break;
}
@ -1517,3 +1544,9 @@ EXPORT_SYMBOL(zfs_clone_range_replay);
ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW,
"Bytes to read per chunk");
ZFS_MODULE_PARAM(zfs, zfs_, bclone_enabled, INT, ZMOD_RW,
"Enable block cloning");
ZFS_MODULE_PARAM(zfs, zfs_, bclone_wait_dirty, INT, ZMOD_RW,
"Wait for dirty blocks when cloning");

View file

@ -536,7 +536,8 @@ tags = ['functional', 'cli_root', 'zpool_split']
tests = ['zpool_status_001_pos', 'zpool_status_002_pos',
'zpool_status_003_pos', 'zpool_status_004_pos',
'zpool_status_005_pos', 'zpool_status_006_pos',
'zpool_status_007_pos', 'zpool_status_features_001_pos']
'zpool_status_007_pos', 'zpool_status_008_pos',
'zpool_status_features_001_pos']
tags = ['functional', 'cli_root', 'zpool_status']
[tests/functional/cli_root/zpool_sync]
@ -631,7 +632,7 @@ tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos',
tags = ['functional', 'compression']
[tests/functional/cp_files]
tests = ['cp_files_001_pos', 'cp_stress']
tests = ['cp_files_001_pos', 'cp_files_002_pos', 'cp_stress']
tags = ['functional', 'cp_files']
[tests/functional/crtime]

View file

@ -176,6 +176,7 @@ if sys.platform.startswith('freebsd'):
'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
'cli_root/zfs_unshare/zfs_unshare_008_pos': ['SKIP', na_reason],
'cp_files/cp_files_002_pos': ['SKIP', na_reason],
'link_count/link_count_001': ['SKIP', na_reason],
'casenorm/mixed_create_failure': ['FAIL', 13215],
'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
@ -312,6 +313,7 @@ elif sys.platform.startswith('linux'):
['SKIP', cfr_reason],
'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
'cp_files/cp_files_002_pos': ['SKIP', cfr_reason],
'fault/auto_online_002_pos': ['FAIL', 11889],
'fault/auto_replace_001_pos': ['FAIL', 14851],
'fault/auto_spare_002_pos': ['FAIL', 11889],

View file

@ -94,6 +94,7 @@ VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
BCLONE_WAIT_DIRTY zfs_bclone_wait_dirty zfs_bclone_wait_dirty
XATTR_COMPAT xattr_compat zfs_xattr_compat
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max

View file

@ -1239,6 +1239,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_status/zpool_status_005_pos.ksh \
functional/cli_root/zpool_status/zpool_status_006_pos.ksh \
functional/cli_root/zpool_status/zpool_status_007_pos.ksh \
functional/cli_root/zpool_status/zpool_status_008_pos.ksh \
functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \
functional/cli_root/zpool_sync/cleanup.ksh \
functional/cli_root/zpool_sync/setup.ksh \
@ -1394,6 +1395,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/compression/setup.ksh \
functional/cp_files/cleanup.ksh \
functional/cp_files/cp_files_001_pos.ksh \
functional/cp_files/cp_files_002_pos.ksh \
functional/cp_files/cp_stress.ksh \
functional/cp_files/setup.ksh \
functional/crtime/cleanup.ksh \

View file

@ -51,7 +51,7 @@ else
fi
set -A args "" "-x" "-v" "-x $testpool" "-v $testpool" "-xv $testpool" \
"-vx $testpool"
"-vx $testpool" "-e $testpool" "-es $testpool"
log_assert "Executing 'zpool status' with correct options succeeds"
@ -64,4 +64,6 @@ while [[ $i -lt ${#args[*]} ]]; do
(( i = i + 1 ))
done
cleanup
log_pass "'zpool status' with correct options succeeded"

View file

@ -37,6 +37,7 @@
# 3. Read the file
# 4. Take a snapshot and make a clone
# 5. Verify we see "snapshot, clone and filesystem" output in 'zpool status -v'
# and 'zpool status -ev'
function cleanup
{
@ -68,6 +69,7 @@ log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"
log_must eval "zpool status -ev | grep '$TESTPOOL2/10m_file'"
log_mustnot eval "zpool status -v | grep '$TESTFS1'"
log_pass "'zpool status -v' outputs affected filesystem, snapshot & clone"

View file

@ -0,0 +1,104 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# Verify 'zpool status -e' only shows unhealthy devices.
#
# STRATEGY:
# 1. Create zpool
# 2. Force DEGRADE, FAULT, or inject slow IOs for vdevs
# 3. Verify vdevs are reported correctly with -e and -s
# 4. Verify parents are reported as DEGRADED
# 5. Verify healthy children are not reported
#
# Restore the slow-IO threshold tunable, clear all zinject handlers, and
# remove the test pool together with its backing vdev files.
function cleanup
{
	# This function is registered with log_onexit before OLD_SLOW_IO is
	# captured, so it may run while the variable is still empty. Skip the
	# restore in that case instead of handing set_tunable64 an empty value.
	if [[ -n "$OLD_SLOW_IO" ]]; then
		log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
	fi
	zinject -c all
	poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2
	log_must rm -f $all_vdevs
}
log_assert "Verify 'zpool status -e'"
log_onexit cleanup
all_vdevs=$(echo $TESTDIR/vdev{1..6})
log_must mkdir -p $TESTDIR
log_must truncate -s $MINVDEVSIZE $all_vdevs
OLD_SLOW_IO=$(get_tunable ZIO_SLOW_IO_MS)
for raid_type in "draid2:3d:6c:1s" "raidz2"; do
log_must zpool create -f $TESTPOOL2 $raid_type $all_vdevs
# Check DEGRADED vdevs are shown.
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev4 "ONLINE"
log_must zinject -d $TESTDIR/vdev4 -A degrade $TESTPOOL2
log_must eval "zpool status -e $TESTPOOL2 | grep $TESTDIR/vdev4 | grep DEGRADED"
# Check FAULTED vdevs are shown.
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev5 "ONLINE"
log_must zinject -d $TESTDIR/vdev5 -A fault $TESTPOOL2
log_must eval "zpool status -e $TESTPOOL2 | grep $TESTDIR/vdev5 | grep FAULTED"
# Check no ONLINE vdevs are shown
log_mustnot eval "zpool status -e $TESTPOOL2 | grep ONLINE"
# Check no ONLINE slow vdevs are shown. Then mark IOs greater than
# 10ms slow, delay IOs 20ms to vdev6, check slow IOs.
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev6 "ONLINE"
log_mustnot eval "zpool status -es $TESTPOOL2 | grep ONLINE"
log_must set_tunable64 ZIO_SLOW_IO_MS 10
log_must zinject -d $TESTDIR/vdev6 -D20:100 $TESTPOOL2
log_must mkfile 1048576 /$TESTPOOL2/testfile
sync_pool $TESTPOOL2
log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
# Check vdev6 slow IOs are only shown when requested with -s.
log_mustnot eval "zpool status -e $TESTPOOL2 | grep $TESTDIR/vdev6 | grep ONLINE"
log_must eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev6 | grep ONLINE"
# Pool level and top-vdev level status must be DEGRADED.
log_must eval "zpool status -e $TESTPOOL2 | grep $TESTPOOL2 | grep DEGRADED"
log_must eval "zpool status -e $TESTPOOL2 | grep $raid_type | grep DEGRADED"
# Check that healthy vdevs[1-3] aren't shown with -e.
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev1 "ONLINE"
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev2 "ONLINE"
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev3 "ONLINE"
log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev1 | grep ONLINE"
log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev2 | grep ONLINE"
log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev3 | grep ONLINE"
log_must zinject -c all
log_must zpool status -es $TESTPOOL2
zpool destroy $TESTPOOL2
done
log_pass "Verify zpool status -e shows only unhealthy vdevs"

View file

@ -0,0 +1,161 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
#
# DESCRIPTION:
# Verify all cp --reflink modes work with modified file.
#
# STRATEGY:
# 1. Verify "cp --reflink=never|auto|always" behaves as expected.
# Two different modes of operation are tested.
#
# a. zfs_bclone_wait_dirty=0: FICLONE and FICLONERANGE fail with EINVAL
# when there are dirty blocks which cannot be immediately cloned.
# This is the default behavior.
#
# b. zfs_bclone_wait_dirty=1: FICLONE and FICLONERANGE wait for
# dirty blocks to be written to disk allowing the clone to succeed.
# The downside to this is it may be slow which depending on the
# situation may defeat the point of making a clone.
#
verify_runnable "global"
verify_block_cloning
if ! is_linux; then
log_unsupported "cp --reflink is a GNU coreutils option"
fi
# Destroy the cp-reflink test dataset if it still exists and reset the
# BCLONE_WAIT_DIRTY tunable to 0.
function cleanup
{
	# Note the single '$': "$$TESTPOOL" would expand to the shell's PID
	# followed by the literal text "TESTPOOL", which is not the dataset
	# tested by datasetexists.
	datasetexists $TESTPOOL/cp-reflink && \
	    destroy_dataset $TESTPOOL/cp-reflink -f
	log_must set_tunable32 BCLONE_WAIT_DIRTY 0
}
# Compare the digests reported by sha256digest for two files. When they
# differ, list $CP_TESTDIR for diagnostics and fail the test.
#   $1 - source file
#   $2 - destination file
function verify_copy
{
	src_cksum=$(sha256digest $1)
	dst_cksum=$(sha256digest $2)

	# Matching digests: nothing more to do.
	[[ "$src_cksum" == "$dst_cksum" ]] && return

	log_must ls -l $CP_TESTDIR
	log_fail "checksum mismatch ($src_cksum != $dst_cksum)"
}
log_assert "Verify all cp --reflink modes work with modified file"
log_onexit cleanup
SRC_FILE=src.data
DST_FILE=dst.data
# Source file size, in RECORDSIZE-sized blocks. Kept at 1024 or more so it
# can never be zero: it is used later as a modulo divisor
# ($RANDOM % $SRC_SIZE), which would otherwise be a division by zero.
SRC_SIZE=$((1024 + $RANDOM % 1024))
# A smaller recordsize is used merely to speed up the test.
RECORDSIZE=4096
log_must zfs create -o recordsize=$RECORDSIZE $TESTPOOL/cp-reflink
CP_TESTDIR=$(get_prop mountpoint $TESTPOOL/cp-reflink)
log_must cd $CP_TESTDIR
# Never wait on dirty blocks (zfs_bclone_wait_dirty=0)
log_must set_tunable32 BCLONE_WAIT_DIRTY 0
for mode in "never" "auto" "always"; do
log_note "Checking 'cp --reflink=$mode'"
# Create a new file and immediately copy it.
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE count=$SRC_SIZE
if [[ "$mode" == "always" ]]; then
log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
log_must ls -l $CP_TESTDIR
else
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
fi
log_must rm -f $DST_FILE
# Append to an existing file and immediately copy it.
sync_pool $TESTPOOL
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE seek=$SRC_SIZE \
count=1 conv=notrunc
if [[ "$mode" == "always" ]]; then
log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
log_must ls -l $CP_TESTDIR
else
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
fi
log_must rm -f $DST_FILE
# Overwrite a random range of an existing file and immediately copy it.
sync_pool $TESTPOOL
log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \
seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc
if [[ "$mode" == "always" ]]; then
log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
log_must ls -l $CP_TESTDIR
else
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
fi
log_must rm -f $SRC_FILE $DST_FILE
done
# Wait on dirty blocks (zfs_bclone_wait_dirty=1)
log_must set_tunable32 BCLONE_WAIT_DIRTY 1
for mode in "never" "auto" "always"; do
log_note "Checking 'cp --reflink=$mode'"
# Create a new file and immediately copy it.
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE count=$SRC_SIZE
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
log_must rm -f $DST_FILE
# Append to an existing file and immediately copy it.
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE seek=$SRC_SIZE \
count=1 conv=notrunc
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
log_must rm -f $DST_FILE
# Overwrite a random range of an existing file and immediately copy it.
log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \
seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
verify_copy $SRC_FILE $DST_FILE
log_must rm -f $SRC_FILE $DST_FILE
done
log_pass

View file

@ -1152,7 +1152,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.2.99-333-FreeBSD_g2e6b3c4d9"
#define ZFS_META_ALIAS "zfs-2.2.99-338-FreeBSD_g229b9f4ed"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@ -1182,7 +1182,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "333-FreeBSD_g2e6b3c4d9"
#define ZFS_META_RELEASE "338-FreeBSD_g229b9f4ed"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"

View file

@ -1 +1 @@
#define ZFS_META_GITREV "zfs-2.2.99-333-g2e6b3c4d9"
#define ZFS_META_GITREV "zfs-2.2.99-338-g229b9f4ed"