diff --git a/cddl/lib/libspl/Makefile b/cddl/lib/libspl/Makefile index e432b313d9d0..6c43e1750c3c 100644 --- a/cddl/lib/libspl/Makefile +++ b/cddl/lib/libspl/Makefile @@ -11,6 +11,7 @@ PACKAGE= zfs SRCS = \ assert.c \ + backtrace.c \ list.c \ mkdirp.c \ os/freebsd/zone.c \ diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 449b6bf2ccb3..704fcf4422d4 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -85,6 +85,7 @@ #include #include #include +#include #include #include @@ -120,6 +121,9 @@ static int flagbits[256]; static uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */ static int leaked_objects = 0; static range_tree_t *mos_refd_objs; +static spa_t *spa; +static objset_t *os; +static boolean_t kernel_init_done; static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *, boolean_t); @@ -131,6 +135,7 @@ static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free, static void zdb_print_blkptr(const blkptr_t *bp, int flags); +static void zdb_exit(int reason); typedef struct sublivelist_verify_block_refcnt { /* block pointer entry in livelist being verified */ @@ -818,17 +823,40 @@ usage(void) (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); - exit(1); + zdb_exit(1); } static void dump_debug_buffer(void) { - if (dump_opt['G']) { - (void) printf("\n"); - (void) fflush(stdout); - zfs_dbgmsg_print("zdb"); - } + ssize_t ret __attribute__((unused)); + + if (!dump_opt['G']) + return; + /* + * We use write() instead of printf() so that this function + * is safe to call from a signal handler. + */ + ret = write(STDERR_FILENO, "\n", 1); + zfs_dbgmsg_print(STDERR_FILENO, "zdb"); +} + +static void sig_handler(int signo) +{ + struct sigaction action; + + libspl_backtrace(STDERR_FILENO); + dump_debug_buffer(); + + /* + * Restore default action and re-raise signal so SIGSEGV and + * SIGABRT can trigger a core dump. + */ + action.sa_handler = SIG_DFL; + sigemptyset(&action.sa_mask); + action.sa_flags = 0; + (void) sigaction(signo, &action, NULL); + raise(signo); } /* @@ -849,7 +877,7 @@ fatal(const char *fmt, ...) dump_debug_buffer(); - exit(1); + zdb_exit(1); } static void @@ -2276,7 +2304,7 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, buf = malloc(SPA_MAXBLOCKSIZE); if (buf == NULL) { (void) fprintf(stderr, "out of memory\n"); - exit(1); + zdb_exit(1); } decode_embedded_bp_compressed(bp, buf); memcpy(&zstd_hdr, buf, sizeof (zstd_hdr)); @@ -3231,6 +3259,23 @@ fuid_table_destroy(void) } } +static void +zdb_exit(int reason) +{ + if (os != NULL) { + close_objset(os, FTAG); + } else if (spa != NULL) { + spa_close(spa, FTAG); + } + + fuid_table_destroy(); + + if (kernel_init_done) + kernel_fini(); + + exit(reason); +} + /* * print uid or gid information. * For normal POSIX id just the id is printed in decimal format. @@ -4161,32 +4206,32 @@ dump_cachefile(const char *cachefile) if ((fd = open64(cachefile, O_RDONLY)) < 0) { (void) printf("cannot open '%s': %s\n", cachefile, strerror(errno)); - exit(1); + zdb_exit(1); } if (fstat64(fd, &statbuf) != 0) { (void) printf("failed to stat '%s': %s\n", cachefile, strerror(errno)); - exit(1); + zdb_exit(1); } if ((buf = malloc(statbuf.st_size)) == NULL) { (void) fprintf(stderr, "failed to allocate %llu bytes\n", (u_longlong_t)statbuf.st_size); - exit(1); + zdb_exit(1); } if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { (void) fprintf(stderr, "failed to read %llu bytes\n", (u_longlong_t)statbuf.st_size); - exit(1); + zdb_exit(1); } (void) close(fd); if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) { (void) fprintf(stderr, "failed to unpack nvlist\n"); - exit(1); + zdb_exit(1); } free(buf); @@ -5102,14 +5147,14 @@ dump_label(const char *dev) if ((fd = open64(path, O_RDONLY)) < 0) { (void) printf("cannot open '%s': %s\n", path, strerror(errno)); - exit(1); + zdb_exit(1); } if (fstat64_blk(fd, &statbuf) != 0) { (void) printf("failed to stat '%s': %s\n", path, strerror(errno)); (void) close(fd); - exit(1); + zdb_exit(1); } if (S_ISBLK(statbuf.st_mode) && zfs_dev_flush(fd) != 0) @@ -5122,7 +5167,7 @@ dump_label(const char *dev) sizeof (cksum_record_t), offsetof(cksum_record_t, link)); psize = statbuf.st_size; - psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); + psize = P2ALIGN_TYPED(psize, sizeof (vdev_label_t), uint64_t); ashift = SPA_MINBLOCKSHIFT; /* @@ -8221,7 +8266,7 @@ dump_zpool(spa_t *spa) if (rc != 0) { dump_debug_buffer(); - exit(rc); + zdb_exit(rc); } } @@ -8825,18 +8870,18 @@ zdb_embedded_block(char *thing) words + 12, words + 13, words + 14, words + 15); if (err != 16) { (void) fprintf(stderr, "invalid input format\n"); - exit(1); + zdb_exit(1); } ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE); buf = malloc(SPA_MAXBLOCKSIZE); if (buf == NULL) { (void) fprintf(stderr, "out of memory\n"); - exit(1); + zdb_exit(1); } err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp)); if (err != 0) { (void) fprintf(stderr, "decode failed: %u\n", err); - exit(1); + zdb_exit(1); } zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0); free(buf); @@ -8863,8 +8908,6 @@ int main(int argc, char **argv) { int c; - spa_t *spa = NULL; - objset_t *os = NULL; int dump_all = 1; int verbose = 0; int error = 0; @@ -8880,9 +8923,27 @@ main(int argc, char **argv) char *spa_config_path_env, *objset_str; boolean_t target_is_spa = B_TRUE, dataset_lookup = B_FALSE; nvlist_t *cfg = NULL; + struct sigaction action; dprintf_setup(&argc, argv); + /* + * Set up signal handlers, so if we crash due to bad on-disk data we + * can get more info. Unlike ztest, we don't bail out if we can't set + * up signal handlers, because zdb is very useful without them. + */ + action.sa_handler = sig_handler; + sigemptyset(&action.sa_mask); + action.sa_flags = 0; + if (sigaction(SIGSEGV, &action, NULL) < 0) { + (void) fprintf(stderr, "zdb: cannot catch SIGSEGV: %s\n", + strerror(errno)); + } + if (sigaction(SIGABRT, &action, NULL) < 0) { + (void) fprintf(stderr, "zdb: cannot catch SIGABRT: %s\n", + strerror(errno)); + } + /* * If there is an environment variable SPA_CONFIG_PATH it overrides * default spa_config_path setting. If -U flag is specified it will @@ -9093,6 +9154,7 @@ main(int argc, char **argv) spa_mode_readable_spacemaps = B_TRUE; kernel_init(SPA_MODE_READ); + kernel_init_done = B_TRUE; if (dump_all) verbose = MAX(verbose, 1); @@ -9116,19 +9178,23 @@ main(int argc, char **argv) if (argc != 1) usage(); zdb_embedded_block(argv[0]); - return (0); + error = 0; + goto fini; } if (argc < 1) { if (!dump_opt['e'] && dump_opt['C']) { dump_cachefile(spa_config_path); - return (0); + error = 0; + goto fini; } usage(); } - if (dump_opt['l']) - return (dump_label(argv[0])); + if (dump_opt['l']) { + error = dump_label(argv[0]); + goto fini; + } if (dump_opt['X'] || dump_opt['F']) rewind = ZPOOL_DO_REWIND | @@ -9183,7 +9249,8 @@ main(int argc, char **argv) } else if (objset_str && !zdb_numeric(objset_str + 1) && dump_opt['N']) { printf("Supply a numeric objset ID with -N\n"); - exit(1); + error = 1; + goto fini; } } else { target_pool = target; @@ -9240,7 +9307,8 @@ main(int argc, char **argv) if (argc != 2) usage(); dump_opt['v'] = verbose + 3; - return (dump_path(argv[0], argv[1], NULL)); + error = dump_path(argv[0], argv[1], NULL); + goto fini; } if (dump_opt['r']) { @@ -9328,7 +9396,7 @@ main(int argc, char **argv) fatal("can't dump '%s': %s", target, strerror(error)); } - return (error); + goto fini; } else { target_pool = strdup(target); if (strpbrk(target, "/@") != NULL) @@ -9458,9 +9526,10 @@ main(int argc, char **argv) free(checkpoint_target); } +fini: if (os != NULL) { close_objset(os, FTAG); - } else { + } else if (spa != NULL) { spa_close(spa, FTAG); } @@ -9468,7 +9537,8 @@ main(int argc, char **argv) dump_debug_buffer(); - kernel_fini(); + if (kernel_init_done) + kernel_fini(); return (error); } diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c index 69163b80bd5a..d0372608c723 100644 --- a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c +++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c @@ -702,7 +702,7 @@ zfs_enable_ds(void *arg) { unavailpool_t *pool = (unavailpool_t *)arg; - (void) zpool_enable_datasets(pool->uap_zhp, NULL, 0); + (void) zpool_enable_datasets(pool->uap_zhp, NULL, 0, 512); zpool_close(pool->uap_zhp); free(pool); } diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am index 812558cf6d0f..093a04c4636a 100644 --- a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am +++ b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am @@ -9,6 +9,7 @@ dist_zedexec_SCRIPTS = \ %D%/all-debug.sh \ %D%/all-syslog.sh \ %D%/data-notify.sh \ + %D%/deadman-slot_off.sh \ %D%/generic-notify.sh \ %D%/pool_import-led.sh \ %D%/resilver_finish-notify.sh \ @@ -29,6 +30,7 @@ SUBSTFILES += $(nodist_zedexec_SCRIPTS) zedconfdefaults = \ all-syslog.sh \ data-notify.sh \ + deadman-slot_off.sh \ history_event-zfs-list-cacher.sh \ pool_import-led.sh \ resilver_finish-notify.sh \ diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh new file mode 100755 index 000000000000..7b339b3add01 --- /dev/null +++ b/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh @@ -0,0 +1,71 @@ +#!/bin/sh +# shellcheck disable=SC3014,SC2154,SC2086,SC2034 +# +# Turn off disk's enclosure slot if an I/O is hung triggering the deadman. +# +# It's possible for outstanding I/O to a misbehaving SCSI disk to neither +# promptly complete or return an error. This can occur due to retry and +# recovery actions taken by the SCSI layer, driver, or disk. When it occurs +# the pool will be unresponsive even though there may be sufficient redundancy +# configured to proceeded without this single disk. +# +# When a hung I/O is detected by the kmods it will be posted as a deadman +# event. By default an I/O is considered to be hung after 5 minutes. This +# value can be changed with the zfs_deadman_ziotime_ms module parameter. +# If ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN is set the disk's enclosure +# slot will be powered off causing the outstanding I/O to fail. The ZED +# will then handle this like a normal disk failure and FAULT the vdev. +# +# We assume the user will be responsible for turning the slot back on +# after replacing the disk. +# +# Note that this script requires that your enclosure be supported by the +# Linux SCSI Enclosure services (SES) driver. The script will do nothing +# if you have no enclosure, or if your enclosure isn't supported. +# +# Exit codes: +# 0: slot successfully powered off +# 1: enclosure not available +# 2: ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN disabled +# 3: System not configured to wait on deadman +# 4: The enclosure sysfs path passed from ZFS does not exist +# 5: Enclosure slot didn't actually turn off after we told it to + +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +if [ ! -d /sys/class/enclosure ] ; then + # No JBOD enclosure or NVMe slots + exit 1 +fi + +if [ "${ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN}" != "1" ] ; then + exit 2 +fi + +if [ "$ZEVENT_POOL_FAILMODE" != "wait" ] ; then + exit 3 +fi + +if [ ! -f "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" ] ; then + exit 4 +fi + +# Turn off the slot and wait for sysfs to report that the slot is off. +# It can take ~400ms on some enclosures and multiple retries may be needed. +for i in $(seq 1 20) ; do + echo "off" | tee "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" + + for j in $(seq 1 5) ; do + if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" == "off" ] ; then + break 2 + fi + sleep 0.1 + done +done + +if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" != "off" ] ; then + exit 5 +fi + +zed_log_msg "powered down slot $ZEVENT_VDEV_ENC_SYSFS_PATH for $ZEVENT_VDEV_PATH" diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/zed.rc b/sys/contrib/openzfs/cmd/zed/zed.d/zed.rc index ec64ecfaa13c..af56147a969b 100644 --- a/sys/contrib/openzfs/cmd/zed/zed.d/zed.rc +++ b/sys/contrib/openzfs/cmd/zed/zed.d/zed.rc @@ -148,6 +148,13 @@ ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event" # supports slot power control via sysfs. #ZED_POWER_OFF_ENCLOSURE_SLOT_ON_FAULT=1 +## +# Power off the drive's slot in the enclosure if there is a hung I/O which +# exceeds the deadman timeout. This can help prevent a single misbehaving +# drive from rendering a redundant pool unavailable. This assumes your drive +# enclosure fully supports slot power control via sysfs. +#ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN=1 + ## # Ntfy topic # This defines which topic will receive the ntfy notification. diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index 3fc2d2810495..b77917764c86 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -7192,6 +7192,8 @@ share_mount(int op, int argc, char **argv) int c, ret = 0; char *options = NULL; int flags = 0; + const uint_t mount_nthr = 512; + uint_t nthr; /* check options */ while ((c = getopt(argc, argv, op == OP_MOUNT ? ":aRlvo:Of" : "al")) @@ -7310,9 +7312,9 @@ share_mount(int op, int argc, char **argv) * be serialized so that we can prompt the user for their keys * in a consistent manner. */ + nthr = op == OP_MOUNT && !(flags & MS_CRYPT) ? mount_nthr : 1; zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used, - share_mount_one_cb, &share_mount_state, - op == OP_MOUNT && !(flags & MS_CRYPT)); + share_mount_one_cb, &share_mount_state, nthr); zfs_commit_shares(NULL); ret = share_mount_state.sm_status; diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index 300b383af4f6..57170c8ae717 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -80,6 +80,8 @@ libzfs_handle_t *g_zfs; +static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */ + static int zpool_do_create(int, char **); static int zpool_do_destroy(int, char **); @@ -2030,10 +2032,19 @@ zpool_do_destroy(int argc, char **argv) } typedef struct export_cbdata { + tpool_t *tpool; + pthread_mutex_t mnttab_lock; boolean_t force; boolean_t hardforce; + int retval; } export_cbdata_t; + +typedef struct { + char *aea_poolname; + export_cbdata_t *aea_cbdata; +} async_export_args_t; + /* * Export one pool */ @@ -2042,11 +2053,20 @@ zpool_export_one(zpool_handle_t *zhp, void *data) { export_cbdata_t *cb = data; - if (zpool_disable_datasets(zhp, cb->force) != 0) - return (1); + /* + * zpool_disable_datasets() is not thread-safe for mnttab access. + * So we serialize access here for 'zpool export -a' parallel case. + */ + if (cb->tpool != NULL) + pthread_mutex_lock(&cb->mnttab_lock); - /* The history must be logged as part of the export */ - log_history = B_FALSE; + int retval = zpool_disable_datasets(zhp, cb->force); + + if (cb->tpool != NULL) + pthread_mutex_unlock(&cb->mnttab_lock); + + if (retval) + return (1); if (cb->hardforce) { if (zpool_export_force(zhp, history_str) != 0) @@ -2058,6 +2078,48 @@ zpool_export_one(zpool_handle_t *zhp, void *data) return (0); } +/* + * Asynchronous export request + */ +static void +zpool_export_task(void *arg) +{ + async_export_args_t *aea = arg; + + zpool_handle_t *zhp = zpool_open(g_zfs, aea->aea_poolname); + if (zhp != NULL) { + int ret = zpool_export_one(zhp, aea->aea_cbdata); + if (ret != 0) + aea->aea_cbdata->retval = ret; + zpool_close(zhp); + } else { + aea->aea_cbdata->retval = 1; + } + + free(aea->aea_poolname); + free(aea); +} + +/* + * Process an export request in parallel + */ +static int +zpool_export_one_async(zpool_handle_t *zhp, void *data) +{ + tpool_t *tpool = ((export_cbdata_t *)data)->tpool; + async_export_args_t *aea = safe_malloc(sizeof (async_export_args_t)); + + /* save pool name since zhp will go out of scope */ + aea->aea_poolname = strdup(zpool_get_name(zhp)); + aea->aea_cbdata = data; + + /* ship off actual export to another thread */ + if (tpool_dispatch(tpool, zpool_export_task, (void *)aea) != 0) + return (errno); /* unlikely */ + else + return (0); +} + /* * zpool export [-f] ... * @@ -2098,17 +2160,33 @@ zpool_do_export(int argc, char **argv) cb.force = force; cb.hardforce = hardforce; + cb.tpool = NULL; + cb.retval = 0; argc -= optind; argv += optind; + /* The history will be logged as part of the export itself */ + log_history = B_FALSE; + if (do_all) { if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } - return (for_each_pool(argc, argv, B_TRUE, NULL, - ZFS_TYPE_POOL, B_FALSE, zpool_export_one, &cb)); + cb.tpool = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN), + 0, NULL); + pthread_mutex_init(&cb.mnttab_lock, NULL); + + /* Asynchronously call zpool_export_one using thread pool */ + ret = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + B_FALSE, zpool_export_one_async, &cb); + + tpool_wait(cb.tpool); + tpool_destroy(cb.tpool); + (void) pthread_mutex_destroy(&cb.mnttab_lock); + + return (ret | cb.retval); } /* check arguments */ @@ -2938,6 +3016,7 @@ show_import(nvlist_t *config, boolean_t report_error) const char *health; uint_t vsc; const char *comment; + const char *indent; status_cbdata_t cb = { 0 }; verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, @@ -2962,82 +3041,84 @@ show_import(nvlist_t *config, boolean_t report_error) if (reason != ZPOOL_STATUS_OK && !report_error) return (reason); - (void) printf(gettext(" pool: %s\n"), name); - (void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid); - (void) printf(gettext(" state: %s"), health); + if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) { + indent = " "; + } else { + comment = NULL; + indent = ""; + } + + (void) printf(gettext("%s pool: %s\n"), indent, name); + (void) printf(gettext("%s id: %llu\n"), indent, (u_longlong_t)guid); + (void) printf(gettext("%s state: %s"), indent, health); if (pool_state == POOL_STATE_DESTROYED) (void) printf(gettext(" (DESTROYED)")); (void) printf("\n"); + if (reason != ZPOOL_STATUS_OK) { + (void) printf("%s", indent); + printf_color(ANSI_BOLD, gettext("status: ")); + } switch (reason) { case ZPOOL_STATUS_MISSING_DEV_R: case ZPOOL_STATUS_MISSING_DEV_NR: case ZPOOL_STATUS_BAD_GUID_SUM: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices are " "missing from the system.\n")); break; case ZPOOL_STATUS_CORRUPT_LABEL_R: case ZPOOL_STATUS_CORRUPT_LABEL_NR: - printf_color(ANSI_BOLD, gettext("status: ")); - printf_color(ANSI_YELLOW, gettext("One or more devices contains" - " corrupted data.\n")); + printf_color(ANSI_YELLOW, gettext("One or more devices " + "contains corrupted data.\n")); break; case ZPOOL_STATUS_CORRUPT_DATA: - (void) printf( - gettext(" status: The pool data is corrupted.\n")); + printf_color(ANSI_YELLOW, gettext("The pool data is " + "corrupted.\n")); break; case ZPOOL_STATUS_OFFLINE_DEV: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices " "are offlined.\n")); break; case ZPOOL_STATUS_CORRUPT_POOL: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool metadata is " "corrupted.\n")); break; case ZPOOL_STATUS_VERSION_OLDER: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool is formatted using " "a legacy on-disk version.\n")); break; case ZPOOL_STATUS_VERSION_NEWER: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool is formatted using " "an incompatible version.\n")); break; case ZPOOL_STATUS_FEAT_DISABLED: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("Some supported " - "features are not enabled on the pool.\n\t" - "(Note that they may be intentionally disabled " - "if the\n\t'compatibility' property is set.)\n")); + "features are not enabled on the pool.\n" + "\t%s(Note that they may be intentionally disabled if the\n" + "\t%s'compatibility' property is set.)\n"), indent, indent); break; case ZPOOL_STATUS_COMPATIBILITY_ERR: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("Error reading or parsing " "the file(s) indicated by the 'compatibility'\n" - "property.\n")); + "\t%sproperty.\n"), indent); break; case ZPOOL_STATUS_INCOMPATIBLE_FEAT: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more features " "are enabled on the pool despite not being\n" - "requested by the 'compatibility' property.\n")); + "\t%srequested by the 'compatibility' property.\n"), + indent); break; case ZPOOL_STATUS_UNSUP_FEAT_READ: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool uses the following " "feature(s) not supported on this system:\n")); color_start(ANSI_YELLOW); @@ -3046,66 +3127,60 @@ show_import(nvlist_t *config, boolean_t report_error) break; case ZPOOL_STATUS_UNSUP_FEAT_WRITE: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool can only be " - "accessed in read-only mode on this system. It\n\tcannot be" - " accessed in read-write mode because it uses the " - "following\n\tfeature(s) not supported on this system:\n")); + "accessed in read-only mode on this system. It\n" + "\t%scannot be accessed in read-write mode because it uses " + "the following\n" + "\t%sfeature(s) not supported on this system:\n"), + indent, indent); color_start(ANSI_YELLOW); zpool_print_unsup_feat(config); color_end(); break; case ZPOOL_STATUS_HOSTID_ACTIVE: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool is currently " "imported by another system.\n")); break; case ZPOOL_STATUS_HOSTID_REQUIRED: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool has the " - "multihost property on. It cannot\n\tbe safely imported " - "when the system hostid is not set.\n")); + "multihost property on. It cannot\n" + "\t%sbe safely imported when the system hostid is not " + "set.\n"), indent); break; case ZPOOL_STATUS_HOSTID_MISMATCH: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("The pool was last accessed " "by another system.\n")); break; case ZPOOL_STATUS_FAULTED_DEV_R: case ZPOOL_STATUS_FAULTED_DEV_NR: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices are " "faulted.\n")); break; case ZPOOL_STATUS_BAD_LOG: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("An intent log record cannot " "be read.\n")); break; case ZPOOL_STATUS_RESILVERING: case ZPOOL_STATUS_REBUILDING: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices were " "being resilvered.\n")); break; case ZPOOL_STATUS_ERRATA: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("Errata #%d detected.\n"), errata); break; case ZPOOL_STATUS_NON_NATIVE_ASHIFT: - printf_color(ANSI_BOLD, gettext("status: ")); printf_color(ANSI_YELLOW, gettext("One or more devices are " "configured to use a non-native block size.\n" - "\tExpect reduced performance.\n")); + "\t%sExpect reduced performance.\n"), indent); break; default: @@ -3118,114 +3193,121 @@ show_import(nvlist_t *config, boolean_t report_error) /* * Print out an action according to the overall state of the pool. */ + if (vs->vs_state != VDEV_STATE_HEALTHY || + reason != ZPOOL_STATUS_ERRATA || errata != ZPOOL_ERRATA_NONE) { + (void) printf("%s", indent); + (void) printf(gettext("action: ")); + } if (vs->vs_state == VDEV_STATE_HEALTHY) { if (reason == ZPOOL_STATUS_VERSION_OLDER || reason == ZPOOL_STATUS_FEAT_DISABLED) { - (void) printf(gettext(" action: The pool can be " - "imported using its name or numeric identifier, " - "though\n\tsome features will not be available " - "without an explicit 'zpool upgrade'.\n")); + (void) printf(gettext("The pool can be imported using " + "its name or numeric identifier, though\n" + "\t%ssome features will not be available without " + "an explicit 'zpool upgrade'.\n"), indent); } else if (reason == ZPOOL_STATUS_COMPATIBILITY_ERR) { - (void) printf(gettext(" action: The pool can be " - "imported using its name or numeric\n\tidentifier, " - "though the file(s) indicated by its " - "'compatibility'\n\tproperty cannot be parsed at " - "this time.\n")); + (void) printf(gettext("The pool can be imported using " + "its name or numeric\n" + "\t%sidentifier, though the file(s) indicated by " + "its 'compatibility'\n" + "\t%sproperty cannot be parsed at this time.\n"), + indent, indent); } else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH) { - (void) printf(gettext(" action: The pool can be " - "imported using its name or numeric " - "identifier and\n\tthe '-f' flag.\n")); + (void) printf(gettext("The pool can be imported using " + "its name or numeric identifier and\n" + "\t%sthe '-f' flag.\n"), indent); } else if (reason == ZPOOL_STATUS_ERRATA) { switch (errata) { - case ZPOOL_ERRATA_NONE: - break; - case ZPOOL_ERRATA_ZOL_2094_SCRUB: - (void) printf(gettext(" action: The pool can " - "be imported using its name or numeric " - "identifier,\n\thowever there is a compat" - "ibility issue which should be corrected" - "\n\tby running 'zpool scrub'\n")); + (void) printf(gettext("The pool can be " + "imported using its name or numeric " + "identifier,\n" + "\t%showever there is a compatibility " + "issue which should be corrected\n" + "\t%sby running 'zpool scrub'\n"), + indent, indent); break; case ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY: - (void) printf(gettext(" action: The pool can" - "not be imported with this version of ZFS " - "due to\n\tan active asynchronous destroy. " - "Revert to an earlier version\n\tand " - "allow the destroy to complete before " - "updating.\n")); + (void) printf(gettext("The pool cannot be " + "imported with this version of ZFS due to\n" + "\t%san active asynchronous destroy. " + "Revert to an earlier version\n" + "\t%sand allow the destroy to complete " + "before updating.\n"), indent, indent); break; case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION: - (void) printf(gettext(" action: Existing " - "encrypted datasets contain an on-disk " - "incompatibility, which\n\tneeds to be " - "corrected. Backup these datasets to new " - "encrypted datasets\n\tand destroy the " - "old ones.\n")); + (void) printf(gettext("Existing encrypted " + "datasets contain an on-disk " + "incompatibility, which\n" + "\t%sneeds to be corrected. Backup these " + "datasets to new encrypted datasets\n" + "\t%sand destroy the old ones.\n"), + indent, indent); break; case ZPOOL_ERRATA_ZOL_8308_ENCRYPTION: - (void) printf(gettext(" action: Existing " - "encrypted snapshots and bookmarks contain " - "an on-disk\n\tincompatibility. This may " - "cause on-disk corruption if they are used" - "\n\twith 'zfs recv'. To correct the " - "issue, enable the bookmark_v2 feature.\n\t" - "No additional action is needed if there " - "are no encrypted snapshots or\n\t" - "bookmarks. If preserving the encrypted " - "snapshots and bookmarks is\n\trequired, " - "use a non-raw send to backup and restore " - "them. Alternately,\n\tthey may be removed" - " to resolve the incompatibility.\n")); + (void) printf(gettext("Existing encrypted " + "snapshots and bookmarks contain an " + "on-disk\n" + "\t%sincompatibility. This may cause " + "on-disk corruption if they are used\n" + "\t%swith 'zfs recv'. To correct the " + "issue, enable the bookmark_v2 feature.\n" + "\t%sNo additional action is needed if " + "there are no encrypted snapshots or\n" + "\t%sbookmarks. If preserving the " + "encrypted snapshots and bookmarks is\n" + "\t%srequired, use a non-raw send to " + "backup and restore them. Alternately,\n" + "\t%sthey may be removed to resolve the " + "incompatibility.\n"), indent, indent, + indent, indent, indent, indent); break; default: /* * All errata must contain an action message. */ - assert(0); + assert(errata == ZPOOL_ERRATA_NONE); } } else { - (void) printf(gettext(" action: The pool can be " - "imported using its name or numeric " - "identifier.\n")); + (void) printf(gettext("The pool can be imported using " + "its name or numeric identifier.\n")); } } else if (vs->vs_state == VDEV_STATE_DEGRADED) { - (void) printf(gettext(" action: The pool can be imported " - "despite missing or damaged devices. The\n\tfault " - "tolerance of the pool may be compromised if imported.\n")); + (void) printf(gettext("The pool can be imported despite " + "missing or damaged devices. The\n" + "\t%sfault tolerance of the pool may be compromised if " + "imported.\n"), indent); } else { switch (reason) { case ZPOOL_STATUS_VERSION_NEWER: - (void) printf(gettext(" action: The pool cannot be " - "imported. Access the pool on a system running " - "newer\n\tsoftware, or recreate the pool from " - "backup.\n")); + (void) printf(gettext("The pool cannot be imported. " + "Access the pool on a system running newer\n" + "\t%ssoftware, or recreate the pool from " + "backup.\n"), indent); break; case ZPOOL_STATUS_UNSUP_FEAT_READ: - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("The pool cannot be " - "imported. Access the pool on a system that " - "supports\n\tthe required feature(s), or recreate " - "the pool from backup.\n")); + (void) printf(gettext("The pool cannot be imported. " + "Access the pool on a system that supports\n" + "\t%sthe required feature(s), or recreate the pool " + "from backup.\n"), indent); break; case ZPOOL_STATUS_UNSUP_FEAT_WRITE: - printf_color(ANSI_BOLD, gettext("action: ")); - printf_color(ANSI_YELLOW, gettext("The pool cannot be " - "imported in read-write mode. Import the pool " - "with\n" - "\t\"-o readonly=on\", access the pool on a system " - "that supports the\n\trequired feature(s), or " - "recreate the pool from backup.\n")); + (void) printf(gettext("The pool cannot be imported in " + "read-write mode. Import the pool with\n" + "\t%s'-o readonly=on', access the pool on a system " + "that supports the\n" + "\t%srequired feature(s), or recreate the pool " + "from backup.\n"), indent, indent); break; case ZPOOL_STATUS_MISSING_DEV_R: case ZPOOL_STATUS_MISSING_DEV_NR: case ZPOOL_STATUS_BAD_GUID_SUM: - (void) printf(gettext(" action: The pool cannot be " - "imported. Attach the missing\n\tdevices and try " - "again.\n")); + (void) printf(gettext("The pool cannot be imported. " + "Attach the missing\n" + "\t%sdevices and try again.\n"), indent); break; case ZPOOL_STATUS_HOSTID_ACTIVE: VERIFY0(nvlist_lookup_nvlist(config, @@ -3239,47 +3321,49 @@ show_import(nvlist_t *config, boolean_t report_error) hostid = fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_HOSTID); - (void) printf(gettext(" action: The pool must be " - "exported from %s (hostid=%"PRIx64")\n\tbefore it " - "can be safely imported.\n"), hostname, hostid); + (void) printf(gettext("The pool must be exported from " + "%s (hostid=%"PRIx64")\n" + "\t%sbefore it can be safely imported.\n"), + hostname, hostid, indent); break; case ZPOOL_STATUS_HOSTID_REQUIRED: - (void) printf(gettext(" action: Set a unique system " - "hostid with the zgenhostid(8) command.\n")); + (void) printf(gettext("Set a unique system hostid with " + "the zgenhostid(8) command.\n")); break; default: - (void) printf(gettext(" action: The pool cannot be " - "imported due to damaged devices or data.\n")); + (void) printf(gettext("The pool cannot be imported due " + "to damaged devices or data.\n")); } } /* Print the comment attached to the pool. */ - if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) + if (comment != NULL) (void) printf(gettext("comment: %s\n"), comment); /* * If the state is "closed" or "can't open", and the aux state * is "corrupt data": */ - if (((vs->vs_state == VDEV_STATE_CLOSED) || - (vs->vs_state == VDEV_STATE_CANT_OPEN)) && - (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)) { + if ((vs->vs_state == VDEV_STATE_CLOSED || + vs->vs_state == VDEV_STATE_CANT_OPEN) && + vs->vs_aux == VDEV_AUX_CORRUPT_DATA) { if (pool_state == POOL_STATE_DESTROYED) - (void) printf(gettext("\tThe pool was destroyed, " - "but can be imported using the '-Df' flags.\n")); + (void) printf(gettext("\t%sThe pool was destroyed, " + "but can be imported using the '-Df' flags.\n"), + indent); else if (pool_state != POOL_STATE_EXPORTED) - (void) printf(gettext("\tThe pool may be active on " - "another system, but can be imported using\n\t" - "the '-f' flag.\n")); + (void) printf(gettext("\t%sThe pool may be active on " + "another system, but can be imported using\n" + "\t%sthe '-f' flag.\n"), indent, indent); } if (msgid != NULL) { - (void) printf(gettext( - " see: https://openzfs.github.io/openzfs-docs/msg/%s\n"), - msgid); + (void) printf(gettext("%s see: " + "https://openzfs.github.io/openzfs-docs/msg/%s\n"), + indent, msgid); } - (void) printf(gettext(" config:\n\n")); + (void) printf(gettext("%sconfig:\n\n"), indent); cb.cb_namewidth = max_width(NULL, nvroot, 0, strlen(name), VDEV_NAME_TYPE_ID); @@ -3293,9 +3377,10 @@ show_import(nvlist_t *config, boolean_t report_error) print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_CLASS_LOGS); if (reason == ZPOOL_STATUS_BAD_GUID_SUM) { - (void) printf(gettext("\n\tAdditional devices are known to " - "be part of this pool, though their\n\texact " - "configuration cannot be determined.\n")); + (void) printf(gettext("\n\t%sAdditional devices are known to " + "be part of this pool, though their\n" + "\t%sexact configuration cannot be determined.\n"), + indent, indent); } return (0); } @@ -3342,7 +3427,7 @@ zfs_force_import_required(nvlist_t *config) */ static int do_import(nvlist_t *config, const char *newname, const char *mntopts, - nvlist_t *props, int flags) + nvlist_t *props, int flags, uint_t mntthreads) { int ret = 0; int ms_status = 0; @@ -3442,7 +3527,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && !(flags & ZFS_IMPORT_ONLY)) { - ms_status = zpool_enable_datasets(zhp, mntopts, 0); + ms_status = zpool_enable_datasets(zhp, mntopts, 0, mntthreads); if (ms_status == EZFS_SHAREFAILED) { (void) fprintf(stderr, gettext("Import was " "successful, but unable to share some datasets\n")); @@ -3461,6 +3546,7 @@ typedef struct import_parameters { const char *ip_mntopts; nvlist_t *ip_props; int ip_flags; + uint_t ip_mntthreads; int *ip_err; } import_parameters_t; @@ -3469,7 +3555,7 @@ do_import_task(void *arg) { import_parameters_t *ip = arg; *ip->ip_err |= do_import(ip->ip_config, NULL, ip->ip_mntopts, - ip->ip_props, ip->ip_flags); + ip->ip_props, ip->ip_flags, ip->ip_mntthreads); free(ip); } @@ -3483,6 +3569,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, uint64_t pool_state; boolean_t pool_specified = (import->poolname != NULL || import->guid != 0); + uint_t npools = 0; tpool_t *tp = NULL; @@ -3500,6 +3587,10 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, int err = 0; nvpair_t *elem = NULL; boolean_t first = B_TRUE; + if (!pool_specified && import->do_all) { + while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) + npools++; + } while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { verify(nvpair_value_nvlist(elem, &config) == 0); @@ -3530,6 +3621,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, ip->ip_mntopts = mntopts; ip->ip_props = props; ip->ip_flags = flags; + ip->ip_mntthreads = mount_tp_nthr / npools; ip->ip_err = &err; (void) tpool_dispatch(tp, do_import_task, @@ -3597,7 +3689,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, err = B_TRUE; } else { err |= do_import(found_config, new_name, - mntopts, props, flags); + mntopts, props, flags, mount_tp_nthr); } } @@ -7141,7 +7233,8 @@ zpool_do_split(int argc, char **argv) } if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) { - ms_status = zpool_enable_datasets(zhp, mntopts, 0); + ms_status = zpool_enable_datasets(zhp, mntopts, 0, + mount_tp_nthr); if (ms_status == EZFS_SHAREFAILED) { (void) fprintf(stderr, gettext("Split was successful, " "datasets are mounted but sharing of some datasets " diff --git a/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c b/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c index 8392ef3de72f..f9e01d1aa4c4 100644 --- a/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c +++ b/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c @@ -77,7 +77,7 @@ zstream_do_recompress(int argc, char *argv[]) while ((c = getopt(argc, argv, "l:")) != -1) { switch (c) { case 'l': - if (sscanf(optarg, "%d", &level) != 0) { + if (sscanf(optarg, "%d", &level) != 1) { fprintf(stderr, "failed to parse level '%s'\n", optarg); diff --git a/sys/contrib/openzfs/cmd/zstream/zstream_redup.c b/sys/contrib/openzfs/cmd/zstream/zstream_redup.c index c56a09cee75d..6866639fe465 100644 --- a/sys/contrib/openzfs/cmd/zstream/zstream_redup.c +++ b/sys/contrib/openzfs/cmd/zstream/zstream_redup.c @@ -186,7 +186,7 @@ static void zfs_redup_stream(int infd, int outfd, boolean_t verbose) { int bufsz = SPA_MAXBLOCKSIZE; - dmu_replay_record_t thedrr = { 0 }; + dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; redup_table_t rdt; zio_cksum_t stream_cksum; @@ -194,6 +194,8 @@ zfs_redup_stream(int infd, int outfd, boolean_t verbose) uint64_t num_records = 0; uint64_t num_write_byref_records = 0; + memset(&thedrr, 0, sizeof (dmu_replay_record_t)); + #ifdef _ILP32 uint64_t max_rde_size = SMALLEST_POSSIBLE_MAX_RDT_MB << 20; #else diff --git a/sys/contrib/openzfs/cmd/ztest.c b/sys/contrib/openzfs/cmd/ztest.c index b0fea8b3cfb4..f77a37c21545 100644 --- a/sys/contrib/openzfs/cmd/ztest.c +++ b/sys/contrib/openzfs/cmd/ztest.c @@ -136,9 +136,7 @@ #include #include #include -#if (__GLIBC__ && !__UCLIBC__) -#include /* for backtrace() */ -#endif +#include static int ztest_fd_data = -1; static int ztest_fd_rand = -1; @@ -617,22 +615,15 @@ dump_debug_buffer(void) * We use write() instead of printf() so that this function * is safe to call from a signal handler. */ - ret = write(STDOUT_FILENO, "\n", 1); - zfs_dbgmsg_print("ztest"); + ret = write(STDERR_FILENO, "\n", 1); + zfs_dbgmsg_print(STDERR_FILENO, "ztest"); } -#define BACKTRACE_SZ 100 - static void sig_handler(int signo) { struct sigaction action; -#if (__GLIBC__ && !__UCLIBC__) /* backtrace() is a GNU extension */ - int nptrs; - void *buffer[BACKTRACE_SZ]; - nptrs = backtrace(buffer, BACKTRACE_SZ); - backtrace_symbols_fd(buffer, nptrs, STDERR_FILENO); -#endif + libspl_backtrace(STDERR_FILENO); dump_debug_buffer(); /* @@ -2529,7 +2520,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, ASSERT3P(zio, !=, NULL); size = doi.doi_data_block_size; if (ISP2(size)) { - offset = P2ALIGN(offset, size); + offset = P2ALIGN_TYPED(offset, size, uint64_t); } else { ASSERT3U(offset, <, size); offset = 0; @@ -3978,7 +3969,8 @@ raidz_scratch_verify(void) raidvd = vdev_lookup_top(spa, vre->vre_vdev_id); offset = RRSS_GET_OFFSET(&spa->spa_uberblock); state = RRSS_GET_STATE(&spa->spa_uberblock); - write_size = P2ALIGN(VDEV_BOOT_SIZE, 1 << raidvd->vdev_ashift); + write_size = P2ALIGN_TYPED(VDEV_BOOT_SIZE, 1 << raidvd->vdev_ashift, + uint64_t); logical_size = write_size * raidvd->vdev_children; switch (state) { @@ -5016,7 +5008,8 @@ ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id) */ mutex_enter(&os->os_obj_lock); object = ztest_random(os->os_obj_next_chunk); - os->os_obj_next_chunk = P2ALIGN(object, dnodes_per_chunk); + os->os_obj_next_chunk = P2ALIGN_TYPED(object, dnodes_per_chunk, + uint64_t); mutex_exit(&os->os_obj_lock); } @@ -6638,7 +6631,8 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) * the end of the disk (vdev_psize) is aligned to * sizeof (vdev_label_t). */ - uint64_t psize = P2ALIGN(fsize, sizeof (vdev_label_t)); + uint64_t psize = P2ALIGN_TYPED(fsize, sizeof (vdev_label_t), + uint64_t); if ((leaf & 1) == 1 && offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE) continue; @@ -6962,8 +6956,8 @@ ztest_fletcher_incr(ztest_ds_t *zd, uint64_t id) size_t inc = 64 * ztest_random(size / 67); /* sometimes add few bytes to test non-simd */ if (ztest_random(100) < 10) - inc += P2ALIGN(ztest_random(64), - sizeof (uint32_t)); + inc += P2ALIGN_TYPED(ztest_random(64), + sizeof (uint32_t), uint64_t); if (inc > (size - pos)) inc = size - pos; diff --git a/sys/contrib/openzfs/config/kernel-types.m4 b/sys/contrib/openzfs/config/kernel-types.m4 new file mode 100644 index 000000000000..ed76af28337b --- /dev/null +++ b/sys/contrib/openzfs/config/kernel-types.m4 @@ -0,0 +1,40 @@ +dnl # +dnl # check if kernel provides definitions for given types +dnl # + +dnl _ZFS_AC_KERNEL_SRC_TYPE(type) +AC_DEFUN([_ZFS_AC_KERNEL_SRC_TYPE], [ + ZFS_LINUX_TEST_SRC([type_$1], [ + #include + ],[ + const $1 __attribute__((unused)) x = ($1) 0; + ]) +]) + +dnl _ZFS_AC_KERNEL_TYPE(type) +AC_DEFUN([_ZFS_AC_KERNEL_TYPE], [ + AC_MSG_CHECKING([whether kernel defines $1]) + ZFS_LINUX_TEST_RESULT([type_$1], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_KERNEL_]m4_quote(m4_translit([$1], [a-z], [A-Z])), + 1, [kernel defines $1]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl ZFS_AC_KERNEL_TYPES([types...]) +AC_DEFUN([ZFS_AC_KERNEL_TYPES], [ + AC_DEFUN([ZFS_AC_KERNEL_SRC_TYPES], [ + m4_foreach_w([type], [$1], [ + _ZFS_AC_KERNEL_SRC_TYPE(type) + ]) + ]) + AC_DEFUN([ZFS_AC_KERNEL_TYPES], [ + m4_foreach_w([type], [$1], [ + _ZFS_AC_KERNEL_TYPE(type) + ]) + ]) +]) + +ZFS_AC_KERNEL_TYPES([intptr_t]) diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4 index 548905ccd04d..b51477b6a951 100644 --- a/sys/contrib/openzfs/config/kernel.m4 +++ b/sys/contrib/openzfs/config/kernel.m4 @@ -37,6 +37,7 @@ dnl # only once the compilation can be done in parallel significantly dnl # speeding up the process. dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ + ZFS_AC_KERNEL_SRC_TYPES ZFS_AC_KERNEL_SRC_OBJTOOL ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE @@ -187,6 +188,7 @@ dnl # dnl # Check results of kernel interface tests. dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ + ZFS_AC_KERNEL_TYPES ZFS_AC_KERNEL_ACCESS_OK_TYPE ZFS_AC_KERNEL_GLOBAL_PAGE_STATE ZFS_AC_KERNEL_OBJTOOL diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4 index bb5a85d815d1..368684e1c512 100644 --- a/sys/contrib/openzfs/config/zfs-build.m4 +++ b/sys/contrib/openzfs/config/zfs-build.m4 @@ -512,32 +512,33 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ [with_vendor=$withval], [with_vendor=check]) AS_IF([test "x$with_vendor" = "xcheck"],[ - if test -f /etc/toss-release ; then - VENDOR=toss ; - elif test -f /etc/fedora-release ; then - VENDOR=fedora ; - elif test -f /etc/redhat-release ; then - VENDOR=redhat ; - elif test -f /etc/gentoo-release ; then - VENDOR=gentoo ; + if test -f /etc/alpine-release ; then + VENDOR=alpine ; elif test -f /etc/arch-release ; then VENDOR=arch ; + elif test -f /etc/fedora-release ; then + VENDOR=fedora ; + elif test -f /bin/freebsd-version ; then + VENDOR=freebsd ; + elif test -f /etc/gentoo-release ; then + VENDOR=gentoo ; + elif test -f /etc/lunar.release ; then + VENDOR=lunar ; + elif test -f /etc/openEuler-release ; then + VENDOR=openeuler ; elif test -f /etc/SuSE-release ; then VENDOR=sles ; elif test -f /etc/slackware-version ; then VENDOR=slackware ; - elif test -f /etc/lunar.release ; then - VENDOR=lunar ; + elif test -f /etc/toss-release ; then + VENDOR=toss ; elif test -f /etc/lsb-release ; then VENDOR=ubuntu ; + # put debian and redhat last as derivatives may have also their file elif test -f /etc/debian_version ; then VENDOR=debian ; - elif test -f /etc/alpine-release ; then - VENDOR=alpine ; - elif test -f /bin/freebsd-version ; then - VENDOR=freebsd ; - elif test -f /etc/openEuler-release ; then - VENDOR=openeuler ; + elif test -f /etc/redhat-release ; then + VENDOR=redhat ; else VENDOR= ; fi], @@ -550,20 +551,15 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ AC_MSG_CHECKING([default package type]) case "$VENDOR" in - toss) DEFAULT_PACKAGE=rpm ;; - redhat) DEFAULT_PACKAGE=rpm ;; - fedora) DEFAULT_PACKAGE=rpm ;; - gentoo) DEFAULT_PACKAGE=tgz ;; - alpine) DEFAULT_PACKAGE=tgz ;; - arch) DEFAULT_PACKAGE=tgz ;; - sles) DEFAULT_PACKAGE=rpm ;; - slackware) DEFAULT_PACKAGE=tgz ;; - lunar) DEFAULT_PACKAGE=tgz ;; - ubuntu) DEFAULT_PACKAGE=deb ;; - debian) DEFAULT_PACKAGE=deb ;; - freebsd) DEFAULT_PACKAGE=pkg ;; - openeuler) DEFAULT_PACKAGE=rpm ;; - *) DEFAULT_PACKAGE=rpm ;; + alpine|arch|gentoo|lunar|slackware) + DEFAULT_PACKAGE=tgz ;; + debian|ubuntu) + DEFAULT_PACKAGE=deb ;; + freebsd) + DEFAULT_PACKAGE=pkg ;; + *) + # fedora|openeuler|redhat|sles|toss + DEFAULT_PACKAGE=rpm ;; esac AC_MSG_RESULT([$DEFAULT_PACKAGE]) AC_SUBST(DEFAULT_PACKAGE) @@ -578,7 +574,7 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ AC_MSG_CHECKING([default shell]) case "$VENDOR" in - gentoo|alpine) DEFAULT_INIT_SHELL=/sbin/openrc-run + alpine|gentoo) DEFAULT_INIT_SHELL=/sbin/openrc-run IS_SYSV_RC=false ;; *) DEFAULT_INIT_SHELL=/bin/sh IS_SYSV_RC=true ;; @@ -598,17 +594,19 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ AC_MSG_CHECKING([default init config directory]) case "$VENDOR" in - alpine) initconfdir=/etc/conf.d ;; - gentoo) initconfdir=/etc/conf.d ;; - toss) initconfdir=/etc/sysconfig ;; - redhat) initconfdir=/etc/sysconfig ;; - fedora) initconfdir=/etc/sysconfig ;; - sles) initconfdir=/etc/sysconfig ;; - openeuler) initconfdir=/etc/sysconfig ;; - ubuntu) initconfdir=/etc/default ;; - debian) initconfdir=/etc/default ;; - freebsd) initconfdir=$sysconfdir/rc.conf.d;; - *) initconfdir=/etc/default ;; + alpine|gentoo) + initconfdir=/etc/conf.d + ;; + fedora|openeuler|redhat|sles|toss) + initconfdir=/etc/sysconfig + ;; + freebsd) + initconfdir=$sysconfdir/rc.conf.d + ;; + *) + # debian|ubuntu + initconfdir=/etc/default + ;; esac AC_MSG_RESULT([$initconfdir]) AC_SUBST(initconfdir) @@ -625,11 +623,15 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ AC_MSG_CHECKING([default bash completion directory]) case "$VENDOR" in - ubuntu) bashcompletiondir=/usr/share/bash-completion/completions ;; - debian) bashcompletiondir=/usr/share/bash-completion/completions ;; - freebsd) bashcompletiondir=$sysconfdir/bash_completion.d;; - gentoo) bashcompletiondir=/usr/share/bash-completion/completions ;; - *) bashcompletiondir=/etc/bash_completion.d ;; + alpine|debian|gentoo|ubuntu) + bashcompletiondir=/usr/share/bash-completion/completions + ;; + freebsd) + bashcompletiondir=$sysconfdir/bash_completion.d + ;; + *) + bashcompletiondir=/etc/bash_completion.d + ;; esac AC_MSG_RESULT([$bashcompletiondir]) AC_SUBST(bashcompletiondir) diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h index 2823b8845827..7836c2325f4e 100644 --- a/sys/contrib/openzfs/include/libzfs.h +++ b/sys/contrib/openzfs/include/libzfs.h @@ -716,7 +716,7 @@ typedef struct get_all_cb { } get_all_cb_t; _LIBZFS_H void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, - size_t, zfs_iter_f, void *, boolean_t); + size_t, zfs_iter_f, void *, uint_t); _LIBZFS_H void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); /* @@ -1004,7 +1004,8 @@ _LIBZFS_H int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, * Enable and disable datasets within a pool by mounting/unmounting and * sharing/unsharing them. */ -_LIBZFS_H int zpool_enable_datasets(zpool_handle_t *, const char *, int); +_LIBZFS_H int zpool_enable_datasets(zpool_handle_t *, const char *, int, + uint_t); _LIBZFS_H int zpool_disable_datasets(zpool_handle_t *, boolean_t); _LIBZFS_H void zpool_disable_datasets_os(zpool_handle_t *, boolean_t); _LIBZFS_H void zpool_disable_volume_os(const char *); diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h index 26cf4db87aea..bebeb0db2452 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h @@ -138,7 +138,8 @@ typedef int enum_t; #define readdir64 readdir #define dirent64 dirent #endif -#define P2ALIGN(x, align) ((x) & -(align)) +// Deprecated. Use P2ALIGN_TYPED instead. +// #define P2ALIGN(x, align) ((x) & -(align)) #define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1) #define P2ROUNDUP(x, align) ((((x) - 1) | ((align) - 1)) + 1) #define P2PHASE(x, align) ((x) & ((align) - 1)) diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h index 2e4efc60544a..894ccd8bf9b1 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/misc.h @@ -45,11 +45,11 @@ #define F_SEEK_HOLE FIOSEEKHOLE struct opensolaris_utsname { - char *sysname; - char *nodename; - char *release; - char version[32]; - char *machine; + const char *sysname; + const char *nodename; + const char *release; + char version[32]; + const char *machine; }; #define task_io_account_read(n) diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/sig.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/sig.h index a4d440d38326..17fc65cbe3e2 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/sig.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/sig.h @@ -39,20 +39,14 @@ #include #include -#define FORREAL 0 -#define JUSTLOOKING 1 - static __inline int -issig(int why) +issig(void) { struct thread *td = curthread; struct proc *p; int sig; - ASSERT(why == FORREAL || why == JUSTLOOKING); if (SIGPENDING(td)) { - if (why == JUSTLOOKING) - return (1); p = td->td_proc; PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/sysmacros.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/sysmacros.h index 3e8841ae66bd..2c9f4438d769 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/sysmacros.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/sysmacros.h @@ -191,7 +191,8 @@ extern unsigned char bcd_to_byte[256]; * eg, P2ALIGN(0x1234, 0x100) == 0x1200 (0x12*align) * eg, P2ALIGN(0x5600, 0x100) == 0x5600 (0x56*align) */ -#define P2ALIGN(x, align) ((x) & -(align)) +// Deprecated. Use P2ALIGN_TYPED instead. +// #define P2ALIGN(x, align) ((x) & -(align)) /* * return x % (mod) align diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/signal.h b/sys/contrib/openzfs/include/os/linux/spl/sys/signal.h index 6b538c8966f2..cb4b33261647 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/signal.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/signal.h @@ -30,9 +30,6 @@ #include #endif -#define FORREAL 0 /* Usual side-effects */ -#define JUSTLOOKING 1 /* Don't stop the process */ - -extern int issig(int why); +extern int issig(void); #endif /* SPL_SIGNAL_H */ diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h b/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h index 99e3a6fb41c6..0e8390736309 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h @@ -159,7 +159,8 @@ makedev(unsigned int major, unsigned int minor) /* * Compatibility macros/typedefs needed for Solaris -> Linux port */ -#define P2ALIGN(x, align) ((x) & -(align)) +// Deprecated. Use P2ALIGN_TYPED instead. +// #define P2ALIGN(x, align) ((x) & -(align)) #define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1) #define P2ROUNDUP(x, align) ((((x) - 1) | ((align) - 1)) + 1) #define P2PHASE(x, align) ((x) & ((align) - 1)) diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/types.h b/sys/contrib/openzfs/include/os/linux/spl/sys/types.h index 20ba457f7efe..94ba7b6ad323 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/types.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/types.h @@ -38,7 +38,9 @@ typedef unsigned long ulong_t; typedef unsigned long long u_longlong_t; typedef long long longlong_t; +#ifndef HAVE_KERNEL_INTPTR_T typedef long intptr_t; +#endif typedef unsigned long long rlim64_t; typedef struct task_struct kthread_t; diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h index a40914ec5fcb..5605a35b8641 100644 --- a/sys/contrib/openzfs/include/sys/spa_impl.h +++ b/sys/contrib/openzfs/include/sys/spa_impl.h @@ -243,6 +243,7 @@ struct spa { dsl_pool_t *spa_dsl_pool; boolean_t spa_is_initializing; /* true while opening pool */ boolean_t spa_is_exporting; /* true while exporting pool */ + kthread_t *spa_export_thread; /* valid during pool export */ kthread_t *spa_load_thread; /* loading, no namespace lock */ metaslab_class_t *spa_normal_class; /* normal data class */ metaslab_class_t *spa_log_class; /* intent log data class */ @@ -478,10 +479,8 @@ struct spa { extern char *spa_config_path; extern const char *zfs_deadman_failmode; extern uint_t spa_slop_shift; -extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, - task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent, zio_t *zio); -extern void spa_taskq_dispatch_sync(spa_t *, zio_type_t t, zio_taskq_type_t q, - task_func_t *func, void *arg, uint_t flags); +extern void spa_taskq_dispatch(spa_t *spa, zio_type_t t, zio_taskq_type_t q, + task_func_t *func, zio_t *zio, boolean_t cutinline); extern void spa_load_spares(spa_t *spa); extern void spa_load_l2cache(spa_t *spa); extern sysevent_t *spa_event_create(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, diff --git a/sys/contrib/openzfs/include/sys/zfs_context.h b/sys/contrib/openzfs/include/sys/zfs_context.h index 8f264b50e995..e4711ce4194a 100644 --- a/sys/contrib/openzfs/include/sys/zfs_context.h +++ b/sys/contrib/openzfs/include/sys/zfs_context.h @@ -249,8 +249,7 @@ extern struct proc p0; extern kthread_t *zk_thread_create(const char *name, void (*func)(void *), void *arg, size_t stksize, int state); -#define issig(why) (FALSE) -#define ISSIG(thr, why) (FALSE) +#define issig() (FALSE) #define KPREEMPT_SYNC (-1) diff --git a/sys/contrib/openzfs/include/sys/zfs_debug.h b/sys/contrib/openzfs/include/sys/zfs_debug.h index 8d94557a5882..e509c8b7c638 100644 --- a/sys/contrib/openzfs/include/sys/zfs_debug.h +++ b/sys/contrib/openzfs/include/sys/zfs_debug.h @@ -104,7 +104,7 @@ extern void zfs_dbgmsg_fini(void); #ifndef _KERNEL extern int dprintf_find_string(const char *string); -extern void zfs_dbgmsg_print(const char *tag); +extern void zfs_dbgmsg_print(int fd, const char *tag); #endif #ifdef __cplusplus diff --git a/sys/contrib/openzfs/lib/libefi/rdwr_efi.c b/sys/contrib/openzfs/lib/libefi/rdwr_efi.c index 739219e0410f..63c91059ae5f 100644 --- a/sys/contrib/openzfs/lib/libefi/rdwr_efi.c +++ b/sys/contrib/openzfs/lib/libefi/rdwr_efi.c @@ -1175,8 +1175,8 @@ efi_use_whole_disk(int fd) * (for performance reasons). The alignment should match the * alignment used by the "zpool_label_disk" function. */ - limit = P2ALIGN(efi_label->efi_last_lba - nblocks - EFI_MIN_RESV_SIZE, - PARTITION_END_ALIGNMENT); + limit = P2ALIGN_TYPED(efi_label->efi_last_lba - nblocks - + EFI_MIN_RESV_SIZE, PARTITION_END_ALIGNMENT, diskaddr_t); if (data_start + data_size != limit || resv_start != limit) sync_needed = B_TRUE; diff --git a/sys/contrib/openzfs/lib/libnvpair/libnvpair.abi b/sys/contrib/openzfs/lib/libnvpair/libnvpair.abi index ef92f3e9bda6..69009375e887 100644 --- a/sys/contrib/openzfs/lib/libnvpair/libnvpair.abi +++ b/sys/contrib/openzfs/lib/libnvpair/libnvpair.abi @@ -79,6 +79,7 @@ + @@ -1156,6 +1157,11 @@ + + + + + @@ -2041,9 +2047,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -2051,11 +2125,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -2536,11 +2642,6 @@ - - - - - diff --git a/sys/contrib/openzfs/lib/libspl/Makefile.am b/sys/contrib/openzfs/lib/libspl/Makefile.am index eb2377305aca..f8943572bf29 100644 --- a/sys/contrib/openzfs/lib/libspl/Makefile.am +++ b/sys/contrib/openzfs/lib/libspl/Makefile.am @@ -7,7 +7,8 @@ noinst_LTLIBRARIES += libspl_assert.la libspl.la CPPCHECKTARGETS += libspl_assert.la libspl.la libspl_assert_la_SOURCES = \ - %D%/assert.c + %D%/assert.c \ + %D%/backtrace.c libspl_la_SOURCES = \ %D%/libspl_impl.h \ @@ -45,3 +46,7 @@ libspl_la_LIBADD = \ libspl_la_LIBADD += $(LIBATOMIC_LIBS) $(LIBCLOCK_GETTIME) libspl_assert_la_LIBADD = $(BACKTRACE_LIBS) $(LIBUNWIND_LIBS) + +if BUILD_FREEBSD +libspl_assert_la_LIBADD += -lpthread +endif diff --git a/sys/contrib/openzfs/lib/libspl/assert.c b/sys/contrib/openzfs/lib/libspl/assert.c index 5b12c14acd6e..315ddd6b9a9d 100644 --- a/sys/contrib/openzfs/lib/libspl/assert.c +++ b/sys/contrib/openzfs/lib/libspl/assert.c @@ -28,6 +28,7 @@ #include #include +#include #if defined(__linux__) #include @@ -51,53 +52,17 @@ pthread_getname_np(pthread_self(), buf, len); #endif -#if defined(HAVE_LIBUNWIND) -#define UNW_LOCAL_ONLY -#include - -static inline void -libspl_dump_backtrace(void) +#if defined(__APPLE__) +static inline uint64_t +libspl_gettid(void) { - unw_context_t uc; - unw_cursor_t cp; - unw_word_t ip, off; - char funcname[128]; -#ifdef HAVE_LIBUNWIND_ELF - char objname[128]; - unw_word_t objoff; -#endif + uint64_t tid; - fprintf(stderr, "Call trace:\n"); - unw_getcontext(&uc); - unw_init_local(&cp, &uc); - while (unw_step(&cp) > 0) { - unw_get_reg(&cp, UNW_REG_IP, &ip); - unw_get_proc_name(&cp, funcname, sizeof (funcname), &off); -#ifdef HAVE_LIBUNWIND_ELF - unw_get_elf_filename(&cp, objname, sizeof (objname), &objoff); - fprintf(stderr, " [0x%08lx] %s+0x%2lx (in %s +0x%2lx)\n", - ip, funcname, off, objname, objoff); -#else - fprintf(stderr, " [0x%08lx] %s+0x%2lx\n", ip, funcname, off); -#endif - } -} -#elif defined(HAVE_BACKTRACE) -#include + if (pthread_threadid_np(NULL, &tid) != 0) + tid = 0; -static inline void -libspl_dump_backtrace(void) -{ - void *btptrs[100]; - size_t nptrs = backtrace(btptrs, 100); - char **bt = backtrace_symbols(btptrs, nptrs); - fprintf(stderr, "Call trace:\n"); - for (size_t i = 0; i < nptrs; i++) - fprintf(stderr, " %s\n", bt[i]); - free(bt); + return (tid); } -#else -#define libspl_dump_backtrace() #endif #if defined(__APPLE__) @@ -151,7 +116,7 @@ libspl_assertf(const char *file, const char *func, int line, getpid(), libspl_getprogname(), libspl_gettid(), tname); - libspl_dump_backtrace(); + libspl_backtrace(STDERR_FILENO); #if !__has_feature(attribute_analyzer_noreturn) && !defined(__COVERITY__) if (libspl_assert_ok) { diff --git a/sys/contrib/openzfs/lib/libspl/backtrace.c b/sys/contrib/openzfs/lib/libspl/backtrace.c new file mode 100644 index 000000000000..d26d742106e2 --- /dev/null +++ b/sys/contrib/openzfs/lib/libspl/backtrace.c @@ -0,0 +1,119 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2024, Rob Norris + * Copyright (c) 2024, Klara Inc. + */ + +#include +#include +#include + +/* + * libspl_backtrace() must be safe to call from inside a signal hander. This + * mostly means it must not allocate, and so we can't use things like printf. + */ + +#if defined(HAVE_LIBUNWIND) +#define UNW_LOCAL_ONLY +#include + +static size_t +libspl_u64_to_hex_str(uint64_t v, size_t digits, char *buf, size_t buflen) +{ + static const char hexdigits[] = { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + + size_t pos = 0; + boolean_t want = (digits == 0); + for (int i = 15; i >= 0; i--) { + const uint64_t d = v >> (i * 4) & 0xf; + if (!want && (d != 0 || digits > i)) + want = B_TRUE; + if (want) { + buf[pos++] = hexdigits[d]; + if (pos == buflen) + break; + } + } + return (pos); +} + +void +libspl_backtrace(int fd) +{ + ssize_t ret __attribute__((unused)); + unw_context_t uc; + unw_cursor_t cp; + unw_word_t loc; + char buf[128]; + size_t n; + + ret = write(fd, "Call trace:\n", 12); + unw_getcontext(&uc); + unw_init_local(&cp, &uc); + while (unw_step(&cp) > 0) { + unw_get_reg(&cp, UNW_REG_IP, &loc); + ret = write(fd, " [0x", 5); + n = libspl_u64_to_hex_str(loc, 10, buf, sizeof (buf)); + ret = write(fd, buf, n); + ret = write(fd, "] ", 2); + unw_get_proc_name(&cp, buf, sizeof (buf), &loc); + for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {} + ret = write(fd, buf, n); + ret = write(fd, "+0x", 3); + n = libspl_u64_to_hex_str(loc, 2, buf, sizeof (buf)); + ret = write(fd, buf, n); +#ifdef HAVE_LIBUNWIND_ELF + ret = write(fd, " (in ", 5); + unw_get_elf_filename(&cp, buf, sizeof (buf), &loc); + for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {} + ret = write(fd, buf, n); + ret = write(fd, " +0x", 4); + n = libspl_u64_to_hex_str(loc, 2, buf, sizeof (buf)); + ret = write(fd, buf, n); + ret = write(fd, ")", 1); +#endif + ret = write(fd, "\n", 1); + } +} +#elif defined(HAVE_BACKTRACE) +#include + +void +libspl_backtrace(int fd) +{ + ssize_t ret __attribute__((unused)); + void *btptrs[64]; + size_t nptrs = backtrace(btptrs, 64); + ret = write(fd, "Call trace:\n", 12); + backtrace_symbols_fd(btptrs, nptrs, fd); +} +#else +#include + +void +libspl_backtrace(int fd __maybe_unused) +{ +} +#endif diff --git a/sys/contrib/openzfs/lib/libspl/include/Makefile.am b/sys/contrib/openzfs/lib/libspl/include/Makefile.am index 2c1d21edf19d..4ad3b854cbee 100644 --- a/sys/contrib/openzfs/lib/libspl/include/Makefile.am +++ b/sys/contrib/openzfs/lib/libspl/include/Makefile.am @@ -27,6 +27,7 @@ libspl_sys_HEADERS = \ %D%/sys/acl.h \ %D%/sys/acl_impl.h \ %D%/sys/asm_linkage.h \ + %D%/sys/backtrace.h \ %D%/sys/callb.h \ %D%/sys/cmn_err.h \ %D%/sys/cred.h \ diff --git a/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/sysmacros.h b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/sysmacros.h index 5765ee25c6cb..26e1c87a35af 100644 --- a/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/sysmacros.h +++ b/sys/contrib/openzfs/lib/libspl/include/os/linux/sys/sysmacros.h @@ -52,7 +52,8 @@ /* * Compatibility macros/typedefs needed for Solaris -> Linux port */ -#define P2ALIGN(x, align) ((x) & -(align)) +// Deprecated. Use P2ALIGN_TYPED instead. +// #define P2ALIGN(x, align) ((x) & -(align)) #define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1) #define P2ROUNDUP(x, align) ((((x) - 1) | ((align) - 1)) + 1) #define P2BOUNDARY(off, len, align) \ diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/backtrace.h b/sys/contrib/openzfs/lib/libspl/include/sys/backtrace.h new file mode 100644 index 000000000000..f9869ffc9e1a --- /dev/null +++ b/sys/contrib/openzfs/lib/libspl/include/sys/backtrace.h @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2024, Rob Norris + * Copyright (c) 2024, Klara Inc. + */ + +#ifndef _LIBSPL_SYS_BACKTRACE_H +#define _LIBSPL_SYS_BACKTRACE_H + +void libspl_backtrace(int fd); + +#endif diff --git a/sys/contrib/openzfs/lib/libtpool/thread_pool.c b/sys/contrib/openzfs/lib/libtpool/thread_pool.c index 7802f8c1750f..9bf9cdf5dc84 100644 --- a/sys/contrib/openzfs/lib/libtpool/thread_pool.c +++ b/sys/contrib/openzfs/lib/libtpool/thread_pool.c @@ -423,6 +423,26 @@ tpool_dispatch(tpool_t *tpool, void (*func)(void *), void *arg) pthread_mutex_lock(&tpool->tp_mutex); + if (!(tpool->tp_flags & TP_SUSPEND)) { + if (tpool->tp_idle > 0) + (void) pthread_cond_signal(&tpool->tp_workcv); + else if (tpool->tp_current >= tpool->tp_maximum) { + /* At worker limit. Leave task on queue */ + } else { + if (create_worker(tpool) == 0) { + /* Started a new worker thread */ + tpool->tp_current++; + } else if (tpool->tp_current > 0) { + /* Leave task on queue */ + } else { + /* Cannot start a single worker! */ + pthread_mutex_unlock(&tpool->tp_mutex); + free(job); + return (-1); + } + } + } + if (tpool->tp_head == NULL) tpool->tp_head = job; else @@ -430,14 +450,6 @@ tpool_dispatch(tpool_t *tpool, void (*func)(void *), void *arg) tpool->tp_tail = job; tpool->tp_njobs++; - if (!(tpool->tp_flags & TP_SUSPEND)) { - if (tpool->tp_idle > 0) - (void) pthread_cond_signal(&tpool->tp_workcv); - else if (tpool->tp_current < tpool->tp_maximum && - create_worker(tpool) == 0) - tpool->tp_current++; - } - pthread_mutex_unlock(&tpool->tp_mutex); return (0); } diff --git a/sys/contrib/openzfs/lib/libuutil/libuutil.abi b/sys/contrib/openzfs/lib/libuutil/libuutil.abi index e942d24c6531..2ed2fb2e41e6 100644 --- a/sys/contrib/openzfs/lib/libuutil/libuutil.abi +++ b/sys/contrib/openzfs/lib/libuutil/libuutil.abi @@ -149,6 +149,7 @@ + @@ -242,6 +243,22 @@ + + + + + + + + + + + + + + + + @@ -576,6 +593,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -596,14 +634,11 @@ - + - - - - + @@ -800,9 +835,16 @@ + + + + + + + @@ -912,6 +954,25 @@ + + + + + + + + + + + + + + + + + + + @@ -920,12 +981,23 @@ + + + + + + + + + + + @@ -937,8 +1009,9 @@ - + + diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi index 2bbaae6345ab..80f4b7439a55 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi +++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi @@ -191,6 +191,7 @@ + @@ -777,6 +778,18 @@ + + + + + + + + + + + + @@ -1092,6 +1105,19 @@ + + + + + + + + + + + + + @@ -5532,13 +5558,14 @@ - + + @@ -6251,6 +6278,11 @@ + + + + + @@ -6362,7 +6394,7 @@ - + @@ -8671,7 +8703,7 @@ - + @@ -8696,6 +8728,12 @@ + + + + + + diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c index ec6ebad2f180..42988bf9cbb2 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c @@ -83,8 +83,6 @@ #include #define MAXISALEN 257 /* based on sysinfo(2) man page */ -static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */ - static void zfs_mount_task(void *); static const proto_table_t proto_table[SA_PROTOCOL_COUNT] = { @@ -1098,7 +1096,10 @@ zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles, mnt_param->mnt_func = func; mnt_param->mnt_data = data; - (void) tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param); + if (tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param)) { + /* Could not dispatch to thread pool; execute directly */ + zfs_mount_task((void*)mnt_param); + } } /* @@ -1202,19 +1203,20 @@ zfs_mount_task(void *arg) * * Callbacks are issued in one of two ways: * - * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT + * 1. Sequentially: If the nthr argument is <= 1 or the ZFS_SERIAL_MOUNT * environment variable is set, then we issue callbacks sequentially. * - * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT + * 2. In parallel: If the nthr argument is > 1 and the ZFS_SERIAL_MOUNT * environment variable is not set, then we use a tpool to dispatch threads * to mount filesystems in parallel. This function dispatches tasks to mount * the filesystems at the top-level mountpoints, and these tasks in turn * are responsible for recursively mounting filesystems in their children - * mountpoints. + * mountpoints. The value of the nthr argument will be the number of worker + * threads for the thread pool. */ void zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, - size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel) + size_t num_handles, zfs_iter_f func, void *data, uint_t nthr) { zoneid_t zoneid = getzoneid(); @@ -1223,7 +1225,7 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, * variable that can be used as a convenience to do a/b comparison * of serial vs. parallel mounting. */ - boolean_t serial_mount = !parallel || + boolean_t serial_mount = nthr <= 1 || (getenv("ZFS_SERIAL_MOUNT") != NULL); /* @@ -1243,7 +1245,7 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, * Issue the callback function for each dataset using a parallel * algorithm that uses a thread pool to manage threads. */ - tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL); + tpool_t *tp = tpool_create(1, nthr, 0, NULL); /* * There may be multiple "top level" mountpoints outside of the pool's @@ -1270,10 +1272,12 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, /* * Mount and share all datasets within the given pool. This assumes that no - * datasets within the pool are currently mounted. + * datasets within the pool are currently mounted. nthr will be number of + * worker threads to use while mounting datasets. */ int -zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) +zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags, + uint_t nthr) { get_all_cb_t cb = { 0 }; mount_state_t ms = { 0 }; @@ -1299,7 +1303,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) ms.ms_mntopts = mntopts; ms.ms_mntflags = flags; zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, - zfs_mount_one, &ms, B_TRUE); + zfs_mount_one, &ms, nthr); if (ms.ms_mntstatus != 0) ret = EZFS_MOUNTFAILED; @@ -1310,7 +1314,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) */ ms.ms_mntstatus = 0; zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, - zfs_share_one, &ms, B_FALSE); + zfs_share_one, &ms, 1); if (ms.ms_mntstatus != 0) ret = EZFS_SHAREFAILED; else diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c index 526f57ea403c..0370112c022a 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c @@ -2170,7 +2170,8 @@ zfs_send_saved(zfs_handle_t *zhp, sendflags_t *flags, int outfd, static int send_conclusion_record(int fd, zio_cksum_t *zc) { - dmu_replay_record_t drr = { 0 }; + dmu_replay_record_t drr; + memset(&drr, 0, sizeof (dmu_replay_record_t)); drr.drr_type = DRR_END; if (zc != NULL) drr.drr_u.drr_end.drr_checksum = *zc; @@ -2272,7 +2273,8 @@ send_prelim_records(zfs_handle_t *zhp, const char *from, int fd, } if (!dryrun) { - dmu_replay_record_t drr = { 0 }; + dmu_replay_record_t drr; + memset(&drr, 0, sizeof (dmu_replay_record_t)); /* write first begin record */ drr.drr_type = DRR_BEGIN; drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; diff --git a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c index 86eef3255bc2..7b18e31c8674 100644 --- a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c +++ b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c @@ -268,7 +268,8 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) if (start_block == MAXOFFSET_T) start_block = NEW_START_BLOCK; slice_size -= start_block; - slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT); + slice_size = P2ALIGN_TYPED(slice_size, PARTITION_END_ALIGNMENT, + uint64_t); vtoc->efi_parts[0].p_start = start_block; vtoc->efi_parts[0].p_size = slice_size; diff --git a/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi b/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi index 5b95c8f779db..cf9d6bddc9fc 100644 --- a/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi +++ b/sys/contrib/openzfs/lib/libzfs_core/libzfs_core.abi @@ -132,6 +132,7 @@ + @@ -231,10 +232,18 @@ + + + + + + + + @@ -242,6 +251,14 @@ + + + + + + + + @@ -574,6 +591,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -594,14 +632,11 @@ - + - - - - + @@ -770,6 +805,13 @@ + + + + + + + @@ -873,12 +915,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -895,8 +967,9 @@ - + + @@ -1119,7 +1192,7 @@ - + @@ -1127,7 +1200,7 @@ - + diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index 5edd80659e08..f1d14b4d01a4 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -906,6 +906,13 @@ Historically used for controlling what reporting was available under .Pa /proc/spl/kstat/zfs . No effect. . +.It Sy zfs_deadman_checktime_ms Ns = Ns Sy 60000 Ns ms Po 1 min Pc Pq u64 +Check time in milliseconds. +This defines the frequency at which we check for hung I/O requests +and potentially invoke the +.Sy zfs_deadman_failmode +behavior. +. .It Sy zfs_deadman_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int When a pool sync operation takes longer than .Sy zfs_deadman_synctime_ms , @@ -921,6 +928,10 @@ By default, the deadman is enabled and set to which results in "hung" I/O operations only being logged. The deadman is automatically disabled when a pool gets suspended. . +.It Sy zfs_deadman_events_per_second Ns = Ns Sy 1 Ns /s Pq int +Rate limit deadman zevents (which report hung I/O operations) to this many per +second. +. .It Sy zfs_deadman_failmode Ns = Ns Sy wait Pq charp Controls the failure behavior when the deadman detects a "hung" I/O operation. Valid values are: @@ -938,13 +949,6 @@ This can be used to facilitate automatic fail-over to a properly configured fail-over partner. .El . -.It Sy zfs_deadman_checktime_ms Ns = Ns Sy 60000 Ns ms Po 1 min Pc Pq u64 -Check time in milliseconds. -This defines the frequency at which we check for hung I/O requests -and potentially invoke the -.Sy zfs_deadman_failmode -behavior. -. .It Sy zfs_deadman_synctime_ms Ns = Ns Sy 600000 Ns ms Po 10 min Pc Pq u64 Interval in milliseconds after which the deadman is triggered and also the interval after which a pool sync operation is considered to be "hung". @@ -1002,8 +1006,7 @@ will result in objects waiting when there is not actually contention on the same object. . .It Sy zfs_slow_io_events_per_second Ns = Ns Sy 20 Ns /s Pq int -Rate limit delay and deadman zevents (which report slow I/O operations) to this -many per +Rate limit delay zevents (which report slow I/O operations) to this many per second. . .It Sy zfs_unflushed_max_mem_amt Ns = Ns Sy 1073741824 Ns B Po 1 GiB Pc Pq u64 @@ -2367,7 +2370,7 @@ This is an advanced debugging parameter. Don't change this unless you understand what it does. Set values only apply to pools imported/created after that. . -.It Sy zio_taskq_write Ns = Ns Sy sync fixed,1,5 scale fixed,1,5 Pq charp +.It Sy zio_taskq_write Ns = Ns Sy sync null scale null Pq charp Set the queue and thread configuration for the IO write queues. This is an advanced debugging parameter. Don't change this unless you understand what it does. diff --git a/sys/contrib/openzfs/module/icp/io/aes.c b/sys/contrib/openzfs/module/icp/io/aes.c index d6f01304f56b..522c436497bc 100644 --- a/sys/contrib/openzfs/module/icp/io/aes.c +++ b/sys/contrib/openzfs/module/icp/io/aes.c @@ -832,12 +832,14 @@ aes_encrypt_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *plaintext, crypto_data_t *ciphertext, crypto_spi_ctx_template_t template) { - aes_ctx_t aes_ctx = {{{{0}}}}; + aes_ctx_t aes_ctx; off_t saved_offset; size_t saved_length; size_t length_needed; int ret; + memset(&aes_ctx, 0, sizeof (aes_ctx_t)); + ASSERT(ciphertext != NULL); /* @@ -956,12 +958,14 @@ aes_decrypt_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *ciphertext, crypto_data_t *plaintext, crypto_spi_ctx_template_t template) { - aes_ctx_t aes_ctx = {{{{0}}}}; + aes_ctx_t aes_ctx; off_t saved_offset; size_t saved_length; size_t length_needed; int ret; + memset(&aes_ctx, 0, sizeof (aes_ctx_t)); + ASSERT(plaintext != NULL); /* diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_misc.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_misc.c index a5fc996b6550..2d0821417ad9 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_misc.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_misc.c @@ -37,6 +37,9 @@ #include static struct opensolaris_utsname hw_utsname = { + .sysname = ostype, + .nodename = prison0.pr_hostname, + .release = osrelease, .machine = MACHINE }; @@ -49,10 +52,6 @@ utsname(void) static void opensolaris_utsname_init(void *arg) { - - hw_utsname.sysname = ostype; - hw_utsname.nodename = prison0.pr_hostname; - hw_utsname.release = osrelease; snprintf(hw_utsname.version, sizeof (hw_utsname.version), "%d", osreldate); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c index 9d88971919db..38c1d8e9e464 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c @@ -457,7 +457,7 @@ vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp) ZFS_LOG(1, "Reading config from %s...", pp->name); psize = pp->mediasize; - psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); + psize = P2ALIGN_TYPED(psize, sizeof (vdev_label_t), uint64_t); size = sizeof (*vdev_lists[0]) + pp->sectorsize - ((sizeof (*vdev_lists[0]) - 1) % pp->sectorsize) - 1; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c index 78d50c6fd8b7..c4cebe102075 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_debug.c @@ -232,15 +232,30 @@ __dprintf(boolean_t dprint, const char *file, const char *func, #else void -zfs_dbgmsg_print(const char *tag) +zfs_dbgmsg_print(int fd, const char *tag) { - zfs_dbgmsg_t *zdm; + ssize_t ret __attribute__((unused)); + + /* + * We use write() in this function instead of printf() + * so it is safe to call from a signal handler. + */ + ret = write(fd, "ZFS_DBGMSG(", 11); + ret = write(fd, tag, strlen(tag)); + ret = write(fd, ") START:\n", 9); - (void) printf("ZFS_DBGMSG(%s):\n", tag); mutex_enter(&zfs_dbgmsgs_lock); - for (zdm = list_head(&zfs_dbgmsgs); zdm; + + for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs); zdm != NULL; zdm = list_next(&zfs_dbgmsgs, zdm)) - (void) printf("%s\n", zdm->zdm_msg); + ret = write(fd, zdm->zdm_msg, strlen(zdm->zdm_msg)); + ret = write(fd, "\n", 1); + } + + ret = write(fd, "ZFS_DBGMSG(", 11); + ret = write(fd, tag, strlen(tag)); + ret = write(fd, ") END\n", 6); + mutex_exit(&zfs_dbgmsgs_lock); } #endif /* _KERNEL */ diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c index 948df8e50de1..3cdb94d6cd53 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_dir.c @@ -543,6 +543,7 @@ zfs_rmnode(znode_t *zp) dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1); zfs_znode_delete(zp, tx); + zfs_znode_free(zp); dmu_tx_commit(tx); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index 7f4f8e2e2f78..b9d4da3de084 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -1175,10 +1175,25 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode, return (error); } zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); + + error = zfs_link_create(dzp, name, zp, tx, ZNEW); + if (error != 0) { + /* + * Since, we failed to add the directory entry for it, + * delete the newly created dnode. + */ + zfs_znode_delete(zp, tx); + VOP_UNLOCK1(ZTOV(zp)); + zrele(zp); + zfs_acl_ids_free(&acl_ids); + dmu_tx_commit(tx); + getnewvnode_drop_reserve(); + goto out; + } + if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); - (void) zfs_link_create(dzp, name, zp, tx, ZNEW); txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); zfs_log_create(zilog, tx, txtype, dzp, zp, name, vsecp, acl_ids.z_fuidp, vap); @@ -1526,13 +1541,19 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, */ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); - /* * Now put new name in parent dir. */ - (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); + error = zfs_link_create(dzp, dirname, zp, tx, ZNEW); + if (error != 0) { + zfs_znode_delete(zp, tx); + VOP_UNLOCK1(ZTOV(zp)); + zrele(zp); + goto out; + } + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); *zpp = zp; @@ -1540,6 +1561,7 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, acl_ids.z_fuidp, vap); +out: zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); @@ -1550,7 +1572,7 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, zil_commit(zilog, 0); zfs_exit(zfsvfs, FTAG); - return (0); + return (error); } #if __FreeBSD_version < 1300124 @@ -3586,10 +3608,14 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, /* * Insert the new object into the directory. */ - (void) zfs_link_create(dzp, name, zp, tx, ZNEW); - - zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); - *zpp = zp; + error = zfs_link_create(dzp, name, zp, tx, ZNEW); + if (error != 0) { + zfs_znode_delete(zp, tx); + VOP_UNLOCK1(ZTOV(zp)); + zrele(zp); + } else { + zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); + } zfs_acl_ids_free(&acl_ids); @@ -3597,8 +3623,12 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, getnewvnode_drop_reserve(); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); + if (error == 0) { + *zpp = zp; + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + } zfs_exit(zfsvfs, FTAG); return (error); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c index 0d4c94555c6b..0eea2a849416 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c @@ -1234,7 +1234,6 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) VERIFY0(dmu_object_free(os, obj, tx)); zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); - zfs_znode_free(zp); } void diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c b/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c index 5e073950d61a..91840ed2ca22 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-procfs-list.c @@ -234,7 +234,7 @@ procfs_list_install(const char *module, modulestr = kmem_asprintf("%s/%s", module, submodule); else modulestr = kmem_asprintf("%s", module); - mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&procfs_list->pl_lock, NULL, MUTEX_NOLOCKDEP, NULL); list_create(&procfs_list->pl_list, procfs_list_node_off + sizeof (procfs_list_node_t), procfs_list_node_off + offsetof(procfs_list_node_t, pln_link)); diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c index c384b7b378c3..e7b812c3b5b5 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c @@ -158,7 +158,7 @@ task_alloc(taskq_t *tq, uint_t flags, unsigned long *irqflags) * throttling the task dispatch rate. */ spin_unlock_irqrestore(&tq->tq_lock, *irqflags); - schedule_timeout(HZ / 100); + schedule_timeout_interruptible(HZ / 100); spin_lock_irqsave_nested(&tq->tq_lock, *irqflags, tq->tq_lock_class); if (count < 100) { diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c index ee3eb4690c3a..dbb8eefa7ec4 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c @@ -152,26 +152,16 @@ spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...) EXPORT_SYMBOL(spl_kthread_create); /* - * The "why" argument indicates the allowable side-effects of the call: - * - * FORREAL: Extract the next pending signal from p_sig into p_cursig; - * stop the process if a stop has been requested or if a traced signal - * is pending. - * - * JUSTLOOKING: Don't stop the process, just indicate whether or not - * a signal might be pending (FORREAL is needed to tell for sure). + * Extract the next pending signal from p_sig into p_cursig; stop the process + * if a stop has been requested or if a traced signal is pending. */ int -issig(int why) +issig(void) { - ASSERT(why == FORREAL || why == JUSTLOOKING); if (!signal_pending(current)) return (0); - if (why != FORREAL) - return (1); - struct task_struct *task = current; spl_kernel_siginfo_t __info; sigset_t set; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c index 463c5f705102..7284b922b3bf 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c @@ -397,7 +397,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, if (v->vdev_removed) break; - schedule_timeout(MSEC_TO_TICK(10)); + schedule_timeout_interruptible(MSEC_TO_TICK(10)); } else if (unlikely(BDH_PTR_ERR(bdh) == -ERESTARTSYS)) { timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms * 10); continue; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c index f707959c9445..9ee40771fc19 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_debug.c @@ -221,29 +221,29 @@ __dprintf(boolean_t dprint, const char *file, const char *func, #else void -zfs_dbgmsg_print(const char *tag) +zfs_dbgmsg_print(int fd, const char *tag) { ssize_t ret __attribute__((unused)); + mutex_enter(&zfs_dbgmsgs.pl_lock); + /* * We use write() in this function instead of printf() * so it is safe to call from a signal handler. */ - ret = write(STDOUT_FILENO, "ZFS_DBGMSG(", 11); - ret = write(STDOUT_FILENO, tag, strlen(tag)); - ret = write(STDOUT_FILENO, ") START:\n", 9); + ret = write(fd, "ZFS_DBGMSG(", 11); + ret = write(fd, tag, strlen(tag)); + ret = write(fd, ") START:\n", 9); - mutex_enter(&zfs_dbgmsgs.pl_lock); for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs.pl_list); zdm != NULL; zdm = list_next(&zfs_dbgmsgs.pl_list, zdm)) { - ret = write(STDOUT_FILENO, zdm->zdm_msg, - strlen(zdm->zdm_msg)); - ret = write(STDOUT_FILENO, "\n", 1); + ret = write(fd, zdm->zdm_msg, strlen(zdm->zdm_msg)); + ret = write(fd, "\n", 1); } - ret = write(STDOUT_FILENO, "ZFS_DBGMSG(", 11); - ret = write(STDOUT_FILENO, tag, strlen(tag)); - ret = write(STDOUT_FILENO, ") END\n", 6); + ret = write(fd, "ZFS_DBGMSG(", 11); + ret = write(fd, tag, strlen(tag)); + ret = write(fd, ") END\n", 6); mutex_exit(&zfs_dbgmsgs.pl_lock); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index 2a036dc5136b..3e020e532263 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -384,7 +384,7 @@ zvol_discard(zv_request_t *zvr) */ if (!io_is_secure_erase(bio, rq)) { start = P2ROUNDUP(start, zv->zv_volblocksize); - end = P2ALIGN(end, zv->zv_volblocksize); + end = P2ALIGN_TYPED(end, zv->zv_volblocksize, uint64_t); size = end - start; } @@ -798,7 +798,8 @@ zvol_open(struct block_device *bdev, fmode_t flag) if ((gethrtime() - start) > timeout) return (SET_ERROR(-ERESTARTSYS)); - schedule_timeout(MSEC_TO_TICK(10)); + schedule_timeout_interruptible( + MSEC_TO_TICK(10)); goto retry; #endif } else { diff --git a/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c b/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c index 619ddef0243a..74b8c2a475a1 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c @@ -471,7 +471,8 @@ fletcher_4_native(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { (void) ctx_template; - const uint64_t p2size = P2ALIGN(size, FLETCHER_MIN_SIMD_SIZE); + const uint64_t p2size = P2ALIGN_TYPED(size, FLETCHER_MIN_SIMD_SIZE, + uint64_t); ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t))); @@ -519,7 +520,8 @@ fletcher_4_byteswap(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { (void) ctx_template; - const uint64_t p2size = P2ALIGN(size, FLETCHER_MIN_SIMD_SIZE); + const uint64_t p2size = P2ALIGN_TYPED(size, FLETCHER_MIN_SIMD_SIZE, + uint64_t); ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t))); @@ -878,7 +880,7 @@ abd_fletcher_4_iter(void *data, size_t size, void *private) fletcher_4_ctx_t *ctx = cdp->acd_ctx; fletcher_4_ops_t *ops = (fletcher_4_ops_t *)cdp->acd_private; boolean_t native = cdp->acd_byteorder == ZIO_CHECKSUM_NATIVE; - uint64_t asize = P2ALIGN(size, FLETCHER_MIN_SIMD_SIZE); + uint64_t asize = P2ALIGN_TYPED(size, FLETCHER_MIN_SIMD_SIZE, uint64_t); ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t))); diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index 51039af9bcc0..30d30b98a6c6 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -8143,11 +8143,11 @@ l2arc_dev_get_next(void) ASSERT3P(next, !=, NULL); } while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild || - next->l2ad_trim_all); + next->l2ad_trim_all || next->l2ad_spa->spa_is_exporting); /* if we were unable to find any usable vdevs, return NULL */ if (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild || - next->l2ad_trim_all) + next->l2ad_trim_all || next->l2ad_spa->spa_is_exporting) next = NULL; l2arc_dev_last = next; @@ -8886,7 +8886,7 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) * assertions may be violated without functional consequences * as the device is about to be removed. */ - ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end); + ASSERT3U(dev->l2ad_hand + distance, <=, dev->l2ad_end); if (!dev->l2ad_first) ASSERT3U(dev->l2ad_hand, <=, dev->l2ad_evict); } diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c index af2b94a850be..9c52083603f1 100644 --- a/sys/contrib/openzfs/module/zfs/btree.c +++ b/sys/contrib/openzfs/module/zfs/btree.c @@ -218,7 +218,7 @@ zfs_btree_create_custom(zfs_btree_t *tree, zfs_btree_find_in_buf : bt_find_in_buf; tree->bt_elem_size = size; tree->bt_leaf_size = lsize; - tree->bt_leaf_cap = P2ALIGN(esize / size, 2); + tree->bt_leaf_cap = P2ALIGN_TYPED(esize / size, 2, size_t); tree->bt_height = -1; tree->bt_bulk = NULL; } diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index bb913f556374..56fe2c4dbe30 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -292,8 +292,8 @@ dbuf_cons(void *vdb, void *unused, int kmflag) dmu_buf_impl_t *db = vdb; memset(db, 0, sizeof (dmu_buf_impl_t)); - mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL); - rw_init(&db->db_rwlock, NULL, RW_DEFAULT, NULL); + mutex_init(&db->db_mtx, NULL, MUTEX_NOLOCKDEP, NULL); + rw_init(&db->db_rwlock, NULL, RW_NOLOCKDEP, NULL); cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL); multilist_link_init(&db->db_cache_link); zfs_refcount_create(&db->db_holds); @@ -578,7 +578,7 @@ dbuf_evict_user(dmu_buf_impl_t *db) */ uint64_t size = dbu->dbu_size; (void) zfs_refcount_remove_many( - &dbuf_caches[db->db_caching_status].size, size, db); + &dbuf_caches[db->db_caching_status].size, size, dbu); if (db->db_caching_status == DB_DBUF_CACHE) DBUF_STAT_DECR(cache_levels_bytes[db->db_level], size); } @@ -784,12 +784,15 @@ dbuf_evict_one(void) if (db != NULL) { multilist_sublist_remove(mls, db); multilist_sublist_unlock(mls); - uint64_t size = db->db.db_size + dmu_buf_user_size(&db->db); + uint64_t size = db->db.db_size; + uint64_t usize = dmu_buf_user_size(&db->db); (void) zfs_refcount_remove_many( &dbuf_caches[DB_DBUF_CACHE].size, size, db); + (void) zfs_refcount_remove_many( + &dbuf_caches[DB_DBUF_CACHE].size, usize, db->db_user); DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]); DBUF_STAT_BUMPDOWN(cache_count); - DBUF_STAT_DECR(cache_levels_bytes[db->db_level], size); + DBUF_STAT_DECR(cache_levels_bytes[db->db_level], size + usize); ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE); db->db_caching_status = DB_NO_CACHE; dbuf_destroy(db); @@ -958,7 +961,7 @@ dbuf_init(void) 0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); for (int i = 0; i < hmsize; i++) - mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); + mutex_init(&h->hash_mutexes[i], NULL, MUTEX_NOLOCKDEP, NULL); dbuf_stats_init(h); @@ -2850,6 +2853,7 @@ dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed) failed = B_FALSE; } else if (failed) { VERIFY(!dbuf_undirty(db, tx)); + arc_buf_destroy(db->db_buf, db); db->db_buf = NULL; dbuf_clear_data(db); DTRACE_SET_STATE(db, "fill failed"); @@ -3794,16 +3798,21 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, multilist_remove(&dbuf_caches[db->db_caching_status].cache, db); - uint64_t size = db->db.db_size + dmu_buf_user_size(&db->db); + uint64_t size = db->db.db_size; + uint64_t usize = dmu_buf_user_size(&db->db); (void) zfs_refcount_remove_many( &dbuf_caches[db->db_caching_status].size, size, db); + (void) zfs_refcount_remove_many( + &dbuf_caches[db->db_caching_status].size, usize, + db->db_user); if (db->db_caching_status == DB_DBUF_METADATA_CACHE) { DBUF_STAT_BUMPDOWN(metadata_cache_count); } else { DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]); DBUF_STAT_BUMPDOWN(cache_count); - DBUF_STAT_DECR(cache_levels_bytes[db->db_level], size); + DBUF_STAT_DECR(cache_levels_bytes[db->db_level], + size + usize); } db->db_caching_status = DB_NO_CACHE; } @@ -4022,10 +4031,12 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag, boolean_t evicting) db->db_caching_status = dcs; multilist_insert(&dbuf_caches[dcs].cache, db); - uint64_t db_size = db->db.db_size + - dmu_buf_user_size(&db->db); - size = zfs_refcount_add_many( + uint64_t db_size = db->db.db_size; + uint64_t dbu_size = dmu_buf_user_size(&db->db); + (void) zfs_refcount_add_many( &dbuf_caches[dcs].size, db_size, db); + size = zfs_refcount_add_many( + &dbuf_caches[dcs].size, dbu_size, db->db_user); uint8_t db_level = db->db_level; mutex_exit(&db->db_mtx); @@ -4038,7 +4049,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag, boolean_t evicting) DBUF_STAT_MAX(cache_size_bytes_max, size); DBUF_STAT_BUMP(cache_levels[db_level]); DBUF_STAT_INCR(cache_levels_bytes[db_level], - db_size); + db_size + dbu_size); } if (dcs == DB_DBUF_CACHE && !evicting) diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index 6ef149aab9a6..8b440aafba43 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -537,7 +537,8 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, if (dn->dn_datablkshift) { int blkshift = dn->dn_datablkshift; nblks = (P2ROUNDUP(offset + length, 1ULL << blkshift) - - P2ALIGN(offset, 1ULL << blkshift)) >> blkshift; + P2ALIGN_TYPED(offset, 1ULL << blkshift, uint64_t)) + >> blkshift; } else { if (offset + length > dn->dn_datablksz) { zfs_panic_recover("zfs: accessing past end of object " @@ -854,7 +855,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum, uint64_t *l1blks) } /* set start to the beginning of this L1 indirect */ - *start = P2ALIGN(*start, iblkrange); + *start = P2ALIGN_TYPED(*start, iblkrange, uint64_t); } if (*start < minimum) *start = minimum; diff --git a/sys/contrib/openzfs/module/zfs/dmu_diff.c b/sys/contrib/openzfs/module/zfs/dmu_diff.c index a2b1a27c88e9..0def0956beb8 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_diff.c +++ b/sys/contrib/openzfs/module/zfs/dmu_diff.c @@ -116,7 +116,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, dmu_diffarg_t *da = arg; int err = 0; - if (issig(JUSTLOOKING) && issig(FORREAL)) + if (issig()) return (SET_ERROR(EINTR)); if (zb->zb_level == ZB_DNODE_LEVEL || diff --git a/sys/contrib/openzfs/module/zfs/dmu_object.c b/sys/contrib/openzfs/module/zfs/dmu_object.c index d0e39a423bb0..56986ea43446 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_object.c +++ b/sys/contrib/openzfs/module/zfs/dmu_object.c @@ -160,7 +160,7 @@ dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize, * is not suitably aligned. */ os->os_obj_next_chunk = - P2ALIGN(object, dnodes_per_chunk) + + P2ALIGN_TYPED(object, dnodes_per_chunk, uint64_t) + dnodes_per_chunk; (void) atomic_swap_64(cpuobj, object); mutex_exit(&os->os_obj_lock); diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c index f1818ae155bd..8f4fefa4f4dd 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -2437,7 +2437,7 @@ dmu_objset_space_upgrade(objset_t *os) if (err != 0) return (err); - if (issig(JUSTLOOKING) && issig(FORREAL)) + if (issig()) return (SET_ERROR(EINTR)); objerr = dmu_bonus_hold(os, obj, FTAG, &db); diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c index 680aed4513bc..0119191d7920 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_recv.c +++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c @@ -3389,7 +3389,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp) * stream, then we free drc->drc_rrd and exit. */ while (rwa->err == 0) { - if (issig(JUSTLOOKING) && issig(FORREAL)) { + if (issig()) { err = SET_ERROR(EINTR); break; } diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c index 5ac14edfca12..1feba0ba83de 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_redact.c +++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c @@ -912,7 +912,7 @@ perform_redaction(objset_t *os, redaction_list_t *rl, object = prev_obj; } while (err == 0 && object <= rec->end_object) { - if (issig(JUSTLOOKING) && issig(FORREAL)) { + if (issig()) { err = EINTR; break; } diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c index b6cc2f0a5e91..cb2b62fed313 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_send.c +++ b/sys/contrib/openzfs/module/zfs/dmu_send.c @@ -2552,7 +2552,7 @@ dmu_send_impl(struct dmu_send_params *dspp) while (err == 0 && !range->eos_marker) { err = do_dump(&dsc, range); range = get_next_range(&srt_arg->q, range); - if (issig(JUSTLOOKING) && issig(FORREAL)) + if (issig()) err = SET_ERROR(EINTR); } diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c index 9e762357b727..7170b5eefcea 100644 --- a/sys/contrib/openzfs/module/zfs/metaslab.c +++ b/sys/contrib/openzfs/module/zfs/metaslab.c @@ -627,8 +627,8 @@ metaslab_class_expandable_space(metaslab_class_t *mc) * metaslabs. We report the expandable space in terms * of the metaslab size since that's the unit of expansion. */ - space += P2ALIGN(tvd->vdev_max_asize - tvd->vdev_asize, - 1ULL << tvd->vdev_ms_shift); + space += P2ALIGN_TYPED(tvd->vdev_max_asize - tvd->vdev_asize, + 1ULL << tvd->vdev_ms_shift, uint64_t); } spa_config_exit(mc->mc_spa, SCL_VDEV, FTAG); return (space); @@ -638,6 +638,7 @@ void metaslab_class_evict_old(metaslab_class_t *mc, uint64_t txg) { multilist_t *ml = &mc->mc_metaslab_txg_list; + hrtime_t now = gethrtime(); for (int i = 0; i < multilist_get_num_sublists(ml); i++) { multilist_sublist_t *mls = multilist_sublist_lock_idx(ml, i); metaslab_t *msp = multilist_sublist_head(mls); @@ -661,8 +662,10 @@ metaslab_class_evict_old(metaslab_class_t *mc, uint64_t txg) multilist_sublist_unlock(mls); if (txg > msp->ms_selected_txg + metaslab_unload_delay && - gethrtime() > msp->ms_selected_time + - (uint64_t)MSEC2NSEC(metaslab_unload_delay_ms)) { + now > msp->ms_selected_time + + MSEC2NSEC(metaslab_unload_delay_ms) && + (msp->ms_allocator == -1 || + !metaslab_preload_enabled)) { metaslab_evict(msp, txg); } else { /* diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index c3800e018c73..638572996c3a 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -34,6 +34,7 @@ * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2021, Colm Buckley * Copyright (c) 2023 Hewlett Packard Enterprise Development LP. + * Copyright (c) 2024, Klara Inc. */ /* @@ -170,14 +171,19 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { * that scales with the number of CPUs. * * The different taskq priorities are to handle the different contexts (issue - * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that - * need to be handled with minimum delay. + * and interrupt) and then to reserve threads for high priority I/Os that + * need to be handled with minimum delay. Illumos taskq has unfair TQ_FRONT + * implementation, so separate high priority threads are used there. */ static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ { ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */ +#ifdef illumos { ZTI_SYNC, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */ +#else + { ZTI_SYNC, ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* WRITE */ +#endif { ZTI_SCALE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FLUSH */ @@ -1217,7 +1223,7 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q) * * Example (the defaults for READ and WRITE) * zio_taskq_read='fixed,1,8 null scale null' - * zio_taskq_write='sync fixed,1,5 scale fixed,1,5' + * zio_taskq_write='sync null scale null' * * Each sets the entire row at a time. * @@ -1484,9 +1490,9 @@ spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS) * Note that a type may have multiple discrete taskqs to avoid lock contention * on the taskq itself. */ -static taskq_t * -spa_taskq_dispatch_select(spa_t *spa, zio_type_t t, zio_taskq_type_t q, - zio_t *zio) +void +spa_taskq_dispatch(spa_t *spa, zio_type_t t, zio_taskq_type_t q, + task_func_t *func, zio_t *zio, boolean_t cutinline) { spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; taskq_t *tq; @@ -1494,37 +1500,25 @@ spa_taskq_dispatch_select(spa_t *spa, zio_type_t t, zio_taskq_type_t q, ASSERT3P(tqs->stqs_taskq, !=, NULL); ASSERT3U(tqs->stqs_count, !=, 0); + /* + * NB: We are assuming that the zio can only be dispatched + * to a single taskq at a time. It would be a grievous error + * to dispatch the zio to another taskq at the same time. + */ + ASSERT(zio); + ASSERT(taskq_empty_ent(&zio->io_tqent)); + if (tqs->stqs_count == 1) { tq = tqs->stqs_taskq[0]; } else if ((t == ZIO_TYPE_WRITE) && (q == ZIO_TASKQ_ISSUE) && - (zio != NULL) && ZIO_HAS_ALLOCATOR(zio)) { + ZIO_HAS_ALLOCATOR(zio)) { tq = tqs->stqs_taskq[zio->io_allocator % tqs->stqs_count]; } else { tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count]; } - return (tq); -} -void -spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, - task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent, - zio_t *zio) -{ - taskq_t *tq = spa_taskq_dispatch_select(spa, t, q, zio); - taskq_dispatch_ent(tq, func, arg, flags, ent); -} - -/* - * Same as spa_taskq_dispatch_ent() but block on the task until completion. - */ -void -spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q, - task_func_t *func, void *arg, uint_t flags) -{ - taskq_t *tq = spa_taskq_dispatch_select(spa, t, q, NULL); - taskqid_t id = taskq_dispatch(tq, func, arg, flags); - if (id) - taskq_wait_id(tq, id); + taskq_dispatch_ent(tq, func, zio, cutinline ? TQ_FRONT : 0, + &zio->io_tqent); } static void @@ -1986,7 +1980,8 @@ spa_destroy_aux_threads(spa_t *spa) static void spa_unload(spa_t *spa) { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(MUTEX_HELD(&spa_namespace_lock) || + spa->spa_export_thread == curthread); ASSERT(spa_state(spa) != POOL_STATE_UNINITIALIZED); spa_import_progress_remove(spa_guid(spa)); @@ -6950,7 +6945,7 @@ static int spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, boolean_t force, boolean_t hardforce) { - int error; + int error = 0; spa_t *spa; hrtime_t export_start = gethrtime(); @@ -6974,8 +6969,8 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, spa->spa_is_exporting = B_TRUE; /* - * Put a hold on the pool, drop the namespace lock, stop async tasks, - * reacquire the namespace lock, and see if we can export. + * Put a hold on the pool, drop the namespace lock, stop async tasks + * and see if we can export. */ spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); @@ -6985,10 +6980,14 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, taskq_wait(spa->spa_zvol_taskq); } mutex_enter(&spa_namespace_lock); + spa->spa_export_thread = curthread; spa_close(spa, FTAG); - if (spa->spa_state == POOL_STATE_UNINITIALIZED) + if (spa->spa_state == POOL_STATE_UNINITIALIZED) { + mutex_exit(&spa_namespace_lock); goto export_spa; + } + /* * The pool will be in core if it's openable, in which case we can * modify its state. Objsets may be open only because they're dirty, @@ -7009,6 +7008,14 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, goto fail; } + mutex_exit(&spa_namespace_lock); + /* + * At this point we no longer hold the spa_namespace_lock and + * there were no references on the spa. Future spa_lookups will + * notice the spa->spa_export_thread and wait until we signal + * that we are finshed. + */ + if (spa->spa_sync_on) { vdev_t *rvd = spa->spa_root_vdev; /* @@ -7020,6 +7027,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, if (!force && new_state == POOL_STATE_EXPORTED && spa_has_active_shared_spare(spa)) { error = SET_ERROR(EXDEV); + mutex_enter(&spa_namespace_lock); goto fail; } @@ -7084,6 +7092,13 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, if (oldconfig && spa->spa_config) *oldconfig = fnvlist_dup(spa->spa_config); + if (new_state == POOL_STATE_EXPORTED) + zio_handle_export_delay(spa, gethrtime() - export_start); + + /* + * Take the namespace lock for the actual spa_t removal + */ + mutex_enter(&spa_namespace_lock); if (new_state != POOL_STATE_UNINITIALIZED) { if (!hardforce) spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE); @@ -7095,17 +7110,25 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, * we make sure to reset the exporting flag. */ spa->spa_is_exporting = B_FALSE; + spa->spa_export_thread = NULL; } - if (new_state == POOL_STATE_EXPORTED) - zio_handle_export_delay(spa, gethrtime() - export_start); - + /* + * Wake up any waiters in spa_lookup() + */ + cv_broadcast(&spa_namespace_cv); mutex_exit(&spa_namespace_lock); return (0); fail: spa->spa_is_exporting = B_FALSE; + spa->spa_export_thread = NULL; + spa_async_resume(spa); + /* + * Wake up any waiters in spa_lookup() + */ + cv_broadcast(&spa_namespace_cv); mutex_exit(&spa_namespace_lock); return (error); } @@ -10145,6 +10168,9 @@ spa_sync(spa_t *spa, uint64_t txg) metaslab_class_evict_old(spa->spa_normal_class, txg); metaslab_class_evict_old(spa->spa_log_class, txg); + /* spa_embedded_log_class has only one metaslab per vdev. */ + metaslab_class_evict_old(spa->spa_special_class, txg); + metaslab_class_evict_old(spa->spa_dedup_class, txg); spa_sync_close_syncing_log_sm(spa); diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index e6d4a9bdb29c..d1d41bbe7214 100644 --- a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -27,7 +27,7 @@ * Copyright (c) 2017 Datto Inc. * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, loli10K . All rights reserved. - * Copyright (c) 2023, Klara Inc. + * Copyright (c) 2023, 2024, Klara Inc. */ #include @@ -82,8 +82,8 @@ * - Check if spa_refcount is zero * - Rename a spa_t * - add/remove/attach/detach devices - * - Held for the duration of create/destroy/export - * - Held at the start and end of import + * - Held for the duration of create/destroy + * - Held at the start and end of import and export * * It does not need to handle recursion. A create or destroy may * reference objects (files or zvols) in other pools, but by @@ -636,8 +636,14 @@ spa_lookup(const char *name) if (spa == NULL) return (NULL); - if (spa->spa_load_thread != NULL && - spa->spa_load_thread != curthread) { + /* + * Avoid racing with import/export, which don't hold the namespace + * lock for their entire duration. + */ + if ((spa->spa_load_thread != NULL && + spa->spa_load_thread != curthread) || + (spa->spa_export_thread != NULL && + spa->spa_export_thread != curthread)) { cv_wait(&spa_namespace_cv, &spa_namespace_lock); goto retry; } @@ -950,14 +956,15 @@ spa_open_ref(spa_t *spa, const void *tag) /* * Remove a reference to the given spa_t. Must have at least one reference, or - * have the namespace lock held. + * have the namespace lock held or be part of a pool import/export. */ void spa_close(spa_t *spa, const void *tag) { ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref || MUTEX_HELD(&spa_namespace_lock) || - spa->spa_load_thread == curthread); + spa->spa_load_thread == curthread || + spa->spa_export_thread == curthread); (void) zfs_refcount_remove(&spa->spa_refcount, tag); } @@ -977,13 +984,15 @@ spa_async_close(spa_t *spa, const void *tag) /* * Check to see if the spa refcount is zero. Must be called with - * spa_namespace_lock held. We really compare against spa_minref, which is the - * number of references acquired when opening a pool + * spa_namespace_lock held or be the spa export thread. We really + * compare against spa_minref, which is the number of references + * acquired when opening a pool */ boolean_t spa_refcount_zero(spa_t *spa) { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(MUTEX_HELD(&spa_namespace_lock) || + spa->spa_export_thread == curthread); return (zfs_refcount_count(&spa->spa_refcount) == spa->spa_minref); } @@ -1231,6 +1240,8 @@ spa_vdev_enter(spa_t *spa) mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); + ASSERT0(spa->spa_export_thread); + vdev_autotrim_stop_all(spa); return (spa_vdev_config_enter(spa)); @@ -1248,6 +1259,8 @@ spa_vdev_detach_enter(spa_t *spa, uint64_t guid) mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); + ASSERT0(spa->spa_export_thread); + vdev_autotrim_stop_all(spa); if (guid != 0) { diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index c5551eb6cf6e..c74f72159dc9 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -112,6 +112,11 @@ int zfs_vdev_dtl_sm_blksz = (1 << 12); */ static unsigned int zfs_slow_io_events_per_second = 20; +/* + * Rate limit deadman "hung IO" events to this many per second. + */ +static unsigned int zfs_deadman_events_per_second = 1; + /* * Rate limit checksum events after this many checksum errors per second. */ @@ -348,7 +353,8 @@ vdev_get_min_asize(vdev_t *vd) * to the nearest metaslab. */ if (vd == vd->vdev_top) - return (P2ALIGN(vd->vdev_asize, 1ULL << vd->vdev_ms_shift)); + return (P2ALIGN_TYPED(vd->vdev_asize, 1ULL << vd->vdev_ms_shift, + uint64_t)); return (pvd->vdev_ops->vdev_op_min_asize(pvd)); } @@ -665,7 +671,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) */ zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_slow_io_events_per_second, 1); - zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_slow_io_events_per_second, + zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_deadman_events_per_second, 1); zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksum_events_per_second, 1); @@ -2115,8 +2121,8 @@ vdev_open(vdev_t *vd) } } - osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t)); - max_osize = P2ALIGN(max_osize, (uint64_t)sizeof (vdev_label_t)); + osize = P2ALIGN_TYPED(osize, sizeof (vdev_label_t), uint64_t); + max_osize = P2ALIGN_TYPED(max_osize, sizeof (vdev_label_t), uint64_t); if (vd->vdev_children == 0) { if (osize < SPA_MINDEVSIZE) { @@ -4764,9 +4770,9 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx) * can expand. */ if (vd->vdev_aux == NULL && tvd != NULL) { - vs->vs_esize = P2ALIGN( + vs->vs_esize = P2ALIGN_TYPED( vd->vdev_max_asize - vd->vdev_asize, - 1ULL << tvd->vdev_ms_shift); + 1ULL << tvd->vdev_ms_shift, uint64_t); } vs->vs_configured_ashift = vd->vdev_top != NULL @@ -6475,6 +6481,9 @@ ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, ms_count_limit, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, slow_io_events_per_second, UINT, ZMOD_RW, "Rate limit slow IO (delay) events to this many per second"); +ZFS_MODULE_PARAM(zfs, zfs_, deadman_events_per_second, UINT, ZMOD_RW, + "Rate limit hung IO (deadman) events to this many per second"); + /* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs, zfs_, checksum_events_per_second, UINT, ZMOD_RW, "Rate limit checksum events to this many checksum errors per second " diff --git a/sys/contrib/openzfs/module/zfs/vdev_initialize.c b/sys/contrib/openzfs/module/zfs/vdev_initialize.c index c5e16af16692..0a7323f58df2 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_initialize.c +++ b/sys/contrib/openzfs/module/zfs/vdev_initialize.c @@ -682,7 +682,8 @@ vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list) (void) spa; vdev_t *vd; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(MUTEX_HELD(&spa_namespace_lock) || + spa->spa_export_thread == curthread); while ((vd = list_remove_head(vd_list)) != NULL) { mutex_enter(&vd->vdev_initialize_lock); @@ -724,7 +725,8 @@ vdev_initialize_stop(vdev_t *vd, vdev_initializing_state_t tgt_state, if (vd_list == NULL) { vdev_initialize_stop_wait_impl(vd); } else { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(MUTEX_HELD(&spa_namespace_lock) || + vd->vdev_spa->spa_export_thread == curthread); list_insert_tail(vd_list, vd); } } @@ -756,7 +758,8 @@ vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state) spa_t *spa = vd->vdev_spa; list_t vd_list; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(MUTEX_HELD(&spa_namespace_lock) || + spa->spa_export_thread == curthread); list_create(&vd_list, sizeof (vdev_t), offsetof(vdev_t, vdev_initialize_node)); diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c index de7d0fa79478..15c8b8ca6016 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c @@ -4039,7 +4039,8 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); vdev_t *raidvd = vdev_lookup_top(spa, vre->vre_vdev_id); int ashift = raidvd->vdev_ashift; - uint64_t write_size = P2ALIGN(VDEV_BOOT_SIZE, 1 << ashift); + uint64_t write_size = P2ALIGN_TYPED(VDEV_BOOT_SIZE, 1 << ashift, + uint64_t); uint64_t logical_size = write_size * raidvd->vdev_children; uint64_t read_size = P2ROUNDUP(DIV_ROUND_UP(logical_size, (raidvd->vdev_children - 1)), diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c index 00ebd4c9fca4..8a8b02cab5c6 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c +++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c @@ -1087,7 +1087,8 @@ vdev_rebuild_stop_wait(vdev_t *vd) { spa_t *spa = vd->vdev_spa; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(MUTEX_HELD(&spa_namespace_lock) || + spa->spa_export_thread == curthread); if (vd == spa->spa_root_vdev) { for (uint64_t i = 0; i < vd->vdev_children; i++) diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c b/sys/contrib/openzfs/module/zfs/vdev_trim.c index 9753d5a1ea04..9cf10332e8bf 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_trim.c +++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c @@ -1040,7 +1040,8 @@ vdev_trim_stop_wait(spa_t *spa, list_t *vd_list) (void) spa; vdev_t *vd; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(MUTEX_HELD(&spa_namespace_lock) || + spa->spa_export_thread == curthread); while ((vd = list_remove_head(vd_list)) != NULL) { mutex_enter(&vd->vdev_trim_lock); @@ -1079,7 +1080,8 @@ vdev_trim_stop(vdev_t *vd, vdev_trim_state_t tgt_state, list_t *vd_list) if (vd_list == NULL) { vdev_trim_stop_wait_impl(vd); } else { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(MUTEX_HELD(&spa_namespace_lock) || + vd->vdev_spa->spa_export_thread == curthread); list_insert_tail(vd_list, vd); } } @@ -1115,7 +1117,8 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state) list_t vd_list; vdev_t *vd_l2cache; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(MUTEX_HELD(&spa_namespace_lock) || + spa->spa_export_thread == curthread); list_create(&vd_list, sizeof (vdev_t), offsetof(vdev_t, vdev_trim_node)); diff --git a/sys/contrib/openzfs/module/zfs/zap.c b/sys/contrib/openzfs/module/zfs/zap.c index 1b6b16fc6662..03b76ea1b7bf 100644 --- a/sys/contrib/openzfs/module/zfs/zap.c +++ b/sys/contrib/openzfs/module/zfs/zap.c @@ -425,20 +425,36 @@ zap_leaf_evict_sync(void *dbu) static zap_leaf_t * zap_create_leaf(zap_t *zap, dmu_tx_t *tx) { - zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP); - ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - rw_init(&l->l_rwlock, NULL, RW_NOLOCKDEP, NULL); - rw_enter(&l->l_rwlock, RW_WRITER); - l->l_blkid = zap_allocate_blocks(zap, 1); - l->l_dbuf = NULL; + uint64_t blkid = zap_allocate_blocks(zap, 1); + dmu_buf_t *db = NULL; VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode, - l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf, + blkid << FZAP_BLOCK_SHIFT(zap), NULL, &db, DMU_READ_NO_PREFETCH)); - dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf); - VERIFY3P(NULL, ==, dmu_buf_set_user(l->l_dbuf, &l->l_dbu)); + + /* + * Create the leaf structure and stash it on the dbuf. If zap was + * recent shrunk or truncated, the dbuf might have been sitting in the + * cache waiting to be evicted, and so still have the old leaf attached + * to it. If so, just reuse it. + */ + zap_leaf_t *l = dmu_buf_get_user(db); + if (l == NULL) { + l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP); + l->l_blkid = blkid; + l->l_dbuf = db; + rw_init(&l->l_rwlock, NULL, RW_NOLOCKDEP, NULL); + dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, + &l->l_dbuf); + dmu_buf_set_user(l->l_dbuf, &l->l_dbu); + } else { + ASSERT3U(l->l_blkid, ==, blkid); + ASSERT3P(l->l_dbuf, ==, db); + } + + rw_enter(&l->l_rwlock, RW_WRITER); dmu_buf_will_dirty(l->l_dbuf, tx); zap_leaf_init(l, zap->zap_normflags != 0); @@ -711,6 +727,7 @@ zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, uint64_t object = zap->zap_object; zap_put_leaf(l); + *lp = l = NULL; zap_unlockdir(zap, tag); err = zap_lockdir(os, object, tx, RW_WRITER, FALSE, FALSE, tag, &zn->zn_zap); @@ -920,21 +937,17 @@ fzap_add_cd(zap_name_t *zn, } else if (err == EAGAIN) { err = zap_expand_leaf(zn, l, tag, tx, &l); zap = zn->zn_zap; /* zap_expand_leaf() may change zap */ - if (err == 0) { + if (err == 0) goto retry; - } else if (err == ENOSPC) { - /* - * If we failed to expand the leaf, then bailout - * as there is no point trying - * zap_put_leaf_maybe_grow_ptrtbl(). - */ - return (err); - } } out: - if (zap != NULL) - zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx); + if (l != NULL) { + if (err == ENOSPC) + zap_put_leaf(l); + else + zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx); + } return (err); } @@ -991,8 +1004,12 @@ fzap_update(zap_name_t *zn, goto retry; } - if (zap != NULL) - zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx); + if (l != NULL) { + if (err == ENOSPC) + zap_put_leaf(l); + else + zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx); + } return (err); } diff --git a/sys/contrib/openzfs/module/zfs/zcp.c b/sys/contrib/openzfs/module/zfs/zcp.c index 959404f665ab..7c279162a9d1 100644 --- a/sys/contrib/openzfs/module/zfs/zcp.c +++ b/sys/contrib/openzfs/module/zfs/zcp.c @@ -780,8 +780,7 @@ zcp_lua_counthook(lua_State *state, lua_Debug *ar) * Check if we were canceled while waiting for the * txg to sync or from our open context thread */ - if (ri->zri_canceled || - (!ri->zri_sync && issig(JUSTLOOKING) && issig(FORREAL))) { + if (ri->zri_canceled || (!ri->zri_sync && issig())) { ri->zri_canceled = B_TRUE; (void) lua_pushstring(state, "Channel program was canceled."); (void) lua_error(state); diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c index 908b9efc1813..7b527eb75e83 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c +++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c @@ -38,7 +38,7 @@ * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved. - * Copyright (c) 2019, 2021, Klara Inc. + * Copyright (c) 2019, 2021, 2024, Klara Inc. * Copyright (c) 2019, Allan Jude * Copyright 2024 Oxide Computer Company */ @@ -2336,7 +2336,7 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) } while (error == 0) { - if (issig(JUSTLOOKING) && issig(FORREAL)) { + if (issig()) { error = SET_ERROR(EINTR); break; } @@ -5514,6 +5514,14 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) return (error); } +/* + * When stack space is limited, we write replication stream data to the target + * on a separate taskq thread, to make sure there's enough stack space. + */ +#ifndef HAVE_LARGE_STACKS +#define USE_SEND_TASKQ 1 +#endif + typedef struct dump_bytes_io { zfs_file_t *dbi_fp; caddr_t dbi_buf; @@ -5534,31 +5542,65 @@ dump_bytes_cb(void *arg) dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL); } +typedef struct dump_bytes_arg { + zfs_file_t *dba_fp; +#ifdef USE_SEND_TASKQ + taskq_t *dba_tq; + taskq_ent_t dba_tqent; +#endif +} dump_bytes_arg_t; + static int dump_bytes(objset_t *os, void *buf, int len, void *arg) { + dump_bytes_arg_t *dba = (dump_bytes_arg_t *)arg; dump_bytes_io_t dbi; - dbi.dbi_fp = arg; + dbi.dbi_fp = dba->dba_fp; dbi.dbi_buf = buf; dbi.dbi_len = len; -#if defined(HAVE_LARGE_STACKS) - dump_bytes_cb(&dbi); +#ifdef USE_SEND_TASKQ + taskq_dispatch_ent(dba->dba_tq, dump_bytes_cb, &dbi, TQ_SLEEP, + &dba->dba_tqent); + taskq_wait(dba->dba_tq); #else - /* - * The vn_rdwr() call is performed in a taskq to ensure that there is - * always enough stack space to write safely to the target filesystem. - * The ZIO_TYPE_FREE threads are used because there can be a lot of - * them and they are used in vdev_file.c for a similar purpose. - */ - spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE, - ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP); -#endif /* HAVE_LARGE_STACKS */ + dump_bytes_cb(&dbi); +#endif return (dbi.dbi_err); } +static int +dump_bytes_init(dump_bytes_arg_t *dba, int fd, dmu_send_outparams_t *out) +{ + zfs_file_t *fp = zfs_file_get(fd); + if (fp == NULL) + return (SET_ERROR(EBADF)); + + dba->dba_fp = fp; +#ifdef USE_SEND_TASKQ + dba->dba_tq = taskq_create("z_send", 1, defclsyspri, 0, 0, 0); + taskq_init_ent(&dba->dba_tqent); +#endif + + memset(out, 0, sizeof (dmu_send_outparams_t)); + out->dso_outfunc = dump_bytes; + out->dso_arg = dba; + out->dso_dryrun = B_FALSE; + + return (0); +} + +static void +dump_bytes_fini(dump_bytes_arg_t *dba) +{ + zfs_file_put(dba->dba_fp); +#ifdef USE_SEND_TASKQ + taskq_destroy(dba->dba_tq); +#endif +} + /* * inputs: * zc_name name of snapshot to send @@ -5643,21 +5685,18 @@ zfs_ioc_send(zfs_cmd_t *zc) dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); } else { - zfs_file_t *fp; - dmu_send_outparams_t out = {0}; + dump_bytes_arg_t dba; + dmu_send_outparams_t out; + error = dump_bytes_init(&dba, zc->zc_cookie, &out); + if (error) + return (error); - if ((fp = zfs_file_get(zc->zc_cookie)) == NULL) - return (SET_ERROR(EBADF)); - - off = zfs_file_off(fp); - out.dso_outfunc = dump_bytes; - out.dso_arg = fp; - out.dso_dryrun = B_FALSE; + off = zfs_file_off(dba.dba_fp); error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, zc->zc_fromobj, embedok, large_block_ok, compressok, rawok, savedok, zc->zc_cookie, &off, &out); - zfs_file_put(fp); + dump_bytes_fini(&dba); } return (error); } @@ -6604,7 +6643,6 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) offset_t off; const char *fromname = NULL; int fd; - zfs_file_t *fp; boolean_t largeblockok; boolean_t embedok; boolean_t compressok; @@ -6629,20 +6667,19 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) (void) nvlist_lookup_string(innvl, "redactbook", &redactbook); - if ((fp = zfs_file_get(fd)) == NULL) - return (SET_ERROR(EBADF)); + dump_bytes_arg_t dba; + dmu_send_outparams_t out; + error = dump_bytes_init(&dba, fd, &out); + if (error) + return (error); - off = zfs_file_off(fp); - - dmu_send_outparams_t out = {0}; - out.dso_outfunc = dump_bytes; - out.dso_arg = fp; - out.dso_dryrun = B_FALSE; + off = zfs_file_off(dba.dba_fp); error = dmu_send(snapname, fromname, embedok, largeblockok, compressok, rawok, savedok, resumeobj, resumeoff, redactbook, fd, &off, &out); - zfs_file_put(fp); + dump_bytes_fini(&dba); + return (error); } diff --git a/sys/contrib/openzfs/module/zfs/zfs_log.c b/sys/contrib/openzfs/module/zfs/zfs_log.c index 433a653e5500..fa4e7093ca46 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_log.c +++ b/sys/contrib/openzfs/module/zfs/zfs_log.c @@ -895,7 +895,7 @@ zfs_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, itx_t *itx; lr_clone_range_t *lr; uint64_t partlen, max_log_data; - size_t i, partnbps; + size_t partnbps; if (zil_replaying(zilog, tx) || zp->z_unlinked) return; @@ -904,10 +904,8 @@ zfs_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, while (nbps > 0) { partnbps = MIN(nbps, max_log_data / sizeof (bps[0])); - partlen = 0; - for (i = 0; i < partnbps; i++) { - partlen += BP_GET_LSIZE(&bps[i]); - } + partlen = partnbps * blksz; + ASSERT3U(partlen, <, len + blksz); partlen = MIN(partlen, len); itx = zil_itx_create(txtype, diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c index babb07ca25a9..f3db953eab46 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c +++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c @@ -1412,6 +1412,11 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, outoff += size; len -= size; done += size; + + if (issig()) { + error = SET_ERROR(EINTR); + break; + } } vmem_free(bps, sizeof (bps[0]) * maxblocks); diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index 870343bf4fa3..d68d5ababe79 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -2023,7 +2023,6 @@ zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline) { spa_t *spa = zio->io_spa; zio_type_t t = zio->io_type; - int flags = (cutinline ? TQ_FRONT : 0); /* * If we're a config writer or a probe, the normal issue and @@ -2041,23 +2040,18 @@ zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline) /* * If this is a high priority I/O, then use the high priority taskq if - * available. + * available or cut the line otherwise. */ - if ((zio->io_priority == ZIO_PRIORITY_NOW || - zio->io_priority == ZIO_PRIORITY_SYNC_WRITE) && - spa->spa_zio_taskq[t][q + 1].stqs_count != 0) - q++; + if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE) { + if (spa->spa_zio_taskq[t][q + 1].stqs_count != 0) + q++; + else + cutinline = B_TRUE; + } ASSERT3U(q, <, ZIO_TASKQ_TYPES); - /* - * NB: We are assuming that the zio can only be dispatched - * to a single taskq at a time. It would be a grievous error - * to dispatch the zio to another taskq at the same time. - */ - ASSERT(taskq_empty_ent(&zio->io_tqent)); - spa_taskq_dispatch_ent(spa, t, q, zio_execute, zio, flags, - &zio->io_tqent, zio); + spa_taskq_dispatch(spa, t, q, zio_execute, zio, cutinline); } static boolean_t @@ -5005,10 +4999,9 @@ zio_done(zio_t *zio) * Reexecution is potentially a huge amount of work. * Hand it off to the otherwise-unused claim taskq. */ - ASSERT(taskq_empty_ent(&zio->io_tqent)); - spa_taskq_dispatch_ent(zio->io_spa, + spa_taskq_dispatch(zio->io_spa, ZIO_TYPE_CLAIM, ZIO_TASKQ_ISSUE, - zio_reexecute, zio, 0, &zio->io_tqent, NULL); + zio_reexecute, zio, B_FALSE); } return (NULL); } diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run index 5e7fdf359a75..ac2c541a9188 100644 --- a/sys/contrib/openzfs/tests/runfiles/common.run +++ b/sys/contrib/openzfs/tests/runfiles/common.run @@ -430,7 +430,8 @@ tags = ['functional', 'cli_root', 'zpool_events'] [tests/functional/cli_root/zpool_export] tests = ['zpool_export_001_pos', 'zpool_export_002_pos', - 'zpool_export_003_neg', 'zpool_export_004_pos'] + 'zpool_export_003_neg', 'zpool_export_004_pos', + 'zpool_export_parallel_pos', 'zpool_export_parallel_admin'] tags = ['functional', 'cli_root', 'zpool_export'] [tests/functional/cli_root/zpool_get] diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in index ecc50f487152..5ca130931364 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in @@ -182,7 +182,6 @@ if sys.platform.startswith('freebsd'): 'cli_root/zfs_unshare/zfs_unshare_008_pos': ['SKIP', na_reason], 'cp_files/cp_files_002_pos': ['SKIP', na_reason], 'link_count/link_count_001': ['SKIP', na_reason], - 'casenorm/mixed_create_failure': ['FAIL', 13215], 'mmap/mmap_sync_001_pos': ['SKIP', na_reason], 'rsend/send_raw_ashift': ['SKIP', 14961], }) diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_input_check.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_input_check.c index c661718a296c..7d9ce4fada1b 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_input_check.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_input_check.c @@ -521,13 +521,15 @@ test_send_new(const char *snapshot, int fd) static void test_recv_new(const char *dataset, int fd) { - dmu_replay_record_t drr = { 0 }; + dmu_replay_record_t drr; nvlist_t *required = fnvlist_alloc(); nvlist_t *optional = fnvlist_alloc(); nvlist_t *props = fnvlist_alloc(); char snapshot[MAXNAMELEN + 32]; ssize_t count; + memset(&drr, 0, sizeof (dmu_replay_record_t)); + int cleanup_fd = open(ZFS_DEV, O_RDWR); if (cleanup_fd == -1) { (void) fprintf(stderr, "open(%s) failed: %s\n", ZFS_DEV, diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg index a619b846dd11..721cf27f48ca 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg @@ -29,6 +29,7 @@ CONDENSE_INDIRECT_OBSOLETE_PCT condense.indirect_obsolete_pct zfs_condense_indir CONDENSE_MIN_MAPPING_BYTES condense.min_mapping_bytes zfs_condense_min_mapping_bytes DBUF_CACHE_SHIFT dbuf.cache_shift dbuf_cache_shift DEADMAN_CHECKTIME_MS deadman.checktime_ms zfs_deadman_checktime_ms +DEADMAN_EVENTS_PER_SECOND deadman_events_per_second zfs_deadman_events_per_second DEADMAN_FAILMODE deadman.failmode zfs_deadman_failmode DEADMAN_SYNCTIME_MS deadman.synctime_ms zfs_deadman_synctime_ms DEADMAN_ZIOTIME_MS deadman.ziotime_ms zfs_deadman_ziotime_ms diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index d625c040b819..44eedcf6fae5 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -1084,6 +1084,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_export/zpool_export_002_pos.ksh \ functional/cli_root/zpool_export/zpool_export_003_neg.ksh \ functional/cli_root/zpool_export/zpool_export_004_pos.ksh \ + functional/cli_root/zpool_export/zpool_export_parallel_admin.ksh \ + functional/cli_root/zpool_export/zpool_export_parallel_pos.ksh \ functional/cli_root/zpool_get/cleanup.ksh \ functional/cli_root/zpool_get/setup.ksh \ functional/cli_root/zpool_get/vdev_get_001_pos.ksh \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_parallel_admin.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_parallel_admin.ksh new file mode 100755 index 000000000000..cab8fc2b4239 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_parallel_admin.ksh @@ -0,0 +1,72 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2024 Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify that admin commands cannot race a pool export +# +# STRATEGY: +# 1. Create a pool +# 2. Import the pool with an injected delay in the background +# 3. Execute some admin commands against the pool +# + +verify_runnable "global" + +DEVICE_DIR=$TEST_BASE_DIR/dev_export-test + +function cleanup +{ + zinject -c all + poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1 + [[ -d $DEVICE_DIR ]] && log_must rm -rf $DEVICE_DIR +} + +log_assert "admin commands cannot race a pool export" + +log_onexit cleanup + +[[ ! -d $DEVICE_DIR ]] && log_must mkdir -p $DEVICE_DIR +log_must truncate -s $MINVDEVSIZE ${DEVICE_DIR}/disk0 ${DEVICE_DIR}/disk1 + +log_must zpool create -f $TESTPOOL1 mirror ${DEVICE_DIR}/disk0 ${DEVICE_DIR}/disk1 + +log_must zinject -P export -s 10 $TESTPOOL1 + +log_must zpool export $TESTPOOL1 & + +zpool set comment=hello $TESTPOOL1 +zpool reguid $TESTPOOL1 & +zpool split $TESTPOOL1 & + +log_pass "admin commands cannot race a pool export" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_parallel_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_parallel_pos.ksh new file mode 100755 index 000000000000..037d17d082bd --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_parallel_pos.ksh @@ -0,0 +1,129 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2024 Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib + +# test uses 8 vdevs +MAX_NUM=8 +DEVICE_DIR=$TEST_BASE_DIR/dev_import-test + + +# +# DESCRIPTION: +# Verify that pool exports can occur in parallel +# +# STRATEGY: +# 1. Create 8 pools +# 2. Inject an export delay using zinject +# 3. Export half of the pools synchronously to baseline sequential cost +# 4. Export the other half asynchronously to demonstrate parallel savings +# 6. Import 4 pools +# 7. Test zpool export -a +# + +verify_runnable "global" + +# +# override the minimum sized vdevs +# + +POOLNAME="test_pool" + +function cleanup +{ + zinject -c all + + for i in {0..$(($MAX_NUM - 1))}; do + poolexists $POOLNAME-$i && destroy_pool $POOLNAME-$i + done + + [[ -d $DEVICE_DIR ]] && log_must rm -rf $DEVICE_DIR +} + +log_assert "Pool exports can occur in parallel" + +log_onexit cleanup + +[[ ! -d $DEVICE_DIR ]] && log_must mkdir -p $DEVICE_DIR + +# +# Create some pools with export delay injectors +# +for i in {0..$(($MAX_NUM - 1))}; do + log_must truncate -s $MINVDEVSIZE ${DEVICE_DIR}/disk$i + log_must zpool create $POOLNAME-$i $DEVICE_DIR/disk$i + log_must zinject -P export -s 8 $POOLNAME-$i +done + +# +# Export half of the pools synchronously +# +SECONDS=0 +for i in {0..3}; do + log_must zpool export $POOLNAME-$i +done +sequential_time=$SECONDS +log_note "sequentially exported 4 pools in $sequential_time seconds" + +# +# Export half of the pools in parallel +# +SECONDS=0 +for i in {4..7}; do + log_must zpool export $POOLNAME-$i & +done +wait +parallel_time=$SECONDS +log_note "asyncronously exported 4 pools in $parallel_time seconds" + +log_must test $parallel_time -lt $(($sequential_time / 3)) + +# +# import 4 pools with export delay injectors +# +for i in {4..7}; do + log_must zpool import -d $DEVICE_DIR/disk$i $POOLNAME-$i + log_must zinject -P export -s 8 $POOLNAME-$i +done + +# +# now test zpool export -a +# +SECONDS=0 +log_must zpool export -a +parallel_time=$SECONDS +log_note "asyncronously exported 4 pools, using '-a', in $parallel_time seconds" + +log_must test $parallel_time -lt $(($sequential_time / 3)) + +log_pass "Pool exports occur in parallel" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh index 4dd4c5b9a76c..d851d03e1a87 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh @@ -28,7 +28,7 @@ # Verify spa deadman events are rate limited # # STRATEGY: -# 1. Reduce the zfs_slow_io_events_per_second to 1. +# 1. Reduce the zfs_deadman_events_per_second to 1. # 2. Reduce the zfs_deadman_ziotime_ms to 1ms. # 3. Write data to a pool and read it back. # 4. Verify deadman events have been produced at a reasonable rate. @@ -44,15 +44,15 @@ function cleanup zinject -c all default_cleanup_noexit - set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS + set_tunable64 DEADMAN_EVENTS_PER_SECOND $OLD_DEADMAN_EVENTS set_tunable64 DEADMAN_ZIOTIME_MS $ZIOTIME_DEFAULT } log_assert "Verify spa deadman events are rate limited" log_onexit cleanup -OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND) -log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1 +OLD_DEADMAN_EVENTS=$(get_tunable DEADMAN_EVENTS_PER_SECOND) +log_must set_tunable64 DEADMAN_EVENTS_PER_SECOND 1 log_must set_tunable64 DEADMAN_ZIOTIME_MS 1 # Create a new pool in order to use the updated deadman settings. diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 4d6786e92d22..d5e9cebfffc7 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -87,6 +87,9 @@ #define HAVE_AVX512VL 1 #endif +/* backtrace() is available */ +/* #undef HAVE_BACKTRACE */ + /* bdevname() is available */ /* #undef HAVE_BDEVNAME */ @@ -598,6 +601,9 @@ /* Define if compiler supports -Winfinite-recursion */ /* #undef HAVE_KERNEL_INFINITE_RECURSION */ +/* kernel defines intptr_t */ +/* #undef HAVE_KERNEL_INTPTR_T */ + /* kernel has kernel_neon_* functions */ /* #undef HAVE_KERNEL_NEON */ @@ -1188,7 +1194,7 @@ /* #undef ZFS_IS_GPL_COMPATIBLE */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.2.99-474-FreeBSD_g8f1b7a6fa" +#define ZFS_META_ALIAS "zfs-2.2.99-517-FreeBSD_ge2357561b" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -1218,7 +1224,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "474-FreeBSD_g8f1b7a6fa" +#define ZFS_META_RELEASE "517-FreeBSD_ge2357561b" /* Define the project version. */ #define ZFS_META_VERSION "2.2.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 7bdd4299c8fa..bb0820a7f0b8 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.2.99-474-g8f1b7a6fa" +#define ZFS_META_GITREV "zfs-2.2.99-517-ge2357561b"