From 735ba3a7b7b5b874ab746ea22fda3b0525878899 Mon Sep 17 00:00:00 2001 From: Yuri Pankov <113725409+yuripv@users.noreply.github.com> Date: Fri, 1 Dec 2023 01:36:33 +0700 Subject: [PATCH 01/18] Use uint64_t instead of u_int64_t Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Yuri Pankov Closes #15610 --- include/os/freebsd/spl/sys/time.h | 4 +--- lib/libspl/include/sys/time.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/os/freebsd/spl/sys/time.h b/include/os/freebsd/spl/sys/time.h index fbc679aacf93..47d64c34756a 100644 --- a/include/os/freebsd/spl/sys/time.h +++ b/include/os/freebsd/spl/sys/time.h @@ -22,8 +22,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _OPENSOLARIS_SYS_TIME_H_ @@ -91,6 +89,6 @@ gethrtime(void) { struct timespec ts; clock_gettime(CLOCK_UPTIME, &ts); - return (((u_int64_t)ts.tv_sec) * NANOSEC + ts.tv_nsec); + return (((uint64_t)ts.tv_sec) * NANOSEC + ts.tv_nsec); } #endif /* !_OPENSOLARIS_SYS_TIME_H_ */ diff --git a/lib/libspl/include/sys/time.h b/lib/libspl/include/sys/time.h index e692415a041c..0f7d5196b31c 100644 --- a/lib/libspl/include/sys/time.h +++ b/lib/libspl/include/sys/time.h @@ -101,7 +101,7 @@ gethrtime(void) { struct timespec ts; (void) clock_gettime(CLOCK_MONOTONIC, &ts); - return ((((u_int64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec); + return ((((uint64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec); } #endif /* _LIBSPL_SYS_TIME_H */ From 3e4bef52b084dacc0f1abdaec428c0bdfb73f96d Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Thu, 30 Nov 2023 16:04:18 -0800 Subject: [PATCH 02/18] Only provide execvpe(3) when needed Check for the existence of execvpe(3) and only provide the FreeBSD compat version if required. Reviewed-by: Brian Behlendorf Signed-off-by: Brooks Davis Closes #15609 --- config/user.m4 | 2 +- lib/libspl/include/os/freebsd/sys/param.h | 2 ++ lib/libzfs/os/freebsd/libzfs_compat.c | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/config/user.m4 b/config/user.m4 index 6ec27a5b2cf5..87df8c7ccabd 100644 --- a/config/user.m4 +++ b/config/user.m4 @@ -31,7 +31,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [ ZFS_AC_CONFIG_USER_MAKEDEV_IN_MKDEV ZFS_AC_CONFIG_USER_ZFSEXEC - AC_CHECK_FUNCS([issetugid mlockall strlcat strlcpy]) + AC_CHECK_FUNCS([execvpe issetugid mlockall strlcat strlcpy]) AC_SUBST(RM) ]) diff --git a/lib/libspl/include/os/freebsd/sys/param.h b/lib/libspl/include/os/freebsd/sys/param.h index 15d3ff0dcb56..1ff3ca8025fc 100644 --- a/lib/libspl/include/os/freebsd/sys/param.h +++ b/lib/libspl/include/os/freebsd/sys/param.h @@ -57,6 +57,8 @@ extern size_t spl_pagesize(void); #define PAGESIZE (spl_pagesize()) +#ifndef HAVE_EXECVPE extern int execvpe(const char *name, char * const argv[], char * const envp[]); +#endif #endif diff --git a/lib/libzfs/os/freebsd/libzfs_compat.c b/lib/libzfs/os/freebsd/libzfs_compat.c index d1c1fea7fb68..aef2abf62568 100644 --- a/lib/libzfs/os/freebsd/libzfs_compat.c +++ b/lib/libzfs/os/freebsd/libzfs_compat.c @@ -38,7 +38,8 @@ #define ZFS_KMOD "openzfs" #endif - +#ifndef HAVE_EXECVPE +/* FreeBSD prior to 15 lacks execvpe */ static int execvPe(const char *name, const char *path, char * const *argv, char * const *envp) @@ -192,6 +193,7 @@ execvpe(const char *name, char * const argv[], char * const envp[]) return (execvPe(name, path, argv, envp)); } +#endif /* !HAVE_EXECVPE */ static __thread char errbuf[ERRBUFLEN]; From adcea23cb0d18a94e20249a40945ca107c4dc85d Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Fri, 1 Dec 2023 14:50:10 -0500 Subject: [PATCH 03/18] ZIO: Add overflow checks for linear buffers Since we use a limited set of kmem caches, quite often we have unused memory after the end of the buffer. Put there up to a 512-byte canary when built with debug to detect buffer overflows at the free time. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15553 --- lib/libspl/include/assert.h | 3 ++ module/zfs/zio.c | 57 +++++++++++++++++++++++++++++++++++-- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/lib/libspl/include/assert.h b/lib/libspl/include/assert.h index d8c5e203f42f..57f5719c1ac1 100644 --- a/lib/libspl/include/assert.h +++ b/lib/libspl/include/assert.h @@ -64,6 +64,9 @@ libspl_assert(const char *buf, const char *file, const char *func, int line) #undef verify #endif +#define PANIC(fmt, a...) \ + libspl_assertf(__FILE__, __FUNCTION__, __LINE__, fmt, ## a) + #define VERIFY(cond) \ (void) ((!(cond)) && \ libspl_assert(#cond, __FILE__, __FUNCTION__, __LINE__)) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 191166b855f1..213fe5c483f2 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -295,6 +295,53 @@ zio_fini(void) * ========================================================================== */ +#ifdef ZFS_DEBUG +static const ulong_t zio_buf_canary = (ulong_t)0xdeadc0dedead210b; +#endif + +/* + * Use empty space after the buffer to detect overflows. + * + * Since zio_init() creates kmem caches only for certain set of buffer sizes, + * allocations of different sizes may have some unused space after the data. + * Filling part of that space with a known pattern on allocation and checking + * it on free should allow us to detect some buffer overflows. + */ +static void +zio_buf_put_canary(ulong_t *p, size_t size, kmem_cache_t **cache, size_t c) +{ +#ifdef ZFS_DEBUG + size_t off = P2ROUNDUP(size, sizeof (ulong_t)); + ulong_t *canary = p + off / sizeof (ulong_t); + size_t asize = (c + 1) << SPA_MINBLOCKSHIFT; + if (c + 1 < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT && + cache[c] == cache[c + 1]) + asize = (c + 2) << SPA_MINBLOCKSHIFT; + for (; off < asize; canary++, off += sizeof (ulong_t)) + *canary = zio_buf_canary; +#endif +} + +static void +zio_buf_check_canary(ulong_t *p, size_t size, kmem_cache_t **cache, size_t c) +{ +#ifdef ZFS_DEBUG + size_t off = P2ROUNDUP(size, sizeof (ulong_t)); + ulong_t *canary = p + off / sizeof (ulong_t); + size_t asize = (c + 1) << SPA_MINBLOCKSHIFT; + if (c + 1 < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT && + cache[c] == cache[c + 1]) + asize = (c + 2) << SPA_MINBLOCKSHIFT; + for (; off < asize; canary++, off += sizeof (ulong_t)) { + if (unlikely(*canary != zio_buf_canary)) { + PANIC("ZIO buffer overflow %p (%zu) + %zu %#lx != %#lx", + p, size, (canary - p) * sizeof (ulong_t), + *canary, zio_buf_canary); + } + } +#endif +} + /* * Use zio_buf_alloc to allocate ZFS metadata. This data will appear in a * crashdump if the kernel panics, so use it judiciously. Obviously, it's @@ -311,7 +358,9 @@ zio_buf_alloc(size_t size) atomic_add_64(&zio_buf_cache_allocs[c], 1); #endif - return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE)); + void *p = kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE); + zio_buf_put_canary(p, size, zio_buf_cache, c); + return (p); } /* @@ -327,7 +376,9 @@ zio_data_buf_alloc(size_t size) VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); - return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE)); + void *p = kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE); + zio_buf_put_canary(p, size, zio_data_buf_cache, c); + return (p); } void @@ -340,6 +391,7 @@ zio_buf_free(void *buf, size_t size) atomic_add_64(&zio_buf_cache_frees[c], 1); #endif + zio_buf_check_canary(buf, size, zio_buf_cache, c); kmem_cache_free(zio_buf_cache[c], buf); } @@ -350,6 +402,7 @@ zio_data_buf_free(void *buf, size_t size) VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + zio_buf_check_canary(buf, size, zio_data_buf_cache, c); kmem_cache_free(zio_data_buf_cache[c], buf); } From bcd83ccd2516bc649b874a4a3cc40435996ff77b Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Fri, 1 Dec 2023 18:23:20 -0500 Subject: [PATCH 04/18] ZIL: Remove TX_CLONE_RANGE replay for ZVOLs. zil_claim_clone_range() takes references on cloned blocks before ZIL replay. Later zil_free_clone_range() drops them after replay or on dataset destroy. The total balance is neutral. It means we do not need to do anything (drop the references) for not implemented yet TX_CLONE_RANGE replay for ZVOLs. This is a logical follow up to #15603. Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15612 --- module/zfs/zvol.c | 60 +---------------------------------------------- 1 file changed, 1 insertion(+), 59 deletions(-) diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index ce5b75462349..5b6a3f5cb410 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -515,64 +515,6 @@ zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap) return (error); } -/* - * Replay a TX_CLONE_RANGE ZIL transaction that didn't get committed - * after a system failure. - * - * TODO: For now we drop block cloning transations for ZVOLs as they are - * unsupported, but we still need to inform BRT about that as we - * claimed them during pool import. - * This situation can occur when we try to import a pool from a ZFS - * version supporting block cloning for ZVOLs into a system that - * has this ZFS version, that doesn't support block cloning for ZVOLs. - */ -static int -zvol_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap) -{ - char name[ZFS_MAX_DATASET_NAME_LEN]; - zvol_state_t *zv = arg1; - objset_t *os = zv->zv_objset; - lr_clone_range_t *lr = arg2; - blkptr_t *bp; - dmu_tx_t *tx; - spa_t *spa; - uint_t ii; - int error; - - ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); - ASSERT3U(lr->lr_common.lrc_reclen, >=, offsetof(lr_clone_range_t, - lr_bps[lr->lr_nbps])); - - dmu_objset_name(os, name); - cmn_err(CE_WARN, "ZFS dropping block cloning transaction for %s.", - name); - - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - - tx = dmu_tx_create(os); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - return (error); - } - - spa = os->os_spa; - - for (ii = 0; ii < lr->lr_nbps; ii++) { - bp = &lr->lr_bps[ii]; - - if (!BP_IS_HOLE(bp)) { - zio_free(spa, dmu_tx_get_txg(tx), bp); - } - } - - (void) zil_replaying(zv->zv_zilog, tx); - dmu_tx_commit(tx); - - return (0); -} - static int zvol_replay_err(void *arg1, void *arg2, boolean_t byteswap) { @@ -607,7 +549,7 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = { zvol_replay_err, /* TX_SETSAXATTR */ zvol_replay_err, /* TX_RENAME_EXCHANGE */ zvol_replay_err, /* TX_RENAME_WHITEOUT */ - zvol_replay_clone_range /* TX_CLONE_RANGE */ + zvol_replay_err, /* TX_CLONE_RANGE */ }; /* From 014265f4e6fa32b0a990525a0967b52164a94752 Mon Sep 17 00:00:00 2001 From: Dex Wood Date: Fri, 1 Dec 2023 17:25:17 -0600 Subject: [PATCH 05/18] Add Ntfy notification support to ZED This commit adds the zed_notify_ntfy() function and hooks it into zed_notify(). This will allow ZED to send notifications to ntfy.sh or a self-hosted Ntfy service, which can be received on a desktop or mobile device. It is configured with ZED_NTFY_TOPIC, ZED_NTFY_URL, and ZED_NTFY_ACCESS_TOKEN variables in zed.rc. Reviewed-by: @classabbyamp Reviewed-by: Brian Behlendorf Signed-off-by: Dex Wood Closes #15584 --- cmd/zed/zed.d/zed-functions.sh | 98 ++++++++++++++++++++++++++++++++++ cmd/zed/zed.d/zed.rc | 22 ++++++++ 2 files changed, 120 insertions(+) diff --git a/cmd/zed/zed.d/zed-functions.sh b/cmd/zed/zed.d/zed-functions.sh index 49b6b54029aa..3a2519633d01 100644 --- a/cmd/zed/zed.d/zed-functions.sh +++ b/cmd/zed/zed.d/zed-functions.sh @@ -205,6 +205,10 @@ zed_notify() [ "${rv}" -eq 0 ] && num_success=$((num_success + 1)) [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1)) + zed_notify_ntfy "${subject}" "${pathname}"; rv=$? + [ "${rv}" -eq 0 ] && num_success=$((num_success + 1)) + [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1)) + [ "${num_success}" -gt 0 ] && return 0 [ "${num_failure}" -gt 0 ] && return 1 return 2 @@ -527,6 +531,100 @@ zed_notify_pushover() } +# zed_notify_ntfy (subject, pathname) +# +# Send a notification via Ntfy.sh . +# The ntfy topic (ZED_NTFY_TOPIC) identifies the topic that the notification +# will be sent to Ntfy.sh server. The ntfy url (ZED_NTFY_URL) defines the +# self-hosted or provided hosted ntfy service location. The ntfy access token +# (ZED_NTFY_ACCESS_TOKEN) reprsents an +# access token that could be used if a topic is read/write protected. If a +# topic can be written to publicaly, a ZED_NTFY_ACCESS_TOKEN is not required. +# +# Requires curl and sed executables to be installed in the standard PATH. +# +# References +# https://docs.ntfy.sh +# +# Arguments +# subject: notification subject +# pathname: pathname containing the notification message (OPTIONAL) +# +# Globals +# ZED_NTFY_TOPIC +# ZED_NTFY_ACCESS_TOKEN (OPTIONAL) +# ZED_NTFY_URL +# +# Return +# 0: notification sent +# 1: notification failed +# 2: not configured +# +zed_notify_ntfy() +{ + local subject="$1" + local pathname="${2:-"/dev/null"}" + local msg_body + local msg_out + local msg_err + + [ -n "${ZED_NTFY_TOPIC}" ] || return 2 + local url="${ZED_NTFY_URL:-"https://ntfy.sh"}/${ZED_NTFY_TOPIC}" + + if [ ! -r "${pathname}" ]; then + zed_log_err "ntfy cannot read \"${pathname}\"" + return 1 + fi + + zed_check_cmd "curl" "sed" || return 1 + + # Read the message body in. + # + msg_body="$(cat "${pathname}")" + + if [ -z "${msg_body}" ] + then + msg_body=$subject + subject="" + fi + + # Send the POST request and check for errors. + # + if [ -n "${ZED_NTFY_ACCESS_TOKEN}" ]; then + msg_out="$( \ + curl \ + -u ":${ZED_NTFY_ACCESS_TOKEN}" \ + -H "Title: ${subject}" \ + -d "${msg_body}" \ + -H "Priority: high" \ + "${url}" \ + 2>/dev/null \ + )"; rv=$? + else + msg_out="$( \ + curl \ + -H "Title: ${subject}" \ + -d "${msg_body}" \ + -H "Priority: high" \ + "${url}" \ + 2>/dev/null \ + )"; rv=$? + fi + if [ "${rv}" -ne 0 ]; then + zed_log_err "curl exit=${rv}" + return 1 + fi + msg_err="$(echo "${msg_out}" \ + | sed -n -e 's/.*"errors" *:.*\[\(.*\)\].*/\1/p')" + if [ -n "${msg_err}" ]; then + zed_log_err "ntfy \"${msg_err}"\" + return 1 + fi + return 0 +} + + + # zed_rate_limit (tag, [interval]) # # Check whether an event of a given type [tag] has already occurred within the diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index 78dc1afc7b15..acdfabc8ded2 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -147,3 +147,25 @@ ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event" # help silence misbehaving drives. This assumes your drive enclosure fully # supports slot power control via sysfs. #ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT=1 + +## +# Ntfy topic +# This defines which topic will receive the ntfy notification. +# +# Disabled by default; uncomment to enable. +#ZED_NTFY_TOPIC="" + +## +# Ntfy access token (optional for public topics) +# This defines an access token which can be used +# to allow you to authenticate when sending to topics +# +# Disabled by default; uncomment to enable. +#ZED_NTFY_ACCESS_TOKEN="" + +## +# Ntfy Service URL +# This defines which service the ntfy call will be directed toward +# +# https://ntfy.sh by default; uncomment to enable an alternative service url. +#ZED_NTFY_URL="https://ntfy.sh" From 55b764e062b171874655c7ed51d6a45c2c243c0a Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 5 Dec 2023 13:58:11 -0500 Subject: [PATCH 06/18] ZIL: Do not clone blocks from the future ZIL claim can not handle block pointers cloned from the future, since they are not yet allocated at that point. It may happen either if the block was just written when it was cloned, or if the pool was frozen or somehow else rewound on import. Handle it from two sides: prevent cloning of blocks with physical birth time from not yet synced or frozen TXG, and abort ZIL claim if we still detect such blocks due to rewind or something else. While there, assert that any cloned blocks we claim are really allocated by calling metaslab_check_free(). Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15617 --- module/zfs/dmu.c | 15 +++++++++++++++ module/zfs/zil.c | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index e0cdd9e3f33e..6cf7f3c3b12e 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2274,6 +2274,21 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, goto out; } + /* + * If the block was allocated in transaction group that is not + * yet synced, we could clone it, but we couldn't write this + * operation into ZIL, or it may be impossible to replay, since + * the block may appear not yet allocated at that point. + */ + if (BP_PHYSICAL_BIRTH(bp) > spa_freeze_txg(os->os_spa)) { + error = SET_ERROR(EINVAL); + goto out; + } + if (BP_PHYSICAL_BIRTH(bp) > spa_last_synced_txg(os->os_spa)) { + error = SET_ERROR(EAGAIN); + goto out; + } + bps[i] = *bp; } diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 7dfdaa081e8a..252d9a0493de 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -617,11 +617,12 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg) } static int -zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx) +zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx, + uint64_t first_txg) { const lr_clone_range_t *lr = (const lr_clone_range_t *)lrc; const blkptr_t *bp; - spa_t *spa; + spa_t *spa = zilog->zl_spa; uint_t ii; ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr)); @@ -636,19 +637,36 @@ zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx) * XXX: Do we need to byteswap lr? */ - spa = zilog->zl_spa; - for (ii = 0; ii < lr->lr_nbps; ii++) { bp = &lr->lr_bps[ii]; /* - * When data in embedded into BP there is no need to create - * BRT entry as there is no data block. Just copy the BP as - * it contains the data. + * When data is embedded into the BP there is no need to create + * BRT entry as there is no data block. Just copy the BP as it + * contains the data. */ - if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) { + if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) + continue; + + /* + * We can not handle block pointers from the future, since they + * are not yet allocated. It should not normally happen, but + * just in case lets be safe and just stop here now instead of + * corrupting the pool. + */ + if (BP_PHYSICAL_BIRTH(bp) >= first_txg) + return (SET_ERROR(ENOENT)); + + /* + * Assert the block is really allocated before we reference it. + */ + metaslab_check_free(spa, bp); + } + + for (ii = 0; ii < lr->lr_nbps; ii++) { + bp = &lr->lr_bps[ii]; + if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) brt_pending_add(spa, bp, tx); - } } return (0); @@ -663,7 +681,7 @@ zil_claim_log_record(zilog_t *zilog, const lr_t *lrc, void *tx, case TX_WRITE: return (zil_claim_write(zilog, lrc, tx, first_txg)); case TX_CLONE_RANGE: - return (zil_claim_clone_range(zilog, lrc, tx)); + return (zil_claim_clone_range(zilog, lrc, tx, first_txg)); default: return (0); } From c7b6119268b87ba16249550a64d28594185595fb Mon Sep 17 00:00:00 2001 From: oromenahar Date: Tue, 5 Dec 2023 20:03:48 +0100 Subject: [PATCH 07/18] Allow block cloning across encrypted datasets When two datasets share the same master encryption key, it is safe to clone encrypted blocks. Currently only snapshots and clones of a dataset share with it the same encryption key. Added a test for: - Clone from encrypted sibling to encrypted sibling with non encrypted parent - Clone from encrypted parent to inherited encrypted child - Clone from child to sibling with encrypted parent - Clone from snapshot to the original datasets - Clone from foreign snapshot to a foreign dataset - Cloning from non-encrypted to encrypted datasets - Cloning from encrypted to non-encrypted datasets Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Original-patch-by: Pawel Jakub Dawidek Signed-off-by: Kay Pedersen Closes #15544 --- include/sys/dsl_crypt.h | 1 + man/man7/zpool-features.7 | 9 +- module/zfs/brt.c | 6 +- module/zfs/dsl_crypt.c | 34 ++++ module/zfs/zfs_vnops.c | 25 ++- tests/runfiles/linux.run | 1 + tests/test-runner/bin/zts-report.py.in | 2 + tests/zfs-tests/tests/Makefile.am | 1 + .../block_cloning/block_cloning.kshlib | 12 +- .../block_cloning_cross_enc_dataset.ksh | 170 ++++++++++++++++++ 10 files changed, 236 insertions(+), 25 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h index 72716e296c9e..fbcae3715355 100644 --- a/include/sys/dsl_crypt.h +++ b/include/sys/dsl_crypt.h @@ -206,6 +206,7 @@ void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, dmu_tx_t *tx); int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, boolean_t *will_encrypt); +boolean_t dmu_objset_crypto_key_equal(objset_t *osa, objset_t *osb); void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx); uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7 index 7097d3a5b5d1..ea3c68dc6083 100644 --- a/man/man7/zpool-features.7 +++ b/man/man7/zpool-features.7 @@ -364,9 +364,12 @@ When this feature is enabled ZFS will use block cloning for operations like Block cloning allows to create multiple references to a single block. It is much faster than copying the data (as the actual data is neither read nor written) and takes no additional space. -Blocks can be cloned across datasets under some conditions (like disabled -encryption and equal -.Nm recordsize ) . +Blocks can be cloned across datasets under some conditions (like equal +.Nm recordsize , +the same master encryption key, etc.). +ZFS tries its best to clone across datasets including encrypted ones. +This is limited for various (nontrivial) reasons depending on the OS +and/or ZFS internals. .Pp This feature becomes .Sy active diff --git a/module/zfs/brt.c b/module/zfs/brt.c index 759bc8d2e2b8..a701c70fcfb5 100644 --- a/module/zfs/brt.c +++ b/module/zfs/brt.c @@ -157,10 +157,8 @@ * (copying the file content to the new dataset and removing the source file). * In that case Block Cloning will only be used briefly, because the BRT entries * will be removed when the source is removed. - * Note: currently it is not possible to clone blocks between encrypted - * datasets, even if those datasets use the same encryption key (this includes - * snapshots of encrypted datasets). Cloning blocks between datasets that use - * the same keys should be possible and should be implemented in the future. + * Block Cloning across encrypted datasets is supported as long as both + * datasets share the same master key (e.g. snapshots and clones) * * Block Cloning flow through ZFS layers. * diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 5e6e4e3d6c39..8e1055d9bcb1 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -266,6 +266,40 @@ spa_crypto_key_compare(const void *a, const void *b) return (0); } +/* + * this compares a crypto key based on zk_guid. See comment on + * spa_crypto_key_compare for more information. + */ +boolean_t +dmu_objset_crypto_key_equal(objset_t *osa, objset_t *osb) +{ + dsl_crypto_key_t *dcka = NULL; + dsl_crypto_key_t *dckb = NULL; + uint64_t obja, objb; + boolean_t equal; + spa_t *spa; + + spa = dmu_objset_spa(osa); + if (spa != dmu_objset_spa(osb)) + return (B_FALSE); + obja = dmu_objset_ds(osa)->ds_object; + objb = dmu_objset_ds(osb)->ds_object; + + if (spa_keystore_lookup_key(spa, obja, FTAG, &dcka) != 0) + return (B_FALSE); + if (spa_keystore_lookup_key(spa, objb, FTAG, &dckb) != 0) { + spa_keystore_dsl_key_rele(spa, dcka, FTAG); + return (B_FALSE); + } + + equal = (dcka->dck_key.zk_guid == dckb->dck_key.zk_guid); + + spa_keystore_dsl_key_rele(spa, dcka, FTAG); + spa_keystore_dsl_key_rele(spa, dckb, FTAG); + + return (equal); +} + static int spa_key_mapping_compare(const void *a, const void *b) { diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index eb012fe549dc..b2b7e36645b5 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1097,6 +1098,16 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, return (SET_ERROR(EXDEV)); } + /* + * Cloning across encrypted datasets is possible only if they + * share the same master key. + */ + if (inos != outos && inos->os_encrypted && + !dmu_objset_crypto_key_equal(inos, outos)) { + zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); + return (SET_ERROR(EXDEV)); + } + error = zfs_verify_zp(inzp); if (error == 0) error = zfs_verify_zp(outzp); @@ -1280,20 +1291,6 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, */ break; } - /* - * Encrypted data is fine as long as it comes from the same - * dataset. - * TODO: We want to extend it in the future to allow cloning to - * datasets with the same keys, like clones or to be able to - * clone a file from a snapshot of an encrypted dataset into the - * dataset itself. - */ - if (BP_IS_PROTECTED(&bps[0])) { - if (inzfsvfs != outzfsvfs) { - error = SET_ERROR(EXDEV); - break; - } - } /* * Start a transaction. diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 8bc55a1b4b47..fb78d96fb52d 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -42,6 +42,7 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial', 'block_cloning_disabled_copyfilerange', 'block_cloning_disabled_ficlone', 'block_cloning_disabled_ficlonerange', 'block_cloning_copyfilerange_cross_dataset', + 'block_cloning_cross_enc_dataset', 'block_cloning_copyfilerange_fallback_same_txg'] tags = ['functional', 'block_cloning'] diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 4608e87522a3..b188a101c257 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -305,6 +305,8 @@ elif sys.platform.startswith('linux'): ['SKIP', cfr_cross_reason], 'block_cloning/block_cloning_copyfilerange_fallback_same_txg': ['SKIP', cfr_cross_reason], + 'block_cloning/block_cloning_cross_enc_dataset': + ['SKIP', cfr_cross_reason], }) diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 79aef1900b59..2d899e58bdbb 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -451,6 +451,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/block_cloning/block_cloning_ficlone.ksh \ functional/block_cloning/block_cloning_ficlonerange.ksh \ functional/block_cloning/block_cloning_ficlonerange_partial.ksh \ + functional/block_cloning/block_cloning_cross_enc_dataset.ksh \ functional/bootfs/bootfs_001_pos.ksh \ functional/bootfs/bootfs_002_neg.ksh \ functional/bootfs/bootfs_003_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib b/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib index 8e16366b4cd6..526bd54a2bf3 100644 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib @@ -28,8 +28,8 @@ function have_same_content { - typeset hash1=$(cat $1 | md5sum) - typeset hash2=$(cat $2 | md5sum) + typeset hash1=$(md5digest $1) + typeset hash2=$(md5digest $2) log_must [ "$hash1" = "$hash2" ] } @@ -44,10 +44,14 @@ function have_same_content # function get_same_blocks { + KEY=$5 + if [ ${#KEY} -gt 0 ]; then + KEY="--key=$KEY" + fi typeset zdbout=${TMPDIR:-$TEST_BASE_DIR}/zdbout.$$ - zdb -vvvvv $1 -O $2 | \ + zdb $KEY -vvvvv $1 -O $2 | \ awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.a - zdb -vvvvv $3 -O $4 | \ + zdb $KEY -vvvvv $3 -O $4 | \ awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.b echo $(sort $zdbout.a $zdbout.b | uniq -d | cut -f1 -d' ') } diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh new file mode 100755 index 000000000000..fe8f0867b909 --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh @@ -0,0 +1,170 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023, Kay Pedersen +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +verify_runnable "global" + +if [[ $(linux_version) -lt $(linux_version "5.3") ]]; then + log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3" +fi + +claim="Block cloning across encrypted datasets." + +log_assert $claim + +DS1="$TESTPOOL/encrypted1" +DS2="$TESTPOOL/encrypted2" +DS1_NC="$TESTPOOL/notcrypted1" +PASSPHRASE="top_secret" + +function prepare_enc +{ + log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS + log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $DS1" + log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $DS2" + log_must zfs create $DS1/child1 + log_must zfs create $DS1/child2 + log_must zfs create $DS1_NC + + log_note "Create test file" + # we must wait until the src file txg is written to the disk otherwise we + # will fallback to normal copy. See "dmu_read_l0_bps" in + # "zfs/module/zfs/dmu.c" and "zfs_clone_range" in + # "zfs/module/zfs/zfs_vnops.c" + log_must dd if=/dev/urandom of=/$DS1/file bs=128K count=4 + log_must dd if=/dev/urandom of=/$DS1/child1/file bs=128K count=4 + log_must dd if=/dev/urandom of=/$DS1_NC/file bs=128K count=4 + log_must sync_pool $TESTPOOL +} + +function cleanup_enc +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL +} + +function clone_and_check +{ + I_FILE="$1" + O_FILE=$2 + I_DS=$3 + O_DS=$4 + SAME_BLOCKS=$5 + # the CLONE option provides a choice between copy_file_range + # which should clone and a dd which is a copy no matter what + CLONE=$6 + SNAPSHOT=$7 + if [ ${#SNAPSHOT} -gt 0 ]; then + I_FILE=".zfs/snapshot/$SNAPSHOT/$1" + fi + if [ $CLONE ]; then + log_must clonefile -f "/$I_DS/$I_FILE" "/$O_DS/$O_FILE" 0 0 524288 + else + log_must dd if="/$I_DS/$I_FILE" of="/$O_DS/$O_FILE" bs=128K + fi + log_must sync_pool $TESTPOOL + + log_must have_same_content "/$I_DS/$I_FILE" "/$O_DS/$O_FILE" + + if [ ${#SNAPSHOT} -gt 0 ]; then + I_DS="$I_DS@$SNAPSHOT" + I_FILE="$1" + fi + typeset blocks=$(get_same_blocks \ + $I_DS $I_FILE $O_DS $O_FILE $PASSPHRASE) + log_must [ "$blocks" = "$SAME_BLOCKS" ] +} + +log_onexit cleanup_enc + +prepare_enc + +log_note "Cloning entire file with copy_file_range across different enc" \ + "roots, should fallback" +# we are expecting no same block map. +clone_and_check "file" "clone" $DS1 $DS2 "" true +log_note "check if the file is still readable and the same after" \ + "unmount and key unload, shouldn't fail" +typeset hash1=$(md5digest "/$DS1/file") +log_must zfs umount $DS1 && zfs unload-key $DS1 +typeset hash2=$(md5digest "/$DS2/clone") +log_must [ "$hash1" = "$hash2" ] + +cleanup_enc +prepare_enc + +log_note "Cloning entire file with copy_file_range across different child datasets" +# clone shouldn't work because of deriving a new master key for the child +# we are expecting no same block map. +clone_and_check "file" "clone" $DS1 "$DS1/child1" "" true +clone_and_check "file" "clone" "$DS1/child1" "$DS1/child2" "" true + +cleanup_enc +prepare_enc + +log_note "Copying entire file with copy_file_range across same snapshot" +log_must zfs snapshot -r $DS1@s1 +log_must sync_pool $TESTPOOL +log_must rm -f "/$DS1/file" +log_must sync_pool $TESTPOOL +clone_and_check "file" "clone" "$DS1" "$DS1" "0 1 2 3" true "s1" + +cleanup_enc +prepare_enc + +log_note "Copying entire file with copy_file_range across different snapshot" +clone_and_check "file" "file" $DS1 $DS2 "" true +log_must zfs snapshot -r $DS2@s1 +log_must sync_pool $TESTPOOL +log_must rm -f "/$DS1/file" "/$DS2/file" +log_must sync_pool $TESTPOOL +clone_and_check "file" "clone" "$DS2" "$DS1" "" true "s1" +typeset hash1=$(md5digest "/$DS1/.zfs/snapshot/s1/file") +log_note "destroy the snapshot and check if the file is still readable and" \ + "has the same content" +log_must zfs destroy -r $DS2@s1 +log_must sync_pool $TESTPOOL +typeset hash2=$(md5digest "/$DS1/file") +log_must [ "$hash1" = "$hash2" ] + +cleanup_enc +prepare_enc + +log_note "Copying with copy_file_range from non encrypted to encrypted" +clone_and_check "file" "copy" $DS1_NC $DS1 "" true + +cleanup_enc +prepare_enc + +log_note "Copying with copy_file_range from encrypted to non encrypted" +clone_and_check "file" "copy" $DS1 $DS1_NC "" true + +log_must sync_pool $TESTPOOL + +log_pass $claim From 5f2700eee5428b9ca4d4689c6985e751b733cbc6 Mon Sep 17 00:00:00 2001 From: Rob N Date: Wed, 6 Dec 2023 06:53:14 +1100 Subject: [PATCH 08/18] zpool: flush output before sleeping Several zpool commands (status, list, iostat) have modes that present some information, sleep a while, present the current state, sleep, etc. Some of those had ways to invoke them that when piped would appear to do nothing for a while, because non-terminals are block-buffered, not line-buffered, by default. Fix this by forcing a flush before sleeping. In particular, all of these buffered: - zpool status - zpool iostat -y - zpool list Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Closes #15593 --- cmd/zpool/zpool_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 9dd1d2109004..c143d637059d 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -5950,6 +5950,7 @@ zpool_do_iostat(int argc, char **argv) print_iostat_header(&cb); if (skip) { + (void) fflush(stdout); (void) fsleep(interval); continue; } @@ -5980,18 +5981,13 @@ zpool_do_iostat(int argc, char **argv) } - /* - * Flush the output so that redirection to a file isn't buffered - * indefinitely. - */ - (void) fflush(stdout); - if (interval == 0) break; if (count != 0 && --count == 0) break; + (void) fflush(stdout); (void) fsleep(interval); } @@ -6514,6 +6510,8 @@ zpool_do_list(int argc, char **argv) break; pool_list_free(list); + + (void) fflush(stdout); (void) fsleep(interval); } @@ -9094,6 +9092,7 @@ zpool_do_status(int argc, char **argv) if (count != 0 && --count == 0) break; + (void) fflush(stdout); (void) fsleep(interval); } From 727497ccdfcccc26517a596ec75e8db400b62932 Mon Sep 17 00:00:00 2001 From: Shengqi Chen Date: Wed, 6 Dec 2023 04:01:09 +0800 Subject: [PATCH 09/18] module/icp/asm-arm/sha2: enable non-SIMD asm kernels on armv5/6 My merged pull request #15557 fixes compilation of sha2 kernels on arm v5/6. However, the compiler guards only allows sha256/512_armv7_impl to be used when __ARM_ARCH > 6. This patch enables these ASM kernels on all arm architectures. Some compiler guards are adjusted accordingly to avoid the unnecessary compilation of SIMD (e.g., neon, armv8ce) kernels on old architectures. Reviewed-by: Brian Behlendorf Signed-off-by: Shengqi Chen Closes #15623 --- module/icp/algs/sha2/sha256_impl.c | 22 +++++++++++++--------- module/icp/algs/sha2/sha512_impl.c | 19 ++++++++----------- module/icp/asm-arm/sha2/sha256-armv7.S | 8 +++----- module/icp/asm-arm/sha2/sha512-armv7.S | 4 +++- 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/module/icp/algs/sha2/sha256_impl.c b/module/icp/algs/sha2/sha256_impl.c index 01ce5cbd814c..0f24319511d7 100644 --- a/module/icp/algs/sha2/sha256_impl.c +++ b/module/icp/algs/sha2/sha256_impl.c @@ -118,7 +118,15 @@ const sha256_ops_t sha256_shani_impl = { }; #endif -#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH > 6) +#elif defined(__aarch64__) || defined(__arm__) +extern void zfs_sha256_block_armv7(uint32_t s[8], const void *, size_t); +const sha256_ops_t sha256_armv7_impl = { + .is_supported = sha2_is_supported, + .transform = zfs_sha256_block_armv7, + .name = "armv7" +}; + +#if __ARM_ARCH > 6 static boolean_t sha256_have_neon(void) { return (kfpu_allowed() && zfs_neon_available()); @@ -129,13 +137,6 @@ static boolean_t sha256_have_armv8ce(void) return (kfpu_allowed() && zfs_sha256_available()); } -extern void zfs_sha256_block_armv7(uint32_t s[8], const void *, size_t); -const sha256_ops_t sha256_armv7_impl = { - .is_supported = sha2_is_supported, - .transform = zfs_sha256_block_armv7, - .name = "armv7" -}; - TF(zfs_sha256_block_neon, tf_sha256_neon); const sha256_ops_t sha256_neon_impl = { .is_supported = sha256_have_neon, @@ -149,6 +150,7 @@ const sha256_ops_t sha256_armv8_impl = { .transform = tf_sha256_armv8ce, .name = "armv8-ce" }; +#endif #elif defined(__PPC64__) static boolean_t sha256_have_isa207(void) @@ -192,11 +194,13 @@ static const sha256_ops_t *const sha256_impls[] = { #if defined(__x86_64) && defined(HAVE_SSE4_1) &sha256_shani_impl, #endif -#if defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH > 6) +#if defined(__aarch64__) || defined(__arm__) &sha256_armv7_impl, +#if __ARM_ARCH > 6 &sha256_neon_impl, &sha256_armv8_impl, #endif +#endif #if defined(__PPC64__) &sha256_ppc_impl, &sha256_power8_impl, diff --git a/module/icp/algs/sha2/sha512_impl.c b/module/icp/algs/sha2/sha512_impl.c index 27b35a639a54..6291fbd77e36 100644 --- a/module/icp/algs/sha2/sha512_impl.c +++ b/module/icp/algs/sha2/sha512_impl.c @@ -88,7 +88,7 @@ const sha512_ops_t sha512_avx2_impl = { }; #endif -#elif defined(__aarch64__) +#elif defined(__aarch64__) || defined(__arm__) extern void zfs_sha512_block_armv7(uint64_t s[8], const void *, size_t); const sha512_ops_t sha512_armv7_impl = { .is_supported = sha2_is_supported, @@ -96,6 +96,7 @@ const sha512_ops_t sha512_armv7_impl = { .name = "armv7" }; +#if defined(__aarch64__) static boolean_t sha512_have_armv8ce(void) { return (kfpu_allowed() && zfs_sha512_available()); @@ -107,15 +108,9 @@ const sha512_ops_t sha512_armv8_impl = { .transform = tf_sha512_armv8ce, .name = "armv8-ce" }; +#endif -#elif defined(__arm__) && __ARM_ARCH > 6 -extern void zfs_sha512_block_armv7(uint64_t s[8], const void *, size_t); -const sha512_ops_t sha512_armv7_impl = { - .is_supported = sha2_is_supported, - .transform = zfs_sha512_block_armv7, - .name = "armv7" -}; - +#if defined(__arm__) && __ARM_ARCH > 6 static boolean_t sha512_have_neon(void) { return (kfpu_allowed() && zfs_neon_available()); @@ -127,6 +122,7 @@ const sha512_ops_t sha512_neon_impl = { .transform = tf_sha512_neon, .name = "neon" }; +#endif #elif defined(__PPC64__) TF(zfs_sha512_ppc, tf_sha512_ppc); @@ -164,14 +160,15 @@ static const sha512_ops_t *const sha512_impls[] = { #if defined(__x86_64) && defined(HAVE_AVX2) &sha512_avx2_impl, #endif -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(__arm__) &sha512_armv7_impl, +#if defined(__aarch64__) &sha512_armv8_impl, #endif #if defined(__arm__) && __ARM_ARCH > 6 - &sha512_armv7_impl, &sha512_neon_impl, #endif +#endif #if defined(__PPC64__) &sha512_ppc_impl, &sha512_power8_impl, diff --git a/module/icp/asm-arm/sha2/sha256-armv7.S b/module/icp/asm-arm/sha2/sha256-armv7.S index 190dbabc5ecb..3ae66626df31 100644 --- a/module/icp/asm-arm/sha2/sha256-armv7.S +++ b/module/icp/asm-arm/sha2/sha256-armv7.S @@ -1837,6 +1837,7 @@ zfs_sha256_block_armv7: #endif .size zfs_sha256_block_armv7,.-zfs_sha256_block_armv7 +#if __ARM_ARCH__ >= 7 .arch armv7-a .fpu neon @@ -1849,11 +1850,7 @@ zfs_sha256_block_neon: stmdb sp!,{r4-r12,lr} sub r11,sp,#16*4+16 -#if __ARM_ARCH__ >=7 adr r14,K256 -#else - ldr r14,=K256 -#endif bic r11,r11,#15 @ align for 128-bit stores mov r12,sp mov sp,r11 @ alloca @@ -2773,4 +2770,5 @@ zfs_sha256_block_armv8: bx lr @ bx lr .size zfs_sha256_block_armv8,.-zfs_sha256_block_armv8 -#endif +#endif // #if __ARM_ARCH__ >= 7 +#endif // #if defined(__arm__) diff --git a/module/icp/asm-arm/sha2/sha512-armv7.S b/module/icp/asm-arm/sha2/sha512-armv7.S index 499cb6df9567..66d7dd3cf0f7 100644 --- a/module/icp/asm-arm/sha2/sha512-armv7.S +++ b/module/icp/asm-arm/sha2/sha512-armv7.S @@ -493,6 +493,7 @@ zfs_sha512_block_armv7: #endif .size zfs_sha512_block_armv7,.-zfs_sha512_block_armv7 +#if __ARM_ARCH__ >= 7 .arch armv7-a .fpu neon @@ -1822,4 +1823,5 @@ zfs_sha512_block_neon: VFP_ABI_POP bx lr @ .word 0xe12fff1e .size zfs_sha512_block_neon,.-zfs_sha512_block_neon -#endif +#endif // #if __ARM_ARCH__ >= 7 +#endif // #if defined(__arm__) From 687e4d7f9cadc17a4b8ebb67bc156b7b0a699192 Mon Sep 17 00:00:00 2001 From: Don Brady Date: Tue, 5 Dec 2023 15:27:56 -0700 Subject: [PATCH 10/18] Extend import_progress kstat with a notes field Detail the import progress of log spacemaps as they can take a very long time. Also grab the spa_note() messages to, as they provide insight into what is happening Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Don Brady Co-authored-by: Allan Jude Closes #15539 --- include/sys/spa.h | 4 + module/zfs/spa.c | 41 +++++- module/zfs/spa_log_spacemap.c | 12 +- module/zfs/spa_misc.c | 74 +++++++++- tests/runfiles/common.run | 3 +- .../zpool_import/zpool_import_status.ksh | 132 ++++++++++++++++++ 6 files changed, 256 insertions(+), 10 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh diff --git a/include/sys/spa.h b/include/sys/spa.h index cef7933df441..08099cd4fa29 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -971,6 +971,10 @@ extern int spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t max_txg); extern int spa_import_progress_set_state(uint64_t pool_guid, spa_load_state_t spa_load_state); +extern void spa_import_progress_set_notes(spa_t *spa, + const char *fmt, ...) __printflike(2, 3); +extern void spa_import_progress_set_notes_nolog(spa_t *spa, + const char *fmt, ...) __printflike(2, 3); /* Pool configuration locks */ extern int spa_config_tryenter(spa_t *spa, int locks, const void *tag, diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 5161326c0397..2ca5e7bac1a4 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -3109,6 +3109,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type) spa->spa_load_state = state; (void) spa_import_progress_set_state(spa_guid(spa), spa_load_state(spa)); + spa_import_progress_set_notes(spa, "spa_load()"); gethrestime(&spa->spa_loaded_ts); error = spa_load_impl(spa, type, &ereport); @@ -3337,7 +3338,7 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) uint64_t mmp_config = ub->ub_mmp_config; uint16_t mmp_seq = MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0; uint64_t import_delay; - hrtime_t import_expire; + hrtime_t import_expire, now; nvlist_t *mmp_label = NULL; vdev_t *rvd = spa->spa_root_vdev; kcondvar_t cv; @@ -3375,7 +3376,17 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) import_expire = gethrtime() + import_delay; - while (gethrtime() < import_expire) { + spa_import_progress_set_notes(spa, "Checking MMP activity, waiting " + "%llu ms", (u_longlong_t)NSEC2MSEC(import_delay)); + + int interations = 0; + while ((now = gethrtime()) < import_expire) { + if (interations++ % 30 == 0) { + spa_import_progress_set_notes(spa, "Checking MMP " + "activity, %llu ms remaining", + (u_longlong_t)NSEC2MSEC(import_expire - now)); + } + (void) spa_import_progress_set_mmp_check(spa_guid(spa), NSEC2SEC(import_expire - gethrtime())); @@ -4995,6 +5006,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) /* * Retrieve the checkpoint txg if the pool has a checkpoint. */ + spa_import_progress_set_notes(spa, "Loading checkpoint txg"); error = spa_ld_read_checkpoint_txg(spa); if (error != 0) return (error); @@ -5007,6 +5019,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * initiated. Otherwise we could be reading from indirect vdevs before * we have loaded their mappings. */ + spa_import_progress_set_notes(spa, "Loading indirect vdev metadata"); error = spa_ld_open_indirect_vdev_metadata(spa); if (error != 0) return (error); @@ -5015,6 +5028,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Retrieve the full list of active features from the MOS and check if * they are all supported. */ + spa_import_progress_set_notes(spa, "Checking feature flags"); error = spa_ld_check_features(spa, &missing_feat_write); if (error != 0) return (error); @@ -5023,6 +5037,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Load several special directories from the MOS needed by the dsl_pool * layer. */ + spa_import_progress_set_notes(spa, "Loading special MOS directories"); error = spa_ld_load_special_directories(spa); if (error != 0) return (error); @@ -5030,6 +5045,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) /* * Retrieve pool properties from the MOS. */ + spa_import_progress_set_notes(spa, "Loading properties"); error = spa_ld_get_props(spa); if (error != 0) return (error); @@ -5038,6 +5054,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Retrieve the list of auxiliary devices - cache devices and spares - * and open them. */ + spa_import_progress_set_notes(spa, "Loading AUX vdevs"); error = spa_ld_open_aux_vdevs(spa, type); if (error != 0) return (error); @@ -5046,14 +5063,17 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Load the metadata for all vdevs. Also check if unopenable devices * should be autoreplaced. */ + spa_import_progress_set_notes(spa, "Loading vdev metadata"); error = spa_ld_load_vdev_metadata(spa); if (error != 0) return (error); + spa_import_progress_set_notes(spa, "Loading dedup tables"); error = spa_ld_load_dedup_tables(spa); if (error != 0) return (error); + spa_import_progress_set_notes(spa, "Loading BRT"); error = spa_ld_load_brt(spa); if (error != 0) return (error); @@ -5062,6 +5082,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Verify the logs now to make sure we don't have any unexpected errors * when we claim log blocks later. */ + spa_import_progress_set_notes(spa, "Verifying Log Devices"); error = spa_ld_verify_logs(spa, type, ereport); if (error != 0) return (error); @@ -5083,6 +5104,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * state. When performing an extreme rewind, we verify the whole pool, * which can take a very long time. */ + spa_import_progress_set_notes(spa, "Verifying pool data"); error = spa_ld_verify_pool_data(spa); if (error != 0) return (error); @@ -5092,6 +5114,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * we write anything to the pool because we'd need to update the space * accounting using the deflated sizes. */ + spa_import_progress_set_notes(spa, "Calculating deflated space"); spa_update_dspace(spa); /* @@ -5099,6 +5122,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * pool. If we are importing the pool in read-write mode, a few * additional steps must be performed to finish the import. */ + spa_import_progress_set_notes(spa, "Starting import"); if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER || spa->spa_load_max_txg == UINT64_MAX)) { uint64_t config_cache_txg = spa->spa_config_txg; @@ -5122,6 +5146,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg); } + spa_import_progress_set_notes(spa, "Claiming ZIL blocks"); /* * Traverse the ZIL and claim all blocks. */ @@ -5141,6 +5166,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * will have been set for us by ZIL traversal operations * performed above. */ + spa_import_progress_set_notes(spa, "Syncing ZIL claims"); txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); /* @@ -5148,6 +5174,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * next sync, we would update the config stored in vdev labels * and the cachefile (by default /etc/zfs/zpool.cache). */ + spa_import_progress_set_notes(spa, "Updating configs"); spa_ld_check_for_config_update(spa, config_cache_txg, update_config_cache); @@ -5156,6 +5183,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Then check all DTLs to see if anything needs resilvering. * The resilver will be deferred if a rebuild was started. */ + spa_import_progress_set_notes(spa, "Starting resilvers"); if (vdev_rebuild_active(spa->spa_root_vdev)) { vdev_rebuild_restart(spa); } else if (!dsl_scan_resilvering(spa->spa_dsl_pool) && @@ -5169,6 +5197,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) */ spa_history_log_version(spa, "open", NULL); + spa_import_progress_set_notes(spa, + "Restarting device removals"); spa_restart_removal(spa); spa_spawn_aux_threads(spa); @@ -5181,19 +5211,26 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * auxiliary threads above (from which the livelist * deletion zthr is part of). */ + spa_import_progress_set_notes(spa, + "Cleaning up inconsistent objsets"); (void) dmu_objset_find(spa_name(spa), dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); /* * Clean up any stale temporary dataset userrefs. */ + spa_import_progress_set_notes(spa, + "Cleaning up temporary userrefs"); dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + spa_import_progress_set_notes(spa, "Restarting initialize"); vdev_initialize_restart(spa->spa_root_vdev); + spa_import_progress_set_notes(spa, "Restarting TRIM"); vdev_trim_restart(spa->spa_root_vdev); vdev_autotrim_restart(spa); spa_config_exit(spa, SCL_CONFIG, FTAG); + spa_import_progress_set_notes(spa, "Finished importing"); } spa_import_progress_remove(spa_guid(spa)); diff --git a/module/zfs/spa_log_spacemap.c b/module/zfs/spa_log_spacemap.c index cf05158b63f8..873089a53e34 100644 --- a/module/zfs/spa_log_spacemap.c +++ b/module/zfs/spa_log_spacemap.c @@ -1153,6 +1153,7 @@ spa_ld_log_sm_data(spa_t *spa) uint_t pn = 0; uint64_t ps = 0; + uint64_t nsm = 0; psls = sls = avl_first(&spa->spa_sm_logs_by_txg); while (sls != NULL) { /* Prefetch log spacemaps up to 16 TXGs or MBs ahead. */ @@ -1185,6 +1186,10 @@ spa_ld_log_sm_data(spa_t *spa) summary_add_data(spa, sls->sls_txg, sls->sls_mscount, 0, sls->sls_nblocks); + spa_import_progress_set_notes_nolog(spa, + "Read %llu of %lu log space maps", (u_longlong_t)nsm, + avl_numnodes(&spa->spa_sm_logs_by_txg)); + struct spa_ld_log_sm_arg vla = { .slls_spa = spa, .slls_txg = sls->sls_txg @@ -1200,6 +1205,7 @@ spa_ld_log_sm_data(spa_t *spa) pn--; ps -= space_map_length(sls->sls_sm); + nsm++; space_map_close(sls->sls_sm); sls->sls_sm = NULL; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls); @@ -1210,11 +1216,11 @@ spa_ld_log_sm_data(spa_t *spa) hrtime_t read_logs_endtime = gethrtime(); spa_load_note(spa, - "read %llu log space maps (%llu total blocks - blksz = %llu bytes) " - "in %lld ms", (u_longlong_t)avl_numnodes(&spa->spa_sm_logs_by_txg), + "Read %lu log space maps (%llu total blocks - blksz = %llu bytes) " + "in %lld ms", avl_numnodes(&spa->spa_sm_logs_by_txg), (u_longlong_t)spa_log_sm_nblocks(spa), (u_longlong_t)zfs_log_sm_blksz, - (longlong_t)((read_logs_endtime - read_logs_starttime) / 1000000)); + (longlong_t)NSEC2MSEC(read_logs_endtime - read_logs_starttime)); out: if (error != 0) { diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 3990af98c732..1e5ab59eb4d0 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -426,6 +426,8 @@ spa_load_note(spa_t *spa, const char *fmt, ...) zfs_dbgmsg("spa_load(%s, config %s): %s", spa->spa_name, spa->spa_trust_config ? "trusted" : "untrusted", buf); + + spa_import_progress_set_notes_nolog(spa, "%s", buf); } /* @@ -2184,6 +2186,7 @@ typedef struct spa_import_progress { uint64_t pool_guid; /* unique id for updates */ char *pool_name; spa_load_state_t spa_load_state; + char *spa_load_notes; uint64_t mmp_sec_remaining; /* MMP activity check */ uint64_t spa_load_max_txg; /* rewind txg */ procfs_list_node_t smh_node; @@ -2194,9 +2197,9 @@ spa_history_list_t *spa_import_progress_list = NULL; static int spa_import_progress_show_header(struct seq_file *f) { - seq_printf(f, "%-20s %-14s %-14s %-12s %s\n", "pool_guid", + seq_printf(f, "%-20s %-14s %-14s %-12s %-16s %s\n", "pool_guid", "load_state", "multihost_secs", "max_txg", - "pool_name"); + "pool_name", "notes"); return (0); } @@ -2205,11 +2208,12 @@ spa_import_progress_show(struct seq_file *f, void *data) { spa_import_progress_t *sip = (spa_import_progress_t *)data; - seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s\n", + seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %-16s %s\n", (u_longlong_t)sip->pool_guid, (u_longlong_t)sip->spa_load_state, (u_longlong_t)sip->mmp_sec_remaining, (u_longlong_t)sip->spa_load_max_txg, - (sip->pool_name ? sip->pool_name : "-")); + (sip->pool_name ? sip->pool_name : "-"), + (sip->spa_load_notes ? sip->spa_load_notes : "-")); return (0); } @@ -2223,6 +2227,8 @@ spa_import_progress_truncate(spa_history_list_t *shl, unsigned int size) sip = list_remove_head(&shl->procfs_list.pl_list); if (sip->pool_name) spa_strfree(sip->pool_name); + if (sip->spa_load_notes) + kmem_strfree(sip->spa_load_notes); kmem_free(sip, sizeof (spa_import_progress_t)); shl->size--; } @@ -2278,6 +2284,10 @@ spa_import_progress_set_state(uint64_t pool_guid, sip = list_prev(&shl->procfs_list.pl_list, sip)) { if (sip->pool_guid == pool_guid) { sip->spa_load_state = load_state; + if (sip->spa_load_notes != NULL) { + kmem_strfree(sip->spa_load_notes); + sip->spa_load_notes = NULL; + } error = 0; break; } @@ -2287,6 +2297,59 @@ spa_import_progress_set_state(uint64_t pool_guid, return (error); } +static void +spa_import_progress_set_notes_impl(spa_t *spa, boolean_t log_dbgmsg, + const char *fmt, va_list adx) +{ + spa_history_list_t *shl = spa_import_progress_list; + spa_import_progress_t *sip; + uint64_t pool_guid = spa_guid(spa); + + if (shl->size == 0) + return; + + char *notes = kmem_vasprintf(fmt, adx); + + mutex_enter(&shl->procfs_list.pl_lock); + for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL; + sip = list_prev(&shl->procfs_list.pl_list, sip)) { + if (sip->pool_guid == pool_guid) { + if (sip->spa_load_notes != NULL) { + kmem_strfree(sip->spa_load_notes); + sip->spa_load_notes = NULL; + } + sip->spa_load_notes = notes; + if (log_dbgmsg) + zfs_dbgmsg("'%s' %s", sip->pool_name, notes); + notes = NULL; + break; + } + } + mutex_exit(&shl->procfs_list.pl_lock); + if (notes != NULL) + kmem_strfree(notes); +} + +void +spa_import_progress_set_notes(spa_t *spa, const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + spa_import_progress_set_notes_impl(spa, B_TRUE, fmt, adx); + va_end(adx); +} + +void +spa_import_progress_set_notes_nolog(spa_t *spa, const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + spa_import_progress_set_notes_impl(spa, B_FALSE, fmt, adx); + va_end(adx); +} + int spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t load_max_txg) { @@ -2355,6 +2418,7 @@ spa_import_progress_add(spa_t *spa) poolname = spa_name(spa); sip->pool_name = spa_strdup(poolname); sip->spa_load_state = spa_load_state(spa); + sip->spa_load_notes = NULL; mutex_enter(&shl->procfs_list.pl_lock); procfs_list_add(&shl->procfs_list, sip); @@ -2374,6 +2438,8 @@ spa_import_progress_remove(uint64_t pool_guid) if (sip->pool_guid == pool_guid) { if (sip->pool_name) spa_strfree(sip->pool_name); + if (sip->spa_load_notes) + spa_strfree(sip->spa_load_notes); list_remove(&shl->procfs_list.pl_list, sip); shl->size--; kmem_free(sip, sizeof (spa_import_progress_t)); diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 20c9b823c648..dd44b63aaf3d 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -425,7 +425,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos', 'import_devices_missing', 'import_log_missing', 'import_paths_changed', 'import_rewind_config_changed', - 'import_rewind_device_replaced'] + 'import_rewind_device_replaced', + 'zpool_import_status'] tags = ['functional', 'cli_root', 'zpool_import'] timeout = 1200 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh new file mode 100755 index 000000000000..c96961bf6419 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh @@ -0,0 +1,132 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2023 Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg + +# +# DESCRIPTION: +# During a pool import, the 'import_progress' kstat contains details +# on the import progress. +# +# STRATEGY: +# 1. Create test pool with several devices +# 2. Generate some ZIL records and spacemap logs +# 3. Export the pool +# 4. Import the pool in the background and monitor the kstat content +# 5. Check the zfs debug messages for import progress +# + +verify_runnable "global" + +function cleanup +{ + log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 0 + log_must set_tunable64 METASLAB_DEBUG_LOAD 0 + + destroy_pool $TESTPOOL1 +} + +log_assert "During a pool import, the 'import_progress' kstat contains " \ + "notes on the progress" + +log_onexit cleanup + +log_must zpool create $TESTPOOL1 $VDEV0 $VDEV1 $VDEV2 +typeset guid=$(zpool get -H -o value guid $TESTPOOL1) + +log_must zfs create -o recordsize=8k $TESTPOOL1/fs +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=/$TESTPOOL1/fs/sync conv=fsync bs=1 count=1 + +# +# Overwrite some blocks to populate spacemap logs +# +log_must dd if=/dev/urandom of=/$TESTPOOL1/fs/00 bs=1M count=200 +sync_all_pools +log_must dd if=/dev/urandom of=/$TESTPOOL1/fs/00 bs=1M count=200 +sync_all_pools + +# +# Freeze the pool to retain intent log records +# +log_must zpool freeze $TESTPOOL1 + +# fill_fs [destdir] [dirnum] [filenum] [bytes] [num_writes] [data] +log_must fill_fs /$TESTPOOL1/fs 1 2000 100 1024 R + +log_must zpool list -v $TESTPOOL1 + +# +# Unmount filesystem and export the pool +# +# At this stage the zfs intent log contains +# a set of records to replay. +# +log_must zfs unmount /$TESTPOOL1/fs + +log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1 +log_must zpool export $TESTPOOL1 + +log_must set_tunable64 METASLAB_DEBUG_LOAD 1 +log_note "Starting zpool import in background at" $(date +'%H:%M:%S') +zpool import -d $DEVICE_DIR -f $guid & +pid=$! + +# +# capture progress until import is finished +# +log_note waiting for pid $pid to exit +kstat import_progress +while [[ -d /proc/"$pid" ]]; do + line=$(kstat import_progress | grep -v pool_guid) + if [[ -n $line ]]; then + echo $line + fi + if [[ -f /$TESTPOOL1/fs/00 ]]; then + break; + fi + sleep 0.0001 +done +log_note "zpool import completed at" $(date +'%H:%M:%S') + +entries=$(kstat dbgmsg | grep "spa_import_progress_set_notes_impl(): 'testpool1'" | wc -l) +log_note "found $entries progress notes in dbgmsg" +log_must test $entries -gt 20 + +log_must zpool status $TESTPOOL1 + +log_pass "During a pool import, the 'import_progress' kstat contains " \ + "notes on the progress" From 86239a5b9c54e5d34c75446e23c4c88f8defc59f Mon Sep 17 00:00:00 2001 From: Shengqi Chen Date: Thu, 7 Dec 2023 04:37:50 +0800 Subject: [PATCH 11/18] compact: workaround for GPL-only symbols on riscv from Linux 6.2 Since Linux 6.2, the implementation of flush_dcache_page on riscv references GPL-only symbol `PageHuge`, breaking the build of zfs. This patch uses existing mechanism to override flush_dcache_page, removing the call to `PageHuge`. According to comments in kernel, it is only used to do some check against HugeTLB pages, which only exist in userspace. ZFS uses flush_dcache_page only on kernel pages, thus this patch will not introduce any behaviour change. See also: torvalds/linux@d33deda, openzfs/zfs@589f59b Reviewed-by: Brian Behlendorf Signed-off-by: Shengqi Chen Closes #14974 Closes #15627 --- config/kernel-flush_dcache_page.m4 | 5 +++-- config/kernel.m4 | 6 ++++++ include/os/linux/kernel/linux/dcache_compat.h | 15 +++++++++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/config/kernel-flush_dcache_page.m4 b/config/kernel-flush_dcache_page.m4 index 2340c386ef57..aa916c87d531 100644 --- a/config/kernel-flush_dcache_page.m4 +++ b/config/kernel-flush_dcache_page.m4 @@ -1,7 +1,8 @@ dnl # dnl # Starting from Linux 5.13, flush_dcache_page() becomes an inline -dnl # function and may indirectly referencing GPL-only cpu_feature_keys on -dnl # powerpc +dnl # function and may indirectly referencing GPL-only symbols: +dnl # on powerpc: cpu_feature_keys +dnl # on riscv: PageHuge (added from 6.2) dnl # dnl # diff --git a/config/kernel.m4 b/config/kernel.m4 index 056517a841f2..d25b65994f6f 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -168,6 +168,9 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE ;; + riscv*) + ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE + ;; esac AC_MSG_CHECKING([for available kernel interfaces]) @@ -310,6 +313,9 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_CPU_HAS_FEATURE ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE ;; + riscv*) + ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE + ;; esac ]) diff --git a/include/os/linux/kernel/linux/dcache_compat.h b/include/os/linux/kernel/linux/dcache_compat.h index 1e35204932d1..ab1711b99f3f 100644 --- a/include/os/linux/kernel/linux/dcache_compat.h +++ b/include/os/linux/kernel/linux/dcache_compat.h @@ -42,8 +42,8 @@ /* * Starting from Linux 5.13, flush_dcache_page() becomes an inline function * and under some configurations, may indirectly referencing GPL-only - * cpu_feature_keys on powerpc. Override this function when it is detected - * being GPL-only. + * symbols, e.g., cpu_feature_keys on powerpc and PageHuge on riscv. + * Override this function when it is detected being GPL-only. */ #if defined __powerpc__ && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY #include @@ -53,6 +53,17 @@ clear_bit(PG_dcache_clean, &(page)->flags); \ } while (0) #endif +/* + * For riscv implementation, the use of PageHuge can be safely removed. + * Because it handles pages allocated by HugeTLB, while flush_dcache_page + * in zfs module is only called on kernel pages. + */ +#if defined __riscv && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY +#define flush_dcache_page(page) do { \ + if (test_bit(PG_dcache_clean, &(page)->flags)) \ + clear_bit(PG_dcache_clean, &(page)->flags); \ + } while (0) +#endif /* * 2.6.30 API change, From f9765b182eae1d031aa6559b22688329fff90fba Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 6 Dec 2023 15:39:12 -0500 Subject: [PATCH 12/18] zdb: Dump encrypted write and clone ZIL records Block pointers are not encrypted in TX_WRITE and TX_CLONE_RANGE records, so we can dump them, that may be useful for debugging. Related to #15543. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15629 --- cmd/zdb/zdb_il.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c index 970c45c9b3bb..63d95ddedc3b 100644 --- a/cmd/zdb/zdb_il.c +++ b/cmd/zdb/zdb_il.c @@ -168,7 +168,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset, (u_longlong_t)lr->lr_length); - if (txtype == TX_WRITE2 || verbose < 5) + if (txtype == TX_WRITE2 || verbose < 4) return; if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { @@ -178,6 +178,8 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) "will claim" : "won't claim"); print_log_bp(bp, tab_prefix); + if (verbose < 5) + return; if (BP_IS_HOLE(bp)) { (void) printf("\t\t\tLSIZE 0x%llx\n", (u_longlong_t)BP_GET_LSIZE(bp)); @@ -202,6 +204,9 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) if (error) goto out; } else { + if (verbose < 5) + return; + /* data is stored after the end of the lr_write record */ data = abd_alloc(lr->lr_length, B_FALSE); abd_copy_from_buf(data, lr + 1, lr->lr_length); @@ -217,6 +222,28 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) abd_free(data); } +static void +zil_prt_rec_write_enc(zilog_t *zilog, int txtype, const void *arg) +{ + (void) txtype; + const lr_write_t *lr = arg; + const blkptr_t *bp = &lr->lr_blkptr; + int verbose = MAX(dump_opt['d'], dump_opt['i']); + + (void) printf("%s(encrypted)\n", tab_prefix); + + if (verbose < 4) + return; + + if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { + (void) printf("%shas blkptr, %s\n", tab_prefix, + !BP_IS_HOLE(bp) && + bp->blk_birth >= spa_min_claim_txg(zilog->zl_spa) ? + "will claim" : "won't claim"); + print_log_bp(bp, tab_prefix); + } +} + static void zil_prt_rec_truncate(zilog_t *zilog, int txtype, const void *arg) { @@ -312,11 +339,34 @@ zil_prt_rec_clone_range(zilog_t *zilog, int txtype, const void *arg) { (void) zilog, (void) txtype; const lr_clone_range_t *lr = arg; + int verbose = MAX(dump_opt['d'], dump_opt['i']); (void) printf("%sfoid %llu, offset %llx, length %llx, blksize %llx\n", tab_prefix, (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset, (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blksz); + if (verbose < 4) + return; + + for (unsigned int i = 0; i < lr->lr_nbps; i++) { + (void) printf("%s[%u/%llu] ", tab_prefix, i + 1, + (u_longlong_t)lr->lr_nbps); + print_log_bp(&lr->lr_bps[i], ""); + } +} + +static void +zil_prt_rec_clone_range_enc(zilog_t *zilog, int txtype, const void *arg) +{ + (void) zilog, (void) txtype; + const lr_clone_range_t *lr = arg; + int verbose = MAX(dump_opt['d'], dump_opt['i']); + + (void) printf("%s(encrypted)\n", tab_prefix); + + if (verbose < 4) + return; + for (unsigned int i = 0; i < lr->lr_nbps; i++) { (void) printf("%s[%u/%llu] ", tab_prefix, i + 1, (u_longlong_t)lr->lr_nbps); @@ -327,6 +377,7 @@ zil_prt_rec_clone_range(zilog_t *zilog, int txtype, const void *arg) typedef void (*zil_prt_rec_func_t)(zilog_t *, int, const void *); typedef struct zil_rec_info { zil_prt_rec_func_t zri_print; + zil_prt_rec_func_t zri_print_enc; const char *zri_name; uint64_t zri_count; } zil_rec_info_t; @@ -341,7 +392,9 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = { {.zri_print = zil_prt_rec_remove, .zri_name = "TX_RMDIR "}, {.zri_print = zil_prt_rec_link, .zri_name = "TX_LINK "}, {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME "}, - {.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE "}, + {.zri_print = zil_prt_rec_write, + .zri_print_enc = zil_prt_rec_write_enc, + .zri_name = "TX_WRITE "}, {.zri_print = zil_prt_rec_truncate, .zri_name = "TX_TRUNCATE "}, {.zri_print = zil_prt_rec_setattr, .zri_name = "TX_SETATTR "}, {.zri_print = zil_prt_rec_acl, .zri_name = "TX_ACL_V0 "}, @@ -358,6 +411,7 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = { {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_EXCHANGE "}, {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_WHITEOUT "}, {.zri_print = zil_prt_rec_clone_range, + .zri_print_enc = zil_prt_rec_clone_range_enc, .zri_name = "TX_CLONE_RANGE "}, }; @@ -384,6 +438,8 @@ print_log_record(zilog_t *zilog, const lr_t *lr, void *arg, uint64_t claim_txg) if (txtype && verbose >= 3) { if (!zilog->zl_os->os_encrypted) { zil_rec_info[txtype].zri_print(zilog, txtype, lr); + } else if (zil_rec_info[txtype].zri_print_enc) { + zil_rec_info[txtype].zri_print_enc(zilog, txtype, lr); } else { (void) printf("%s(encrypted)\n", tab_prefix); } From 2aa3a482abe31bf41d97bd028507b69029515698 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 6 Dec 2023 18:02:05 -0500 Subject: [PATCH 13/18] ZIL: Remove 128K into 2x68K LWB split optimization To improve 128KB block write performance in case of multiple VDEVs ZIL used to spit those writes into two 64KB ones. Unfortunately it was found to cause LWB buffer overflow, trying to write maximum- sizes 128KB TX_CLONE_RANGE record with 1022 block pointers into 68KB buffer, since unlike TX_WRITE ZIL code can't split it. This is a minimally-invasive temporary block cloning fix until the following more invasive prediction code refactoring. Reviewed-by: Brian Behlendorf Reviewed-by: Ameer Hamza Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15634 --- module/zfs/zil.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 252d9a0493de..e1001d1be453 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1725,8 +1725,6 @@ static const struct { { 8192 + 4096, 8192 + 4096 }, /* database */ { 32768 + 4096, 32768 + 4096 }, /* NFS writes */ { 65536 + 4096, 65536 + 4096 }, /* 64KB writes */ - { 131072, 131072 }, /* < 128KB writes */ - { 131072 +4096, 65536 + 4096 }, /* 128KB writes */ { UINT64_MAX, SPA_OLD_MAXBLOCKSIZE}, /* > 128KB writes */ }; From 9743d09635c7ab7957e00161cc8d4e8697ffa191 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 6 Dec 2023 18:37:27 -0500 Subject: [PATCH 14/18] BRT: Limit brt_vdev_dump() to only one vdev Without this patch on pool of 60 vdevs with ZFS_DEBUG enabled clone takes much more time than copy, while heavily trashing dbgmsg for no good reason, repeatedly dumping all vdevs BRTs again and again, even unmodified ones. I am generally not sure this dumping is not excessive, but decided to keep it for now, just restricting its scope to more reasonable. Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15625 --- module/zfs/brt.c | 80 ++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 47 deletions(-) diff --git a/module/zfs/brt.c b/module/zfs/brt.c index a701c70fcfb5..225ddaca1e54 100644 --- a/module/zfs/brt.c +++ b/module/zfs/brt.c @@ -342,7 +342,7 @@ brt_vdev_entcount_get(const brt_vdev_t *brtvd, uint64_t idx) ASSERT3U(idx, <, brtvd->bv_size); - if (brtvd->bv_need_byteswap) { + if (unlikely(brtvd->bv_need_byteswap)) { return (BSWAP_16(brtvd->bv_entcount[idx])); } else { return (brtvd->bv_entcount[idx]); @@ -355,7 +355,7 @@ brt_vdev_entcount_set(brt_vdev_t *brtvd, uint64_t idx, uint16_t entcnt) ASSERT3U(idx, <, brtvd->bv_size); - if (brtvd->bv_need_byteswap) { + if (unlikely(brtvd->bv_need_byteswap)) { brtvd->bv_entcount[idx] = BSWAP_16(entcnt); } else { brtvd->bv_entcount[idx] = entcnt; @@ -390,55 +390,39 @@ brt_vdev_entcount_dec(brt_vdev_t *brtvd, uint64_t idx) #ifdef ZFS_DEBUG static void -brt_vdev_dump(brt_t *brt) +brt_vdev_dump(brt_vdev_t *brtvd) { - brt_vdev_t *brtvd; - uint64_t vdevid; + uint64_t idx; - if ((zfs_flags & ZFS_DEBUG_BRT) == 0) { - return; - } - - if (brt->brt_nvdevs == 0) { - zfs_dbgmsg("BRT empty"); - return; - } - - zfs_dbgmsg("BRT vdev dump:"); - for (vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) { - uint64_t idx; - - brtvd = &brt->brt_vdevs[vdevid]; - zfs_dbgmsg(" vdevid=%llu/%llu meta_dirty=%d entcount_dirty=%d " - "size=%llu totalcount=%llu nblocks=%llu bitmapsize=%zu\n", - (u_longlong_t)vdevid, (u_longlong_t)brtvd->bv_vdevid, - brtvd->bv_meta_dirty, brtvd->bv_entcount_dirty, - (u_longlong_t)brtvd->bv_size, - (u_longlong_t)brtvd->bv_totalcount, - (u_longlong_t)brtvd->bv_nblocks, - (size_t)BT_SIZEOFMAP(brtvd->bv_nblocks)); - if (brtvd->bv_totalcount > 0) { - zfs_dbgmsg(" entcounts:"); - for (idx = 0; idx < brtvd->bv_size; idx++) { - if (brt_vdev_entcount_get(brtvd, idx) > 0) { - zfs_dbgmsg(" [%04llu] %hu", - (u_longlong_t)idx, - brt_vdev_entcount_get(brtvd, idx)); - } + zfs_dbgmsg(" BRT vdevid=%llu meta_dirty=%d entcount_dirty=%d " + "size=%llu totalcount=%llu nblocks=%llu bitmapsize=%zu\n", + (u_longlong_t)brtvd->bv_vdevid, + brtvd->bv_meta_dirty, brtvd->bv_entcount_dirty, + (u_longlong_t)brtvd->bv_size, + (u_longlong_t)brtvd->bv_totalcount, + (u_longlong_t)brtvd->bv_nblocks, + (size_t)BT_SIZEOFMAP(brtvd->bv_nblocks)); + if (brtvd->bv_totalcount > 0) { + zfs_dbgmsg(" entcounts:"); + for (idx = 0; idx < brtvd->bv_size; idx++) { + uint16_t entcnt = brt_vdev_entcount_get(brtvd, idx); + if (entcnt > 0) { + zfs_dbgmsg(" [%04llu] %hu", + (u_longlong_t)idx, entcnt); } } - if (brtvd->bv_entcount_dirty) { - char *bitmap; + } + if (brtvd->bv_entcount_dirty) { + char *bitmap; - bitmap = kmem_alloc(brtvd->bv_nblocks + 1, KM_SLEEP); - for (idx = 0; idx < brtvd->bv_nblocks; idx++) { - bitmap[idx] = - BT_TEST(brtvd->bv_bitmap, idx) ? 'x' : '.'; - } - bitmap[idx] = '\0'; - zfs_dbgmsg(" bitmap: %s", bitmap); - kmem_free(bitmap, brtvd->bv_nblocks + 1); + bitmap = kmem_alloc(brtvd->bv_nblocks + 1, KM_SLEEP); + for (idx = 0; idx < brtvd->bv_nblocks; idx++) { + bitmap[idx] = + BT_TEST(brtvd->bv_bitmap, idx) ? 'x' : '.'; } + bitmap[idx] = '\0'; + zfs_dbgmsg(" dirty: %s", bitmap); + kmem_free(bitmap, brtvd->bv_nblocks + 1); } } #endif @@ -767,7 +751,8 @@ brt_vdev_addref(brt_t *brt, brt_vdev_t *brtvd, const brt_entry_t *bre, BT_SET(brtvd->bv_bitmap, idx); #ifdef ZFS_DEBUG - brt_vdev_dump(brt); + if (zfs_flags & ZFS_DEBUG_BRT) + brt_vdev_dump(brtvd); #endif } @@ -803,7 +788,8 @@ brt_vdev_decref(brt_t *brt, brt_vdev_t *brtvd, const brt_entry_t *bre, BT_SET(brtvd->bv_bitmap, idx); #ifdef ZFS_DEBUG - brt_vdev_dump(brt); + if (zfs_flags & ZFS_DEBUG_BRT) + brt_vdev_dump(brtvd); #endif } From 11656234b560c401dec6a16e1bf048b20fd31aac Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Thu, 7 Dec 2023 11:20:11 -0500 Subject: [PATCH 15/18] FreeBSD: Ensure that zfs_getattr() initializes the va_rdev field Otherwise the field is left uninitialized, leading to a possible kernel memory disclosure to userspace or to the network. Use the same initialization value we use in zfsctl_common_getattr(). Reported-by: KMSAN Sponsored-by: The FreeBSD Foundation Reviewed-by: Brian Behlendorf Reviewed-by: Ed Maste Signed-off-by: Mark Johnston Closes #15639 --- module/os/freebsd/zfs/zfs_vnops_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index aa1d4855e663..d9a8c8a0d769 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -2011,6 +2011,8 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) vap->va_size = zp->z_size; if (vp->v_type == VBLK || vp->v_type == VCHR) vap->va_rdev = zfs_cmpldev(rdev); + else + vap->va_rdev = 0; vap->va_gen = zp->z_gen; vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ vap->va_filerev = zp->z_seq; From 4836d293c0508b58d5235cc35bf7211fc1082952 Mon Sep 17 00:00:00 2001 From: Rob N Date: Fri, 8 Dec 2023 03:21:38 +1100 Subject: [PATCH 16/18] zfs_refcount_remove: explictly ignore returns Coverity noticed that sometimes we ignore the return, and sometimes we don't. Its not wrong, and I like consistent style, so here we are. Reported-by: Coverity (CID-1564584) Reported-by: Coverity (CID-1564585) Reported-by: Coverity (CID-1564586) Reported-by: Coverity (CID-1564587) Reported-by: Coverity (CID-1564588) Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Closes #15647 --- module/zfs/arc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index f81beab222ff..3bcffb3c7ede 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -6069,7 +6069,7 @@ arc_prune_task(void *ptr) if (func != NULL) func(ap->p_adjust, ap->p_private); - zfs_refcount_remove(&ap->p_refcnt, func); + (void) zfs_refcount_remove(&ap->p_refcnt, func); } /* @@ -6098,7 +6098,7 @@ arc_prune_async(uint64_t adjust) ap->p_adjust = adjust; if (taskq_dispatch(arc_prune_taskq, arc_prune_task, ap, TQ_SLEEP) == TASKQID_INVALID) { - zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc); + (void) zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc); continue; } ARCSTAT_BUMP(arcstat_prune); @@ -7720,7 +7720,7 @@ arc_fini(void) mutex_enter(&arc_prune_mtx); while ((p = list_remove_head(&arc_prune_list)) != NULL) { - zfs_refcount_remove(&p->p_refcnt, &arc_prune_list); + (void) zfs_refcount_remove(&p->p_refcnt, &arc_prune_list); zfs_refcount_destroy(&p->p_refcnt); kmem_free(p, sizeof (*p)); } @@ -8301,7 +8301,8 @@ l2arc_write_done(zio_t *zio) ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count); zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize, lb_ptr_buf); - zfs_refcount_remove(&dev->l2ad_lb_count, lb_ptr_buf); + (void) zfs_refcount_remove(&dev->l2ad_lb_count, + lb_ptr_buf); kmem_free(lb_ptr_buf->lb_ptr, sizeof (l2arc_log_blkptr_t)); kmem_free(lb_ptr_buf, sizeof (l2arc_lb_ptr_buf_t)); @@ -8772,7 +8773,8 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count); zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize, lb_ptr_buf); - zfs_refcount_remove(&dev->l2ad_lb_count, lb_ptr_buf); + (void) zfs_refcount_remove(&dev->l2ad_lb_count, + lb_ptr_buf); list_remove(&dev->l2ad_lbptr_list, lb_ptr_buf); kmem_free(lb_ptr_buf->lb_ptr, sizeof (l2arc_log_blkptr_t)); From f0cb6482e1fe239c17f64cdb37f205e0e66ac4d4 Mon Sep 17 00:00:00 2001 From: Rob N Date: Fri, 8 Dec 2023 03:23:16 +1100 Subject: [PATCH 17/18] setproctitle: fix ununitialised variable Reported-by: Coverity (CID-1573333) Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Reviewed-by: Ameer Hamza Signed-off-by: Rob Norris Closes #15649 --- lib/libzutil/os/linux/zutil_setproctitle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libzutil/os/linux/zutil_setproctitle.c b/lib/libzutil/os/linux/zutil_setproctitle.c index 5961527ebc2c..e63acceb4f8e 100644 --- a/lib/libzutil/os/linux/zutil_setproctitle.c +++ b/lib/libzutil/os/linux/zutil_setproctitle.c @@ -88,7 +88,7 @@ spt_copyenv(int envc, char *envp[]) char **envcopy; char *eq; int envsize; - int i, error; + int i, error = 0; if (environ != envp) return (0); From 450f2d0b08e73cfb057d0e301a813418b70d64b9 Mon Sep 17 00:00:00 2001 From: Rob N Date: Fri, 8 Dec 2023 03:41:54 +1100 Subject: [PATCH 18/18] import: ignore return on hostid lookups Just silencing a warning. Its totally fine for a hostid to not be there. Reported-by: Coverity (CID-1573336) Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Reviewed-by: Ameer Hamza Signed-off-by: Rob Norris Closes #15650 --- cmd/zpool/zpool_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index c143d637059d..cf66953a8caa 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -3132,7 +3132,8 @@ zfs_force_import_required(nvlist_t *config) * local hostid. */ if (nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_HOSTID, &hostid) != 0) - nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, + &hostid); if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid()) return (B_TRUE);