From 2e2a46e0a597b3ee606ea7dc5bc527459077322f Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Fri, 22 Sep 2023 16:08:51 -0700 Subject: [PATCH] Invoke zdb by guid to avoid import errors The problem was that ztest removed a device and replaced it with another device, and the pool was then reguided. The import then failed because there were two possible imports with the same name; one with the new guid, and one with the old. This can happen because the label writes from the device removal/replacement can be subject to ztest's error injection. The other ways to fix this would be to change the error injection to not trigger on removals (which may not be technically feasible), or to change the import code to not report configurations that are so short on devices (which would potentially have unpleasant end-user effects when trying to recover from data losses/device configuration issues). Reviewed-by: Brian Behlendorf Reviewed-by: Matthew Ahrens Reviewed-by: George Melikov Signed-off-by: Paul Dagnelie Closes #15298 --- cmd/ztest.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/cmd/ztest.c b/cmd/ztest.c index 9c5a6944035c..8cfbdfe1c2e2 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -6378,6 +6378,7 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id) spa_t *spa = ztest_spa; uint64_t orig, load; int error; + ztest_shared_t *zs = ztest_shared; if (ztest_opts.zo_mmp_test) return; @@ -6387,6 +6388,7 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id) (void) pthread_rwlock_wrlock(&ztest_name_lock); error = spa_change_guid(spa); + zs->zs_guid = spa_guid(spa); (void) pthread_rwlock_unlock(&ztest_name_lock); if (error != 0) @@ -6916,7 +6918,7 @@ ztest_trim(ztest_ds_t *zd, uint64_t id) * Verify pool integrity by running zdb. 
*/ static void -ztest_run_zdb(const char *pool) +ztest_run_zdb(uint64_t guid) { int status; char *bin; @@ -6940,13 +6942,13 @@ ztest_run_zdb(const char *pool) free(set_gvars_args); size_t would = snprintf(zdb, len, - "%s -bcc%s%s -G -d -Y -e -y %s -p %s %s", + "%s -bcc%s%s -G -d -Y -e -y %s -p %s %"PRIu64, bin, ztest_opts.zo_verbose >= 3 ? "s" : "", ztest_opts.zo_verbose >= 4 ? "v" : "", set_gvars_args_joined, ztest_opts.zo_dir, - pool); + guid); ASSERT3U(would, <, len); umem_free(set_gvars_args_joined, strlen(set_gvars_args_joined) + 1); @@ -7524,14 +7526,15 @@ ztest_import(ztest_shared_t *zs) VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); zs->zs_metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; + zs->zs_guid = spa_guid(spa); spa_close(spa, FTAG); kernel_fini(); if (!ztest_opts.zo_mmp_test) { - ztest_run_zdb(ztest_opts.zo_pool); + ztest_run_zdb(zs->zs_guid); ztest_freeze(); - ztest_run_zdb(ztest_opts.zo_pool); + ztest_run_zdb(zs->zs_guid); } (void) pthread_rwlock_destroy(&ztest_name_lock); @@ -7602,7 +7605,6 @@ ztest_run(ztest_shared_t *zs) dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dmu_objset_fast_stat(os, &dds); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); - zs->zs_guid = dds.dds_guid; dmu_objset_disown(os, B_TRUE, FTAG); /* @@ -7873,14 +7875,15 @@ ztest_init(ztest_shared_t *zs) VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); zs->zs_metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; + zs->zs_guid = spa_guid(spa); spa_close(spa, FTAG); kernel_fini(); if (!ztest_opts.zo_mmp_test) { - ztest_run_zdb(ztest_opts.zo_pool); + ztest_run_zdb(zs->zs_guid); ztest_freeze(); - ztest_run_zdb(ztest_opts.zo_pool); + ztest_run_zdb(zs->zs_guid); } (void) pthread_rwlock_destroy(&ztest_name_lock); @@ -8303,7 +8306,7 @@ main(int argc, char **argv) } if (!ztest_opts.zo_mmp_test) - ztest_run_zdb(ztest_opts.zo_pool); + ztest_run_zdb(zs->zs_guid); } if (ztest_opts.zo_verbose >= 1) {