Notable upstream pull request merges:
  #14833 Update compatibility.d files
  #14841 ZIL: Reduce scope of per-dataset zl_issuer_lock
  #14863 zil: Add some more statistics
  #14866 btree: Implement faster binary search algorithm
  #14894 Fix inconsistent definition of zfs_scrub_error_blocks_per_txg
  #14892 Fix concurrent resilvers initiated at same time
  #14903 Fix NULL pointer dereference when doing concurrent 'send' operations
  #14910 ZIL: Allow to replay blocks of any size
  #14939 Fix the L2ARC write size calculating logic
  #14934 Introduce zfs_refcount_(add|remove)_few()
  #14946 Improve l2arc reporting in arc_summary
  #14953 Finally drop long disabled vdev cache
  #14954 Fix the L2ARC write size calculating logic (2)
  #14955 Use list_remove_head() where possible
  #14959 ZIL: Fix race introduced by f63811f072

Obtained from:	OpenZFS
OpenZFS commit:	feff9dfed3
Commit:	4e8d558c9d
Author:	Martin Matuska
Date:	2023-06-10 19:31:17 +02:00
97 changed files with 1693 additions and 1220 deletions

View File

@ -135,7 +135,6 @@ KERNEL_C = \
uberblock.c \
unique.c \
vdev.c \
vdev_cache.c \
vdev_draid.c \
vdev_draid_rand.c \
vdev_file.c \

View File

@ -326,7 +326,6 @@ contrib/openzfs/module/zfs/txg.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/uberblock.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/unique.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/vdev.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/vdev_cache.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/vdev_draid.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/vdev_draid_rand.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/vdev_indirect.c optional zfs compile-with "${ZFS_C}"

View File

@ -6,5 +6,5 @@ Release: 1
Release-Tags: relext
License: CDDL
Author: OpenZFS
Linux-Maximum: 6.2
Linux-Maximum: 6.3
Linux-Minimum: 3.10

View File

@ -64,7 +64,6 @@ SECTION_HELP = 'print info from one section ('+' '.join(SECTIONS)+')'
SECTION_PATHS = {'arc': 'arcstats',
'dmu': 'dmu_tx',
'l2arc': 'arcstats', # L2ARC stuff lives in arcstats
'vdev': 'vdev_cache_stats',
'zfetch': 'zfetchstats',
'zil': 'zil'}
@ -90,8 +89,6 @@ if sys.platform.startswith('freebsd'):
# Requires py36-sysctl on FreeBSD
import sysctl
VDEV_CACHE_SIZE = 'vdev.cache_size'
def is_value(ctl):
return ctl.type != sysctl.CTLTYPE_NODE
@ -135,8 +132,6 @@ elif sys.platform.startswith('linux'):
SPL_PATH = '/sys/module/spl/parameters'
TUNABLES_PATH = '/sys/module/zfs/parameters'
VDEV_CACHE_SIZE = 'zfs_vdev_cache_size'
def load_kstats(section):
path = os.path.join(KSTAT_PATH, section)
with open(path) as f:
@ -842,7 +837,8 @@ def section_l2arc(kstats_dict):
('Free on write:', 'l2_free_on_write'),
('R/W clashes:', 'l2_rw_clash'),
('Bad checksums:', 'l2_cksum_bad'),
('I/O errors:', 'l2_io_error'))
('Read errors:', 'l2_io_error'),
('Write errors:', 'l2_writes_error'))
for title, value in l2_todo:
prt_i1(title, f_hits(arc_stats[value]))
@ -878,28 +874,20 @@ def section_l2arc(kstats_dict):
prt_i2('Miss ratio:',
f_perc(arc_stats['l2_misses'], l2_access_total),
f_hits(arc_stats['l2_misses']))
prt_i1('Feeds:', f_hits(arc_stats['l2_feeds']))
print()
print('L2ARC writes:')
if arc_stats['l2_writes_done'] != arc_stats['l2_writes_sent']:
prt_i2('Writes sent:', 'FAULTED', f_hits(arc_stats['l2_writes_sent']))
prt_i2('Done ratio:',
f_perc(arc_stats['l2_writes_done'],
arc_stats['l2_writes_sent']),
f_hits(arc_stats['l2_writes_done']))
prt_i2('Error ratio:',
f_perc(arc_stats['l2_writes_error'],
arc_stats['l2_writes_sent']),
f_hits(arc_stats['l2_writes_error']))
else:
prt_i2('Writes sent:', '100 %', f_hits(arc_stats['l2_writes_sent']))
print('L2ARC I/O:')
prt_i2('Reads:',
f_bytes(arc_stats['l2_read_bytes']),
f_hits(arc_stats['l2_hits']))
prt_i2('Writes:',
f_bytes(arc_stats['l2_write_bytes']),
f_hits(arc_stats['l2_writes_sent']))
print()
print('L2ARC evicts:')
prt_i1('Lock retries:', f_hits(arc_stats['l2_evict_lock_retry']))
prt_i1('Upon reading:', f_hits(arc_stats['l2_evict_reading']))
prt_i1('L1 cached:', f_hits(arc_stats['l2_evict_l1cached']))
prt_i1('While reading:', f_hits(arc_stats['l2_evict_reading']))
print()
@ -959,35 +947,6 @@ def section_tunables(*_):
print()
def section_vdev(kstats_dict):
"""Collect information on VDEV caches"""
# Currently [Nov 2017] the VDEV cache is disabled, because it is actually
# harmful. When this is the case, we just skip the whole entry. See
# https://github.com/openzfs/zfs/blob/master/module/zfs/vdev_cache.c
# for details
tunables = get_vdev_params()
if tunables[VDEV_CACHE_SIZE] == '0':
print('VDEV cache disabled, skipping section\n')
return
vdev_stats = isolate_section('vdev_cache_stats', kstats_dict)
vdev_cache_total = int(vdev_stats['hits']) +\
int(vdev_stats['misses']) +\
int(vdev_stats['delegations'])
prt_1('VDEV cache summary:', f_hits(vdev_cache_total))
prt_i2('Hit ratio:', f_perc(vdev_stats['hits'], vdev_cache_total),
f_hits(vdev_stats['hits']))
prt_i2('Miss ratio:', f_perc(vdev_stats['misses'], vdev_cache_total),
f_hits(vdev_stats['misses']))
prt_i2('Delegations:', f_perc(vdev_stats['delegations'], vdev_cache_total),
f_hits(vdev_stats['delegations']))
print()
def section_zil(kstats_dict):
"""Collect information on the ZFS Intent Log. Some of the information
taken from https://github.com/openzfs/zfs/blob/master/include/sys/zil.h
@ -1015,7 +974,6 @@ section_calls = {'arc': section_arc,
'l2arc': section_l2arc,
'spl': section_spl,
'tunables': section_tunables,
'vdev': section_vdev,
'zil': section_zil}

View File

@ -33,6 +33,7 @@
* under sponsorship from the FreeBSD Foundation.
* Copyright (c) 2021 Allan Jude
* Copyright (c) 2021 Toomas Soome <tsoome@me.com>
* Copyright (c) 2023, Klara Inc.
*/
#include <stdio.h>
@ -326,7 +327,7 @@ sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
int err;
struct sublivelist_verify *sv = args;
zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare,
zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare, NULL,
sizeof (sublivelist_verify_block_refcnt_t));
err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
@ -390,7 +391,7 @@ sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
{
(void) args;
sublivelist_verify_t sv;
zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
sizeof (sublivelist_verify_block_t));
int err = sublivelist_verify_func(&sv, dle);
zfs_btree_clear(&sv.sv_leftover);
@ -682,7 +683,7 @@ livelist_metaslab_validate(spa_t *spa)
(void) printf("Verifying deleted livelist entries\n");
sublivelist_verify_t sv;
zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
sizeof (sublivelist_verify_block_t));
iterate_deleted_livelists(spa, livelist_verify, &sv);
@ -716,7 +717,7 @@ livelist_metaslab_validate(spa_t *spa)
mv.mv_start = m->ms_start;
mv.mv_end = m->ms_start + m->ms_size;
zfs_btree_create(&mv.mv_livelist_allocs,
livelist_block_compare,
livelist_block_compare, NULL,
sizeof (sublivelist_verify_block_t));
mv_populate_livelist_allocs(&mv, &sv);
@ -789,6 +790,9 @@ usage(void)
"\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
"\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] [-K <key>]\n"
"\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]\n"
"\t%s -B [-e [-V] [-p <path> ...]] [-I <inflight I/Os>]\n"
"\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
"\t\t[-K <key>] <poolname>/<objset id> [<backupflags>]\n"
"\t%s [-v] <bookmark>\n"
"\t%s -C [-A] [-U <cache>]\n"
"\t%s -l [-Aqu] <device>\n"
@ -802,7 +806,7 @@ usage(void)
"\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
"<poolname>\n\n",
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
cmdname, cmdname, cmdname, cmdname);
cmdname, cmdname, cmdname, cmdname, cmdname);
(void) fprintf(stderr, " Dataset name must include at least one "
"separator character '/' or '@'\n");
@ -825,6 +829,8 @@ usage(void)
(void) fprintf(stderr, " Options to control amount of output:\n");
(void) fprintf(stderr, " -b --block-stats "
"block statistics\n");
(void) fprintf(stderr, " -B --backup "
"backup stream\n");
(void) fprintf(stderr, " -c --checksum "
"checksum all metadata (twice for all data) blocks\n");
(void) fprintf(stderr, " -C --config "
@ -4875,6 +4881,81 @@ dump_path(char *ds, char *path, uint64_t *retobj)
return (err);
}
static int
dump_backup_bytes(objset_t *os, void *buf, int len, void *arg)
{
const char *p = (const char *)buf;
ssize_t nwritten;
(void) os;
(void) arg;
/* Write the data out, handling short writes and signals. */
while ((nwritten = write(STDOUT_FILENO, p, len)) < len) {
if (nwritten < 0) {
if (errno == EINTR)
continue;
return (errno);
}
p += nwritten;
len -= nwritten;
}
return (0);
}
static void
dump_backup(const char *pool, uint64_t objset_id, const char *flagstr)
{
boolean_t embed = B_FALSE;
boolean_t large_block = B_FALSE;
boolean_t compress = B_FALSE;
boolean_t raw = B_FALSE;
const char *c;
for (c = flagstr; c != NULL && *c != '\0'; c++) {
switch (*c) {
case 'e':
embed = B_TRUE;
break;
case 'L':
large_block = B_TRUE;
break;
case 'c':
compress = B_TRUE;
break;
case 'w':
raw = B_TRUE;
break;
default:
fprintf(stderr, "dump_backup: invalid flag "
"'%c'\n", *c);
return;
}
}
if (isatty(STDOUT_FILENO)) {
fprintf(stderr, "dump_backup: stream cannot be written "
"to a terminal\n");
return;
}
offset_t off = 0;
dmu_send_outparams_t out = {
.dso_outfunc = dump_backup_bytes,
.dso_dryrun = B_FALSE,
};
int err = dmu_send_obj(pool, objset_id, /* fromsnap */0, embed,
large_block, compress, raw, /* saved */ B_FALSE, STDOUT_FILENO,
&off, &out);
if (err != 0) {
fprintf(stderr, "dump_backup: dmu_send_obj: %s\n",
strerror(err));
return;
}
}
static int
zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
{
@ -8465,9 +8546,9 @@ zdb_read_block(char *thing, spa_t *spa)
*/
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
ZIO_FLAG_OPTIONAL, NULL, NULL));
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
NULL, NULL));
}
error = zio_wait(zio);
@ -8561,7 +8642,6 @@ zdb_read_block(char *thing, spa_t *spa)
zio_nowait(zio_vdev_child_io(czio, bp, vd,
offset, pabd, psize, ZIO_TYPE_READ,
ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE |
ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
@ -8695,6 +8775,7 @@ main(int argc, char **argv)
struct option long_options[] = {
{"ignore-assertions", no_argument, NULL, 'A'},
{"block-stats", no_argument, NULL, 'b'},
{"backup", no_argument, NULL, 'B'},
{"checksum", no_argument, NULL, 'c'},
{"config", no_argument, NULL, 'C'},
{"datasets", no_argument, NULL, 'd'},
@ -8736,10 +8817,11 @@ main(int argc, char **argv)
};
while ((c = getopt_long(argc, argv,
"AbcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
"AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
long_options, NULL)) != -1) {
switch (c) {
case 'b':
case 'B':
case 'c':
case 'C':
case 'd':
@ -8887,7 +8969,7 @@ main(int argc, char **argv)
verbose = MAX(verbose, 1);
for (c = 0; c < 256; c++) {
if (dump_all && strchr("AeEFkKlLNOPrRSXy", c) == NULL)
if (dump_all && strchr("ABeEFkKlLNOPrRSXy", c) == NULL)
dump_opt[c] = 1;
if (dump_opt[c])
dump_opt[c] += verbose;
@ -9073,7 +9155,8 @@ main(int argc, char **argv)
checkpoint_pool, error);
}
} else if (target_is_spa || dump_opt['R'] || objset_id == 0) {
} else if (target_is_spa || dump_opt['R'] || dump_opt['B'] ||
objset_id == 0) {
zdb_set_skip_mmp(target);
error = spa_open_rewind(target, &spa, FTAG, policy,
NULL);
@ -9209,7 +9292,10 @@ main(int argc, char **argv)
strerror(errno));
}
}
if (os != NULL) {
if (dump_opt['B']) {
dump_backup(target, objset_id,
argc > 0 ? argv[0] : NULL);
} else if (os != NULL) {
dump_objset(os);
} else if (zopt_object_args > 0 && !dump_opt['m']) {
dump_objset(spa->spa_meta_objset);

View File

@ -369,9 +369,7 @@ zfs_agent_consumer_thread(void *arg)
return (NULL);
}
if ((event = (list_head(&agent_events))) != NULL) {
list_remove(&agent_events, event);
if ((event = list_remove_head(&agent_events)) != NULL) {
(void) pthread_mutex_unlock(&agent_lock);
/* dispatch to all event subscribers */
@ -434,8 +432,7 @@ zfs_agent_fini(void)
(void) pthread_join(g_agents_tid, NULL);
/* drain any pending events */
while ((event = (list_head(&agent_events))) != NULL) {
list_remove(&agent_events, event);
while ((event = list_remove_head(&agent_events)) != NULL) {
nvlist_free(event->ae_nvl);
free(event);
}

View File

@ -1288,17 +1288,14 @@ zfs_slm_fini(void)
tpool_destroy(g_tpool);
}
while ((pool = (list_head(&g_pool_list))) != NULL) {
list_remove(&g_pool_list, pool);
while ((pool = list_remove_head(&g_pool_list)) != NULL) {
zpool_close(pool->uap_zhp);
free(pool);
}
list_destroy(&g_pool_list);
while ((device = (list_head(&g_device_list))) != NULL) {
list_remove(&g_device_list, device);
while ((device = list_remove_head(&g_device_list)) != NULL)
free(device);
}
list_destroy(&g_device_list);
libzfs_fini(g_zfshdl);

View File

@ -36,31 +36,49 @@ import argparse
from argparse import RawTextHelpFormatter
cols = {
# hdr: [size, scale, kstat name]
# hdr: [size, scale, kstat name]
"time": [8, -1, "time"],
"pool": [12, -1, "pool"],
"ds": [12, -1, "dataset_name"],
"obj": [12, -1, "objset"],
"zcc": [10, 1000, "zil_commit_count"],
"zcwc": [10, 1000, "zil_commit_writer_count"],
"ziic": [10, 1000, "zil_itx_indirect_count"],
"zic": [10, 1000, "zil_itx_count"],
"ziib": [10, 1024, "zil_itx_indirect_bytes"],
"zicc": [10, 1000, "zil_itx_copied_count"],
"zicb": [10, 1024, "zil_itx_copied_bytes"],
"zinc": [10, 1000, "zil_itx_needcopy_count"],
"zinb": [10, 1024, "zil_itx_needcopy_bytes"],
"zimnc": [10, 1000, "zil_itx_metaslab_normal_count"],
"zimnb": [10, 1024, "zil_itx_metaslab_normal_bytes"],
"zimsc": [10, 1000, "zil_itx_metaslab_slog_count"],
"zimsb": [10, 1024, "zil_itx_metaslab_slog_bytes"],
"cc": [5, 1000, "zil_commit_count"],
"cwc": [5, 1000, "zil_commit_writer_count"],
"ic": [5, 1000, "zil_itx_count"],
"iic": [5, 1000, "zil_itx_indirect_count"],
"iib": [5, 1024, "zil_itx_indirect_bytes"],
"icc": [5, 1000, "zil_itx_copied_count"],
"icb": [5, 1024, "zil_itx_copied_bytes"],
"inc": [5, 1000, "zil_itx_needcopy_count"],
"inb": [5, 1024, "zil_itx_needcopy_bytes"],
"idc": [5, 1000, "icc+inc"],
"idb": [5, 1024, "icb+inb"],
"iwc": [5, 1000, "iic+idc"],
"iwb": [5, 1024, "iib+idb"],
"imnc": [6, 1000, "zil_itx_metaslab_normal_count"],
"imnb": [6, 1024, "zil_itx_metaslab_normal_bytes"],
"imnw": [6, 1024, "zil_itx_metaslab_normal_write"],
"imna": [6, 1024, "zil_itx_metaslab_normal_alloc"],
"imsc": [6, 1000, "zil_itx_metaslab_slog_count"],
"imsb": [6, 1024, "zil_itx_metaslab_slog_bytes"],
"imsw": [6, 1024, "zil_itx_metaslab_slog_write"],
"imsa": [6, 1024, "zil_itx_metaslab_slog_alloc"],
"imc": [5, 1000, "imnc+imsc"],
"imb": [5, 1024, "imnb+imsb"],
"imw": [5, 1024, "imnw+imsw"],
"ima": [5, 1024, "imna+imsa"],
"se%": [3, 100, "imb/ima"],
"sen%": [4, 100, "imnb/imna"],
"ses%": [4, 100, "imsb/imsa"],
"te%": [3, 100, "imb/imw"],
"ten%": [4, 100, "imnb/imnw"],
"tes%": [4, 100, "imsb/imsw"],
}
hdr = ["time", "pool", "ds", "obj", "zcc", "zcwc", "ziic", "zic", "ziib", \
"zicc", "zicb", "zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
hdr = ["time", "ds", "cc", "ic", "idc", "idb", "iic", "iib",
"imnc", "imnw", "imsc", "imsw"]
ghdr = ["time", "zcc", "zcwc", "ziic", "zic", "ziib", "zicc", "zicb",
"zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
ghdr = ["time", "cc", "ic", "idc", "idb", "iic", "iib",
"imnc", "imnw", "imsc", "imsw"]
cmd = ("Usage: zilstat [-hgdv] [-i interval] [-p pool_name]")
@ -105,7 +123,7 @@ def print_header():
global sep
for col in hdr:
new_col = col
if interval > 0 and col not in ['time', 'pool', 'ds', 'obj']:
if interval > 0 and cols[col][1] > 100:
new_col += "/s"
sys.stdout.write("%*s%s" % (cols[col][0], new_col, sep))
sys.stdout.write("\n")
@ -115,7 +133,7 @@ def print_values(v):
global sep
for col in hdr:
val = v[cols[col][2]]
if col not in ['time', 'pool', 'ds', 'obj'] and interval > 0:
if interval > 0 and cols[col][1] > 100:
val = v[cols[col][2]] // interval
sys.stdout.write("%s%s" % (
prettynum(cols[col][0], cols[col][1], val), sep))
@ -237,9 +255,7 @@ def init():
invalid = []
for ele in hdr:
if gFlag and ele not in ghdr:
invalid.append(ele)
elif ele not in cols:
if ele not in cols:
invalid.append(ele)
if len(invalid) > 0:
@ -403,17 +419,17 @@ def calculate_diff():
diff = copy.deepcopy(curr)
for pool in curr:
for objset in curr[pool]:
for col in hdr:
if col not in ['time', 'pool', 'ds', 'obj']:
key = cols[col][2]
# If prev is NULL, this is the
# first time we are here
if not prev:
diff[pool][objset][key] = 0
else:
diff[pool][objset][key] \
= curr[pool][objset][key] \
- prev[pool][objset][key]
for key in curr[pool][objset]:
if not isinstance(diff[pool][objset][key], int):
continue
# If prev is NULL, this is the
# first time we are here
if not prev:
diff[pool][objset][key] = 0
else:
diff[pool][objset][key] \
= curr[pool][objset][key] \
- prev[pool][objset][key]
def zil_build_dict(pool = "GLOBAL"):
global kstat
@ -425,10 +441,77 @@ def zil_build_dict(pool = "GLOBAL"):
if objset not in curr[pool]:
curr[pool][objset] = dict()
curr[pool][objset][key] = val
curr[pool][objset]["pool"] = pool
curr[pool][objset]["objset"] = objset
curr[pool][objset]["time"] = time.strftime("%H:%M:%S", \
time.localtime())
def zil_extend_dict():
global diff
for pool in diff:
for objset in diff[pool]:
diff[pool][objset]["pool"] = pool
diff[pool][objset]["objset"] = objset
diff[pool][objset]["time"] = time.strftime("%H:%M:%S", \
time.localtime())
diff[pool][objset]["icc+inc"] = \
diff[pool][objset]["zil_itx_copied_count"] + \
diff[pool][objset]["zil_itx_needcopy_count"]
diff[pool][objset]["icb+inb"] = \
diff[pool][objset]["zil_itx_copied_bytes"] + \
diff[pool][objset]["zil_itx_needcopy_bytes"]
diff[pool][objset]["iic+idc"] = \
diff[pool][objset]["zil_itx_indirect_count"] + \
diff[pool][objset]["zil_itx_copied_count"] + \
diff[pool][objset]["zil_itx_needcopy_count"]
diff[pool][objset]["iib+idb"] = \
diff[pool][objset]["zil_itx_indirect_bytes"] + \
diff[pool][objset]["zil_itx_copied_bytes"] + \
diff[pool][objset]["zil_itx_needcopy_bytes"]
diff[pool][objset]["imnc+imsc"] = \
diff[pool][objset]["zil_itx_metaslab_normal_count"] + \
diff[pool][objset]["zil_itx_metaslab_slog_count"]
diff[pool][objset]["imnb+imsb"] = \
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] + \
diff[pool][objset]["zil_itx_metaslab_slog_bytes"]
diff[pool][objset]["imnw+imsw"] = \
diff[pool][objset]["zil_itx_metaslab_normal_write"] + \
diff[pool][objset]["zil_itx_metaslab_slog_write"]
diff[pool][objset]["imna+imsa"] = \
diff[pool][objset]["zil_itx_metaslab_normal_alloc"] + \
diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
if diff[pool][objset]["imna+imsa"] > 0:
diff[pool][objset]["imb/ima"] = 100 * \
diff[pool][objset]["imnb+imsb"] // \
diff[pool][objset]["imna+imsa"]
else:
diff[pool][objset]["imb/ima"] = 100
if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
diff[pool][objset]["imnb/imna"] = 100 * \
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
diff[pool][objset]["zil_itx_metaslab_normal_alloc"]
else:
diff[pool][objset]["imnb/imna"] = 100
if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
diff[pool][objset]["imsb/imsa"] = 100 * \
diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
else:
diff[pool][objset]["imsb/imsa"] = 100
if diff[pool][objset]["imnw+imsw"] > 0:
diff[pool][objset]["imb/imw"] = 100 * \
diff[pool][objset]["imnb+imsb"] // \
diff[pool][objset]["imnw+imsw"]
else:
diff[pool][objset]["imb/imw"] = 100
if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
diff[pool][objset]["imnb/imnw"] = 100 * \
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
diff[pool][objset]["zil_itx_metaslab_normal_write"]
else:
diff[pool][objset]["imnb/imnw"] = 100
if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
diff[pool][objset]["imsb/imsw"] = 100 * \
diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
diff[pool][objset]["zil_itx_metaslab_slog_write"]
else:
diff[pool][objset]["imsb/imsw"] = 100
def sign_handler_epipe(sig, frame):
print("Caught EPIPE signal: " + str(frame))
@ -437,30 +520,31 @@ def sign_handler_epipe(sig, frame):
def main():
global interval
global curr
global curr, diff
hprint = False
init()
signal.signal(signal.SIGINT, signal.SIG_DFL)
signal.signal(signal.SIGPIPE, sign_handler_epipe)
zil_process_kstat()
if not curr:
print ("Error: No stats to show")
sys.exit(0)
print_header()
if interval > 0:
time.sleep(interval)
while True:
calculate_diff()
if not diff:
print ("Error: No stats to show")
sys.exit(0)
if hprint == False:
print_header()
hprint = True
zil_extend_dict()
print_dict(diff)
time.sleep(interval)
else:
zil_process_kstat()
if not curr:
print ("Error: No stats to show")
sys.exit(0)
print_header()
print_dict(curr)
diff = curr
zil_extend_dict()
print_dict(diff)
if __name__ == '__main__':
main()

View File

@ -145,6 +145,7 @@ dist_zpoolcompat_DATA = \
%D%/compatibility.d/openzfs-2.0-linux \
%D%/compatibility.d/openzfs-2.1-freebsd \
%D%/compatibility.d/openzfs-2.1-linux \
%D%/compatibility.d/openzfs-2.2 \
%D%/compatibility.d/openzfsonosx-1.7.0 \
%D%/compatibility.d/openzfsonosx-1.8.1 \
%D%/compatibility.d/openzfsonosx-1.9.3 \
@ -168,12 +169,20 @@ zpoolcompatlinks = \
"freebsd-11.3 freebsd-12.0" \
"freebsd-11.3 freebsd-12.1" \
"freebsd-11.3 freebsd-12.2" \
"freebsd-11.3 freebsd-12.3" \
"freebsd-11.3 freebsd-12.4" \
"openzfs-2.1-freebsd freebsd-13.0" \
"openzfs-2.1-freebsd freebsd-13.1" \
"openzfs-2.1-freebsd freebsd-13.2" \
"freebsd-11.3 freenas-11.3" \
"freenas-11.0 freenas-11.1" \
"openzfsonosx-1.9.3 openzfsonosx-1.9.4" \
"openzfs-2.0-freebsd truenas-12.0" \
"zol-0.7 ubuntu-18.04" \
"zol-0.8 ubuntu-20.04"
"zol-0.8 ubuntu-20.04" \
"openzfs-2.1-linux ubuntu-22.04" \
"openzfs-2.2 openzfs-2.2-linux" \
"openzfs-2.2 openzfs-2.2-freebsd"
zpoolconfdir = $(sysconfdir)/zfs/zpool.d
INSTALL_DATA_HOOKS += zpool-install-data-hook

View File

@ -8,5 +8,7 @@ extensible_dataset
filesystem_limits
hole_birth
large_blocks
livelist
lz4_compress
spacemap_histogram
zpool_checkpoint

View File

@ -0,0 +1,40 @@
# Features supported by OpenZFS 2.2 on Linux and FreeBSD
allocation_classes
async_destroy
blake3
block_cloning
bookmark_v2
bookmark_written
bookmarks
device_rebuild
device_removal
draid
edonr
embedded_data
empty_bpobj
enabled_txg
encryption
extensible_dataset
filesystem_limits
head_errlog
hole_birth
large_blocks
large_dnode
livelist
log_spacemap
lz4_compress
multi_vdev_crash_dump
obsolete_counts
project_quota
redacted_datasets
redaction_bookmarks
resilver_defer
sha512
skein
spacemap_histogram
spacemap_v2
userobj_accounting
vdev_zaps_v2
zilsaxattr
zpool_checkpoint
zstd_compress

View File

@ -0,0 +1,26 @@
AC_DEFUN([ZFS_AC_KERNEL_SRC_RECLAIMED], [
dnl #
dnl # 6.4 API change
dnl # The reclaimed_slab of struct reclaim_state
dnl # is renamed to reclaimed
dnl #
ZFS_LINUX_TEST_SRC([reclaim_state_reclaimed], [
#include <linux/swap.h>
static const struct reclaim_state
rs __attribute__ ((unused)) = {
.reclaimed = 100,
};
],[])
])
AC_DEFUN([ZFS_AC_KERNEL_RECLAIMED], [
AC_MSG_CHECKING([whether struct reclaim_state has reclaimed field])
ZFS_LINUX_TEST_RESULT([reclaim_state_reclaimed], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_RECLAIM_STATE_RECLAIMED, 1,
[struct reclaim_state has reclaimed])
],[
AC_MSG_RESULT(no)
])
])
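
A rough sketch of how such a configure result is typically consumed (the helper below is hypothetical and not part of this commit; only the HAVE_RECLAIM_STATE_RECLAIMED define and the two field names come from the check above):

/* Hypothetical consumer: pick whichever field name the running kernel provides. */
static inline void
account_reclaimed_pages(struct reclaim_state *rs, unsigned long nr)
{
#ifdef HAVE_RECLAIM_STATE_RECLAIMED
	rs->reclaimed += nr;		/* Linux 6.4+: renamed field */
#else
	rs->reclaimed_slab += nr;	/* older kernels */
#endif
}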

View File

@ -153,6 +153,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_IATTR_VFSID
ZFS_AC_KERNEL_SRC_FILEMAP
ZFS_AC_KERNEL_SRC_WRITEPAGE_T
ZFS_AC_KERNEL_SRC_RECLAIMED
case "$host_cpu" in
powerpc*)
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@ -285,6 +286,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_IATTR_VFSID
ZFS_AC_KERNEL_FILEMAP
ZFS_AC_KERNEL_WRITEPAGE_T
ZFS_AC_KERNEL_RECLAIMED
case "$host_cpu" in
powerpc*)
ZFS_AC_KERNEL_CPU_HAS_FEATURE

View File

@ -7,8 +7,8 @@ NAME := $(shell awk '$$1 == "Name:" { print $$2; }' META)
LINUX_MIN := $(shell awk '/Linux-Minimum:/{print $$2}' META)
LINUX_NEXT := $(shell awk -F'[ .]' '/Linux-Maximum:/{print $$2 "." $$3+1}' META)
DKMSFILES := module include config zfs.release.in autogen.sh META AUTHORS \
COPYRIGHT LICENSE README.md
DKMSFILES := module include config zfs.release.in autogen.sh copy-builtin META AUTHORS \
COPYRIGHT LICENSE README.md CODE_OF_CONDUCT.md NEWS NOTICE RELEASES.md
ifndef KVERS
KVERS=$(shell uname -r)

View File

@ -344,7 +344,7 @@ mount_fs()
# Need the _original_ datasets mountpoint!
mountpoint=$(get_fs_value "$fs" mountpoint)
ZFS_CMD="mount.zfs -o zfsutil"
ZFS_CMD="mount -o zfsutil -t zfs"
if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
# Can't use the mountpoint property. Might be one of our
# clones. Check the 'org.zol:mountpoint' property set in
@ -361,7 +361,7 @@ mount_fs()
fi
# Don't use mount.zfs -o zfsutils for legacy mountpoint
if [ "$mountpoint" = "legacy" ]; then
ZFS_CMD="mount.zfs"
ZFS_CMD="mount -t zfs"
fi
# Last hail-mary: Hope 'rootmnt' is set!
mountpoint=""
@ -944,7 +944,7 @@ mountroot()
echo " not specified on the kernel command line."
echo ""
echo "Manually mount the root filesystem on $rootmnt and then exit."
echo "Hint: Try: mount.zfs -o zfsutil ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
echo "Hint: Try: mount -o zfsutil -t zfs ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
shell
fi

View File

@ -67,6 +67,7 @@ pam_syslog(pam_handle_t *pamh, int loglevel, const char *fmt, ...)
#include <sys/mman.h>
static const char PASSWORD_VAR_NAME[] = "pam_zfs_key_authtok";
static const char OLD_PASSWORD_VAR_NAME[] = "pam_zfs_key_oldauthtok";
static libzfs_handle_t *g_zfs;
@ -160,10 +161,10 @@ pw_free(pw_password_t *pw)
}
static pw_password_t *
pw_fetch(pam_handle_t *pamh)
pw_fetch(pam_handle_t *pamh, int tok)
{
const char *token;
if (pam_get_authtok(pamh, PAM_AUTHTOK, &token, NULL) != PAM_SUCCESS) {
if (pam_get_authtok(pamh, tok, &token, NULL) != PAM_SUCCESS) {
pam_syslog(pamh, LOG_ERR,
"couldn't get password from PAM stack");
return (NULL);
@ -177,13 +178,13 @@ pw_fetch(pam_handle_t *pamh)
}
static const pw_password_t *
pw_fetch_lazy(pam_handle_t *pamh)
pw_fetch_lazy(pam_handle_t *pamh, int tok, const char *var_name)
{
pw_password_t *pw = pw_fetch(pamh);
pw_password_t *pw = pw_fetch(pamh, tok);
if (pw == NULL) {
return (NULL);
}
int ret = pam_set_data(pamh, PASSWORD_VAR_NAME, pw, destroy_pw);
int ret = pam_set_data(pamh, var_name, pw, destroy_pw);
if (ret != PAM_SUCCESS) {
pw_free(pw);
pam_syslog(pamh, LOG_ERR, "pam_set_data failed");
@ -193,23 +194,23 @@ pw_fetch_lazy(pam_handle_t *pamh)
}
static const pw_password_t *
pw_get(pam_handle_t *pamh)
pw_get(pam_handle_t *pamh, int tok, const char *var_name)
{
const pw_password_t *authtok = NULL;
int ret = pam_get_data(pamh, PASSWORD_VAR_NAME,
int ret = pam_get_data(pamh, var_name,
(const void**)(&authtok));
if (ret == PAM_SUCCESS)
return (authtok);
if (ret == PAM_NO_MODULE_DATA)
return (pw_fetch_lazy(pamh));
return (pw_fetch_lazy(pamh, tok, var_name));
pam_syslog(pamh, LOG_ERR, "password not available");
return (NULL);
}
static int
pw_clear(pam_handle_t *pamh)
pw_clear(pam_handle_t *pamh, const char *var_name)
{
int ret = pam_set_data(pamh, PASSWORD_VAR_NAME, NULL, NULL);
int ret = pam_set_data(pamh, var_name, NULL, NULL);
if (ret != PAM_SUCCESS) {
pam_syslog(pamh, LOG_ERR, "clearing password failed");
return (-1);
@ -386,7 +387,7 @@ decrypt_mount(pam_handle_t *pamh, const char *ds_name,
int ret = lzc_load_key(ds_name, noop, (uint8_t *)key->value,
WRAPPING_KEY_LEN);
pw_free(key);
if (ret) {
if (ret && ret != EEXIST) {
pam_syslog(pamh, LOG_ERR, "load_key failed: %d", ret);
zfs_close(ds);
return (-1);
@ -406,14 +407,14 @@ decrypt_mount(pam_handle_t *pamh, const char *ds_name,
}
static int
unmount_unload(pam_handle_t *pamh, const char *ds_name)
unmount_unload(pam_handle_t *pamh, const char *ds_name, boolean_t force)
{
zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM);
if (ds == NULL) {
pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name);
return (-1);
}
int ret = zfs_unmount(ds, NULL, 0);
int ret = zfs_unmount(ds, NULL, force ? MS_FORCE : 0);
if (ret) {
pam_syslog(pamh, LOG_ERR, "zfs_unmount failed with: %d", ret);
zfs_close(ds);
@ -435,9 +436,13 @@ typedef struct {
char *runstatedir;
char *homedir;
char *dsname;
uid_t uid_min;
uid_t uid_max;
uid_t uid;
const char *username;
int unmount_and_unload;
boolean_t unmount_and_unload;
boolean_t force_unmount;
boolean_t recursive_homes;
} zfs_key_config_t;
static int
@ -469,9 +474,13 @@ zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config,
free(config->homes_prefix);
return (PAM_USER_UNKNOWN);
}
config->uid_min = 1000;
config->uid_max = MAXUID;
config->uid = entry->pw_uid;
config->username = name;
config->unmount_and_unload = 1;
config->unmount_and_unload = B_TRUE;
config->force_unmount = B_FALSE;
config->recursive_homes = B_FALSE;
config->dsname = NULL;
config->homedir = NULL;
for (int c = 0; c < argc; c++) {
@ -481,8 +490,16 @@ zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config,
} else if (strncmp(argv[c], "runstatedir=", 12) == 0) {
free(config->runstatedir);
config->runstatedir = strdup(argv[c] + 12);
} else if (strncmp(argv[c], "uid_min=", 8) == 0) {
sscanf(argv[c] + 8, "%u", &config->uid_min);
} else if (strncmp(argv[c], "uid_max=", 8) == 0) {
sscanf(argv[c] + 8, "%u", &config->uid_max);
} else if (strcmp(argv[c], "nounmount") == 0) {
config->unmount_and_unload = 0;
config->unmount_and_unload = B_FALSE;
} else if (strcmp(argv[c], "forceunmount") == 0) {
config->force_unmount = B_TRUE;
} else if (strcmp(argv[c], "recursive_homes") == 0) {
config->recursive_homes = B_TRUE;
} else if (strcmp(argv[c], "prop_mountpoint") == 0) {
if (config->homedir == NULL)
config->homedir = strdup(entry->pw_dir);
@ -517,8 +534,12 @@ find_dsname_by_prop_value(zfs_handle_t *zhp, void *data)
(void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
sizeof (mountpoint), NULL, NULL, 0, B_FALSE);
if (strcmp(target->homedir, mountpoint) != 0) {
if (target->recursive_homes) {
(void) zfs_iter_filesystems_v2(zhp, 0,
find_dsname_by_prop_value, target);
}
zfs_close(zhp);
return (0);
return (target->dsname != NULL);
}
target->dsname = strdup(zfs_get_name(zhp));
@ -531,17 +552,23 @@ zfs_key_config_get_dataset(zfs_key_config_t *config)
{
if (config->homedir != NULL &&
config->homes_prefix != NULL) {
zfs_handle_t *zhp = zfs_open(g_zfs, config->homes_prefix,
ZFS_TYPE_FILESYSTEM);
if (zhp == NULL) {
pam_syslog(NULL, LOG_ERR, "dataset %s not found",
config->homes_prefix);
return (NULL);
}
if (strcmp(config->homes_prefix, "*") == 0) {
(void) zfs_iter_root(g_zfs,
find_dsname_by_prop_value, config);
} else {
zfs_handle_t *zhp = zfs_open(g_zfs,
config->homes_prefix, ZFS_TYPE_FILESYSTEM);
if (zhp == NULL) {
pam_syslog(NULL, LOG_ERR,
"dataset %s not found",
config->homes_prefix);
return (NULL);
}
(void) zfs_iter_filesystems_v2(zhp, 0,
find_dsname_by_prop_value, config);
zfs_close(zhp);
(void) zfs_iter_filesystems_v2(zhp, 0,
find_dsname_by_prop_value, config);
zfs_close(zhp);
}
char *dsname = config->dsname;
config->dsname = NULL;
return (dsname);
@ -655,8 +682,13 @@ pam_sm_authenticate(pam_handle_t *pamh, int flags,
if (config_err != PAM_SUCCESS) {
return (config_err);
}
if (config.uid < config.uid_min || config.uid > config.uid_max) {
zfs_key_config_free(&config);
return (PAM_SERVICE_ERR);
}
const pw_password_t *token = pw_fetch_lazy(pamh);
const pw_password_t *token = pw_fetch_lazy(pamh,
PAM_AUTHTOK, PASSWORD_VAR_NAME);
if (token == NULL) {
zfs_key_config_free(&config);
return (PAM_AUTH_ERR);
@ -706,10 +738,12 @@ pam_sm_chauthtok(pam_handle_t *pamh, int flags,
if (zfs_key_config_load(pamh, &config, argc, argv) != PAM_SUCCESS) {
return (PAM_SERVICE_ERR);
}
if (config.uid < 1000) {
if (config.uid < config.uid_min || config.uid > config.uid_max) {
zfs_key_config_free(&config);
return (PAM_SUCCESS);
return (PAM_SERVICE_ERR);
}
const pw_password_t *old_token = pw_get(pamh,
PAM_OLDAUTHTOK, OLD_PASSWORD_VAR_NAME);
{
if (pam_zfs_init(pamh) != 0) {
zfs_key_config_free(&config);
@ -721,49 +755,62 @@ pam_sm_chauthtok(pam_handle_t *pamh, int flags,
zfs_key_config_free(&config);
return (PAM_SERVICE_ERR);
}
int key_loaded = is_key_loaded(pamh, dataset);
if (key_loaded == -1) {
if (!old_token) {
pam_syslog(pamh, LOG_ERR,
"old password from PAM stack is null");
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);
return (PAM_SERVICE_ERR);
}
free(dataset);
pam_zfs_free();
if (! key_loaded) {
if (decrypt_mount(pamh, dataset,
old_token->value, B_TRUE) == -1) {
pam_syslog(pamh, LOG_ERR,
"key not loaded, returning try_again");
"old token mismatch");
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);
return (PAM_PERM_DENIED);
}
}
if ((flags & PAM_UPDATE_AUTHTOK) != 0) {
const pw_password_t *token = pw_get(pamh);
const pw_password_t *token = pw_get(pamh, PAM_AUTHTOK,
PASSWORD_VAR_NAME);
if (token == NULL) {
pam_syslog(pamh, LOG_ERR, "new password unavailable");
pam_zfs_free();
zfs_key_config_free(&config);
return (PAM_SERVICE_ERR);
}
if (pam_zfs_init(pamh) != 0) {
zfs_key_config_free(&config);
pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
return (PAM_SERVICE_ERR);
}
char *dataset = zfs_key_config_get_dataset(&config);
if (!dataset) {
pam_zfs_free();
zfs_key_config_free(&config);
pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
pw_clear(pamh, PASSWORD_VAR_NAME);
return (PAM_SERVICE_ERR);
}
if (change_key(pamh, dataset, token->value) == -1) {
int was_loaded = is_key_loaded(pamh, dataset);
if (!was_loaded && decrypt_mount(pamh, dataset,
old_token->value, B_FALSE) == -1) {
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);
pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
pw_clear(pamh, PASSWORD_VAR_NAME);
return (PAM_SERVICE_ERR);
}
int changed = change_key(pamh, dataset, token->value);
if (!was_loaded) {
unmount_unload(pamh, dataset, config.force_unmount);
}
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);
if (pw_clear(pamh) == -1) {
if (pw_clear(pamh, OLD_PASSWORD_VAR_NAME) == -1 ||
pw_clear(pamh, PASSWORD_VAR_NAME) == -1 || changed == -1) {
return (PAM_SERVICE_ERR);
}
} else {
@ -788,7 +835,7 @@ pam_sm_open_session(pam_handle_t *pamh, int flags,
return (PAM_SESSION_ERR);
}
if (config.uid < 1000) {
if (config.uid < config.uid_min || config.uid > config.uid_max) {
zfs_key_config_free(&config);
return (PAM_SUCCESS);
}
@ -799,7 +846,8 @@ pam_sm_open_session(pam_handle_t *pamh, int flags,
return (PAM_SUCCESS);
}
const pw_password_t *token = pw_get(pamh);
const pw_password_t *token = pw_get(pamh,
PAM_AUTHTOK, PASSWORD_VAR_NAME);
if (token == NULL) {
zfs_key_config_free(&config);
return (PAM_SESSION_ERR);
@ -823,7 +871,7 @@ pam_sm_open_session(pam_handle_t *pamh, int flags,
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);
if (pw_clear(pamh) == -1) {
if (pw_clear(pamh, PASSWORD_VAR_NAME) == -1) {
return (PAM_SERVICE_ERR);
}
return (PAM_SUCCESS);
@ -846,7 +894,7 @@ pam_sm_close_session(pam_handle_t *pamh, int flags,
if (zfs_key_config_load(pamh, &config, argc, argv) != PAM_SUCCESS) {
return (PAM_SESSION_ERR);
}
if (config.uid < 1000) {
if (config.uid < config.uid_min || config.uid > config.uid_max) {
zfs_key_config_free(&config);
return (PAM_SUCCESS);
}
@ -868,7 +916,7 @@ pam_sm_close_session(pam_handle_t *pamh, int flags,
zfs_key_config_free(&config);
return (PAM_SESSION_ERR);
}
if (unmount_unload(pamh, dataset) == -1) {
if (unmount_unload(pamh, dataset, config.force_unmount) == -1) {
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);

View File

@ -75,7 +75,7 @@ typedef struct kmem_cache {
extern uint64_t spl_kmem_cache_inuse(kmem_cache_t *cache);
extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache);
__attribute__((alloc_size(1)))
__attribute__((malloc, alloc_size(1)))
void *zfs_kmem_alloc(size_t size, int kmflags);
void zfs_kmem_free(void *buf, size_t size);
uint64_t kmem_size(void);
@ -83,6 +83,7 @@ kmem_cache_t *kmem_cache_create(const char *name, size_t bufsize, size_t align,
int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
void kmem_cache_destroy(kmem_cache_t *cache);
__attribute__((malloc))
void *kmem_cache_alloc(kmem_cache_t *cache, int flags);
void kmem_cache_free(kmem_cache_t *cache, void *buf);
boolean_t kmem_cache_reap_active(void);

View File

@ -68,7 +68,6 @@ enum scope_prefix_types {
zfs_trim,
zfs_txg,
zfs_vdev,
zfs_vdev_cache,
zfs_vdev_file,
zfs_vdev_mirror,
zfs_vnops,

View File

@ -31,10 +31,10 @@
#include <linux/vmalloc.h>
extern int kmem_debugging(void);
extern char *kmem_vasprintf(const char *fmt, va_list ap)
__attribute__((format(printf, 1, 0)));
extern char *kmem_asprintf(const char *fmt, ...)
__attribute__((format(printf, 1, 2)));
__attribute__((format(printf, 1, 0)))
extern char *kmem_vasprintf(const char *fmt, va_list ap);
__attribute__((format(printf, 1, 2)))
extern char *kmem_asprintf(const char *fmt, ...);
extern char *kmem_strdup(const char *str);
extern void kmem_strfree(char *str);
@ -186,10 +186,10 @@ extern unsigned int spl_kmem_alloc_max;
#define kmem_free(ptr, sz) spl_kmem_free((ptr), (sz))
#define kmem_cache_reap_active spl_kmem_cache_reap_active
extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line)
__attribute__((alloc_size(1)));
extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line)
__attribute__((alloc_size(1)));
__attribute__((malloc, alloc_size(1)))
extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line);
__attribute__((malloc, alloc_size(1)))
extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line);
extern void spl_kmem_free(const void *ptr, size_t sz);
/*

View File

@ -91,8 +91,10 @@ typedef struct vmem { } vmem_t;
#define vmem_zalloc(sz, fl) spl_vmem_zalloc((sz), (fl), __func__, __LINE__)
#define vmem_free(ptr, sz) spl_vmem_free((ptr), (sz))
extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line);
extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line);
extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line)
__attribute__((malloc, alloc_size(1)));
extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line)
__attribute__((malloc, alloc_size(1)));
extern void spl_vmem_free(const void *ptr, size_t sz);
int spl_vmem_init(void);

View File

@ -215,6 +215,39 @@ DEFINE_EVENT(zfs_zil_commit_io_error_class, name, \
TP_ARGS(zilog, zcw))
DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(zfs_zil__commit__io__error);
/*
* Generic support for three argument tracepoints of the form:
*
* DTRACE_PROBE3(...,
* zilog_t *, ...,
* uint64_t, ...,
* uint64_t, ...);
*/
/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_zil_block_size_class,
TP_PROTO(zilog_t *zilog, uint64_t res, uint64_t s1),
TP_ARGS(zilog, res, s1),
TP_STRUCT__entry(
ZILOG_TP_STRUCT_ENTRY
__field(uint64_t, res)
__field(uint64_t, s1)
),
TP_fast_assign(
ZILOG_TP_FAST_ASSIGN
__entry->res = res;
__entry->s1 = s1;
),
TP_printk(
ZILOG_TP_PRINTK_FMT " res %llu s1 %llu",
ZILOG_TP_PRINTK_ARGS, __entry->res, __entry->s1)
);
#define DEFINE_ZIL_BLOCK_SIZE_EVENT(name) \
DEFINE_EVENT(zfs_zil_block_size_class, name, \
TP_PROTO(zilog_t *zilog, uint64_t res, uint64_t s1), \
TP_ARGS(zilog, res, s1))
DEFINE_ZIL_BLOCK_SIZE_EVENT(zfs_zil__block__size);
#endif /* _TRACE_ZIL_H */
#undef TRACE_INCLUDE_PATH
@ -228,6 +261,7 @@ DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(zfs_zil__commit__io__error);
DEFINE_DTRACE_PROBE2(zil__process__commit__itx);
DEFINE_DTRACE_PROBE2(zil__process__normal__itx);
DEFINE_DTRACE_PROBE2(zil__commit__io__error);
DEFINE_DTRACE_PROBE3(zil__block__size);
#endif /* HAVE_DECLARE_EVENT_CLASS */
#endif /* _KERNEL */
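
Schematically, the new event class is fired through the usual DTRACE_PROBE3 wrapper with the types the class fixes; the argument names below are placeholders, not the actual call site in zil.c:

/* Placeholder arguments; only the types (zilog_t *, uint64_t, uint64_t) are prescribed. */
DTRACE_PROBE3(zil__block__size, zilog_t *, zilog,
    uint64_t, new_size, uint64_t, old_size);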

View File

@ -86,10 +86,15 @@ extern int zfs_abd_scatter_enabled;
* Allocations and deallocations
*/
__attribute__((malloc))
abd_t *abd_alloc(size_t, boolean_t);
__attribute__((malloc))
abd_t *abd_alloc_linear(size_t, boolean_t);
__attribute__((malloc))
abd_t *abd_alloc_gang(void);
__attribute__((malloc))
abd_t *abd_alloc_for_io(size_t, boolean_t);
__attribute__((malloc))
abd_t *abd_alloc_sametype(abd_t *, size_t);
boolean_t abd_size_alloc_linear(size_t);
void abd_gang_add(abd_t *, abd_t *, boolean_t);

View File

@ -105,8 +105,13 @@ typedef struct zfs_btree_index {
boolean_t bti_before;
} zfs_btree_index_t;
typedef struct btree {
typedef struct btree zfs_btree_t;
typedef void * (*bt_find_in_buf_f) (zfs_btree_t *, uint8_t *, uint32_t,
const void *, zfs_btree_index_t *);
struct btree {
int (*bt_compar) (const void *, const void *);
bt_find_in_buf_f bt_find_in_buf;
size_t bt_elem_size;
size_t bt_leaf_size;
uint32_t bt_leaf_cap;
@ -115,7 +120,54 @@ typedef struct btree {
uint64_t bt_num_nodes;
zfs_btree_hdr_t *bt_root;
zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
} zfs_btree_t;
};
/*
* Implementation of Shar's algorithm designed to accelerate binary search by
* eliminating impossible to predict branches.
*
* For optimality, this should be used to generate the search function in the
* same file as the comparator and the comparator should be marked
* `__attribute__((always_inline)) inline` so that the compiler will inline it.
*
* Arguments are:
*
* NAME - The function name for this instance of the search function. Use it
* in a subsequent call to zfs_btree_create().
* T - The element type stored inside the B-Tree.
* COMP - A comparator to compare two nodes, it must return exactly: -1, 0,
* or +1 -1 for <, 0 for ==, and +1 for >. For trivial comparisons,
* TREE_CMP() from avl.h can be used in a boilerplate function.
*/
/* BEGIN CSTYLED */
#define ZFS_BTREE_FIND_IN_BUF_FUNC(NAME, T, COMP) \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") \
static void * \
NAME(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems, \
const void *value, zfs_btree_index_t *where) \
{ \
T *i = (T *)buf; \
(void) tree; \
_Pragma("GCC unroll 9") \
while (nelems > 1) { \
uint32_t half = nelems / 2; \
nelems -= half; \
i += (COMP(&i[half - 1], value) < 0) * half; \
} \
\
int comp = COMP(i, value); \
where->bti_offset = (i - (T *)buf) + (comp < 0); \
where->bti_before = (comp != 0); \
\
if (comp == 0) { \
return (i); \
} \
\
return (NULL); \
} \
_Pragma("GCC diagnostic pop")
/* END CSTYLED */
/*
* Allocate and deallocate caches for btree nodes.
@ -129,13 +181,19 @@ void zfs_btree_fini(void);
* tree - the tree to be initialized
* compar - function to compare two nodes, it must return exactly: -1, 0, or +1
* -1 for <, 0 for ==, and +1 for >
* find - optional function to accelerate searches inside B-Tree nodes
* through Shar's algorithm and comparator inlining. Setting this to
* NULL will use a generic function. The function should be created
* using ZFS_BTREE_FIND_IN_BUF_FUNC() in the same file as compar.
* compar should be marked `__attribute__((always_inline)) inline` or
* performance is unlikely to improve very much.
* size - the value of sizeof(struct my_type)
* lsize - custom leaf size
*/
void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
size_t);
bt_find_in_buf_f, size_t);
void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *),
size_t, size_t);
bt_find_in_buf_f, size_t, size_t);
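
A minimal sketch of the intended wiring (the element type, comparator, and init function below are made-up names for illustration, not part of this change):

typedef struct my_node {
	uint64_t mn_key;
} my_node_t;

/* Comparator is always_inline so the generated search function can inline it. */
static __attribute__((always_inline)) inline int
my_node_compare(const void *a, const void *b)
{
	const my_node_t *na = a, *nb = b;
	return (TREE_CMP(na->mn_key, nb->mn_key));
}

/* Emits my_node_find(), a branch-light in-buffer search specialized for my_node_t. */
ZFS_BTREE_FIND_IN_BUF_FUNC(my_node_find, my_node_t, my_node_compare)

static void
my_tree_init(zfs_btree_t *tree)
{
	/* Passing NULL instead of my_node_find falls back to the generic search. */
	zfs_btree_create(tree, my_node_compare, my_node_find,
	    sizeof (my_node_t));
}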
/*
* Find a node with a matching value in the tree. Returns the matching node

View File

@ -1174,10 +1174,6 @@ extern void zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep,
zbookmark_phys_t *zb);
extern void name_to_errphys(char *buf, zbookmark_err_phys_t *zep);
/* vdev cache */
extern void vdev_cache_stat_init(void);
extern void vdev_cache_stat_fini(void);
/* vdev mirror */
extern void vdev_mirror_stat_init(void);
extern void vdev_mirror_stat_fini(void);

View File

@ -158,12 +158,6 @@ extern boolean_t vdev_allocatable(vdev_t *vd);
extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
extern boolean_t vdev_is_spacemap_addressable(vdev_t *vd);
extern void vdev_cache_init(vdev_t *vd);
extern void vdev_cache_fini(vdev_t *vd);
extern boolean_t vdev_cache_read(zio_t *zio);
extern void vdev_cache_write(zio_t *zio);
extern void vdev_cache_purge(vdev_t *vd);
extern void vdev_queue_init(vdev_t *vd);
extern void vdev_queue_fini(vdev_t *vd);
extern zio_t *vdev_queue_io(zio_t *zio);

View File

@ -57,8 +57,6 @@ extern "C" {
* Forward declarations that lots of things need.
*/
typedef struct vdev_queue vdev_queue_t;
typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t;
struct abd;
extern uint_t zfs_vdev_queue_depth_pct;
@ -132,23 +130,6 @@ typedef const struct vdev_ops {
/*
* Virtual device properties
*/
struct vdev_cache_entry {
struct abd *ve_abd;
uint64_t ve_offset;
clock_t ve_lastused;
avl_node_t ve_offset_node;
avl_node_t ve_lastused_node;
uint32_t ve_hits;
uint16_t ve_missed_update;
zio_t *ve_fill_io;
};
struct vdev_cache {
avl_tree_t vc_offset_tree;
avl_tree_t vc_lastused_tree;
kmutex_t vc_lock;
};
typedef struct vdev_queue_class {
uint32_t vqc_active;
@ -443,7 +424,6 @@ struct vdev {
boolean_t vdev_resilver_deferred; /* resilver deferred */
boolean_t vdev_kobj_flag; /* kobj event record */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
zio_t *vdev_probe_zio; /* root of current probe */
vdev_aux_t vdev_label_aux; /* on-disk aux state */

View File

@ -73,13 +73,15 @@ int64_t zfs_refcount_count(zfs_refcount_t *);
int64_t zfs_refcount_add(zfs_refcount_t *, const void *);
int64_t zfs_refcount_remove(zfs_refcount_t *, const void *);
/*
* Note that (add|remove)_many add/remove one reference with "number" N,
* _not_ make N references with "number" 1, which is what vanilla
* zfs_refcount_(add|remove) would do if called N times.
* Note that (add|remove)_many adds/removes one reference with "number" N,
* _not_ N references with "number" 1, which is what (add|remove)_few does,
* or what vanilla zfs_refcount_(add|remove) called N times would do.
*
* Attempting to remove a reference with number N when none exists is a
* panic on debug kernels with reference_tracking enabled.
*/
void zfs_refcount_add_few(zfs_refcount_t *, uint64_t, const void *);
void zfs_refcount_remove_few(zfs_refcount_t *, uint64_t, const void *);
int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *);
int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *);
void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
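
A small illustration of the distinction drawn above (rc and tag are hypothetical):

static void
refcount_few_vs_many_example(zfs_refcount_t *rc, const void *tag)
{
	/* Three references held by tag, each with "number" 1. */
	zfs_refcount_add_few(rc, 3, tag);
	/* ...later, released the same way they were taken. */
	zfs_refcount_remove_few(rc, 3, tag);

	/* One reference held by tag with "number" 3. */
	(void) zfs_refcount_add_many(rc, 3, tag);
	(void) zfs_refcount_remove_many(rc, 3, tag);
}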
@ -108,6 +110,10 @@ typedef struct refcount {
#define zfs_refcount_count(rc) atomic_load_64(&(rc)->rc_count)
#define zfs_refcount_add(rc, holder) atomic_inc_64_nv(&(rc)->rc_count)
#define zfs_refcount_remove(rc, holder) atomic_dec_64_nv(&(rc)->rc_count)
#define zfs_refcount_add_few(rc, number, holder) \
atomic_add_64(&(rc)->rc_count, number)
#define zfs_refcount_remove_few(rc, number, holder) \
atomic_add_64(&(rc)->rc_count, -number)
#define zfs_refcount_add_many(rc, number, holder) \
atomic_add_64_nv(&(rc)->rc_count, number)
#define zfs_refcount_remove_many(rc, number, holder) \

View File

@ -158,6 +158,7 @@ extern "C" {
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
#ifdef _KERNEL
#include <sys/zfs_znode_impl.h>
@ -280,7 +281,6 @@ extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
extern void zfs_remove_op_tables(void);
extern int zfs_create_op_tables(void);
extern dev_t zfs_cmpldev(uint64_t);
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
extern boolean_t zfs_get_vfs_flag_unmounted(objset_t *os);
extern void zfs_znode_dmu_fini(znode_t *);

View File

@ -489,18 +489,22 @@ typedef struct zil_stats {
* Transactions which have been allocated to the "normal"
* (i.e. not slog) storage pool. Note that "bytes" accumulate
* the actual log record sizes - which do not include the actual
* data in case of indirect writes.
* data in case of indirect writes. bytes <= write <= alloc.
*/
kstat_named_t zil_itx_metaslab_normal_count;
kstat_named_t zil_itx_metaslab_normal_bytes;
kstat_named_t zil_itx_metaslab_normal_write;
kstat_named_t zil_itx_metaslab_normal_alloc;
/*
* Transactions which have been allocated to the "slog" storage pool.
* If there are no separate log devices, this is the same as the
* "normal" pool.
* "normal" pool. bytes <= write <= alloc.
*/
kstat_named_t zil_itx_metaslab_slog_count;
kstat_named_t zil_itx_metaslab_slog_bytes;
kstat_named_t zil_itx_metaslab_slog_write;
kstat_named_t zil_itx_metaslab_slog_alloc;
} zil_kstat_values_t;
typedef struct zil_sums {
@ -515,8 +519,12 @@ typedef struct zil_sums {
wmsum_t zil_itx_needcopy_bytes;
wmsum_t zil_itx_metaslab_normal_count;
wmsum_t zil_itx_metaslab_normal_bytes;
wmsum_t zil_itx_metaslab_normal_write;
wmsum_t zil_itx_metaslab_normal_alloc;
wmsum_t zil_itx_metaslab_slog_count;
wmsum_t zil_itx_metaslab_slog_bytes;
wmsum_t zil_itx_metaslab_slog_write;
wmsum_t zil_itx_metaslab_slog_alloc;
} zil_sums_t;
#define ZIL_STAT_INCR(zil, stat, val) \

View File

@ -44,7 +44,7 @@ extern "C" {
* must be held.
*
* After the lwb is "opened", it can transition into the "issued" state
* via zil_lwb_write_issue(). Again, the zilog's "zl_issuer_lock" must
* via zil_lwb_write_close(). Again, the zilog's "zl_issuer_lock" must
* be held when making this transition.
*
* After the lwb's write zio completes, it transitions into the "write
@ -93,20 +93,23 @@ typedef struct lwb {
blkptr_t lwb_blk; /* on disk address of this log blk */
boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */
boolean_t lwb_slog; /* lwb_blk is on SLOG device */
boolean_t lwb_indirect; /* do not postpone zil_lwb_commit() */
int lwb_nused; /* # used bytes in buffer */
int lwb_nfilled; /* # filled bytes in buffer */
int lwb_sz; /* size of block and buffer */
lwb_state_t lwb_state; /* the state of this lwb */
char *lwb_buf; /* log write buffer */
zio_t *lwb_write_zio; /* zio for the lwb buffer */
zio_t *lwb_root_zio; /* root zio for lwb write and flushes */
hrtime_t lwb_issued_timestamp; /* when was the lwb issued? */
uint64_t lwb_issued_txg; /* the txg when the write is issued */
uint64_t lwb_max_txg; /* highest txg in this lwb */
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
list_node_t lwb_issue_node; /* linkage of lwbs ready for issue */
list_t lwb_itxs; /* list of itx's */
list_t lwb_waiters; /* list of zil_commit_waiter's */
avl_tree_t lwb_vdev_tree; /* vdevs to flush after lwb write */
kmutex_t lwb_vdev_lock; /* protects lwb_vdev_tree */
hrtime_t lwb_issued_timestamp; /* when was the lwb issued? */
} lwb_t;
/*

View File

@ -190,7 +190,6 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_SPECULATIVE (1ULL << 8)
#define ZIO_FLAG_CONFIG_WRITER (1ULL << 9)
#define ZIO_FLAG_DONT_RETRY (1ULL << 10)
#define ZIO_FLAG_DONT_CACHE (1ULL << 11)
#define ZIO_FLAG_NODATA (1ULL << 12)
#define ZIO_FLAG_INDUCE_DAMAGE (1ULL << 13)
#define ZIO_FLAG_IO_ALLOCATING (1ULL << 14)

View File

@ -83,7 +83,7 @@ const char *_umem_debug_init(void);
const char *_umem_options_init(void);
const char *_umem_logging_init(void);
__attribute__((alloc_size(1)))
__attribute__((malloc, alloc_size(1)))
static inline void *
umem_alloc(size_t size, int flags)
{
@ -96,7 +96,7 @@ umem_alloc(size_t size, int flags)
return (ptr);
}
__attribute__((alloc_size(1)))
__attribute__((malloc, alloc_size(1)))
static inline void *
umem_alloc_aligned(size_t size, size_t align, int flags)
{
@ -118,7 +118,7 @@ umem_alloc_aligned(size_t size, size_t align, int flags)
return (ptr);
}
__attribute__((alloc_size(1)))
__attribute__((malloc, alloc_size(1)))
static inline void *
umem_zalloc(size_t size, int flags)
{
@ -188,6 +188,7 @@ umem_cache_destroy(umem_cache_t *cp)
umem_free(cp, sizeof (umem_cache_t));
}
__attribute__((malloc))
static inline void *
umem_cache_alloc(umem_cache_t *cp, int flags)
{

View File

@ -135,7 +135,6 @@ nodist_libzpool_la_SOURCES = \
module/zfs/uberblock.c \
module/zfs/unique.c \
module/zfs/vdev.c \
module/zfs/vdev_cache.c \
module/zfs/vdev_draid.c \
module/zfs/vdev_draid_rand.c \
module/zfs/vdev_indirect.c \

View File

@ -2028,21 +2028,6 @@ Max vdev I/O aggregation size.
.It Sy zfs_vdev_aggregation_limit_non_rotating Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq uint
Max vdev I/O aggregation size for non-rotating media.
.
.It Sy zfs_vdev_cache_bshift Ns = Ns Sy 16 Po 64 KiB Pc Pq uint
Shift size to inflate reads to.
.
.It Sy zfs_vdev_cache_max Ns = Ns Sy 16384 Ns B Po 16 KiB Pc Pq uint
Inflate reads smaller than this value to meet the
.Sy zfs_vdev_cache_bshift
size
.Pq default Sy 64 KiB .
.
.It Sy zfs_vdev_cache_size Ns = Ns Sy 0 Pq uint
Total size of the per-disk cache in bytes.
.Pp
Currently this feature is disabled, as it has been found to not be helpful
for performance and in some cases harmful.
.
.It Sy zfs_vdev_mirror_rotating_inc Ns = Ns Sy 0 Pq int
A number by which the balancing algorithm increments the load calculation for
the purpose of selecting the least busy mirror member when an I/O operation

View File

@ -228,8 +228,10 @@ extensible_dataset
filesystem_limits
hole_birth
large_blocks
livelist
lz4_compress
spacemap_histogram
zpool_checkpoint
.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev
.Ed

View File

@ -14,7 +14,7 @@
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
.\" Copyright (c) 2017 Intel Corporation.
.\"
.Dd October 7, 2020
.Dd June 4, 2023
.Dt ZDB 8
.Os
.
@ -41,6 +41,13 @@
.Ar poolname Ns Op Ar / Ns Ar dataset Ns | Ns Ar objset-ID
.Op Ar object Ns | Ns Ar range Ns
.Nm
.Fl B
.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns
.Op Fl U Ar cache
.Op Fl K Ar key
.Ar poolname Ns Ar / Ns Ar objset-ID
.Op Ar backup-flags
.Nm
.Fl C
.Op Fl A
.Op Fl U Ar cache
@ -123,6 +130,22 @@ Display options:
Display statistics regarding the number, size
.Pq logical, physical and allocated
and deduplication of blocks.
.It Fl B , -backup
Generate a backup stream, similar to
.Nm zfs Cm send ,
but for the numeric objset ID, and without opening the dataset.
This can be useful in recovery scenarios if dataset metadata has become
corrupted but the dataset itself is readable.
The optional
.Ar flags
argument is a string of one or more of the letters
.Sy e ,
.Sy L ,
.Sy c ,
and
.Sy w ,
which correspond to the same flags in
.Xr zfs-send 8 .
.It Fl c , -checksum
Verify the checksum of all metadata blocks while printing block statistics
.Po see

View File

@ -234,14 +234,11 @@ if the volume is not sparse.
Print verbose information about the created dataset.
.El
.El
.Ss ZFS Volumes as Swap
ZFS volumes may be used as swap devices.
After creating the volume with the
.Nm zfs Cm create Fl V
enable the swap area using the
.Xr swapon 8
command.
Swapping to files on ZFS filesystems is not supported.
.Ss ZFS for Swap
Swapping to a ZFS volume is prone to deadlock and not recommended.
See OpenZFS FAQ.
.Pp
Swapping to a file on a ZFS filesystem is not supported.
.
.Sh EXAMPLES
.\" These are, respectively, examples 1, 10 from zfs.8

View File

@ -456,7 +456,6 @@ ZIO_FLAG_CANFAIL:0x00000080
ZIO_FLAG_SPECULATIVE:0x00000100
ZIO_FLAG_CONFIG_WRITER:0x00000200
ZIO_FLAG_DONT_RETRY:0x00000400
ZIO_FLAG_DONT_CACHE:0x00000800
ZIO_FLAG_NODATA:0x00001000
ZIO_FLAG_INDUCE_DAMAGE:0x00002000

View File

@ -34,6 +34,20 @@ ifeq ($(CONFIG_KASAN),y)
ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than=
endif
# Generated binary search code is particularly bad with this optimization.
# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
# is not affected when unrolling is done.
# Disable it until the following upstream issue is resolved:
# https://github.com/llvm/llvm-project/issues/62790
ifeq ($(CONFIG_X86),y)
ifeq ($(CONFIG_CC_IS_CLANG),y)
CFLAGS_zfs/dsl_scan.o += -mllvm -x86-cmov-converter=false
CFLAGS_zfs/metaslab.o += -mllvm -x86-cmov-converter=false
CFLAGS_zfs/range_tree.o += -mllvm -x86-cmov-converter=false
CFLAGS_zfs/zap_micro.o += -mllvm -x86-cmov-converter=false
endif
endif
ifneq ($(KBUILD_EXTMOD),)
@CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
@ -368,7 +382,6 @@ ZFS_OBJS := \
uberblock.o \
unique.o \
vdev.o \
vdev_cache.o \
vdev_draid.o \
vdev_draid_rand.o \
vdev_indirect.o \

View File

@ -308,7 +308,6 @@ SRCS+= abd.c \
uberblock.c \
unique.c \
vdev.c \
vdev_cache.c \
vdev_draid.c \
vdev_draid_rand.c \
vdev_indirect.c \
@ -400,6 +399,20 @@ beforeinstall:
.include <bsd.kmod.mk>
# Generated binary search code is particularly bad with this optimization.
# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
# is not affected when unrolling is done.
# Disable it until the following upstream issue is resolved:
# https://github.com/llvm/llvm-project/issues/62790
.if ${CC} == "clang"
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "amd64"
CFLAGS.dsl_scan.c= -mllvm -x86-cmov-converter=false
CFLAGS.metaslab.c= -mllvm -x86-cmov-converter=false
CFLAGS.range_tree.c= -mllvm -x86-cmov-converter=false
CFLAGS.zap_micro.c= -mllvm -x86-cmov-converter=false
.endif
.endif
CFLAGS.sysctl_os.c= -include ../zfs_config.h
CFLAGS.xxhash.c+= -include ${SYSDIR}/sys/_null.h

View File

@ -872,8 +872,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip,
"Enable to bypass vdev_validate().");
/* END CSTYLED */
/* vdev_cache.c */
/* vdev_mirror.c */
/* vdev_queue.c */

View File

@ -495,10 +495,8 @@ zfs_acl_release_nodes(zfs_acl_t *aclp)
{
zfs_acl_node_t *aclnode;
while ((aclnode = list_head(&aclp->z_acl))) {
list_remove(&aclp->z_acl, aclnode);
while ((aclnode = list_remove_head(&aclp->z_acl)))
zfs_acl_node_free(aclnode);
}
aclp->z_acl_count = 0;
aclp->z_acl_bytes = 0;
}
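The list_remove_head() form used above pops and returns the first element in one call, returning NULL once the list is empty. A minimal sketch of the same drain idiom with a hypothetical node type (my_node_t and my_list_drain() are illustrations, not part of this change):

/*
 * Hypothetical example of the list_remove_head() idiom; my_node_t and
 * my_list_drain() do not exist in the tree.
 */
typedef struct my_node {
	list_node_t	mn_link;
	uint64_t	mn_data;
} my_node_t;

static void
my_list_drain(list_t *l)
{
	my_node_t *n;

	/* No separate list_head()/list_remove() pair is needed. */
	while ((n = list_remove_head(l)) != NULL)
		kmem_free(n, sizeof (*n));
	list_destroy(l);
}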

View File

@ -2220,92 +2220,6 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
return (0);
}
/*
* Read a property stored within the master node.
*/
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
uint64_t *cached_copy = NULL;
/*
* Figure out where in the objset_t the cached copy would live, if it
* is available for the requested property.
*/
if (os != NULL) {
switch (prop) {
case ZFS_PROP_VERSION:
cached_copy = &os->os_version;
break;
case ZFS_PROP_NORMALIZE:
cached_copy = &os->os_normalization;
break;
case ZFS_PROP_UTF8ONLY:
cached_copy = &os->os_utf8only;
break;
case ZFS_PROP_CASE:
cached_copy = &os->os_casesensitivity;
break;
default:
break;
}
}
if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
*value = *cached_copy;
return (0);
}
/*
* If the property wasn't cached, look up the file system's value for
* the property. For the version property, we look up a slightly
* different string.
*/
const char *pname;
int error = ENOENT;
if (prop == ZFS_PROP_VERSION) {
pname = ZPL_VERSION_STR;
} else {
pname = zfs_prop_to_name(prop);
}
if (os != NULL) {
ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
}
if (error == ENOENT) {
/* No value set, use the default value */
switch (prop) {
case ZFS_PROP_VERSION:
*value = ZPL_VERSION;
break;
case ZFS_PROP_NORMALIZE:
case ZFS_PROP_UTF8ONLY:
*value = 0;
break;
case ZFS_PROP_CASE:
*value = ZFS_CASE_SENSITIVE;
break;
case ZFS_PROP_ACLTYPE:
*value = ZFS_ACLTYPE_NFSV4;
break;
default:
return (error);
}
error = 0;
}
/*
* If one of the methods for getting the property value above worked,
* copy it into the objset_t's cache.
*/
if (error == 0 && cached_copy != NULL) {
*cached_copy = *value;
}
return (error);
}
/*
* Return true if the corresponding vfs's unmounted flag is set.
* Otherwise return false.

View File

@ -2069,6 +2069,93 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
return (error);
}
/*
* Read a property stored within the master node.
*/
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
uint64_t *cached_copy = NULL;
/*
* Figure out where in the objset_t the cached copy would live, if it
* is available for the requested property.
*/
if (os != NULL) {
switch (prop) {
case ZFS_PROP_VERSION:
cached_copy = &os->os_version;
break;
case ZFS_PROP_NORMALIZE:
cached_copy = &os->os_normalization;
break;
case ZFS_PROP_UTF8ONLY:
cached_copy = &os->os_utf8only;
break;
case ZFS_PROP_CASE:
cached_copy = &os->os_casesensitivity;
break;
default:
break;
}
}
if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
*value = *cached_copy;
return (0);
}
/*
* If the property wasn't cached, look up the file system's value for
* the property. For the version property, we look up a slightly
* different string.
*/
const char *pname;
int error = ENOENT;
if (prop == ZFS_PROP_VERSION) {
pname = ZPL_VERSION_STR;
} else {
pname = zfs_prop_to_name(prop);
}
if (os != NULL) {
ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
}
if (error == ENOENT) {
/* No value set, use the default value */
switch (prop) {
case ZFS_PROP_VERSION:
*value = ZPL_VERSION;
break;
case ZFS_PROP_NORMALIZE:
case ZFS_PROP_UTF8ONLY:
*value = 0;
break;
case ZFS_PROP_CASE:
*value = ZFS_CASE_SENSITIVE;
break;
case ZFS_PROP_ACLTYPE:
*value = ZFS_ACLTYPE_NFSV4;
break;
default:
return (error);
}
error = 0;
}
/*
* If one of the methods for getting the property value above worked,
* copy it into the objset_t's cache.
*/
if (error == 0 && cached_copy != NULL) {
*cached_copy = *value;
}
return (error);
}
void
zfs_znode_update_vfs(znode_t *zp)

View File

@ -182,8 +182,11 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
* of that infrastructure we are responsible for incrementing it.
*/
if (current->reclaim_state)
#ifdef HAVE_RECLAIM_STATE_RECLAIMED
current->reclaim_state->reclaimed += size >> PAGE_SHIFT;
#else
current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT;
#endif
vfree(ptr);
}
@ -1012,9 +1015,19 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
ASSERT0(flags & ~KM_PUBLIC_MASK);
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT((skc->skc_flags & KMC_SLAB) == 0);
might_sleep();
*obj = NULL;
/*
* Since we can't sleep, attempt an emergency allocation to satisfy
* the request. The only alternative is to fail the allocation, but
* it's preferable to try. The use of KM_NOSLEEP is expected to be rare.
*/
if (flags & KM_NOSLEEP)
return (spl_emergency_alloc(skc, flags, obj));
might_sleep();
/*
* Before allocating a new slab wait for any reaping to complete and
* then return so the local magazine can be rechecked for new objects.
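A caller-side sketch of what the early KM_NOSLEEP return above permits; the cache handle and error handling here are hypothetical, not taken from the change:

/*
 * Hypothetical atomic-context caller.  A KM_NOSLEEP request that reaches
 * spl_cache_grow() is now routed to spl_emergency_alloc() instead of
 * asserting that sleeping is allowed.
 */
static int
my_nosleep_alloc(spl_kmem_cache_t *skc, void **objp)
{
	*objp = spl_kmem_cache_alloc(skc, KM_NOSLEEP);
	return (*objp == NULL ? SET_ERROR(ENOMEM) : 0);
}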

View File

@ -219,7 +219,11 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
arc_reduce_target_size(ptob(sc->nr_to_scan));
arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
if (current->reclaim_state != NULL)
#ifdef HAVE_RECLAIM_STATE_RECLAIMED
current->reclaim_state->reclaimed += sc->nr_to_scan;
#else
current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
#endif
/*
* We are experiencing memory pressure which the arc_evict_zthr was

View File

@ -493,10 +493,8 @@ zfs_acl_release_nodes(zfs_acl_t *aclp)
{
zfs_acl_node_t *aclnode;
while ((aclnode = list_head(&aclp->z_acl))) {
list_remove(&aclp->z_acl, aclnode);
while ((aclnode = list_remove_head(&aclp->z_acl)))
zfs_acl_node_free(aclnode);
}
aclp->z_acl_count = 0;
aclp->z_acl_bytes = 0;
}

View File

@ -2052,91 +2052,6 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
return (0);
}
/*
* Read a property stored within the master node.
*/
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
uint64_t *cached_copy = NULL;
/*
* Figure out where in the objset_t the cached copy would live, if it
* is available for the requested property.
*/
if (os != NULL) {
switch (prop) {
case ZFS_PROP_VERSION:
cached_copy = &os->os_version;
break;
case ZFS_PROP_NORMALIZE:
cached_copy = &os->os_normalization;
break;
case ZFS_PROP_UTF8ONLY:
cached_copy = &os->os_utf8only;
break;
case ZFS_PROP_CASE:
cached_copy = &os->os_casesensitivity;
break;
default:
break;
}
}
if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
*value = *cached_copy;
return (0);
}
/*
* If the property wasn't cached, look up the file system's value for
* the property. For the version property, we look up a slightly
* different string.
*/
const char *pname;
int error = ENOENT;
if (prop == ZFS_PROP_VERSION)
pname = ZPL_VERSION_STR;
else
pname = zfs_prop_to_name(prop);
if (os != NULL) {
ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
}
if (error == ENOENT) {
/* No value set, use the default value */
switch (prop) {
case ZFS_PROP_VERSION:
*value = ZPL_VERSION;
break;
case ZFS_PROP_NORMALIZE:
case ZFS_PROP_UTF8ONLY:
*value = 0;
break;
case ZFS_PROP_CASE:
*value = ZFS_CASE_SENSITIVE;
break;
case ZFS_PROP_ACLTYPE:
*value = ZFS_ACLTYPE_OFF;
break;
default:
return (error);
}
error = 0;
}
/*
* If one of the methods for getting the property value above worked,
* copy it into the objset_t's cache.
*/
if (error == 0 && cached_copy != NULL) {
*cached_copy = *value;
}
return (error);
}
/*
* Return true if the corresponding vfs's unmounted flag is set.
* Otherwise return false.

View File

@ -2254,6 +2254,91 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
return (error);
}
/*
* Read a property stored within the master node.
*/
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
uint64_t *cached_copy = NULL;
/*
* Figure out where in the objset_t the cached copy would live, if it
* is available for the requested property.
*/
if (os != NULL) {
switch (prop) {
case ZFS_PROP_VERSION:
cached_copy = &os->os_version;
break;
case ZFS_PROP_NORMALIZE:
cached_copy = &os->os_normalization;
break;
case ZFS_PROP_UTF8ONLY:
cached_copy = &os->os_utf8only;
break;
case ZFS_PROP_CASE:
cached_copy = &os->os_casesensitivity;
break;
default:
break;
}
}
if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
*value = *cached_copy;
return (0);
}
/*
* If the property wasn't cached, look up the file system's value for
* the property. For the version property, we look up a slightly
* different string.
*/
const char *pname;
int error = ENOENT;
if (prop == ZFS_PROP_VERSION)
pname = ZPL_VERSION_STR;
else
pname = zfs_prop_to_name(prop);
if (os != NULL) {
ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
}
if (error == ENOENT) {
/* No value set, use the default value */
switch (prop) {
case ZFS_PROP_VERSION:
*value = ZPL_VERSION;
break;
case ZFS_PROP_NORMALIZE:
case ZFS_PROP_UTF8ONLY:
*value = 0;
break;
case ZFS_PROP_CASE:
*value = ZFS_CASE_SENSITIVE;
break;
case ZFS_PROP_ACLTYPE:
*value = ZFS_ACLTYPE_OFF;
break;
default:
return (error);
}
error = 0;
}
/*
* If one of the methods for getting the property value above worked,
* copy it into the objset_t's cache.
*/
if (error == 0 && cached_copy != NULL) {
*cached_copy = *value;
}
return (error);
}
#if defined(_KERNEL)
EXPORT_SYMBOL(zfs_create_fs);
EXPORT_SYMBOL(zfs_obj_to_path);

View File

@ -965,7 +965,7 @@ static void l2arc_hdr_restore(const l2arc_log_ent_phys_t *le,
l2arc_dev_t *dev);
/* L2ARC persistence write I/O routines. */
static void l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
static uint64_t l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
l2arc_write_callback_t *cb);
/* L2ARC persistence auxiliary routines. */
@ -6106,8 +6106,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
asize, abd,
ZIO_CHECKSUM_OFF,
l2arc_read_done, cb, priority,
zio_flags | ZIO_FLAG_DONT_CACHE |
ZIO_FLAG_CANFAIL |
zio_flags | ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY, B_FALSE);
acb->acb_zio_head = rzio;
@ -7866,8 +7865,7 @@ arc_fini(void)
taskq_destroy(arc_prune_taskq);
mutex_enter(&arc_prune_mtx);
while ((p = list_head(&arc_prune_list)) != NULL) {
list_remove(&arc_prune_list, p);
while ((p = list_remove_head(&arc_prune_list)) != NULL) {
zfs_refcount_remove(&p->p_refcnt, &arc_prune_list);
zfs_refcount_destroy(&p->p_refcnt);
kmem_free(p, sizeof (*p));
@ -8175,7 +8173,7 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
static uint64_t
l2arc_write_size(l2arc_dev_t *dev)
{
uint64_t size, dev_size, tsize;
uint64_t size;
/*
* Make sure our globals have meaningful values in case the user
@ -8192,35 +8190,45 @@ l2arc_write_size(l2arc_dev_t *dev)
if (arc_warm == B_FALSE)
size += l2arc_write_boost;
/*
* Make sure the write size does not exceed the size of the cache
* device. This is important in l2arc_evict(), otherwise infinite
* iteration can occur.
*/
dev_size = dev->l2ad_end - dev->l2ad_start;
/* We need to add in the worst case scenario of log block overhead. */
tsize = size + l2arc_log_blk_overhead(size, dev);
size += l2arc_log_blk_overhead(size, dev);
if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
/*
* Trim ahead of the write size 64MB or (l2arc_trim_ahead/100)
* times the writesize, whichever is greater.
*/
tsize += MAX(64 * 1024 * 1024,
(tsize * l2arc_trim_ahead) / 100);
size += MAX(64 * 1024 * 1024,
(size * l2arc_trim_ahead) / 100);
}
if (tsize >= dev_size) {
/*
* Make sure the write size does not exceed the size of the cache
* device. This is important in l2arc_evict(), otherwise infinite
* iteration can occur.
*/
if (size > dev->l2ad_end - dev->l2ad_start) {
cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
"plus the overhead of log blocks (persistent L2ARC, "
"%llu bytes) exceeds the size of the cache device "
"(guid %llu), resetting them to the default (%d)",
(u_longlong_t)l2arc_log_blk_overhead(size, dev),
(u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
if (l2arc_trim_ahead > 1) {
cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
l2arc_trim_ahead = 1;
}
if (arc_warm == B_FALSE)
size += l2arc_write_boost;
size += l2arc_log_blk_overhead(size, dev);
if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
size += MAX(64 * 1024 * 1024,
(size * l2arc_trim_ahead) / 100);
}
}
return (size);
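A worked pass through the sizing above, with hypothetical numbers (not from the change):

/*
 * Hypothetical example: l2arc_write_max = 32 MiB on a cold cache
 * (arc_warm == B_FALSE) with l2arc_write_boost = 32 MiB starts at 64 MiB;
 * l2arc_log_blk_overhead() is added, then, if TRIM is enabled,
 * MAX(64 MiB, size * l2arc_trim_ahead / 100).  Only when that total does
 * not fit between l2ad_start and l2ad_end are the tunables reset to
 * L2ARC_WRITE_SIZE and the size recomputed before being returned.
 */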
@ -8319,20 +8327,14 @@ l2arc_dev_get_next(void)
static void
l2arc_do_free_on_write(void)
{
list_t *buflist;
l2arc_data_free_t *df, *df_prev;
l2arc_data_free_t *df;
mutex_enter(&l2arc_free_on_write_mtx);
buflist = l2arc_free_on_write;
for (df = list_tail(buflist); df; df = df_prev) {
df_prev = list_prev(buflist, df);
while ((df = list_remove_head(l2arc_free_on_write)) != NULL) {
ASSERT3P(df->l2df_abd, !=, NULL);
abd_free(df->l2df_abd);
list_remove(buflist, df);
kmem_free(df, sizeof (l2arc_data_free_t));
}
mutex_exit(&l2arc_free_on_write_mtx);
}
@ -8845,7 +8847,7 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
top:
rerun = B_FALSE;
if (dev->l2ad_hand >= (dev->l2ad_end - distance)) {
if (dev->l2ad_hand + distance > dev->l2ad_end) {
/*
* When there is no space to accommodate upcoming writes,
* evict to the end. Then bump the write and evict hands
@ -9039,7 +9041,7 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
*/
ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end);
if (!dev->l2ad_first)
ASSERT3U(dev->l2ad_hand, <, dev->l2ad_evict);
ASSERT3U(dev->l2ad_hand, <=, dev->l2ad_evict);
}
}
@ -9299,7 +9301,13 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
psize);
if ((write_asize + asize) > target_sz) {
/*
* If the allocated size of this buffer plus the max
* size for the pending log block exceeds the evicted
* target size, terminate writing buffers for this run.
*/
if (write_asize + asize +
sizeof (l2arc_log_blk_phys_t) > target_sz) {
full = B_TRUE;
mutex_exit(hash_lock);
break;
@ -9413,8 +9421,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
* arcstat_l2_{size,asize} kstats are updated
* internally.
*/
if (l2arc_log_blk_insert(dev, hdr))
l2arc_log_blk_commit(dev, pio, cb);
if (l2arc_log_blk_insert(dev, hdr)) {
/*
* l2ad_hand will be adjusted in
* l2arc_log_blk_commit().
*/
write_asize +=
l2arc_log_blk_commit(dev, pio, cb);
}
zio_nowait(wzio);
}
@ -10173,8 +10187,7 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev)
err = zio_wait(zio_read_phys(NULL, dev->l2ad_vdev,
VDEV_LABEL_START_SIZE, l2dhdr_asize, abd,
ZIO_CHECKSUM_LABEL, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_SPECULATIVE, B_FALSE));
abd_free(abd);
@ -10494,11 +10507,10 @@ l2arc_log_blk_fetch(vdev_t *vd, const l2arc_log_blkptr_t *lbp,
cb = kmem_zalloc(sizeof (l2arc_read_callback_t), KM_SLEEP);
cb->l2rcb_abd = abd_get_from_buf(lb, asize);
pio = zio_root(vd->vdev_spa, l2arc_blk_fetch_done, cb,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY);
ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY);
(void) zio_nowait(zio_read_phys(pio, vd, lbp->lbp_daddr, asize,
cb->l2rcb_abd, ZIO_CHECKSUM_OFF, NULL, NULL,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY, B_FALSE));
return (pio);
@ -10564,7 +10576,7 @@ l2arc_dev_hdr_update(l2arc_dev_t *dev)
* This function allocates some memory to temporarily hold the serialized
* buffer to be written. This is then released in l2arc_write_done.
*/
static void
static uint64_t
l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
{
l2arc_log_blk_phys_t *lb = &dev->l2ad_log_blk;
@ -10675,6 +10687,8 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
dev->l2ad_log_ent_idx = 0;
dev->l2ad_log_blk_payload_asize = 0;
dev->l2ad_log_blk_payload_start = 0;
return (asize);
}
/*

View File

@ -65,9 +65,8 @@ bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
bplist_entry_t *bpe;
mutex_enter(&bpl->bpl_lock);
while ((bpe = list_head(&bpl->bpl_list))) {
while ((bpe = list_remove_head(&bpl->bpl_list))) {
bplist_iterate_last_removed = bpe;
list_remove(&bpl->bpl_list, bpe);
mutex_exit(&bpl->bpl_lock);
func(arg, &bpe->bpe_blk, tx);
kmem_free(bpe, sizeof (*bpe));
@ -82,10 +81,7 @@ bplist_clear(bplist_t *bpl)
bplist_entry_t *bpe;
mutex_enter(&bpl->bpl_lock);
while ((bpe = list_head(&bpl->bpl_list))) {
bplist_iterate_last_removed = bpe;
list_remove(&bpl->bpl_list, bpe);
while ((bpe = list_remove_head(&bpl->bpl_list)))
kmem_free(bpe, sizeof (*bpe));
}
mutex_exit(&bpl->bpl_lock);
}

View File

@ -193,14 +193,20 @@ zfs_btree_leaf_free(zfs_btree_t *tree, void *ptr)
void
zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
size_t size)
bt_find_in_buf_f bt_find_in_buf, size_t size)
{
zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE);
zfs_btree_create_custom(tree, compar, bt_find_in_buf, size,
BTREE_LEAF_SIZE);
}
static void *
zfs_btree_find_in_buf(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,
const void *value, zfs_btree_index_t *where);
void
zfs_btree_create_custom(zfs_btree_t *tree,
int (*compar) (const void *, const void *),
bt_find_in_buf_f bt_find_in_buf,
size_t size, size_t lsize)
{
size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems);
@ -208,6 +214,8 @@ zfs_btree_create_custom(zfs_btree_t *tree,
ASSERT3U(size, <=, esize / 2);
memset(tree, 0, sizeof (*tree));
tree->bt_compar = compar;
tree->bt_find_in_buf = (bt_find_in_buf == NULL) ?
zfs_btree_find_in_buf : bt_find_in_buf;
tree->bt_elem_size = size;
tree->bt_leaf_size = lsize;
tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
@ -303,7 +311,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
* element in the last leaf, it's in the last leaf or
* it's not in the tree.
*/
void *d = zfs_btree_find_in_buf(tree,
void *d = tree->bt_find_in_buf(tree,
last_leaf->btl_elems +
last_leaf->btl_hdr.bth_first * size,
last_leaf->btl_hdr.bth_count, value, &idx);
@ -327,7 +335,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
for (node = (zfs_btree_core_t *)tree->bt_root; depth < tree->bt_height;
node = (zfs_btree_core_t *)node->btc_children[child], depth++) {
ASSERT3P(node, !=, NULL);
void *d = zfs_btree_find_in_buf(tree, node->btc_elems,
void *d = tree->bt_find_in_buf(tree, node->btc_elems,
node->btc_hdr.bth_count, value, &idx);
EQUIV(d != NULL, !idx.bti_before);
if (d != NULL) {
@ -347,7 +355,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
*/
zfs_btree_leaf_t *leaf = (depth == 0 ?
(zfs_btree_leaf_t *)tree->bt_root : (zfs_btree_leaf_t *)node);
void *d = zfs_btree_find_in_buf(tree, leaf->btl_elems +
void *d = tree->bt_find_in_buf(tree, leaf->btl_elems +
leaf->btl_hdr.bth_first * size,
leaf->btl_hdr.bth_count, value, &idx);
@ -671,7 +679,7 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
zfs_btree_index_t idx;
ASSERT(zfs_btree_is_core(par_hdr));
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
par_hdr->bth_count, buf, &idx), ==, NULL);
ASSERT(idx.bti_before);
uint32_t offset = idx.bti_offset;
@ -897,7 +905,7 @@ zfs_btree_find_parent_idx(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
}
zfs_btree_index_t idx;
zfs_btree_core_t *parent = hdr->bth_parent;
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
parent->btc_hdr.bth_count, buf, &idx), ==, NULL);
ASSERT(idx.bti_before);
ASSERT3U(idx.bti_offset, <=, parent->btc_hdr.bth_count);
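The bt_find_in_buf hook above lets each tree supply a binary search specialized for its comparator; a minimal sketch of registering one for a plain uint64_t tree (the my_u64_* names are hypothetical), mirroring the dsl_scan.c, metaslab.c, range_tree.c and zap_micro.c hunks later in this commit:

__attribute__((always_inline)) inline
static int
my_u64_compare(const void *x1, const void *x2)
{
	/* Hypothetical comparator; TREE_CMP() yields -1, 0 or 1. */
	const uint64_t *a = x1;
	const uint64_t *b = x2;

	return (TREE_CMP(*a, *b));
}

/* Expands to a static find-in-buf routine with the comparator inlined. */
ZFS_BTREE_FIND_IN_BUF_FUNC(my_u64_find_in_buf, uint64_t, my_u64_compare)

static void
my_tree_create(zfs_btree_t *bt)
{
	zfs_btree_create(bt, my_u64_compare, my_u64_find_in_buf,
	    sizeof (uint64_t));
}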

View File

@ -49,8 +49,12 @@ static dataset_kstat_values_t empty_dataset_kstats = {
{ "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_write", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_alloc", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 }
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_write", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_alloc", KSTAT_DATA_UINT64 }
}
};

View File

@ -1755,9 +1755,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
taskq_wait(dmu_objset_pool(os)->dp_sync_taskq);
list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
while ((dr = list_head(list)) != NULL) {
while ((dr = list_remove_head(list)) != NULL) {
ASSERT0(dr->dr_dbuf->db_level);
list_remove(list, dr);
zio_nowait(dr->dr_zio);
}

View File

@ -1371,8 +1371,8 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
dnode_t *dn;
abd_t *abd = rrd->abd;
zio_cksum_t bp_cksum = bp->blk_cksum;
zio_flag_t flags = ZIO_FLAG_SPECULATIVE |
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL;
zio_flag_t flags = ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL;
if (rwa->raw)
flags |= ZIO_FLAG_RAW;

View File

@ -1955,7 +1955,7 @@ setup_featureflags(struct dmu_send_params *dspp, objset_t *os,
{
dsl_dataset_t *to_ds = dspp->to_ds;
dsl_pool_t *dp = dspp->dp;
#ifdef _KERNEL
if (dmu_objset_type(os) == DMU_OST_ZFS) {
uint64_t version;
if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0)
@ -1964,7 +1964,6 @@ setup_featureflags(struct dmu_send_params *dspp, objset_t *os,
if (version >= ZPL_VERSION_SA)
*featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
}
#endif
/* raw sends imply large_block_ok */
if ((dspp->rawok || dspp->large_block_ok) &&
@ -2793,6 +2792,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
}
if (err == 0) {
owned = B_TRUE;
err = zap_lookup(dspp.dp->dp_meta_objset,
dspp.to_ds->ds_object,
DS_FIELD_RESUME_TOGUID, 8, 1,
@ -2806,21 +2806,24 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
sizeof (dspp.saved_toname),
dspp.saved_toname);
}
if (err != 0)
/* Only disown if there was an error in the lookups */
if (owned && (err != 0))
dsl_dataset_disown(dspp.to_ds, dsflags, FTAG);
kmem_strfree(name);
} else {
err = dsl_dataset_own(dspp.dp, tosnap, dsflags,
FTAG, &dspp.to_ds);
if (err == 0)
owned = B_TRUE;
}
owned = B_TRUE;
} else {
err = dsl_dataset_hold_flags(dspp.dp, tosnap, dsflags, FTAG,
&dspp.to_ds);
}
if (err != 0) {
/* Note: dsl dataset is not owned at this point */
dsl_pool_rele(dspp.dp, FTAG);
return (err);
}

View File

@ -1396,8 +1396,7 @@ dmu_tx_do_callbacks(list_t *cb_list, int error)
{
dmu_tx_callback_t *dcb;
while ((dcb = list_tail(cb_list)) != NULL) {
list_remove(cb_list, dcb);
while ((dcb = list_remove_tail(cb_list)) != NULL) {
dcb->dcb_func(dcb->dcb_data, error);
kmem_free(dcb, sizeof (dmu_tx_callback_t));
}

View File

@ -520,8 +520,7 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
issued = pf_end - pf_start + ipf_end - ipf_start;
if (issued > 1) {
/* More references on top of taken in dmu_zfetch_prepare(). */
for (int i = 0; i < issued - 1; i++)
zfs_refcount_add(&zs->zs_refs, NULL);
zfs_refcount_add_few(&zs->zs_refs, issued - 1, NULL);
} else if (issued == 0) {
/* Some other thread has done our work, so drop the ref. */
if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)

View File

@ -3782,8 +3782,7 @@ snaplist_destroy(list_t *l, const void *tag)
if (l == NULL || !list_link_active(&l->list_head))
return;
while ((snap = list_tail(l)) != NULL) {
list_remove(l, snap);
while ((snap = list_remove_tail(l)) != NULL) {
dsl_dataset_rele(snap->ds, tag);
kmem_free(snap, sizeof (*snap));
}

View File

@ -1490,7 +1490,7 @@ dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
if (tr_cookie == NULL)
return;
while ((tr = list_head(tr_list)) != NULL) {
while ((tr = list_remove_head(tr_list)) != NULL) {
if (tr->tr_ds) {
mutex_enter(&tr->tr_ds->dd_lock);
ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
@ -1500,7 +1500,6 @@ dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
} else {
arc_tempreserve_clear(tr->tr_size);
}
list_remove(tr_list, tr);
kmem_free(tr, sizeof (struct tempreserve));
}

View File

@ -234,7 +234,7 @@ static int zfs_resilver_disable_defer = B_FALSE;
static int zfs_free_bpobj_enabled = 1;
/* Error blocks to be scrubbed in one txg. */
uint_t zfs_scrub_error_blocks_per_txg = 1 << 12;
static uint_t zfs_scrub_error_blocks_per_txg = 1 << 12;
/* the order has to match pool_scan_type */
static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = {
@ -3437,10 +3437,8 @@ scan_io_queues_run_one(void *arg)
* If we were suspended in the middle of processing,
* requeue any unfinished sios and exit.
*/
while ((sio = list_head(&sio_list)) != NULL) {
list_remove(&sio_list, sio);
while ((sio = list_remove_head(&sio_list)) != NULL)
scan_io_queue_insert_impl(queue, sio);
}
queue->q_zio = NULL;
mutex_exit(q_lock);
@ -4877,6 +4875,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
* with single operation. Plus it makes scrubs more sequential and reduces
* chances that minor extent change move it within the B-tree.
*/
__attribute__((always_inline)) inline
static int
ext_size_compare(const void *x, const void *y)
{
@ -4885,13 +4884,17 @@ ext_size_compare(const void *x, const void *y)
return (TREE_CMP(*a, *b));
}
ZFS_BTREE_FIND_IN_BUF_FUNC(ext_size_find_in_buf, uint64_t,
ext_size_compare)
static void
ext_size_create(range_tree_t *rt, void *arg)
{
(void) rt;
zfs_btree_t *size_tree = arg;
zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
zfs_btree_create(size_tree, ext_size_compare, ext_size_find_in_buf,
sizeof (uint64_t));
}
static void

View File

@ -148,8 +148,7 @@ zfs_zevent_drain(zevent_t *ev)
list_remove(&zevent_list, ev);
/* Remove references to this event in all private file data */
while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
list_remove(&ev->ev_ze_list, ze);
while ((ze = list_remove_head(&ev->ev_ze_list)) != NULL) {
ze->ze_zevent = NULL;
ze->ze_dropped++;
}

View File

@ -1342,6 +1342,7 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
* Comparison function for the private size-ordered tree using 32-bit
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
*/
__attribute__((always_inline)) inline
static int
metaslab_rangesize32_compare(const void *x1, const void *x2)
{
@ -1352,16 +1353,15 @@ metaslab_rangesize32_compare(const void *x1, const void *x2)
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
int cmp = TREE_CMP(rs_size1, rs_size2);
if (likely(cmp))
return (cmp);
return (TREE_CMP(r1->rs_start, r2->rs_start));
return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
}
/*
* Comparison function for the private size-ordered tree using 64-bit
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
*/
__attribute__((always_inline)) inline
static int
metaslab_rangesize64_compare(const void *x1, const void *x2)
{
@ -1372,11 +1372,10 @@ metaslab_rangesize64_compare(const void *x1, const void *x2)
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
int cmp = TREE_CMP(rs_size1, rs_size2);
if (likely(cmp))
return (cmp);
return (TREE_CMP(r1->rs_start, r2->rs_start));
return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
}
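/*
 * Note (not part of the change): the folded return above is a branchless
 * tiebreak.  TREE_CMP() yields -1, 0 or 1, so when the sizes differ !cmp
 * is 0 and the start-offset term vanishes; when they are equal cmp is 0
 * and the result is TREE_CMP(r1->rs_start, r2->rs_start).  For example,
 * with equal sizes and starts of 100 vs 200: 0 + 1 * (-1) = -1.  Keeping
 * the comparator free of the early-return branch suits the always_inline
 * specialization consumed by the bt_find_in_buf search.
 */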
typedef struct metaslab_rt_arg {
zfs_btree_t *mra_bt;
uint32_t mra_floor_shift;
@ -1412,6 +1411,13 @@ metaslab_size_tree_full_load(range_tree_t *rt)
range_tree_walk(rt, metaslab_size_sorted_add, &arg);
}
ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize32_in_buf,
range_seg32_t, metaslab_rangesize32_compare)
ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize64_in_buf,
range_seg64_t, metaslab_rangesize64_compare)
/*
* Create any block allocator specific components. The current allocators
* rely on using both a size-ordered range_tree_t and an array of uint64_t's.
@ -1424,19 +1430,22 @@ metaslab_rt_create(range_tree_t *rt, void *arg)
size_t size;
int (*compare) (const void *, const void *);
bt_find_in_buf_f bt_find;
switch (rt->rt_type) {
case RANGE_SEG32:
size = sizeof (range_seg32_t);
compare = metaslab_rangesize32_compare;
bt_find = metaslab_rt_find_rangesize32_in_buf;
break;
case RANGE_SEG64:
size = sizeof (range_seg64_t);
compare = metaslab_rangesize64_compare;
bt_find = metaslab_rt_find_rangesize64_in_buf;
break;
default:
panic("Invalid range seg type %d", rt->rt_type);
}
zfs_btree_create(size_tree, compare, size);
zfs_btree_create(size_tree, compare, bt_find, size);
mrap->mra_floor_shift = metaslab_by_size_min_shift;
}
@ -5641,8 +5650,7 @@ metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, int allocator,
* We reserve the slots individually so that we can unreserve
* them individually when an I/O completes.
*/
for (int d = 0; d < slots; d++)
zfs_refcount_add(&mca->mca_alloc_slots, zio);
zfs_refcount_add_few(&mca->mca_alloc_slots, slots, zio);
zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
return (B_TRUE);
}
@ -5656,8 +5664,7 @@ metaslab_class_throttle_unreserve(metaslab_class_t *mc, int slots,
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
ASSERT(mc->mc_alloc_throttle_enabled);
for (int d = 0; d < slots; d++)
zfs_refcount_remove(&mca->mca_alloc_slots, zio);
zfs_refcount_remove_few(&mca->mca_alloc_slots, slots, zio);
}
static int

View File

@ -151,6 +151,7 @@ range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
rt->rt_histogram[idx]--;
}
__attribute__((always_inline)) inline
static int
range_tree_seg32_compare(const void *x1, const void *x2)
{
@ -163,6 +164,7 @@ range_tree_seg32_compare(const void *x1, const void *x2)
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
__attribute__((always_inline)) inline
static int
range_tree_seg64_compare(const void *x1, const void *x2)
{
@ -175,6 +177,7 @@ range_tree_seg64_compare(const void *x1, const void *x2)
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
__attribute__((always_inline)) inline
static int
range_tree_seg_gap_compare(const void *x1, const void *x2)
{
@ -187,6 +190,15 @@ range_tree_seg_gap_compare(const void *x1, const void *x2)
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg32_find_in_buf, range_seg32_t,
range_tree_seg32_compare)
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg64_find_in_buf, range_seg64_t,
range_tree_seg64_compare)
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg_gap_find_in_buf, range_seg_gap_t,
range_tree_seg_gap_compare)
range_tree_t *
range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
void *arg, uint64_t start, uint64_t shift, uint64_t gap)
@ -197,23 +209,27 @@ range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES);
size_t size;
int (*compare) (const void *, const void *);
bt_find_in_buf_f bt_find;
switch (type) {
case RANGE_SEG32:
size = sizeof (range_seg32_t);
compare = range_tree_seg32_compare;
bt_find = range_tree_seg32_find_in_buf;
break;
case RANGE_SEG64:
size = sizeof (range_seg64_t);
compare = range_tree_seg64_compare;
bt_find = range_tree_seg64_find_in_buf;
break;
case RANGE_SEG_GAP:
size = sizeof (range_seg_gap_t);
compare = range_tree_seg_gap_compare;
bt_find = range_tree_seg_gap_find_in_buf;
break;
default:
panic("Invalid range seg type %d", type);
}
zfs_btree_create(&rt->rt_root, compare, size);
zfs_btree_create(&rt->rt_root, compare, bt_find, size);
rt->rt_ops = ops;
rt->rt_gap = gap;

View File

@ -88,14 +88,11 @@ zfs_refcount_destroy_many(zfs_refcount_t *rc, uint64_t number)
reference_t *ref;
ASSERT3U(rc->rc_count, ==, number);
while ((ref = list_head(&rc->rc_list))) {
list_remove(&rc->rc_list, ref);
while ((ref = list_remove_head(&rc->rc_list)))
kmem_cache_free(reference_cache, ref);
}
list_destroy(&rc->rc_list);
while ((ref = list_head(&rc->rc_removed))) {
list_remove(&rc->rc_removed, ref);
while ((ref = list_remove_head(&rc->rc_removed))) {
kmem_cache_free(reference_history_cache, ref->ref_removed);
kmem_cache_free(reference_cache, ref);
}
@ -151,6 +148,15 @@ zfs_refcount_add(zfs_refcount_t *rc, const void *holder)
return (zfs_refcount_add_many(rc, 1, holder));
}
void
zfs_refcount_add_few(zfs_refcount_t *rc, uint64_t number, const void *holder)
{
if (!rc->rc_tracked)
(void) zfs_refcount_add_many(rc, number, holder);
else for (; number > 0; number--)
(void) zfs_refcount_add(rc, holder);
}
int64_t
zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number,
const void *holder)
@ -204,6 +210,15 @@ zfs_refcount_remove(zfs_refcount_t *rc, const void *holder)
return (zfs_refcount_remove_many(rc, 1, holder));
}
void
zfs_refcount_remove_few(zfs_refcount_t *rc, uint64_t number, const void *holder)
{
if (!rc->rc_tracked)
(void) zfs_refcount_remove_many(rc, number, holder);
else for (; number > 0; number--)
(void) zfs_refcount_remove(rc, holder);
}
void
zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src)
{

View File

@ -33,6 +33,7 @@
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
* Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
*/
/*
@ -1608,16 +1609,16 @@ spa_unload_log_sm_metadata(spa_t *spa)
{
void *cookie = NULL;
spa_log_sm_t *sls;
log_summary_entry_t *e;
while ((sls = avl_destroy_nodes(&spa->spa_sm_logs_by_txg,
&cookie)) != NULL) {
VERIFY0(sls->sls_mscount);
kmem_free(sls, sizeof (spa_log_sm_t));
}
for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
e != NULL; e = list_head(&spa->spa_log_summary)) {
while ((e = list_remove_head(&spa->spa_log_summary)) != NULL) {
VERIFY0(e->lse_mscount);
list_remove(&spa->spa_log_summary, e);
kmem_free(e, sizeof (log_summary_entry_t));
}
@ -6874,9 +6875,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
if (dsl_scan_resilvering(spa_get_dsl(spa)))
if (dsl_scan_resilvering(spa_get_dsl(spa)) ||
dsl_scan_resilver_scheduled(spa_get_dsl(spa))) {
return (spa_vdev_exit(spa, NULL, txg,
ZFS_ERR_RESILVER_IN_PROGRESS));
}
} else {
if (vdev_rebuild_active(rvd))
return (spa_vdev_exit(spa, NULL, txg,

View File

@ -814,8 +814,7 @@ spa_remove(spa_t *spa)
if (spa->spa_root)
spa_strfree(spa->spa_root);
while ((dp = list_head(&spa->spa_config_list)) != NULL) {
list_remove(&spa->spa_config_list, dp);
while ((dp = list_remove_head(&spa->spa_config_list)) != NULL) {
if (dp->scd_path != NULL)
spa_strfree(dp->scd_path);
kmem_free(dp, sizeof (spa_config_dirent_t));
@ -2439,7 +2438,6 @@ spa_init(spa_mode_t mode)
zio_init();
dmu_init();
zil_init();
vdev_cache_stat_init();
vdev_mirror_stat_init();
vdev_raidz_math_init();
vdev_file_init();
@ -2463,7 +2461,6 @@ spa_fini(void)
spa_evict_all();
vdev_file_fini();
vdev_cache_stat_fini();
vdev_mirror_stat_fini();
vdev_raidz_math_fini();
chksum_fini();

View File

@ -29,7 +29,7 @@
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Datto Inc. All rights reserved.
* Copyright (c) 2021, Klara Inc.
* Copyright [2021] Hewlett Packard Enterprise Development LP
* Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
*/
#include <sys/zfs_context.h>
@ -715,7 +715,6 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
offsetof(struct vdev, vdev_dtl_node));
vd->vdev_stat.vs_timestamp = gethrtime();
vdev_queue_init(vd);
vdev_cache_init(vd);
return (vd);
}
@ -1096,7 +1095,6 @@ vdev_free(vdev_t *vd)
* Clean up vdev structure.
*/
vdev_queue_fini(vd);
vdev_cache_fini(vd);
if (vd->vdev_path)
spa_strfree(vd->vdev_path);
@ -1720,8 +1718,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
vps = kmem_zalloc(sizeof (*vps), KM_SLEEP);
vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
ZIO_FLAG_TRYHARD;
ZIO_FLAG_DONT_AGGREGATE | ZIO_FLAG_TRYHARD;
if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
/*
@ -2612,8 +2609,6 @@ vdev_close(vdev_t *vd)
vd->vdev_ops->vdev_op_close(vd);
vdev_cache_purge(vd);
/*
* We record the previous state before we close it, so that if we are
* doing a reopen(), we don't generate FMA ereports if we notice that
@ -2699,6 +2694,17 @@ vdev_reopen(vdev_t *vd)
(void) vdev_validate(vd);
}
/*
* Recheck if resilver is still needed and cancel any
* scheduled resilver if resilver is unneeded.
*/
if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) &&
spa->spa_async_tasks & SPA_ASYNC_RESILVER) {
mutex_enter(&spa->spa_async_lock);
spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER;
mutex_exit(&spa->spa_async_lock);
}
/*
* Reassess parent vdev's health.
*/

View File

@ -1,436 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
#include <sys/kstat.h>
#include <sys/abd.h>
/*
* Virtual device read-ahead caching.
*
* This file implements a simple LRU read-ahead cache. When the DMU reads
* a given block, it will often want other, nearby blocks soon thereafter.
* We take advantage of this by reading a larger disk region and caching
* the result. In the best case, this can turn 128 back-to-back 512-byte
* reads into a single 64k read followed by 127 cache hits; this reduces
* latency dramatically. In the worst case, it can turn an isolated 512-byte
* read into a 64k read, which doesn't affect latency all that much but is
* terribly wasteful of bandwidth. A more intelligent version of the cache
* could keep track of access patterns and not do read-ahead unless it sees
* at least two temporally close I/Os to the same region. Currently, only
* metadata I/O is inflated. A further enhancement could take advantage of
* more semantic information about the I/O. And it could use something
* faster than an AVL tree; that was chosen solely for convenience.
*
* There are five cache operations: allocate, fill, read, write, evict.
*
* (1) Allocate. This reserves a cache entry for the specified region.
* We separate the allocate and fill operations so that multiple threads
* don't generate I/O for the same cache miss.
*
* (2) Fill. When the I/O for a cache miss completes, the fill routine
* places the data in the previously allocated cache entry.
*
* (3) Read. Read data from the cache.
*
* (4) Write. Update cache contents after write completion.
*
* (5) Evict. When allocating a new entry, we evict the oldest (LRU) entry
* if the total cache size exceeds zfs_vdev_cache_size.
*/
/*
* These tunables are for performance analysis.
*/
/*
* All i/os smaller than zfs_vdev_cache_max will be turned into
* 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
* track buffer). At most zfs_vdev_cache_size bytes will be kept in each
* vdev's vdev_cache.
*
* TODO: Note that with the current ZFS code, it turns out that the
* vdev cache is not helpful, and in some cases actually harmful. It
* is better if we disable this. Once some time has passed, we should
* actually remove this to simplify the code. For now we just disable
* it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11
* has made these same changes.
*/
static uint_t zfs_vdev_cache_max = 1 << 14; /* 16KB */
static uint_t zfs_vdev_cache_size = 0;
static uint_t zfs_vdev_cache_bshift = 16;
#define VCBS (1 << zfs_vdev_cache_bshift) /* 64KB */
static kstat_t *vdc_ksp = NULL;
typedef struct vdc_stats {
kstat_named_t vdc_stat_delegations;
kstat_named_t vdc_stat_hits;
kstat_named_t vdc_stat_misses;
} vdc_stats_t;
static vdc_stats_t vdc_stats = {
{ "delegations", KSTAT_DATA_UINT64 },
{ "hits", KSTAT_DATA_UINT64 },
{ "misses", KSTAT_DATA_UINT64 }
};
#define VDCSTAT_BUMP(stat) atomic_inc_64(&vdc_stats.stat.value.ui64);
static inline int
vdev_cache_offset_compare(const void *a1, const void *a2)
{
const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
return (TREE_CMP(ve1->ve_offset, ve2->ve_offset));
}
static int
vdev_cache_lastused_compare(const void *a1, const void *a2)
{
const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
int cmp = TREE_CMP(ve1->ve_lastused, ve2->ve_lastused);
if (likely(cmp))
return (cmp);
/*
* Among equally old entries, sort by offset to ensure uniqueness.
*/
return (vdev_cache_offset_compare(a1, a2));
}
/*
* Evict the specified entry from the cache.
*/
static void
vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve)
{
ASSERT(MUTEX_HELD(&vc->vc_lock));
ASSERT3P(ve->ve_fill_io, ==, NULL);
ASSERT3P(ve->ve_abd, !=, NULL);
avl_remove(&vc->vc_lastused_tree, ve);
avl_remove(&vc->vc_offset_tree, ve);
abd_free(ve->ve_abd);
kmem_free(ve, sizeof (vdev_cache_entry_t));
}
/*
* Allocate an entry in the cache. At this point we don't have the data,
* we're just creating a placeholder so that multiple threads don't all
* go off and read the same blocks.
*/
static vdev_cache_entry_t *
vdev_cache_allocate(zio_t *zio)
{
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
uint64_t offset = P2ALIGN(zio->io_offset, VCBS);
vdev_cache_entry_t *ve;
ASSERT(MUTEX_HELD(&vc->vc_lock));
if (zfs_vdev_cache_size == 0)
return (NULL);
/*
* If adding a new entry would exceed the cache size,
* evict the oldest entry (LRU).
*/
if ((avl_numnodes(&vc->vc_lastused_tree) << zfs_vdev_cache_bshift) >
zfs_vdev_cache_size) {
ve = avl_first(&vc->vc_lastused_tree);
if (ve->ve_fill_io != NULL)
return (NULL);
ASSERT3U(ve->ve_hits, !=, 0);
vdev_cache_evict(vc, ve);
}
ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP);
ve->ve_offset = offset;
ve->ve_lastused = ddi_get_lbolt();
ve->ve_abd = abd_alloc_for_io(VCBS, B_TRUE);
avl_add(&vc->vc_offset_tree, ve);
avl_add(&vc->vc_lastused_tree, ve);
return (ve);
}
static void
vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio)
{
uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
ASSERT(MUTEX_HELD(&vc->vc_lock));
ASSERT3P(ve->ve_fill_io, ==, NULL);
if (ve->ve_lastused != ddi_get_lbolt()) {
avl_remove(&vc->vc_lastused_tree, ve);
ve->ve_lastused = ddi_get_lbolt();
avl_add(&vc->vc_lastused_tree, ve);
}
ve->ve_hits++;
abd_copy_off(zio->io_abd, ve->ve_abd, 0, cache_phase, zio->io_size);
}
/*
* Fill a previously allocated cache entry with data.
*/
static void
vdev_cache_fill(zio_t *fio)
{
vdev_t *vd = fio->io_vd;
vdev_cache_t *vc = &vd->vdev_cache;
vdev_cache_entry_t *ve = fio->io_private;
zio_t *pio;
ASSERT3U(fio->io_size, ==, VCBS);
/*
* Add data to the cache.
*/
mutex_enter(&vc->vc_lock);
ASSERT3P(ve->ve_fill_io, ==, fio);
ASSERT3U(ve->ve_offset, ==, fio->io_offset);
ASSERT3P(ve->ve_abd, ==, fio->io_abd);
ve->ve_fill_io = NULL;
/*
* Even if this cache line was invalidated by a missed write update,
* any reads that were queued up before the missed update are still
* valid, so we can satisfy them from this line before we evict it.
*/
zio_link_t *zl = NULL;
while ((pio = zio_walk_parents(fio, &zl)) != NULL)
vdev_cache_hit(vc, ve, pio);
if (fio->io_error || ve->ve_missed_update)
vdev_cache_evict(vc, ve);
mutex_exit(&vc->vc_lock);
}
/*
* Read data from the cache. Returns B_TRUE on cache hit, B_FALSE on miss.
*/
boolean_t
vdev_cache_read(zio_t *zio)
{
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
vdev_cache_entry_t *ve, ve_search;
uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS);
zio_t *fio;
uint64_t cache_phase __maybe_unused = P2PHASE(zio->io_offset, VCBS);
ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
if (zfs_vdev_cache_size == 0)
return (B_FALSE);
if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
return (B_FALSE);
if (zio->io_size > zfs_vdev_cache_max)
return (B_FALSE);
/*
* If the I/O straddles two or more cache blocks, don't cache it.
*/
if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS))
return (B_FALSE);
ASSERT3U(cache_phase + zio->io_size, <=, VCBS);
mutex_enter(&vc->vc_lock);
ve_search.ve_offset = cache_offset;
ve = avl_find(&vc->vc_offset_tree, &ve_search, NULL);
if (ve != NULL) {
if (ve->ve_missed_update) {
mutex_exit(&vc->vc_lock);
return (B_FALSE);
}
if ((fio = ve->ve_fill_io) != NULL) {
zio_vdev_io_bypass(zio);
zio_add_child(zio, fio);
mutex_exit(&vc->vc_lock);
VDCSTAT_BUMP(vdc_stat_delegations);
return (B_TRUE);
}
vdev_cache_hit(vc, ve, zio);
zio_vdev_io_bypass(zio);
mutex_exit(&vc->vc_lock);
VDCSTAT_BUMP(vdc_stat_hits);
return (B_TRUE);
}
ve = vdev_cache_allocate(zio);
if (ve == NULL) {
mutex_exit(&vc->vc_lock);
return (B_FALSE);
}
fio = zio_vdev_delegated_io(zio->io_vd, cache_offset,
ve->ve_abd, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW,
ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve);
ve->ve_fill_io = fio;
zio_vdev_io_bypass(zio);
zio_add_child(zio, fio);
mutex_exit(&vc->vc_lock);
zio_nowait(fio);
VDCSTAT_BUMP(vdc_stat_misses);
return (B_TRUE);
}
/*
* Update cache contents upon write completion.
*/
void
vdev_cache_write(zio_t *zio)
{
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
vdev_cache_entry_t *ve, ve_search;
uint64_t io_start = zio->io_offset;
uint64_t io_end = io_start + zio->io_size;
uint64_t min_offset = P2ALIGN(io_start, VCBS);
uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
avl_index_t where;
ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
mutex_enter(&vc->vc_lock);
ve_search.ve_offset = min_offset;
ve = avl_find(&vc->vc_offset_tree, &ve_search, &where);
if (ve == NULL)
ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER);
while (ve != NULL && ve->ve_offset < max_offset) {
uint64_t start = MAX(ve->ve_offset, io_start);
uint64_t end = MIN(ve->ve_offset + VCBS, io_end);
if (ve->ve_fill_io != NULL) {
ve->ve_missed_update = 1;
} else {
abd_copy_off(ve->ve_abd, zio->io_abd,
start - ve->ve_offset, start - io_start,
end - start);
}
ve = AVL_NEXT(&vc->vc_offset_tree, ve);
}
mutex_exit(&vc->vc_lock);
}
void
vdev_cache_purge(vdev_t *vd)
{
vdev_cache_t *vc = &vd->vdev_cache;
vdev_cache_entry_t *ve;
mutex_enter(&vc->vc_lock);
while ((ve = avl_first(&vc->vc_offset_tree)) != NULL)
vdev_cache_evict(vc, ve);
mutex_exit(&vc->vc_lock);
}
void
vdev_cache_init(vdev_t *vd)
{
vdev_cache_t *vc = &vd->vdev_cache;
mutex_init(&vc->vc_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&vc->vc_offset_tree, vdev_cache_offset_compare,
sizeof (vdev_cache_entry_t),
offsetof(struct vdev_cache_entry, ve_offset_node));
avl_create(&vc->vc_lastused_tree, vdev_cache_lastused_compare,
sizeof (vdev_cache_entry_t),
offsetof(struct vdev_cache_entry, ve_lastused_node));
}
void
vdev_cache_fini(vdev_t *vd)
{
vdev_cache_t *vc = &vd->vdev_cache;
vdev_cache_purge(vd);
avl_destroy(&vc->vc_offset_tree);
avl_destroy(&vc->vc_lastused_tree);
mutex_destroy(&vc->vc_lock);
}
void
vdev_cache_stat_init(void)
{
vdc_ksp = kstat_create("zfs", 0, "vdev_cache_stats", "misc",
KSTAT_TYPE_NAMED, sizeof (vdc_stats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL);
if (vdc_ksp != NULL) {
vdc_ksp->ks_data = &vdc_stats;
kstat_install(vdc_ksp);
}
}
void
vdev_cache_stat_fini(void)
{
if (vdc_ksp != NULL) {
kstat_delete(vdc_ksp);
vdc_ksp = NULL;
}
}
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_max, UINT, ZMOD_RW,
"Inflate reads small than max");
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_size, UINT, ZMOD_RD,
"Total size of the per-disk cache");
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_bshift, UINT, ZMOD_RW,
"Shift size to inflate reads too");

View File

@ -293,17 +293,16 @@ vdev_indirect_map_free(zio_t *zio)
indirect_vsd_t *iv = zio->io_vsd;
indirect_split_t *is;
while ((is = list_head(&iv->iv_splits)) != NULL) {
while ((is = list_remove_head(&iv->iv_splits)) != NULL) {
for (int c = 0; c < is->is_children; c++) {
indirect_child_t *ic = &is->is_child[c];
if (ic->ic_data != NULL)
abd_free(ic->ic_data);
}
list_remove(&iv->iv_splits, is);
indirect_child_t *ic;
while ((ic = list_head(&is->is_unique_child)) != NULL)
list_remove(&is->is_unique_child, ic);
while ((ic = list_remove_head(&is->is_unique_child)) != NULL)
;
list_destroy(&is->is_unique_child);
@ -1659,8 +1658,8 @@ vdev_indirect_splits_damage(indirect_vsd_t *iv, zio_t *zio)
for (indirect_split_t *is = list_head(&iv->iv_splits);
is != NULL; is = list_next(&iv->iv_splits, is)) {
indirect_child_t *ic;
while ((ic = list_head(&is->is_unique_child)) != NULL)
list_remove(&is->is_unique_child, ic);
while ((ic = list_remove_head(&is->is_unique_child)) != NULL)
;
is->is_unique_children = 0;
}

View File

@ -748,8 +748,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
aio = zio_vdev_delegated_io(first->io_vd, first->io_offset,
abd, size, first->io_type, zio->io_priority,
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
vdev_queue_agg_io_done, NULL);
flags | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL);
aio->io_timestamp = first->io_timestamp;
nio = first;
@ -907,7 +906,7 @@ vdev_queue_io(zio_t *zio)
ASSERT(zio->io_priority == ZIO_PRIORITY_TRIM);
}
zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
zio->io_flags |= ZIO_FLAG_DONT_QUEUE;
zio->io_timestamp = gethrtime();
mutex_enter(&vq->vq_lock);

View File

@ -285,6 +285,7 @@ zap_byteswap(void *buf, size_t size)
}
}
__attribute__((always_inline)) inline
static int
mze_compare(const void *arg1, const void *arg2)
{
@ -295,6 +296,9 @@ mze_compare(const void *arg1, const void *arg2)
(uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
}
ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf, mzap_ent_t,
mze_compare)
static void
mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
{
@ -461,7 +465,7 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
* 62 entries before we have to add 2KB B-tree core node.
*/
zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
sizeof (mzap_ent_t), 512);
mze_find_in_buf, sizeof (mzap_ent_t), 512);
zap_name_t *zn = zap_name_alloc(zap);
for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {

View File

@ -1522,9 +1522,8 @@ zfs_ereport_fini(void)
{
recent_events_node_t *entry;
while ((entry = list_head(&recent_events_list)) != NULL) {
while ((entry = list_remove_head(&recent_events_list)) != NULL) {
avl_remove(&recent_events_tree, entry);
list_remove(&recent_events_list, entry);
kmem_free(entry, sizeof (*entry));
}
avl_destroy(&recent_events_tree);

View File

@ -699,19 +699,15 @@ zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
zfs_fuid_t *zfuid;
zfs_fuid_domain_t *zdomain;
while ((zfuid = list_head(&fuidp->z_fuids)) != NULL) {
list_remove(&fuidp->z_fuids, zfuid);
while ((zfuid = list_remove_head(&fuidp->z_fuids)) != NULL)
kmem_free(zfuid, sizeof (zfs_fuid_t));
}
if (fuidp->z_domain_table != NULL)
kmem_free(fuidp->z_domain_table,
(sizeof (char *)) * fuidp->z_domain_cnt);
while ((zdomain = list_head(&fuidp->z_domains)) != NULL) {
list_remove(&fuidp->z_domains, zdomain);
while ((zdomain = list_remove_head(&fuidp->z_domains)) != NULL)
kmem_free(zdomain, sizeof (zfs_fuid_domain_t));
}
kmem_free(fuidp, sizeof (zfs_fuid_info_t));
}

View File

@ -87,8 +87,7 @@ zfs_onexit_destroy(zfs_onexit_t *zo)
zfs_onexit_action_node_t *ap;
mutex_enter(&zo->zo_lock);
while ((ap = list_head(&zo->zo_actions)) != NULL) {
list_remove(&zo->zo_actions, ap);
while ((ap = list_remove_head(&zo->zo_actions)) != NULL) {
mutex_exit(&zo->zo_lock);
ap->za_func(ap->za_data);
kmem_free(ap, sizeof (zfs_onexit_action_node_t));

View File

@ -116,8 +116,12 @@ static zil_kstat_values_t zil_stats = {
{ "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_write", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_alloc", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_write", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_alloc", KSTAT_DATA_UINT64 },
};
static zil_sums_t zil_sums_global;
@ -146,6 +150,9 @@ static uint64_t zil_slog_bulk = 768 * 1024;
static kmem_cache_t *zil_lwb_cache;
static kmem_cache_t *zil_zcw_cache;
static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
static itx_t *zil_itx_clone(itx_t *oitx);
static int
zil_bp_compare(const void *x1, const void *x2)
{
@ -241,11 +248,10 @@ zil_kstats_global_update(kstat_t *ksp, int rw)
*/
static int
zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
blkptr_t *nbp, void *dst, char **end)
blkptr_t *nbp, char **begin, char **end, arc_buf_t **abuf)
{
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf = NULL;
zbookmark_phys_t zb;
int error;
@ -262,7 +268,7 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
&abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
if (error == 0) {
zio_cksum_t cksum = bp->blk_cksum;
@ -277,23 +283,23 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
*/
cksum.zc_word[ZIL_ZC_SEQ]++;
uint64_t size = BP_GET_LSIZE(bp);
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
zil_chain_t *zilc = abuf->b_data;
zil_chain_t *zilc = (*abuf)->b_data;
char *lr = (char *)(zilc + 1);
uint64_t len = zilc->zc_nused - sizeof (zil_chain_t);
if (memcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) {
sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) ||
zilc->zc_nused < sizeof (*zilc) ||
zilc->zc_nused > size) {
error = SET_ERROR(ECKSUM);
} else {
ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE);
memcpy(dst, lr, len);
*end = (char *)dst + len;
*begin = lr;
*end = lr + zilc->zc_nused - sizeof (*zilc);
*nbp = zilc->zc_next_blk;
}
} else {
char *lr = abuf->b_data;
uint64_t size = BP_GET_LSIZE(bp);
char *lr = (*abuf)->b_data;
zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1;
if (memcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
@ -301,15 +307,11 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
(zilc->zc_nused > (size - sizeof (*zilc)))) {
error = SET_ERROR(ECKSUM);
} else {
ASSERT3U(zilc->zc_nused, <=,
SPA_OLD_MAXBLOCKSIZE);
memcpy(dst, lr, zilc->zc_nused);
*end = (char *)dst + zilc->zc_nused;
*begin = lr;
*end = lr + zilc->zc_nused;
*nbp = zilc->zc_next_blk;
}
}
arc_buf_destroy(abuf, &abuf);
}
return (error);
@ -375,8 +377,12 @@ zil_sums_init(zil_sums_t *zs)
wmsum_init(&zs->zil_itx_needcopy_bytes, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_count, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_bytes, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_write, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_alloc, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_count, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_bytes, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_write, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_alloc, 0);
}
void
@ -393,8 +399,12 @@ zil_sums_fini(zil_sums_t *zs)
wmsum_fini(&zs->zil_itx_needcopy_bytes);
wmsum_fini(&zs->zil_itx_metaslab_normal_count);
wmsum_fini(&zs->zil_itx_metaslab_normal_bytes);
wmsum_fini(&zs->zil_itx_metaslab_normal_write);
wmsum_fini(&zs->zil_itx_metaslab_normal_alloc);
wmsum_fini(&zs->zil_itx_metaslab_slog_count);
wmsum_fini(&zs->zil_itx_metaslab_slog_bytes);
wmsum_fini(&zs->zil_itx_metaslab_slog_write);
wmsum_fini(&zs->zil_itx_metaslab_slog_alloc);
}
void
@ -422,10 +432,18 @@ zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums)
wmsum_value(&zil_sums->zil_itx_metaslab_normal_count);
zs->zil_itx_metaslab_normal_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_normal_bytes);
zs->zil_itx_metaslab_normal_write.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_normal_write);
zs->zil_itx_metaslab_normal_alloc.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_normal_alloc);
zs->zil_itx_metaslab_slog_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_count);
zs->zil_itx_metaslab_slog_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_bytes);
zs->zil_itx_metaslab_slog_write.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_write);
zs->zil_itx_metaslab_slog_alloc.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_alloc);
}
/*
@ -445,7 +463,6 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
uint64_t blk_count = 0;
uint64_t lr_count = 0;
blkptr_t blk, next_blk = {{{{0}}}};
char *lrbuf, *lrp;
int error = 0;
/*
@ -463,13 +480,13 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
* If the log has been claimed, stop if we encounter a sequence
* number greater than the highest claimed sequence number.
*/
lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE);
zil_bp_tree_init(zilog);
for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) {
uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
int reclen;
char *end = NULL;
char *lrp, *end;
arc_buf_t *abuf = NULL;
if (blk_seq > claim_blk_seq)
break;
@ -485,8 +502,10 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
break;
error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
lrbuf, &end);
&lrp, &end, &abuf);
if (error != 0) {
if (abuf)
arc_buf_destroy(abuf, &abuf);
if (claimed) {
char name[ZFS_MAX_DATASET_NAME_LEN];
@ -499,7 +518,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
break;
}
for (lrp = lrbuf; lrp < end; lrp += reclen) {
for (; lrp < end; lrp += reclen) {
lr_t *lr = (lr_t *)lrp;
reclen = lr->lrc_reclen;
ASSERT3U(reclen, >=, sizeof (lr_t));
@ -513,6 +532,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
max_lr_seq = lr->lrc_seq;
lr_count++;
}
arc_buf_destroy(abuf, &abuf);
}
done:
zilog->zl_parse_error = error;
@ -522,7 +542,6 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
zilog->zl_parse_lr_count = lr_count;
zil_bp_tree_fini(zilog);
zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
return (error);
}
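The replay loop in zil_parse() above walks a borrowed [begin, end) window of variable-length records, each of which carries its own length in its header, advancing by that length each iteration. A minimal stand-alone sketch of that walking pattern; the record layout, field names and sizes here are illustrative, not the on-disk ZIL format:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical record header: every record starts with its total length. */
typedef struct rec_hdr {
	uint32_t reclen;	/* total record size, header included */
	uint32_t type;		/* payload type, opaque to the walker */
} rec_hdr_t;

/* Walk records in [begin, end); stop on a malformed length. */
static int
walk_records(const char *begin, const char *end)
{
	for (const char *p = begin; p < end; ) {
		const rec_hdr_t *hdr = (const rec_hdr_t *)p;
		if ((size_t)(end - p) < sizeof (*hdr) ||
		    hdr->reclen < sizeof (*hdr) ||
		    hdr->reclen > (size_t)(end - p))
			return (-1);	/* corrupt chain, bail out */
		printf("record type %u, %u bytes\n", hdr->type, hdr->reclen);
		p += hdr->reclen;	/* advance by the record's own length */
	}
	return (0);
}

int
main(void)
{
	char buf[64];
	rec_hdr_t a = { .reclen = 24, .type = 1 };
	rec_hdr_t b = { .reclen = 16, .type = 2 };
	memcpy(buf, &a, sizeof (a));
	memcpy(buf + 24, &b, sizeof (b));
	return (walk_records(buf, buf + 40) == 0 ? 0 : 1);
}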
@ -747,20 +766,21 @@ zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg,
lwb->lwb_blk = *bp;
lwb->lwb_fastwrite = fastwrite;
lwb->lwb_slog = slog;
lwb->lwb_indirect = B_FALSE;
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
lwb->lwb_nused = lwb->lwb_nfilled = sizeof (zil_chain_t);
lwb->lwb_sz = BP_GET_LSIZE(bp);
} else {
lwb->lwb_nused = lwb->lwb_nfilled = 0;
lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t);
}
lwb->lwb_state = LWB_STATE_CLOSED;
lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp));
lwb->lwb_max_txg = txg;
lwb->lwb_write_zio = NULL;
lwb->lwb_root_zio = NULL;
lwb->lwb_issued_timestamp = 0;
lwb->lwb_issued_txg = 0;
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
lwb->lwb_nused = sizeof (zil_chain_t);
lwb->lwb_sz = BP_GET_LSIZE(bp);
} else {
lwb->lwb_nused = 0;
lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t);
}
lwb->lwb_max_txg = txg;
mutex_enter(&zilog->zl_lock);
list_insert_tail(&zilog->zl_lwb_list, lwb);
@ -1373,9 +1393,14 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
zil_commit_waiter_t *zcw;
itx_t *itx;
uint64_t txg;
list_t itxs, waiters;
spa_config_exit(zilog->zl_spa, SCL_STATE, lwb);
list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
list_create(&waiters, sizeof (zil_commit_waiter_t),
offsetof(zil_commit_waiter_t, zcw_node));
hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp;
mutex_enter(&zilog->zl_lock);
@ -1384,9 +1409,6 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
lwb->lwb_root_zio = NULL;
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
lwb->lwb_state = LWB_STATE_FLUSH_DONE;
if (zilog->zl_last_lwb_opened == lwb) {
/*
* Remember the highest committed log sequence number
@ -1397,13 +1419,21 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
zilog->zl_commit_lr_seq = zilog->zl_lr_seq;
}
while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
zil_itx_destroy(itx);
list_move_tail(&itxs, &lwb->lwb_itxs);
list_move_tail(&waiters, &lwb->lwb_waiters);
while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
lwb->lwb_state = LWB_STATE_FLUSH_DONE;
mutex_exit(&zilog->zl_lock);
while ((itx = list_remove_head(&itxs)) != NULL)
zil_itx_destroy(itx);
list_destroy(&itxs);
while ((zcw = list_remove_head(&waiters)) != NULL) {
mutex_enter(&zcw->zcw_lock);
ASSERT3P(zcw->zcw_lwb, ==, lwb);
zcw->zcw_lwb = NULL;
/*
* We expect any ZIO errors from child ZIOs to have been
@ -1428,8 +1458,7 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
mutex_exit(&zcw->zcw_lock);
}
mutex_exit(&zilog->zl_lock);
list_destroy(&waiters);
mutex_enter(&zilog->zl_lwb_io_lock);
txg = lwb->lwb_issued_txg;
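The rewrite of zil_lwb_flush_vdevs_done() above drains lwb_itxs and lwb_waiters onto local lists while zl_lock is held and only destroys the itxs and wakes the waiters after the lock has been dropped. A minimal sketch of that drain-then-process pattern in portable C; the pthread mutex and the hypothetical singly linked node type stand in for zl_lock and the kernel list_t:

#include <pthread.h>
#include <stdlib.h>

typedef struct node {
	struct node *next;
	int payload;
} node_t;

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static node_t *shared_head;	/* protected by 'lock' */

static void
process(node_t *n)
{
	/* Expensive or lock-acquiring work goes here, outside 'lock'. */
	free(n);
}

static void
drain_and_process(void)
{
	node_t *local, *n;

	/* Steal the whole list while holding the lock... */
	pthread_mutex_lock(&lock);
	local = shared_head;
	shared_head = NULL;
	pthread_mutex_unlock(&lock);

	/* ...then walk it with the lock dropped. */
	while ((n = local) != NULL) {
		local = n->next;
		process(n);
	}
}

int
main(void)
{
	for (int i = 0; i < 3; i++) {
		node_t *n = malloc(sizeof (*n));
		n->payload = i;
		n->next = shared_head;
		shared_head = n;
	}
	drain_and_process();
	return (0);
}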
@ -1666,46 +1695,41 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
EQUIV(lwb->lwb_root_zio == NULL, lwb->lwb_state == LWB_STATE_CLOSED);
EQUIV(lwb->lwb_root_zio != NULL, lwb->lwb_state == LWB_STATE_OPENED);
if (lwb->lwb_root_zio != NULL)
return;
lwb->lwb_root_zio = zio_root(zilog->zl_spa,
zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL);
abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf,
BP_GET_LSIZE(&lwb->lwb_blk));
if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
prio = ZIO_PRIORITY_SYNC_WRITE;
else
prio = ZIO_PRIORITY_ASYNC_WRITE;
SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */
mutex_enter(&zilog->zl_lock);
if (lwb->lwb_root_zio == NULL) {
abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf,
BP_GET_LSIZE(&lwb->lwb_blk));
if (!lwb->lwb_fastwrite) {
metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
lwb->lwb_fastwrite = 1;
}
if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
prio = ZIO_PRIORITY_SYNC_WRITE;
else
prio = ZIO_PRIORITY_ASYNC_WRITE;
lwb->lwb_root_zio = zio_root(zilog->zl_spa,
zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL);
ASSERT3P(lwb->lwb_root_zio, !=, NULL);
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio,
zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd,
BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb,
prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
ASSERT3P(lwb->lwb_write_zio, !=, NULL);
lwb->lwb_state = LWB_STATE_OPENED;
zil_lwb_set_zio_dependency(zilog, lwb);
zilog->zl_last_lwb_opened = lwb;
if (!lwb->lwb_fastwrite) {
metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
lwb->lwb_fastwrite = 1;
}
mutex_exit(&zilog->zl_lock);
ASSERT3P(lwb->lwb_root_zio, !=, NULL);
ASSERT3P(lwb->lwb_write_zio, !=, NULL);
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0,
&lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
zil_lwb_write_done, lwb, prio,
ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
lwb->lwb_state = LWB_STATE_OPENED;
zil_lwb_set_zio_dependency(zilog, lwb);
zilog->zl_last_lwb_opened = lwb;
mutex_exit(&zilog->zl_lock);
}
/*
@ -1736,11 +1760,11 @@ static const struct {
static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
/*
* Start a log block write and advance to the next log block.
* Calls are serialized.
* Close the log block for being issued and allocate the next one.
* Has to be called under zl_issuer_lock to chain more lwbs.
*/
static lwb_t *
zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)
{
lwb_t *nlwb = NULL;
zil_chain_t *zilc;
@ -1748,7 +1772,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
blkptr_t *bp;
dmu_tx_t *tx;
uint64_t txg;
uint64_t zil_blksz, wsz;
uint64_t zil_blksz;
int i, error;
boolean_t slog;
@ -1757,16 +1781,17 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
ASSERT3P(lwb->lwb_write_zio, !=, NULL);
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
zilc = (zil_chain_t *)lwb->lwb_buf;
bp = &zilc->zc_next_blk;
} else {
zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
bp = &zilc->zc_next_blk;
/*
* If this lwb includes indirect writes, we have to commit before
* creating the transaction, otherwise we may end up in a deadlock.
*/
if (lwb->lwb_indirect) {
for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
itx = list_next(&lwb->lwb_itxs, itx))
zil_lwb_commit(zilog, lwb, itx);
lwb->lwb_nused = lwb->lwb_nfilled;
}
ASSERT(lwb->lwb_nused <= lwb->lwb_sz);
/*
* Allocate the next block and save its address in this block
* before writing it in order to establish the log chain.
@ -1814,19 +1839,18 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz;
for (i = 0; i < ZIL_PREV_BLKS; i++)
zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]);
DTRACE_PROBE3(zil__block__size, zilog_t *, zilog,
uint64_t, zil_blksz,
uint64_t, zilog->zl_prev_blks[zilog->zl_prev_rotor]);
zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2)
zilc = (zil_chain_t *)lwb->lwb_buf;
else
zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
bp = &zilc->zc_next_blk;
BP_ZERO(bp);
error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, zil_blksz, &slog);
if (slog) {
ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes,
lwb->lwb_nused);
} else {
ZIL_STAT_BUMP(zilog, zil_itx_metaslab_normal_count);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_bytes,
lwb->lwb_nused);
}
if (error == 0) {
ASSERT3U(bp->blk_birth, ==, txg);
bp->blk_cksum = lwb->lwb_blk.blk_cksum;
@ -1838,17 +1862,47 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE);
}
lwb->lwb_state = LWB_STATE_ISSUED;
dmu_tx_commit(tx);
/*
* If there was an allocation failure then nlwb will be null which
* forces a txg_wait_synced().
*/
return (nlwb);
}
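zil_lwb_write_close() above picks the next log block size from a short history of recent sizes: it records the latest demand into a small ring (zl_prev_blks), takes the maximum over the ring, and rounds up to an allowed size. A stand-alone sketch of that smoothing heuristic, assuming an illustrative ring depth and bucket table rather than the real ZIL values:

#include <stdint.h>
#include <stdio.h>

#define	PREV_BLKS	16	/* must be a power of two for the mask below */

static uint64_t prev_blks[PREV_BLKS];
static int prev_rotor;

/* Allowed sizes, smallest to largest; 0 terminates the table. */
static const uint64_t buckets[] = { 4096, 8192, 32768, 131072, 0 };

static uint64_t
next_block_size(uint64_t wanted)
{
	uint64_t sz = wanted;

	/* Remember this request and take the max over recent history. */
	prev_blks[prev_rotor] = wanted;
	for (int i = 0; i < PREV_BLKS; i++)
		if (prev_blks[i] > sz)
			sz = prev_blks[i];
	prev_rotor = (prev_rotor + 1) & (PREV_BLKS - 1);

	/* Round up to the first bucket that fits; clamp to the largest. */
	for (int i = 0; buckets[i] != 0; i++)
		if (sz <= buckets[i])
			return (buckets[i]);
	return (buckets[sizeof (buckets) / sizeof (buckets[0]) - 2]);
}

int
main(void)
{
	printf("%llu\n", (unsigned long long)next_block_size(6000));
	printf("%llu\n", (unsigned long long)next_block_size(1000));
	return (0);
}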
/*
* Finalize previously closed block and issue the write zio.
* Does not require locking.
*/
static void
zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
{
zil_chain_t *zilc;
int wsz;
/* Actually fill the lwb with the data if not yet. */
if (!lwb->lwb_indirect) {
for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
itx = list_next(&lwb->lwb_itxs, itx))
zil_lwb_commit(zilog, lwb, itx);
lwb->lwb_nused = lwb->lwb_nfilled;
}
if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
/* For Slim ZIL only write what is used. */
wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t);
ASSERT3U(wsz, <=, lwb->lwb_sz);
wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, int);
ASSERT3S(wsz, <=, lwb->lwb_sz);
zio_shrink(lwb->lwb_write_zio, wsz);
wsz = lwb->lwb_write_zio->io_size;
zilc = (zil_chain_t *)lwb->lwb_buf;
} else {
wsz = lwb->lwb_sz;
zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
}
zilc->zc_pad = 0;
zilc->zc_nused = lwb->lwb_nused;
zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum;
@ -1858,22 +1912,28 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
*/
memset(lwb->lwb_buf + lwb->lwb_nused, 0, wsz - lwb->lwb_nused);
if (lwb->lwb_slog) {
ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes,
lwb->lwb_nused);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_write,
wsz);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_alloc,
BP_GET_LSIZE(&lwb->lwb_blk));
} else {
ZIL_STAT_BUMP(zilog, zil_itx_metaslab_normal_count);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_bytes,
lwb->lwb_nused);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_write,
wsz);
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
BP_GET_LSIZE(&lwb->lwb_blk));
}
spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER);
zil_lwb_add_block(lwb, &lwb->lwb_blk);
lwb->lwb_issued_timestamp = gethrtime();
lwb->lwb_state = LWB_STATE_ISSUED;
zio_nowait(lwb->lwb_root_zio);
zio_nowait(lwb->lwb_write_zio);
dmu_tx_commit(tx);
/*
* If there was an allocation failure then nlwb will be null which
* forces a txg_wait_synced().
*/
return (nlwb);
}
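For Slim ZIL blocks, zil_lwb_write_issue() above shrinks the write to the used portion rounded up to ZIL_MIN_BLKSZ and zeroes the padding so stale buffer contents never reach disk. A tiny sketch of that round-up-and-pad step; the 512-byte granularity and the names here are assumptions for illustration only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	MIN_BLKSZ	512	/* illustrative write granularity */

/* Round 'x' up to the next multiple of the power-of-two 'align'. */
static uint64_t
roundup_p2(uint64_t x, uint64_t align)
{
	return ((x + align - 1) & ~(align - 1));
}

int
main(void)
{
	char buf[4096];
	uint64_t nused = 1300;			/* bytes of real records */
	uint64_t wsz = roundup_p2(nused, MIN_BLKSZ);

	/* Zero the tail so stale bytes never hit the disk. */
	memset(buf + nused, 0, wsz - nused);
	printf("writing %llu of %zu bytes\n",
	    (unsigned long long)wsz, sizeof (buf));
	return (0);
}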
/*
@ -1909,13 +1969,19 @@ zil_max_copied_data(zilog_t *zilog)
sizeof (lr_write_t));
}
/*
* Estimate space needed in the lwb for the itx. Allocate more lwbs or
* split the itx as needed, but don't touch the actual transaction data.
* Has to be called under zl_issuer_lock to call zil_lwb_write_close()
* to chain more lwbs.
*/
static lwb_t *
zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
{
lr_t *lrcb, *lrc;
lr_write_t *lrwb, *lrw;
char *lr_buf;
uint64_t dlen, dnow, dpad, lwb_sp, reclen, txg, max_log_data;
itx_t *citx;
lr_t *lr, *clr;
lr_write_t *lrw;
uint64_t dlen, dnow, lwb_sp, reclen, max_log_data;
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
ASSERT3P(lwb, !=, NULL);
@ -1923,8 +1989,8 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
zil_lwb_write_open(zilog, lwb);
lrc = &itx->itx_lr;
lrw = (lr_write_t *)lrc;
lr = &itx->itx_lr;
lrw = (lr_write_t *)lr;
/*
* A commit itx doesn't represent any on-disk state; instead
@ -1938,24 +2004,23 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
*
* For more details, see the comment above zil_commit().
*/
if (lrc->lrc_txtype == TX_COMMIT) {
if (lr->lrc_txtype == TX_COMMIT) {
mutex_enter(&zilog->zl_lock);
zil_commit_waiter_link_lwb(itx->itx_private, lwb);
itx->itx_private = NULL;
mutex_exit(&zilog->zl_lock);
list_insert_tail(&lwb->lwb_itxs, itx);
return (lwb);
}
if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
dlen = P2ROUNDUP_TYPED(
lrw->lr_length, sizeof (uint64_t), uint64_t);
dpad = dlen - lrw->lr_length;
} else {
dlen = dpad = 0;
dlen = 0;
}
reclen = lrc->lrc_reclen;
reclen = lr->lrc_reclen;
zilog->zl_cur_used += (reclen + dlen);
txg = lrc->lrc_txg;
cont:
/*
@ -1968,7 +2033,8 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
lwb_sp < zil_max_waste_space(zilog) &&
(dlen % max_log_data == 0 ||
lwb_sp < reclen + dlen % max_log_data))) {
lwb = zil_lwb_write_issue(zilog, lwb);
list_insert_tail(ilwbs, lwb);
lwb = zil_lwb_write_close(zilog, lwb);
if (lwb == NULL)
return (NULL);
zil_lwb_write_open(zilog, lwb);
@ -1987,19 +2053,99 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
}
dnow = MIN(dlen, lwb_sp - reclen);
lr_buf = lwb->lwb_buf + lwb->lwb_nused;
memcpy(lr_buf, lrc, reclen);
lrcb = (lr_t *)lr_buf; /* Like lrc, but inside lwb. */
lrwb = (lr_write_t *)lrcb; /* Like lrw, but inside lwb. */
if (dlen > dnow) {
ASSERT3U(lr->lrc_txtype, ==, TX_WRITE);
ASSERT3U(itx->itx_wr_state, ==, WR_NEED_COPY);
citx = zil_itx_clone(itx);
clr = &citx->itx_lr;
lr_write_t *clrw = (lr_write_t *)clr;
clrw->lr_length = dnow;
lrw->lr_offset += dnow;
lrw->lr_length -= dnow;
} else {
citx = itx;
clr = lr;
}
/*
* We're actually making an entry, so update lrc_seq to be the
* log record sequence number. Note that this is generally not
* equal to the itx sequence number because not all transactions
* are synchronous, and sometimes spa_sync() gets there first.
*/
clr->lrc_seq = ++zilog->zl_lr_seq;
lwb->lwb_nused += reclen + dnow;
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
zil_lwb_add_txg(lwb, lr->lrc_txg);
list_insert_tail(&lwb->lwb_itxs, citx);
dlen -= dnow;
if (dlen > 0) {
zilog->zl_cur_used += reclen;
goto cont;
}
/*
* We really have to issue all queued LWBs before we may have to
* wait for a txg sync. Otherwise we may end up in a deadlock.
*/
if (lr->lrc_txtype == TX_WRITE) {
boolean_t frozen = lr->lrc_txg > spa_freeze_txg(zilog->zl_spa);
if (frozen || itx->itx_wr_state == WR_INDIRECT) {
lwb_t *tlwb;
while ((tlwb = list_remove_head(ilwbs)) != NULL)
zil_lwb_write_issue(zilog, tlwb);
}
if (itx->itx_wr_state == WR_INDIRECT)
lwb->lwb_indirect = B_TRUE;
if (frozen)
txg_wait_synced(zilog->zl_dmu_pool, lr->lrc_txg);
}
return (lwb);
}
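When a WR_NEED_COPY record is larger than the space left in the current block, zil_lwb_assign() above clones the itx, trims the clone to what fits, and advances the original's offset and length before looping to the next block. A minimal sketch of that split-and-carry idea on a hypothetical write descriptor (the struct and names are illustrative, only loosely modeled on lr_write_t):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical write descriptor. */
typedef struct wr {
	uint64_t offset;	/* file offset of the data */
	uint64_t length;	/* bytes still to log */
} wr_t;

/*
 * Assign 'w' into blocks of 'blk_space' bytes each; emit one entry per
 * block and carry the remainder forward until nothing is left.
 */
static void
assign_split(wr_t *w, uint64_t blk_space)
{
	while (w->length > 0) {
		uint64_t now = w->length < blk_space ? w->length : blk_space;
		wr_t clone = *w;	/* the piece that goes in this block */
		clone.length = now;
		printf("block entry: offset %llu, length %llu\n",
		    (unsigned long long)clone.offset,
		    (unsigned long long)clone.length);
		w->offset += now;	/* carry the rest to the next block */
		w->length -= now;
	}
}

int
main(void)
{
	wr_t w = { .offset = 0, .length = 300000 };
	assign_split(&w, 131072);
	return (0);
}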
/*
* Fill the actual transaction data into the lwb, following zil_lwb_assign().
* Does not require locking.
*/
static void
zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
{
lr_t *lr, *lrb;
lr_write_t *lrw, *lrwb;
char *lr_buf;
uint64_t dlen, reclen;
lr = &itx->itx_lr;
lrw = (lr_write_t *)lr;
if (lr->lrc_txtype == TX_COMMIT)
return;
if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
dlen = P2ROUNDUP_TYPED(
lrw->lr_length, sizeof (uint64_t), uint64_t);
} else {
dlen = 0;
}
reclen = lr->lrc_reclen;
ASSERT3U(reclen + dlen, <=, lwb->lwb_nused - lwb->lwb_nfilled);
lr_buf = lwb->lwb_buf + lwb->lwb_nfilled;
memcpy(lr_buf, lr, reclen);
lrb = (lr_t *)lr_buf; /* Like lr, but inside lwb. */
lrwb = (lr_write_t *)lrb; /* Like lrw, but inside lwb. */
ZIL_STAT_BUMP(zilog, zil_itx_count);
/*
* If it's a write, fetch the data or get its blkptr as appropriate.
*/
if (lrc->lrc_txtype == TX_WRITE) {
if (txg > spa_freeze_txg(zilog->zl_spa))
txg_wait_synced(zilog->zl_dmu_pool, txg);
if (lr->lrc_txtype == TX_WRITE) {
if (itx->itx_wr_state == WR_COPIED) {
ZIL_STAT_BUMP(zilog, zil_itx_copied_count);
ZIL_STAT_INCR(zilog, zil_itx_copied_bytes,
@ -2010,14 +2156,10 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
if (itx->itx_wr_state == WR_NEED_COPY) {
dbuf = lr_buf + reclen;
lrcb->lrc_reclen += dnow;
if (lrwb->lr_length > dnow)
lrwb->lr_length = dnow;
lrw->lr_offset += dnow;
lrw->lr_length -= dnow;
lrb->lrc_reclen += dlen;
ZIL_STAT_BUMP(zilog, zil_itx_needcopy_count);
ZIL_STAT_INCR(zilog, zil_itx_needcopy_bytes,
dnow);
dlen);
} else {
ASSERT3S(itx->itx_wr_state, ==, WR_INDIRECT);
dbuf = NULL;
@ -2044,9 +2186,11 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
error = zilog->zl_get_data(itx->itx_private,
itx->itx_gen, lrwb, dbuf, lwb,
lwb->lwb_write_zio);
if (dbuf != NULL && error == 0 && dnow == dlen)
if (dbuf != NULL && error == 0) {
/* Zero any padding bytes in the last block. */
memset((char *)dbuf + lrwb->lr_length, 0, dpad);
memset((char *)dbuf + lrwb->lr_length, 0,
dlen - lrwb->lr_length);
}
/*
* Typically, the only return values we should see from
@ -2074,39 +2218,26 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
error);
zfs_fallthrough;
case EIO:
txg_wait_synced(zilog->zl_dmu_pool, txg);
if (lwb->lwb_indirect) {
txg_wait_synced(zilog->zl_dmu_pool,
lr->lrc_txg);
} else {
lwb->lwb_write_zio->io_error = error;
}
zfs_fallthrough;
case ENOENT:
zfs_fallthrough;
case EEXIST:
zfs_fallthrough;
case EALREADY:
return (lwb);
return;
}
}
}
/*
* We're actually making an entry, so update lrc_seq to be the
* log record sequence number. Note that this is generally not
* equal to the itx sequence number because not all transactions
* are synchronous, and sometimes spa_sync() gets there first.
*/
lrcb->lrc_seq = ++zilog->zl_lr_seq;
lwb->lwb_nused += reclen + dnow;
zil_lwb_add_txg(lwb, txg);
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
dlen -= dnow;
if (dlen > 0) {
zilog->zl_cur_used += reclen;
goto cont;
}
return (lwb);
lwb->lwb_nfilled += reclen + dlen;
ASSERT3S(lwb->lwb_nfilled, <=, lwb->lwb_nused);
ASSERT0(P2PHASE(lwb->lwb_nfilled, sizeof (uint64_t)));
}
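The assign/commit split above relies on two cursors per block: lwb_nused reserves space up front under zl_issuer_lock, while lwb_nfilled tracks how much of that reservation has actually been copied in later, without the lock. A small sketch of that reserve-then-fill bookkeeping; the buffer size and names are illustrative assumptions:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define	BLK_SZ	4096

typedef struct blk {
	char	buf[BLK_SZ];
	size_t	nused;		/* space handed out so far (reservation) */
	size_t	nfilled;	/* bytes actually copied so far */
} blk_t;

/* Phase 1: reserve 'len' bytes; done while holding the writer's lock. */
static int
blk_reserve(blk_t *b, size_t len)
{
	if (b->nused + len > BLK_SZ)
		return (-1);	/* does not fit, caller opens a new block */
	b->nused += len;
	return (0);
}

/* Phase 2: copy payloads in reservation order; no lock required. */
static void
blk_fill(blk_t *b, const void *data, size_t len)
{
	assert(b->nfilled + len <= b->nused);
	memcpy(b->buf + b->nfilled, data, len);
	b->nfilled += len;
}

int
main(void)
{
	blk_t b = { .nused = 0, .nfilled = 0 };
	assert(blk_reserve(&b, 8) == 0);
	assert(blk_reserve(&b, 16) == 0);
	blk_fill(&b, "reserved", 8);
	blk_fill(&b, "then-filled-here", 16);
	assert(b.nfilled == b.nused);
	return (0);
}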
itx_t *
@ -2131,6 +2262,16 @@ zil_itx_create(uint64_t txtype, size_t olrsize)
return (itx);
}
static itx_t *
zil_itx_clone(itx_t *oitx)
{
itx_t *itx = zio_data_buf_alloc(oitx->itx_size);
memcpy(itx, oitx, oitx->itx_size);
itx->itx_callback = NULL;
itx->itx_callback_data = NULL;
return (itx);
}
void
zil_itx_destroy(itx_t *itx)
{
@ -2162,7 +2303,7 @@ zil_itxg_clean(void *arg)
/*
* In the general case, commit itxs will not be found
* here, as they'll be committed to an lwb via
* zil_lwb_commit(), and free'd in that function. Having
* zil_lwb_assign(), and free'd in that function. Having
* said that, it is still possible for commit itxs to be
* found here, due to the following race:
*
@ -2561,7 +2702,7 @@ zil_commit_writer_stall(zilog_t *zilog)
* lwb will be issued to the zio layer to be written to disk.
*/
static void
zil_process_commit_list(zilog_t *zilog)
zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
{
spa_t *spa = zilog->zl_spa;
list_t nolwb_itxs;
@ -2663,18 +2804,23 @@ zil_process_commit_list(zilog_t *zilog)
*/
if (frozen || !synced || lrc->lrc_txtype == TX_COMMIT) {
if (lwb != NULL) {
lwb = zil_lwb_commit(zilog, itx, lwb);
if (lwb == NULL)
lwb = zil_lwb_assign(zilog, lwb, itx, ilwbs);
if (lwb == NULL) {
list_insert_tail(&nolwb_itxs, itx);
else
list_insert_tail(&lwb->lwb_itxs, itx);
} else if ((zcw->zcw_lwb != NULL &&
zcw->zcw_lwb != lwb) || zcw->zcw_done) {
/*
* Our lwb is done, leave the rest of
* the itx list to somebody else who cares.
*/
first = B_FALSE;
break;
}
} else {
if (lrc->lrc_txtype == TX_COMMIT) {
zil_commit_waiter_link_nolwb(
itx->itx_private, &nolwb_waiters);
}
list_insert_tail(&nolwb_itxs, itx);
}
} else {
@ -2690,6 +2836,8 @@ zil_process_commit_list(zilog_t *zilog)
* the ZIL write pipeline; see the comment within
* zil_commit_writer_stall() for more details.
*/
while ((lwb = list_remove_head(ilwbs)) != NULL)
zil_lwb_write_issue(zilog, lwb);
zil_commit_writer_stall(zilog);
/*
@ -2735,13 +2883,13 @@ zil_process_commit_list(zilog_t *zilog)
* on the system, such that this function will be
* immediately called again (not necessarily by the same
* thread) and this lwb's zio will be issued via
* zil_lwb_commit(). This way, the lwb is guaranteed to
* zil_lwb_assign(). This way, the lwb is guaranteed to
* be "full" when it is issued to disk, and we'll make
* use of the lwb's size the best we can.
*
* 2. If there isn't sufficient ZIL activity occurring on
* the system, such that this lwb's zio isn't issued via
* zil_lwb_commit(), zil_commit_waiter() will issue the
* zil_lwb_assign(), zil_commit_waiter() will issue the
* lwb's zio. If this occurs, the lwb is not guaranteed
* to be "full" by the time its zio is issued, and means
* the size of the lwb was "too large" given the amount
@ -2773,10 +2921,15 @@ zil_process_commit_list(zilog_t *zilog)
zfs_commit_timeout_pct / 100;
if (sleep < zil_min_commit_timeout ||
lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
lwb = zil_lwb_write_issue(zilog, lwb);
list_insert_tail(ilwbs, lwb);
lwb = zil_lwb_write_close(zilog, lwb);
zilog->zl_cur_used = 0;
if (lwb == NULL)
if (lwb == NULL) {
while ((lwb = list_remove_head(ilwbs))
!= NULL)
zil_lwb_write_issue(zilog, lwb);
zil_commit_writer_stall(zilog);
}
}
}
}
@ -2799,9 +2952,13 @@ zil_process_commit_list(zilog_t *zilog)
static void
zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
{
list_t ilwbs;
lwb_t *lwb;
ASSERT(!MUTEX_HELD(&zilog->zl_lock));
ASSERT(spa_writeable(zilog->zl_spa));
list_create(&ilwbs, sizeof (lwb_t), offsetof(lwb_t, lwb_issue_node));
mutex_enter(&zilog->zl_issuer_lock);
if (zcw->zcw_lwb != NULL || zcw->zcw_done) {
@ -2828,10 +2985,13 @@ zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
zil_get_commit_list(zilog);
zil_prune_commit_list(zilog);
zil_process_commit_list(zilog);
zil_process_commit_list(zilog, zcw, &ilwbs);
out:
mutex_exit(&zilog->zl_issuer_lock);
while ((lwb = list_remove_head(&ilwbs)) != NULL)
zil_lwb_write_issue(zilog, lwb);
list_destroy(&ilwbs);
}
static void
@ -2858,7 +3018,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
return;
/*
* In order to call zil_lwb_write_issue() we must hold the
* In order to call zil_lwb_write_close() we must hold the
* zilog's "zl_issuer_lock". We can't simply acquire that lock,
* since we're already holding the commit waiter's "zcw_lock",
* and those two locks are acquired in the opposite order
@ -2876,8 +3036,10 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
* the waiter is marked "done"), so without this check we could
* wind up with a use-after-free error below.
*/
if (zcw->zcw_done)
if (zcw->zcw_done) {
lwb = NULL;
goto out;
}
ASSERT3P(lwb, ==, zcw->zcw_lwb);
@ -2896,15 +3058,17 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
* if it's ISSUED or OPENED, and block any other threads that might
* attempt to issue this lwb. For that reason we hold the
* zl_issuer_lock when checking the lwb_state; we must not call
* zil_lwb_write_issue() if the lwb had already been issued.
* zil_lwb_write_close() if the lwb had already been issued.
*
* See the comment above the lwb_state_t structure definition for
* more details on the lwb states, and locking requirements.
*/
if (lwb->lwb_state == LWB_STATE_ISSUED ||
lwb->lwb_state == LWB_STATE_WRITE_DONE ||
lwb->lwb_state == LWB_STATE_FLUSH_DONE)
lwb->lwb_state == LWB_STATE_FLUSH_DONE) {
lwb = NULL;
goto out;
}
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
@ -2914,7 +3078,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
* since we've reached the commit waiter's timeout and it still
* hasn't been issued.
*/
lwb_t *nlwb = zil_lwb_write_issue(zilog, lwb);
lwb_t *nlwb = zil_lwb_write_close(zilog, lwb);
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
@ -2934,7 +3098,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
if (nlwb == NULL) {
/*
* When zil_lwb_write_issue() returns NULL, this
* When zil_lwb_write_close() returns NULL, this
* indicates zio_alloc_zil() failed to allocate the
* "next" lwb on-disk. When this occurs, the ZIL write
* pipeline must be stalled; see the comment within the
@ -2956,12 +3120,16 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
* lock, which occurs prior to calling dmu_tx_commit()
*/
mutex_exit(&zcw->zcw_lock);
zil_lwb_write_issue(zilog, lwb);
lwb = NULL;
zil_commit_writer_stall(zilog);
mutex_enter(&zcw->zcw_lock);
}
out:
mutex_exit(&zilog->zl_issuer_lock);
if (lwb)
zil_lwb_write_issue(zilog, lwb);
ASSERT(MUTEX_HELD(&zcw->zcw_lock));
}
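zil_commit_waiter_timeout() above needs zl_issuer_lock while already holding zcw_lock, but the documented order is issuer lock first; it therefore drops zcw_lock, takes the issuer lock, re-takes zcw_lock, and revalidates the waiter state before acting. A compact sketch of that drop-and-revalidate pattern with two pthread mutexes; the names and the boolean flag are illustrative assumptions:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t issuer_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t waiter_lock = PTHREAD_MUTEX_INITIALIZER;
static bool waiter_done;	/* protected by waiter_lock */

/*
 * Called with waiter_lock held.  The lock order is issuer_lock before
 * waiter_lock, so drop, reacquire in order, and revalidate.
 */
static void
timeout_path(void)
{
	pthread_mutex_unlock(&waiter_lock);
	pthread_mutex_lock(&issuer_lock);
	pthread_mutex_lock(&waiter_lock);

	/* The world may have changed while neither lock was held. */
	if (waiter_done) {
		pthread_mutex_unlock(&issuer_lock);
		return;	/* still holding waiter_lock, as the caller expects */
	}

	/* ... safe to act here with both locks held, in the right order ... */
	pthread_mutex_unlock(&issuer_lock);
}

int
main(void)
{
	pthread_mutex_lock(&waiter_lock);
	timeout_path();
	pthread_mutex_unlock(&waiter_lock);
	return (0);
}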
@ -2976,7 +3144,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
* waited "long enough" and the lwb is still in the "open" state.
*
* Given a sufficient amount of itxs being generated and written using
* the ZIL, the lwb's zio will be issued via the zil_lwb_commit()
* the ZIL, the lwb's zio will be issued via the zil_lwb_assign()
* function. If this does not occur, this secondary responsibility will
* ensure the lwb is issued even if there is no other synchronous
* activity on the system.
@ -3656,7 +3824,7 @@ zil_close(zilog_t *zilog)
/*
* zl_lwb_max_issued_txg may be larger than lwb_max_txg. It depends
* on the time when the dmu_tx transaction is assigned in
* zil_lwb_write_issue().
* zil_lwb_write_close().
*/
mutex_enter(&zilog->zl_lwb_io_lock);
txg = MAX(zilog->zl_lwb_max_issued_txg, txg);

View File

@ -1617,12 +1617,6 @@ zio_read_bp_init(zio_t *zio)
ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy);
}
if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
zio->io_flags |= ZIO_FLAG_DONT_CACHE;
if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
zio->io_flags |= ZIO_FLAG_DONT_CACHE;
if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL)
zio->io_pipeline = ZIO_DDT_READ_PIPELINE;
@ -3955,9 +3949,6 @@ zio_vdev_io_start(zio_t *zio)
zio->io_type == ZIO_TYPE_WRITE ||
zio->io_type == ZIO_TYPE_TRIM)) {
if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
return (zio);
if ((zio = vdev_queue_io(zio)) == NULL)
return (NULL);
@ -3994,9 +3985,6 @@ zio_vdev_io_done(zio_t *zio)
vd->vdev_ops != &vdev_draid_spare_ops) {
vdev_queue_io_done(zio);
if (zio->io_type == ZIO_TYPE_WRITE)
vdev_cache_write(zio);
if (zio_injection_enabled && zio->io_error == 0)
zio->io_error = zio_handle_device_injections(vd, zio,
EIO, EILSEQ);
@ -4106,8 +4094,7 @@ zio_vdev_io_assess(zio_t *zio)
ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_QUEUE)); /* not a leaf */
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_BYPASS)); /* not a leaf */
zio->io_error = 0;
zio->io_flags |= ZIO_FLAG_IO_RETRY |
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE;
zio->io_flags |= ZIO_FLAG_IO_RETRY | ZIO_FLAG_DONT_AGGREGATE;
zio->io_stage = ZIO_STAGE_VDEV_IO_START >> 1;
zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE,
zio_requeue_io_start_cut_in_line);

View File

@ -1203,8 +1203,7 @@ zvol_create_minors_recursive(const char *name)
* Prefetch is completed, we can do zvol_os_create_minor
* sequentially.
*/
while ((job = list_head(&minors_list)) != NULL) {
list_remove(&minors_list, job);
while ((job = list_remove_head(&minors_list)) != NULL) {
if (!job->error)
(void) zvol_os_create_minor(job->name);
kmem_strfree(job->name);
@ -1311,10 +1310,8 @@ zvol_remove_minors_impl(const char *name)
rw_exit(&zvol_state_lock);
/* Drop zvol_state_lock before calling zvol_free() */
while ((zv = list_head(&free_list)) != NULL) {
list_remove(&free_list, zv);
while ((zv = list_remove_head(&free_list)) != NULL)
zvol_os_free(zv);
}
}
/* Remove minor for this specific volume only */

View File

@ -128,7 +128,7 @@ tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress',
'zdb_display_block', 'zdb_encrypted', 'zdb_label_checksum',
'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_objset_id',
'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2']
'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2', 'zdb_backup']
pre =
post =
tags = ['functional', 'cli_root', 'zdb']
@ -472,7 +472,8 @@ tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
tags = ['functional', 'cli_root', 'zpool_replace']
[tests/functional/cli_root/zpool_resilver]
tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart',
'zpool_resilver_concurrent']
tags = ['functional', 'cli_root', 'zpool_resilver']
[tests/functional/cli_root/zpool_scrub]

View File

@ -25,3 +25,8 @@ tags = ['functional']
[tests/functional/cli_root/zfs_jail:FreeBSD]
tests = ['zfs_jail_001_pos']
tags = ['functional', 'cli_root', 'zfs_jail']
[tests/functional/pam:FreeBSD]
tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive',
'pam_short_password']
tags = ['functional', 'pam']

View File

@ -140,7 +140,8 @@ tests = ['umount_unlinked_drain']
tags = ['functional', 'mount']
[tests/functional/pam:Linux]
tests = ['pam_basic', 'pam_nounmount', 'pam_short_password']
tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive',
'pam_short_password']
tags = ['functional', 'pam']
[tests/functional/procfs:Linux]

View File

@ -152,6 +152,7 @@ known = {
['FAIL', rewind_reason],
'cli_user/misc/zfs_share_001_neg': ['SKIP', na_reason],
'cli_user/misc/zfs_unshare_001_neg': ['SKIP', na_reason],
'pool_checkpoint/checkpoint_discard_busy': ['SKIP', 12053],
'privilege/setup': ['SKIP', na_reason],
'refreserv/refreserv_004_pos': ['FAIL', known_reason],
'rootpool/setup': ['SKIP', na_reason],
@ -163,6 +164,8 @@ if sys.platform.startswith('freebsd'):
known.update({
'cli_root/zfs_receive/receive-o-x_props_override':
['FAIL', known_reason],
'cli_root/zpool_resilver/zpool_resilver_concurrent':
['SKIP', na_reason],
'cli_root/zpool_wait/zpool_wait_trim_basic': ['SKIP', trim_reason],
'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
@ -277,6 +280,8 @@ elif sys.platform.startswith('linux'):
'mmp/mmp_inactive_import': ['FAIL', known_reason],
'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621],
'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason],
'zvol/zvol_misc/zvol_misc_fua': ['SKIP', 14872],
'zvol/zvol_misc/zvol_misc_trim': ['SKIP', 14872],
'idmap_mount/idmap_mount_001': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_002': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason],

View File

@ -501,7 +501,7 @@ main(int argc, char *argv[])
srandom(seed);
zfs_btree_init();
zfs_btree_create(&bt, zfs_btree_compare, sizeof (uint64_t));
zfs_btree_create(&bt, zfs_btree_compare, NULL, sizeof (uint64_t));
/*
* This runs the named negative test. None of them should

View File

@ -572,6 +572,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zdb/zdb_006_pos.ksh \
functional/cli_root/zdb/zdb_args_neg.ksh \
functional/cli_root/zdb/zdb_args_pos.ksh \
functional/cli_root/zdb/zdb_backup.ksh \
functional/cli_root/zdb/zdb_block_size_histogram.ksh \
functional/cli_root/zdb/zdb_checksum.ksh \
functional/cli_root/zdb/zdb_decompress.ksh \
@ -1142,6 +1143,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_resilver/setup.ksh \
functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh \
functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh \
functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh \
functional/cli_root/zpool_scrub/cleanup.ksh \
functional/cli_root/zpool_scrub/setup.ksh \
functional/cli_root/zpool_scrub/zpool_scrub_001_neg.ksh \

View File

@ -0,0 +1,55 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2023, Klara Inc.
#
. $STF_SUITE/include/libtest.shlib
write_count=8
blksize=131072
tmpfile=$TEST_BASE_DIR/tmpfile
function cleanup
{
datasetexists $TESTPOOL && destroy_pool $TESTPOOL
rm $tmpfile.1 $tmpfile.2
}
log_onexit cleanup
log_assert "Verify that zfs send and zdb -B produce the same stream"
verify_runnable "global"
verify_disk_count "$DISKS" 2
default_mirror_setup_noexit $DISKS
file_write -o create -w -f $TESTDIR/file -b $blksize -c $write_count
snap=$TESTPOOL/$TESTFS@snap
log_must zfs snapshot $snap
typeset -i objsetid=$(zfs get -Ho value objsetid $snap)
sync_pool $TESTPOOL
log_must eval "zfs send -ecL $snap > $tmpfile.1"
log_must eval "zdb -B $TESTPOOL/$objsetid ecL > $tmpfile.2"
typeset sum1=$(cat $tmpfile.1 | md5sum)
typeset sum2=$(cat $tmpfile.2 | md5sum)
log_must test "$sum1" = "$sum2"
log_pass "zfs send and zdb -B produce the same stream"

View File

@ -0,0 +1,101 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
#
# DESCRIPTION:
# Verify 'zpool clear' doesn't cause concurrent resilvers
#
# STRATEGY:
# 1. Create N(10) virtual disk files.
# 2. Create draid pool based on the virtual disk files.
# 3. Fill the filesystem with directories and files.
# 4. Force-fault 2 vdevs and verify distributed spare is kicked in.
# 5. Free the distributed spare by replacing the faulty drive.
# 6. Run zpool clear and verify that it does not initiate 2 resilvers
# concurrently while distributed spare gets kicked in.
#
verify_runnable "global"
typeset -ir devs=10
typeset -ir nparity=1
typeset -ir ndata=8
typeset -ir dspare=1
function cleanup
{
poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
for i in {0..$devs}; do
log_must rm -f "$BASEDIR/vdev$i"
done
for dir in $BASEDIR; do
if [[ -d $dir ]]; then
log_must rm -rf $dir
fi
done
zed_stop
zed_cleanup
}
log_assert "Verify zpool clear on draid pool doesn't cause concurrent resilvers"
log_onexit cleanup
setup_test_env $TESTPOOL draid${nparity}:${ndata}d:${dspare}s $devs
# ZED needed for sequential resilver
zed_setup
log_must zed_start
log_must zpool offline -f $TESTPOOL $BASEDIR/vdev5
log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
log_must zpool wait -t resilver $TESTPOOL
log_must zpool offline -f $TESTPOOL $BASEDIR/vdev6
log_must zpool labelclear -f $BASEDIR/vdev5
log_must zpool labelclear -f $BASEDIR/vdev6
log_must zpool replace -w $TESTPOOL $BASEDIR/vdev5
sync_pool $TESTPOOL
log_must zpool events -c
log_must zpool clear $TESTPOOL
log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
log_must zpool wait -t resilver $TESTPOOL
log_must zpool wait -t scrub $TESTPOOL
nof_resilver=$(zpool events | grep -c resilver_start)
if [ $nof_resilver = 1 ] ; then
log_must verify_pool $TESTPOOL
log_pass "zpool clear on draid pool doesn't cause concurrent resilvers"
else
log_fail "FAIL: sequential and healing resilver initiated concurrently"
fi

View File

@ -25,7 +25,7 @@
is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing"
set -A args "" "-s \",\"" "-v" \
"-f time,zcwc,zimnb,zimsb"
"-f time,cwc,imnb,imsb"
log_assert "zilstat generates output and doesn't return an error code"

View File

@ -25,5 +25,6 @@
rmconfig
destroy_pool $TESTPOOL
del_user ${username}
del_user ${username}rec
del_group pamtestgroup
log_must rm -rf "$runstatedir" $TESTDIRS

View File

@ -0,0 +1,55 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
. $STF_SUITE/tests/functional/pam/utilities.kshlib
if [ -n "$ASAN_OPTIONS" ]; then
export LD_PRELOAD=$(ldd "$(command -v zfs)" | awk '/libasan\.so/ {print $3}')
fi
log_mustnot ismounted "$TESTPOOL/pam/${username}"
keystatus unavailable
genconfig "homes=$TESTPOOL/pam runstatedir=${runstatedir}"
printf "testpass\nsecondpass\nsecondpass\n" | pamtester -v ${pamservice} ${username} chauthtok
log_mustnot ismounted "$TESTPOOL/pam/${username}"
keystatus unavailable
echo "secondpass" | pamtester ${pamservice} ${username} open_session
references 1
log_must ismounted "$TESTPOOL/pam/${username}"
keystatus available
printf "secondpass\ntestpass\ntestpass\n" | pamtester -v ${pamservice} ${username} chauthtok
log_must ismounted "$TESTPOOL/pam/${username}"
keystatus available
log_must pamtester ${pamservice} ${username} close_session
references 0
log_mustnot ismounted "$TESTPOOL/pam/${username}"
keystatus unavailable
log_pass "done."

View File

@ -0,0 +1,72 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
. $STF_SUITE/tests/functional/pam/utilities.kshlib
if [ -n "$ASAN_OPTIONS" ]; then
export LD_PRELOAD=$(ldd "$(command -v zfs)" | awk '/libasan\.so/ {print $3}')
fi
username="${username}rec"
# Set up a deeper hierarchy, a mountpoint that doesn't interfere with other tests,
# and a user which references that mountpoint
log_must zfs create "$TESTPOOL/pampam"
log_must zfs create -o mountpoint="$TESTDIR/rec" "$TESTPOOL/pampam/pam"
echo "recurpass" | zfs create -o encryption=aes-256-gcm -o keyformat=passphrase \
-o keylocation=prompt "$TESTPOOL/pampam/pam/${username}"
log_must zfs unmount "$TESTPOOL/pampam/pam/${username}"
log_must zfs unload-key "$TESTPOOL/pampam/pam/${username}"
log_must add_user pamtestgroup ${username} "$TESTDIR/rec"
function keystatus {
log_must [ "$(get_prop keystatus "$TESTPOOL/pampam/pam/${username}")" = "$1" ]
}
log_mustnot ismounted "$TESTPOOL/pampam/pam/${username}"
keystatus unavailable
function test_session {
echo "recurpass" | pamtester ${pamservice} ${username} open_session
references 1
log_must ismounted "$TESTPOOL/pampam/pam/${username}"
keystatus available
log_must pamtester ${pamservice} ${username} close_session
references 0
log_mustnot ismounted "$TESTPOOL/pampam/pam/${username}"
keystatus unavailable
}
genconfig "homes=$TESTPOOL/pampam/pam prop_mountpoint runstatedir=${runstatedir}"
test_session
genconfig "homes=$TESTPOOL/pampam recursive_homes prop_mountpoint runstatedir=${runstatedir}"
test_session
genconfig "homes=$TESTPOOL recursive_homes prop_mountpoint runstatedir=${runstatedir}"
test_session
genconfig "homes=* recursive_homes prop_mountpoint runstatedir=${runstatedir}"
test_session
log_pass "done."

View File

@ -52,7 +52,7 @@ log_must ismounted "$TESTPOOL/pam/${username}"
keystatus available
# Change user and dataset password to short one.
printf "short\nshort\n" | pamtester ${pamservice} ${username} chauthtok
printf "testpass\nshort\nshort\n" | pamtester -v ${pamservice} ${username} chauthtok
# Unmount and unload key.
log_must pamtester ${pamservice} ${username} close_session

View File

@ -38,6 +38,8 @@
verify_runnable "global"
log_unsupported "Skipping, issue https://github.com/openzfs/zfs/issues/12053"
function test_cleanup
{
# reset memory limit to 16M

View File

@ -45,6 +45,15 @@ fi
if ! is_linux ; then
log_unsupported "Only linux supports dd with oflag=dsync for FUA writes"
else
if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then
log_unsupported "Disabled while issue #14872 is being worked"
fi
# Disabled for the CentOS 9 kernel
if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then
log_unsupported "Disabled while issue #14872 is being worked"
fi
fi
typeset datafile1="$(mktemp zvol_misc_fua1.XXXXXX)"

View File

@ -44,6 +44,15 @@
verify_runnable "global"
if is_linux ; then
if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then
log_unsupported "Disabled while issue #14872 is being worked"
fi
# Disabled for the CentOS 9 kernel
if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then
log_unsupported "Disabled while issue #14872 is being worked"
fi
# We need '--force' here since the prior tests may leave a filesystem
# on the zvol, and blkdiscard will see that filesystem and print a
# warning unless you force it.
@ -123,7 +132,6 @@ log_must zfs set compression=off $TESTPOOL/$TESTVOL
# Remove old data from previous tests
log_must $trimcmd $zvolpath
set_blk_mq 1
log_must_busy zpool export $TESTPOOL
log_must zpool import $TESTPOOL

View File

@ -295,7 +295,6 @@ SRCS+= abd.c \
uberblock.c \
unique.c \
vdev.c \
vdev_cache.c \
vdev_draid.c \
vdev_draid_rand.c \
vdev_indirect.c \

View File

@ -653,6 +653,9 @@
/* qat is enabled and existed */
/* #undef HAVE_QAT */
/* struct reclaim_state has reclaimed */
/* #undef HAVE_RECLAIM_STATE_RECLAIMED */
/* register_shrinker is vararg */
/* #undef HAVE_REGISTER_SHRINKER_VARARG */
@ -1048,7 +1051,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gad0a55461"
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gfeff9dfed"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@ -1057,7 +1060,7 @@
/* #undef ZFS_META_DATA */
/* Define the maximum compatible kernel version. */
#define ZFS_META_KVER_MAX "6.2"
#define ZFS_META_KVER_MAX "6.3"
/* Define the minimum compatible kernel version. */
#define ZFS_META_KVER_MIN "3.10"
@ -1078,7 +1081,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "FreeBSD_gad0a55461"
#define ZFS_META_RELEASE "FreeBSD_gfeff9dfed"
/* Define the project version. */
#define ZFS_META_VERSION "2.1.99"

View File

@ -1 +1 @@
#define ZFS_META_GITREV "zfs-2.1.99-1955-gad0a55461"
#define ZFS_META_GITREV "zfs-2.1.99-1993-gfeff9dfed"