Notable upstream pull request merges:
 #15516 da51bd17e Fix snap_obj_array memory leak in check_filesystem()
 #15519 35da34516 L2ARC: Restrict write size to 1/4 of the device
 #15529 03e9caaec Add a tunable to disable BRT support

Obtained from:	OpenZFS
OpenZFS commit:	03e9caaec0
commit 47bb16f8f0
Author:	Martin Matuska
Date:	2023-11-17 09:39:42 +01:00

23 changed files with 87 additions and 71 deletions

View file

@@ -83,6 +83,7 @@
 modules.order
 Makefile
 Makefile.in
+changelog
 *.patch
 *.orig
 *.tmp

View file

@@ -6,5 +6,5 @@ Release: 1
 Release-Tags: relext
 License: CDDL
 Author: OpenZFS
-Linux-Maximum: 6.5
+Linux-Maximum: 6.6
 Linux-Minimum: 3.10

View file

@@ -6,7 +6,6 @@ edonr
 embedded_data
 empty_bpobj
 enabled_txg
-encryption
 extensible_dataset
 filesystem_limits
 hole_birth

View file

@@ -67,6 +67,7 @@ ZFS_AC_DEBUG_INVARIANTS
 AC_CONFIG_FILES([
 	contrib/debian/rules
+	contrib/debian/changelog
 	Makefile
 	include/Makefile
 	lib/libzfs/libzfs.pc

View file

@@ -1,3 +1,9 @@
+openzfs-linux (@VERSION@-1) unstable; urgency=low
+
+  * OpenZFS @VERSION@ is tagged.
+
+ -- Umer Saleem <usaleem@ixsystems.com>  Wed, 15 Nov 2023 15:00:00 +0500
+
 openzfs-linux (2.2.99-1) unstable; urgency=low
 
   * OpenZFS 2.2 is tagged.

View file

@@ -70,8 +70,6 @@ typedef enum kmem_cbrc {
 #define KMC_REAP_CHUNK		INT_MAX
 #define KMC_DEFAULT_SEEKS	1
 
-#define KMC_RECLAIM_ONCE	0x1	/* Force a single shrinker pass */
-
 extern struct list_head spl_kmem_cache_list;
 extern struct rw_semaphore spl_kmem_cache_sem;
@@ -108,7 +106,7 @@ typedef struct spl_kmem_magazine {
 	uint32_t		skm_refill;	/* Batch refill size */
 	struct spl_kmem_cache	*skm_cache;	/* Owned by cache */
 	unsigned int		skm_cpu;	/* Owned by cpu */
-	void			*skm_objs[0];	/* Object pointers */
+	void			*skm_objs[];	/* Object pointers */
 } spl_kmem_magazine_t;
 
 typedef struct spl_kmem_obj {

View file

@ -45,6 +45,8 @@ extern "C" {
typedef struct zfsvfs zfsvfs_t;
struct znode;
extern int zfs_bclone_enabled;
/*
* This structure emulates the vfs_t from other platforms. It's purpose
* is to facilitate the handling of mount options and minimize structural

View file

@@ -136,7 +136,7 @@ typedef struct raidz_row {
 	uint64_t rr_offset;		/* Logical offset for *_io_verify() */
 	uint64_t rr_size;		/* Physical size for *_io_verify() */
 #endif
-	raidz_col_t rr_col[0];		/* Flexible array of I/O columns */
+	raidz_col_t rr_col[];		/* Flexible array of I/O columns */
 } raidz_row_t;
 
 typedef struct raidz_map {
@@ -149,7 +149,7 @@ typedef struct raidz_map {
 	zfs_locked_range_t *rm_lr;
 	const raidz_impl_ops_t *rm_ops;	/* RAIDZ math operations */
 	raidz_col_t *rm_phys_col;	/* if non-NULL, read i/o aggregation */
-	raidz_row_t *rm_row[0];		/* flexible array of rows */
+	raidz_row_t *rm_row[];		/* flexible array of rows */
 } raidz_map_t;
 
 /*

View file

@@ -31,14 +31,6 @@ for use by the kmem caches.
 For the majority of systems and workloads only a small number of threads are
 required.
 .
-.It Sy spl_kmem_cache_reclaim Ns = Ns Sy 0 Pq uint
-When this is set it prevents Linux from being able to rapidly reclaim all the
-memory held by the kmem caches.
-This may be useful in circumstances where it's preferable that Linux
-reclaim memory from some other subsystem first.
-Setting this will increase the likelihood of out of memory events on a
-memory constrained system.
-.
 .It Sy spl_kmem_cache_obj_per_slab Ns = Ns Sy 8 Pq uint
 The preferred number of objects per slab in the cache.
 In general, a larger value will increase the cache's memory footprint

View file

@@ -1154,6 +1154,11 @@ Selecting any option other than
 results in vector instructions
 from the respective CPU instruction set being used.
 .
+.It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable the experimental block cloning feature.
+If this setting is 0, then even if feature@block_cloning is enabled,
+attempts to clone blocks will act as though the feature is disabled.
+.
 .It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
 Select a BLAKE3 implementation.
 .Pp
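
The tunable is runtime-adjustable through the Linux module parameter interface (it is registered with mode 0644 later in this diff). A minimal sketch; the FreeBSD sysctl spelling is an assumption based on the tunables.cfg mapping added below:

	# Linux: disable block cloning at runtime
	echo 0 > /sys/module/zfs/parameters/zfs_bclone_enabled

	# Persist the setting across module reloads
	echo "options zfs zfs_bclone_enabled=0" >> /etc/modprobe.d/zfs.conf

	# FreeBSD (assumed sysctl name per the vfs.zfs mapping)
	sysctl vfs.zfs.bclone_enabled=0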

View file

@@ -219,8 +219,11 @@ to the end of the line is ignored.
 .Bd -literal -compact -offset 4n
 .No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2
 # Features which are supported by GRUB2
+allocation_classes
 async_destroy
+block_cloning
 bookmarks
+device_rebuild
 embedded_data
 empty_bpobj
 enabled_txg
@@ -229,8 +232,14 @@ filesystem_limits
 hole_birth
 large_blocks
+livelist
+log_spacemap
 lz4_compress
+project_quota
+resilver_defer
 spacemap_histogram
+spacemap_v2
+userobj_accounting
 zilsaxattr
 zpool_checkpoint
 .No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev

View file

@@ -489,6 +489,10 @@ zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
 zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
 zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
 
+UBSAN_SANITIZE_zap_leaf.o := n
+UBSAN_SANITIZE_zap_micro.o := n
+UBSAN_SANITIZE_sa.o := n
+
 # Suppress incorrect warnings from versions of objtool which are not
 # aware of x86 EVEX prefix instructions used for AVX512.
 OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y

View file

@@ -76,17 +76,6 @@ module_param(spl_kmem_cache_magazine_size, uint, 0444);
 MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
 	"Default magazine size (2-256), set automatically (0)");
 
-/*
- * The default behavior is to report the number of objects remaining in the
- * cache. This allows the Linux VM to repeatedly reclaim objects from the
- * cache when memory is low to satisfy other memory allocations. Alternately,
- * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache
- * is reclaimed. This may increase the likelihood of out of memory events.
- */
-static unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
-module_param(spl_kmem_cache_reclaim, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
-
 static unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
 module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
 MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");

View file

@@ -4249,4 +4249,8 @@ EXPORT_SYMBOL(zfs_map);
 module_param(zfs_delete_blocks, ulong, 0644);
 MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
 
+/* CSTYLED */
+module_param(zfs_bclone_enabled, uint, 0644);
+MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
+
 #endif

View file

@@ -31,6 +31,8 @@
 #include <sys/zfs_vnops.h>
 #include <sys/zfeature.h>
 
+int zfs_bclone_enabled = 1;
+
 /*
  * Clone part of a file via block cloning.
  *
@@ -50,6 +52,9 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
 	fstrans_cookie_t cookie;
 	int err;
 
+	if (!zfs_bclone_enabled)
+		return (-EOPNOTSUPP);
+
 	if (!spa_feature_is_enabled(
 	    dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
 		return (-EOPNOTSUPP);
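
The new guard runs before the feature check, so with the tunable off every clone request is refused with EOPNOTSUPP regardless of pool state. A quick way to observe this from userspace, assuming a hypothetical pool mounted at /tank (cp's --reflink path goes through the FICLONE/copy_file_range entry points that land here):

	echo 0 > /sys/module/zfs/parameters/zfs_bclone_enabled
	cp --reflink=always /tank/a /tank/b    # fails: Operation not supported

	echo 1 > /sys/module/zfs/parameters/zfs_bclone_enabled
	cp --reflink=always /tank/a /tank/b    # clones if feature@block_cloning is enabled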

View file

@@ -8035,9 +8035,8 @@ l2arc_write_size(l2arc_dev_t *dev)
 	 */
 	size = l2arc_write_max;
 	if (size == 0) {
-		cmn_err(CE_NOTE, "Bad value for l2arc_write_max, value must "
-		    "be greater than zero, resetting it to the default (%d)",
-		    L2ARC_WRITE_SIZE);
+		cmn_err(CE_NOTE, "l2arc_write_max must be greater than zero, "
+		    "resetting it to the default (%d)", L2ARC_WRITE_SIZE);
 		size = l2arc_write_max = L2ARC_WRITE_SIZE;
 	}
@@ -8060,30 +8059,9 @@ l2arc_write_size(l2arc_dev_t *dev)
 	 * device. This is important in l2arc_evict(), otherwise infinite
 	 * iteration can occur.
 	 */
-	if (size > dev->l2ad_end - dev->l2ad_start) {
-		cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
-		    "plus the overhead of log blocks (persistent L2ARC, "
-		    "%llu bytes) exceeds the size of the cache device "
-		    "(guid %llu), resetting them to the default (%d)",
-		    (u_longlong_t)l2arc_log_blk_overhead(size, dev),
-		    (u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
-
-		size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
-
-		if (l2arc_trim_ahead > 1) {
-			cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
-			l2arc_trim_ahead = 1;
-		}
-
-		if (arc_warm == B_FALSE)
-			size += l2arc_write_boost;
-
-		size += l2arc_log_blk_overhead(size, dev);
-		if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
-			size += MAX(64 * 1024 * 1024,
-			    (size * l2arc_trim_ahead) / 100);
-		}
-	}
+	size = MIN(size, (dev->l2ad_end - dev->l2ad_start) / 4);
+
+	size = P2ROUNDUP(size, 1ULL << dev->l2ad_vdev->vdev_ashift);
 
 	return (size);
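
The effect of the rewrite: instead of resetting misconfigured tunables with a console warning, the requested write size is clamped to a quarter of the cache device and then rounded up to the device's allocation size (P2ROUNDUP by ashift). For a 1 GiB cache device, a requested 1 GiB feed is therefore capped at 256 MiB per iteration. A rough way to watch the clamp on a live Linux system (device size and the specific numbers are illustrative; the kstat path is the standard one):

	# Request 1 GiB per feed against a 1 GiB cache device...
	echo $((1024 * 1024 * 1024)) > /sys/module/zfs/parameters/l2arc_write_max

	# ...then watch L2ARC growth in arcstats: per-interval growth should
	# level off near 256 MiB (1 GiB / 4), not the requested 1 GiB.
	grep '^l2_size' /proc/spl/kstat/zfs/arcstats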

View file

@@ -425,8 +425,10 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
 		dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
 	}
 
-	if (zap_clone == 0 || aff_snap_count == 0)
-		return (0);
+	if (zap_clone == 0 || aff_snap_count == 0) {
+		error = 0;
+		goto out;
+	}
 
 	/* Check clones. */
 	zap_cursor_t *zc;
View file

@@ -3334,6 +3334,21 @@ function set_tunable_impl
 	esac
 }
 
+function save_tunable
+{
+	[[ ! -d $TEST_BASE_DIR ]] && return 1
+	[[ -e $TEST_BASE_DIR/tunable-$1 ]] && return 2
+	echo "$(get_tunable """$1""")" > "$TEST_BASE_DIR"/tunable-"$1"
+}
+
+function restore_tunable
+{
+	[[ ! -e $TEST_BASE_DIR/tunable-$1 ]] && return 1
+	val="$(cat $TEST_BASE_DIR/tunable-"""$1""")"
+	set_tunable64 "$1" "$val"
+	rm $TEST_BASE_DIR/tunable-$1
+}
+
 #
 # Get a global system tunable
 #
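
Together the helpers give tests a save/modify/restore pattern; the bclone setup and cleanup scripts later in this diff use them exactly this way:

	# setup: remember the current value, then force the feature on
	log_must save_tunable BCLONE_ENABLED
	log_must set_tunable32 BCLONE_ENABLED 1

	# cleanup: put back whatever the system had before the test
	log_must restore_tunable BCLONE_ENABLED

Note that save_tunable returns 2 rather than overwriting an existing saved value, so a run that dies before cleanup cannot clobber the original setting on the next attempt.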

View file

@@ -93,6 +93,7 @@ VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
 VOL_MODE vol.mode zvol_volmode
 VOL_RECURSIVE vol.recursive UNSUPPORTED
 VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
+BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
 XATTR_COMPAT xattr_compat zfs_xattr_compat
 ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
 ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max

View file

@ -31,4 +31,8 @@ verify_runnable "global"
default_cleanup_noexit
if tunable_exists BCLONE_ENABLED ; then
log_must restore_tunable BCLONE_ENABLED
fi
log_pass

View file

@@ -33,4 +33,9 @@ fi
 
 verify_runnable "global"
 
+if tunable_exists BCLONE_ENABLED ; then
+	log_must save_tunable BCLONE_ENABLED
+	log_must set_tunable32 BCLONE_ENABLED 1
+fi
+
 log_pass

View file

@@ -31,15 +31,13 @@
 # 2. Set l2arc_write_max to a value larger than the cache device.
 # 3. Create a file larger than the cache device and random read
 #    for 10 sec.
-# 4. Verify that l2arc_write_max is set back to the default.
-# 5. Set l2arc_write_max to a value less than the cache device size but
+# 4. Set l2arc_write_max to a value less than the cache device size but
 #    larger than the default (256MB).
-# 6. Record the l2_size.
-# 7. Random read for 1 sec.
-# 8. Record the l2_size again.
-# 9. If (6) <= (8) then we have not looped around yet.
-# 10. If (6) > (8) then we looped around. Break out of the loop and test.
-# 11. Destroy pool.
+# 5. Record the l2_size.
+# 6. Random read for 1 sec.
+# 7. Record the l2_size again.
+# 8. If (5) <= (7) then we have not looped around yet.
+# 9. Destroy pool.
 #
 
 verify_runnable "global"
@@ -93,10 +91,6 @@ log_must zfs set relatime=off $TESTPOOL
 log_must fio $FIO_SCRIPTS/mkfiles.fio
 log_must fio $FIO_SCRIPTS/random_reads.fio
 
-typeset write_max2=$(get_tunable L2ARC_WRITE_MAX)
-
-log_must test $write_max2 -eq $write_max
-
 log_must set_tunable32 L2ARC_WRITE_MAX $(( 256 * 1024 * 1024 ))
 
 export RUNTIME=1
@@ -108,8 +102,6 @@ while $do_once || [[ $l2_size1 -le $l2_size2 ]]; do
 	do_once=false
 done
 
-log_must test $l2_size1 -gt $l2_size2
-
 log_must zpool destroy $TESTPOOL
 log_pass "Looping around a cache device succeeds."

View file

@@ -34,6 +34,7 @@
 # STRATEGY:
 # 1. Create a pool with a known feature set.
 # 2. Verify only those features are active/enabled.
+# 3. Do this for all known feature sets.
 #
 
 verify_runnable "global"
@@ -47,8 +48,11 @@ log_onexit cleanup
 
 log_assert "creates a pool with a specified feature set enabled"
 
-log_must zpool create -f -o compatibility=compat-2020 $TESTPOOL $DISKS
-check_feature_set $TESTPOOL compat-2020
-log_must zpool destroy -f $TESTPOOL
+for compat in "$ZPOOL_COMPAT_DIR"/*
+do
+	log_must zpool create -f -o compatibility="${compat##*/}" $TESTPOOL $DISKS
+	check_feature_set $TESTPOOL "${compat##*/}"
+	log_must zpool destroy -f $TESTPOOL
+done
 
 log_pass "creates a pool with a specified feature set enabled"
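
Each basename under $ZPOOL_COMPAT_DIR becomes a -o compatibility= value, so the test now exercises every shipped feature-set file rather than only compat-2020. A sketch of what one iteration amounts to, using the directory from the zpool-features.7 excerpt above (available names vary by version; grub2 and compat-2020 are the two names that appear elsewhere in this diff):

	ls /usr/share/zfs/compatibility.d      # e.g. compat-2020, grub2, ...
	zpool create -f -o compatibility=grub2 $TESTPOOL $DISKS
	check_feature_set $TESTPOOL grub2
	zpool destroy -f $TESTPOOL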