Notable upstream pull request merges:
 #15516 da51bd17e Fix snap_obj_array memory leak in check_filesystem()
 #15519 35da34516 L2ARC: Restrict write size to 1/4 of the device
 #15529 03e9caaec Add a tunable to disable BRT support

Obtained from:	OpenZFS
OpenZFS commit:	03e9caaec0
commit 47bb16f8f0
Author:	Martin Matuska
Date:	2023-11-17 09:39:42 +01:00

23 changed files with 87 additions and 71 deletions

View file

@@ -83,6 +83,7 @@
 modules.order
 Makefile
 Makefile.in
+changelog
 *.patch
 *.orig
 *.tmp

View file

@@ -6,5 +6,5 @@ Release: 1
 Release-Tags: relext
 License: CDDL
 Author: OpenZFS
-Linux-Maximum: 6.5
+Linux-Maximum: 6.6
 Linux-Minimum: 3.10

View file

@@ -6,7 +6,6 @@ edonr
 embedded_data
 empty_bpobj
 enabled_txg
-encryption
 extensible_dataset
 filesystem_limits
 hole_birth

View file

@@ -67,6 +67,7 @@ ZFS_AC_DEBUG_INVARIANTS
 AC_CONFIG_FILES([
 	contrib/debian/rules
+	contrib/debian/changelog
 	Makefile
 	include/Makefile
 	lib/libzfs/libzfs.pc

View file

@@ -1,3 +1,9 @@
+openzfs-linux (@VERSION@-1) unstable; urgency=low
+
+  * OpenZFS @VERSION@ is tagged.
+
+ -- Umer Saleem <usaleem@ixsystems.com>  Wed, 15 Nov 2023 15:00:00 +0500
+
 openzfs-linux (2.2.99-1) unstable; urgency=low
 
   * OpenZFS 2.2 is tagged.

View file

@@ -70,8 +70,6 @@ typedef enum kmem_cbrc {
 #define KMC_REAP_CHUNK		INT_MAX
 #define KMC_DEFAULT_SEEKS	1
 
-#define KMC_RECLAIM_ONCE	0x1	/* Force a single shrinker pass */
-
 extern struct list_head spl_kmem_cache_list;
 extern struct rw_semaphore spl_kmem_cache_sem;
@@ -108,7 +106,7 @@ typedef struct spl_kmem_magazine {
 	uint32_t		skm_refill;	/* Batch refill size */
 	struct spl_kmem_cache	*skm_cache;	/* Owned by cache */
 	unsigned int		skm_cpu;	/* Owned by cpu */
-	void			*skm_objs[0];	/* Object pointers */
+	void			*skm_objs[];	/* Object pointers */
 } spl_kmem_magazine_t;
 
 typedef struct spl_kmem_obj {

View file

@ -45,6 +45,8 @@ extern "C" {
typedef struct zfsvfs zfsvfs_t;
struct znode;
extern int zfs_bclone_enabled;
/*
* This structure emulates the vfs_t from other platforms. It's purpose
* is to facilitate the handling of mount options and minimize structural

View file

@@ -136,7 +136,7 @@ typedef struct raidz_row {
 	uint64_t rr_offset;		/* Logical offset for *_io_verify() */
 	uint64_t rr_size;		/* Physical size for *_io_verify() */
 #endif
-	raidz_col_t rr_col[0];		/* Flexible array of I/O columns */
+	raidz_col_t rr_col[];		/* Flexible array of I/O columns */
 } raidz_row_t;
 
 typedef struct raidz_map {
@@ -149,7 +149,7 @@ typedef struct raidz_map {
 	zfs_locked_range_t *rm_lr;
 	const raidz_impl_ops_t *rm_ops;	/* RAIDZ math operations */
 	raidz_col_t *rm_phys_col;	/* if non-NULL, read i/o aggregation */
-	raidz_row_t *rm_row[0];		/* flexible array of rows */
+	raidz_row_t *rm_row[];		/* flexible array of rows */
 } raidz_map_t;
 
 /*

View file

@@ -31,14 +31,6 @@ for use by the kmem caches.
 For the majority of systems and workloads only a small number of threads are
 required.
 .
-.It Sy spl_kmem_cache_reclaim Ns = Ns Sy 0 Pq uint
-When this is set it prevents Linux from being able to rapidly reclaim all the
-memory held by the kmem caches.
-This may be useful in circumstances where it's preferable that Linux
-reclaim memory from some other subsystem first.
-Setting this will increase the likelihood of out of memory events on a
-memory constrained system.
-.
 .It Sy spl_kmem_cache_obj_per_slab Ns = Ns Sy 8 Pq uint
 The preferred number of objects per slab in the cache.
 In general, a larger value will increase the cache's memory footprint

View file

@@ -1154,6 +1154,11 @@ Selecting any option other than
 results in vector instructions
 from the respective CPU instruction set being used.
 .
+.It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable the experimental block cloning feature.
+If this setting is 0, then even if feature@block_cloning is enabled,
+attempts to clone blocks will act as though the feature is disabled.
+.
 .It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
 Select a BLAKE3 implementation.
 .Pp
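
The tunable is runtime-adjustable through the Linux module parameter interface (it is registered with mode 0644 later in this diff). A minimal sketch; the FreeBSD sysctl spelling is an assumption based on the tunables.cfg mapping added below:

	# Linux: disable block cloning at runtime
	echo 0 > /sys/module/zfs/parameters/zfs_bclone_enabled

	# Persist the setting across module reloads
	echo "options zfs zfs_bclone_enabled=0" >> /etc/modprobe.d/zfs.conf

	# FreeBSD (assumed sysctl name per the vfs.zfs mapping)
	sysctl vfs.zfs.bclone_enabled=0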

View file

@@ -219,8 +219,11 @@ to the end of the line is ignored.
 .Bd -literal -compact -offset 4n
 .No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2
 # Features which are supported by GRUB2
+allocation_classes
 async_destroy
+block_cloning
 bookmarks
+device_rebuild
 embedded_data
 empty_bpobj
 enabled_txg
@@ -229,8 +232,14 @@ filesystem_limits
 hole_birth
 large_blocks
+livelist
+log_spacemap
 lz4_compress
+project_quota
+resilver_defer
 spacemap_histogram
+spacemap_v2
+userobj_accounting
 zilsaxattr
 zpool_checkpoint
 .No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev

View file

@@ -489,6 +489,10 @@ zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
 zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
 zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
 
+UBSAN_SANITIZE_zap_leaf.o := n
+UBSAN_SANITIZE_zap_micro.o := n
+UBSAN_SANITIZE_sa.o := n
+
 # Suppress incorrect warnings from versions of objtool which are not
 # aware of x86 EVEX prefix instructions used for AVX512.
 OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y

View file

@@ -76,17 +76,6 @@ module_param(spl_kmem_cache_magazine_size, uint, 0444);
 MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
 	"Default magazine size (2-256), set automatically (0)");
 
-/*
- * The default behavior is to report the number of objects remaining in the
- * cache. This allows the Linux VM to repeatedly reclaim objects from the
- * cache when memory is low to satisfy other memory allocations. Alternately,
- * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache
- * is reclaimed. This may increase the likelihood of out of memory events.
- */
-static unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
-module_param(spl_kmem_cache_reclaim, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
-
 static unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
 module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
 MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");

View file

@@ -4249,4 +4249,8 @@ EXPORT_SYMBOL(zfs_map);
 module_param(zfs_delete_blocks, ulong, 0644);
 MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
 
+/* CSTYLED */
+module_param(zfs_bclone_enabled, uint, 0644);
+MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
+
 #endif

View file

@@ -31,6 +31,8 @@
 #include <sys/zfs_vnops.h>
 #include <sys/zfeature.h>
 
+int zfs_bclone_enabled = 1;
+
 /*
  * Clone part of a file via block cloning.
  *
@@ -50,6 +52,9 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
 	fstrans_cookie_t cookie;
 	int err;
 
+	if (!zfs_bclone_enabled)
+		return (-EOPNOTSUPP);
+
 	if (!spa_feature_is_enabled(
 	    dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
 		return (-EOPNOTSUPP);
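
The new guard runs before the feature check, so with the tunable off every clone request is refused with EOPNOTSUPP regardless of pool state. A quick way to observe this from userspace, assuming a hypothetical pool mounted at /tank (cp's --reflink path goes through the FICLONE/copy_file_range entry points that land here):

	echo 0 > /sys/module/zfs/parameters/zfs_bclone_enabled
	cp --reflink=always /tank/a /tank/b    # fails: Operation not supported

	echo 1 > /sys/module/zfs/parameters/zfs_bclone_enabled
	cp --reflink=always /tank/a /tank/b    # clones if feature@block_cloning is enabled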

View file

@@ -8035,9 +8035,8 @@ l2arc_write_size(l2arc_dev_t *dev)
 	 */
 	size = l2arc_write_max;
 	if (size == 0) {
-		cmn_err(CE_NOTE, "Bad value for l2arc_write_max, value must "
-		    "be greater than zero, resetting it to the default (%d)",
-		    L2ARC_WRITE_SIZE);
+		cmn_err(CE_NOTE, "l2arc_write_max must be greater than zero, "
+		    "resetting it to the default (%d)", L2ARC_WRITE_SIZE);
 		size = l2arc_write_max = L2ARC_WRITE_SIZE;
 	}
@@ -8060,30 +8059,9 @@ l2arc_write_size(l2arc_dev_t *dev)
 	 * device. This is important in l2arc_evict(), otherwise infinite
 	 * iteration can occur.
 	 */
-	if (size > dev->l2ad_end - dev->l2ad_start) {
-		cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
-		    "plus the overhead of log blocks (persistent L2ARC, "
-		    "%llu bytes) exceeds the size of the cache device "
-		    "(guid %llu), resetting them to the default (%d)",
-		    (u_longlong_t)l2arc_log_blk_overhead(size, dev),
-		    (u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
-
-		size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
-
-		if (l2arc_trim_ahead > 1) {
-			cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
-			l2arc_trim_ahead = 1;
-		}
-
-		if (arc_warm == B_FALSE)
-			size += l2arc_write_boost;
-
-		size += l2arc_log_blk_overhead(size, dev);
-		if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
-			size += MAX(64 * 1024 * 1024,
-			    (size * l2arc_trim_ahead) / 100);
-		}
-	}
+	size = MIN(size, (dev->l2ad_end - dev->l2ad_start) / 4);
+
+	size = P2ROUNDUP(size, 1ULL << dev->l2ad_vdev->vdev_ashift);
 
 	return (size);
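
The effect of the rewrite: instead of resetting misconfigured tunables with a console warning, the requested write size is clamped to a quarter of the cache device and then rounded up to the device's allocation size (P2ROUNDUP by ashift). For a 1 GiB cache device, a requested 1 GiB feed is therefore capped at 256 MiB per iteration. A rough way to watch the clamp on a live Linux system (device size and the specific numbers are illustrative; the kstat path is the standard one):

	# Request 1 GiB per feed against a 1 GiB cache device...
	echo $((1024 * 1024 * 1024)) > /sys/module/zfs/parameters/l2arc_write_max

	# ...then watch L2ARC growth in arcstats: per-interval growth should
	# level off near 256 MiB (1 GiB / 4), not the requested 1 GiB.
	grep '^l2_size' /proc/spl/kstat/zfs/arcstats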

View file

@@ -425,8 +425,10 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
 		dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
 	}
 
-	if (zap_clone == 0 || aff_snap_count == 0)
-		return (0);
+	if (zap_clone == 0 || aff_snap_count == 0) {
+		error = 0;
+		goto out;
+	}
 
 	/* Check clones. */
 	zap_cursor_t *zc;
View file

@@ -3334,6 +3334,21 @@ function set_tunable_impl
 	esac
 }
 
+function save_tunable
+{
+	[[ ! -d $TEST_BASE_DIR ]] && return 1
+	[[ -e $TEST_BASE_DIR/tunable-$1 ]] && return 2
+	echo "$(get_tunable """$1""")" > "$TEST_BASE_DIR"/tunable-"$1"
+}
+
+function restore_tunable
+{
+	[[ ! -e $TEST_BASE_DIR/tunable-$1 ]] && return 1
+	val="$(cat $TEST_BASE_DIR/tunable-"""$1""")"
+	set_tunable64 "$1" "$val"
+	rm $TEST_BASE_DIR/tunable-$1
+}
+
 #
 # Get a global system tunable
 #
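
Together the helpers give tests a save/modify/restore pattern; the bclone setup and cleanup scripts later in this diff use them exactly this way:

	# setup: remember the current value, then force the feature on
	log_must save_tunable BCLONE_ENABLED
	log_must set_tunable32 BCLONE_ENABLED 1

	# cleanup: put back whatever the system had before the test
	log_must restore_tunable BCLONE_ENABLED

Note that save_tunable returns 2 rather than overwriting an existing saved value, so a run that dies before cleanup cannot clobber the original setting on the next attempt.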

View file

@@ -93,6 +93,7 @@ VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
 VOL_MODE vol.mode zvol_volmode
 VOL_RECURSIVE vol.recursive UNSUPPORTED
 VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
+BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
 XATTR_COMPAT xattr_compat zfs_xattr_compat
 ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
 ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max

View file

@ -31,4 +31,8 @@ verify_runnable "global"
default_cleanup_noexit
if tunable_exists BCLONE_ENABLED ; then
log_must restore_tunable BCLONE_ENABLED
fi
log_pass

View file

@@ -33,4 +33,9 @@ fi
 
 verify_runnable "global"
 
+if tunable_exists BCLONE_ENABLED ; then
+	log_must save_tunable BCLONE_ENABLED
+	log_must set_tunable32 BCLONE_ENABLED 1
+fi
+
 log_pass

View file

@@ -31,15 +31,13 @@
 # 2. Set l2arc_write_max to a value larger than the cache device.
 # 3. Create a file larger than the cache device and random read
 #    for 10 sec.
-# 4. Verify that l2arc_write_max is set back to the default.
-# 5. Set l2arc_write_max to a value less than the cache device size but
+# 4. Set l2arc_write_max to a value less than the cache device size but
 #    larger than the default (256MB).
-# 6. Record the l2_size.
-# 7. Random read for 1 sec.
-# 8. Record the l2_size again.
-# 9. If (6) <= (8) then we have not looped around yet.
-# 10. If (6) > (8) then we looped around. Break out of the loop and test.
-# 11. Destroy pool.
+# 5. Record the l2_size.
+# 6. Random read for 1 sec.
+# 7. Record the l2_size again.
+# 8. If (5) <= (7) then we have not looped around yet.
+# 9. Destroy pool.
 #
 
 verify_runnable "global"
@@ -93,10 +91,6 @@ log_must zfs set relatime=off $TESTPOOL
 log_must fio $FIO_SCRIPTS/mkfiles.fio
 log_must fio $FIO_SCRIPTS/random_reads.fio
 
-typeset write_max2=$(get_tunable L2ARC_WRITE_MAX)
-
-log_must test $write_max2 -eq $write_max
-
 log_must set_tunable32 L2ARC_WRITE_MAX $(( 256 * 1024 * 1024 ))
 
 export RUNTIME=1
@@ -108,8 +102,6 @@ while $do_once || [[ $l2_size1 -le $l2_size2 ]]; do
 	do_once=false
 done
 
-log_must test $l2_size1 -gt $l2_size2
-
 log_must zpool destroy $TESTPOOL
 log_pass "Looping around a cache device succeeds."

View file

@@ -34,6 +34,7 @@
 # STRATEGY:
 # 1. Create a pool with a known feature set.
 # 2. Verify only those features are active/enabled.
+# 3. Do this for all known feature sets.
 #
 
 verify_runnable "global"
@@ -47,8 +48,11 @@ log_onexit cleanup
 
 log_assert "creates a pool with a specified feature set enabled"
 
-log_must zpool create -f -o compatibility=compat-2020 $TESTPOOL $DISKS
-check_feature_set $TESTPOOL compat-2020
-log_must zpool destroy -f $TESTPOOL
+for compat in "$ZPOOL_COMPAT_DIR"/*
+do
+	log_must zpool create -f -o compatibility="${compat##*/}" $TESTPOOL $DISKS
+	check_feature_set $TESTPOOL "${compat##*/}"
+	log_must zpool destroy -f $TESTPOOL
+done
 
 log_pass "creates a pool with a specified feature set enabled"
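
Each basename under $ZPOOL_COMPAT_DIR becomes a -o compatibility= value, so the test now exercises every shipped feature-set file rather than only compat-2020. A sketch of what one iteration amounts to, using the directory from the zpool-features.7 excerpt above (available names vary by version; grub2 and compat-2020 are the two names that appear elsewhere in this diff):

	ls /usr/share/zfs/compatibility.d      # e.g. compat-2020, grub2, ...
	zpool create -f -o compatibility=grub2 $TESTPOOL $DISKS
	check_feature_set $TESTPOOL grub2
	zpool destroy -f $TESTPOOL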