mirror of
https://github.com/freebsd/freebsd-src
synced 2024-07-24 03:37:16 +00:00
zfs: merge openzfs/zfs@03e9caaec
Notable upstream pull request merges:
 #15516 da51bd17e Fix snap_obj_array memory leak in check_filesystem()
 #15519 35da34516 L2ARC: Restrict write size to 1/4 of the device
 #15529 03e9caaec Add a tunable to disable BRT support

Obtained from: OpenZFS
OpenZFS commit: 03e9caaec0
This commit is contained in:
commit
47bb16f8f0
1
sys/contrib/openzfs/.gitignore
vendored
1
sys/contrib/openzfs/.gitignore
vendored
|
@ -83,6 +83,7 @@
|
|||
modules.order
|
||||
Makefile
|
||||
Makefile.in
|
||||
changelog
|
||||
*.patch
|
||||
*.orig
|
||||
*.tmp
|
||||
|
|
|
@ -6,5 +6,5 @@ Release: 1
|
|||
Release-Tags: relext
|
||||
License: CDDL
|
||||
Author: OpenZFS
|
||||
Linux-Maximum: 6.5
|
||||
Linux-Maximum: 6.6
|
||||
Linux-Minimum: 3.10
|
||||
|
|
|
@ -6,7 +6,6 @@ edonr
|
|||
embedded_data
|
||||
empty_bpobj
|
||||
enabled_txg
|
||||
encryption
|
||||
extensible_dataset
|
||||
filesystem_limits
|
||||
hole_birth
|
||||
|
|
|
@ -67,6 +67,7 @@ ZFS_AC_DEBUG_INVARIANTS
|
|||
|
||||
AC_CONFIG_FILES([
|
||||
contrib/debian/rules
|
||||
contrib/debian/changelog
|
||||
Makefile
|
||||
include/Makefile
|
||||
lib/libzfs/libzfs.pc
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
openzfs-linux (@VERSION@-1) unstable; urgency=low
|
||||
|
||||
* OpenZFS @VERSION@ is tagged.
|
||||
|
||||
-- Umer Saleem <usaleem@ixsystems.com> Wed, 15 Nov 2023 15:00:00 +0500
|
||||
|
||||
openzfs-linux (2.2.99-1) unstable; urgency=low
|
||||
|
||||
* OpenZFS 2.2 is tagged.
|
|
@ -70,8 +70,6 @@ typedef enum kmem_cbrc {
|
|||
#define KMC_REAP_CHUNK INT_MAX
|
||||
#define KMC_DEFAULT_SEEKS 1
|
||||
|
||||
#define KMC_RECLAIM_ONCE 0x1 /* Force a single shrinker pass */
|
||||
|
||||
extern struct list_head spl_kmem_cache_list;
|
||||
extern struct rw_semaphore spl_kmem_cache_sem;
|
||||
|
||||
|
@ -108,7 +106,7 @@ typedef struct spl_kmem_magazine {
|
|||
uint32_t skm_refill; /* Batch refill size */
|
||||
struct spl_kmem_cache *skm_cache; /* Owned by cache */
|
||||
unsigned int skm_cpu; /* Owned by cpu */
|
||||
void *skm_objs[0]; /* Object pointers */
|
||||
void *skm_objs[]; /* Object pointers */
|
||||
} spl_kmem_magazine_t;
|
||||
|
||||
typedef struct spl_kmem_obj {
|
||||
|
|
|
@ -45,6 +45,8 @@ extern "C" {
|
|||
typedef struct zfsvfs zfsvfs_t;
|
||||
struct znode;
|
||||
|
||||
extern int zfs_bclone_enabled;
|
||||
|
||||
/*
|
||||
* This structure emulates the vfs_t from other platforms. Its purpose
|
||||
* is to facilitate the handling of mount options and minimize structural
|
||||
|
|
|
@ -136,7 +136,7 @@ typedef struct raidz_row {
|
|||
uint64_t rr_offset; /* Logical offset for *_io_verify() */
|
||||
uint64_t rr_size; /* Physical size for *_io_verify() */
|
||||
#endif
|
||||
raidz_col_t rr_col[0]; /* Flexible array of I/O columns */
|
||||
raidz_col_t rr_col[]; /* Flexible array of I/O columns */
|
||||
} raidz_row_t;
|
||||
|
||||
typedef struct raidz_map {
|
||||
|
@ -149,7 +149,7 @@ typedef struct raidz_map {
|
|||
zfs_locked_range_t *rm_lr;
|
||||
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
|
||||
raidz_col_t *rm_phys_col; /* if non-NULL, read i/o aggregation */
|
||||
raidz_row_t *rm_row[0]; /* flexible array of rows */
|
||||
raidz_row_t *rm_row[]; /* flexible array of rows */
|
||||
} raidz_map_t;
|
||||
|
||||
/*
|
||||
|
|
|
@ -31,14 +31,6 @@ for use by the kmem caches.
|
|||
For the majority of systems and workloads only a small number of threads are
|
||||
required.
|
||||
.
|
||||
.It Sy spl_kmem_cache_reclaim Ns = Ns Sy 0 Pq uint
|
||||
When this is set it prevents Linux from being able to rapidly reclaim all the
|
||||
memory held by the kmem caches.
|
||||
This may be useful in circumstances where it's preferable that Linux
|
||||
reclaim memory from some other subsystem first.
|
||||
Setting this will increase the likelihood of out-of-memory events on a memory
|
||||
constrained system.
|
||||
.
|
||||
.It Sy spl_kmem_cache_obj_per_slab Ns = Ns Sy 8 Pq uint
|
||||
The preferred number of objects per slab in the cache.
|
||||
In general, a larger value will increase the cache's memory footprint
|
||||
|
|
|
@ -1154,6 +1154,11 @@ Selecting any option other than
|
|||
results in vector instructions
|
||||
from the respective CPU instruction set being used.
|
||||
.
|
||||
.It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
|
||||
Enable the experimental block cloning feature.
|
||||
If this setting is 0, then even if feature@block_cloning is enabled,
|
||||
attempts to clone blocks will act as though the feature is disabled.
|
||||
.
|
||||
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
|
||||
Select a BLAKE3 implementation.
|
||||
.Pp
|
||||
|
|
|
@ -219,8 +219,11 @@ to the end of the line is ignored.
|
|||
.Bd -literal -compact -offset 4n
|
||||
.No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2
|
||||
# Features which are supported by GRUB2
|
||||
allocation_classes
|
||||
async_destroy
|
||||
block_cloning
|
||||
bookmarks
|
||||
device_rebuild
|
||||
embedded_data
|
||||
empty_bpobj
|
||||
enabled_txg
|
||||
|
@ -229,8 +232,14 @@ filesystem_limits
|
|||
hole_birth
|
||||
large_blocks
|
||||
livelist
|
||||
log_spacemap
|
||||
lz4_compress
|
||||
project_quota
|
||||
resilver_defer
|
||||
spacemap_histogram
|
||||
spacemap_v2
|
||||
userobj_accounting
|
||||
zilsaxattr
|
||||
zpool_checkpoint
|
||||
|
||||
.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev
|
||||
|
|
|
@ -489,6 +489,10 @@ zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
|
|||
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
|
||||
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
|
||||
|
||||
UBSAN_SANITIZE_zap_leaf.o := n
|
||||
UBSAN_SANITIZE_zap_micro.o := n
|
||||
UBSAN_SANITIZE_sa.o := n
|
||||
|
||||
# Suppress incorrect warnings from versions of objtool which are not
|
||||
# aware of x86 EVEX prefix instructions used for AVX512.
|
||||
OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y
|
||||
|
|
|
@ -76,17 +76,6 @@ module_param(spl_kmem_cache_magazine_size, uint, 0444);
|
|||
MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
|
||||
"Default magazine size (2-256), set automatically (0)");
|
||||
|
||||
/*
|
||||
* The default behavior is to report the number of objects remaining in the
|
||||
* cache. This allows the Linux VM to repeatedly reclaim objects from the
|
||||
* cache when memory is low to satisfy other memory allocations. Alternately,
|
||||
* setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache
|
||||
* is reclaimed. This may increase the likelihood of out of memory events.
|
||||
*/
|
||||
static unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
|
||||
module_param(spl_kmem_cache_reclaim, uint, 0644);
|
||||
MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
|
||||
|
||||
static unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
|
||||
module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
|
||||
MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");
|
||||
|
|
|
@ -4249,4 +4249,8 @@ EXPORT_SYMBOL(zfs_map);
|
|||
module_param(zfs_delete_blocks, ulong, 0644);
|
||||
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
|
||||
|
||||
/* CSTYLED */
|
||||
module_param(zfs_bclone_enabled, uint, 0644);
|
||||
MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
|
||||
|
||||
#endif
|
||||
|
|
|
@ -31,6 +31,8 @@
|
|||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
int zfs_bclone_enabled = 1;
|
||||
|
||||
/*
|
||||
* Clone part of a file via block cloning.
|
||||
*
|
||||
|
@ -50,6 +52,9 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
|
|||
fstrans_cookie_t cookie;
|
||||
int err;
|
||||
|
||||
if (!zfs_bclone_enabled)
|
||||
return (-EOPNOTSUPP);
|
||||
|
||||
if (!spa_feature_is_enabled(
|
||||
dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
|
||||
return (-EOPNOTSUPP);
|
||||
|
|
|
@ -8035,9 +8035,8 @@ l2arc_write_size(l2arc_dev_t *dev)
|
|||
*/
|
||||
size = l2arc_write_max;
|
||||
if (size == 0) {
|
||||
cmn_err(CE_NOTE, "Bad value for l2arc_write_max, value must "
|
||||
"be greater than zero, resetting it to the default (%d)",
|
||||
L2ARC_WRITE_SIZE);
|
||||
cmn_err(CE_NOTE, "l2arc_write_max must be greater than zero, "
|
||||
"resetting it to the default (%d)", L2ARC_WRITE_SIZE);
|
||||
size = l2arc_write_max = L2ARC_WRITE_SIZE;
|
||||
}
|
||||
|
||||
|
@ -8060,30 +8059,9 @@ l2arc_write_size(l2arc_dev_t *dev)
|
|||
* device. This is important in l2arc_evict(), otherwise infinite
|
||||
* iteration can occur.
|
||||
*/
|
||||
if (size > dev->l2ad_end - dev->l2ad_start) {
|
||||
cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
|
||||
"plus the overhead of log blocks (persistent L2ARC, "
|
||||
"%llu bytes) exceeds the size of the cache device "
|
||||
"(guid %llu), resetting them to the default (%d)",
|
||||
(u_longlong_t)l2arc_log_blk_overhead(size, dev),
|
||||
(u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
|
||||
size = MIN(size, (dev->l2ad_end - dev->l2ad_start) / 4);
|
||||
|
||||
size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
|
||||
|
||||
if (l2arc_trim_ahead > 1) {
|
||||
cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
|
||||
l2arc_trim_ahead = 1;
|
||||
}
|
||||
|
||||
if (arc_warm == B_FALSE)
|
||||
size += l2arc_write_boost;
|
||||
|
||||
size += l2arc_log_blk_overhead(size, dev);
|
||||
if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
|
||||
size += MAX(64 * 1024 * 1024,
|
||||
(size * l2arc_trim_ahead) / 100);
|
||||
}
|
||||
}
|
||||
size = P2ROUNDUP(size, 1ULL << dev->l2ad_vdev->vdev_ashift);
|
||||
|
||||
return (size);
|
||||
|
||||
|
|
|
@ -425,8 +425,10 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
|
|||
dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
|
||||
}
|
||||
|
||||
if (zap_clone == 0 || aff_snap_count == 0)
|
||||
return (0);
|
||||
if (zap_clone == 0 || aff_snap_count == 0) {
|
||||
error = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Check clones. */
|
||||
zap_cursor_t *zc;
|
||||
|
|
|
@ -3334,6 +3334,21 @@ function set_tunable_impl
|
|||
esac
|
||||
}
|
||||
|
||||
function save_tunable
|
||||
{
|
||||
[[ ! -d $TEST_BASE_DIR ]] && return 1
|
||||
[[ -e $TEST_BASE_DIR/tunable-$1 ]] && return 2
|
||||
echo "$(get_tunable """$1""")" > "$TEST_BASE_DIR"/tunable-"$1"
|
||||
}
|
||||
|
||||
function restore_tunable
|
||||
{
|
||||
[[ ! -e $TEST_BASE_DIR/tunable-$1 ]] && return 1
|
||||
val="$(cat $TEST_BASE_DIR/tunable-"""$1""")"
|
||||
set_tunable64 "$1" "$val"
|
||||
rm $TEST_BASE_DIR/tunable-$1
|
||||
}
|
||||
|
||||
#
|
||||
# Get a global system tunable
|
||||
#
|
||||
|
|
|
@ -93,6 +93,7 @@ VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
|
|||
VOL_MODE vol.mode zvol_volmode
|
||||
VOL_RECURSIVE vol.recursive UNSUPPORTED
|
||||
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
|
||||
BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
|
||||
XATTR_COMPAT xattr_compat zfs_xattr_compat
|
||||
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
|
||||
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
|
||||
|
|
|
@ -31,4 +31,8 @@ verify_runnable "global"
|
|||
|
||||
default_cleanup_noexit
|
||||
|
||||
if tunable_exists BCLONE_ENABLED ; then
|
||||
log_must restore_tunable BCLONE_ENABLED
|
||||
fi
|
||||
|
||||
log_pass
|
||||
|
|
|
@ -33,4 +33,9 @@ fi
|
|||
|
||||
verify_runnable "global"
|
||||
|
||||
if tunable_exists BCLONE_ENABLED ; then
|
||||
log_must save_tunable BCLONE_ENABLED
|
||||
log_must set_tunable32 BCLONE_ENABLED 1
|
||||
fi
|
||||
|
||||
log_pass
|
||||
|
|
|
@ -31,15 +31,13 @@
|
|||
# 2. Set l2arc_write_max to a value larger than the cache device.
|
||||
# 3. Create a file larger than the cache device and random read
|
||||
# for 10 sec.
|
||||
# 4. Verify that l2arc_write_max is set back to the default.
|
||||
# 5. Set l2arc_write_max to a value less than the cache device size but
|
||||
# 4. Set l2arc_write_max to a value less than the cache device size but
|
||||
# larger than the default (256MB).
|
||||
# 6. Record the l2_size.
|
||||
# 7. Random read for 1 sec.
|
||||
# 8. Record the l2_size again.
|
||||
# 9. If (6) <= (8) then we have not looped around yet.
|
||||
# 10. If (6) > (8) then we looped around. Break out of the loop and test.
|
||||
# 11. Destroy pool.
|
||||
# 5. Record the l2_size.
|
||||
# 6. Random read for 1 sec.
|
||||
# 7. Record the l2_size again.
|
||||
# 8. If (5) <= (7) then we have not looped around yet.
|
||||
# 9. Destroy pool.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
@ -93,10 +91,6 @@ log_must zfs set relatime=off $TESTPOOL
|
|||
log_must fio $FIO_SCRIPTS/mkfiles.fio
|
||||
log_must fio $FIO_SCRIPTS/random_reads.fio
|
||||
|
||||
typeset write_max2=$(get_tunable L2ARC_WRITE_MAX)
|
||||
|
||||
log_must test $write_max2 -eq $write_max
|
||||
|
||||
log_must set_tunable32 L2ARC_WRITE_MAX $(( 256 * 1024 * 1024 ))
|
||||
export RUNTIME=1
|
||||
|
||||
|
@ -108,8 +102,6 @@ while $do_once || [[ $l2_size1 -le $l2_size2 ]]; do
|
|||
do_once=false
|
||||
done
|
||||
|
||||
log_must test $l2_size1 -gt $l2_size2
|
||||
|
||||
log_must zpool destroy $TESTPOOL
|
||||
|
||||
log_pass "Looping around a cache device succeeds."
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
# STRATEGY:
|
||||
# 1. Create a pool with a known feature set.
|
||||
# 2. Verify only those features are active/enabled.
|
||||
# 3. Do this for all known feature sets
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
@ -47,8 +48,11 @@ log_onexit cleanup
|
|||
|
||||
log_assert "creates a pool with a specified feature set enabled"
|
||||
|
||||
log_must zpool create -f -o compatibility=compat-2020 $TESTPOOL $DISKS
|
||||
check_feature_set $TESTPOOL compat-2020
|
||||
log_must zpool destroy -f $TESTPOOL
|
||||
for compat in "$ZPOOL_COMPAT_DIR"/*
|
||||
do
|
||||
log_must zpool create -f -o compatibility="${compat##*/}" $TESTPOOL $DISKS
|
||||
check_feature_set $TESTPOOL "${compat##*/}"
|
||||
log_must zpool destroy -f $TESTPOOL
|
||||
done
|
||||
|
||||
log_pass "creates a pool with a specified feature set enabled"
|
||||
|
|
Loading…
Reference in a new issue