mirror of
https://github.com/freebsd/freebsd-src
synced 2024-07-23 11:16:33 +00:00
zfs: merge openzfs/zfs@66b81b349
Notable upstream pull request merges:
 #15290 54b1b1d89 import: require force when cachefile hostid doesn't match on-disk
 #15319 342357cd9 Reduce number of metaslab preload taskq threads
 #15340 2a6c62109 ARC: Remove b_cv from struct l1arc_buf_hdr
 #15347 75a2eb7fa ARC: Drop different size headers for crypto
 #15350 96b9cf42e ARC: Remove b_bufcnt/b_ebufcnt from ARC headers
 #15353 66b81b349 ZIL: Reduce maximum size of WR_COPIED to 7.5K
 #15362 5b8688e62 zfsconcepts: add description of block cloning

Obtained from: OpenZFS
OpenZFS commit: 66b81b3497
This commit is contained in:
commit
b2526e8bfe
21
sys/contrib/openzfs/.cirrus.yml
Normal file
21
sys/contrib/openzfs/.cirrus.yml
Normal file
|
@ -0,0 +1,21 @@
|
|||
env:
|
||||
CIRRUS_CLONE_DEPTH: 1
|
||||
ARCH: amd64
|
||||
|
||||
build_task:
|
||||
matrix:
|
||||
freebsd_instance:
|
||||
image_family: freebsd-12-4
|
||||
freebsd_instance:
|
||||
image_family: freebsd-13-2
|
||||
freebsd_instance:
|
||||
image_family: freebsd-14-0-snap
|
||||
prepare_script:
|
||||
- pkg install -y autoconf automake libtool gettext-runtime gmake ksh93 py39-packaging py39-cffi py39-sysctl
|
||||
configure_script:
|
||||
- env MAKE=gmake ./autogen.sh
|
||||
- env MAKE=gmake ./configure --with-config="user" --with-python=3.9
|
||||
build_script:
|
||||
- gmake -j `sysctl -n kern.smp.cpus`
|
||||
install_script:
|
||||
- gmake install
|
2
sys/contrib/openzfs/.gitignore
vendored
2
sys/contrib/openzfs/.gitignore
vendored
|
@ -42,6 +42,7 @@
|
|||
!udev/**
|
||||
|
||||
!.editorconfig
|
||||
!.cirrus.yml
|
||||
!.gitignore
|
||||
!.gitmodules
|
||||
!AUTHORS
|
||||
|
@ -60,7 +61,6 @@
|
|||
!TEST
|
||||
!zfs.release.in
|
||||
|
||||
|
||||
#
|
||||
# Normal rules
|
||||
#
|
||||
|
|
|
@ -3122,12 +3122,21 @@ zfs_force_import_required(nvlist_t *config)
|
|||
nvlist_t *nvinfo;
|
||||
|
||||
state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
|
||||
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
|
||||
nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
|
||||
|
||||
/*
|
||||
* The hostid on LOAD_INFO comes from the MOS label via
|
||||
* spa_tryimport(). If it's not there then we're likely talking to an
|
||||
* older kernel, so use the top one, which will be from the label
|
||||
* discovered in zpool_find_import(), or if a cachefile is in use, the
|
||||
* local hostid.
|
||||
*/
|
||||
if (nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_HOSTID, &hostid) != 0)
|
||||
nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
|
||||
|
||||
if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid())
|
||||
return (B_TRUE);
|
||||
|
||||
nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
|
||||
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) {
|
||||
mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo,
|
||||
ZPOOL_CONFIG_MMP_STATE);
|
||||
|
@ -3198,7 +3207,10 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
|
|||
time_t timestamp = 0;
|
||||
uint64_t hostid = 0;
|
||||
|
||||
if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME))
|
||||
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTNAME))
|
||||
hostname = fnvlist_lookup_string(nvinfo,
|
||||
ZPOOL_CONFIG_HOSTNAME);
|
||||
else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME))
|
||||
hostname = fnvlist_lookup_string(config,
|
||||
ZPOOL_CONFIG_HOSTNAME);
|
||||
|
||||
|
@ -3206,7 +3218,10 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
|
|||
timestamp = fnvlist_lookup_uint64(config,
|
||||
ZPOOL_CONFIG_TIMESTAMP);
|
||||
|
||||
if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID))
|
||||
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTID))
|
||||
hostid = fnvlist_lookup_uint64(nvinfo,
|
||||
ZPOOL_CONFIG_HOSTID);
|
||||
else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID))
|
||||
hostid = fnvlist_lookup_uint64(config,
|
||||
ZPOOL_CONFIG_HOSTID);
|
||||
|
||||
|
|
|
@ -358,6 +358,9 @@ AC_DEFUN([ZFS_AC_RPM], [
|
|||
AS_IF([test -n "$udevruledir" ], [
|
||||
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevruledir $(udevruledir)"'
|
||||
])
|
||||
AS_IF([test -n "$bashcompletiondir" ], [
|
||||
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_bashcompletiondir $(bashcompletiondir)"'
|
||||
])
|
||||
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_SYSTEMD)'
|
||||
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYZFS)'
|
||||
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PAM)'
|
||||
|
|
|
@ -51,7 +51,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
|
|||
__array(uint64_t, hdr_dva_word, 2)
|
||||
__field(uint64_t, hdr_birth)
|
||||
__field(uint32_t, hdr_flags)
|
||||
__field(uint32_t, hdr_bufcnt)
|
||||
__field(arc_buf_contents_t, hdr_type)
|
||||
__field(uint16_t, hdr_psize)
|
||||
__field(uint16_t, hdr_lsize)
|
||||
|
@ -70,7 +69,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
|
|||
__entry->hdr_dva_word[1] = ab->b_dva.dva_word[1];
|
||||
__entry->hdr_birth = ab->b_birth;
|
||||
__entry->hdr_flags = ab->b_flags;
|
||||
__entry->hdr_bufcnt = ab->b_l1hdr.b_bufcnt;
|
||||
__entry->hdr_psize = ab->b_psize;
|
||||
__entry->hdr_lsize = ab->b_lsize;
|
||||
__entry->hdr_spa = ab->b_spa;
|
||||
|
@ -84,12 +82,12 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
|
|||
__entry->hdr_refcount = ab->b_l1hdr.b_refcnt.rc_count;
|
||||
),
|
||||
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
|
||||
"flags 0x%x bufcnt %u type %u psize %u lsize %u spa %llu "
|
||||
"flags 0x%x type %u psize %u lsize %u spa %llu "
|
||||
"state_type %u access %lu mru_hits %u mru_ghost_hits %u "
|
||||
"mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
|
||||
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
|
||||
__entry->hdr_birth, __entry->hdr_flags,
|
||||
__entry->hdr_bufcnt, __entry->hdr_type, __entry->hdr_psize,
|
||||
__entry->hdr_type, __entry->hdr_psize,
|
||||
__entry->hdr_lsize, __entry->hdr_spa, __entry->hdr_state_type,
|
||||
__entry->hdr_access, __entry->hdr_mru_hits,
|
||||
__entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits,
|
||||
|
@ -192,7 +190,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
|||
__array(uint64_t, hdr_dva_word, 2)
|
||||
__field(uint64_t, hdr_birth)
|
||||
__field(uint32_t, hdr_flags)
|
||||
__field(uint32_t, hdr_bufcnt)
|
||||
__field(arc_buf_contents_t, hdr_type)
|
||||
__field(uint16_t, hdr_psize)
|
||||
__field(uint16_t, hdr_lsize)
|
||||
|
@ -223,7 +220,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
|||
__entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1];
|
||||
__entry->hdr_birth = hdr->b_birth;
|
||||
__entry->hdr_flags = hdr->b_flags;
|
||||
__entry->hdr_bufcnt = hdr->b_l1hdr.b_bufcnt;
|
||||
__entry->hdr_psize = hdr->b_psize;
|
||||
__entry->hdr_lsize = hdr->b_lsize;
|
||||
__entry->hdr_spa = hdr->b_spa;
|
||||
|
@ -255,7 +251,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
|||
__entry->zb_blkid = zb->zb_blkid;
|
||||
),
|
||||
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
|
||||
"flags 0x%x bufcnt %u psize %u lsize %u spa %llu state_type %u "
|
||||
"flags 0x%x psize %u lsize %u spa %llu state_type %u "
|
||||
"access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
|
||||
"mfu_ghost_hits %u l2_hits %u refcount %lli } "
|
||||
"bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
|
||||
|
@ -264,7 +260,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
|||
"blkid %llu }",
|
||||
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
|
||||
__entry->hdr_birth, __entry->hdr_flags,
|
||||
__entry->hdr_bufcnt, __entry->hdr_psize, __entry->hdr_lsize,
|
||||
__entry->hdr_psize, __entry->hdr_lsize,
|
||||
__entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
|
||||
__entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
|
||||
__entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
|
||||
|
|
|
@ -159,10 +159,6 @@ struct arc_write_callback {
|
|||
* these two allocation states.
|
||||
*/
|
||||
typedef struct l1arc_buf_hdr {
|
||||
/* for waiting on reads to complete */
|
||||
kcondvar_t b_cv;
|
||||
uint8_t b_byteswap;
|
||||
|
||||
/* protected by arc state mutex */
|
||||
arc_state_t *b_state;
|
||||
multilist_node_t b_arc_node;
|
||||
|
@ -173,7 +169,7 @@ typedef struct l1arc_buf_hdr {
|
|||
uint32_t b_mru_ghost_hits;
|
||||
uint32_t b_mfu_hits;
|
||||
uint32_t b_mfu_ghost_hits;
|
||||
uint32_t b_bufcnt;
|
||||
uint8_t b_byteswap;
|
||||
arc_buf_t *b_buf;
|
||||
|
||||
/* self protecting */
|
||||
|
@ -436,12 +432,12 @@ typedef struct l2arc_dev {
|
|||
*/
|
||||
typedef struct arc_buf_hdr_crypt {
|
||||
abd_t *b_rabd; /* raw encrypted data */
|
||||
dmu_object_type_t b_ot; /* object type */
|
||||
uint32_t b_ebufcnt; /* count of encrypted buffers */
|
||||
|
||||
/* dsobj for looking up encryption key for l2arc encryption */
|
||||
uint64_t b_dsobj;
|
||||
|
||||
dmu_object_type_t b_ot; /* object type */
|
||||
|
||||
/* encryption parameters */
|
||||
uint8_t b_salt[ZIO_DATA_SALT_LEN];
|
||||
uint8_t b_iv[ZIO_DATA_IV_LEN];
|
||||
|
|
|
@ -250,7 +250,6 @@ struct metaslab_group {
|
|||
int64_t mg_activation_count;
|
||||
metaslab_class_t *mg_class;
|
||||
vdev_t *mg_vd;
|
||||
taskq_t *mg_taskq;
|
||||
metaslab_group_t *mg_prev;
|
||||
metaslab_group_t *mg_next;
|
||||
|
||||
|
|
|
@ -424,7 +424,9 @@ struct spa {
|
|||
|
||||
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
|
||||
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
|
||||
taskq_t *spa_metaslab_taskq; /* Taskq for metaslab preload */
|
||||
taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */
|
||||
taskq_t *spa_upgrade_taskq; /* Taskq for upgrade jobs */
|
||||
uint64_t spa_multihost; /* multihost aware (mmp) */
|
||||
mmp_thread_t spa_mmp; /* multihost mmp thread */
|
||||
list_t spa_leaf_list; /* list of leaf vdevs */
|
||||
|
@ -448,8 +450,6 @@ struct spa {
|
|||
*/
|
||||
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
|
||||
zfs_refcount_t spa_refcount; /* number of opens */
|
||||
|
||||
taskq_t *spa_upgrade_taskq; /* taskq for upgrade jobs */
|
||||
};
|
||||
|
||||
extern char *spa_config_path;
|
||||
|
|
|
@ -402,6 +402,12 @@ Practical upper limit of total metaslabs per top-level vdev.
|
|||
.It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
|
||||
Enable metaslab group preloading.
|
||||
.
|
||||
.It Sy metaslab_preload_limit Ns = Ns Sy 10 Pq uint
|
||||
Maximum number of metaslabs per group to preload.
|
||||
.
|
||||
.It Sy metaslab_preload_pct Ns = Ns Sy 50 Pq uint
|
||||
Percentage of CPUs to run a metaslab preload taskq.
|
||||
.
|
||||
.It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
|
||||
Give more weight to metaslabs with lower LBAs,
|
||||
assuming they have greater bandwidth,
|
||||
|
@ -2144,6 +2150,11 @@ On very fragmented pools, lowering this
|
|||
.Pq typically to Sy 36 KiB
|
||||
can improve performance.
|
||||
.
|
||||
.It Sy zil_maxcopied Ns = Ns Sy 7680 Ns B Po 7.5 KiB Pc Pq uint
|
||||
This sets the maximum number of write bytes logged via WR_COPIED.
|
||||
It tunes a tradeoff between additional memory copy and possibly worse log
|
||||
space efficiency vs additional range lock/unlock.
|
||||
.
|
||||
.It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64
|
||||
This sets the minimum time in nanoseconds that the ZIL will delay a block commit,
|
||||
waiting for more records.
|
||||
|
|
|
@ -28,8 +28,9 @@
|
|||
.\" Copyright 2019 Richard Laager. All rights reserved.
|
||||
.\" Copyright 2018 Nexenta Systems, Inc.
|
||||
.\" Copyright 2019 Joyent, Inc.
|
||||
.\" Copyright 2023 Klara, Inc.
|
||||
.\"
|
||||
.Dd June 30, 2019
|
||||
.Dd October 6, 2023
|
||||
.Dt ZFSCONCEPTS 7
|
||||
.Os
|
||||
.
|
||||
|
@ -205,3 +206,40 @@ practices, such as regular backups.
|
|||
Consider using the
|
||||
.Sy compression
|
||||
property as a less resource-intensive alternative.
|
||||
.Ss Block cloning
|
||||
Block cloning is a facility that allows a file (or parts of a file) to be
|
||||
.Qq cloned ,
|
||||
that is, a shallow copy made where the existing data blocks are referenced
|
||||
rather than copied.
|
||||
Later modifications to the data will cause a copy of the data block to be taken
|
||||
and that copy modified.
|
||||
This facility is used to implement
|
||||
.Qq reflinks
|
||||
or
|
||||
.Qq file-level copy-on-write .
|
||||
.Pp
|
||||
Cloned blocks are tracked in a special on-disk structure called the Block
|
||||
Reference Table
|
||||
.Po BRT
|
||||
.Pc .
|
||||
Unlike deduplication, this table has minimal overhead, so it can be enabled at all
|
||||
times.
|
||||
.Pp
|
||||
Also unlike deduplication, cloning must be requested by a user program.
|
||||
Many common file copying programs, including newer versions of
|
||||
.Nm /bin/cp ,
|
||||
will try to create clones automatically.
|
||||
Look for
|
||||
.Qq clone ,
|
||||
.Qq dedupe
|
||||
or
|
||||
.Qq reflink
|
||||
in the documentation for more information.
|
||||
.Pp
|
||||
There are some limitations to block cloning.
|
||||
Only whole blocks can be cloned, and blocks cannot be cloned if they are not
|
||||
yet written to disk, or if they are encrypted, or the source and destination
|
||||
.Sy recordsize
|
||||
properties differ.
|
||||
The OS may add additional restrictions;
|
||||
for example, most versions of Linux will not allow clones across datasets.
|
||||
|
|
|
@ -614,28 +614,6 @@ SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct,
|
|||
" space map to continue allocations in a first-fit fashion");
|
||||
/* END CSTYLED */
|
||||
|
||||
/*
|
||||
* Percentage of all cpus that can be used by the metaslab taskq.
|
||||
*/
|
||||
extern int metaslab_load_pct;
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct,
|
||||
CTLFLAG_RWTUN, &metaslab_load_pct, 0,
|
||||
"Percentage of cpus that can be used by the metaslab taskq");
|
||||
/* END CSTYLED */
|
||||
|
||||
/*
|
||||
* Max number of metaslabs per group to preload.
|
||||
*/
|
||||
extern uint_t metaslab_preload_limit;
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, preload_limit,
|
||||
CTLFLAG_RWTUN, &metaslab_preload_limit, 0,
|
||||
"Max number of metaslabs per group to preload");
|
||||
/* END CSTYLED */
|
||||
|
||||
/* mmp.c */
|
||||
|
||||
int
|
||||
|
|
|
@ -748,8 +748,7 @@ taskq_t *arc_prune_taskq;
|
|||
* Other sizes
|
||||
*/
|
||||
|
||||
#define HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
|
||||
#define HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr))
|
||||
#define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
|
||||
#define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
|
||||
|
||||
/*
|
||||
|
@ -1113,7 +1112,6 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
|
|||
*/
|
||||
|
||||
static kmem_cache_t *hdr_full_cache;
|
||||
static kmem_cache_t *hdr_full_crypt_cache;
|
||||
static kmem_cache_t *hdr_l2only_cache;
|
||||
static kmem_cache_t *buf_cache;
|
||||
|
||||
|
@ -1134,7 +1132,6 @@ buf_fini(void)
|
|||
for (int i = 0; i < BUF_LOCKS; i++)
|
||||
mutex_destroy(BUF_HASH_LOCK(i));
|
||||
kmem_cache_destroy(hdr_full_cache);
|
||||
kmem_cache_destroy(hdr_full_crypt_cache);
|
||||
kmem_cache_destroy(hdr_l2only_cache);
|
||||
kmem_cache_destroy(buf_cache);
|
||||
}
|
||||
|
@ -1151,7 +1148,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
|
|||
|
||||
memset(hdr, 0, HDR_FULL_SIZE);
|
||||
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
|
||||
cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
|
||||
zfs_refcount_create(&hdr->b_l1hdr.b_refcnt);
|
||||
#ifdef ZFS_DEBUG
|
||||
mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
@ -1163,19 +1159,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
|
|||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag)
|
||||
{
|
||||
(void) unused;
|
||||
arc_buf_hdr_t *hdr = vbuf;
|
||||
|
||||
hdr_full_cons(vbuf, unused, kmflag);
|
||||
memset(&hdr->b_crypt_hdr, 0, sizeof (hdr->b_crypt_hdr));
|
||||
arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
|
||||
{
|
||||
|
@ -1211,7 +1194,6 @@ hdr_full_dest(void *vbuf, void *unused)
|
|||
arc_buf_hdr_t *hdr = vbuf;
|
||||
|
||||
ASSERT(HDR_EMPTY(hdr));
|
||||
cv_destroy(&hdr->b_l1hdr.b_cv);
|
||||
zfs_refcount_destroy(&hdr->b_l1hdr.b_refcnt);
|
||||
#ifdef ZFS_DEBUG
|
||||
mutex_destroy(&hdr->b_l1hdr.b_freeze_lock);
|
||||
|
@ -1220,16 +1202,6 @@ hdr_full_dest(void *vbuf, void *unused)
|
|||
arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS);
|
||||
}
|
||||
|
||||
static void
|
||||
hdr_full_crypt_dest(void *vbuf, void *unused)
|
||||
{
|
||||
(void) vbuf, (void) unused;
|
||||
|
||||
hdr_full_dest(vbuf, unused);
|
||||
arc_space_return(sizeof (((arc_buf_hdr_t *)NULL)->b_crypt_hdr),
|
||||
ARC_SPACE_HDRS);
|
||||
}
|
||||
|
||||
static void
|
||||
hdr_l2only_dest(void *vbuf, void *unused)
|
||||
{
|
||||
|
@ -1285,9 +1257,6 @@ buf_init(void)
|
|||
|
||||
hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
|
||||
0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0);
|
||||
hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt",
|
||||
HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest,
|
||||
NULL, NULL, NULL, 0);
|
||||
hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
|
||||
HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL,
|
||||
NULL, NULL, 0);
|
||||
|
@ -1995,7 +1964,6 @@ arc_buf_untransform_in_place(arc_buf_t *buf)
|
|||
arc_buf_size(buf));
|
||||
buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED;
|
||||
buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED;
|
||||
hdr->b_crypt_hdr.b_ebufcnt -= 1;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2230,7 +2198,6 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
|
|||
ASSERT(HDR_HAS_L1HDR(hdr));
|
||||
|
||||
if (GHOST_STATE(state)) {
|
||||
ASSERT0(hdr->b_l1hdr.b_bufcnt);
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
|
||||
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
|
||||
ASSERT(!HDR_HAS_RABD(hdr));
|
||||
|
@ -2270,7 +2237,6 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
|
|||
ASSERT(HDR_HAS_L1HDR(hdr));
|
||||
|
||||
if (GHOST_STATE(state)) {
|
||||
ASSERT0(hdr->b_l1hdr.b_bufcnt);
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
|
||||
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
|
||||
ASSERT(!HDR_HAS_RABD(hdr));
|
||||
|
@ -2386,7 +2352,9 @@ arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
|
|||
l2hdr = &hdr->b_l2hdr;
|
||||
|
||||
if (l1hdr) {
|
||||
abi->abi_bufcnt = l1hdr->b_bufcnt;
|
||||
abi->abi_bufcnt = 0;
|
||||
for (arc_buf_t *buf = l1hdr->b_buf; buf; buf = buf->b_next)
|
||||
abi->abi_bufcnt++;
|
||||
abi->abi_access = l1hdr->b_arc_access;
|
||||
abi->abi_mru_hits = l1hdr->b_mru_hits;
|
||||
abi->abi_mru_ghost_hits = l1hdr->b_mru_ghost_hits;
|
||||
|
@ -2414,7 +2382,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||
{
|
||||
arc_state_t *old_state;
|
||||
int64_t refcnt;
|
||||
uint32_t bufcnt;
|
||||
boolean_t update_old, update_new;
|
||||
arc_buf_contents_t type = arc_buf_type(hdr);
|
||||
|
||||
|
@ -2428,19 +2395,16 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||
if (HDR_HAS_L1HDR(hdr)) {
|
||||
old_state = hdr->b_l1hdr.b_state;
|
||||
refcnt = zfs_refcount_count(&hdr->b_l1hdr.b_refcnt);
|
||||
bufcnt = hdr->b_l1hdr.b_bufcnt;
|
||||
update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL ||
|
||||
HDR_HAS_RABD(hdr));
|
||||
update_old = (hdr->b_l1hdr.b_buf != NULL ||
|
||||
hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
|
||||
|
||||
IMPLY(GHOST_STATE(old_state), bufcnt == 0);
|
||||
IMPLY(GHOST_STATE(new_state), bufcnt == 0);
|
||||
IMPLY(GHOST_STATE(old_state), hdr->b_l1hdr.b_buf == NULL);
|
||||
IMPLY(GHOST_STATE(new_state), hdr->b_l1hdr.b_buf == NULL);
|
||||
IMPLY(old_state == arc_anon, bufcnt <= 1);
|
||||
IMPLY(old_state == arc_anon, hdr->b_l1hdr.b_buf == NULL ||
|
||||
ARC_BUF_LAST(hdr->b_l1hdr.b_buf));
|
||||
} else {
|
||||
old_state = arc_l2c_only;
|
||||
refcnt = 0;
|
||||
bufcnt = 0;
|
||||
update_old = B_FALSE;
|
||||
}
|
||||
update_new = update_old;
|
||||
|
@ -2488,14 +2452,12 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||
if (update_new && new_state != arc_l2c_only) {
|
||||
ASSERT(HDR_HAS_L1HDR(hdr));
|
||||
if (GHOST_STATE(new_state)) {
|
||||
ASSERT0(bufcnt);
|
||||
|
||||
/*
|
||||
* When moving a header to a ghost state, we first
|
||||
* remove all arc buffers. Thus, we'll have a
|
||||
* bufcnt of zero, and no arc buffer to use for
|
||||
* the reference. As a result, we use the arc
|
||||
* header pointer for the reference.
|
||||
* remove all arc buffers. Thus, we'll have no arc
|
||||
* buffer to use for the reference. As a result, we
|
||||
* use the arc header pointer for the reference.
|
||||
*/
|
||||
(void) zfs_refcount_add_many(
|
||||
&new_state->arcs_size[type],
|
||||
|
@ -2503,7 +2465,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
|
||||
ASSERT(!HDR_HAS_RABD(hdr));
|
||||
} else {
|
||||
uint32_t buffers = 0;
|
||||
|
||||
/*
|
||||
* Each individual buffer holds a unique reference,
|
||||
|
@ -2512,8 +2473,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||
*/
|
||||
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
|
||||
buf = buf->b_next) {
|
||||
ASSERT3U(bufcnt, !=, 0);
|
||||
buffers++;
|
||||
|
||||
/*
|
||||
* When the arc_buf_t is sharing the data
|
||||
|
@ -2529,7 +2488,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||
&new_state->arcs_size[type],
|
||||
arc_buf_size(buf), buf);
|
||||
}
|
||||
ASSERT3U(bufcnt, ==, buffers);
|
||||
|
||||
if (hdr->b_l1hdr.b_pabd != NULL) {
|
||||
(void) zfs_refcount_add_many(
|
||||
|
@ -2548,7 +2506,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||
if (update_old && old_state != arc_l2c_only) {
|
||||
ASSERT(HDR_HAS_L1HDR(hdr));
|
||||
if (GHOST_STATE(old_state)) {
|
||||
ASSERT0(bufcnt);
|
||||
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
|
||||
ASSERT(!HDR_HAS_RABD(hdr));
|
||||
|
||||
|
@ -2564,7 +2521,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||
&old_state->arcs_size[type],
|
||||
HDR_GET_LSIZE(hdr), hdr);
|
||||
} else {
|
||||
uint32_t buffers = 0;
|
||||
|
||||
/*
|
||||
* Each individual buffer holds a unique reference,
|
||||
|
@ -2573,8 +2529,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||
*/
|
||||
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
|
||||
buf = buf->b_next) {
|
||||
ASSERT3U(bufcnt, !=, 0);
|
||||
buffers++;
|
||||
|
||||
/*
|
||||
* When the arc_buf_t is sharing the data
|
||||
|
@ -2590,7 +2544,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||
&old_state->arcs_size[type],
|
||||
arc_buf_size(buf), buf);
|
||||
}
|
||||
ASSERT3U(bufcnt, ==, buffers);
|
||||
ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
|
||||
HDR_HAS_RABD(hdr));
|
||||
|
||||
|
@ -2838,9 +2791,6 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
|
|||
VERIFY3P(buf->b_data, !=, NULL);
|
||||
|
||||
hdr->b_l1hdr.b_buf = buf;
|
||||
hdr->b_l1hdr.b_bufcnt += 1;
|
||||
if (encrypted)
|
||||
hdr->b_crypt_hdr.b_ebufcnt += 1;
|
||||
|
||||
/*
|
||||
* If the user wants the data from the hdr, we need to either copy or
|
||||
|
@ -3082,8 +3032,6 @@ arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf)
|
|||
}
|
||||
buf->b_next = NULL;
|
||||
ASSERT3P(lastbuf, !=, buf);
|
||||
IMPLY(hdr->b_l1hdr.b_bufcnt > 0, lastbuf != NULL);
|
||||
IMPLY(hdr->b_l1hdr.b_bufcnt > 0, hdr->b_l1hdr.b_buf != NULL);
|
||||
IMPLY(lastbuf != NULL, ARC_BUF_LAST(lastbuf));
|
||||
|
||||
return (lastbuf);
|
||||
|
@ -3122,22 +3070,20 @@ arc_buf_destroy_impl(arc_buf_t *buf)
|
|||
}
|
||||
buf->b_data = NULL;
|
||||
|
||||
ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
|
||||
hdr->b_l1hdr.b_bufcnt -= 1;
|
||||
|
||||
if (ARC_BUF_ENCRYPTED(buf)) {
|
||||
hdr->b_crypt_hdr.b_ebufcnt -= 1;
|
||||
|
||||
/*
|
||||
* If we have no more encrypted buffers and we've
|
||||
* already gotten a copy of the decrypted data we can
|
||||
* free b_rabd to save some space.
|
||||
*/
|
||||
if (hdr->b_crypt_hdr.b_ebufcnt == 0 &&
|
||||
HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd != NULL &&
|
||||
!HDR_IO_IN_PROGRESS(hdr)) {
|
||||
arc_hdr_free_abd(hdr, B_TRUE);
|
||||
/*
|
||||
* If we have no more encrypted buffers and we've already
|
||||
* gotten a copy of the decrypted data we can free b_rabd
|
||||
* to save some space.
|
||||
*/
|
||||
if (ARC_BUF_ENCRYPTED(buf) && HDR_HAS_RABD(hdr) &&
|
||||
hdr->b_l1hdr.b_pabd != NULL && !HDR_IO_IN_PROGRESS(hdr)) {
|
||||
arc_buf_t *b;
|
||||
for (b = hdr->b_l1hdr.b_buf; b; b = b->b_next) {
|
||||
if (b != buf && ARC_BUF_ENCRYPTED(b))
|
||||
break;
|
||||
}
|
||||
if (b == NULL)
|
||||
arc_hdr_free_abd(hdr, B_TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3298,11 +3244,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
|
|||
arc_buf_hdr_t *hdr;
|
||||
|
||||
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
|
||||
if (protected) {
|
||||
hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE);
|
||||
} else {
|
||||
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
|
||||
}
|
||||
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
|
||||
|
||||
ASSERT(HDR_EMPTY(hdr));
|
||||
#ifdef ZFS_DEBUG
|
||||
|
@ -3325,7 +3267,6 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
|
|||
hdr->b_l1hdr.b_mru_ghost_hits = 0;
|
||||
hdr->b_l1hdr.b_mfu_hits = 0;
|
||||
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
|
||||
hdr->b_l1hdr.b_bufcnt = 0;
|
||||
hdr->b_l1hdr.b_buf = NULL;
|
||||
|
||||
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
|
||||
|
@ -3351,16 +3292,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
|
|||
ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
|
||||
(old == hdr_l2only_cache && new == hdr_full_cache));
|
||||
|
||||
/*
|
||||
* if the caller wanted a new full header and the header is to be
|
||||
* encrypted we will actually allocate the header from the full crypt
|
||||
* cache instead. The same applies to freeing from the old cache.
|
||||
*/
|
||||
if (HDR_PROTECTED(hdr) && new == hdr_full_cache)
|
||||
new = hdr_full_crypt_cache;
|
||||
if (HDR_PROTECTED(hdr) && old == hdr_full_cache)
|
||||
old = hdr_full_crypt_cache;
|
||||
|
||||
nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
|
||||
|
||||
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
|
||||
|
@ -3368,7 +3299,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
|
|||
|
||||
memcpy(nhdr, hdr, HDR_L2ONLY_SIZE);
|
||||
|
||||
if (new == hdr_full_cache || new == hdr_full_crypt_cache) {
|
||||
if (new == hdr_full_cache) {
|
||||
arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR);
|
||||
/*
|
||||
* arc_access and arc_change_state need to be aware that a
|
||||
|
@ -3382,7 +3313,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
|
|||
ASSERT(!HDR_HAS_RABD(hdr));
|
||||
} else {
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
|
||||
ASSERT0(hdr->b_l1hdr.b_bufcnt);
|
||||
#ifdef ZFS_DEBUG
|
||||
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
|
||||
#endif
|
||||
|
@ -3448,126 +3378,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
|
|||
return (nhdr);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function allows an L1 header to be reallocated as a crypt
|
||||
* header and vice versa. If we are going to a crypt header, the
|
||||
* new fields will be zeroed out.
|
||||
*/
|
||||
static arc_buf_hdr_t *
|
||||
arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
|
||||
{
|
||||
arc_buf_hdr_t *nhdr;
|
||||
arc_buf_t *buf;
|
||||
kmem_cache_t *ncache, *ocache;
|
||||
|
||||
/*
|
||||
* This function requires that hdr is in the arc_anon state.
|
||||
* Therefore it won't have any L2ARC data for us to worry
|
||||
* about copying.
|
||||
*/
|
||||
ASSERT(HDR_HAS_L1HDR(hdr));
|
||||
ASSERT(!HDR_HAS_L2HDR(hdr));
|
||||
ASSERT3U(!!HDR_PROTECTED(hdr), !=, need_crypt);
|
||||
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
|
||||
ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
|
||||
ASSERT(!list_link_active(&hdr->b_l2hdr.b_l2node));
|
||||
ASSERT3P(hdr->b_hash_next, ==, NULL);
|
||||
|
||||
if (need_crypt) {
|
||||
ncache = hdr_full_crypt_cache;
|
||||
ocache = hdr_full_cache;
|
||||
} else {
|
||||
ncache = hdr_full_cache;
|
||||
ocache = hdr_full_crypt_cache;
|
||||
}
|
||||
|
||||
nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE);
|
||||
|
||||
/*
|
||||
* Copy all members that aren't locks or condvars to the new header.
|
||||
* No lists are pointing to us (as we asserted above), so we don't
|
||||
* need to worry about the list nodes.
|
||||
*/
|
||||
nhdr->b_dva = hdr->b_dva;
|
||||
nhdr->b_birth = hdr->b_birth;
|
||||
nhdr->b_type = hdr->b_type;
|
||||
nhdr->b_flags = hdr->b_flags;
|
||||
nhdr->b_psize = hdr->b_psize;
|
||||
nhdr->b_lsize = hdr->b_lsize;
|
||||
nhdr->b_spa = hdr->b_spa;
|
||||
#ifdef ZFS_DEBUG
|
||||
nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum;
|
||||
#endif
|
||||
nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt;
|
||||
nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap;
|
||||
nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state;
|
||||
nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access;
|
||||
nhdr->b_l1hdr.b_mru_hits = hdr->b_l1hdr.b_mru_hits;
|
||||
nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
|
||||
nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
|
||||
nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
|
||||
nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
|
||||
nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
|
||||
|
||||
/*
|
||||
* This zfs_refcount_add() exists only to ensure that the individual
|
||||
* arc buffers always point to a header that is referenced, avoiding
|
||||
* a small race condition that could trigger ASSERTs.
|
||||
*/
|
||||
(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG);
|
||||
nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf;
|
||||
for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next)
|
||||
buf->b_hdr = nhdr;
|
||||
|
||||
zfs_refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt);
|
||||
(void) zfs_refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG);
|
||||
ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
|
||||
|
||||
if (need_crypt) {
|
||||
arc_hdr_set_flags(nhdr, ARC_FLAG_PROTECTED);
|
||||
} else {
|
||||
arc_hdr_clear_flags(nhdr, ARC_FLAG_PROTECTED);
|
||||
}
|
||||
|
||||
/* unset all members of the original hdr */
|
||||
memset(&hdr->b_dva, 0, sizeof (dva_t));
|
||||
hdr->b_birth = 0;
|
||||
hdr->b_type = 0;
|
||||
hdr->b_flags = 0;
|
||||
hdr->b_psize = 0;
|
||||
hdr->b_lsize = 0;
|
||||
hdr->b_spa = 0;
|
||||
#ifdef ZFS_DEBUG
|
||||
hdr->b_l1hdr.b_freeze_cksum = NULL;
|
||||
#endif
|
||||
hdr->b_l1hdr.b_buf = NULL;
|
||||
hdr->b_l1hdr.b_bufcnt = 0;
|
||||
hdr->b_l1hdr.b_byteswap = 0;
|
||||
hdr->b_l1hdr.b_state = NULL;
|
||||
hdr->b_l1hdr.b_arc_access = 0;
|
||||
hdr->b_l1hdr.b_mru_hits = 0;
|
||||
hdr->b_l1hdr.b_mru_ghost_hits = 0;
|
||||
hdr->b_l1hdr.b_mfu_hits = 0;
|
||||
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
|
||||
hdr->b_l1hdr.b_acb = NULL;
|
||||
hdr->b_l1hdr.b_pabd = NULL;
|
||||
|
||||
if (ocache == hdr_full_crypt_cache) {
|
||||
ASSERT(!HDR_HAS_RABD(hdr));
|
||||
hdr->b_crypt_hdr.b_ot = DMU_OT_NONE;
|
||||
hdr->b_crypt_hdr.b_ebufcnt = 0;
|
||||
hdr->b_crypt_hdr.b_dsobj = 0;
|
||||
memset(hdr->b_crypt_hdr.b_salt, 0, ZIO_DATA_SALT_LEN);
|
||||
memset(hdr->b_crypt_hdr.b_iv, 0, ZIO_DATA_IV_LEN);
|
||||
memset(hdr->b_crypt_hdr.b_mac, 0, ZIO_DATA_MAC_LEN);
|
||||
}
|
||||
|
||||
buf_discard_identity(hdr);
|
||||
kmem_cache_free(ocache, hdr);
|
||||
|
||||
return (nhdr);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is used by the send / receive code to convert a newly
|
||||
* allocated arc_buf_t to one that is suitable for a raw encrypted write. It
|
||||
|
@ -3587,8 +3397,7 @@ arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
|
|||
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
|
||||
|
||||
buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED);
|
||||
if (!HDR_PROTECTED(hdr))
|
||||
hdr = arc_hdr_realloc_crypt(hdr, B_TRUE);
|
||||
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
|
||||
hdr->b_crypt_hdr.b_dsobj = dsobj;
|
||||
hdr->b_crypt_hdr.b_ot = ot;
|
||||
hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ?
|
||||
|
@ -3789,8 +3598,6 @@ static void
|
|||
arc_hdr_destroy(arc_buf_hdr_t *hdr)
|
||||
{
|
||||
if (HDR_HAS_L1HDR(hdr)) {
|
||||
ASSERT(hdr->b_l1hdr.b_buf == NULL ||
|
||||
hdr->b_l1hdr.b_bufcnt > 0);
|
||||
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
|
||||
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
|
||||
}
|
||||
|
@ -3854,12 +3661,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
|
|||
#ifdef ZFS_DEBUG
|
||||
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
|
||||
#endif
|
||||
|
||||
if (!HDR_PROTECTED(hdr)) {
|
||||
kmem_cache_free(hdr_full_cache, hdr);
|
||||
} else {
|
||||
kmem_cache_free(hdr_full_crypt_cache, hdr);
|
||||
}
|
||||
kmem_cache_free(hdr_full_cache, hdr);
|
||||
} else {
|
||||
kmem_cache_free(hdr_l2only_cache, hdr);
|
||||
}
|
||||
|
@ -3871,7 +3673,8 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag)
|
|||
arc_buf_hdr_t *hdr = buf->b_hdr;
|
||||
|
||||
if (hdr->b_l1hdr.b_state == arc_anon) {
|
||||
ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, ==, buf);
|
||||
ASSERT(ARC_BUF_LAST(buf));
|
||||
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
|
||||
VERIFY0(remove_reference(hdr, tag));
|
||||
return;
|
||||
|
@ -3881,7 +3684,7 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag)
|
|||
mutex_enter(hash_lock);
|
||||
|
||||
ASSERT3P(hdr, ==, buf->b_hdr);
|
||||
ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
|
||||
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
|
||||
ASSERT3P(hdr->b_l1hdr.b_state, !=, arc_anon);
|
||||
ASSERT3P(buf->b_data, !=, NULL);
|
||||
|
@ -3924,7 +3727,6 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
|
|||
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
|
||||
ASSERT(HDR_HAS_L1HDR(hdr));
|
||||
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
|
||||
ASSERT0(hdr->b_l1hdr.b_bufcnt);
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
|
||||
ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
|
||||
|
||||
|
@ -5586,13 +5388,6 @@ arc_read_done(zio_t *zio)
|
|||
buf_hash_remove(hdr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Broadcast before we drop the hash_lock to avoid the possibility
|
||||
* that the hdr (and hence the cv) might be freed before we get to
|
||||
* the cv_broadcast().
|
||||
*/
|
||||
cv_broadcast(&hdr->b_l1hdr.b_cv);
|
||||
|
||||
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
|
||||
(void) remove_reference(hdr, hdr);
|
||||
|
||||
|
@ -5787,8 +5582,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
|||
}
|
||||
acb->acb_zio_head = head_zio;
|
||||
acb->acb_next = hdr->b_l1hdr.b_acb;
|
||||
if (hdr->b_l1hdr.b_acb)
|
||||
hdr->b_l1hdr.b_acb->acb_prev = acb;
|
||||
hdr->b_l1hdr.b_acb->acb_prev = acb;
|
||||
hdr->b_l1hdr.b_acb = acb;
|
||||
}
|
||||
mutex_exit(hash_lock);
|
||||
|
@ -5928,8 +5722,28 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
|||
* and so the performance impact shouldn't
|
||||
* matter.
|
||||
*/
|
||||
cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
|
||||
arc_callback_t *acb = kmem_zalloc(
|
||||
sizeof (arc_callback_t), KM_SLEEP);
|
||||
acb->acb_wait = B_TRUE;
|
||||
mutex_init(&acb->acb_wait_lock, NULL,
|
||||
MUTEX_DEFAULT, NULL);
|
||||
cv_init(&acb->acb_wait_cv, NULL, CV_DEFAULT,
|
||||
NULL);
|
||||
acb->acb_zio_head =
|
||||
hdr->b_l1hdr.b_acb->acb_zio_head;
|
||||
acb->acb_next = hdr->b_l1hdr.b_acb;
|
||||
hdr->b_l1hdr.b_acb->acb_prev = acb;
|
||||
hdr->b_l1hdr.b_acb = acb;
|
||||
mutex_exit(hash_lock);
|
||||
mutex_enter(&acb->acb_wait_lock);
|
||||
while (acb->acb_wait) {
|
||||
cv_wait(&acb->acb_wait_cv,
|
||||
&acb->acb_wait_lock);
|
||||
}
|
||||
mutex_exit(&acb->acb_wait_lock);
|
||||
mutex_destroy(&acb->acb_wait_lock);
|
||||
cv_destroy(&acb->acb_wait_cv);
|
||||
kmem_free(acb, sizeof (arc_callback_t));
|
||||
goto top;
|
||||
}
|
||||
}
|
||||
|
@ -6310,7 +6124,8 @@ arc_release(arc_buf_t *buf, const void *tag)
|
|||
ASSERT(!HDR_IN_HASH_TABLE(hdr));
|
||||
ASSERT(!HDR_HAS_L2HDR(hdr));
|
||||
|
||||
ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, ==, buf);
|
||||
ASSERT(ARC_BUF_LAST(buf));
|
||||
ASSERT3S(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1);
|
||||
ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
|
||||
|
||||
|
@ -6361,7 +6176,7 @@ arc_release(arc_buf_t *buf, const void *tag)
|
|||
/*
|
||||
* Do we have more than one buf?
|
||||
*/
|
||||
if (hdr->b_l1hdr.b_bufcnt > 1) {
|
||||
if (hdr->b_l1hdr.b_buf != buf || !ARC_BUF_LAST(buf)) {
|
||||
arc_buf_hdr_t *nhdr;
|
||||
uint64_t spa = hdr->b_spa;
|
||||
uint64_t psize = HDR_GET_PSIZE(hdr);
|
||||
|
@ -6442,10 +6257,6 @@ arc_release(arc_buf_t *buf, const void *tag)
|
|||
arc_buf_size(buf), buf);
|
||||
}
|
||||
|
||||
hdr->b_l1hdr.b_bufcnt -= 1;
|
||||
if (ARC_BUF_ENCRYPTED(buf))
|
||||
hdr->b_crypt_hdr.b_ebufcnt -= 1;
|
||||
|
||||
arc_cksum_verify(buf);
|
||||
arc_buf_unwatch(buf);
|
||||
|
||||
|
@ -6458,15 +6269,11 @@ arc_release(arc_buf_t *buf, const void *tag)
|
|||
nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
|
||||
compress, hdr->b_complevel, type);
|
||||
ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
|
||||
ASSERT0(nhdr->b_l1hdr.b_bufcnt);
|
||||
ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
|
||||
VERIFY3U(nhdr->b_type, ==, type);
|
||||
ASSERT(!HDR_SHARED_DATA(nhdr));
|
||||
|
||||
nhdr->b_l1hdr.b_buf = buf;
|
||||
nhdr->b_l1hdr.b_bufcnt = 1;
|
||||
if (ARC_BUF_ENCRYPTED(buf))
|
||||
nhdr->b_crypt_hdr.b_ebufcnt = 1;
|
||||
(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
|
||||
buf->b_hdr = nhdr;
|
||||
|
||||
|
@ -6517,7 +6324,7 @@ arc_write_ready(zio_t *zio)
|
|||
|
||||
ASSERT(HDR_HAS_L1HDR(hdr));
|
||||
ASSERT(!zfs_refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt));
|
||||
ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
|
||||
|
||||
/*
|
||||
* If we're reexecuting this zio because the pool suspended, then
|
||||
|
@ -6552,13 +6359,9 @@ arc_write_ready(zio_t *zio)
|
|||
add_reference(hdr, hdr); /* For IO_IN_PROGRESS. */
|
||||
}
|
||||
|
||||
if (BP_IS_PROTECTED(bp) != !!HDR_PROTECTED(hdr))
|
||||
hdr = arc_hdr_realloc_crypt(hdr, BP_IS_PROTECTED(bp));
|
||||
|
||||
if (BP_IS_PROTECTED(bp)) {
|
||||
/* ZIL blocks are written through zio_rewrite */
|
||||
ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
|
||||
ASSERT(HDR_PROTECTED(hdr));
|
||||
|
||||
if (BP_SHOULD_BYTESWAP(bp)) {
|
||||
if (BP_GET_LEVEL(bp) > 0) {
|
||||
|
@ -6571,11 +6374,14 @@ arc_write_ready(zio_t *zio)
|
|||
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
|
||||
}
|
||||
|
||||
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
|
||||
hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
|
||||
hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
|
||||
zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
|
||||
hdr->b_crypt_hdr.b_iv);
|
||||
zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
|
||||
} else {
|
||||
arc_hdr_clear_flags(hdr, ARC_FLAG_PROTECTED);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -6656,7 +6462,8 @@ arc_write_ready(zio_t *zio)
|
|||
} else {
|
||||
ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd));
|
||||
ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf));
|
||||
ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, ==, buf);
|
||||
ASSERT(ARC_BUF_LAST(buf));
|
||||
|
||||
arc_share_buf(hdr, buf);
|
||||
}
|
||||
|
@ -6737,7 +6544,8 @@ arc_write_done(zio_t *zio)
|
|||
(void *)hdr, (void *)exists);
|
||||
} else {
|
||||
/* Dedup */
|
||||
ASSERT(hdr->b_l1hdr.b_bufcnt == 1);
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
|
||||
ASSERT(ARC_BUF_LAST(hdr->b_l1hdr.b_buf));
|
||||
ASSERT(hdr->b_l1hdr.b_state == arc_anon);
|
||||
ASSERT(BP_GET_DEDUP(zio->io_bp));
|
||||
ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
|
||||
|
@ -6778,7 +6586,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
|
|||
ASSERT(!HDR_IO_ERROR(hdr));
|
||||
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
|
||||
ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
|
||||
ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
|
||||
ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
|
||||
if (uncached)
|
||||
arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED);
|
||||
else if (l2arc)
|
||||
|
|
|
@ -205,11 +205,6 @@ static const uint32_t metaslab_min_search_count = 100;
|
|||
*/
|
||||
static int metaslab_df_use_largest_segment = B_FALSE;
|
||||
|
||||
/*
|
||||
* Percentage of all cpus that can be used by the metaslab taskq.
|
||||
*/
|
||||
int metaslab_load_pct = 50;
|
||||
|
||||
/*
|
||||
* These tunables control how long a metaslab will remain loaded after the
|
||||
* last allocation from it. A metaslab can't be unloaded until at least
|
||||
|
@ -854,9 +849,6 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd, int allocators)
|
|||
zfs_refcount_create_tracked(&mga->mga_alloc_queue_depth);
|
||||
}
|
||||
|
||||
mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
|
||||
maxclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);
|
||||
|
||||
return (mg);
|
||||
}
|
||||
|
||||
|
@ -872,7 +864,6 @@ metaslab_group_destroy(metaslab_group_t *mg)
|
|||
*/
|
||||
ASSERT(mg->mg_activation_count <= 0);
|
||||
|
||||
taskq_destroy(mg->mg_taskq);
|
||||
avl_destroy(&mg->mg_metaslab_tree);
|
||||
mutex_destroy(&mg->mg_lock);
|
||||
mutex_destroy(&mg->mg_ms_disabled_lock);
|
||||
|
@ -963,7 +954,7 @@ metaslab_group_passivate(metaslab_group_t *mg)
|
|||
* allocations from taking place and any changes to the vdev tree.
|
||||
*/
|
||||
spa_config_exit(spa, locks & ~(SCL_ZIO - 1), spa);
|
||||
taskq_wait_outstanding(mg->mg_taskq, 0);
|
||||
taskq_wait_outstanding(spa->spa_metaslab_taskq, 0);
|
||||
spa_config_enter(spa, locks & ~(SCL_ZIO - 1), spa, RW_WRITER);
|
||||
metaslab_group_alloc_update(mg);
|
||||
for (int i = 0; i < mg->mg_allocators; i++) {
|
||||
|
@ -3571,10 +3562,8 @@ metaslab_group_preload(metaslab_group_t *mg)
|
|||
avl_tree_t *t = &mg->mg_metaslab_tree;
|
||||
int m = 0;
|
||||
|
||||
if (spa_shutting_down(spa) || !metaslab_preload_enabled) {
|
||||
taskq_wait_outstanding(mg->mg_taskq, 0);
|
||||
if (spa_shutting_down(spa) || !metaslab_preload_enabled)
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_enter(&mg->mg_lock);
|
||||
|
||||
|
@ -3594,8 +3583,9 @@ metaslab_group_preload(metaslab_group_t *mg)
|
|||
continue;
|
||||
}
|
||||
|
||||
VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload,
|
||||
msp, TQ_SLEEP) != TASKQID_INVALID);
|
||||
VERIFY(taskq_dispatch(spa->spa_metaslab_taskq, metaslab_preload,
|
||||
msp, TQ_SLEEP | (m <= mg->mg_allocators ? TQ_FRONT : 0))
|
||||
!= TASKQID_INVALID);
|
||||
}
|
||||
mutex_exit(&mg->mg_lock);
|
||||
}
|
||||
|
@ -6224,6 +6214,9 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, debug_unload, INT, ZMOD_RW,
|
|||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_enabled, INT, ZMOD_RW,
|
||||
"Preload potential metaslabs during reassessment");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_limit, UINT, ZMOD_RW,
|
||||
"Max number of metaslabs per group to preload");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, unload_delay, UINT, ZMOD_RW,
|
||||
"Delay in txgs after metaslab was last used before unloading");
|
||||
|
||||
|
|
|
@ -169,6 +169,11 @@ static int spa_load_impl(spa_t *spa, spa_import_type_t type,
|
|||
const char **ereport);
|
||||
static void spa_vdev_resilver_done(spa_t *spa);
|
||||
|
||||
/*
|
||||
* Percentage of all CPUs that can be used by the metaslab preload taskq.
|
||||
*/
|
||||
static uint_t metaslab_preload_pct = 50;
|
||||
|
||||
static uint_t zio_taskq_batch_pct = 80; /* 1 thread per cpu in pset */
|
||||
static uint_t zio_taskq_batch_tpq; /* threads per taskq */
|
||||
static const boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
|
||||
|
@ -1399,6 +1404,13 @@ spa_activate(spa_t *spa, spa_mode_t mode)
|
|||
spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
|
||||
1, INT_MAX, 0);
|
||||
|
||||
/*
|
||||
* The taskq to preload metaslabs.
|
||||
*/
|
||||
spa->spa_metaslab_taskq = taskq_create("z_metaslab",
|
||||
metaslab_preload_pct, maxclsyspri, 1, INT_MAX,
|
||||
TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
|
||||
|
||||
/*
|
||||
* Taskq dedicated to prefetcher threads: this is used to prevent the
|
||||
* pool traverse code from monopolizing the global (and limited)
|
||||
|
@ -1434,6 +1446,11 @@ spa_deactivate(spa_t *spa)
|
|||
spa->spa_zvol_taskq = NULL;
|
||||
}
|
||||
|
||||
if (spa->spa_metaslab_taskq) {
|
||||
taskq_destroy(spa->spa_metaslab_taskq);
|
||||
spa->spa_metaslab_taskq = NULL;
|
||||
}
|
||||
|
||||
if (spa->spa_prefetch_taskq) {
|
||||
taskq_destroy(spa->spa_prefetch_taskq);
|
||||
spa->spa_prefetch_taskq = NULL;
|
||||
|
@ -1706,13 +1723,7 @@ spa_unload(spa_t *spa)
|
|||
* This ensures that there is no async metaslab prefetching
|
||||
* while we attempt to unload the spa.
|
||||
*/
|
||||
if (spa->spa_root_vdev != NULL) {
|
||||
for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) {
|
||||
vdev_t *vc = spa->spa_root_vdev->vdev_child[c];
|
||||
if (vc->vdev_mg != NULL)
|
||||
taskq_wait(vc->vdev_mg->mg_taskq);
|
||||
}
|
||||
}
|
||||
taskq_wait(spa->spa_metaslab_taskq);
|
||||
|
||||
if (spa->spa_mmp.mmp_thread)
|
||||
mmp_thread_stop(spa);
|
||||
|
@ -3922,6 +3933,24 @@ spa_ld_trusted_config(spa_t *spa, spa_import_type_t type,
|
|||
rvd = mrvd;
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
|
||||
/*
|
||||
* If 'zpool import' used a cached config, then the on-disk hostid and
|
||||
* hostname may be different to the cached config in ways that should
|
||||
* prevent import. Userspace can't discover this without a scan, but
|
||||
* we know, so we add these values to LOAD_INFO so the caller can know
|
||||
* the difference.
|
||||
*
|
||||
* Note that we have to do this before the config is regenerated,
|
||||
* because the new config will have the hostid and hostname for this
|
||||
* host, in readiness for import.
|
||||
*/
|
||||
if (nvlist_exists(mos_config, ZPOOL_CONFIG_HOSTID))
|
||||
fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_HOSTID,
|
||||
fnvlist_lookup_uint64(mos_config, ZPOOL_CONFIG_HOSTID));
|
||||
if (nvlist_exists(mos_config, ZPOOL_CONFIG_HOSTNAME))
|
||||
fnvlist_add_string(spa->spa_load_info, ZPOOL_CONFIG_HOSTNAME,
|
||||
fnvlist_lookup_string(mos_config, ZPOOL_CONFIG_HOSTNAME));
|
||||
|
||||
/*
|
||||
* We will use spa_config if we decide to reload the spa or if spa_load
|
||||
* fails and we rewind. We must thus regenerate the config using the
|
||||
|
@ -10134,6 +10163,9 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
|
|||
/* asynchronous event notification */
|
||||
EXPORT_SYMBOL(spa_event_notify);
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_pct, UINT, ZMOD_RW,
|
||||
"Percentage of CPUs to run a metaslab preload taskq");
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, UINT, ZMOD_RW,
|
||||
"log2 fraction of arc that can be used by inflight I/Os when "
|
||||
|
|
|
@ -1958,26 +1958,28 @@ zil_max_log_data(zilog_t *zilog, size_t hdrsize)
|
|||
|
||||
/*
|
||||
* Maximum amount of log space we agree to waste to reduce number of
|
||||
* WR_NEED_COPY chunks to reduce zl_get_data() overhead (~12%).
|
||||
* WR_NEED_COPY chunks to reduce zl_get_data() overhead (~6%).
|
||||
*/
|
||||
static inline uint64_t
|
||||
zil_max_waste_space(zilog_t *zilog)
|
||||
{
|
||||
return (zil_max_log_data(zilog, sizeof (lr_write_t)) / 8);
|
||||
return (zil_max_log_data(zilog, sizeof (lr_write_t)) / 16);
|
||||
}
|
||||
|
||||
/*
|
||||
* Maximum amount of write data for WR_COPIED. For correctness, consumers
|
||||
* must fall back to WR_NEED_COPY if we can't fit the entire record into one
|
||||
* maximum sized log block, because each WR_COPIED record must fit in a
|
||||
* single log block. For space efficiency, we want to fit two records into a
|
||||
* max-sized log block.
|
||||
* single log block. Below that it is a tradeoff of additional memory copy
|
||||
* and possibly worse log space efficiency vs additional range lock/unlock.
|
||||
*/
|
||||
static uint_t zil_maxcopied = 7680;
|
||||
|
||||
uint64_t
|
||||
zil_max_copied_data(zilog_t *zilog)
|
||||
{
|
||||
return ((zilog->zl_max_block_size - sizeof (zil_chain_t)) / 2 -
|
||||
sizeof (lr_write_t));
|
||||
uint64_t max_data = zil_max_log_data(zilog, sizeof (lr_write_t));
|
||||
return (MIN(max_data, zil_maxcopied));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -4226,3 +4228,6 @@ ZFS_MODULE_PARAM(zfs_zil, zil_, slog_bulk, U64, ZMOD_RW,
|
|||
|
||||
ZFS_MODULE_PARAM(zfs_zil, zil_, maxblocksize, UINT, ZMOD_RW,
|
||||
"Limit in bytes of ZIL log block size");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_zil, zil_, maxcopied, UINT, ZMOD_RW,
|
||||
"Limit in bytes WR_COPIED size");
|
||||
|
|
|
@ -522,7 +522,7 @@ systemctl --system daemon-reload >/dev/null || true
|
|||
%config(noreplace) %{_sysconfdir}/%{name}/vdev_id.conf.*.example
|
||||
%attr(440, root, root) %config(noreplace) %{_sysconfdir}/sudoers.d/*
|
||||
|
||||
%config(noreplace) %{_sysconfdir}/bash_completion.d/zfs
|
||||
%config(noreplace) %{_bashcompletiondir}/zfs
|
||||
|
||||
%files -n libzpool5
|
||||
%{_libdir}/libzpool.so.*
|
||||
|
|
|
@ -415,6 +415,10 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
|
|||
'zpool_import_rename_001_pos', 'zpool_import_all_001_pos',
|
||||
'zpool_import_encrypted', 'zpool_import_encrypted_load',
|
||||
'zpool_import_errata3', 'zpool_import_errata4',
|
||||
'zpool_import_hostid_changed',
|
||||
'zpool_import_hostid_changed_unclean_export',
|
||||
'zpool_import_hostid_changed_cachefile',
|
||||
'zpool_import_hostid_changed_cachefile_unclean_export',
|
||||
'import_cachefile_device_added',
|
||||
'import_cachefile_device_removed',
|
||||
'import_cachefile_device_replaced',
|
||||
|
|
|
@ -1104,6 +1104,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
|||
functional/cli_root/zpool_import/zpool_import_features_001_pos.ksh \
|
||||
functional/cli_root/zpool_import/zpool_import_features_002_neg.ksh \
|
||||
functional/cli_root/zpool_import/zpool_import_features_003_pos.ksh \
|
||||
functional/cli_root/zpool_import/zpool_import_hostid_changed.ksh \
|
||||
functional/cli_root/zpool_import/zpool_import_hostid_changed_unclean_export.ksh \
|
||||
functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile.ksh \
|
||||
functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile_unclean_export.ksh \
|
||||
functional/cli_root/zpool_import/zpool_import_missing_001_pos.ksh \
|
||||
functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh \
|
||||
functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh \
|
||||
|
|
|
@ -52,6 +52,8 @@ log_must set_tunable64 TXG_TIMEOUT 5000
|
|||
|
||||
log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
|
||||
|
||||
log_must sync_pool $TESTPOOL true
|
||||
|
||||
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
|
||||
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
|
||||
#
|
||||
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2023 by Klara, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
@ -63,3 +64,7 @@ export VDEV4=$DEVICE_DIR/${DEVICE_FILE}4
|
|||
export VDEV5=$DEVICE_DIR/${DEVICE_FILE}5
|
||||
|
||||
export ALTER_ROOT=/alter_import-test
|
||||
|
||||
export HOSTID_FILE="/etc/hostid"
|
||||
export HOSTID1=01234567
|
||||
export HOSTID2=89abcdef
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#
|
||||
# Copyright (c) 2016 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2023 by Klara, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
#!/bin/ksh -p
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2021 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2023 by Klara, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# A pool that was cleanly exported should be importable without force even if
|
||||
# the local hostid doesn't match the on-disk hostid.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Set a hostid.
|
||||
# 2. Create a pool.
|
||||
# 3. Export the pool.
|
||||
# 4. Change the hostid.
|
||||
# 5. Verify that importing the pool without force succeeds.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
function custom_cleanup
|
||||
{
|
||||
rm -f $HOSTID_FILE
|
||||
cleanup
|
||||
}
|
||||
|
||||
log_onexit custom_cleanup
|
||||
|
||||
# 1. Set a hostid.
|
||||
log_must zgenhostid -f $HOSTID1
|
||||
|
||||
# 2. Create a pool.
|
||||
log_must zpool create $TESTPOOL1 $VDEV0
|
||||
|
||||
# 3. Export the pool.
|
||||
log_must zpool export $TESTPOOL1
|
||||
|
||||
# 4. Change the hostid.
|
||||
log_must zgenhostid -f $HOSTID2
|
||||
|
||||
# 5. Verify that importing the pool without force succeeds.
|
||||
log_must zpool import -d $DEVICE_DIR $TESTPOOL1
|
||||
|
||||
log_pass "zpool import can import cleanly exported pool when hostid changes."
|
|
@ -0,0 +1,65 @@
|
|||
#!/bin/ksh -p
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2021 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2023 by Klara, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# A pool that was cleanly exported should be importable from a cachefile
|
||||
# without force even if the local hostid doesn't match the on-disk hostid.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Set a hostid.
|
||||
# 2. Create a pool with a cachefile.
|
||||
# 3. Backup the cachefile.
|
||||
# 4. Export the pool.
|
||||
# 5. Change the hostid.
|
||||
# 6. Verify that importing the pool from the cachefile succeeds
|
||||
# without force.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
function custom_cleanup
|
||||
{
|
||||
rm -f $HOSTID_FILE $CPATH $CPATHBKP
|
||||
cleanup
|
||||
}
|
||||
|
||||
log_onexit custom_cleanup
|
||||
|
||||
# 1. Set a hostid.
|
||||
log_must zgenhostid -f $HOSTID1
|
||||
|
||||
# 2. Create a pool.
|
||||
log_must zpool create -o cachefile=$CPATH $TESTPOOL1 $VDEV0
|
||||
|
||||
# 3. Backup the cachefile.
|
||||
log_must cp $CPATH $CPATHBKP
|
||||
|
||||
# 4. Export the pool.
|
||||
log_must zpool export $TESTPOOL1
|
||||
|
||||
# 5. Change the hostid.
|
||||
log_must zgenhostid -f $HOSTID2
|
||||
|
||||
# 6. Verify that importing the pool from the cachefile succeeds without force.
|
||||
log_must zpool import -c $CPATHBKP $TESTPOOL1
|
||||
|
||||
log_pass "zpool import can import cleanly exported pool from cachefile " \
|
||||
"when hostid changes."
|
|
@ -0,0 +1,75 @@
|
|||
#!/bin/ksh -p
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2021 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2023 by Klara, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# A pool that wasn't cleanly exported should not be importable from a cachefile
|
||||
# without force if the local hostid doesn't match the on-disk hostid.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Set a hostid.
|
||||
# 2. Create a pool.
|
||||
# 3. Backup the cachefile.
|
||||
# 4. Simulate the pool being torn down without export:
|
||||
# 4.1. Copy the underlying device state.
|
||||
# 4.2. Export the pool.
|
||||
# 4.3. Restore the device state from the copy.
|
||||
# 5. Change the hostid.
|
||||
# 6. Verify that importing the pool from the cachefile fails.
|
||||
# 7. Verify that importing the pool from the cachefile with force
|
||||
# succeeds.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
function custom_cleanup
|
||||
{
|
||||
rm -f $HOSTID_FILE $CPATH $CPATHBKP $VDEV0.bak
|
||||
cleanup
|
||||
}
|
||||
|
||||
log_onexit custom_cleanup
|
||||
|
||||
# 1. Set a hostid.
|
||||
log_must zgenhostid -f $HOSTID1
|
||||
|
||||
# 2. Create a pool.
|
||||
log_must zpool create -o cachefile=$CPATH $TESTPOOL1 $VDEV0
|
||||
|
||||
# 3. Backup the cachefile.
|
||||
log_must cp $CPATH $CPATHBKP
|
||||
|
||||
# 4. Simulate the pool being torn down without export.
|
||||
log_must cp $VDEV0 $VDEV0.bak
|
||||
log_must zpool export $TESTPOOL1
|
||||
log_must cp -f $VDEV0.bak $VDEV0
|
||||
log_must rm -f $VDEV0.bak
|
||||
|
||||
# 5. Change the hostid.
|
||||
log_must zgenhostid -f $HOSTID2
|
||||
|
||||
# 6. Verify that importing the pool from the cachefile fails.
|
||||
log_mustnot zpool import -c $CPATHBKP $TESTPOOL1
|
||||
|
||||
# 7. Verify that importing the pool from the cachefile with force succeeds.
|
||||
log_must zpool import -f -c $CPATHBKP $TESTPOOL1
|
||||
|
||||
log_pass "zpool import from cachefile requires force if not cleanly " \
|
||||
"exported and hostid changes."
|
|
@ -0,0 +1,70 @@
|
|||
#!/bin/ksh -p
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2021 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2023 by Klara, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# A pool that wasn't cleanly exported should not be importable without force if
|
||||
# the local hostid doesn't match the on-disk hostid.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Set a hostid.
|
||||
# 2. Create a pool.
|
||||
# 3. Simulate the pool being torn down without export:
|
||||
# 3.1. Copy the underlying device state.
|
||||
# 3.2. Export the pool.
|
||||
# 3.3. Restore the device state from the copy.
|
||||
# 4. Change the hostid.
|
||||
# 5. Verify that importing the pool fails.
|
||||
# 6. Verify that importing the pool with force succeeds.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
function custom_cleanup
|
||||
{
|
||||
rm -f $HOSTID_FILE $VDEV0.bak
|
||||
cleanup
|
||||
}
|
||||
|
||||
log_onexit custom_cleanup
|
||||
|
||||
# 1. Set a hostid.
|
||||
log_must zgenhostid -f $HOSTID1
|
||||
|
||||
# 2. Create a pool.
|
||||
log_must zpool create $TESTPOOL1 $VDEV0
|
||||
|
||||
# 3. Simulate the pool being torn down without export.
|
||||
log_must cp $VDEV0 $VDEV0.bak
|
||||
log_must zpool export $TESTPOOL1
|
||||
log_must cp -f $VDEV0.bak $VDEV0
|
||||
log_must rm -f $VDEV0.bak
|
||||
|
||||
# 4. Change the hostid.
|
||||
log_must zgenhostid -f $HOSTID2
|
||||
|
||||
# 5. Verify that importing the pool fails.
|
||||
log_mustnot zpool import -d $DEVICE_DIR $TESTPOOL1
|
||||
|
||||
# 6. Verify that importing the pool with force succeeds.
|
||||
log_must zpool import -d $DEVICE_DIR -f $TESTPOOL1
|
||||
|
||||
log_pass "zpool import requires force if not cleanly exported " \
|
||||
"and hostid changed."
|
Loading…
Reference in a new issue