Notable upstream pull request merges:
 #15532 c1a47de86 zdb: Fix zdb '-O|-r' options with -e/exported zpool
 #15535 cf3316633 ZVOL: Minor code cleanup
 #15541 803a9c12c brt: lift internal definitions into _impl header
 #15541 213d68296 zdb: show BRT statistics and dump its contents
 #15543 a49087510 ZIL: Refactor TX_WRITE encryption similar to
                  TX_CLONE_RANGE
 #15543 27d8c23c5 ZIL: Do not encrypt block pointers in lr_clone_range_t
 #15549 67894a597 unnecessary alloc/free in dsl_scan_visitbp()
 #15551 126efb588 FreeBSD: Fix the build on FreeBSD 12
 #15563 acb33ee1c FreeBSD: Fix ZFS so that snapshots under .zfs/snapshot are
                  NFS visible
 #15564 7bbd42ef4 Don't allow attach to a raidz child vdev
 #15566 688514e47 dmu_buf_will_clone: fix race in transition back to NOFILL
 #15571 30d581121 dnode_is_dirty: check dnode and its data for dirtiness

Obtained from:	OpenZFS
OpenZFS commit:	688514e470
This commit is contained in:
Martin Matuska 2023-11-28 21:35:02 +01:00
commit 2276e53940
26 changed files with 443 additions and 255 deletions

View file

@ -32,4 +32,4 @@ For more details see the NOTICE, LICENSE and COPYRIGHT files; `UCRL-CODE-235197`
# Supported Kernels
* The `META` file contains the officially recognized supported Linux kernel versions.
* Supported FreeBSD versions are any supported branches and releases starting from 12.2-RELEASE.
* Supported FreeBSD versions are any supported branches and releases starting from 12.4-RELEASE.

View file

@ -34,6 +34,7 @@
* Copyright (c) 2021 Allan Jude
* Copyright (c) 2021 Toomas Soome <tsoome@me.com>
* Copyright (c) 2023, Klara Inc.
* Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
*/
#include <stdio.h>
@ -80,6 +81,7 @@
#include <sys/dsl_scan.h>
#include <sys/btree.h>
#include <sys/brt.h>
#include <sys/brt_impl.h>
#include <zfs_comutil.h>
#include <sys/zstd/zstd.h>
@ -899,6 +901,8 @@ usage(void)
"don't print label contents\n");
(void) fprintf(stderr, " -t --txg=INTEGER "
"highest txg to use when searching for uberblocks\n");
(void) fprintf(stderr, " -T --brt-stats "
"BRT statistics\n");
(void) fprintf(stderr, " -u --uberblock "
"uberblock\n");
(void) fprintf(stderr, " -U --cachefile=PATH "
@ -999,6 +1003,15 @@ zdb_nicenum(uint64_t num, char *buf, size_t buflen)
nicenum(num, buf, buflen);
}
static void
zdb_nicebytes(uint64_t bytes, char *buf, size_t buflen)
{
if (dump_opt['P'])
(void) snprintf(buf, buflen, "%llu", (longlong_t)bytes);
else
zfs_nicebytes(bytes, buf, buflen);
}
static const char histo_stars[] = "****************************************";
static const uint64_t histo_width = sizeof (histo_stars) - 1;
@ -2081,6 +2094,76 @@ dump_all_ddts(spa_t *spa)
dump_dedup_ratio(&dds_total);
}
/*
 * Print block reference table (BRT) information for the pool.
 *
 * The level of detail is taken from dump_opt['T']:
 *   1  pool-wide used space, saved space and ratio;
 *   2  additionally, one line per vdev with its total reference count and
 *      used/saved space;
 *   3  additionally, every entry of each initiated vdev's entries ZAP,
 *      printed as "vdev:offset" together with its reference count.
 *
 * Nothing but a one-line notice is printed when the block cloning feature
 * is not enabled, or is enabled but not active.
 */
static void
dump_brt(spa_t *spa)
{
	if (!spa_feature_is_enabled(spa, SPA_FEATURE_BLOCK_CLONING)) {
		printf("BRT: unsupported on this pool\n");
		return;
	}

	if (!spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
		printf("BRT: empty\n");
		return;
	}

	brt_t *brt = spa->spa_brt;
	VERIFY(brt);

	char count[32], used[32], saved[32];
	zdb_nicebytes(brt_get_used(spa), used, sizeof (used));
	zdb_nicebytes(brt_get_saved(spa), saved, sizeof (saved));

	/* The ratio is printed as a whole part and a two-digit fraction. */
	uint64_t ratio = brt_get_ratio(spa);
	printf("BRT: used %s; saved %s; ratio %llu.%02llux\n", used, saved,
	    (u_longlong_t)(ratio / 100), (u_longlong_t)(ratio % 100));

	if (dump_opt['T'] < 2)
		return;

	/*
	 * vdevid is a uint64_t, so it is printed with %llu and an explicit
	 * cast; %lu is only correct where long happens to be 64 bits wide.
	 */
	for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
		/* Address of an array element; it cannot be NULL. */
		brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];

		if (!brtvd->bv_initiated) {
			printf("BRT: vdev %llu: empty\n",
			    (u_longlong_t)vdevid);
			continue;
		}

		zdb_nicenum(brtvd->bv_totalcount, count, sizeof (count));
		zdb_nicebytes(brtvd->bv_usedspace, used, sizeof (used));
		zdb_nicebytes(brtvd->bv_savedspace, saved, sizeof (saved));
		printf("BRT: vdev %llu: refcnt %s; used %s; saved %s\n",
		    (u_longlong_t)vdevid, count, used, saved);
	}

	if (dump_opt['T'] < 3)
		return;

	char dva[64];
	printf("\n%-16s %-10s\n", "DVA", "REFCNT");

	for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
		brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];

		if (!brtvd->bv_initiated)
			continue;

		zap_cursor_t zc;
		zap_attribute_t za;
		for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries);
		    zap_cursor_retrieve(&zc, &za) == 0;
		    zap_cursor_advance(&zc)) {
			/*
			 * The entry name holds the raw 64-bit offset (the
			 * key), not a printable string; the first integer
			 * is the reference count.
			 */
			uint64_t offset = *(uint64_t *)za.za_name;
			uint64_t refcnt = za.za_first_integer;

			(void) snprintf(dva, sizeof (dva), "%llu:%llx",
			    (u_longlong_t)vdevid, (u_longlong_t)offset);
			printf("%-16s %-10llu\n", dva, (u_longlong_t)refcnt);
		}
		zap_cursor_fini(&zc);
	}
}
static void
dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
{
@ -8108,6 +8191,9 @@ dump_zpool(spa_t *spa)
if (dump_opt['D'])
dump_all_ddts(spa);
if (dump_opt['T'])
dump_brt(spa);
if (dump_opt['d'] > 2 || dump_opt['m'])
dump_metaslabs(spa);
if (dump_opt['M'])
@ -8894,6 +8980,7 @@ main(int argc, char **argv)
{"io-stats", no_argument, NULL, 's'},
{"simulate-dedup", no_argument, NULL, 'S'},
{"txg", required_argument, NULL, 't'},
{"brt-stats", no_argument, NULL, 'T'},
{"uberblock", no_argument, NULL, 'u'},
{"cachefile", required_argument, NULL, 'U'},
{"verbose", no_argument, NULL, 'v'},
@ -8907,7 +8994,7 @@ main(int argc, char **argv)
};
while ((c = getopt_long(argc, argv,
"AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
"AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:TuU:vVx:XYyZ",
long_options, NULL)) != -1) {
switch (c) {
case 'b':
@ -8929,6 +9016,7 @@ main(int argc, char **argv)
case 'R':
case 's':
case 'S':
case 'T':
case 'u':
case 'y':
case 'Z':
@ -9091,22 +9179,6 @@ main(int argc, char **argv)
if (dump_opt['l'])
return (dump_label(argv[0]));
if (dump_opt['O']) {
if (argc != 2)
usage();
dump_opt['v'] = verbose + 3;
return (dump_path(argv[0], argv[1], NULL));
}
if (dump_opt['r']) {
target_is_spa = B_FALSE;
if (argc != 3)
usage();
dump_opt['v'] = verbose;
error = dump_path(argv[0], argv[1], &object);
if (error != 0)
fatal("internal error: %s", strerror(error));
}
if (dump_opt['X'] || dump_opt['F'])
rewind = ZPOOL_DO_REWIND |
(dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
@ -9207,6 +9279,29 @@ main(int argc, char **argv)
searchdirs = NULL;
}
/*
* We need to make sure to process -O option or call
* dump_path after the -e option has been processed,
* which imports the pool to the namespace if it's
* not in the cachefile.
*/
if (dump_opt['O']) {
if (argc != 2)
usage();
dump_opt['v'] = verbose + 3;
return (dump_path(argv[0], argv[1], NULL));
}
if (dump_opt['r']) {
target_is_spa = B_FALSE;
if (argc != 3)
usage();
dump_opt['v'] = verbose;
error = dump_path(argv[0], argv[1], &object);
if (error != 0)
fatal("internal error: %s", strerror(error));
}
/*
* import_checkpointed_state makes the assumption that the
* target pool that we pass it is already part of the spa

View file

@ -33,6 +33,7 @@ COMMON_H = \
sys/bqueue.h \
sys/btree.h \
sys/brt.h \
sys/brt_impl.h \
sys/dataset_kstats.h \
sys/dbuf.h \
sys/ddt.h \

View file

@ -101,7 +101,7 @@ void vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
void vfs_clearmntopt(vfs_t *vfsp, const char *name);
int vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp);
int mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype,
char *fspath, char *fspec, int fsflags);
char *fspath, char *fspec, int fsflags, vfs_t *parent_vfsp);
typedef uint64_t vfs_feature_t;

View file

@ -56,6 +56,7 @@ enum symfollow { NO_FOLLOW = NOFOLLOW };
#ifndef IN_BASE
#include_next <sys/vnode.h>
#endif
#include <sys/ccompat.h>
#include <sys/mount.h>
#include <sys/cred.h>
#include <sys/fcntl.h>
@ -104,7 +105,7 @@ vn_flush_cached_data(vnode_t *vp, boolean_t sync)
zfs_vmobject_wlock(vp->v_object);
vm_object_page_clean(vp->v_object, 0, 0, flags);
zfs_vmobject_wunlock(vp->v_object);
VOP_UNLOCK(vp);
VOP_UNLOCK1(vp);
}
}
#endif

View file

@ -0,0 +1,199 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2020, 2021, 2022 by Pawel Jakub Dawidek
*/
#ifndef _SYS_BRT_IMPL_H
#define _SYS_BRT_IMPL_H
#ifdef __cplusplus
extern "C" {
#endif
/*
* BRT - Block Reference Table.
*/
#define BRT_OBJECT_VDEV_PREFIX "com.fudosecurity:brt:vdev:"
/*
* We divide each VDEV into 16MB chunks. Each chunk is represented in memory
* by a 16bit counter, thus 1TB VDEV requires 128kB of memory: (1TB / 16MB) * 2B
* Each element in this array represents how many BRT entries do we have in this
* chunk of storage. We always load this entire array into memory and update as
* needed. By having it in memory we can quickly tell (during zio_free()) if
* there are any BRT entries that we might need to update.
*
* This value cannot be larger than 16MB, at least as long as we support
* 512 byte block sizes. With 512 byte block size we can have exactly
* 32768 blocks in 16MB. In 32MB we could have 65536 blocks, which is one too
* many for a 16bit counter.
*/
#define BRT_RANGESIZE (16 * 1024 * 1024)
_Static_assert(BRT_RANGESIZE / SPA_MINBLOCKSIZE <= UINT16_MAX,
"BRT_RANGESIZE is too large.");
/*
* We don't want to update the whole structure every time. Maintain bitmap
* of dirty blocks within the regions, so that a single bit represents a
* block size of entcounts. For example if we have a 1PB vdev then all
* entcounts take 128MB of memory ((1PB / 16MB) * 2B). We can divide this
* 128MB array of entcounts into 32kB disk blocks, as we don't want to update
* the whole 128MB on disk when we have updated only a single entcount.
* We maintain a bitmap where each 32kB disk block within 128MB entcounts array
* is represented by a single bit. This gives us 4096 bits. A set bit in the
* bitmap means that we had a change in at least one of the 16384 entcounts
* that reside on a 32kB disk block (32kB / sizeof (uint16_t)).
*/
#define BRT_BLOCKSIZE (32 * 1024)
#define BRT_RANGESIZE_TO_NBLOCKS(size) \
(((size) - 1) / BRT_BLOCKSIZE / sizeof (uint16_t) + 1)
#define BRT_LITTLE_ENDIAN 0
#define BRT_BIG_ENDIAN 1
#ifdef _ZFS_LITTLE_ENDIAN
#define BRT_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
#define BRT_NON_NATIVE_BYTEORDER BRT_BIG_ENDIAN
#else
#define BRT_NATIVE_BYTEORDER BRT_BIG_ENDIAN
#define BRT_NON_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
#endif
/*
* Per-vdev BRT header as kept in a MOS object (see bv_mos_brtvdev in
* brt_vdev_t, which names the object holding the entcount array and this
* structure).
* NOTE(review): the bvp_* fields appear to be the persisted counterparts of
* the identically named bv_* members of brt_vdev_t - confirm against brt.c.
*/
typedef struct brt_vdev_phys {
uint64_t bvp_mos_entries;
uint64_t bvp_size;
/* presumably BRT_LITTLE_ENDIAN or BRT_BIG_ENDIAN - TODO confirm */
uint64_t bvp_byteorder;
uint64_t bvp_totalcount;
uint64_t bvp_rangesize;
uint64_t bvp_usedspace;
uint64_t bvp_savedspace;
} brt_vdev_phys_t;
/*
* Per-vdev BRT state; brt_t holds an array of these in brt_vdevs.
*/
typedef struct brt_vdev {
/*
* VDEV id.
*/
uint64_t bv_vdevid;
/*
* Is the structure initiated?
* (bv_entcount and bv_bitmap are allocated?)
*/
boolean_t bv_initiated;
/*
* Object number in the MOS for the entcount array and brt_vdev_phys.
*/
uint64_t bv_mos_brtvdev;
/*
* Object number in the MOS for the entries table.
*/
uint64_t bv_mos_entries;
/*
* Entries to sync.
*/
avl_tree_t bv_tree;
/*
* Does the bv_entcount[] array need byte swapping?
*/
boolean_t bv_need_byteswap;
/*
* Number of entries in the bv_entcount[] array.
*/
uint64_t bv_size;
/*
* This is the array with BRT entry count per BRT_RANGESIZE.
*/
uint16_t *bv_entcount;
/*
* Sum of all bv_entcount[]s.
*/
uint64_t bv_totalcount;
/*
* Space on disk occupied by cloned blocks (without compression).
*/
uint64_t bv_usedspace;
/*
* How much additional space would be occupied without block cloning.
*/
uint64_t bv_savedspace;
/*
* brt_vdev_phys needs updating on disk.
*/
boolean_t bv_meta_dirty;
/*
* bv_entcount[] needs updating on disk.
*/
boolean_t bv_entcount_dirty;
/*
* bv_entcount[] potentially can be a bit too big to synchronize it all
* when we just changed few entcounts. The fields below allow us to
* track updates to bv_entcount[] array since the last sync.
* A single bit in the bv_bitmap represents as many entcounts as can
* fit into a single BRT_BLOCKSIZE.
* For example we have 65536 entcounts in the bv_entcount array
* (so the whole array is 128kB). We updated bv_entcount[2] and
* bv_entcount[5]. In that case only first bit in the bv_bitmap will
* be set and we will write only first BRT_BLOCKSIZE out of 128kB.
*/
ulong_t *bv_bitmap;
uint64_t bv_nblocks;
} brt_vdev_t;
/*
* In-core brt
*/
typedef struct brt {
/* NOTE(review): exactly which fields this lock covers is not shown here. */
krwlock_t brt_lock;
/* Pool this table belongs to. */
spa_t *brt_spa;
/* Shorthand for the owning pool's meta objset. */
#define brt_mos brt_spa->spa_meta_objset
uint64_t brt_rangesize;
/* presumably pool-wide totals of the per-vdev bv_usedspace/bv_savedspace */
uint64_t brt_usedspace;
uint64_t brt_savedspace;
/*
* One pending tree and one lock per TXG_SIZE slot.
* NOTE(review): presumably holds brt_pending_entry_t nodes - confirm.
*/
avl_tree_t brt_pending_tree[TXG_SIZE];
kmutex_t brt_pending_lock[TXG_SIZE];
/* Sum of all entries across all bv_trees. */
uint64_t brt_nentries;
/* Array of per-vdev state and the number of elements in it. */
brt_vdev_t *brt_vdevs;
uint64_t brt_nvdevs;
} brt_t;
/* Size of bre_offset / sizeof (uint64_t). */
#define BRT_KEY_WORDS (1)
/*
* In-core brt entry.
* On-disk we use bre_offset as the key and bre_refcount as the value.
*/
typedef struct brt_entry {
/* Key of the entry (BRT_KEY_WORDS 64-bit words on disk). */
uint64_t bre_offset;
/* Value of the entry. */
uint64_t bre_refcount;
/* AVL linkage; presumably for brt_vdev_t's bv_tree - TODO confirm. */
avl_node_t bre_node;
} brt_entry_t;
/*
* A block pointer together with a count, linkable into an AVL tree.
* NOTE(review): presumably queued on brt_t's brt_pending_tree[] until the
* corresponding txg is synced - confirm against brt.c.
*/
typedef struct brt_pending_entry {
blkptr_t bpe_bp;
int bpe_count;
avl_node_t bpe_node;
} brt_pending_entry_t;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_BRT_IMPL_H */

View file

@ -14,7 +14,7 @@
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
.\" Copyright (c) 2017 Intel Corporation.
.\"
.Dd June 27, 2023
.Dd November 18, 2023
.Dt ZDB 8
.Os
.
@ -23,7 +23,7 @@
.Nd display ZFS storage pool debugging and consistency information
.Sh SYNOPSIS
.Nm
.Op Fl AbcdDFGhikLMNPsvXYy
.Op Fl AbcdDFGhikLMNPsTvXYy
.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns
.Op Fl I Ar inflight-I/O-ops
.Oo Fl o Ar var Ns = Ns Ar value Oc Ns
@ -403,6 +403,13 @@ Display operation counts, bandwidth, and error counts of I/O to the pool from
Simulate the effects of deduplication, constructing a DDT and then display
that DDT as with
.Fl DD .
.It Fl T , -brt-stats
Display block reference table (BRT) statistics, including the size of unique
blocks cloned, the space saving as a result of cloning, and the saving ratio.
.It Fl TT
Display the per-vdev BRT statistics, including total references.
.It Fl TTT
Dump the contents of the block reference tables.
.It Fl u , -uberblock
Display the current uberblock.
.El

View file

@ -117,7 +117,7 @@ vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
int
mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
char *fspec, int fsflags)
char *fspec, int fsflags, vfs_t *parent_vfsp)
{
struct vfsconf *vfsp;
struct mount *mp;
@ -217,6 +217,13 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
mp->mnt_opt = mp->mnt_optnew;
(void) VFS_STATFS(mp, &mp->mnt_stat);
#ifdef VFS_SUPPORTS_EXJAIL_CLONE
/*
* Clone the mnt_exjail credentials of the parent, as required.
*/
vfs_exjail_clone(parent_vfsp, mp);
#endif
/*
* Prevent external consumers of mount options from reading
* mnt_optnew.

View file

@ -29,11 +29,7 @@
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/zmod.h>
#if __FreeBSD_version >= 1300041
#include <contrib/zlib/zlib.h>
#else
#include <sys/zlib.h>
#endif
#include <sys/kobj.h>
@ -87,11 +83,7 @@ zlib_inflateInit(z_stream *stream)
static int
zlib_inflate(z_stream *stream, int finish)
{
#if __FreeBSD_version >= 1300024
return (inflate(stream, finish));
#else
return (_zlib104_inflate(stream, finish));
#endif
}

View file

@ -46,6 +46,7 @@ knlist_sx_xunlock(void *arg)
sx_xunlock((struct sx *)arg);
}
#if __FreeBSD_version >= 1300128
static void
knlist_sx_assert_lock(void *arg, int what)
{
@ -55,11 +56,28 @@ knlist_sx_assert_lock(void *arg, int what)
else
sx_assert((struct sx *)arg, SX_UNLOCKED);
}
#else
/*
* "Assert locked" callback for knlist_init() on kernels older than
* __FreeBSD_version 1300128, where separate locked/unlocked assertion
* callbacks are passed. arg is the struct sx handed to knlist_init_sx().
*/
static void
knlist_sx_assert_locked(void *arg)
{
sx_assert((struct sx *)arg, SX_LOCKED);
}
/*
* "Assert unlocked" callback for knlist_init() on kernels older than
* __FreeBSD_version 1300128. arg is the struct sx handed to
* knlist_init_sx().
*/
static void
knlist_sx_assert_unlocked(void *arg)
{
sx_assert((struct sx *)arg, SX_UNLOCKED);
}
#endif
/*
* Initialize knl so that it is protected by the sx lock `lock`.
*
* From __FreeBSD_version 1300128 on, knlist_init() takes a single assertion
* callback; older kernels take separate "locked" and "unlocked" callbacks,
* hence the two call forms below.
*/
void
knlist_init_sx(struct knlist *knl, struct sx *lock)
{
#if __FreeBSD_version >= 1300128
knlist_init(knl, lock, knlist_sx_xlock, knlist_sx_xunlock,
knlist_sx_assert_lock);
#else
knlist_init(knl, lock, knlist_sx_xlock, knlist_sx_xunlock,
knlist_sx_assert_locked, knlist_sx_assert_unlocked);
#endif
}

View file

@ -1026,7 +1026,8 @@ zfsctl_snapdir_lookup(struct vop_lookup_args *ap)
"%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
dvp->v_vfsp->mnt_stat.f_mntonname, name);
err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0);
err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0,
dvp->v_vfsp);
kmem_free(mountpoint, mountpoint_len);
if (err == 0) {
/*

View file

@ -6220,6 +6220,7 @@ zfs_deallocate(struct vop_deallocate_args *ap)
}
#endif
#if __FreeBSD_version >= 1300039
#ifndef _SYS_SYSPROTO_H_
struct vop_copy_file_range_args {
struct vnode *a_invp;
@ -6324,6 +6325,7 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
error = ENOSYS;
return (error);
}
#endif
struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
@ -6388,7 +6390,9 @@ struct vop_vector zfs_vnodeops = {
#if __FreeBSD_version >= 1400043
.vop_add_writecount = vop_stdadd_writecount_nomsync,
#endif
#if __FreeBSD_version >= 1300039
.vop_copy_file_range = zfs_freebsd_copy_file_range,
#endif
};
VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);

View file

@ -1338,19 +1338,14 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
* authenticate it.
*/
if (txtype == TX_WRITE) {
crypt_len = sizeof (lr_write_t) -
sizeof (lr_t) - sizeof (blkptr_t);
dst_iovecs[vec].iov_base = (char *)dlrp +
sizeof (lr_t);
const size_t o = offsetof(lr_write_t, lr_blkptr);
crypt_len = o - sizeof (lr_t);
dst_iovecs[vec].iov_base = (char *)dlrp + sizeof (lr_t);
dst_iovecs[vec].iov_len = crypt_len;
/* copy the bp now since it will not be encrypted */
memcpy(dlrp + sizeof (lr_write_t) - sizeof (blkptr_t),
slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
sizeof (blkptr_t));
memcpy(aadp,
slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
sizeof (blkptr_t));
memcpy(dlrp + o, slrp + o, sizeof (blkptr_t));
memcpy(aadp, slrp + o, sizeof (blkptr_t));
aadp += sizeof (blkptr_t);
aad_len += sizeof (blkptr_t);
vec++;
@ -1364,10 +1359,22 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
vec++;
total_len += crypt_len;
}
} else if (txtype == TX_CLONE_RANGE) {
const size_t o = offsetof(lr_clone_range_t, lr_nbps);
crypt_len = o - sizeof (lr_t);
dst_iovecs[vec].iov_base = (char *)dlrp + sizeof (lr_t);
dst_iovecs[vec].iov_len = crypt_len;
/* copy the bps now since they will not be encrypted */
memcpy(dlrp + o, slrp + o, lr_len - o);
memcpy(aadp, slrp + o, lr_len - o);
aadp += lr_len - o;
aad_len += lr_len - o;
vec++;
total_len += crypt_len;
} else {
crypt_len = lr_len - sizeof (lr_t);
dst_iovecs[vec].iov_base = (char *)dlrp +
sizeof (lr_t);
dst_iovecs[vec].iov_base = (char *)dlrp + sizeof (lr_t);
dst_iovecs[vec].iov_len = crypt_len;
vec++;
total_len += crypt_len;

View file

@ -1513,20 +1513,16 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
* authenticate it.
*/
if (txtype == TX_WRITE) {
crypt_len = sizeof (lr_write_t) -
sizeof (lr_t) - sizeof (blkptr_t);
const size_t o = offsetof(lr_write_t, lr_blkptr);
crypt_len = o - sizeof (lr_t);
src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
src_iovecs[nr_iovecs].iov_len = crypt_len;
dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
dst_iovecs[nr_iovecs].iov_len = crypt_len;
/* copy the bp now since it will not be encrypted */
memcpy(dlrp + sizeof (lr_write_t) - sizeof (blkptr_t),
slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
sizeof (blkptr_t));
memcpy(aadp,
slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
sizeof (blkptr_t));
memcpy(dlrp + o, slrp + o, sizeof (blkptr_t));
memcpy(aadp, slrp + o, sizeof (blkptr_t));
aadp += sizeof (blkptr_t);
aad_len += sizeof (blkptr_t);
nr_iovecs++;
@ -1543,6 +1539,21 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
nr_iovecs++;
total_len += crypt_len;
}
} else if (txtype == TX_CLONE_RANGE) {
const size_t o = offsetof(lr_clone_range_t, lr_nbps);
crypt_len = o - sizeof (lr_t);
src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
src_iovecs[nr_iovecs].iov_len = crypt_len;
dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
dst_iovecs[nr_iovecs].iov_len = crypt_len;
/* copy the bps now since they will not be encrypted */
memcpy(dlrp + o, slrp + o, lr_len - o);
memcpy(aadp, slrp + o, lr_len - o);
aadp += lr_len - o;
aad_len += lr_len - o;
nr_iovecs++;
total_len += crypt_len;
} else {
crypt_len = lr_len - sizeof (lr_t);
src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);

View file

@ -1199,7 +1199,7 @@ zvol_alloc(dev_t dev, const char *name)
zso->zvo_queue->queuedata = zv;
zso->zvo_dev = dev;
zv->zv_open_count = 0;
strlcpy(zv->zv_name, name, MAXNAMELEN);
strlcpy(zv->zv_name, name, sizeof (zv->zv_name));
zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL);
rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);

View file

@ -28,6 +28,7 @@
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/brt.h>
#include <sys/brt_impl.h>
#include <sys/ddt.h>
#include <sys/bitmap.h>
#include <sys/zap.h>
@ -243,169 +244,6 @@
* a chance to clean this up on dataset destroy (see zil_free_clone_range()).
*/
/*
* BRT - Block Reference Table.
*/
#define BRT_OBJECT_VDEV_PREFIX "com.fudosecurity:brt:vdev:"
/*
* We divide each VDEV into 16MB chunks. Each chunk is represented in memory
* by a 16bit counter, thus 1TB VDEV requires 128kB of memory: (1TB / 16MB) * 2B
* Each element in this array represents how many BRT entries do we have in this
* chunk of storage. We always load this entire array into memory and update as
* needed. By having it in memory we can quickly tell (during zio_free()) if
* there are any BRT entries that we might need to update.
*
* This value cannot be larger than 16MB, at least as long as we support
* 512 byte block sizes. With 512 byte block size we can have exactly
* 32768 blocks in 16MB. In 32MB we could have 65536 blocks, which is one too
* many for a 16bit counter.
*/
#define BRT_RANGESIZE (16 * 1024 * 1024)
_Static_assert(BRT_RANGESIZE / SPA_MINBLOCKSIZE <= UINT16_MAX,
"BRT_RANGESIZE is too large.");
/*
* We don't want to update the whole structure every time. Maintain bitmap
* of dirty blocks within the regions, so that a single bit represents a
* block size of entcounts. For example if we have a 1PB vdev then all
* entcounts take 128MB of memory ((64TB / 16MB) * 2B). We can divide this
* 128MB array of entcounts into 32kB disk blocks, as we don't want to update
* the whole 128MB on disk when we have updated only a single entcount.
* We maintain a bitmap where each 32kB disk block within 128MB entcounts array
* is represented by a single bit. This gives us 4096 bits. A set bit in the
* bitmap means that we had a change in at least one of the 16384 entcounts
* that reside on a 32kB disk block (32kB / sizeof (uint16_t)).
*/
#define BRT_BLOCKSIZE (32 * 1024)
#define BRT_RANGESIZE_TO_NBLOCKS(size) \
(((size) - 1) / BRT_BLOCKSIZE / sizeof (uint16_t) + 1)
#define BRT_LITTLE_ENDIAN 0
#define BRT_BIG_ENDIAN 1
#ifdef _ZFS_LITTLE_ENDIAN
#define BRT_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
#define BRT_NON_NATIVE_BYTEORDER BRT_BIG_ENDIAN
#else
#define BRT_NATIVE_BYTEORDER BRT_BIG_ENDIAN
#define BRT_NON_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
#endif
typedef struct brt_vdev_phys {
uint64_t bvp_mos_entries;
uint64_t bvp_size;
uint64_t bvp_byteorder;
uint64_t bvp_totalcount;
uint64_t bvp_rangesize;
uint64_t bvp_usedspace;
uint64_t bvp_savedspace;
} brt_vdev_phys_t;
typedef struct brt_vdev {
/*
* VDEV id.
*/
uint64_t bv_vdevid;
/*
* Is the structure initiated?
* (bv_entcount and bv_bitmap are allocated?)
*/
boolean_t bv_initiated;
/*
* Object number in the MOS for the entcount array and brt_vdev_phys.
*/
uint64_t bv_mos_brtvdev;
/*
* Object number in the MOS for the entries table.
*/
uint64_t bv_mos_entries;
/*
* Entries to sync.
*/
avl_tree_t bv_tree;
/*
* Does the bv_entcount[] array needs byte swapping?
*/
boolean_t bv_need_byteswap;
/*
* Number of entries in the bv_entcount[] array.
*/
uint64_t bv_size;
/*
* This is the array with BRT entry count per BRT_RANGESIZE.
*/
uint16_t *bv_entcount;
/*
* Sum of all bv_entcount[]s.
*/
uint64_t bv_totalcount;
/*
* Space on disk occupied by cloned blocks (without compression).
*/
uint64_t bv_usedspace;
/*
* How much additional space would be occupied without block cloning.
*/
uint64_t bv_savedspace;
/*
* brt_vdev_phys needs updating on disk.
*/
boolean_t bv_meta_dirty;
/*
* bv_entcount[] needs updating on disk.
*/
boolean_t bv_entcount_dirty;
/*
* bv_entcount[] potentially can be a bit too big to sychronize it all
* when we just changed few entcounts. The fields below allow us to
* track updates to bv_entcount[] array since the last sync.
* A single bit in the bv_bitmap represents as many entcounts as can
* fit into a single BRT_BLOCKSIZE.
* For example we have 65536 entcounts in the bv_entcount array
* (so the whole array is 128kB). We updated bv_entcount[2] and
* bv_entcount[5]. In that case only first bit in the bv_bitmap will
* be set and we will write only first BRT_BLOCKSIZE out of 128kB.
*/
ulong_t *bv_bitmap;
uint64_t bv_nblocks;
} brt_vdev_t;
/*
* In-core brt
*/
typedef struct brt {
krwlock_t brt_lock;
spa_t *brt_spa;
#define brt_mos brt_spa->spa_meta_objset
uint64_t brt_rangesize;
uint64_t brt_usedspace;
uint64_t brt_savedspace;
avl_tree_t brt_pending_tree[TXG_SIZE];
kmutex_t brt_pending_lock[TXG_SIZE];
/* Sum of all entries across all bv_trees. */
uint64_t brt_nentries;
brt_vdev_t *brt_vdevs;
uint64_t brt_nvdevs;
} brt_t;
/* Size of bre_offset / sizeof (uint64_t). */
#define BRT_KEY_WORDS (1)
/*
* In-core brt entry.
* On-disk we use bre_offset as the key and bre_refcount as the value.
*/
typedef struct brt_entry {
uint64_t bre_offset;
uint64_t bre_refcount;
avl_node_t bre_node;
} brt_entry_t;
typedef struct brt_pending_entry {
blkptr_t bpe_bp;
int bpe_count;
avl_node_t bpe_node;
} brt_pending_entry_t;
static kmem_cache_t *brt_entry_cache;
static kmem_cache_t *brt_pending_entry_cache;

View file

@ -2715,15 +2715,23 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx)
* writes and clones into this block.
*/
mutex_enter(&db->db_mtx);
DBUF_VERIFY(db);
VERIFY(!dbuf_undirty(db, tx));
ASSERT0P(dbuf_find_dirty_eq(db, tx->tx_txg));
if (db->db_buf != NULL) {
arc_buf_destroy(db->db_buf, db);
db->db_buf = NULL;
dbuf_clear_data(db);
}
db->db_state = DB_NOFILL;
DTRACE_SET_STATE(db, "allocating NOFILL buffer for clone");
DBUF_VERIFY(db);
mutex_exit(&db->db_mtx);
dmu_buf_will_not_fill(db_fake, tx);
dbuf_noread(db);
(void) dbuf_dirty(db, tx);
}
void

View file

@ -1778,7 +1778,14 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
}
/*
* Checks if the dnode contains any uncommitted dirty records.
* Checks if the dnode itself is dirty, or is carrying any uncommitted records.
* It is important to check both conditions, as some operations (eg appending
* to a file) can dirty both as a single logical unit, but they are not synced
* out atomically, so checking one and not the other can result in an object
* appearing to be clean mid-way through a commit.
*
* Do not change this lightly! If you get it wrong, dmu_offset_next() can
* detect a hole where there is really data, leading to silent corruption.
*/
boolean_t
dnode_is_dirty(dnode_t *dn)
@ -1786,7 +1793,8 @@ dnode_is_dirty(dnode_t *dn)
mutex_enter(&dn->dn_mtx);
for (int i = 0; i < TXG_SIZE; i++) {
if (multilist_link_active(&dn->dn_dirty_link[i])) {
if (multilist_link_active(&dn->dn_dirty_link[i]) ||
!list_is_empty(&dn->dn_dirty_records[i])) {
mutex_exit(&dn->dn_mtx);
return (B_TRUE);
}

View file

@ -2142,7 +2142,7 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
return (B_FALSE);
}
static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
static void dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb,
dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn,
dmu_objset_type_t ostype, dmu_tx_t *tx);
inline __attribute__((always_inline)) static void dsl_scan_visitdnode(
@ -2307,12 +2307,11 @@ dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds,
* first 5; we want them to be useful.
*/
static void
dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb,
dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn,
dmu_objset_type_t ostype, dmu_tx_t *tx)
{
dsl_pool_t *dp = scn->scn_dp;
blkptr_t *bp_toread = NULL;
if (dsl_scan_check_suspend(scn, zb))
return;
@ -2353,11 +2352,8 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
return;
}
bp_toread = kmem_alloc(sizeof (blkptr_t), KM_SLEEP);
*bp_toread = *bp;
if (dsl_scan_recurse(scn, ds, ostype, dnp, bp_toread, zb, tx) != 0)
goto out;
if (dsl_scan_recurse(scn, ds, ostype, dnp, bp, zb, tx) != 0)
return;
/*
* If dsl_scan_ddt() has already visited this block, it will have
@ -2367,7 +2363,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
if (ddt_class_contains(dp->dp_spa,
scn->scn_phys.scn_ddt_class_max, bp)) {
scn->scn_ddt_contained_this_txg++;
goto out;
return;
}
/*
@ -2379,13 +2375,10 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
*/
if (BP_PHYSICAL_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) {
scn->scn_gt_max_this_txg++;
goto out;
return;
}
scan_funcs[scn->scn_phys.scn_func](dp, bp, zb);
out:
kmem_free(bp_toread, sizeof (blkptr_t));
}
static void

View file

@ -7063,12 +7063,13 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
if (!replacing) {
/*
* For attach, the only allowable parent is a mirror or the root
* vdev.
* For attach, the only allowable parent is a mirror or
* the root vdev. A raidz vdev can be attached to, but
* you cannot attach to a raidz child.
*/
if (pvd->vdev_ops != &vdev_mirror_ops &&
pvd->vdev_ops != &vdev_raidz_ops &&
pvd->vdev_ops != &vdev_root_ops)
pvd->vdev_ops != &vdev_root_ops &&
!raidz)
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
pvops = &vdev_mirror_ops;

View file

@ -111,13 +111,10 @@ typedef struct {
uint64_t
zvol_name_hash(const char *name)
{
int i;
uint64_t crc = -1ULL;
const uint8_t *p = (const uint8_t *)name;
ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
for (i = 0; i < MAXNAMELEN - 1 && *p; i++, p++) {
for (const uint8_t *p = (const uint8_t *)name; *p != 0; p++)
crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (*p)) & 0xFF];
}
return (crc);
}
@ -138,8 +135,7 @@ zvol_find_by_name_hash(const char *name, uint64_t hash, int mode)
hlist_for_each(p, ZVOL_HT_HEAD(hash)) {
zv = hlist_entry(p, zvol_state_t, zv_hlink);
mutex_enter(&zv->zv_state_lock);
if (zv->zv_hash == hash &&
strncmp(zv->zv_name, name, MAXNAMELEN) == 0) {
if (zv->zv_hash == hash && strcmp(zv->zv_name, name) == 0) {
/*
* this is the right zvol, take the locks in the
* right order
@ -154,8 +150,7 @@ zvol_find_by_name_hash(const char *name, uint64_t hash, int mode)
* to hold zvol_state_lock
*/
ASSERT(zv->zv_hash == hash &&
strncmp(zv->zv_name, name, MAXNAMELEN)
== 0);
strcmp(zv->zv_name, name) == 0);
}
rw_exit(&zvol_state_lock);
return (zv);
@ -1526,9 +1521,9 @@ zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2,
task->op = op;
task->value = value;
strlcpy(task->name1, name1, MAXNAMELEN);
strlcpy(task->name1, name1, sizeof (task->name1));
if (name2 != NULL)
strlcpy(task->name2, name2, MAXNAMELEN);
strlcpy(task->name2, name2, sizeof (task->name2));
return (task);
}
@ -1573,7 +1568,6 @@ typedef struct zvol_set_prop_int_arg {
uint64_t zsda_value;
zprop_source_t zsda_source;
zfs_prop_t zsda_prop;
dmu_tx_t *zsda_tx;
} zvol_set_prop_int_arg_t;
/*
@ -1601,7 +1595,7 @@ static int
zvol_set_common_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
zvol_set_prop_int_arg_t *zsda = arg;
char dsname[MAXNAMELEN];
char dsname[ZFS_MAX_DATASET_NAME_LEN];
zvol_task_t *task;
uint64_t prop;
@ -1650,13 +1644,12 @@ zvol_set_common_sync(void *arg, dmu_tx_t *tx)
int error;
VERIFY0(dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL));
zsda->zsda_tx = tx;
error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds);
if (error == 0) {
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(zsda->zsda_prop),
zsda->zsda_source, sizeof (zsda->zsda_value), 1,
&zsda->zsda_value, zsda->zsda_tx);
&zsda->zsda_value, tx);
dsl_dataset_rele(ds, FTAG);
}

View file

@ -103,6 +103,7 @@ if [ -d ${dkms_root}/%{module} ]; then
fi
fi
done
cd ${dkms_root}
fi
# Uninstall this version of zfs dkms modules before installation of the package.

View file

@ -58,7 +58,7 @@ set -A args "create" "add" "destroy" "import fakepool" \
"setvprop" "blah blah" "-%" "--?" "-*" "-=" \
"-a" "-f" "-g" "-j" "-n" "-o" "-p" "-p /tmp" \
"-t" "-w" "-z" "-E" "-H" "-I" "-J" \
"-Q" "-R" "-T" "-W"
"-Q" "-R" "-W"
log_assert "Execute zdb using invalid parameters."

View file

@ -123,7 +123,10 @@ if not httpd:
with open('$HTTPS_PORT_FILE', 'w') as portf:
print(port, file=portf)
httpd.socket = ssl.wrap_socket(httpd.socket, server_side=True, keyfile='/$TESTPOOL/snakeoil.key', certfile='$SSL_CA_CERT_FILE', ssl_version=ssl.PROTOCOL_TLS)
# Serve over TLS: build a server-side context from the test certificate and
# key, then wrap the listening socket with it.
sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
sslctx.check_hostname = False
sslctx.load_cert_chain(certfile='$SSL_CA_CERT_FILE', keyfile='/$TESTPOOL/snakeoil.key')
httpd.socket = sslctx.wrap_socket(httpd.socket, server_side=True)
os.chdir('$STF_SUITE/tests/functional/cli_root/zfs_load-key')

View file

@ -1110,7 +1110,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.2.99-217-FreeBSD_ga94860a6d"
#define ZFS_META_ALIAS "zfs-2.2.99-231-FreeBSD_g688514e47"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@ -1140,7 +1140,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "217-FreeBSD_ga94860a6d"
#define ZFS_META_RELEASE "231-FreeBSD_g688514e47"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"

View file

@ -1 +1 @@
#define ZFS_META_GITREV "zfs-2.2.99-217-ga94860a6d"
#define ZFS_META_GITREV "zfs-2.2.99-231-g688514e47"