Notable upstream pull request merges:
 #15366 c3773de1 ZIL: Cleanup sync and commit handling
 #15409 dbe839a9 zvol: Cleanup set property
 #15409 60387fac zvol: Implement zvol threading as a Property
 #15409 9ccdb8be zvol: fix delayed update to block device ro entry
 #15448 05a7348a RAIDZ: Use cache blocking during parity math
 #15452 514d661c Tune zio buffer caches and their alignments
 #15456 799e09f7 Unify arc_prune_async() code
 #15465 763ca47f Fix block cloning between unencrypted and encrypted
                 datasets

To make module versions easier to compare, the module version number now
includes the commit count since the last tag.
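
For example, the regenerated zfs_config.h below changes the release string
from "FreeBSD_g043c6ee3b" to "184-FreeBSD_g41e55b476", where 184 is the
number of commits since the zfs-2.2.99 tag.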

Obtained from:	OpenZFS
OpenZFS commit:	41e55b476b
Martin Matuska 2023-11-01 10:12:47 +01:00
commit f8b1db88b8
33 changed files with 452 additions and 442 deletions

@ -1,6 +1,9 @@
# Features which are supported by GRUB2
allocation_classes
async_destroy
block_cloning
bookmarks
device_rebuild
embedded_data
empty_bpobj
enabled_txg
@ -9,6 +12,13 @@ filesystem_limits
hole_birth
large_blocks
livelist
log_spacemap
lz4_compress
obsolete_counts
project_quota
resilver_defer
spacemap_histogram
spacemap_v2
userobj_accounting
zilsaxattr
zpool_checkpoint

@ -284,7 +284,6 @@ typedef struct zfid_long {
#define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t))
#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t))
extern uint_t zfs_fsyncer_key;
extern int zfs_super_owner;
extern int zfs_bclone_enabled;

@ -60,7 +60,7 @@ extern const struct file_operations zpl_file_operations;
extern const struct file_operations zpl_dir_file_operations;
/* zpl_super.c */
extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg);
extern const struct super_operations zpl_super_operations;
extern const struct export_operations zpl_export_operations;

@ -133,11 +133,11 @@ int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
void abd_zero_off(abd_t *, size_t, size_t);
void abd_verify(abd_t *);
void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
ssize_t csize, ssize_t dsize, const unsigned parity,
void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
size_t csize, size_t dsize, const unsigned parity,
void (*func_raidz_gen)(void **, const void *, size_t, size_t));
void abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
ssize_t tsize, const unsigned parity,
size_t tsize, const unsigned parity,
void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
const unsigned *mul),
const unsigned *mul);

@ -81,7 +81,7 @@ typedef struct arc_prune arc_prune_t;
typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
const blkptr_t *bp, arc_buf_t *buf, void *priv);
typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
typedef void arc_prune_func_t(int64_t bytes, void *priv);
typedef void arc_prune_func_t(uint64_t bytes, void *priv);
/* Shared module parameters */
extern uint_t zfs_arc_average_blocksize;

@ -1065,7 +1065,6 @@ extern void arc_wait_for_eviction(uint64_t, boolean_t);
extern void arc_lowmem_init(void);
extern void arc_lowmem_fini(void);
extern void arc_prune_async(uint64_t);
extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
extern uint64_t arc_free_memory(void);
extern int64_t arc_available_memory(void);

@ -192,6 +192,7 @@ typedef enum {
ZFS_PROP_REDACT_SNAPS,
ZFS_PROP_SNAPSHOTS_CHANGED,
ZFS_PROP_PREFETCH,
ZFS_PROP_VOLTHREADING,
ZFS_NUM_PROPS
} zfs_prop_t;

@ -575,7 +575,6 @@ typedef struct zfsdev_state {
extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which);
extern int zfsdev_getminor(zfs_file_t *fp, minor_t *minorp);
extern uint_t zfs_fsyncer_key;
extern uint_t zfs_allow_log_key;
#endif /* _KERNEL */

@ -307,7 +307,7 @@ extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx,
uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
const char *dname, znode_t *szp, znode_t *wzp);
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t len, int ioflag,
znode_t *zp, offset_t off, ssize_t len, boolean_t commit,
zil_callback_t callback, void *callback_data);
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, uint64_t off, uint64_t len);

@ -50,8 +50,9 @@ extern int zvol_get_stats(objset_t *, nvlist_t *);
extern boolean_t zvol_is_zvol(const char *);
extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *);
extern int zvol_set_volsize(const char *, uint64_t);
extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t);
extern int zvol_set_volmode(const char *, zprop_source_t, uint64_t);
extern int zvol_set_volthreading(const char *, boolean_t);
extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t);
extern int zvol_set_ro(const char *, boolean_t);
extern zvol_state_handle_t *zvol_suspend(const char *);
extern int zvol_resume(zvol_state_handle_t *);
extern void *zvol_tag(zvol_state_handle_t *);

@ -58,6 +58,7 @@ typedef struct zvol_state {
atomic_t zv_suspend_ref; /* refcount for suspend */
krwlock_t zv_suspend_lock; /* suspend lock */
struct zvol_state_os *zv_zso; /* private platform state */
boolean_t zv_threading; /* volthreading property */
} zvol_state_t;
@ -81,9 +82,9 @@ void zvol_remove_minors_impl(const char *name);
void zvol_last_close(zvol_state_t *zv);
void zvol_insert(zvol_state_t *zv);
void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
uint64_t len, boolean_t sync);
uint64_t len);
void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint64_t size, int sync);
uint64_t size, boolean_t commit);
int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
struct lwb *lwb, zio_t *zio);
int zvol_init_impl(void);

@ -47,6 +47,7 @@
static boolean_t nfs_available(void);
static boolean_t exports_available(void);
typedef int (*nfs_shareopt_callback_t)(const char *opt, const char *value,
void *cookie);
@ -539,6 +540,8 @@ nfs_commit_shares(void)
static void
nfs_truncate_shares(void)
{
if (!exports_available())
return;
nfs_reset_shares(ZFS_EXPORTS_LOCK, ZFS_EXPORTS_FILE);
}
@ -566,3 +569,18 @@ nfs_available(void)
return (avail == 1);
}
static boolean_t
exports_available(void)
{
static int avail;
if (!avail) {
if (access(ZFS_EXPORTS_DIR, F_OK) != 0)
avail = -1;
else
avail = 1;
}
return (avail == 1);
}

@ -1868,7 +1868,8 @@
<enumerator name='ZFS_PROP_REDACT_SNAPS' value='94'/>
<enumerator name='ZFS_PROP_SNAPSHOTS_CHANGED' value='95'/>
<enumerator name='ZFS_PROP_PREFETCH' value='96'/>
<enumerator name='ZFS_NUM_PROPS' value='97'/>
<enumerator name='ZFS_PROP_VOLTHREADING' value='97'/>
<enumerator name='ZFS_NUM_PROPS' value='98'/>
</enum-decl>
<typedef-decl name='zfs_prop_t' type-id='4b000d60' id='58603c44'/>
<enum-decl name='zprop_source_t' naming-typedef-id='a2256d42' id='5903f80e'>

@ -1197,6 +1197,18 @@ are equivalent to the
and
.Sy noexec
mount options.
.It Sy volthreading Ns = Ns Sy on Ns | Ns Sy off
Controls internal zvol threading.
The value
.Sy off
disables zvol threading, and zvol relies on application threads.
The default value is
.Sy on ,
which enables threading within a zvol.
Please note that this property will be overridden by the
.Sy zvol_request_sync
module parameter.
This property is only applicable to Linux.
.It Sy filesystem_limit Ns = Ns Ar count Ns | Ns Sy none
Limits the number of filesystems and volumes that can exist under this point in
the dataset tree.
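
For illustration only (not part of the diff; tank/vol0 is a hypothetical
dataset name), the new property is managed with the ordinary zfs commands:

    zfs set volthreading=off tank/vol0
    zfs get volthreading tank/vol0

Setting it to off means requests for that volume are serviced synchronously
in the submitting application thread instead of being dispatched to the zvol
taskq, the same effect zvol_request_sync=1 has globally, as the Linux
zvol_os.c hunk further down shows.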

@ -52,11 +52,6 @@
#include <sys/vm.h>
#include <sys/vmmeter.h>
#if __FreeBSD_version >= 1300139
static struct sx arc_vnlru_lock;
static struct vnode *arc_vnlru_marker;
#endif
extern struct vfsops zfs_vfsops;
uint_t zfs_arc_free_target = 0;
@ -131,53 +126,6 @@ arc_default_max(uint64_t min, uint64_t allmem)
return (MAX(allmem * 5 / 8, size));
}
/*
* Helper function for arc_prune_async() it is responsible for safely
* handling the execution of a registered arc_prune_func_t.
*/
static void
arc_prune_task(void *arg)
{
uint64_t nr_scan = (uintptr_t)arg;
#ifndef __ILP32__
if (nr_scan > INT_MAX)
nr_scan = INT_MAX;
#endif
#if __FreeBSD_version >= 1300139
sx_xlock(&arc_vnlru_lock);
vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
sx_xunlock(&arc_vnlru_lock);
#else
vnlru_free(nr_scan, &zfs_vfsops);
#endif
}
/*
* Notify registered consumers they must drop holds on a portion of the ARC
* buffered they reference. This provides a mechanism to ensure the ARC can
* honor the metadata limit and reclaim otherwise pinned ARC buffers. This
* is analogous to dnlc_reduce_cache() but more generic.
*
* This operation is performed asynchronously so it may be safely called
* in the context of the arc_reclaim_thread(). A reference is taken here
* for each registered arc_prune_t and the arc_prune_task() is responsible
* for releasing it once the registered arc_prune_func_t has completed.
*/
void
arc_prune_async(uint64_t adjust)
{
#ifndef __LP64__
if (adjust > UINTPTR_MAX)
adjust = UINTPTR_MAX;
#endif
taskq_dispatch(arc_prune_taskq, arc_prune_task,
(void *)(intptr_t)adjust, TQ_SLEEP);
ARCSTAT_BUMP(arcstat_prune);
}
uint64_t
arc_all_memory(void)
{
@ -228,10 +176,6 @@ arc_lowmem_init(void)
{
arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
EVENTHANDLER_PRI_FIRST);
#if __FreeBSD_version >= 1300139
arc_vnlru_marker = vnlru_alloc_marker();
sx_init(&arc_vnlru_lock, "arc vnlru lock");
#endif
}
void
@ -239,12 +183,6 @@ arc_lowmem_fini(void)
{
if (arc_event_lowmem != NULL)
EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
#if __FreeBSD_version >= 1300139
if (arc_vnlru_marker != NULL) {
vnlru_free_marker(arc_vnlru_marker);
sx_destroy(&arc_vnlru_lock);
}
#endif
}
void

@ -2074,6 +2074,26 @@ zfs_vnodes_adjust_back(void)
#endif
}
#if __FreeBSD_version >= 1300139
static struct sx zfs_vnlru_lock;
static struct vnode *zfs_vnlru_marker;
#endif
static arc_prune_t *zfs_prune;
static void
zfs_prune_task(uint64_t nr_to_scan, void *arg __unused)
{
if (nr_to_scan > INT_MAX)
nr_to_scan = INT_MAX;
#if __FreeBSD_version >= 1300139
sx_xlock(&zfs_vnlru_lock);
vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker);
sx_xunlock(&zfs_vnlru_lock);
#else
vnlru_free(nr_to_scan, &zfs_vfsops);
#endif
}
void
zfs_init(void)
{
@ -2100,11 +2120,23 @@ zfs_init(void)
dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
#if __FreeBSD_version >= 1300139
zfs_vnlru_marker = vnlru_alloc_marker();
sx_init(&zfs_vnlru_lock, "zfs vnlru lock");
#endif
zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL);
}
void
zfs_fini(void)
{
arc_remove_prune_callback(zfs_prune);
#if __FreeBSD_version >= 1300139
vnlru_free_marker(zfs_vnlru_marker);
sx_destroy(&zfs_vnlru_lock);
#endif
taskq_destroy(zfsvfs_taskq);
zfsctl_fini();
zfs_znode_fini();

@ -244,9 +244,15 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr)
return (SET_ERROR(EPERM));
}
/* Keep a count of the synchronous opens in the znode */
if (flag & O_SYNC)
atomic_inc_32(&zp->z_sync_cnt);
/*
* Keep a count of the synchronous opens in the znode. On first
* synchronous open we must convert all previous async transactions
* into sync to keep correct ordering.
*/
if (flag & O_SYNC) {
if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
zil_async_to_sync(zfsvfs->z_log, zp->z_id);
}
zfs_exit(zfsvfs, FTAG);
return (0);
@ -4201,6 +4207,10 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
}
zfs_vmobject_wunlock(object);
boolean_t commit = (flags & (zfs_vm_pagerput_sync |
zfs_vm_pagerput_inval)) != 0 ||
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS;
if (ncount == 0)
goto out;
@ -4253,7 +4263,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
* but that would make the locking messier
*/
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
len, 0, NULL, NULL);
len, commit, NULL, NULL);
zfs_vmobject_wlock(object);
for (i = 0; i < ncount; i++) {
@ -4268,8 +4278,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
out:
zfs_rangelock_exit(lr);
if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
if (commit)
zil_commit(zfsvfs->z_log, zp->z_id);
dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);

@ -123,7 +123,6 @@ struct zvol_state_os {
/* volmode=dev */
struct zvol_state_dev {
struct cdev *zsd_cdev;
uint64_t zsd_sync_cnt;
struct selinfo zsd_selinfo;
} _zso_dev;
@ -669,7 +668,7 @@ zvol_geom_bio_strategy(struct bio *bp)
int error = 0;
boolean_t doread = B_FALSE;
boolean_t is_dumpified;
boolean_t sync;
boolean_t commit;
if (bp->bio_to)
zv = bp->bio_to->private;
@ -696,7 +695,7 @@ zvol_geom_bio_strategy(struct bio *bp)
}
zvol_ensure_zilog(zv);
if (bp->bio_cmd == BIO_FLUSH)
goto sync;
goto commit;
break;
default:
error = SET_ERROR(EOPNOTSUPP);
@ -718,7 +717,7 @@ zvol_geom_bio_strategy(struct bio *bp)
}
is_dumpified = B_FALSE;
sync = !doread && !is_dumpified &&
commit = !doread && !is_dumpified &&
zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
/*
@ -734,7 +733,7 @@ zvol_geom_bio_strategy(struct bio *bp)
if (error != 0) {
dmu_tx_abort(tx);
} else {
zvol_log_truncate(zv, tx, off, resid, sync);
zvol_log_truncate(zv, tx, off, resid);
dmu_tx_commit(tx);
error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
off, resid);
@ -755,7 +754,7 @@ zvol_geom_bio_strategy(struct bio *bp)
dmu_tx_abort(tx);
} else {
dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
zvol_log_write(zv, tx, off, size, sync);
zvol_log_write(zv, tx, off, size, commit);
dmu_tx_commit(tx);
}
}
@ -793,8 +792,8 @@ zvol_geom_bio_strategy(struct bio *bp)
break;
}
if (sync) {
sync:
if (commit) {
commit:
zil_commit(zv->zv_zilog, ZVOL_OBJ);
}
resume:
@ -866,7 +865,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
uint64_t volsize;
zfs_locked_range_t *lr;
int error = 0;
boolean_t sync;
boolean_t commit;
zfs_uio_t uio;
zv = dev->si_drv2;
@ -880,7 +879,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
return (SET_ERROR(EIO));
ssize_t start_resid = zfs_uio_resid(&uio);
sync = (ioflag & IO_SYNC) ||
commit = (ioflag & IO_SYNC) ||
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
@ -904,7 +903,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
}
error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
if (error == 0)
zvol_log_write(zv, tx, off, bytes, sync);
zvol_log_write(zv, tx, off, bytes, commit);
dmu_tx_commit(tx);
if (error)
@ -913,7 +912,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
zfs_rangelock_exit(lr);
int64_t nwritten = start_resid - zfs_uio_resid(&uio);
dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
if (sync)
if (commit)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
rw_exit(&zv->zv_suspend_lock);
return (error);
@ -923,7 +922,6 @@ static int
zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
zvol_state_t *zv;
struct zvol_state_dev *zsd;
int err = 0;
boolean_t drop_suspend = B_FALSE;
@ -1017,13 +1015,6 @@ zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
}
zv->zv_open_count++;
if (flags & O_SYNC) {
zsd = &zv->zv_zso->zso_dev;
zsd->zsd_sync_cnt++;
if (zsd->zsd_sync_cnt == 1 &&
(zv->zv_flags & ZVOL_WRITTEN_TO) != 0)
zil_async_to_sync(zv->zv_zilog, ZVOL_OBJ);
}
out_opened:
if (zv->zv_open_count == 0) {
zvol_last_close(zv);
@ -1041,7 +1032,6 @@ static int
zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{
zvol_state_t *zv;
struct zvol_state_dev *zsd;
boolean_t drop_suspend = B_TRUE;
rw_enter(&zvol_state_lock, ZVOL_RW_READER);
@ -1091,10 +1081,6 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
* You may get multiple opens, but only one close.
*/
zv->zv_open_count--;
if (flags & O_SYNC) {
zsd = &zv->zv_zso->zso_dev;
zsd->zsd_sync_cnt--;
}
if (zv->zv_open_count == 0) {
ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
@ -1163,7 +1149,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data,
dmu_tx_abort(tx);
} else {
sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
zvol_log_truncate(zv, tx, offset, length, sync);
zvol_log_truncate(zv, tx, offset, length);
dmu_tx_commit(tx);
error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
offset, length);

@ -495,56 +495,5 @@ arc_unregister_hotplug(void)
}
#endif /* _KERNEL */
/*
* Helper function for arc_prune_async() it is responsible for safely
* handling the execution of a registered arc_prune_func_t.
*/
static void
arc_prune_task(void *ptr)
{
arc_prune_t *ap = (arc_prune_t *)ptr;
arc_prune_func_t *func = ap->p_pfunc;
if (func != NULL)
func(ap->p_adjust, ap->p_private);
zfs_refcount_remove(&ap->p_refcnt, func);
}
/*
* Notify registered consumers they must drop holds on a portion of the ARC
* buffered they reference. This provides a mechanism to ensure the ARC can
* honor the metadata limit and reclaim otherwise pinned ARC buffers. This
* is analogous to dnlc_reduce_cache() but more generic.
*
* This operation is performed asynchronously so it may be safely called
* in the context of the arc_reclaim_thread(). A reference is taken here
* for each registered arc_prune_t and the arc_prune_task() is responsible
* for releasing it once the registered arc_prune_func_t has completed.
*/
void
arc_prune_async(uint64_t adjust)
{
arc_prune_t *ap;
mutex_enter(&arc_prune_mtx);
for (ap = list_head(&arc_prune_list); ap != NULL;
ap = list_next(&arc_prune_list, ap)) {
if (zfs_refcount_count(&ap->p_refcnt) >= 2)
continue;
zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
ap->p_adjust = adjust;
if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
ap, TQ_SLEEP) == TASKQID_INVALID) {
zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
continue;
}
ARCSTAT_BUMP(arcstat_prune);
}
mutex_exit(&arc_prune_mtx);
}
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
"Limit on number of pages that ARC shrinker can reclaim at once");

@ -192,9 +192,15 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
return (SET_ERROR(EPERM));
}
/* Keep a count of the synchronous opens in the znode */
if (flag & O_SYNC)
atomic_inc_32(&zp->z_sync_cnt);
/*
* Keep a count of the synchronous opens in the znode. On first
* synchronous open we must convert all previous async transactions
* into sync to keep correct ordering.
*/
if (flag & O_SYNC) {
if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
zil_async_to_sync(zfsvfs->z_log, zp->z_id);
}
zfs_exit(zfsvfs, FTAG);
return (0);
@ -3826,21 +3832,14 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
for_sync ? zfs_putpage_sync_commit_cb :
zfs_putpage_async_commit_cb, pp);
dmu_tx_commit(tx);
zfs_rangelock_exit(lr);
boolean_t commit = B_FALSE;
if (wbc->sync_mode != WB_SYNC_NONE) {
/*
* Note that this is rarely called under writepages(), because
* writepages() normally handles the entire commit for
* performance reasons.
*/
zil_commit(zfsvfs->z_log, zp->z_id);
commit = B_TRUE;
} else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
/*
* If the caller does not intend to wait synchronously
@ -3850,9 +3849,20 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
* our writeback to complete. Refer to the comment in
* zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
*/
zil_commit(zfsvfs->z_log, zp->z_id);
commit = B_TRUE;
}
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit,
for_sync ? zfs_putpage_sync_commit_cb :
zfs_putpage_async_commit_cb, pp);
dmu_tx_commit(tx);
zfs_rangelock_exit(lr);
if (commit)
zil_commit(zfsvfs->z_log, zp->z_id);
dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
zfs_exit(zfsvfs, FTAG);

@ -375,7 +375,7 @@ zpl_kill_sb(struct super_block *sb)
}
void
zpl_prune_sb(int64_t nr_to_scan, void *arg)
zpl_prune_sb(uint64_t nr_to_scan, void *arg)
{
struct super_block *sb = (struct super_block *)arg;
int objects = 0;

@ -387,7 +387,7 @@ zvol_discard(zv_request_t *zvr)
if (error != 0) {
dmu_tx_abort(tx);
} else {
zvol_log_truncate(zv, tx, start, size, B_TRUE);
zvol_log_truncate(zv, tx, start, size);
dmu_tx_commit(tx);
error = dmu_free_long_range(zv->zv_objset,
ZVOL_OBJ, start, size);
@ -512,7 +512,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
uint64_t size = io_size(bio, rq);
int rw = io_data_dir(bio, rq);
if (zvol_request_sync)
if (zvol_request_sync || zv->zv_threading == B_FALSE)
force_sync = 1;
zv_request_t zvr = {
@ -1304,6 +1304,7 @@ zvol_os_create_minor(const char *name)
int error = 0;
int idx;
uint64_t hash = zvol_name_hash(name);
uint64_t volthreading;
bool replayed_zil = B_FALSE;
if (zvol_inhibit_dev)
@ -1350,6 +1351,12 @@ zvol_os_create_minor(const char *name)
zv->zv_volsize = volsize;
zv->zv_objset = os;
/* Default */
zv->zv_threading = B_TRUE;
if (dsl_prop_get_integer(name, "volthreading", &volthreading, NULL)
== 0)
zv->zv_threading = volthreading;
set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> 9);
blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue,

@ -628,6 +628,9 @@ zfs_prop_init(void)
ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK", B_FALSE,
sfeatures);
zprop_register_index(ZFS_PROP_VOLTHREADING, "volthreading",
1, PROP_DEFAULT, ZFS_TYPE_VOLUME, "on | off", "zvol threading",
boolean_table, sfeatures);
zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0,
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
"USEDSNAP", B_FALSE, sfeatures);

@ -1017,12 +1017,12 @@ abd_cmp(abd_t *dabd, abd_t *sabd)
* is the same when taking linear and when taking scatter
*/
void
abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
ssize_t csize, ssize_t dsize, const unsigned parity,
abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
size_t csize, size_t dsize, const unsigned parity,
void (*func_raidz_gen)(void **, const void *, size_t, size_t))
{
int i;
ssize_t len, dlen;
size_t len, dlen;
struct abd_iter caiters[3];
struct abd_iter daiter;
void *caddrs[3];
@ -1033,16 +1033,15 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
ASSERT3U(parity, <=, 3);
for (i = 0; i < parity; i++) {
abd_verify(cabds[i]);
ASSERT3U(csize, <=, cabds[i]->abd_size);
c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], 0);
ASSERT3U(off + csize, <=, cabds[i]->abd_size);
c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], off);
}
ASSERT3S(dsize, >=, 0);
if (dsize > 0) {
ASSERT(dabd);
abd_verify(dabd);
ASSERT3U(dsize, <=, dabd->abd_size);
c_dabd = abd_init_abd_iter(dabd, &daiter, 0);
ASSERT3U(off + dsize, <=, dabd->abd_size);
c_dabd = abd_init_abd_iter(dabd, &daiter, off);
}
abd_enter_critical(flags);
@ -1064,7 +1063,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
dlen = 0;
/* must be progressive */
ASSERT3S(len, >, 0);
ASSERT3U(len, >, 0);
/*
* The iterated function likely will not do well if each
* segment except the last one is not multiple of 512 (raidz).
@ -1089,9 +1088,6 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
}
csize -= len;
ASSERT3S(dsize, >=, 0);
ASSERT3S(csize, >=, 0);
}
abd_exit_critical(flags);
}
@ -1108,13 +1104,13 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
*/
void
abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
ssize_t tsize, const unsigned parity,
size_t tsize, const unsigned parity,
void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
const unsigned *mul),
const unsigned *mul)
{
int i;
ssize_t len;
size_t len;
struct abd_iter citers[3];
struct abd_iter xiters[3];
void *caddrs[3], *xaddrs[3];

@ -886,6 +886,8 @@ static void l2arc_do_free_on_write(void);
static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
boolean_t state_only);
static void arc_prune_async(uint64_t adjust);
#define l2arc_hdr_arcstats_increment(hdr) \
l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE)
#define l2arc_hdr_arcstats_decrement(hdr) \
@ -6050,6 +6052,56 @@ arc_remove_prune_callback(arc_prune_t *p)
kmem_free(p, sizeof (*p));
}
/*
* Helper function for arc_prune_async() it is responsible for safely
* handling the execution of a registered arc_prune_func_t.
*/
static void
arc_prune_task(void *ptr)
{
arc_prune_t *ap = (arc_prune_t *)ptr;
arc_prune_func_t *func = ap->p_pfunc;
if (func != NULL)
func(ap->p_adjust, ap->p_private);
zfs_refcount_remove(&ap->p_refcnt, func);
}
/*
* Notify registered consumers they must drop holds on a portion of the ARC
* buffers they reference. This provides a mechanism to ensure the ARC can
* honor the metadata limit and reclaim otherwise pinned ARC buffers.
*
* This operation is performed asynchronously so it may be safely called
* in the context of the arc_reclaim_thread(). A reference is taken here
* for each registered arc_prune_t and the arc_prune_task() is responsible
* for releasing it once the registered arc_prune_func_t has completed.
*/
static void
arc_prune_async(uint64_t adjust)
{
arc_prune_t *ap;
mutex_enter(&arc_prune_mtx);
for (ap = list_head(&arc_prune_list); ap != NULL;
ap = list_next(&arc_prune_list, ap)) {
if (zfs_refcount_count(&ap->p_refcnt) >= 2)
continue;
zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
ap->p_adjust = adjust;
if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
ap, TQ_SLEEP) == TASKQID_INVALID) {
zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
continue;
}
ARCSTAT_BUMP(arcstat_prune);
}
mutex_exit(&arc_prune_mtx);
}
/*
* Notify the arc that a block was freed, and thus will never be used again.
*/

@ -214,9 +214,10 @@ raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
}
#define raidz_copy(dabd, sabd, size) \
#define raidz_copy(dabd, sabd, off, size) \
{ \
abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\
abd_iterate_func2(dabd, sabd, off, off, size, raidz_copy_abd_cb, \
NULL); \
}
/*
@ -254,9 +255,10 @@ raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
return (0);
}
#define raidz_add(dabd, sabd, size) \
#define raidz_add(dabd, sabd, off, size) \
{ \
abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\
abd_iterate_func2(dabd, sabd, off, off, size, raidz_add_abd_cb, \
NULL); \
}
/*
@ -343,7 +345,10 @@ raidz_mul_abd_cb(void *dc, size_t size, void *private)
* the parity/syndrome if data column is shorter.
*
* P parity is calculated using raidz_add_abd().
*
* For CPU L2 cache blocking we process 64KB at a time.
*/
#define CHUNK 65536
/*
* Generate P parity (RAIDZ1)
@ -357,20 +362,26 @@ raidz_generate_p_impl(raidz_row_t * const rr)
const size_t ncols = rr->rr_cols;
const size_t psize = rr->rr_col[CODE_P].rc_size;
abd_t *pabd = rr->rr_col[CODE_P].rc_abd;
size_t size;
abd_t *dabd;
size_t off, size;
raidz_math_begin();
/* start with first data column */
raidz_copy(pabd, rr->rr_col[1].rc_abd, psize);
for (off = 0; off < psize; off += CHUNK) {
for (c = 2; c < ncols; c++) {
dabd = rr->rr_col[c].rc_abd;
size = rr->rr_col[c].rc_size;
/* start with first data column */
size = MIN(CHUNK, psize - off);
raidz_copy(pabd, rr->rr_col[1].rc_abd, off, size);
/* add data column */
raidz_add(pabd, dabd, size);
for (c = 2; c < ncols; c++) {
size = rr->rr_col[c].rc_size;
if (size <= off)
continue;
/* add data column */
size = MIN(CHUNK, size - off);
abd_t *dabd = rr->rr_col[c].rc_abd;
raidz_add(pabd, dabd, off, size);
}
}
raidz_math_end();
@ -423,7 +434,7 @@ raidz_generate_pq_impl(raidz_row_t * const rr)
size_t c;
const size_t ncols = rr->rr_cols;
const size_t csize = rr->rr_col[CODE_P].rc_size;
size_t dsize;
size_t off, size, dsize;
abd_t *dabd;
abd_t *cabds[] = {
rr->rr_col[CODE_P].rc_abd,
@ -432,15 +443,20 @@ raidz_generate_pq_impl(raidz_row_t * const rr)
raidz_math_begin();
raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, csize);
raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, csize);
for (off = 0; off < csize; off += CHUNK) {
for (c = 3; c < ncols; c++) {
dabd = rr->rr_col[c].rc_abd;
dsize = rr->rr_col[c].rc_size;
size = MIN(CHUNK, csize - off);
raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, off, size);
raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, off, size);
abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
raidz_gen_pq_add);
for (c = 3; c < ncols; c++) {
dabd = rr->rr_col[c].rc_abd;
dsize = rr->rr_col[c].rc_size;
dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0;
abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 2,
raidz_gen_pq_add);
}
}
raidz_math_end();
@ -496,7 +512,7 @@ raidz_generate_pqr_impl(raidz_row_t * const rr)
size_t c;
const size_t ncols = rr->rr_cols;
const size_t csize = rr->rr_col[CODE_P].rc_size;
size_t dsize;
size_t off, size, dsize;
abd_t *dabd;
abd_t *cabds[] = {
rr->rr_col[CODE_P].rc_abd,
@ -506,16 +522,21 @@ raidz_generate_pqr_impl(raidz_row_t * const rr)
raidz_math_begin();
raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, csize);
raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, csize);
raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, csize);
for (off = 0; off < csize; off += CHUNK) {
for (c = 4; c < ncols; c++) {
dabd = rr->rr_col[c].rc_abd;
dsize = rr->rr_col[c].rc_size;
size = MIN(CHUNK, csize - off);
raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, off, size);
raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, off, size);
raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, off, size);
abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
raidz_gen_pqr_add);
for (c = 4; c < ncols; c++) {
dabd = rr->rr_col[c].rc_abd;
dsize = rr->rr_col[c].rc_size;
dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0;
abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 3,
raidz_gen_pqr_add);
}
}
raidz_math_end();
@ -592,26 +613,31 @@ raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx)
const size_t x = tgtidx[TARGET_X];
const size_t xsize = rr->rr_col[x].rc_size;
abd_t *xabd = rr->rr_col[x].rc_abd;
size_t size;
abd_t *dabd;
size_t off, size;
if (xabd == NULL)
return (1 << CODE_P);
raidz_math_begin();
/* copy P into target */
raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, xsize);
for (off = 0; off < xsize; off += CHUNK) {
/* generate p_syndrome */
for (c = firstdc; c < ncols; c++) {
if (c == x)
continue;
/* copy P into target */
size = MIN(CHUNK, xsize - off);
raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, off, size);
dabd = rr->rr_col[c].rc_abd;
size = MIN(rr->rr_col[c].rc_size, xsize);
/* generate p_syndrome */
for (c = firstdc; c < ncols; c++) {
if (c == x)
continue;
size = rr->rr_col[c].rc_size;
if (size <= off)
continue;
raidz_add(xabd, dabd, size);
size = MIN(CHUNK, MIN(size, xsize) - off);
abd_t *dabd = rr->rr_col[c].rc_abd;
raidz_add(xabd, dabd, off, size);
}
}
raidz_math_end();
@ -683,7 +709,7 @@ raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
/* Start with first data column if present */
if (firstdc != x) {
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
} else {
raidz_zero(xabd, xsize);
}
@ -698,12 +724,12 @@ raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
dsize = rr->rr_col[c].rc_size;
}
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1,
raidz_syn_q_abd);
}
/* add Q to the syndrome */
raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, xsize);
raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, 0, xsize);
/* transform the syndrome */
abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
@ -777,7 +803,7 @@ raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
/* Start with first data column if present */
if (firstdc != x) {
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
} else {
raidz_zero(xabd, xsize);
}
@ -793,12 +819,12 @@ raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
dsize = rr->rr_col[c].rc_size;
}
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1,
raidz_syn_r_abd);
}
/* add R to the syndrome */
raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, xsize);
raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, 0, xsize);
/* transform the syndrome */
abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
@ -934,8 +960,8 @@ raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
/* Start with first data column if present */
if (firstdc != x) {
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
} else {
raidz_zero(xabd, xsize);
raidz_zero(yabd, xsize);
@ -951,7 +977,7 @@ raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
dsize = rr->rr_col[c].rc_size;
}
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
raidz_syn_pq_abd);
}
@ -959,7 +985,7 @@ raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
/* Copy shorter targets back to the original abd buffer */
if (ysize < xsize)
raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
raidz_math_end();
@ -1094,8 +1120,8 @@ raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
/* Start with first data column if present */
if (firstdc != x) {
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
} else {
raidz_zero(xabd, xsize);
raidz_zero(yabd, xsize);
@ -1111,7 +1137,7 @@ raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
dsize = rr->rr_col[c].rc_size;
}
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
raidz_syn_pr_abd);
}
@ -1121,7 +1147,7 @@ raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
* Copy shorter targets back to the original abd buffer
*/
if (ysize < xsize)
raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
raidz_math_end();
@ -1261,8 +1287,8 @@ raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
/* Start with first data column if present */
if (firstdc != x) {
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
} else {
raidz_zero(xabd, xsize);
raidz_zero(yabd, xsize);
@ -1278,7 +1304,7 @@ raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
dsize = rr->rr_col[c].rc_size;
}
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
raidz_syn_qr_abd);
}
@ -1288,7 +1314,7 @@ raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
* Copy shorter targets back to the original abd buffer
*/
if (ysize < xsize)
raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
raidz_math_end();
@ -1456,9 +1482,9 @@ raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
/* Start with first data column if present */
if (firstdc != x) {
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, xsize);
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
} else {
raidz_zero(xabd, xsize);
raidz_zero(yabd, xsize);
@ -1475,7 +1501,7 @@ raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
dsize = rr->rr_col[c].rc_size;
}
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 3,
raidz_syn_pqr_abd);
}
@ -1485,9 +1511,9 @@ raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
* Copy shorter targets back to the original abd buffer
*/
if (ysize < xsize)
raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
if (zsize < xsize)
raidz_copy(rr->rr_col[z].rc_abd, zabd, zsize);
raidz_copy(rr->rr_col[z].rc_abd, zabd, 0, zsize);
raidz_math_end();

@ -238,7 +238,6 @@ uint64_t zfs_max_nvlist_src_size = 0;
*/
static uint64_t zfs_history_output_max = 1024 * 1024;
uint_t zfs_fsyncer_key;
uint_t zfs_allow_log_key;
/* DATA_TYPE_ANY is used when zkey_type can vary. */
@ -2523,11 +2522,27 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
case ZFS_PROP_VOLSIZE:
err = zvol_set_volsize(dsname, intval);
break;
case ZFS_PROP_SNAPDEV:
err = zvol_set_snapdev(dsname, source, intval);
case ZFS_PROP_VOLTHREADING:
err = zvol_set_volthreading(dsname, intval);
/*
* Set err to -1 to force the zfs_set_prop_nvlist code down the
* default path to set the value in the nvlist.
*/
if (err == 0)
err = -1;
break;
case ZFS_PROP_SNAPDEV:
case ZFS_PROP_VOLMODE:
err = zvol_set_volmode(dsname, source, intval);
err = zvol_set_common(dsname, prop, source, intval);
break;
case ZFS_PROP_READONLY:
err = zvol_set_ro(dsname, intval);
/*
* Set err to -1 to force the zfs_set_prop_nvlist code down the
* default path to set the value in the nvlist.
*/
if (err == 0)
err = -1;
break;
case ZFS_PROP_VERSION:
{
@ -7882,7 +7897,6 @@ zfs_kmod_init(void)
if ((error = zfsdev_attach()) != 0)
goto out;
tsd_create(&zfs_fsyncer_key, NULL);
tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
@ -7919,7 +7933,6 @@ zfs_kmod_fini(void)
spa_fini();
zvol_fini();
tsd_destroy(&zfs_fsyncer_key);
tsd_destroy(&rrw_tsd_key);
tsd_destroy(&zfs_allow_log_key);
}

@ -606,13 +606,12 @@ static int64_t zfs_immediate_write_sz = 32768;
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t resid, int ioflag,
znode_t *zp, offset_t off, ssize_t resid, boolean_t commit,
zil_callback_t callback, void *callback_data)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
uint32_t blocksize = zp->z_blksz;
itx_wr_state_t write_state;
uintptr_t fsync_cnt;
uint64_t gen = 0;
ssize_t size = resid;
@ -628,15 +627,11 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
else if (!spa_has_slogs(zilog->zl_spa) &&
resid >= zfs_immediate_write_sz)
write_state = WR_INDIRECT;
else if (ioflag & (O_SYNC | O_DSYNC))
else if (commit)
write_state = WR_COPIED;
else
write_state = WR_NEED_COPY;
if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
}
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen,
sizeof (gen));
@ -687,12 +682,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
BP_ZERO(&lr->lr_blkptr);
itx->itx_private = ZTOZSB(zp);
itx->itx_sync = (zp->z_sync_cnt != 0);
itx->itx_gen = gen;
if (!(ioflag & (O_SYNC | O_DSYNC)) && (zp->z_sync_cnt == 0) &&
(fsync_cnt == 0))
itx->itx_sync = B_FALSE;
itx->itx_callback = callback;
itx->itx_callback_data = callback_data;
zil_itx_assign(zilog, itx, tx);

@ -58,27 +58,20 @@
#include <sys/zfs_znode.h>
static ulong_t zfs_fsync_sync_cnt = 4;
int
zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
{
int error = 0;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
(void) tsd_set(zfs_fsyncer_key, (void *)(uintptr_t)zfs_fsync_sync_cnt);
if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
goto out;
return (error);
atomic_inc_32(&zp->z_sync_writes_cnt);
zil_commit(zfsvfs->z_log, zp->z_id);
atomic_dec_32(&zp->z_sync_writes_cnt);
zfs_exit(zfsvfs, FTAG);
}
out:
tsd_set(zfs_fsyncer_key, NULL);
return (error);
}
@ -520,6 +513,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
uint64_t end_size = MAX(zp->z_size, woff + n);
zilog_t *zilog = zfsvfs->z_log;
boolean_t commit = (ioflag & (O_SYNC | O_DSYNC)) ||
(zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS);
const uint64_t uid = KUID_TO_SUID(ZTOUID(zp));
const uint64_t gid = KGID_TO_SGID(ZTOGID(zp));
@ -741,7 +736,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
* zfs_clear_setid_bits_if_necessary must precede any of
* the TX_WRITE records logged here.
*/
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag,
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit,
NULL, NULL);
dmu_tx_commit(tx);
@ -767,8 +762,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
return (error);
}
if (ioflag & (O_SYNC | O_DSYNC) ||
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
if (commit)
zil_commit(zilog, zp->z_id);
const int64_t nwritten = start_resid - zfs_uio_resid(uio);
@ -1094,6 +1088,15 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
ASSERT(!outzfsvfs->z_replay);
/*
* Block cloning from an unencrypted dataset into an encrypted
* dataset and vice versa is not supported.
*/
if (inos->os_encrypted != outos->os_encrypted) {
zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
return (SET_ERROR(EXDEV));
}
error = zfs_verify_zp(inzp);
if (error == 0)
error = zfs_verify_zp(outzp);

@ -158,23 +158,22 @@ zio_init(void)
zio_link_cache = kmem_cache_create("zio_link_cache",
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
/*
* For small buffers, we want a cache for each multiple of
* SPA_MINBLOCKSIZE. For larger buffers, we want a cache
* for each quarter-power of 2.
*/
for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
size_t align, cflags, data_cflags;
char name[32];
/*
* Create cache for each half-power of 2 size, starting from
* SPA_MINBLOCKSIZE. It should give us memory space efficiency
* of ~7/8, sufficient for transient allocations mostly using
* these caches.
*/
size_t p2 = size;
size_t align = 0;
size_t data_cflags, cflags;
data_cflags = KMC_NODEBUG;
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
KMC_NODEBUG : 0;
while (!ISP2(p2))
p2 &= p2 - 1;
if (!IS_P2ALIGNED(size, p2 / 2))
continue;
#ifndef _KERNEL
/*
@ -185,47 +184,37 @@ zio_init(void)
*/
if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
continue;
/*
* Here's the problem - on 4K native devices in userland on
* Linux using O_DIRECT, buffers must be 4K aligned or I/O
* will fail with EINVAL, causing zdb (and others) to coredump.
* Since userland probably doesn't need optimized buffer caches,
* we just force 4K alignment on everything.
*/
align = 8 * SPA_MINBLOCKSIZE;
#else
if (size < PAGESIZE) {
align = SPA_MINBLOCKSIZE;
} else if (IS_P2ALIGNED(size, p2 >> 2)) {
align = PAGESIZE;
}
#endif
if (align != 0) {
char name[36];
if (cflags == data_cflags) {
/*
* Resulting kmem caches would be identical.
* Save memory by creating only one.
*/
(void) snprintf(name, sizeof (name),
"zio_buf_comb_%lu", (ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name,
size, align, NULL, NULL, NULL, NULL, NULL,
cflags);
zio_data_buf_cache[c] = zio_buf_cache[c];
continue;
}
(void) snprintf(name, sizeof (name), "zio_buf_%lu",
(ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size,
align, NULL, NULL, NULL, NULL, NULL, cflags);
if (IS_P2ALIGNED(size, PAGESIZE))
align = PAGESIZE;
else
align = 1 << (highbit64(size ^ (size - 1)) - 1);
(void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
(ulong_t)size);
zio_data_buf_cache[c] = kmem_cache_create(name, size,
align, NULL, NULL, NULL, NULL, NULL, data_cflags);
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
KMC_NODEBUG : 0;
data_cflags = KMC_NODEBUG;
if (cflags == data_cflags) {
/*
* Resulting kmem caches would be identical.
* Save memory by creating only one.
*/
(void) snprintf(name, sizeof (name),
"zio_buf_comb_%lu", (ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size, align,
NULL, NULL, NULL, NULL, NULL, cflags);
zio_data_buf_cache[c] = zio_buf_cache[c];
continue;
}
(void) snprintf(name, sizeof (name), "zio_buf_%lu",
(ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size, align,
NULL, NULL, NULL, NULL, NULL, cflags);
(void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
(ulong_t)size);
zio_data_buf_cache[c] = kmem_cache_create(name, size, align,
NULL, NULL, NULL, NULL, NULL, data_cflags);
}
while (--c != 0) {

@ -369,6 +369,40 @@ zvol_set_volsize(const char *name, uint64_t volsize)
return (SET_ERROR(error));
}
/*
* Update volthreading.
*/
int
zvol_set_volthreading(const char *name, boolean_t value)
{
zvol_state_t *zv = zvol_find_by_name(name, RW_NONE);
if (zv == NULL)
return (ENOENT);
zv->zv_threading = value;
mutex_exit(&zv->zv_state_lock);
return (0);
}
/*
* Update zvol ro property.
*/
int
zvol_set_ro(const char *name, boolean_t value)
{
zvol_state_t *zv = zvol_find_by_name(name, RW_NONE);
if (zv == NULL)
return (-1);
if (value) {
zvol_os_set_disk_ro(zv, 1);
zv->zv_flags |= ZVOL_RDONLY;
} else {
zvol_os_set_disk_ro(zv, 0);
zv->zv_flags &= ~ZVOL_RDONLY;
}
mutex_exit(&zv->zv_state_lock);
return (0);
}
/*
* Sanity check volume block size.
*/
@ -583,7 +617,7 @@ static const ssize_t zvol_immediate_write_sz = 32768;
void
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint64_t size, int sync)
uint64_t size, boolean_t commit)
{
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
@ -598,7 +632,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
else if (!spa_has_slogs(zilog->zl_spa) &&
size >= blocksize && blocksize > zvol_immediate_write_sz)
write_state = WR_INDIRECT;
else if (sync)
else if (commit)
write_state = WR_COPIED;
else
write_state = WR_NEED_COPY;
@ -633,7 +667,6 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
BP_ZERO(&lr->lr_blkptr);
itx->itx_private = zv;
itx->itx_sync = sync;
(void) zil_itx_assign(zilog, itx, tx);
@ -650,8 +683,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
* Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
*/
void
zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
boolean_t sync)
zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len)
{
itx_t *itx;
lr_truncate_t *lr;
@ -666,7 +698,6 @@ zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
lr->lr_offset = off;
lr->lr_length = len;
itx->itx_sync = sync;
zil_itx_assign(zilog, itx, tx);
}
@ -1541,6 +1572,7 @@ typedef struct zvol_set_prop_int_arg {
const char *zsda_name;
uint64_t zsda_value;
zprop_source_t zsda_source;
zfs_prop_t zsda_prop;
dmu_tx_t *zsda_tx;
} zvol_set_prop_int_arg_t;
@ -1549,7 +1581,7 @@ typedef struct zvol_set_prop_int_arg {
* conditions are imposed.
*/
static int
zvol_set_snapdev_check(void *arg, dmu_tx_t *tx)
zvol_set_common_check(void *arg, dmu_tx_t *tx)
{
zvol_set_prop_int_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
@ -1566,17 +1598,33 @@ zvol_set_snapdev_check(void *arg, dmu_tx_t *tx)
}
static int
zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
zvol_set_common_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
(void) arg;
zvol_set_prop_int_arg_t *zsda = arg;
char dsname[MAXNAMELEN];
zvol_task_t *task;
uint64_t snapdev;
uint64_t prop;
const char *prop_name = zfs_prop_to_name(zsda->zsda_prop);
dsl_dataset_name(ds, dsname);
if (dsl_prop_get_int_ds(ds, "snapdev", &snapdev) != 0)
if (dsl_prop_get_int_ds(ds, prop_name, &prop) != 0)
return (0);
task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname, NULL, snapdev);
switch (zsda->zsda_prop) {
case ZFS_PROP_VOLMODE:
task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname,
NULL, prop);
break;
case ZFS_PROP_SNAPDEV:
task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname,
NULL, prop);
break;
default:
task = NULL;
break;
}
if (task == NULL)
return (0);
@ -1586,14 +1634,14 @@ zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
}
/*
* Traverse all child datasets and apply snapdev appropriately.
* Traverse all child datasets and apply the property appropriately.
* We call dsl_prop_set_sync_impl() here to set the value only on the toplevel
* dataset and read the effective "snapdev" on every child in the callback
* dataset and read the effective "property" on every child in the callback
* function: this is because the value is not guaranteed to be the same in the
* whole dataset hierarchy.
*/
static void
zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx)
zvol_set_common_sync(void *arg, dmu_tx_t *tx)
{
zvol_set_prop_int_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
@ -1606,115 +1654,31 @@ zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx)
error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds);
if (error == 0) {
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_SNAPDEV),
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(zsda->zsda_prop),
zsda->zsda_source, sizeof (zsda->zsda_value), 1,
&zsda->zsda_value, zsda->zsda_tx);
dsl_dataset_rele(ds, FTAG);
}
dmu_objset_find_dp(dp, dd->dd_object, zvol_set_snapdev_sync_cb,
dmu_objset_find_dp(dp, dd->dd_object, zvol_set_common_sync_cb,
zsda, DS_FIND_CHILDREN);
dsl_dir_rele(dd, FTAG);
}
int
zvol_set_snapdev(const char *ddname, zprop_source_t source, uint64_t snapdev)
zvol_set_common(const char *ddname, zfs_prop_t prop, zprop_source_t source,
uint64_t val)
{
zvol_set_prop_int_arg_t zsda;
zsda.zsda_name = ddname;
zsda.zsda_source = source;
zsda.zsda_value = snapdev;
zsda.zsda_value = val;
zsda.zsda_prop = prop;
return (dsl_sync_task(ddname, zvol_set_snapdev_check,
zvol_set_snapdev_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
}
/*
* Sanity check the dataset for safe use by the sync task. No additional
* conditions are imposed.
*/
static int
zvol_set_volmode_check(void *arg, dmu_tx_t *tx)
{
zvol_set_prop_int_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd;
int error;
error = dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL);
if (error != 0)
return (error);
dsl_dir_rele(dd, FTAG);
return (error);
}
static int
zvol_set_volmode_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
(void) arg;
char dsname[MAXNAMELEN];
zvol_task_t *task;
uint64_t volmode;
dsl_dataset_name(ds, dsname);
if (dsl_prop_get_int_ds(ds, "volmode", &volmode) != 0)
return (0);
task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname, NULL, volmode);
if (task == NULL)
return (0);
(void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
task, TQ_SLEEP);
return (0);
}
/*
* Traverse all child datasets and apply volmode appropriately.
* We call dsl_prop_set_sync_impl() here to set the value only on the toplevel
* dataset and read the effective "volmode" on every child in the callback
* function: this is because the value is not guaranteed to be the same in the
* whole dataset hierarchy.
*/
static void
zvol_set_volmode_sync(void *arg, dmu_tx_t *tx)
{
zvol_set_prop_int_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd;
dsl_dataset_t *ds;
int error;
VERIFY0(dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL));
zsda->zsda_tx = tx;
error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds);
if (error == 0) {
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_VOLMODE),
zsda->zsda_source, sizeof (zsda->zsda_value), 1,
&zsda->zsda_value, zsda->zsda_tx);
dsl_dataset_rele(ds, FTAG);
}
dmu_objset_find_dp(dp, dd->dd_object, zvol_set_volmode_sync_cb,
zsda, DS_FIND_CHILDREN);
dsl_dir_rele(dd, FTAG);
}
int
zvol_set_volmode(const char *ddname, zprop_source_t source, uint64_t volmode)
{
zvol_set_prop_int_arg_t zsda;
zsda.zsda_name = ddname;
zsda.zsda_source = source;
zsda.zsda_value = volmode;
return (dsl_sync_task(ddname, zvol_set_volmode_check,
zvol_set_volmode_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
return (dsl_sync_task(ddname, zvol_set_common_check,
zvol_set_common_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
}
void

@ -1110,7 +1110,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.2.99-FreeBSD_g043c6ee3b"
#define ZFS_META_ALIAS "zfs-2.2.99-184-FreeBSD_g41e55b476"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@ -1140,7 +1140,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "FreeBSD_g043c6ee3b"
#define ZFS_META_RELEASE "184-FreeBSD_g41e55b476"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"

@ -1 +1 @@
#define ZFS_META_GITREV "zfs-2.2.99-174-g043c6ee3b"
#define ZFS_META_GITREV "zfs-2.2.99-184-g41e55b476"