mirror of
https://github.com/freebsd/freebsd-src
synced 2024-10-15 04:43:53 +00:00
zfs: merge openzfs/zfs@41e55b476
Notable upstream pull request merges:
 #15366 c3773de1 ZIL: Cleanup sync and commit handling
 #15409 dbe839a9 zvol: Cleanup set property
 #15409 60387fac zvol: Implement zvol threading as a Property
 #15409 9ccdb8be zvol: fix delayed update to block device ro entry
 #15448 05a7348a RAIDZ: Use cache blocking during parity math
 #15452 514d661c Tune zio buffer caches and their alignments
 #15456 799e09f7 Unify arc_prune_async() code
 #15465 763ca47f Fix block cloning between unencrypted and encrypted datasets

To make the module version better comparable, the module version number
now includes the commit count since the last tag.

Obtained from:	OpenZFS
OpenZFS commit:	41e55b476b
This commit is contained in:
commit f8b1db88b8
@@ -1,6 +1,9 @@
 # Features which are supported by GRUB2
+allocation_classes
 async_destroy
+block_cloning
 bookmarks
+device_rebuild
 embedded_data
 empty_bpobj
 enabled_txg
@@ -9,6 +12,13 @@ filesystem_limits
 hole_birth
 large_blocks
 livelist
 log_spacemap
 lz4_compress
+obsolete_counts
 project_quota
+resilver_defer
+spacemap_histogram
+spacemap_v2
+userobj_accounting
+zilsaxattr
+zpool_checkpoint
@@ -284,7 +284,6 @@ typedef struct zfid_long {
 #define	SHORT_FID_LEN	(sizeof (zfid_short_t) - sizeof (uint16_t))
 #define	LONG_FID_LEN	(sizeof (zfid_long_t) - sizeof (uint16_t))

-extern uint_t zfs_fsyncer_key;
 extern int zfs_super_owner;
 extern int zfs_bclone_enabled;
@@ -60,7 +60,7 @@ extern const struct file_operations zpl_file_operations;
 extern const struct file_operations zpl_dir_file_operations;

 /* zpl_super.c */
-extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
+extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg);

 extern const struct super_operations zpl_super_operations;
 extern const struct export_operations zpl_export_operations;
@@ -133,11 +133,11 @@ int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
 void abd_zero_off(abd_t *, size_t, size_t);
 void abd_verify(abd_t *);

-void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
-    ssize_t csize, ssize_t dsize, const unsigned parity,
+void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
+    size_t csize, size_t dsize, const unsigned parity,
     void (*func_raidz_gen)(void **, const void *, size_t, size_t));
 void abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
-    ssize_t tsize, const unsigned parity,
+    size_t tsize, const unsigned parity,
     void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
-    const unsigned *mul),
+    const unsigned *mul));
@@ -81,7 +81,7 @@ typedef struct arc_prune arc_prune_t;
 typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
     const blkptr_t *bp, arc_buf_t *buf, void *priv);
 typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
-typedef void arc_prune_func_t(int64_t bytes, void *priv);
+typedef void arc_prune_func_t(uint64_t bytes, void *priv);

 /* Shared module parameters */
 extern uint_t zfs_arc_average_blocksize;
@@ -1065,7 +1065,6 @@ extern void arc_wait_for_eviction(uint64_t, boolean_t);

 extern void arc_lowmem_init(void);
 extern void arc_lowmem_fini(void);
-extern void arc_prune_async(uint64_t);
 extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
 extern uint64_t arc_free_memory(void);
 extern int64_t arc_available_memory(void);
@@ -192,6 +192,7 @@ typedef enum {
	ZFS_PROP_REDACT_SNAPS,
	ZFS_PROP_SNAPSHOTS_CHANGED,
	ZFS_PROP_PREFETCH,
+	ZFS_PROP_VOLTHREADING,
	ZFS_NUM_PROPS
 } zfs_prop_t;
@@ -575,7 +575,6 @@ typedef struct zfsdev_state {
 extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which);
 extern int zfsdev_getminor(zfs_file_t *fp, minor_t *minorp);

-extern uint_t zfs_fsyncer_key;
 extern uint_t zfs_allow_log_key;

 #endif /* _KERNEL */
@@ -307,7 +307,7 @@ extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx,
     uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
     const char *dname, znode_t *szp, znode_t *wzp);
 extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
-    znode_t *zp, offset_t off, ssize_t len, int ioflag,
+    znode_t *zp, offset_t off, ssize_t len, boolean_t commit,
     zil_callback_t callback, void *callback_data);
 extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
     znode_t *zp, uint64_t off, uint64_t len);
@@ -50,8 +50,9 @@ extern int zvol_get_stats(objset_t *, nvlist_t *);
 extern boolean_t zvol_is_zvol(const char *);
 extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *);
 extern int zvol_set_volsize(const char *, uint64_t);
-extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t);
-extern int zvol_set_volmode(const char *, zprop_source_t, uint64_t);
+extern int zvol_set_volthreading(const char *, boolean_t);
+extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t);
+extern int zvol_set_ro(const char *, boolean_t);
 extern zvol_state_handle_t *zvol_suspend(const char *);
 extern int zvol_resume(zvol_state_handle_t *);
 extern void *zvol_tag(zvol_state_handle_t *);
@@ -58,6 +58,7 @@ typedef struct zvol_state {
	atomic_t zv_suspend_ref;	/* refcount for suspend */
	krwlock_t zv_suspend_lock;	/* suspend lock */
	struct zvol_state_os *zv_zso;	/* private platform state */
+	boolean_t zv_threading;		/* volthreading property */
 } zvol_state_t;

@@ -81,9 +82,9 @@ void zvol_remove_minors_impl(const char *name);
 void zvol_last_close(zvol_state_t *zv);
 void zvol_insert(zvol_state_t *zv);
 void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
-    uint64_t len, boolean_t sync);
+    uint64_t len);
 void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
-    uint64_t size, int sync);
+    uint64_t size, boolean_t commit);
 int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
     struct lwb *lwb, zio_t *zio);
 int zvol_init_impl(void);
@@ -47,6 +47,7 @@


 static boolean_t nfs_available(void);
+static boolean_t exports_available(void);

 typedef int (*nfs_shareopt_callback_t)(const char *opt, const char *value,
     void *cookie);

@@ -539,6 +540,8 @@ nfs_commit_shares(void)
 static void
 nfs_truncate_shares(void)
 {
+	if (!exports_available())
+		return;
	nfs_reset_shares(ZFS_EXPORTS_LOCK, ZFS_EXPORTS_FILE);
 }

@@ -566,3 +569,18 @@ nfs_available(void)

	return (avail == 1);
 }
+
+static boolean_t
+exports_available(void)
+{
+	static int avail;
+
+	if (!avail) {
+		if (access(ZFS_EXPORTS_DIR, F_OK) != 0)
+			avail = -1;
+		else
+			avail = 1;
+	}
+
+	return (avail == 1);
+}
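The new exports_available() helper memoizes its probe in a tri-state static (0 = not yet checked, 1 = present, -1 = absent), so access(2) runs at most once per process no matter how often shares are truncated. A minimal userland sketch of the same pattern; the directory path below is a hypothetical stand-in for ZFS_EXPORTS_DIR:

#include <stdio.h>
#include <unistd.h>

#define EXPORTS_DIR "/etc/exports.d"	/* stand-in for ZFS_EXPORTS_DIR */

static int
exports_available(void)
{
	/* 0 = not probed yet, 1 = directory exists, -1 = it does not */
	static int avail;

	if (!avail)
		avail = (access(EXPORTS_DIR, F_OK) == 0) ? 1 : -1;
	return (avail == 1);
}

int
main(void)
{
	printf("exports dir present: %s\n", exports_available() ? "yes" : "no");
	printf("second call (cached): %s\n", exports_available() ? "yes" : "no");
	return (0);
}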
@@ -1868,7 +1868,8 @@
	    <enumerator name='ZFS_PROP_REDACT_SNAPS' value='94'/>
	    <enumerator name='ZFS_PROP_SNAPSHOTS_CHANGED' value='95'/>
	    <enumerator name='ZFS_PROP_PREFETCH' value='96'/>
-	    <enumerator name='ZFS_NUM_PROPS' value='97'/>
+	    <enumerator name='ZFS_PROP_VOLTHREADING' value='97'/>
+	    <enumerator name='ZFS_NUM_PROPS' value='98'/>
	  </enum-decl>
	  <typedef-decl name='zfs_prop_t' type-id='4b000d60' id='58603c44'/>
	  <enum-decl name='zprop_source_t' naming-typedef-id='a2256d42' id='5903f80e'>
@@ -1197,6 +1197,18 @@ are equivalent to the
 and
 .Sy noexec
 mount options.
+.It Sy volthreading Ns = Ns Sy on Ns | Ns Sy off
+Controls internal zvol threading.
+The value
+.Sy off
+disables zvol threading, and zvol relies on application threads.
+The default value is
+.Sy on ,
+which enables threading within a zvol.
+Please note that this property will be overridden by
+.Sy zvol_request_sync
+module parameter.
+This property is only applicable to Linux.
 .It Sy filesystem_limit Ns = Ns Ar count Ns | Ns Sy none
 Limits the number of filesystems and volumes that can exist under this point in
 the dataset tree.
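As the property description above notes, volthreading only matters while the zvol_request_sync module parameter is unset, since that parameter forces synchronous handling regardless. A rough sketch of that precedence; the struct and helper here are simplified stand-ins, not the module's real types:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical module-wide knob and per-volume property. */
static bool zvol_request_sync = false;	/* module parameter */

struct zvol {
	bool zv_threading;	/* volthreading property, default on */
};

/* Returns true when the request must run in the caller's thread. */
static bool
must_run_sync(const struct zvol *zv)
{
	/* zvol_request_sync overrides the per-volume property. */
	return (zvol_request_sync || !zv->zv_threading);
}

int
main(void)
{
	struct zvol zv = { .zv_threading = true };
	printf("threaded: %s\n", must_run_sync(&zv) ? "no" : "yes");
	zv.zv_threading = false;	/* after: zfs set volthreading=off */
	printf("threaded: %s\n", must_run_sync(&zv) ? "no" : "yes");
	return (0);
}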
@@ -52,11 +52,6 @@
 #include <sys/vm.h>
 #include <sys/vmmeter.h>

-#if __FreeBSD_version >= 1300139
-static struct sx arc_vnlru_lock;
-static struct vnode *arc_vnlru_marker;
-#endif
-
 extern struct vfsops zfs_vfsops;

 uint_t zfs_arc_free_target = 0;

@@ -131,53 +126,6 @@ arc_default_max(uint64_t min, uint64_t allmem)
	return (MAX(allmem * 5 / 8, size));
 }

-/*
- * Helper function for arc_prune_async() it is responsible for safely
- * handling the execution of a registered arc_prune_func_t.
- */
-static void
-arc_prune_task(void *arg)
-{
-	uint64_t nr_scan = (uintptr_t)arg;
-
-#ifndef __ILP32__
-	if (nr_scan > INT_MAX)
-		nr_scan = INT_MAX;
-#endif
-
-#if __FreeBSD_version >= 1300139
-	sx_xlock(&arc_vnlru_lock);
-	vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
-	sx_xunlock(&arc_vnlru_lock);
-#else
-	vnlru_free(nr_scan, &zfs_vfsops);
-#endif
-}
-
-/*
- * Notify registered consumers they must drop holds on a portion of the ARC
- * buffered they reference. This provides a mechanism to ensure the ARC can
- * honor the metadata limit and reclaim otherwise pinned ARC buffers. This
- * is analogous to dnlc_reduce_cache() but more generic.
- *
- * This operation is performed asynchronously so it may be safely called
- * in the context of the arc_reclaim_thread(). A reference is taken here
- * for each registered arc_prune_t and the arc_prune_task() is responsible
- * for releasing it once the registered arc_prune_func_t has completed.
- */
-void
-arc_prune_async(uint64_t adjust)
-{
-
-#ifndef __LP64__
-	if (adjust > UINTPTR_MAX)
-		adjust = UINTPTR_MAX;
-#endif
-	taskq_dispatch(arc_prune_taskq, arc_prune_task,
-	    (void *)(intptr_t)adjust, TQ_SLEEP);
-	ARCSTAT_BUMP(arcstat_prune);
-}
-
 uint64_t
 arc_all_memory(void)
 {

@@ -228,10 +176,6 @@ arc_lowmem_init(void)
 {
	arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
	    EVENTHANDLER_PRI_FIRST);
-#if __FreeBSD_version >= 1300139
-	arc_vnlru_marker = vnlru_alloc_marker();
-	sx_init(&arc_vnlru_lock, "arc vnlru lock");
-#endif
 }

 void
@@ -239,12 +183,6 @@ arc_lowmem_fini(void)
 {
	if (arc_event_lowmem != NULL)
		EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
-#if __FreeBSD_version >= 1300139
-	if (arc_vnlru_marker != NULL) {
-		vnlru_free_marker(arc_vnlru_marker);
-		sx_destroy(&arc_vnlru_lock);
-	}
-#endif
 }

 void
@@ -2074,6 +2074,26 @@ zfs_vnodes_adjust_back(void)
 #endif
 }

+#if __FreeBSD_version >= 1300139
+static struct sx zfs_vnlru_lock;
+static struct vnode *zfs_vnlru_marker;
+#endif
+static arc_prune_t *zfs_prune;
+
+static void
+zfs_prune_task(uint64_t nr_to_scan, void *arg __unused)
+{
+	if (nr_to_scan > INT_MAX)
+		nr_to_scan = INT_MAX;
+#if __FreeBSD_version >= 1300139
+	sx_xlock(&zfs_vnlru_lock);
+	vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker);
+	sx_xunlock(&zfs_vnlru_lock);
+#else
+	vnlru_free(nr_to_scan, &zfs_vfsops);
+#endif
+}
+
 void
 zfs_init(void)
 {

@@ -2100,11 +2120,23 @@ zfs_init(void)
	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);

	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
+
+#if __FreeBSD_version >= 1300139
+	zfs_vnlru_marker = vnlru_alloc_marker();
+	sx_init(&zfs_vnlru_lock, "zfs vnlru lock");
+#endif
+	zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL);
 }

 void
 zfs_fini(void)
 {
+	arc_remove_prune_callback(zfs_prune);
+#if __FreeBSD_version >= 1300139
+	vnlru_free_marker(zfs_vnlru_marker);
+	sx_destroy(&zfs_vnlru_lock);
+#endif
+
	taskq_destroy(zfsvfs_taskq);
	zfsctl_fini();
	zfs_znode_fini();
@@ -244,9 +244,15 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr)
		return (SET_ERROR(EPERM));
	}

-	/* Keep a count of the synchronous opens in the znode */
-	if (flag & O_SYNC)
-		atomic_inc_32(&zp->z_sync_cnt);
+	/*
+	 * Keep a count of the synchronous opens in the znode.  On first
+	 * synchronous open we must convert all previous async transactions
+	 * into sync to keep correct ordering.
+	 */
+	if (flag & O_SYNC) {
+		if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
+			zil_async_to_sync(zfsvfs->z_log, zp->z_id);
+	}

	zfs_exit(zfsvfs, FTAG);
	return (0);

@@ -4201,6 +4207,10 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
	}
	zfs_vmobject_wunlock(object);

+	boolean_t commit = (flags & (zfs_vm_pagerput_sync |
+	    zfs_vm_pagerput_inval)) != 0 ||
+	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS;
+
	if (ncount == 0)
		goto out;

@@ -4253,7 +4263,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
		 * but that would make the locking messier
		 */
		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
-		    len, 0, NULL, NULL);
+		    len, commit, NULL, NULL);

		zfs_vmobject_wlock(object);
		for (i = 0; i < ncount; i++) {

@@ -4268,8 +4278,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,

out:
	zfs_rangelock_exit(lr);
-	if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
-	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+	if (commit)
		zil_commit(zfsvfs->z_log, zp->z_id);

	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
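Both the FreeBSD and Linux open paths now act only on the 0-to-1 transition of z_sync_cnt: the first O_SYNC opener converts already-queued async itxs with zil_async_to_sync(), and later openers merely bump the counter. A userland analogue of that first-increment test, using C11 atomics in place of the kernel's atomic_inc_32_nv():

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the znode's z_sync_cnt field. */
static _Atomic uint32_t sync_cnt;

/* Called for every O_SYNC open; returns 1 only for the first one. */
static int
sync_open(void)
{
	/*
	 * atomic_fetch_add returns the previous value, so 0 means this
	 * increment took the counter from 0 to 1: the moment at which
	 * previously logged async transactions must be upgraded.
	 */
	return (atomic_fetch_add(&sync_cnt, 1) == 0);
}

int
main(void)
{
	for (int i = 0; i < 3; i++)
		printf("open %d: first sync open? %s\n", i + 1,
		    sync_open() ? "yes (convert async itxs)" : "no");
	return (0);
}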
@@ -123,7 +123,6 @@ struct zvol_state_os {
	/* volmode=dev */
	struct zvol_state_dev {
		struct cdev *zsd_cdev;
-		uint64_t zsd_sync_cnt;
		struct selinfo zsd_selinfo;
	} _zso_dev;

@@ -669,7 +668,7 @@ zvol_geom_bio_strategy(struct bio *bp)
	int error = 0;
	boolean_t doread = B_FALSE;
	boolean_t is_dumpified;
-	boolean_t sync;
+	boolean_t commit;

	if (bp->bio_to)
		zv = bp->bio_to->private;

@@ -696,7 +695,7 @@ zvol_geom_bio_strategy(struct bio *bp)
		}
		zvol_ensure_zilog(zv);
		if (bp->bio_cmd == BIO_FLUSH)
-			goto sync;
+			goto commit;
		break;
	default:
		error = SET_ERROR(EOPNOTSUPP);

@@ -718,7 +717,7 @@ zvol_geom_bio_strategy(struct bio *bp)
	}

	is_dumpified = B_FALSE;
-	sync = !doread && !is_dumpified &&
+	commit = !doread && !is_dumpified &&
	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;

	/*

@@ -734,7 +733,7 @@ zvol_geom_bio_strategy(struct bio *bp)
		if (error != 0) {
			dmu_tx_abort(tx);
		} else {
-			zvol_log_truncate(zv, tx, off, resid, sync);
+			zvol_log_truncate(zv, tx, off, resid);
			dmu_tx_commit(tx);
			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
			    off, resid);

@@ -755,7 +754,7 @@ zvol_geom_bio_strategy(struct bio *bp)
			dmu_tx_abort(tx);
		} else {
			dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
-			zvol_log_write(zv, tx, off, size, sync);
+			zvol_log_write(zv, tx, off, size, commit);
			dmu_tx_commit(tx);
		}
	}

@@ -793,8 +792,8 @@ zvol_geom_bio_strategy(struct bio *bp)
		break;
	}

-	if (sync) {
-sync:
+	if (commit) {
+commit:
		zil_commit(zv->zv_zilog, ZVOL_OBJ);
	}
resume:

@@ -866,7 +865,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
	uint64_t volsize;
	zfs_locked_range_t *lr;
	int error = 0;
-	boolean_t sync;
+	boolean_t commit;
	zfs_uio_t uio;

	zv = dev->si_drv2;

@@ -880,7 +879,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
		return (SET_ERROR(EIO));

	ssize_t start_resid = zfs_uio_resid(&uio);
-	sync = (ioflag & IO_SYNC) ||
+	commit = (ioflag & IO_SYNC) ||
	    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);

	rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);

@@ -904,7 +903,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
		}
		error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
		if (error == 0)
-			zvol_log_write(zv, tx, off, bytes, sync);
+			zvol_log_write(zv, tx, off, bytes, commit);
		dmu_tx_commit(tx);

		if (error)

@@ -913,7 +912,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
	zfs_rangelock_exit(lr);
	int64_t nwritten = start_resid - zfs_uio_resid(&uio);
	dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
-	if (sync)
+	if (commit)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);
	rw_exit(&zv->zv_suspend_lock);
	return (error);

@@ -923,7 +922,6 @@ static int
 zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
	zvol_state_t *zv;
-	struct zvol_state_dev *zsd;
	int err = 0;
	boolean_t drop_suspend = B_FALSE;

@@ -1017,13 +1015,6 @@ zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
	}

	zv->zv_open_count++;
-	if (flags & O_SYNC) {
-		zsd = &zv->zv_zso->zso_dev;
-		zsd->zsd_sync_cnt++;
-		if (zsd->zsd_sync_cnt == 1 &&
-		    (zv->zv_flags & ZVOL_WRITTEN_TO) != 0)
-			zil_async_to_sync(zv->zv_zilog, ZVOL_OBJ);
-	}
out_opened:
	if (zv->zv_open_count == 0) {
		zvol_last_close(zv);

@@ -1041,7 +1032,6 @@ static int
 zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
	zvol_state_t *zv;
-	struct zvol_state_dev *zsd;
	boolean_t drop_suspend = B_TRUE;

	rw_enter(&zvol_state_lock, ZVOL_RW_READER);

@@ -1091,10 +1081,6 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
	 * You may get multiple opens, but only one close.
	 */
	zv->zv_open_count--;
-	if (flags & O_SYNC) {
-		zsd = &zv->zv_zso->zso_dev;
-		zsd->zsd_sync_cnt--;
-	}

	if (zv->zv_open_count == 0) {
		ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));

@@ -1163,7 +1149,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data,
			dmu_tx_abort(tx);
		} else {
			sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
-			zvol_log_truncate(zv, tx, offset, length, sync);
+			zvol_log_truncate(zv, tx, offset, length);
			dmu_tx_commit(tx);
			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
			    offset, length);
@@ -495,56 +495,5 @@ arc_unregister_hotplug(void)
 }
 #endif /* _KERNEL */

-/*
- * Helper function for arc_prune_async() it is responsible for safely
- * handling the execution of a registered arc_prune_func_t.
- */
-static void
-arc_prune_task(void *ptr)
-{
-	arc_prune_t *ap = (arc_prune_t *)ptr;
-	arc_prune_func_t *func = ap->p_pfunc;
-
-	if (func != NULL)
-		func(ap->p_adjust, ap->p_private);
-
-	zfs_refcount_remove(&ap->p_refcnt, func);
-}
-
-/*
- * Notify registered consumers they must drop holds on a portion of the ARC
- * buffered they reference. This provides a mechanism to ensure the ARC can
- * honor the metadata limit and reclaim otherwise pinned ARC buffers. This
- * is analogous to dnlc_reduce_cache() but more generic.
- *
- * This operation is performed asynchronously so it may be safely called
- * in the context of the arc_reclaim_thread(). A reference is taken here
- * for each registered arc_prune_t and the arc_prune_task() is responsible
- * for releasing it once the registered arc_prune_func_t has completed.
- */
-void
-arc_prune_async(uint64_t adjust)
-{
-	arc_prune_t *ap;
-
-	mutex_enter(&arc_prune_mtx);
-	for (ap = list_head(&arc_prune_list); ap != NULL;
-	    ap = list_next(&arc_prune_list, ap)) {
-
-		if (zfs_refcount_count(&ap->p_refcnt) >= 2)
-			continue;
-
-		zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
-		ap->p_adjust = adjust;
-		if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
-		    ap, TQ_SLEEP) == TASKQID_INVALID) {
-			zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
-			continue;
-		}
-		ARCSTAT_BUMP(arcstat_prune);
-	}
-	mutex_exit(&arc_prune_mtx);
-}
-
 ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
	"Limit on number of pages that ARC shrinker can reclaim at once");
@@ -192,9 +192,15 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
		return (SET_ERROR(EPERM));
	}

-	/* Keep a count of the synchronous opens in the znode */
-	if (flag & O_SYNC)
-		atomic_inc_32(&zp->z_sync_cnt);
+	/*
+	 * Keep a count of the synchronous opens in the znode.  On first
+	 * synchronous open we must convert all previous async transactions
+	 * into sync to keep correct ordering.
+	 */
+	if (flag & O_SYNC) {
+		if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
+			zil_async_to_sync(zfsvfs->z_log, zp->z_id);
+	}

	zfs_exit(zfsvfs, FTAG);
	return (0);

@@ -3826,21 +3832,14 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,

	err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);

-	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
-	    for_sync ? zfs_putpage_sync_commit_cb :
-	    zfs_putpage_async_commit_cb, pp);
-
-	dmu_tx_commit(tx);
-
-	zfs_rangelock_exit(lr);
-
+	boolean_t commit = B_FALSE;
	if (wbc->sync_mode != WB_SYNC_NONE) {
		/*
		 * Note that this is rarely called under writepages(), because
		 * writepages() normally handles the entire commit for
		 * performance reasons.
		 */
-		zil_commit(zfsvfs->z_log, zp->z_id);
+		commit = B_TRUE;
	} else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
		/*
		 * If the caller does not intend to wait synchronously
@@ -3850,9 +3849,20 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
		 * our writeback to complete. Refer to the comment in
		 * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
		 */
-		zil_commit(zfsvfs->z_log, zp->z_id);
+		commit = B_TRUE;
	}

+	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit,
+	    for_sync ? zfs_putpage_sync_commit_cb :
+	    zfs_putpage_async_commit_cb, pp);
+
+	dmu_tx_commit(tx);
+
+	zfs_rangelock_exit(lr);
+
+	if (commit)
+		zil_commit(zfsvfs->z_log, zp->z_id);
+
	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);

	zfs_exit(zfsvfs, FTAG);
@@ -375,7 +375,7 @@ zpl_kill_sb(struct super_block *sb)
 }

 void
-zpl_prune_sb(int64_t nr_to_scan, void *arg)
+zpl_prune_sb(uint64_t nr_to_scan, void *arg)
 {
	struct super_block *sb = (struct super_block *)arg;
	int objects = 0;
@@ -387,7 +387,7 @@ zvol_discard(zv_request_t *zvr)
	if (error != 0) {
		dmu_tx_abort(tx);
	} else {
-		zvol_log_truncate(zv, tx, start, size, B_TRUE);
+		zvol_log_truncate(zv, tx, start, size);
		dmu_tx_commit(tx);
		error = dmu_free_long_range(zv->zv_objset,
		    ZVOL_OBJ, start, size);

@@ -512,7 +512,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
	uint64_t size = io_size(bio, rq);
	int rw = io_data_dir(bio, rq);

-	if (zvol_request_sync)
+	if (zvol_request_sync || zv->zv_threading == B_FALSE)
		force_sync = 1;

	zv_request_t zvr = {

@@ -1304,6 +1304,7 @@ zvol_os_create_minor(const char *name)
	int error = 0;
	int idx;
	uint64_t hash = zvol_name_hash(name);
+	uint64_t volthreading;
	bool replayed_zil = B_FALSE;

	if (zvol_inhibit_dev)

@@ -1350,6 +1351,12 @@ zvol_os_create_minor(const char *name)
	zv->zv_volsize = volsize;
	zv->zv_objset = os;

+	/* Default */
+	zv->zv_threading = B_TRUE;
+	if (dsl_prop_get_integer(name, "volthreading", &volthreading, NULL)
+	    == 0)
+		zv->zv_threading = volthreading;
+
	set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> 9);

	blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue,
@@ -628,6 +628,9 @@ zfs_prop_init(void)
	    ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
	    ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK", B_FALSE,
	    sfeatures);
+	zprop_register_index(ZFS_PROP_VOLTHREADING, "volthreading",
+	    1, PROP_DEFAULT, ZFS_TYPE_VOLUME, "on | off", "zvol threading",
+	    boolean_table, sfeatures);
	zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0,
	    PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
	    "USEDSNAP", B_FALSE, sfeatures);
@@ -1017,12 +1017,12 @@ abd_cmp(abd_t *dabd, abd_t *sabd)
 * is the same when taking linear and when taking scatter
 */
 void
-abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
-    ssize_t csize, ssize_t dsize, const unsigned parity,
+abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
+    size_t csize, size_t dsize, const unsigned parity,
     void (*func_raidz_gen)(void **, const void *, size_t, size_t))
 {
	int i;
-	ssize_t len, dlen;
+	size_t len, dlen;
	struct abd_iter caiters[3];
	struct abd_iter daiter;
	void *caddrs[3];

@@ -1033,16 +1033,15 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
	ASSERT3U(parity, <=, 3);
	for (i = 0; i < parity; i++) {
		abd_verify(cabds[i]);
-		ASSERT3U(csize, <=, cabds[i]->abd_size);
-		c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], 0);
+		ASSERT3U(off + csize, <=, cabds[i]->abd_size);
+		c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], off);
	}

-	ASSERT3S(dsize, >=, 0);
	if (dsize > 0) {
		ASSERT(dabd);
		abd_verify(dabd);
-		ASSERT3U(dsize, <=, dabd->abd_size);
-		c_dabd = abd_init_abd_iter(dabd, &daiter, 0);
+		ASSERT3U(off + dsize, <=, dabd->abd_size);
+		c_dabd = abd_init_abd_iter(dabd, &daiter, off);
	}

	abd_enter_critical(flags);

@@ -1064,7 +1063,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
			dlen = 0;

		/* must be progressive */
-		ASSERT3S(len, >, 0);
+		ASSERT3U(len, >, 0);
		/*
		 * The iterated function likely will not do well if each
		 * segment except the last one is not multiple of 512 (raidz).

@@ -1089,9 +1088,6 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
		}

		csize -= len;
-
-		ASSERT3S(dsize, >=, 0);
-		ASSERT3S(csize, >=, 0);
	}
	abd_exit_critical(flags);
 }

@@ -1108,13 +1104,13 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
 */
 void
 abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
-    ssize_t tsize, const unsigned parity,
+    size_t tsize, const unsigned parity,
     void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
-    const unsigned *mul),
+    const unsigned *mul))
 {
	int i;
-	ssize_t len;
+	size_t len;
	struct abd_iter citers[3];
	struct abd_iter xiters[3];
	void *caddrs[3], *xaddrs[3];
@@ -886,6 +886,8 @@ static void l2arc_do_free_on_write(void);
 static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
     boolean_t state_only);

+static void arc_prune_async(uint64_t adjust);
+
 #define	l2arc_hdr_arcstats_increment(hdr) \
	l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE)
 #define	l2arc_hdr_arcstats_decrement(hdr) \

@@ -6050,6 +6052,56 @@ arc_remove_prune_callback(arc_prune_t *p)
	kmem_free(p, sizeof (*p));
 }

+/*
+ * Helper function for arc_prune_async() it is responsible for safely
+ * handling the execution of a registered arc_prune_func_t.
+ */
+static void
+arc_prune_task(void *ptr)
+{
+	arc_prune_t *ap = (arc_prune_t *)ptr;
+	arc_prune_func_t *func = ap->p_pfunc;
+
+	if (func != NULL)
+		func(ap->p_adjust, ap->p_private);
+
+	zfs_refcount_remove(&ap->p_refcnt, func);
+}
+
+/*
+ * Notify registered consumers they must drop holds on a portion of the ARC
+ * buffers they reference. This provides a mechanism to ensure the ARC can
+ * honor the metadata limit and reclaim otherwise pinned ARC buffers.
+ *
+ * This operation is performed asynchronously so it may be safely called
+ * in the context of the arc_reclaim_thread(). A reference is taken here
+ * for each registered arc_prune_t and the arc_prune_task() is responsible
+ * for releasing it once the registered arc_prune_func_t has completed.
+ */
+static void
+arc_prune_async(uint64_t adjust)
+{
+	arc_prune_t *ap;
+
+	mutex_enter(&arc_prune_mtx);
+	for (ap = list_head(&arc_prune_list); ap != NULL;
+	    ap = list_next(&arc_prune_list, ap)) {
+
+		if (zfs_refcount_count(&ap->p_refcnt) >= 2)
+			continue;
+
+		zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
+		ap->p_adjust = adjust;
+		if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
+		    ap, TQ_SLEEP) == TASKQID_INVALID) {
+			zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
+			continue;
+		}
+		ARCSTAT_BUMP(arcstat_prune);
+	}
+	mutex_exit(&arc_prune_mtx);
+}
+
 /*
 * Notify the arc that a block was freed, and thus will never be used again.
 */
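The unified arc_prune_async() uses each callback's refcount as a dispatch gate: a count of 2 or more means a prune task for that consumer is already queued, so the loop skips it instead of piling up duplicate work. A single-threaded sketch of that gating, with simplified, hypothetical types standing in for arc_prune_t and the taskq:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef void prune_func_t(uint64_t bytes, void *priv);

typedef struct prune_cb {
	prune_func_t *p_pfunc;
	void *p_private;
	uint64_t p_adjust;
	_Atomic int p_refcnt;	/* 1 while registered, 2 while queued */
} prune_cb_t;

static void
my_prune(uint64_t bytes, void *priv)
{
	(void) priv;
	printf("pruning %llu bytes\n", (unsigned long long)bytes);
}

/* In the kernel this body runs on a taskq thread; here it runs inline. */
static void
prune_task(prune_cb_t *cb)
{
	cb->p_pfunc(cb->p_adjust, cb->p_private);
	atomic_fetch_sub(&cb->p_refcnt, 1);	/* drop the dispatch hold */
}

static void
prune_async(prune_cb_t *cb, uint64_t adjust)
{
	/* A refcount >= 2 means a prune is already in flight; skip it. */
	if (atomic_load(&cb->p_refcnt) >= 2)
		return;
	atomic_fetch_add(&cb->p_refcnt, 1);
	cb->p_adjust = adjust;
	prune_task(cb);		/* taskq_dispatch() in the real code */
}

int
main(void)
{
	prune_cb_t cb = { .p_pfunc = my_prune, .p_refcnt = 1 };
	prune_async(&cb, 1024);
	prune_async(&cb, 2048);
	return (0);
}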
@@ -214,9 +214,10 @@ raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
 }


-#define	raidz_copy(dabd, sabd, size) \
+#define	raidz_copy(dabd, sabd, off, size) \
 { \
-	abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\
+	abd_iterate_func2(dabd, sabd, off, off, size, raidz_copy_abd_cb, \
+	    NULL); \
 }

 /*

@@ -254,9 +255,10 @@ raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
	return (0);
 }

-#define	raidz_add(dabd, sabd, size) \
+#define	raidz_add(dabd, sabd, off, size) \
 { \
-	abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\
+	abd_iterate_func2(dabd, sabd, off, off, size, raidz_add_abd_cb, \
+	    NULL); \
 }

 /*

@@ -343,7 +345,10 @@ raidz_mul_abd_cb(void *dc, size_t size, void *private)
 * the parity/syndrome if data column is shorter.
 *
 * P parity is calculated using raidz_add_abd().
+ *
+ * For CPU L2 cache blocking we process 64KB at a time.
 */
+#define	CHUNK	65536

 /*
 * Generate P parity (RAIDZ1)

@@ -357,20 +362,26 @@ raidz_generate_p_impl(raidz_row_t * const rr)
	const size_t ncols = rr->rr_cols;
	const size_t psize = rr->rr_col[CODE_P].rc_size;
	abd_t *pabd = rr->rr_col[CODE_P].rc_abd;
-	size_t size;
-	abd_t *dabd;
+	size_t off, size;

	raidz_math_begin();

-	/* start with first data column */
-	raidz_copy(pabd, rr->rr_col[1].rc_abd, psize);
+	for (off = 0; off < psize; off += CHUNK) {

-	for (c = 2; c < ncols; c++) {
-		dabd = rr->rr_col[c].rc_abd;
-		size = rr->rr_col[c].rc_size;
+		/* start with first data column */
+		size = MIN(CHUNK, psize - off);
+		raidz_copy(pabd, rr->rr_col[1].rc_abd, off, size);

-		/* add data column */
-		raidz_add(pabd, dabd, size);
+		for (c = 2; c < ncols; c++) {
+			size = rr->rr_col[c].rc_size;
+			if (size <= off)
+				continue;
+
+			/* add data column */
+			size = MIN(CHUNK, size - off);
+			abd_t *dabd = rr->rr_col[c].rc_abd;
+			raidz_add(pabd, dabd, off, size);
+		}
	}

	raidz_math_end();
@ -423,7 +434,7 @@ raidz_generate_pq_impl(raidz_row_t * const rr)
|
|||
size_t c;
|
||||
const size_t ncols = rr->rr_cols;
|
||||
const size_t csize = rr->rr_col[CODE_P].rc_size;
|
||||
size_t dsize;
|
||||
size_t off, size, dsize;
|
||||
abd_t *dabd;
|
||||
abd_t *cabds[] = {
|
||||
rr->rr_col[CODE_P].rc_abd,
|
||||
|
@ -432,15 +443,20 @@ raidz_generate_pq_impl(raidz_row_t * const rr)
|
|||
|
||||
raidz_math_begin();
|
||||
|
||||
raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, csize);
|
||||
raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, csize);
|
||||
for (off = 0; off < csize; off += CHUNK) {
|
||||
|
||||
for (c = 3; c < ncols; c++) {
|
||||
dabd = rr->rr_col[c].rc_abd;
|
||||
dsize = rr->rr_col[c].rc_size;
|
||||
size = MIN(CHUNK, csize - off);
|
||||
raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, off, size);
|
||||
raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, off, size);
|
||||
|
||||
abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
|
||||
raidz_gen_pq_add);
|
||||
for (c = 3; c < ncols; c++) {
|
||||
dabd = rr->rr_col[c].rc_abd;
|
||||
dsize = rr->rr_col[c].rc_size;
|
||||
dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0;
|
||||
|
||||
abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 2,
|
||||
raidz_gen_pq_add);
|
||||
}
|
||||
}
|
||||
|
||||
raidz_math_end();
|
||||
|
@ -496,7 +512,7 @@ raidz_generate_pqr_impl(raidz_row_t * const rr)
|
|||
size_t c;
|
||||
const size_t ncols = rr->rr_cols;
|
||||
const size_t csize = rr->rr_col[CODE_P].rc_size;
|
||||
size_t dsize;
|
||||
size_t off, size, dsize;
|
||||
abd_t *dabd;
|
||||
abd_t *cabds[] = {
|
||||
rr->rr_col[CODE_P].rc_abd,
|
||||
|
@ -506,16 +522,21 @@ raidz_generate_pqr_impl(raidz_row_t * const rr)
|
|||
|
||||
raidz_math_begin();
|
||||
|
||||
raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, csize);
|
||||
raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, csize);
|
||||
raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, csize);
|
||||
for (off = 0; off < csize; off += CHUNK) {
|
||||
|
||||
for (c = 4; c < ncols; c++) {
|
||||
dabd = rr->rr_col[c].rc_abd;
|
||||
dsize = rr->rr_col[c].rc_size;
|
||||
size = MIN(CHUNK, csize - off);
|
||||
raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, off, size);
|
||||
raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, off, size);
|
||||
raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, off, size);
|
||||
|
||||
abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
|
||||
raidz_gen_pqr_add);
|
||||
for (c = 4; c < ncols; c++) {
|
||||
dabd = rr->rr_col[c].rc_abd;
|
||||
dsize = rr->rr_col[c].rc_size;
|
||||
dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0;
|
||||
|
||||
abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 3,
|
||||
raidz_gen_pqr_add);
|
||||
}
|
||||
}
|
||||
|
||||
raidz_math_end();
|
||||
|
@ -592,26 +613,31 @@ raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
const size_t x = tgtidx[TARGET_X];
|
||||
const size_t xsize = rr->rr_col[x].rc_size;
|
||||
abd_t *xabd = rr->rr_col[x].rc_abd;
|
||||
size_t size;
|
||||
abd_t *dabd;
|
||||
size_t off, size;
|
||||
|
||||
if (xabd == NULL)
|
||||
return (1 << CODE_P);
|
||||
|
||||
raidz_math_begin();
|
||||
|
||||
/* copy P into target */
|
||||
raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, xsize);
|
||||
for (off = 0; off < xsize; off += CHUNK) {
|
||||
|
||||
/* generate p_syndrome */
|
||||
for (c = firstdc; c < ncols; c++) {
|
||||
if (c == x)
|
||||
continue;
|
||||
/* copy P into target */
|
||||
size = MIN(CHUNK, xsize - off);
|
||||
raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, off, size);
|
||||
|
||||
dabd = rr->rr_col[c].rc_abd;
|
||||
size = MIN(rr->rr_col[c].rc_size, xsize);
|
||||
/* generate p_syndrome */
|
||||
for (c = firstdc; c < ncols; c++) {
|
||||
if (c == x)
|
||||
continue;
|
||||
size = rr->rr_col[c].rc_size;
|
||||
if (size <= off)
|
||||
continue;
|
||||
|
||||
raidz_add(xabd, dabd, size);
|
||||
size = MIN(CHUNK, MIN(size, xsize) - off);
|
||||
abd_t *dabd = rr->rr_col[c].rc_abd;
|
||||
raidz_add(xabd, dabd, off, size);
|
||||
}
|
||||
}
|
||||
|
||||
raidz_math_end();
|
||||
|
@ -683,7 +709,7 @@ raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
|
||||
/* Start with first data column if present */
|
||||
if (firstdc != x) {
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
} else {
|
||||
raidz_zero(xabd, xsize);
|
||||
}
|
||||
|
@ -698,12 +724,12 @@ raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
dsize = rr->rr_col[c].rc_size;
|
||||
}
|
||||
|
||||
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
|
||||
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1,
|
||||
raidz_syn_q_abd);
|
||||
}
|
||||
|
||||
/* add Q to the syndrome */
|
||||
raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, xsize);
|
||||
raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, 0, xsize);
|
||||
|
||||
/* transform the syndrome */
|
||||
abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
|
||||
|
@ -777,7 +803,7 @@ raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
|
||||
/* Start with first data column if present */
|
||||
if (firstdc != x) {
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
} else {
|
||||
raidz_zero(xabd, xsize);
|
||||
}
|
||||
|
@ -793,12 +819,12 @@ raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
dsize = rr->rr_col[c].rc_size;
|
||||
}
|
||||
|
||||
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
|
||||
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1,
|
||||
raidz_syn_r_abd);
|
||||
}
|
||||
|
||||
/* add R to the syndrome */
|
||||
raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, xsize);
|
||||
raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, 0, xsize);
|
||||
|
||||
/* transform the syndrome */
|
||||
abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
|
||||
|
@ -934,8 +960,8 @@ raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
|
||||
/* Start with first data column if present */
|
||||
if (firstdc != x) {
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
} else {
|
||||
raidz_zero(xabd, xsize);
|
||||
raidz_zero(yabd, xsize);
|
||||
|
@ -951,7 +977,7 @@ raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
dsize = rr->rr_col[c].rc_size;
|
||||
}
|
||||
|
||||
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
|
||||
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
|
||||
raidz_syn_pq_abd);
|
||||
}
|
||||
|
||||
|
@ -959,7 +985,7 @@ raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
|
||||
/* Copy shorter targets back to the original abd buffer */
|
||||
if (ysize < xsize)
|
||||
raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
|
||||
raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
|
||||
|
||||
raidz_math_end();
|
||||
|
||||
|
@ -1094,8 +1120,8 @@ raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
|
||||
/* Start with first data column if present */
|
||||
if (firstdc != x) {
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
} else {
|
||||
raidz_zero(xabd, xsize);
|
||||
raidz_zero(yabd, xsize);
|
||||
|
@ -1111,7 +1137,7 @@ raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
dsize = rr->rr_col[c].rc_size;
|
||||
}
|
||||
|
||||
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
|
||||
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
|
||||
raidz_syn_pr_abd);
|
||||
}
|
||||
|
||||
|
@ -1121,7 +1147,7 @@ raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
* Copy shorter targets back to the original abd buffer
|
||||
*/
|
||||
if (ysize < xsize)
|
||||
raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
|
||||
raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
|
||||
|
||||
raidz_math_end();
|
||||
|
||||
|
@ -1261,8 +1287,8 @@ raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
|
||||
/* Start with first data column if present */
|
||||
if (firstdc != x) {
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
} else {
|
||||
raidz_zero(xabd, xsize);
|
||||
raidz_zero(yabd, xsize);
|
||||
|
@ -1278,7 +1304,7 @@ raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
dsize = rr->rr_col[c].rc_size;
|
||||
}
|
||||
|
||||
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
|
||||
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
|
||||
raidz_syn_qr_abd);
|
||||
}
|
||||
|
||||
|
@ -1288,7 +1314,7 @@ raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
* Copy shorter targets back to the original abd buffer
|
||||
*/
|
||||
if (ysize < xsize)
|
||||
raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
|
||||
raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
|
||||
|
||||
raidz_math_end();
|
||||
|
||||
|
@ -1456,9 +1482,9 @@ raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
|
||||
/* Start with first data column if present */
|
||||
if (firstdc != x) {
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, xsize);
|
||||
raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
|
||||
} else {
|
||||
raidz_zero(xabd, xsize);
|
||||
raidz_zero(yabd, xsize);
|
||||
|
@ -1475,7 +1501,7 @@ raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
dsize = rr->rr_col[c].rc_size;
|
||||
}
|
||||
|
||||
abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
|
||||
abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 3,
|
||||
raidz_syn_pqr_abd);
|
||||
}
|
||||
|
||||
|
@ -1485,9 +1511,9 @@ raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
|
|||
* Copy shorter targets back to the original abd buffer
|
||||
*/
|
||||
if (ysize < xsize)
|
||||
raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
|
||||
raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
|
||||
if (zsize < xsize)
|
||||
raidz_copy(rr->rr_col[z].rc_abd, zabd, zsize);
|
||||
raidz_copy(rr->rr_col[z].rc_abd, zabd, 0, zsize);
|
||||
|
||||
raidz_math_end();
|
||||
|
||||
|
|
|
@@ -238,7 +238,6 @@ uint64_t zfs_max_nvlist_src_size = 0;
 */
 static uint64_t zfs_history_output_max = 1024 * 1024;

-uint_t zfs_fsyncer_key;
 uint_t zfs_allow_log_key;

 /* DATA_TYPE_ANY is used when zkey_type can vary. */

@@ -2523,11 +2522,27 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
	case ZFS_PROP_VOLSIZE:
		err = zvol_set_volsize(dsname, intval);
		break;
+	case ZFS_PROP_VOLTHREADING:
+		err = zvol_set_volthreading(dsname, intval);
+		/*
+		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
+		 * default path to set the value in the nvlist.
+		 */
+		if (err == 0)
+			err = -1;
+		break;
	case ZFS_PROP_SNAPDEV:
-		err = zvol_set_snapdev(dsname, source, intval);
-		break;
	case ZFS_PROP_VOLMODE:
-		err = zvol_set_volmode(dsname, source, intval);
+		err = zvol_set_common(dsname, prop, source, intval);
		break;
+	case ZFS_PROP_READONLY:
+		err = zvol_set_ro(dsname, intval);
+		/*
+		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
+		 * default path to set the value in the nvlist.
+		 */
+		if (err == 0)
+			err = -1;
+		break;
	case ZFS_PROP_VERSION:
	{

@@ -7882,7 +7897,6 @@ zfs_kmod_init(void)
	if ((error = zfsdev_attach()) != 0)
		goto out;

-	tsd_create(&zfs_fsyncer_key, NULL);
	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);

@@ -7919,7 +7933,6 @@ zfs_kmod_fini(void)
	spa_fini();
	zvol_fini();

-	tsd_destroy(&zfs_fsyncer_key);
	tsd_destroy(&rrw_tsd_key);
	tsd_destroy(&zfs_allow_log_key);
 }
@@ -606,13 +606,12 @@ static int64_t zfs_immediate_write_sz = 32768;

 void
 zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
-    znode_t *zp, offset_t off, ssize_t resid, int ioflag,
+    znode_t *zp, offset_t off, ssize_t resid, boolean_t commit,
     zil_callback_t callback, void *callback_data)
 {
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
	uint32_t blocksize = zp->z_blksz;
	itx_wr_state_t write_state;
-	uintptr_t fsync_cnt;
	uint64_t gen = 0;
	ssize_t size = resid;

@@ -628,15 +627,11 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
	else if (!spa_has_slogs(zilog->zl_spa) &&
	    resid >= zfs_immediate_write_sz)
		write_state = WR_INDIRECT;
-	else if (ioflag & (O_SYNC | O_DSYNC))
+	else if (commit)
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;

-	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
-		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
-	}
-
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen,
	    sizeof (gen));

@@ -687,12 +682,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = ZTOZSB(zp);
+		itx->itx_sync = (zp->z_sync_cnt != 0);
		itx->itx_gen = gen;

-		if (!(ioflag & (O_SYNC | O_DSYNC)) && (zp->z_sync_cnt == 0) &&
-		    (fsync_cnt == 0))
-			itx->itx_sync = B_FALSE;
-
		itx->itx_callback = callback;
		itx->itx_callback_data = callback_data;
		zil_itx_assign(zilog, itx, tx);
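With the ioflag and tsd plumbing gone, the record-type decision in zfs_log_write() reduces to a few inputs: large writes without a separate log device are logged indirectly, and otherwise the caller's precomputed commit flag picks between copying the data now or lazily. A distilled sketch of that selection; the constant and inputs are simplified from the real function, which also honors logbias and per-dataset tunables:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef enum { WR_INDIRECT, WR_COPIED, WR_NEED_COPY } itx_wr_state_t;

static itx_wr_state_t
pick_write_state(bool has_slogs, int64_t resid, bool commit)
{
	const int64_t immediate_write_sz = 32768;

	if (!has_slogs && resid >= immediate_write_sz)
		return (WR_INDIRECT);	/* log a block pointer, not data */
	if (commit)
		return (WR_COPIED);	/* copy now: a zil_commit follows */
	return (WR_NEED_COPY);		/* defer the copy until needed */
}

int
main(void)
{
	static const char *names[] =
	    { "WR_INDIRECT", "WR_COPIED", "WR_NEED_COPY" };
	printf("%s\n", names[pick_write_state(false, 65536, false)]);
	printf("%s\n", names[pick_write_state(true, 4096, true)]);
	printf("%s\n", names[pick_write_state(true, 4096, false)]);
	return (0);
}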
@@ -58,27 +58,20 @@
 #include <sys/zfs_znode.h>


-static ulong_t zfs_fsync_sync_cnt = 4;
-
 int
 zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
 {
	int error = 0;
	zfsvfs_t *zfsvfs = ZTOZSB(zp);

-	(void) tsd_set(zfs_fsyncer_key, (void *)(uintptr_t)zfs_fsync_sync_cnt);
-
	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
		if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
-			goto out;
+			return (error);
		atomic_inc_32(&zp->z_sync_writes_cnt);
		zil_commit(zfsvfs->z_log, zp->z_id);
		atomic_dec_32(&zp->z_sync_writes_cnt);
		zfs_exit(zfsvfs, FTAG);
	}
-out:
-	tsd_set(zfs_fsyncer_key, NULL);

	return (error);
 }

@@ -520,6 +513,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)

	uint64_t end_size = MAX(zp->z_size, woff + n);
	zilog_t *zilog = zfsvfs->z_log;
+	boolean_t commit = (ioflag & (O_SYNC | O_DSYNC)) ||
+	    (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS);

	const uint64_t uid = KUID_TO_SUID(ZTOUID(zp));
	const uint64_t gid = KGID_TO_SGID(ZTOGID(zp));

@@ -741,7 +736,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
		 * zfs_clear_setid_bits_if_necessary must precede any of
		 * the TX_WRITE records logged here.
		 */
-		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag,
+		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit,
		    NULL, NULL);

		dmu_tx_commit(tx);

@@ -767,8 +762,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
		return (error);
	}

-	if (ioflag & (O_SYNC | O_DSYNC) ||
-	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+	if (commit)
		zil_commit(zilog, zp->z_id);

	const int64_t nwritten = start_resid - zfs_uio_resid(uio);

@@ -1094,6 +1088,15 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,

	ASSERT(!outzfsvfs->z_replay);

+	/*
+	 * Block cloning from an unencrypted dataset into an encrypted
+	 * dataset and vice versa is not supported.
+	 */
+	if (inos->os_encrypted != outos->os_encrypted) {
+		zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
+		return (SET_ERROR(EXDEV));
+	}
+
	error = zfs_verify_zp(inzp);
	if (error == 0)
		error = zfs_verify_zp(outzp);
@@ -158,23 +158,22 @@ zio_init(void)
	zio_link_cache = kmem_cache_create("zio_link_cache",
	    sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

-	/*
-	 * For small buffers, we want a cache for each multiple of
-	 * SPA_MINBLOCKSIZE.  For larger buffers, we want a cache
-	 * for each quarter-power of 2.
-	 */
	for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
		size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
+		size_t align, cflags, data_cflags;
+		char name[32];
+
+		/*
+		 * Create cache for each half-power of 2 size, starting from
+		 * SPA_MINBLOCKSIZE.  It should give us memory space efficiency
+		 * of ~7/8, sufficient for transient allocations mostly using
+		 * these caches.
+		 */
		size_t p2 = size;
-		size_t align = 0;
-		size_t data_cflags, cflags;
-
-		data_cflags = KMC_NODEBUG;
-		cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
-		    KMC_NODEBUG : 0;
-
		while (!ISP2(p2))
			p2 &= p2 - 1;
+		if (!IS_P2ALIGNED(size, p2 / 2))
+			continue;

 #ifndef _KERNEL
		/*

@@ -185,47 +184,37 @@ zio_init(void)
		 */
		if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
			continue;
-		/*
-		 * Here's the problem - on 4K native devices in userland on
-		 * Linux using O_DIRECT, buffers must be 4K aligned or I/O
-		 * will fail with EINVAL, causing zdb (and others) to coredump.
-		 * Since userland probably doesn't need optimized buffer caches,
-		 * we just force 4K alignment on everything.
-		 */
-		align = 8 * SPA_MINBLOCKSIZE;
-#else
-		if (size < PAGESIZE) {
-			align = SPA_MINBLOCKSIZE;
-		} else if (IS_P2ALIGNED(size, p2 >> 2)) {
-			align = PAGESIZE;
-		}
 #endif
-
-		if (align != 0) {
-			char name[36];
-			if (cflags == data_cflags) {
-				/*
-				 * Resulting kmem caches would be identical.
-				 * Save memory by creating only one.
-				 */
-				(void) snprintf(name, sizeof (name),
-				    "zio_buf_comb_%lu", (ulong_t)size);
-				zio_buf_cache[c] = kmem_cache_create(name,
-				    size, align, NULL, NULL, NULL, NULL, NULL,
-				    cflags);
-				zio_data_buf_cache[c] = zio_buf_cache[c];
-				continue;
-			}
-			(void) snprintf(name, sizeof (name), "zio_buf_%lu",
-			    (ulong_t)size);
-			zio_buf_cache[c] = kmem_cache_create(name, size,
-			    align, NULL, NULL, NULL, NULL, NULL, cflags);
+		if (IS_P2ALIGNED(size, PAGESIZE))
+			align = PAGESIZE;
+		else
+			align = 1 << (highbit64(size ^ (size - 1)) - 1);

-			(void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
-			    (ulong_t)size);
-			zio_data_buf_cache[c] = kmem_cache_create(name, size,
-			    align, NULL, NULL, NULL, NULL, NULL, data_cflags);
+		cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
+		    KMC_NODEBUG : 0;
+		data_cflags = KMC_NODEBUG;
+		if (cflags == data_cflags) {
+			/*
+			 * Resulting kmem caches would be identical.
+			 * Save memory by creating only one.
+			 */
+			(void) snprintf(name, sizeof (name),
+			    "zio_buf_comb_%lu", (ulong_t)size);
+			zio_buf_cache[c] = kmem_cache_create(name, size, align,
+			    NULL, NULL, NULL, NULL, NULL, cflags);
+			zio_data_buf_cache[c] = zio_buf_cache[c];
+			continue;
+		}
+		(void) snprintf(name, sizeof (name), "zio_buf_%lu",
+		    (ulong_t)size);
+		zio_buf_cache[c] = kmem_cache_create(name, size, align,
+		    NULL, NULL, NULL, NULL, NULL, cflags);
+
+		(void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
+		    (ulong_t)size);
+		zio_data_buf_cache[c] = kmem_cache_create(name, size, align,
+		    NULL, NULL, NULL, NULL, NULL, data_cflags);
	}

	while (--c != 0) {
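The new loop keeps a cache only for sizes aligned to half their floor power of two (the powers of two plus the 3*2^n midpoints between them), and aligns each cache to a page when possible, otherwise to the size's largest power-of-two divisor. A standalone sketch that applies the same bit tricks and prints the surviving cache sizes; PAGESIZE is assumed to be 4096 here:

#include <stdint.h>
#include <stdio.h>

#define SPA_MINBLOCKSHIFT	9
#define PAGESIZE		4096
#define ISP2(x)			(((x) & ((x) - 1)) == 0)
#define IS_P2ALIGNED(v, a)	(((v) & ((a) - 1)) == 0)

int
main(void)
{
	/* Scan the first 64 multiples of SPA_MINBLOCKSIZE (up to 32K). */
	for (size_t c = 0; c < 64; c++) {
		size_t size = (c + 1) << SPA_MINBLOCKSHIFT;

		/* Keep only half-powers of 2. */
		size_t p2 = size;
		while (!ISP2(p2))
			p2 &= p2 - 1;	/* floor power of two of size */
		if (!IS_P2ALIGNED(size, p2 / 2))
			continue;

		/* A page if possible, else the lowest set bit of size. */
		size_t align;
		if (IS_P2ALIGNED(size, PAGESIZE))
			align = PAGESIZE;
		else
			align = (size ^ (size - 1)) / 2 + 1;

		printf("cache %6zu bytes, align %5zu\n", size, align);
	}
	return (0);
}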
@ -369,6 +369,40 @@ zvol_set_volsize(const char *name, uint64_t volsize)
|
|||
return (SET_ERROR(error));
|
||||
}
|
||||
|
||||
/*
|
||||
* Update volthreading.
|
||||
*/
|
||||
int
|
||||
zvol_set_volthreading(const char *name, boolean_t value)
|
||||
{
|
||||
zvol_state_t *zv = zvol_find_by_name(name, RW_NONE);
|
||||
if (zv == NULL)
|
||||
return (ENOENT);
|
||||
zv->zv_threading = value;
|
||||
mutex_exit(&zv->zv_state_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update zvol ro property.
|
||||
*/
|
||||
int
|
||||
zvol_set_ro(const char *name, boolean_t value)
|
||||
{
|
||||
zvol_state_t *zv = zvol_find_by_name(name, RW_NONE);
|
||||
if (zv == NULL)
|
||||
return (-1);
|
||||
if (value) {
|
||||
zvol_os_set_disk_ro(zv, 1);
|
||||
zv->zv_flags |= ZVOL_RDONLY;
|
||||
} else {
|
||||
zvol_os_set_disk_ro(zv, 0);
|
||||
zv->zv_flags &= ~ZVOL_RDONLY;
|
||||
}
|
||||
mutex_exit(&zv->zv_state_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Sanity check volume block size.
|
||||
*/
|
||||
|
@ -583,7 +617,7 @@ static const ssize_t zvol_immediate_write_sz = 32768;
|
|||
|
||||
void
|
||||
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
|
||||
uint64_t size, int sync)
|
||||
uint64_t size, boolean_t commit)
|
||||
{
|
||||
uint32_t blocksize = zv->zv_volblocksize;
|
||||
zilog_t *zilog = zv->zv_zilog;
|
||||
|
@ -598,7 +632,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
|
|||
else if (!spa_has_slogs(zilog->zl_spa) &&
|
||||
size >= blocksize && blocksize > zvol_immediate_write_sz)
|
||||
write_state = WR_INDIRECT;
|
||||
else if (sync)
|
||||
else if (commit)
|
||||
write_state = WR_COPIED;
|
||||
else
|
||||
write_state = WR_NEED_COPY;
|
||||
|
@ -633,7 +667,6 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
|
|||
BP_ZERO(&lr->lr_blkptr);
|
||||
|
||||
itx->itx_private = zv;
|
||||
itx->itx_sync = sync;
|
||||
|
||||
(void) zil_itx_assign(zilog, itx, tx);
|
||||
|
||||
|
@@ -650,8 +683,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
  * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
  */
 void
-zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
-    boolean_t sync)
+zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len)
 {
 	itx_t *itx;
 	lr_truncate_t *lr;
@@ -666,7 +698,6 @@ zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
 	lr->lr_offset = off;
 	lr->lr_length = len;
 
-	itx->itx_sync = sync;
 	zil_itx_assign(zilog, itx, tx);
 }
 
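With itx->itx_sync gone from both paths, truncate and write records are always queued the same way, and durability comes from an explicit commit of the log afterwards (in ZFS terms, presumably a zil_commit() by the caller). A toy model of that division of labor, not the ZFS API:

#include <stdio.h>

static int pending;	/* records queued but not yet on stable storage */

static void
itx_assign(const char *what)
{
	/* No per-record sync flag anymore: just queue it. */
	pending++;
	printf("queued %s (pending=%d)\n", what, pending);
}

static void
log_truncate(unsigned long off, unsigned long len)
{
	(void) off; (void) len;
	itx_assign("truncate");
}

static void
commit(void)
{
	/* Durability is a separate, explicit step. */
	printf("flushed %d record(s)\n", pending);
	pending = 0;
}

int
main(void)
{
	log_truncate(0, 4096);		/* asynchronous by default */
	log_truncate(8192, 4096);
	commit();			/* caller decides when to flush */
	return (0);
}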
@@ -1541,6 +1572,7 @@ typedef struct zvol_set_prop_int_arg {
 	const char *zsda_name;
 	uint64_t zsda_value;
 	zprop_source_t zsda_source;
+	zfs_prop_t zsda_prop;
 	dmu_tx_t *zsda_tx;
 } zvol_set_prop_int_arg_t;
 
@@ -1549,7 +1581,7 @@
  * conditions are imposed.
  */
 static int
-zvol_set_snapdev_check(void *arg, dmu_tx_t *tx)
+zvol_set_common_check(void *arg, dmu_tx_t *tx)
 {
 	zvol_set_prop_int_arg_t *zsda = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
@@ -1566,17 +1598,33 @@
 }
 
 static int
-zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
+zvol_set_common_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 {
-	(void) arg;
+	zvol_set_prop_int_arg_t *zsda = arg;
 	char dsname[MAXNAMELEN];
 	zvol_task_t *task;
-	uint64_t snapdev;
+	uint64_t prop;
 
+	const char *prop_name = zfs_prop_to_name(zsda->zsda_prop);
 	dsl_dataset_name(ds, dsname);
-	if (dsl_prop_get_int_ds(ds, "snapdev", &snapdev) != 0)
+
+	if (dsl_prop_get_int_ds(ds, prop_name, &prop) != 0)
 		return (0);
-	task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname, NULL, snapdev);
+
+	switch (zsda->zsda_prop) {
+	case ZFS_PROP_VOLMODE:
+		task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname,
+		    NULL, prop);
+		break;
+	case ZFS_PROP_SNAPDEV:
+		task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname,
+		    NULL, prop);
+		break;
+	default:
+		task = NULL;
+		break;
+	}
+
 	if (task == NULL)
 		return (0);
 
@@ -1586,14 +1634,14 @@
 }
 
 /*
- * Traverse all child datasets and apply snapdev appropriately.
+ * Traverse all child datasets and apply the property appropriately.
  * We call dsl_prop_set_sync_impl() here to set the value only on the toplevel
- * dataset and read the effective "snapdev" on every child in the callback
+ * dataset and read the effective "property" on every child in the callback
  * function: this is because the value is not guaranteed to be the same in the
  * whole dataset hierarchy.
  */
 static void
-zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx)
+zvol_set_common_sync(void *arg, dmu_tx_t *tx)
 {
 	zvol_set_prop_int_arg_t *zsda = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
@@ -1606,115 +1654,31 @@
 
 	error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds);
 	if (error == 0) {
-		dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_SNAPDEV),
+		dsl_prop_set_sync_impl(ds, zfs_prop_to_name(zsda->zsda_prop),
 		    zsda->zsda_source, sizeof (zsda->zsda_value), 1,
 		    &zsda->zsda_value, zsda->zsda_tx);
 		dsl_dataset_rele(ds, FTAG);
 	}
-	dmu_objset_find_dp(dp, dd->dd_object, zvol_set_snapdev_sync_cb,
+
+	dmu_objset_find_dp(dp, dd->dd_object, zvol_set_common_sync_cb,
 	    zsda, DS_FIND_CHILDREN);
 
 	dsl_dir_rele(dd, FTAG);
 }
 
 int
-zvol_set_snapdev(const char *ddname, zprop_source_t source, uint64_t snapdev)
+zvol_set_common(const char *ddname, zfs_prop_t prop, zprop_source_t source,
+    uint64_t val)
 {
 	zvol_set_prop_int_arg_t zsda;
 
 	zsda.zsda_name = ddname;
 	zsda.zsda_source = source;
-	zsda.zsda_value = snapdev;
+	zsda.zsda_value = val;
+	zsda.zsda_prop = prop;
 
-	return (dsl_sync_task(ddname, zvol_set_snapdev_check,
-	    zvol_set_snapdev_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
-}
-
-/*
- * Sanity check the dataset for safe use by the sync task.  No additional
- * conditions are imposed.
- */
-static int
-zvol_set_volmode_check(void *arg, dmu_tx_t *tx)
-{
-	zvol_set_prop_int_arg_t *zsda = arg;
-	dsl_pool_t *dp = dmu_tx_pool(tx);
-	dsl_dir_t *dd;
-	int error;
-
-	error = dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL);
-	if (error != 0)
-		return (error);
-
-	dsl_dir_rele(dd, FTAG);
-
-	return (error);
-}
-
-static int
-zvol_set_volmode_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
-{
-	(void) arg;
-	char dsname[MAXNAMELEN];
-	zvol_task_t *task;
-	uint64_t volmode;
-
-	dsl_dataset_name(ds, dsname);
-	if (dsl_prop_get_int_ds(ds, "volmode", &volmode) != 0)
-		return (0);
-	task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname, NULL, volmode);
-	if (task == NULL)
-		return (0);
-
-	(void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
-	    task, TQ_SLEEP);
-	return (0);
-}
-
-/*
- * Traverse all child datasets and apply volmode appropriately.
- * We call dsl_prop_set_sync_impl() here to set the value only on the toplevel
- * dataset and read the effective "volmode" on every child in the callback
- * function: this is because the value is not guaranteed to be the same in the
- * whole dataset hierarchy.
- */
-static void
-zvol_set_volmode_sync(void *arg, dmu_tx_t *tx)
-{
-	zvol_set_prop_int_arg_t *zsda = arg;
-	dsl_pool_t *dp = dmu_tx_pool(tx);
-	dsl_dir_t *dd;
-	dsl_dataset_t *ds;
-	int error;
-
-	VERIFY0(dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL));
-	zsda->zsda_tx = tx;
-
-	error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds);
-	if (error == 0) {
-		dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_VOLMODE),
-		    zsda->zsda_source, sizeof (zsda->zsda_value), 1,
-		    &zsda->zsda_value, zsda->zsda_tx);
-		dsl_dataset_rele(ds, FTAG);
-	}
-
-	dmu_objset_find_dp(dp, dd->dd_object, zvol_set_volmode_sync_cb,
-	    zsda, DS_FIND_CHILDREN);
-
-	dsl_dir_rele(dd, FTAG);
-}
-
-int
-zvol_set_volmode(const char *ddname, zprop_source_t source, uint64_t volmode)
-{
-	zvol_set_prop_int_arg_t zsda;
-
-	zsda.zsda_name = ddname;
-	zsda.zsda_source = source;
-	zsda.zsda_value = volmode;
-
-	return (dsl_sync_task(ddname, zvol_set_volmode_check,
-	    zvol_set_volmode_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
+	return (dsl_sync_task(ddname, zvol_set_common_check,
+	    zvol_set_common_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
 }
 
 void
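The net effect of this refactor is that the former zvol_set_snapdev() and zvol_set_volmode() entry points collapse into zvol_set_common(), carrying the property in zsda_prop and dispatching per property in the sync callback. A compact userland model of that shape, with illustrative names:

#include <stdint.h>
#include <stdio.h>

typedef enum { PROP_SNAPDEV, PROP_VOLMODE } prop_t;

/* One generic setter replaces a per-property entry point. */
static int
set_common(const char *ddname, prop_t prop, uint64_t val)
{
	switch (prop) {
	case PROP_SNAPDEV:
		printf("%s: snapdev=%llu\n", ddname,
		    (unsigned long long)val);
		break;
	case PROP_VOLMODE:
		printf("%s: volmode=%llu\n", ddname,
		    (unsigned long long)val);
		break;
	default:
		return (-1);	/* unknown property: no task to queue */
	}
	return (0);
}

int
main(void)
{
	/* Both former entry points become one call with a prop argument. */
	(void) set_common("tank/vol0", PROP_SNAPDEV, 1);
	(void) set_common("tank/vol0", PROP_VOLMODE, 2);
	return (0);
}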
@@ -1110,7 +1110,7 @@
 /* #undef ZFS_IS_GPL_COMPATIBLE */
 
 /* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.2.99-FreeBSD_g043c6ee3b"
+#define ZFS_META_ALIAS "zfs-2.2.99-184-FreeBSD_g41e55b476"
 
 /* Define the project author. */
 #define ZFS_META_AUTHOR "OpenZFS"
@@ -1140,7 +1140,7 @@
 #define ZFS_META_NAME "zfs"
 
 /* Define the project release. */
-#define ZFS_META_RELEASE "FreeBSD_g043c6ee3b"
+#define ZFS_META_RELEASE "184-FreeBSD_g41e55b476"
 
 /* Define the project version. */
 #define ZFS_META_VERSION "2.2.99"
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.2.99-174-g043c6ee3b"
+#define ZFS_META_GITREV "zfs-2.2.99-184-g41e55b476"