Notable upstream pull request merges:
 #15665 9b1677fb5 dmu: Allow buffer fills to fail

Obtained from:	OpenZFS
OpenZFS commit:	dbda45160f
This commit is contained in:
Martin Matuska 2023-12-19 23:17:48 +01:00
commit 188408da9f
15 changed files with 139 additions and 34 deletions

View File

@ -62,7 +62,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
}
static inline void
zfs_uio_advance(zfs_uio_t *uio, size_t size)
zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
{
zfs_uio_resid(uio) -= size;
zfs_uio_offset(uio) += size;

View File

@ -95,7 +95,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
}
static inline void
zfs_uio_advance(zfs_uio_t *uio, size_t size)
zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
{
uio->uio_resid -= size;
uio->uio_loffset += size;

View File

@ -380,8 +380,8 @@ dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail);
boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed);
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,

View File

@ -90,7 +90,7 @@ zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len)
}
static inline void
zfs_uio_advance(zfs_uio_t *uio, size_t size)
zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
{
uio->uio_resid -= size;
uio->uio_loffset += size;

View File

@ -107,7 +107,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
if (tocpy == db->db_size)
dmu_buf_will_fill(db, tx);
dmu_buf_will_fill(db, tx, B_FALSE);
else
dmu_buf_will_dirty(db, tx);
@ -123,7 +123,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
}
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
dmu_buf_fill_done(db, tx, B_FALSE);
offset += tocpy;
size -= tocpy;

View File

@ -2751,7 +2751,7 @@ dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
}
void
dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
@ -2769,8 +2769,14 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
* Block cloning: We will be completely overwriting a block
* cloned in this transaction group, so let's undirty the
* pending clone and mark the block as uncached. This will be
* as if the clone was never done.
* as if the clone was never done. But if the fill can fail
* we should have a way to return back to the cloned data.
*/
if (canfail && dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) {
mutex_exit(&db->db_mtx);
dmu_buf_will_dirty(db_fake, tx);
return;
}
VERIFY(!dbuf_undirty(db, tx));
db->db_state = DB_UNCACHED;
}
@ -2831,32 +2837,41 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx)
dl->dr_overridden_by.blk_birth = dr->dr_txg;
}
void
dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx)
boolean_t
dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed)
{
(void) tx;
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
dbuf_states_t old_state;
mutex_enter(&db->db_mtx);
DBUF_VERIFY(db);
old_state = db->db_state;
db->db_state = DB_CACHED;
if (old_state == DB_FILL) {
if (db->db_state == DB_FILL) {
if (db->db_level == 0 && db->db_freed_in_flight) {
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
/* we were freed while filling */
/* XXX dbuf_undirty? */
memset(db->db.db_data, 0, db->db.db_size);
db->db_freed_in_flight = FALSE;
db->db_state = DB_CACHED;
DTRACE_SET_STATE(db,
"fill done handling freed in flight");
failed = B_FALSE;
} else if (failed) {
VERIFY(!dbuf_undirty(db, tx));
db->db_buf = NULL;
dbuf_clear_data(db);
DTRACE_SET_STATE(db, "fill failed");
} else {
db->db_state = DB_CACHED;
DTRACE_SET_STATE(db, "fill done");
}
cv_broadcast(&db->db_changed);
} else {
db->db_state = DB_CACHED;
failed = B_FALSE;
}
mutex_exit(&db->db_mtx);
return (failed);
}
void
@ -3001,7 +3016,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
DTRACE_SET_STATE(db, "filling assigned arcbuf");
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
dmu_buf_fill_done(&db->db, tx);
dmu_buf_fill_done(&db->db, tx, B_FALSE);
}
void

View File

@ -1134,14 +1134,14 @@ dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size,
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
if (tocpy == db->db_size)
dmu_buf_will_fill(db, tx);
dmu_buf_will_fill(db, tx, B_FALSE);
else
dmu_buf_will_dirty(db, tx);
(void) memcpy((char *)db->db_data + bufoff, buf, tocpy);
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
dmu_buf_fill_done(db, tx, B_FALSE);
offset += tocpy;
size -= tocpy;
@ -1349,27 +1349,24 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)
ASSERT(size > 0);
bufoff = zfs_uio_offset(uio) - db->db_offset;
offset_t off = zfs_uio_offset(uio);
bufoff = off - db->db_offset;
tocpy = MIN(db->db_size - bufoff, size);
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
if (tocpy == db->db_size)
dmu_buf_will_fill(db, tx);
dmu_buf_will_fill(db, tx, B_TRUE);
else
dmu_buf_will_dirty(db, tx);
/*
* XXX zfs_uiomove could block forever (eg.nfs-backed
* pages). There needs to be a uiolockdown() function
* to lock the pages in memory, so that zfs_uiomove won't
* block.
*/
err = zfs_uio_fault_move((char *)db->db_data + bufoff,
tocpy, UIO_WRITE, uio);
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
if (tocpy == db->db_size && dmu_buf_fill_done(db, tx, err)) {
/* The fill was reverted. Undo any uio progress. */
zfs_uio_advance(uio, off - zfs_uio_offset(uio));
}
if (err)
break;

View File

@ -2532,7 +2532,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
* size of the provided arc_buf_t.
*/
if (db_spill->db_size != drrs->drr_length) {
dmu_buf_will_fill(db_spill, tx);
dmu_buf_will_fill(db_spill, tx, B_FALSE);
VERIFY0(dbuf_spill_set_blksz(db_spill,
drrs->drr_length, tx));
}

View File

@ -490,7 +490,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
dmu_buf_t *db;
VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
DB_RF_MUST_SUCCEED, FTAG, &db));
dmu_buf_will_fill(db, tx);
dmu_buf_will_fill(db, tx, B_FALSE);
VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
SPA_MINBLOCKSIZE), tx));
local_rl->rl_phys = db->db_data;

View File

@ -44,7 +44,8 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial',
'block_cloning_copyfilerange_cross_dataset',
'block_cloning_cross_enc_dataset',
'block_cloning_copyfilerange_fallback_same_txg',
'block_cloning_replay', 'block_cloning_replay_encrypted']
'block_cloning_replay', 'block_cloning_replay_encrypted',
'block_cloning_lwb_buffer_overflow']
tags = ['functional', 'block_cloning']
[tests/functional/chattr:Linux]

View File

@ -305,6 +305,8 @@ elif sys.platform.startswith('linux'):
['SKIP', cfr_reason],
'block_cloning/block_cloning_replay_encrypted':
['SKIP', cfr_reason],
'block_cloning/block_cloning_lwb_buffer_overflow':
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_cross_dataset':
['SKIP', cfr_cross_reason],
'block_cloning/block_cloning_copyfilerange_fallback_same_txg':

View File

@ -454,6 +454,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/block_cloning/block_cloning_cross_enc_dataset.ksh \
functional/block_cloning/block_cloning_replay.ksh \
functional/block_cloning/block_cloning_replay_encrypted.ksh \
functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh \
functional/bootfs/bootfs_001_pos.ksh \
functional/bootfs/bootfs_002_neg.ksh \
functional/bootfs/bootfs_003_pos.ksh \

View File

@ -0,0 +1,89 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by iXsystems, Inc. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib
#
# DESCRIPTION:
# Test for LWB buffer overflow with a multi-VDEV ZIL, where a 128KB
# block write is split into two 68KB ones and a maximum-size 128KB
# TX_CLONE_RANGE record with 1022 block pointers is written into a
# 68KB buffer.
#
# STRATEGY:
# 1. Create a pool with multiple VDEVs ZIL
# 2. Write maximum sizes TX_CLONE_RANGE record with 1022 block
# pointers into 68KB buffer
# 3. Sync TXG
# 4. Clone the file
# 5. Synchronize cached writes
#
verify_runnable "global"

# The kernel-version comparison below only makes sense on Linux, so
# guard it with is_linux; other platforms must not be skipped by a
# Linux version check.
if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
	log_unsupported "copy_file_range not available before Linux 4.5"
fi

# Directory for the backing files, the three data vdevs and the two
# log devices used to build the pool.
VDIR=$TEST_BASE_DIR/disk-bclone
VDEV="$VDIR/a $VDIR/b $VDIR/c"
LDEV="$VDIR/e $VDIR/f"
function cleanup
{
	# Destroy the pool only if it still exists, then remove the
	# directory that holds the vdev backing files.
	if datasetexists $TESTPOOL; then
		destroy_pool $TESTPOOL
	fi
	rm -rf $VDIR
}
log_onexit cleanup

log_assert "Test for LWB buffer overflow with multiple VDEVs ZIL"

# Recreate the backing-file directory and size every vdev file.
log_must rm -rf $VDIR
log_must mkdir -p $VDIR
log_must truncate -s $MINVDEVSIZE $VDEV $LDEV

# Pool with the data vdevs plus the log devices as a mirrored log.
log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \
    log mirror $LDEV
log_must zfs create -o recordsize=32K $TESTPOOL/$TESTFS

# Each ZIL log entry can fit 130816 bytes for a block cloning operation,
# so it can store 1022 block pointers. When LWB optimization is enabled,
# an assert is hit when a 128KB block write is split into two 68KB ones
# for 2 SLOG devices.
log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 bs=32K count=1022 \
    conv=fsync
sync_pool $TESTPOOL

# Clone through copy_file_range (-f): that is the interface this test is
# version-gated on above, whereas -c would select the FICLONE ioctl.
log_must clonefile -f /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2
log_must sync

sync_pool $TESTPOOL

# The clone must match the source byte for byte and share all 1022
# blocks (block indexes 0 through 1021) with it.
log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2
typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 $TESTPOOL/$TESTFS file2)
log_must [ "$blocks" = "$(seq -s " " 0 1021)" ]

log_pass "LWB buffer overflow is not triggered with multiple VDEVs ZIL"

View File

@ -1113,7 +1113,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.2.99-268-FreeBSD_g86e115e21"
#define ZFS_META_ALIAS "zfs-2.2.99-270-FreeBSD_gdbda45160"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@ -1143,7 +1143,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "268-FreeBSD_g86e115e21"
#define ZFS_META_RELEASE "270-FreeBSD_gdbda45160"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"

View File

@ -1 +1 @@
#define ZFS_META_GITREV "zfs-2.2.99-268-g86e115e21"
#define ZFS_META_GITREV "zfs-2.2.99-270-gdbda45160"