mirror of
https://github.com/freebsd/freebsd-src
synced 2024-10-06 16:40:47 +00:00
zfs: merge openzfs/zfs@feff9dfed
Notable upstream pull request merges: #14833 Update compatibility.d files #14841 ZIL: Reduce scope of per-dataset zl_issuer_lock #14863 zil: Add some more statistics #14866 btree: Implement faster binary search algorithm #14894 Fix inconsistent definition of zfs_scrub_error_blocks_per_txg #14892 Fix concurrent resilvers initiated at same time #14903 Fix NULL pointer dereference when doing concurrent 'send' operations #14910 ZIL: Allow to replay blocks of any size #14939 Fix the L2ARC write size calculating logic #14934 Introduce zfs_refcount_(add|remove)_few() #14946 Improve l2arc reporting in arc_summary #14953 Finally drop long disabled vdev cache #14954 Fix the L2ARC write size calculating logic (2) #14955 Use list_remove_head() where possible #14959 ZIL: Fix race introduced byf63811f072
Obtained from: OpenZFS OpenZFS commit:feff9dfed3
This commit is contained in:
commit
4e8d558c9d
|
@ -135,7 +135,6 @@ KERNEL_C = \
|
|||
uberblock.c \
|
||||
unique.c \
|
||||
vdev.c \
|
||||
vdev_cache.c \
|
||||
vdev_draid.c \
|
||||
vdev_draid_rand.c \
|
||||
vdev_file.c \
|
||||
|
|
|
@ -326,7 +326,6 @@ contrib/openzfs/module/zfs/txg.c optional zfs compile-with "${ZFS_C}"
|
|||
contrib/openzfs/module/zfs/uberblock.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/unique.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/vdev.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/vdev_cache.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/vdev_draid.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/vdev_draid_rand.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/vdev_indirect.c optional zfs compile-with "${ZFS_C}"
|
||||
|
|
|
@ -6,5 +6,5 @@ Release: 1
|
|||
Release-Tags: relext
|
||||
License: CDDL
|
||||
Author: OpenZFS
|
||||
Linux-Maximum: 6.2
|
||||
Linux-Maximum: 6.3
|
||||
Linux-Minimum: 3.10
|
||||
|
|
|
@ -64,7 +64,6 @@ SECTION_HELP = 'print info from one section ('+' '.join(SECTIONS)+')'
|
|||
SECTION_PATHS = {'arc': 'arcstats',
|
||||
'dmu': 'dmu_tx',
|
||||
'l2arc': 'arcstats', # L2ARC stuff lives in arcstats
|
||||
'vdev': 'vdev_cache_stats',
|
||||
'zfetch': 'zfetchstats',
|
||||
'zil': 'zil'}
|
||||
|
||||
|
@ -90,8 +89,6 @@ if sys.platform.startswith('freebsd'):
|
|||
# Requires py36-sysctl on FreeBSD
|
||||
import sysctl
|
||||
|
||||
VDEV_CACHE_SIZE = 'vdev.cache_size'
|
||||
|
||||
def is_value(ctl):
|
||||
return ctl.type != sysctl.CTLTYPE_NODE
|
||||
|
||||
|
@ -135,8 +132,6 @@ elif sys.platform.startswith('linux'):
|
|||
SPL_PATH = '/sys/module/spl/parameters'
|
||||
TUNABLES_PATH = '/sys/module/zfs/parameters'
|
||||
|
||||
VDEV_CACHE_SIZE = 'zfs_vdev_cache_size'
|
||||
|
||||
def load_kstats(section):
|
||||
path = os.path.join(KSTAT_PATH, section)
|
||||
with open(path) as f:
|
||||
|
@ -842,7 +837,8 @@ def section_l2arc(kstats_dict):
|
|||
('Free on write:', 'l2_free_on_write'),
|
||||
('R/W clashes:', 'l2_rw_clash'),
|
||||
('Bad checksums:', 'l2_cksum_bad'),
|
||||
('I/O errors:', 'l2_io_error'))
|
||||
('Read errors:', 'l2_io_error'),
|
||||
('Write errors:', 'l2_writes_error'))
|
||||
|
||||
for title, value in l2_todo:
|
||||
prt_i1(title, f_hits(arc_stats[value]))
|
||||
|
@ -878,28 +874,20 @@ def section_l2arc(kstats_dict):
|
|||
prt_i2('Miss ratio:',
|
||||
f_perc(arc_stats['l2_misses'], l2_access_total),
|
||||
f_hits(arc_stats['l2_misses']))
|
||||
prt_i1('Feeds:', f_hits(arc_stats['l2_feeds']))
|
||||
|
||||
print()
|
||||
print('L2ARC writes:')
|
||||
|
||||
if arc_stats['l2_writes_done'] != arc_stats['l2_writes_sent']:
|
||||
prt_i2('Writes sent:', 'FAULTED', f_hits(arc_stats['l2_writes_sent']))
|
||||
prt_i2('Done ratio:',
|
||||
f_perc(arc_stats['l2_writes_done'],
|
||||
arc_stats['l2_writes_sent']),
|
||||
f_hits(arc_stats['l2_writes_done']))
|
||||
prt_i2('Error ratio:',
|
||||
f_perc(arc_stats['l2_writes_error'],
|
||||
arc_stats['l2_writes_sent']),
|
||||
f_hits(arc_stats['l2_writes_error']))
|
||||
else:
|
||||
prt_i2('Writes sent:', '100 %', f_hits(arc_stats['l2_writes_sent']))
|
||||
print('L2ARC I/O:')
|
||||
prt_i2('Reads:',
|
||||
f_bytes(arc_stats['l2_read_bytes']),
|
||||
f_hits(arc_stats['l2_hits']))
|
||||
prt_i2('Writes:',
|
||||
f_bytes(arc_stats['l2_write_bytes']),
|
||||
f_hits(arc_stats['l2_writes_sent']))
|
||||
|
||||
print()
|
||||
print('L2ARC evicts:')
|
||||
prt_i1('Lock retries:', f_hits(arc_stats['l2_evict_lock_retry']))
|
||||
prt_i1('Upon reading:', f_hits(arc_stats['l2_evict_reading']))
|
||||
prt_i1('L1 cached:', f_hits(arc_stats['l2_evict_l1cached']))
|
||||
prt_i1('While reading:', f_hits(arc_stats['l2_evict_reading']))
|
||||
print()
|
||||
|
||||
|
||||
|
@ -959,35 +947,6 @@ def section_tunables(*_):
|
|||
print()
|
||||
|
||||
|
||||
def section_vdev(kstats_dict):
|
||||
"""Collect information on VDEV caches"""
|
||||
|
||||
# Currently [Nov 2017] the VDEV cache is disabled, because it is actually
|
||||
# harmful. When this is the case, we just skip the whole entry. See
|
||||
# https://github.com/openzfs/zfs/blob/master/module/zfs/vdev_cache.c
|
||||
# for details
|
||||
tunables = get_vdev_params()
|
||||
|
||||
if tunables[VDEV_CACHE_SIZE] == '0':
|
||||
print('VDEV cache disabled, skipping section\n')
|
||||
return
|
||||
|
||||
vdev_stats = isolate_section('vdev_cache_stats', kstats_dict)
|
||||
|
||||
vdev_cache_total = int(vdev_stats['hits']) +\
|
||||
int(vdev_stats['misses']) +\
|
||||
int(vdev_stats['delegations'])
|
||||
|
||||
prt_1('VDEV cache summary:', f_hits(vdev_cache_total))
|
||||
prt_i2('Hit ratio:', f_perc(vdev_stats['hits'], vdev_cache_total),
|
||||
f_hits(vdev_stats['hits']))
|
||||
prt_i2('Miss ratio:', f_perc(vdev_stats['misses'], vdev_cache_total),
|
||||
f_hits(vdev_stats['misses']))
|
||||
prt_i2('Delegations:', f_perc(vdev_stats['delegations'], vdev_cache_total),
|
||||
f_hits(vdev_stats['delegations']))
|
||||
print()
|
||||
|
||||
|
||||
def section_zil(kstats_dict):
|
||||
"""Collect information on the ZFS Intent Log. Some of the information
|
||||
taken from https://github.com/openzfs/zfs/blob/master/include/sys/zil.h
|
||||
|
@ -1015,7 +974,6 @@ section_calls = {'arc': section_arc,
|
|||
'l2arc': section_l2arc,
|
||||
'spl': section_spl,
|
||||
'tunables': section_tunables,
|
||||
'vdev': section_vdev,
|
||||
'zil': section_zil}
|
||||
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
* under sponsorship from the FreeBSD Foundation.
|
||||
* Copyright (c) 2021 Allan Jude
|
||||
* Copyright (c) 2021 Toomas Soome <tsoome@me.com>
|
||||
* Copyright (c) 2023, Klara Inc.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -326,7 +327,7 @@ sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
|
|||
int err;
|
||||
struct sublivelist_verify *sv = args;
|
||||
|
||||
zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare,
|
||||
zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare, NULL,
|
||||
sizeof (sublivelist_verify_block_refcnt_t));
|
||||
|
||||
err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
|
||||
|
@ -390,7 +391,7 @@ sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
|
|||
{
|
||||
(void) args;
|
||||
sublivelist_verify_t sv;
|
||||
zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
|
||||
zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
|
||||
sizeof (sublivelist_verify_block_t));
|
||||
int err = sublivelist_verify_func(&sv, dle);
|
||||
zfs_btree_clear(&sv.sv_leftover);
|
||||
|
@ -682,7 +683,7 @@ livelist_metaslab_validate(spa_t *spa)
|
|||
(void) printf("Verifying deleted livelist entries\n");
|
||||
|
||||
sublivelist_verify_t sv;
|
||||
zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
|
||||
zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
|
||||
sizeof (sublivelist_verify_block_t));
|
||||
iterate_deleted_livelists(spa, livelist_verify, &sv);
|
||||
|
||||
|
@ -716,7 +717,7 @@ livelist_metaslab_validate(spa_t *spa)
|
|||
mv.mv_start = m->ms_start;
|
||||
mv.mv_end = m->ms_start + m->ms_size;
|
||||
zfs_btree_create(&mv.mv_livelist_allocs,
|
||||
livelist_block_compare,
|
||||
livelist_block_compare, NULL,
|
||||
sizeof (sublivelist_verify_block_t));
|
||||
|
||||
mv_populate_livelist_allocs(&mv, &sv);
|
||||
|
@ -789,6 +790,9 @@ usage(void)
|
|||
"\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
|
||||
"\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] [-K <key>]\n"
|
||||
"\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]\n"
|
||||
"\t%s -B [-e [-V] [-p <path> ...]] [-I <inflight I/Os>]\n"
|
||||
"\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
|
||||
"\t\t[-K <key>] <poolname>/<objset id> [<backupflags>]\n"
|
||||
"\t%s [-v] <bookmark>\n"
|
||||
"\t%s -C [-A] [-U <cache>]\n"
|
||||
"\t%s -l [-Aqu] <device>\n"
|
||||
|
@ -802,7 +806,7 @@ usage(void)
|
|||
"\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
|
||||
"<poolname>\n\n",
|
||||
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
|
||||
cmdname, cmdname, cmdname, cmdname);
|
||||
cmdname, cmdname, cmdname, cmdname, cmdname);
|
||||
|
||||
(void) fprintf(stderr, " Dataset name must include at least one "
|
||||
"separator character '/' or '@'\n");
|
||||
|
@ -825,6 +829,8 @@ usage(void)
|
|||
(void) fprintf(stderr, " Options to control amount of output:\n");
|
||||
(void) fprintf(stderr, " -b --block-stats "
|
||||
"block statistics\n");
|
||||
(void) fprintf(stderr, " -B --backup "
|
||||
"backup stream\n");
|
||||
(void) fprintf(stderr, " -c --checksum "
|
||||
"checksum all metadata (twice for all data) blocks\n");
|
||||
(void) fprintf(stderr, " -C --config "
|
||||
|
@ -4875,6 +4881,81 @@ dump_path(char *ds, char *path, uint64_t *retobj)
|
|||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
dump_backup_bytes(objset_t *os, void *buf, int len, void *arg)
|
||||
{
|
||||
const char *p = (const char *)buf;
|
||||
ssize_t nwritten;
|
||||
|
||||
(void) os;
|
||||
(void) arg;
|
||||
|
||||
/* Write the data out, handling short writes and signals. */
|
||||
while ((nwritten = write(STDOUT_FILENO, p, len)) < len) {
|
||||
if (nwritten < 0) {
|
||||
if (errno == EINTR)
|
||||
continue;
|
||||
return (errno);
|
||||
}
|
||||
p += nwritten;
|
||||
len -= nwritten;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
dump_backup(const char *pool, uint64_t objset_id, const char *flagstr)
|
||||
{
|
||||
boolean_t embed = B_FALSE;
|
||||
boolean_t large_block = B_FALSE;
|
||||
boolean_t compress = B_FALSE;
|
||||
boolean_t raw = B_FALSE;
|
||||
|
||||
const char *c;
|
||||
for (c = flagstr; c != NULL && *c != '\0'; c++) {
|
||||
switch (*c) {
|
||||
case 'e':
|
||||
embed = B_TRUE;
|
||||
break;
|
||||
case 'L':
|
||||
large_block = B_TRUE;
|
||||
break;
|
||||
case 'c':
|
||||
compress = B_TRUE;
|
||||
break;
|
||||
case 'w':
|
||||
raw = B_TRUE;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "dump_backup: invalid flag "
|
||||
"'%c'\n", *c);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (isatty(STDOUT_FILENO)) {
|
||||
fprintf(stderr, "dump_backup: stream cannot be written "
|
||||
"to a terminal\n");
|
||||
return;
|
||||
}
|
||||
|
||||
offset_t off = 0;
|
||||
dmu_send_outparams_t out = {
|
||||
.dso_outfunc = dump_backup_bytes,
|
||||
.dso_dryrun = B_FALSE,
|
||||
};
|
||||
|
||||
int err = dmu_send_obj(pool, objset_id, /* fromsnap */0, embed,
|
||||
large_block, compress, raw, /* saved */ B_FALSE, STDOUT_FILENO,
|
||||
&off, &out);
|
||||
if (err != 0) {
|
||||
fprintf(stderr, "dump_backup: dmu_send_obj: %s\n",
|
||||
strerror(err));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
|
||||
{
|
||||
|
@ -8465,9 +8546,9 @@ zdb_read_block(char *thing, spa_t *spa)
|
|||
*/
|
||||
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
|
||||
psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
|
||||
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
|
||||
ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
|
||||
ZIO_FLAG_OPTIONAL, NULL, NULL));
|
||||
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
|
||||
NULL, NULL));
|
||||
}
|
||||
|
||||
error = zio_wait(zio);
|
||||
|
@ -8561,7 +8642,6 @@ zdb_read_block(char *thing, spa_t *spa)
|
|||
zio_nowait(zio_vdev_child_io(czio, bp, vd,
|
||||
offset, pabd, psize, ZIO_TYPE_READ,
|
||||
ZIO_PRIORITY_SYNC_READ,
|
||||
ZIO_FLAG_DONT_CACHE |
|
||||
ZIO_FLAG_DONT_PROPAGATE |
|
||||
ZIO_FLAG_DONT_RETRY |
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
|
||||
|
@ -8695,6 +8775,7 @@ main(int argc, char **argv)
|
|||
struct option long_options[] = {
|
||||
{"ignore-assertions", no_argument, NULL, 'A'},
|
||||
{"block-stats", no_argument, NULL, 'b'},
|
||||
{"backup", no_argument, NULL, 'B'},
|
||||
{"checksum", no_argument, NULL, 'c'},
|
||||
{"config", no_argument, NULL, 'C'},
|
||||
{"datasets", no_argument, NULL, 'd'},
|
||||
|
@ -8736,10 +8817,11 @@ main(int argc, char **argv)
|
|||
};
|
||||
|
||||
while ((c = getopt_long(argc, argv,
|
||||
"AbcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
|
||||
"AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
|
||||
long_options, NULL)) != -1) {
|
||||
switch (c) {
|
||||
case 'b':
|
||||
case 'B':
|
||||
case 'c':
|
||||
case 'C':
|
||||
case 'd':
|
||||
|
@ -8887,7 +8969,7 @@ main(int argc, char **argv)
|
|||
verbose = MAX(verbose, 1);
|
||||
|
||||
for (c = 0; c < 256; c++) {
|
||||
if (dump_all && strchr("AeEFkKlLNOPrRSXy", c) == NULL)
|
||||
if (dump_all && strchr("ABeEFkKlLNOPrRSXy", c) == NULL)
|
||||
dump_opt[c] = 1;
|
||||
if (dump_opt[c])
|
||||
dump_opt[c] += verbose;
|
||||
|
@ -9073,7 +9155,8 @@ main(int argc, char **argv)
|
|||
checkpoint_pool, error);
|
||||
}
|
||||
|
||||
} else if (target_is_spa || dump_opt['R'] || objset_id == 0) {
|
||||
} else if (target_is_spa || dump_opt['R'] || dump_opt['B'] ||
|
||||
objset_id == 0) {
|
||||
zdb_set_skip_mmp(target);
|
||||
error = spa_open_rewind(target, &spa, FTAG, policy,
|
||||
NULL);
|
||||
|
@ -9209,7 +9292,10 @@ main(int argc, char **argv)
|
|||
strerror(errno));
|
||||
}
|
||||
}
|
||||
if (os != NULL) {
|
||||
if (dump_opt['B']) {
|
||||
dump_backup(target, objset_id,
|
||||
argc > 0 ? argv[0] : NULL);
|
||||
} else if (os != NULL) {
|
||||
dump_objset(os);
|
||||
} else if (zopt_object_args > 0 && !dump_opt['m']) {
|
||||
dump_objset(spa->spa_meta_objset);
|
||||
|
|
|
@ -369,9 +369,7 @@ zfs_agent_consumer_thread(void *arg)
|
|||
return (NULL);
|
||||
}
|
||||
|
||||
if ((event = (list_head(&agent_events))) != NULL) {
|
||||
list_remove(&agent_events, event);
|
||||
|
||||
if ((event = list_remove_head(&agent_events)) != NULL) {
|
||||
(void) pthread_mutex_unlock(&agent_lock);
|
||||
|
||||
/* dispatch to all event subscribers */
|
||||
|
@ -434,8 +432,7 @@ zfs_agent_fini(void)
|
|||
(void) pthread_join(g_agents_tid, NULL);
|
||||
|
||||
/* drain any pending events */
|
||||
while ((event = (list_head(&agent_events))) != NULL) {
|
||||
list_remove(&agent_events, event);
|
||||
while ((event = list_remove_head(&agent_events)) != NULL) {
|
||||
nvlist_free(event->ae_nvl);
|
||||
free(event);
|
||||
}
|
||||
|
|
|
@ -1288,17 +1288,14 @@ zfs_slm_fini(void)
|
|||
tpool_destroy(g_tpool);
|
||||
}
|
||||
|
||||
while ((pool = (list_head(&g_pool_list))) != NULL) {
|
||||
list_remove(&g_pool_list, pool);
|
||||
while ((pool = list_remove_head(&g_pool_list)) != NULL) {
|
||||
zpool_close(pool->uap_zhp);
|
||||
free(pool);
|
||||
}
|
||||
list_destroy(&g_pool_list);
|
||||
|
||||
while ((device = (list_head(&g_device_list))) != NULL) {
|
||||
list_remove(&g_device_list, device);
|
||||
while ((device = list_remove_head(&g_device_list)) != NULL)
|
||||
free(device);
|
||||
}
|
||||
list_destroy(&g_device_list);
|
||||
|
||||
libzfs_fini(g_zfshdl);
|
||||
|
|
|
@ -36,31 +36,49 @@ import argparse
|
|||
from argparse import RawTextHelpFormatter
|
||||
|
||||
cols = {
|
||||
# hdr: [size, scale, kstat name]
|
||||
# hdr: [size, scale, kstat name]
|
||||
"time": [8, -1, "time"],
|
||||
"pool": [12, -1, "pool"],
|
||||
"ds": [12, -1, "dataset_name"],
|
||||
"obj": [12, -1, "objset"],
|
||||
"zcc": [10, 1000, "zil_commit_count"],
|
||||
"zcwc": [10, 1000, "zil_commit_writer_count"],
|
||||
"ziic": [10, 1000, "zil_itx_indirect_count"],
|
||||
"zic": [10, 1000, "zil_itx_count"],
|
||||
"ziib": [10, 1024, "zil_itx_indirect_bytes"],
|
||||
"zicc": [10, 1000, "zil_itx_copied_count"],
|
||||
"zicb": [10, 1024, "zil_itx_copied_bytes"],
|
||||
"zinc": [10, 1000, "zil_itx_needcopy_count"],
|
||||
"zinb": [10, 1024, "zil_itx_needcopy_bytes"],
|
||||
"zimnc": [10, 1000, "zil_itx_metaslab_normal_count"],
|
||||
"zimnb": [10, 1024, "zil_itx_metaslab_normal_bytes"],
|
||||
"zimsc": [10, 1000, "zil_itx_metaslab_slog_count"],
|
||||
"zimsb": [10, 1024, "zil_itx_metaslab_slog_bytes"],
|
||||
"cc": [5, 1000, "zil_commit_count"],
|
||||
"cwc": [5, 1000, "zil_commit_writer_count"],
|
||||
"ic": [5, 1000, "zil_itx_count"],
|
||||
"iic": [5, 1000, "zil_itx_indirect_count"],
|
||||
"iib": [5, 1024, "zil_itx_indirect_bytes"],
|
||||
"icc": [5, 1000, "zil_itx_copied_count"],
|
||||
"icb": [5, 1024, "zil_itx_copied_bytes"],
|
||||
"inc": [5, 1000, "zil_itx_needcopy_count"],
|
||||
"inb": [5, 1024, "zil_itx_needcopy_bytes"],
|
||||
"idc": [5, 1000, "icc+inc"],
|
||||
"idb": [5, 1024, "icb+inb"],
|
||||
"iwc": [5, 1000, "iic+idc"],
|
||||
"iwb": [5, 1024, "iib+idb"],
|
||||
"imnc": [6, 1000, "zil_itx_metaslab_normal_count"],
|
||||
"imnb": [6, 1024, "zil_itx_metaslab_normal_bytes"],
|
||||
"imnw": [6, 1024, "zil_itx_metaslab_normal_write"],
|
||||
"imna": [6, 1024, "zil_itx_metaslab_normal_alloc"],
|
||||
"imsc": [6, 1000, "zil_itx_metaslab_slog_count"],
|
||||
"imsb": [6, 1024, "zil_itx_metaslab_slog_bytes"],
|
||||
"imsw": [6, 1024, "zil_itx_metaslab_slog_write"],
|
||||
"imsa": [6, 1024, "zil_itx_metaslab_slog_alloc"],
|
||||
"imc": [5, 1000, "imnc+imsc"],
|
||||
"imb": [5, 1024, "imnb+imsb"],
|
||||
"imw": [5, 1024, "imnw+imsw"],
|
||||
"ima": [5, 1024, "imna+imsa"],
|
||||
"se%": [3, 100, "imb/ima"],
|
||||
"sen%": [4, 100, "imnb/imna"],
|
||||
"ses%": [4, 100, "imsb/imsa"],
|
||||
"te%": [3, 100, "imb/imw"],
|
||||
"ten%": [4, 100, "imnb/imnw"],
|
||||
"tes%": [4, 100, "imsb/imsw"],
|
||||
}
|
||||
|
||||
hdr = ["time", "pool", "ds", "obj", "zcc", "zcwc", "ziic", "zic", "ziib", \
|
||||
"zicc", "zicb", "zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
|
||||
hdr = ["time", "ds", "cc", "ic", "idc", "idb", "iic", "iib",
|
||||
"imnc", "imnw", "imsc", "imsw"]
|
||||
|
||||
ghdr = ["time", "zcc", "zcwc", "ziic", "zic", "ziib", "zicc", "zicb",
|
||||
"zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
|
||||
ghdr = ["time", "cc", "ic", "idc", "idb", "iic", "iib",
|
||||
"imnc", "imnw", "imsc", "imsw"]
|
||||
|
||||
cmd = ("Usage: zilstat [-hgdv] [-i interval] [-p pool_name]")
|
||||
|
||||
|
@ -105,7 +123,7 @@ def print_header():
|
|||
global sep
|
||||
for col in hdr:
|
||||
new_col = col
|
||||
if interval > 0 and col not in ['time', 'pool', 'ds', 'obj']:
|
||||
if interval > 0 and cols[col][1] > 100:
|
||||
new_col += "/s"
|
||||
sys.stdout.write("%*s%s" % (cols[col][0], new_col, sep))
|
||||
sys.stdout.write("\n")
|
||||
|
@ -115,7 +133,7 @@ def print_values(v):
|
|||
global sep
|
||||
for col in hdr:
|
||||
val = v[cols[col][2]]
|
||||
if col not in ['time', 'pool', 'ds', 'obj'] and interval > 0:
|
||||
if interval > 0 and cols[col][1] > 100:
|
||||
val = v[cols[col][2]] // interval
|
||||
sys.stdout.write("%s%s" % (
|
||||
prettynum(cols[col][0], cols[col][1], val), sep))
|
||||
|
@ -237,9 +255,7 @@ def init():
|
|||
|
||||
invalid = []
|
||||
for ele in hdr:
|
||||
if gFlag and ele not in ghdr:
|
||||
invalid.append(ele)
|
||||
elif ele not in cols:
|
||||
if ele not in cols:
|
||||
invalid.append(ele)
|
||||
|
||||
if len(invalid) > 0:
|
||||
|
@ -403,17 +419,17 @@ def calculate_diff():
|
|||
diff = copy.deepcopy(curr)
|
||||
for pool in curr:
|
||||
for objset in curr[pool]:
|
||||
for col in hdr:
|
||||
if col not in ['time', 'pool', 'ds', 'obj']:
|
||||
key = cols[col][2]
|
||||
# If prev is NULL, this is the
|
||||
# first time we are here
|
||||
if not prev:
|
||||
diff[pool][objset][key] = 0
|
||||
else:
|
||||
diff[pool][objset][key] \
|
||||
= curr[pool][objset][key] \
|
||||
- prev[pool][objset][key]
|
||||
for key in curr[pool][objset]:
|
||||
if not isinstance(diff[pool][objset][key], int):
|
||||
continue
|
||||
# If prev is NULL, this is the
|
||||
# first time we are here
|
||||
if not prev:
|
||||
diff[pool][objset][key] = 0
|
||||
else:
|
||||
diff[pool][objset][key] \
|
||||
= curr[pool][objset][key] \
|
||||
- prev[pool][objset][key]
|
||||
|
||||
def zil_build_dict(pool = "GLOBAL"):
|
||||
global kstat
|
||||
|
@ -425,10 +441,77 @@ def zil_build_dict(pool = "GLOBAL"):
|
|||
if objset not in curr[pool]:
|
||||
curr[pool][objset] = dict()
|
||||
curr[pool][objset][key] = val
|
||||
curr[pool][objset]["pool"] = pool
|
||||
curr[pool][objset]["objset"] = objset
|
||||
curr[pool][objset]["time"] = time.strftime("%H:%M:%S", \
|
||||
time.localtime())
|
||||
|
||||
def zil_extend_dict():
|
||||
global diff
|
||||
for pool in diff:
|
||||
for objset in diff[pool]:
|
||||
diff[pool][objset]["pool"] = pool
|
||||
diff[pool][objset]["objset"] = objset
|
||||
diff[pool][objset]["time"] = time.strftime("%H:%M:%S", \
|
||||
time.localtime())
|
||||
diff[pool][objset]["icc+inc"] = \
|
||||
diff[pool][objset]["zil_itx_copied_count"] + \
|
||||
diff[pool][objset]["zil_itx_needcopy_count"]
|
||||
diff[pool][objset]["icb+inb"] = \
|
||||
diff[pool][objset]["zil_itx_copied_bytes"] + \
|
||||
diff[pool][objset]["zil_itx_needcopy_bytes"]
|
||||
diff[pool][objset]["iic+idc"] = \
|
||||
diff[pool][objset]["zil_itx_indirect_count"] + \
|
||||
diff[pool][objset]["zil_itx_copied_count"] + \
|
||||
diff[pool][objset]["zil_itx_needcopy_count"]
|
||||
diff[pool][objset]["iib+idb"] = \
|
||||
diff[pool][objset]["zil_itx_indirect_bytes"] + \
|
||||
diff[pool][objset]["zil_itx_copied_bytes"] + \
|
||||
diff[pool][objset]["zil_itx_needcopy_bytes"]
|
||||
diff[pool][objset]["imnc+imsc"] = \
|
||||
diff[pool][objset]["zil_itx_metaslab_normal_count"] + \
|
||||
diff[pool][objset]["zil_itx_metaslab_slog_count"]
|
||||
diff[pool][objset]["imnb+imsb"] = \
|
||||
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] + \
|
||||
diff[pool][objset]["zil_itx_metaslab_slog_bytes"]
|
||||
diff[pool][objset]["imnw+imsw"] = \
|
||||
diff[pool][objset]["zil_itx_metaslab_normal_write"] + \
|
||||
diff[pool][objset]["zil_itx_metaslab_slog_write"]
|
||||
diff[pool][objset]["imna+imsa"] = \
|
||||
diff[pool][objset]["zil_itx_metaslab_normal_alloc"] + \
|
||||
diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
|
||||
if diff[pool][objset]["imna+imsa"] > 0:
|
||||
diff[pool][objset]["imb/ima"] = 100 * \
|
||||
diff[pool][objset]["imnb+imsb"] // \
|
||||
diff[pool][objset]["imna+imsa"]
|
||||
else:
|
||||
diff[pool][objset]["imb/ima"] = 100
|
||||
if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
|
||||
diff[pool][objset]["imnb/imna"] = 100 * \
|
||||
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
|
||||
diff[pool][objset]["zil_itx_metaslab_normal_alloc"]
|
||||
else:
|
||||
diff[pool][objset]["imnb/imna"] = 100
|
||||
if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
|
||||
diff[pool][objset]["imsb/imsa"] = 100 * \
|
||||
diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
|
||||
diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
|
||||
else:
|
||||
diff[pool][objset]["imsb/imsa"] = 100
|
||||
if diff[pool][objset]["imnw+imsw"] > 0:
|
||||
diff[pool][objset]["imb/imw"] = 100 * \
|
||||
diff[pool][objset]["imnb+imsb"] // \
|
||||
diff[pool][objset]["imnw+imsw"]
|
||||
else:
|
||||
diff[pool][objset]["imb/imw"] = 100
|
||||
if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
|
||||
diff[pool][objset]["imnb/imnw"] = 100 * \
|
||||
diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
|
||||
diff[pool][objset]["zil_itx_metaslab_normal_write"]
|
||||
else:
|
||||
diff[pool][objset]["imnb/imnw"] = 100
|
||||
if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
|
||||
diff[pool][objset]["imsb/imsw"] = 100 * \
|
||||
diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
|
||||
diff[pool][objset]["zil_itx_metaslab_slog_write"]
|
||||
else:
|
||||
diff[pool][objset]["imsb/imsw"] = 100
|
||||
|
||||
def sign_handler_epipe(sig, frame):
|
||||
print("Caught EPIPE signal: " + str(frame))
|
||||
|
@ -437,30 +520,31 @@ def sign_handler_epipe(sig, frame):
|
|||
|
||||
def main():
|
||||
global interval
|
||||
global curr
|
||||
global curr, diff
|
||||
hprint = False
|
||||
init()
|
||||
signal.signal(signal.SIGINT, signal.SIG_DFL)
|
||||
signal.signal(signal.SIGPIPE, sign_handler_epipe)
|
||||
|
||||
zil_process_kstat()
|
||||
if not curr:
|
||||
print ("Error: No stats to show")
|
||||
sys.exit(0)
|
||||
print_header()
|
||||
if interval > 0:
|
||||
time.sleep(interval)
|
||||
while True:
|
||||
calculate_diff()
|
||||
if not diff:
|
||||
print ("Error: No stats to show")
|
||||
sys.exit(0)
|
||||
if hprint == False:
|
||||
print_header()
|
||||
hprint = True
|
||||
zil_extend_dict()
|
||||
print_dict(diff)
|
||||
time.sleep(interval)
|
||||
else:
|
||||
zil_process_kstat()
|
||||
if not curr:
|
||||
print ("Error: No stats to show")
|
||||
sys.exit(0)
|
||||
print_header()
|
||||
print_dict(curr)
|
||||
diff = curr
|
||||
zil_extend_dict()
|
||||
print_dict(diff)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -145,6 +145,7 @@ dist_zpoolcompat_DATA = \
|
|||
%D%/compatibility.d/openzfs-2.0-linux \
|
||||
%D%/compatibility.d/openzfs-2.1-freebsd \
|
||||
%D%/compatibility.d/openzfs-2.1-linux \
|
||||
%D%/compatibility.d/openzfs-2.2 \
|
||||
%D%/compatibility.d/openzfsonosx-1.7.0 \
|
||||
%D%/compatibility.d/openzfsonosx-1.8.1 \
|
||||
%D%/compatibility.d/openzfsonosx-1.9.3 \
|
||||
|
@ -168,12 +169,20 @@ zpoolcompatlinks = \
|
|||
"freebsd-11.3 freebsd-12.0" \
|
||||
"freebsd-11.3 freebsd-12.1" \
|
||||
"freebsd-11.3 freebsd-12.2" \
|
||||
"freebsd-11.3 freebsd-12.3" \
|
||||
"freebsd-11.3 freebsd-12.4" \
|
||||
"openzfs-2.1-freebsd freebsd-13.0" \
|
||||
"openzfs-2.1-freebsd freebsd-13.1" \
|
||||
"openzfs-2.1-freebsd freebsd-13.2" \
|
||||
"freebsd-11.3 freenas-11.3" \
|
||||
"freenas-11.0 freenas-11.1" \
|
||||
"openzfsonosx-1.9.3 openzfsonosx-1.9.4" \
|
||||
"openzfs-2.0-freebsd truenas-12.0" \
|
||||
"zol-0.7 ubuntu-18.04" \
|
||||
"zol-0.8 ubuntu-20.04"
|
||||
"zol-0.8 ubuntu-20.04" \
|
||||
"openzfs-2.1-linux ubuntu-22.04" \
|
||||
"openzfs-2.2 openzfs-2.2-linux" \
|
||||
"openzfs-2.2 openzfs-2.2-freebsd"
|
||||
|
||||
zpoolconfdir = $(sysconfdir)/zfs/zpool.d
|
||||
INSTALL_DATA_HOOKS += zpool-install-data-hook
|
||||
|
|
|
@ -8,5 +8,7 @@ extensible_dataset
|
|||
filesystem_limits
|
||||
hole_birth
|
||||
large_blocks
|
||||
livelist
|
||||
lz4_compress
|
||||
spacemap_histogram
|
||||
zpool_checkpoint
|
||||
|
|
40
sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.2
Normal file
40
sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.2
Normal file
|
@ -0,0 +1,40 @@
|
|||
# Features supported by OpenZFS 2.2 on Linux and FreeBSD
|
||||
allocation_classes
|
||||
async_destroy
|
||||
blake3
|
||||
block_cloning
|
||||
bookmark_v2
|
||||
bookmark_written
|
||||
bookmarks
|
||||
device_rebuild
|
||||
device_removal
|
||||
draid
|
||||
edonr
|
||||
embedded_data
|
||||
empty_bpobj
|
||||
enabled_txg
|
||||
encryption
|
||||
extensible_dataset
|
||||
filesystem_limits
|
||||
head_errlog
|
||||
hole_birth
|
||||
large_blocks
|
||||
large_dnode
|
||||
livelist
|
||||
log_spacemap
|
||||
lz4_compress
|
||||
multi_vdev_crash_dump
|
||||
obsolete_counts
|
||||
project_quota
|
||||
redacted_datasets
|
||||
redaction_bookmarks
|
||||
resilver_defer
|
||||
sha512
|
||||
skein
|
||||
spacemap_histogram
|
||||
spacemap_v2
|
||||
userobj_accounting
|
||||
vdev_zaps_v2
|
||||
zilsaxattr
|
||||
zpool_checkpoint
|
||||
zstd_compress
|
26
sys/contrib/openzfs/config/kernel-reclaim_state.m4
Normal file
26
sys/contrib/openzfs/config/kernel-reclaim_state.m4
Normal file
|
@ -0,0 +1,26 @@
|
|||
AC_DEFUN([ZFS_AC_KERNEL_SRC_RECLAIMED], [
|
||||
dnl #
|
||||
dnl # 6.4 API change
|
||||
dnl # The reclaimed_slab of struct reclaim_state
|
||||
dnl # is renamed to reclaimed
|
||||
dnl #
|
||||
ZFS_LINUX_TEST_SRC([reclaim_state_reclaimed], [
|
||||
#include <linux/swap.h>
|
||||
static const struct reclaim_state
|
||||
rs __attribute__ ((unused)) = {
|
||||
.reclaimed = 100,
|
||||
};
|
||||
],[])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_RECLAIMED], [
|
||||
AC_MSG_CHECKING([whether struct reclaim_state has reclaimed field])
|
||||
ZFS_LINUX_TEST_RESULT([reclaim_state_reclaimed], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_RECLAIM_STATE_RECLAIMED, 1,
|
||||
[struct reclaim_state has reclaimed])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
|
|
@ -153,6 +153,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
|||
ZFS_AC_KERNEL_SRC_IATTR_VFSID
|
||||
ZFS_AC_KERNEL_SRC_FILEMAP
|
||||
ZFS_AC_KERNEL_SRC_WRITEPAGE_T
|
||||
ZFS_AC_KERNEL_SRC_RECLAIMED
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
|
||||
|
@ -285,6 +286,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
|||
ZFS_AC_KERNEL_IATTR_VFSID
|
||||
ZFS_AC_KERNEL_FILEMAP
|
||||
ZFS_AC_KERNEL_WRITEPAGE_T
|
||||
ZFS_AC_KERNEL_RECLAIMED
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_CPU_HAS_FEATURE
|
||||
|
|
|
@ -7,8 +7,8 @@ NAME := $(shell awk '$$1 == "Name:" { print $$2; }' META)
|
|||
LINUX_MIN := $(shell awk '/Linux-Minimum:/{print $$2}' META)
|
||||
LINUX_NEXT := $(shell awk -F'[ .]' '/Linux-Maximum:/{print $$2 "." $$3+1}' META)
|
||||
|
||||
DKMSFILES := module include config zfs.release.in autogen.sh META AUTHORS \
|
||||
COPYRIGHT LICENSE README.md
|
||||
DKMSFILES := module include config zfs.release.in autogen.sh copy-builtin META AUTHORS \
|
||||
COPYRIGHT LICENSE README.md CODE_OF_CONDUCT.md NEWS NOTICE RELEASES.md
|
||||
|
||||
ifndef KVERS
|
||||
KVERS=$(shell uname -r)
|
||||
|
|
|
@ -344,7 +344,7 @@ mount_fs()
|
|||
|
||||
# Need the _original_ datasets mountpoint!
|
||||
mountpoint=$(get_fs_value "$fs" mountpoint)
|
||||
ZFS_CMD="mount.zfs -o zfsutil"
|
||||
ZFS_CMD="mount -o zfsutil -t zfs"
|
||||
if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
|
||||
# Can't use the mountpoint property. Might be one of our
|
||||
# clones. Check the 'org.zol:mountpoint' property set in
|
||||
|
@ -361,7 +361,7 @@ mount_fs()
|
|||
fi
|
||||
# Don't use mount.zfs -o zfsutils for legacy mountpoint
|
||||
if [ "$mountpoint" = "legacy" ]; then
|
||||
ZFS_CMD="mount.zfs"
|
||||
ZFS_CMD="mount -t zfs"
|
||||
fi
|
||||
# Last hail-mary: Hope 'rootmnt' is set!
|
||||
mountpoint=""
|
||||
|
@ -944,7 +944,7 @@ mountroot()
|
|||
echo " not specified on the kernel command line."
|
||||
echo ""
|
||||
echo "Manually mount the root filesystem on $rootmnt and then exit."
|
||||
echo "Hint: Try: mount.zfs -o zfsutil ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
|
||||
echo "Hint: Try: mount -o zfsutil -t zfs ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
|
||||
shell
|
||||
fi
|
||||
|
||||
|
|
|
@ -67,6 +67,7 @@ pam_syslog(pam_handle_t *pamh, int loglevel, const char *fmt, ...)
|
|||
#include <sys/mman.h>
|
||||
|
||||
static const char PASSWORD_VAR_NAME[] = "pam_zfs_key_authtok";
|
||||
static const char OLD_PASSWORD_VAR_NAME[] = "pam_zfs_key_oldauthtok";
|
||||
|
||||
static libzfs_handle_t *g_zfs;
|
||||
|
||||
|
@ -160,10 +161,10 @@ pw_free(pw_password_t *pw)
|
|||
}
|
||||
|
||||
static pw_password_t *
|
||||
pw_fetch(pam_handle_t *pamh)
|
||||
pw_fetch(pam_handle_t *pamh, int tok)
|
||||
{
|
||||
const char *token;
|
||||
if (pam_get_authtok(pamh, PAM_AUTHTOK, &token, NULL) != PAM_SUCCESS) {
|
||||
if (pam_get_authtok(pamh, tok, &token, NULL) != PAM_SUCCESS) {
|
||||
pam_syslog(pamh, LOG_ERR,
|
||||
"couldn't get password from PAM stack");
|
||||
return (NULL);
|
||||
|
@ -177,13 +178,13 @@ pw_fetch(pam_handle_t *pamh)
|
|||
}
|
||||
|
||||
static const pw_password_t *
|
||||
pw_fetch_lazy(pam_handle_t *pamh)
|
||||
pw_fetch_lazy(pam_handle_t *pamh, int tok, const char *var_name)
|
||||
{
|
||||
pw_password_t *pw = pw_fetch(pamh);
|
||||
pw_password_t *pw = pw_fetch(pamh, tok);
|
||||
if (pw == NULL) {
|
||||
return (NULL);
|
||||
}
|
||||
int ret = pam_set_data(pamh, PASSWORD_VAR_NAME, pw, destroy_pw);
|
||||
int ret = pam_set_data(pamh, var_name, pw, destroy_pw);
|
||||
if (ret != PAM_SUCCESS) {
|
||||
pw_free(pw);
|
||||
pam_syslog(pamh, LOG_ERR, "pam_set_data failed");
|
||||
|
@ -193,23 +194,23 @@ pw_fetch_lazy(pam_handle_t *pamh)
|
|||
}
|
||||
|
||||
static const pw_password_t *
|
||||
pw_get(pam_handle_t *pamh)
|
||||
pw_get(pam_handle_t *pamh, int tok, const char *var_name)
|
||||
{
|
||||
const pw_password_t *authtok = NULL;
|
||||
int ret = pam_get_data(pamh, PASSWORD_VAR_NAME,
|
||||
int ret = pam_get_data(pamh, var_name,
|
||||
(const void**)(&authtok));
|
||||
if (ret == PAM_SUCCESS)
|
||||
return (authtok);
|
||||
if (ret == PAM_NO_MODULE_DATA)
|
||||
return (pw_fetch_lazy(pamh));
|
||||
return (pw_fetch_lazy(pamh, tok, var_name));
|
||||
pam_syslog(pamh, LOG_ERR, "password not available");
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
pw_clear(pam_handle_t *pamh)
|
||||
pw_clear(pam_handle_t *pamh, const char *var_name)
|
||||
{
|
||||
int ret = pam_set_data(pamh, PASSWORD_VAR_NAME, NULL, NULL);
|
||||
int ret = pam_set_data(pamh, var_name, NULL, NULL);
|
||||
if (ret != PAM_SUCCESS) {
|
||||
pam_syslog(pamh, LOG_ERR, "clearing password failed");
|
||||
return (-1);
|
||||
|
@ -386,7 +387,7 @@ decrypt_mount(pam_handle_t *pamh, const char *ds_name,
|
|||
int ret = lzc_load_key(ds_name, noop, (uint8_t *)key->value,
|
||||
WRAPPING_KEY_LEN);
|
||||
pw_free(key);
|
||||
if (ret) {
|
||||
if (ret && ret != EEXIST) {
|
||||
pam_syslog(pamh, LOG_ERR, "load_key failed: %d", ret);
|
||||
zfs_close(ds);
|
||||
return (-1);
|
||||
|
@ -406,14 +407,14 @@ decrypt_mount(pam_handle_t *pamh, const char *ds_name,
|
|||
}
|
||||
|
||||
static int
|
||||
unmount_unload(pam_handle_t *pamh, const char *ds_name)
|
||||
unmount_unload(pam_handle_t *pamh, const char *ds_name, boolean_t force)
|
||||
{
|
||||
zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM);
|
||||
if (ds == NULL) {
|
||||
pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name);
|
||||
return (-1);
|
||||
}
|
||||
int ret = zfs_unmount(ds, NULL, 0);
|
||||
int ret = zfs_unmount(ds, NULL, force ? MS_FORCE : 0);
|
||||
if (ret) {
|
||||
pam_syslog(pamh, LOG_ERR, "zfs_unmount failed with: %d", ret);
|
||||
zfs_close(ds);
|
||||
|
@ -435,9 +436,13 @@ typedef struct {
|
|||
char *runstatedir;
|
||||
char *homedir;
|
||||
char *dsname;
|
||||
uid_t uid_min;
|
||||
uid_t uid_max;
|
||||
uid_t uid;
|
||||
const char *username;
|
||||
int unmount_and_unload;
|
||||
boolean_t unmount_and_unload;
|
||||
boolean_t force_unmount;
|
||||
boolean_t recursive_homes;
|
||||
} zfs_key_config_t;
|
||||
|
||||
static int
|
||||
|
@ -469,9 +474,13 @@ zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config,
|
|||
free(config->homes_prefix);
|
||||
return (PAM_USER_UNKNOWN);
|
||||
}
|
||||
config->uid_min = 1000;
|
||||
config->uid_max = MAXUID;
|
||||
config->uid = entry->pw_uid;
|
||||
config->username = name;
|
||||
config->unmount_and_unload = 1;
|
||||
config->unmount_and_unload = B_TRUE;
|
||||
config->force_unmount = B_FALSE;
|
||||
config->recursive_homes = B_FALSE;
|
||||
config->dsname = NULL;
|
||||
config->homedir = NULL;
|
||||
for (int c = 0; c < argc; c++) {
|
||||
|
@ -481,8 +490,16 @@ zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config,
|
|||
} else if (strncmp(argv[c], "runstatedir=", 12) == 0) {
|
||||
free(config->runstatedir);
|
||||
config->runstatedir = strdup(argv[c] + 12);
|
||||
} else if (strncmp(argv[c], "uid_min=", 8) == 0) {
|
||||
sscanf(argv[c] + 8, "%u", &config->uid_min);
|
||||
} else if (strncmp(argv[c], "uid_max=", 8) == 0) {
|
||||
sscanf(argv[c] + 8, "%u", &config->uid_max);
|
||||
} else if (strcmp(argv[c], "nounmount") == 0) {
|
||||
config->unmount_and_unload = 0;
|
||||
config->unmount_and_unload = B_FALSE;
|
||||
} else if (strcmp(argv[c], "forceunmount") == 0) {
|
||||
config->force_unmount = B_TRUE;
|
||||
} else if (strcmp(argv[c], "recursive_homes") == 0) {
|
||||
config->recursive_homes = B_TRUE;
|
||||
} else if (strcmp(argv[c], "prop_mountpoint") == 0) {
|
||||
if (config->homedir == NULL)
|
||||
config->homedir = strdup(entry->pw_dir);
|
||||
|
@ -517,8 +534,12 @@ find_dsname_by_prop_value(zfs_handle_t *zhp, void *data)
|
|||
(void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
|
||||
sizeof (mountpoint), NULL, NULL, 0, B_FALSE);
|
||||
if (strcmp(target->homedir, mountpoint) != 0) {
|
||||
if (target->recursive_homes) {
|
||||
(void) zfs_iter_filesystems_v2(zhp, 0,
|
||||
find_dsname_by_prop_value, target);
|
||||
}
|
||||
zfs_close(zhp);
|
||||
return (0);
|
||||
return (target->dsname != NULL);
|
||||
}
|
||||
|
||||
target->dsname = strdup(zfs_get_name(zhp));
|
||||
|
@ -531,17 +552,23 @@ zfs_key_config_get_dataset(zfs_key_config_t *config)
|
|||
{
|
||||
if (config->homedir != NULL &&
|
||||
config->homes_prefix != NULL) {
|
||||
zfs_handle_t *zhp = zfs_open(g_zfs, config->homes_prefix,
|
||||
ZFS_TYPE_FILESYSTEM);
|
||||
if (zhp == NULL) {
|
||||
pam_syslog(NULL, LOG_ERR, "dataset %s not found",
|
||||
config->homes_prefix);
|
||||
return (NULL);
|
||||
}
|
||||
if (strcmp(config->homes_prefix, "*") == 0) {
|
||||
(void) zfs_iter_root(g_zfs,
|
||||
find_dsname_by_prop_value, config);
|
||||
} else {
|
||||
zfs_handle_t *zhp = zfs_open(g_zfs,
|
||||
config->homes_prefix, ZFS_TYPE_FILESYSTEM);
|
||||
if (zhp == NULL) {
|
||||
pam_syslog(NULL, LOG_ERR,
|
||||
"dataset %s not found",
|
||||
config->homes_prefix);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
(void) zfs_iter_filesystems_v2(zhp, 0,
|
||||
find_dsname_by_prop_value, config);
|
||||
zfs_close(zhp);
|
||||
(void) zfs_iter_filesystems_v2(zhp, 0,
|
||||
find_dsname_by_prop_value, config);
|
||||
zfs_close(zhp);
|
||||
}
|
||||
char *dsname = config->dsname;
|
||||
config->dsname = NULL;
|
||||
return (dsname);
|
||||
|
@ -655,8 +682,13 @@ pam_sm_authenticate(pam_handle_t *pamh, int flags,
|
|||
if (config_err != PAM_SUCCESS) {
|
||||
return (config_err);
|
||||
}
|
||||
if (config.uid < config.uid_min || config.uid > config.uid_max) {
|
||||
zfs_key_config_free(&config);
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
|
||||
const pw_password_t *token = pw_fetch_lazy(pamh);
|
||||
const pw_password_t *token = pw_fetch_lazy(pamh,
|
||||
PAM_AUTHTOK, PASSWORD_VAR_NAME);
|
||||
if (token == NULL) {
|
||||
zfs_key_config_free(&config);
|
||||
return (PAM_AUTH_ERR);
|
||||
|
@ -706,10 +738,12 @@ pam_sm_chauthtok(pam_handle_t *pamh, int flags,
|
|||
if (zfs_key_config_load(pamh, &config, argc, argv) != PAM_SUCCESS) {
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
if (config.uid < 1000) {
|
||||
if (config.uid < config.uid_min || config.uid > config.uid_max) {
|
||||
zfs_key_config_free(&config);
|
||||
return (PAM_SUCCESS);
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
const pw_password_t *old_token = pw_get(pamh,
|
||||
PAM_OLDAUTHTOK, OLD_PASSWORD_VAR_NAME);
|
||||
{
|
||||
if (pam_zfs_init(pamh) != 0) {
|
||||
zfs_key_config_free(&config);
|
||||
|
@ -721,49 +755,62 @@ pam_sm_chauthtok(pam_handle_t *pamh, int flags,
|
|||
zfs_key_config_free(&config);
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
int key_loaded = is_key_loaded(pamh, dataset);
|
||||
if (key_loaded == -1) {
|
||||
if (!old_token) {
|
||||
pam_syslog(pamh, LOG_ERR,
|
||||
"old password from PAM stack is null");
|
||||
free(dataset);
|
||||
pam_zfs_free();
|
||||
zfs_key_config_free(&config);
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
free(dataset);
|
||||
pam_zfs_free();
|
||||
if (! key_loaded) {
|
||||
if (decrypt_mount(pamh, dataset,
|
||||
old_token->value, B_TRUE) == -1) {
|
||||
pam_syslog(pamh, LOG_ERR,
|
||||
"key not loaded, returning try_again");
|
||||
"old token mismatch");
|
||||
free(dataset);
|
||||
pam_zfs_free();
|
||||
zfs_key_config_free(&config);
|
||||
return (PAM_PERM_DENIED);
|
||||
}
|
||||
}
|
||||
|
||||
if ((flags & PAM_UPDATE_AUTHTOK) != 0) {
|
||||
const pw_password_t *token = pw_get(pamh);
|
||||
const pw_password_t *token = pw_get(pamh, PAM_AUTHTOK,
|
||||
PASSWORD_VAR_NAME);
|
||||
if (token == NULL) {
|
||||
pam_syslog(pamh, LOG_ERR, "new password unavailable");
|
||||
pam_zfs_free();
|
||||
zfs_key_config_free(&config);
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
if (pam_zfs_init(pamh) != 0) {
|
||||
zfs_key_config_free(&config);
|
||||
pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
char *dataset = zfs_key_config_get_dataset(&config);
|
||||
if (!dataset) {
|
||||
pam_zfs_free();
|
||||
zfs_key_config_free(&config);
|
||||
pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
|
||||
pw_clear(pamh, PASSWORD_VAR_NAME);
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
if (change_key(pamh, dataset, token->value) == -1) {
|
||||
int was_loaded = is_key_loaded(pamh, dataset);
|
||||
if (!was_loaded && decrypt_mount(pamh, dataset,
|
||||
old_token->value, B_FALSE) == -1) {
|
||||
free(dataset);
|
||||
pam_zfs_free();
|
||||
zfs_key_config_free(&config);
|
||||
pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
|
||||
pw_clear(pamh, PASSWORD_VAR_NAME);
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
int changed = change_key(pamh, dataset, token->value);
|
||||
if (!was_loaded) {
|
||||
unmount_unload(pamh, dataset, config.force_unmount);
|
||||
}
|
||||
free(dataset);
|
||||
pam_zfs_free();
|
||||
zfs_key_config_free(&config);
|
||||
if (pw_clear(pamh) == -1) {
|
||||
if (pw_clear(pamh, OLD_PASSWORD_VAR_NAME) == -1 ||
|
||||
pw_clear(pamh, PASSWORD_VAR_NAME) == -1 || changed == -1) {
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
} else {
|
||||
|
@ -788,7 +835,7 @@ pam_sm_open_session(pam_handle_t *pamh, int flags,
|
|||
return (PAM_SESSION_ERR);
|
||||
}
|
||||
|
||||
if (config.uid < 1000) {
|
||||
if (config.uid < config.uid_min || config.uid > config.uid_max) {
|
||||
zfs_key_config_free(&config);
|
||||
return (PAM_SUCCESS);
|
||||
}
|
||||
|
@ -799,7 +846,8 @@ pam_sm_open_session(pam_handle_t *pamh, int flags,
|
|||
return (PAM_SUCCESS);
|
||||
}
|
||||
|
||||
const pw_password_t *token = pw_get(pamh);
|
||||
const pw_password_t *token = pw_get(pamh,
|
||||
PAM_AUTHTOK, PASSWORD_VAR_NAME);
|
||||
if (token == NULL) {
|
||||
zfs_key_config_free(&config);
|
||||
return (PAM_SESSION_ERR);
|
||||
|
@ -823,7 +871,7 @@ pam_sm_open_session(pam_handle_t *pamh, int flags,
|
|||
free(dataset);
|
||||
pam_zfs_free();
|
||||
zfs_key_config_free(&config);
|
||||
if (pw_clear(pamh) == -1) {
|
||||
if (pw_clear(pamh, PASSWORD_VAR_NAME) == -1) {
|
||||
return (PAM_SERVICE_ERR);
|
||||
}
|
||||
return (PAM_SUCCESS);
|
||||
|
@ -846,7 +894,7 @@ pam_sm_close_session(pam_handle_t *pamh, int flags,
|
|||
if (zfs_key_config_load(pamh, &config, argc, argv) != PAM_SUCCESS) {
|
||||
return (PAM_SESSION_ERR);
|
||||
}
|
||||
if (config.uid < 1000) {
|
||||
if (config.uid < config.uid_min || config.uid > config.uid_max) {
|
||||
zfs_key_config_free(&config);
|
||||
return (PAM_SUCCESS);
|
||||
}
|
||||
|
@ -868,7 +916,7 @@ pam_sm_close_session(pam_handle_t *pamh, int flags,
|
|||
zfs_key_config_free(&config);
|
||||
return (PAM_SESSION_ERR);
|
||||
}
|
||||
if (unmount_unload(pamh, dataset) == -1) {
|
||||
if (unmount_unload(pamh, dataset, config.force_unmount) == -1) {
|
||||
free(dataset);
|
||||
pam_zfs_free();
|
||||
zfs_key_config_free(&config);
|
||||
|
|
|
@ -75,7 +75,7 @@ typedef struct kmem_cache {
|
|||
extern uint64_t spl_kmem_cache_inuse(kmem_cache_t *cache);
|
||||
extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache);
|
||||
|
||||
__attribute__((alloc_size(1)))
|
||||
__attribute__((malloc, alloc_size(1)))
|
||||
void *zfs_kmem_alloc(size_t size, int kmflags);
|
||||
void zfs_kmem_free(void *buf, size_t size);
|
||||
uint64_t kmem_size(void);
|
||||
|
@ -83,6 +83,7 @@ kmem_cache_t *kmem_cache_create(const char *name, size_t bufsize, size_t align,
|
|||
int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
|
||||
void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
|
||||
void kmem_cache_destroy(kmem_cache_t *cache);
|
||||
__attribute__((malloc))
|
||||
void *kmem_cache_alloc(kmem_cache_t *cache, int flags);
|
||||
void kmem_cache_free(kmem_cache_t *cache, void *buf);
|
||||
boolean_t kmem_cache_reap_active(void);
|
||||
|
|
|
@ -68,7 +68,6 @@ enum scope_prefix_types {
|
|||
zfs_trim,
|
||||
zfs_txg,
|
||||
zfs_vdev,
|
||||
zfs_vdev_cache,
|
||||
zfs_vdev_file,
|
||||
zfs_vdev_mirror,
|
||||
zfs_vnops,
|
||||
|
|
|
@ -31,10 +31,10 @@
|
|||
#include <linux/vmalloc.h>
|
||||
|
||||
extern int kmem_debugging(void);
|
||||
extern char *kmem_vasprintf(const char *fmt, va_list ap)
|
||||
__attribute__((format(printf, 1, 0)));
|
||||
extern char *kmem_asprintf(const char *fmt, ...)
|
||||
__attribute__((format(printf, 1, 2)));
|
||||
__attribute__((format(printf, 1, 0)))
|
||||
extern char *kmem_vasprintf(const char *fmt, va_list ap);
|
||||
__attribute__((format(printf, 1, 2)))
|
||||
extern char *kmem_asprintf(const char *fmt, ...);
|
||||
extern char *kmem_strdup(const char *str);
|
||||
extern void kmem_strfree(char *str);
|
||||
|
||||
|
@ -186,10 +186,10 @@ extern unsigned int spl_kmem_alloc_max;
|
|||
#define kmem_free(ptr, sz) spl_kmem_free((ptr), (sz))
|
||||
#define kmem_cache_reap_active spl_kmem_cache_reap_active
|
||||
|
||||
extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line)
|
||||
__attribute__((alloc_size(1)));
|
||||
extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line)
|
||||
__attribute__((alloc_size(1)));
|
||||
__attribute__((malloc, alloc_size(1)))
|
||||
extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line);
|
||||
__attribute__((malloc, alloc_size(1)))
|
||||
extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line);
|
||||
extern void spl_kmem_free(const void *ptr, size_t sz);
|
||||
|
||||
/*
|
||||
|
|
|
@ -91,8 +91,10 @@ typedef struct vmem { } vmem_t;
|
|||
#define vmem_zalloc(sz, fl) spl_vmem_zalloc((sz), (fl), __func__, __LINE__)
|
||||
#define vmem_free(ptr, sz) spl_vmem_free((ptr), (sz))
|
||||
|
||||
extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line);
|
||||
extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line);
|
||||
extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line)
|
||||
__attribute__((malloc, alloc_size(1)));
|
||||
extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line)
|
||||
__attribute__((malloc, alloc_size(1)));
|
||||
extern void spl_vmem_free(const void *ptr, size_t sz);
|
||||
|
||||
int spl_vmem_init(void);
|
||||
|
|
|
@ -215,6 +215,39 @@ DEFINE_EVENT(zfs_zil_commit_io_error_class, name, \
|
|||
TP_ARGS(zilog, zcw))
|
||||
DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(zfs_zil__commit__io__error);
|
||||
|
||||
/*
|
||||
* Generic support for three argument tracepoints of the form:
|
||||
*
|
||||
* DTRACE_PROBE3(...,
|
||||
* zilog_t *, ...,
|
||||
* uint64_t, ...,
|
||||
* uint64_t, ...);
|
||||
*/
|
||||
/* BEGIN CSTYLED */
|
||||
DECLARE_EVENT_CLASS(zfs_zil_block_size_class,
|
||||
TP_PROTO(zilog_t *zilog, uint64_t res, uint64_t s1),
|
||||
TP_ARGS(zilog, res, s1),
|
||||
TP_STRUCT__entry(
|
||||
ZILOG_TP_STRUCT_ENTRY
|
||||
__field(uint64_t, res)
|
||||
__field(uint64_t, s1)
|
||||
),
|
||||
TP_fast_assign(
|
||||
ZILOG_TP_FAST_ASSIGN
|
||||
__entry->res = res;
|
||||
__entry->s1 = s1;
|
||||
),
|
||||
TP_printk(
|
||||
ZILOG_TP_PRINTK_FMT " res %llu s1 %llu",
|
||||
ZILOG_TP_PRINTK_ARGS, __entry->res, __entry->s1)
|
||||
);
|
||||
|
||||
#define DEFINE_ZIL_BLOCK_SIZE_EVENT(name) \
|
||||
DEFINE_EVENT(zfs_zil_block_size_class, name, \
|
||||
TP_PROTO(zilog_t *zilog, uint64_t res, uint64_t s1), \
|
||||
TP_ARGS(zilog, res, s1))
|
||||
DEFINE_ZIL_BLOCK_SIZE_EVENT(zfs_zil__block__size);
|
||||
|
||||
#endif /* _TRACE_ZIL_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
@ -228,6 +261,7 @@ DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(zfs_zil__commit__io__error);
|
|||
DEFINE_DTRACE_PROBE2(zil__process__commit__itx);
|
||||
DEFINE_DTRACE_PROBE2(zil__process__normal__itx);
|
||||
DEFINE_DTRACE_PROBE2(zil__commit__io__error);
|
||||
DEFINE_DTRACE_PROBE3(zil__block__size);
|
||||
|
||||
#endif /* HAVE_DECLARE_EVENT_CLASS */
|
||||
#endif /* _KERNEL */
|
||||
|
|
|
@ -86,10 +86,15 @@ extern int zfs_abd_scatter_enabled;
|
|||
* Allocations and deallocations
|
||||
*/
|
||||
|
||||
__attribute__((malloc))
|
||||
abd_t *abd_alloc(size_t, boolean_t);
|
||||
__attribute__((malloc))
|
||||
abd_t *abd_alloc_linear(size_t, boolean_t);
|
||||
__attribute__((malloc))
|
||||
abd_t *abd_alloc_gang(void);
|
||||
__attribute__((malloc))
|
||||
abd_t *abd_alloc_for_io(size_t, boolean_t);
|
||||
__attribute__((malloc))
|
||||
abd_t *abd_alloc_sametype(abd_t *, size_t);
|
||||
boolean_t abd_size_alloc_linear(size_t);
|
||||
void abd_gang_add(abd_t *, abd_t *, boolean_t);
|
||||
|
|
|
@ -105,8 +105,13 @@ typedef struct zfs_btree_index {
|
|||
boolean_t bti_before;
|
||||
} zfs_btree_index_t;
|
||||
|
||||
typedef struct btree {
|
||||
typedef struct btree zfs_btree_t;
|
||||
typedef void * (*bt_find_in_buf_f) (zfs_btree_t *, uint8_t *, uint32_t,
|
||||
const void *, zfs_btree_index_t *);
|
||||
|
||||
struct btree {
|
||||
int (*bt_compar) (const void *, const void *);
|
||||
bt_find_in_buf_f bt_find_in_buf;
|
||||
size_t bt_elem_size;
|
||||
size_t bt_leaf_size;
|
||||
uint32_t bt_leaf_cap;
|
||||
|
@ -115,7 +120,54 @@ typedef struct btree {
|
|||
uint64_t bt_num_nodes;
|
||||
zfs_btree_hdr_t *bt_root;
|
||||
zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
|
||||
} zfs_btree_t;
|
||||
};
|
||||
|
||||
/*
|
||||
* Implementation of Shar's algorithm designed to accelerate binary search by
|
||||
* eliminating impossible to predict branches.
|
||||
*
|
||||
* For optimality, this should be used to generate the search function in the
|
||||
* same file as the comparator and the comparator should be marked
|
||||
* `__attribute__((always_inline) inline` so that the compiler will inline it.
|
||||
*
|
||||
* Arguments are:
|
||||
*
|
||||
* NAME - The function name for this instance of the search function. Use it
|
||||
* in a subsequent call to zfs_btree_create().
|
||||
* T - The element type stored inside the B-Tree.
|
||||
* COMP - A comparator to compare two nodes, it must return exactly: -1, 0,
|
||||
* or +1 -1 for <, 0 for ==, and +1 for >. For trivial comparisons,
|
||||
* TREE_CMP() from avl.h can be used in a boilerplate function.
|
||||
*/
|
||||
/* BEGIN CSTYLED */
|
||||
#define ZFS_BTREE_FIND_IN_BUF_FUNC(NAME, T, COMP) \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") \
|
||||
static void * \
|
||||
NAME(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems, \
|
||||
const void *value, zfs_btree_index_t *where) \
|
||||
{ \
|
||||
T *i = (T *)buf; \
|
||||
(void) tree; \
|
||||
_Pragma("GCC unroll 9") \
|
||||
while (nelems > 1) { \
|
||||
uint32_t half = nelems / 2; \
|
||||
nelems -= half; \
|
||||
i += (COMP(&i[half - 1], value) < 0) * half; \
|
||||
} \
|
||||
\
|
||||
int comp = COMP(i, value); \
|
||||
where->bti_offset = (i - (T *)buf) + (comp < 0); \
|
||||
where->bti_before = (comp != 0); \
|
||||
\
|
||||
if (comp == 0) { \
|
||||
return (i); \
|
||||
} \
|
||||
\
|
||||
return (NULL); \
|
||||
} \
|
||||
_Pragma("GCC diagnostic pop")
|
||||
/* END CSTYLED */
|
||||
|
||||
/*
|
||||
* Allocate and deallocate caches for btree nodes.
|
||||
|
@ -129,13 +181,19 @@ void zfs_btree_fini(void);
|
|||
* tree - the tree to be initialized
|
||||
* compar - function to compare two nodes, it must return exactly: -1, 0, or +1
|
||||
* -1 for <, 0 for ==, and +1 for >
|
||||
* find - optional function to accelerate searches inside B-Tree nodes
|
||||
* through Shar's algorithm and comparator inlining. Setting this to
|
||||
* NULL will use a generic function. The function should be created
|
||||
* using ZFS_BTREE_FIND_IN_BUF_FUNC() in the same file as compar.
|
||||
* compar should be marked `__attribute__((always_inline)) inline` or
|
||||
* performance is unlikely to improve very much.
|
||||
* size - the value of sizeof(struct my_type)
|
||||
* lsize - custom leaf size
|
||||
*/
|
||||
void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
|
||||
size_t);
|
||||
bt_find_in_buf_f, size_t);
|
||||
void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *),
|
||||
size_t, size_t);
|
||||
bt_find_in_buf_f, size_t, size_t);
|
||||
|
||||
/*
|
||||
* Find a node with a matching value in the tree. Returns the matching node
|
||||
|
|
|
@ -1174,10 +1174,6 @@ extern void zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep,
|
|||
zbookmark_phys_t *zb);
|
||||
extern void name_to_errphys(char *buf, zbookmark_err_phys_t *zep);
|
||||
|
||||
/* vdev cache */
|
||||
extern void vdev_cache_stat_init(void);
|
||||
extern void vdev_cache_stat_fini(void);
|
||||
|
||||
/* vdev mirror */
|
||||
extern void vdev_mirror_stat_init(void);
|
||||
extern void vdev_mirror_stat_fini(void);
|
||||
|
|
|
@ -158,12 +158,6 @@ extern boolean_t vdev_allocatable(vdev_t *vd);
|
|||
extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
|
||||
extern boolean_t vdev_is_spacemap_addressable(vdev_t *vd);
|
||||
|
||||
extern void vdev_cache_init(vdev_t *vd);
|
||||
extern void vdev_cache_fini(vdev_t *vd);
|
||||
extern boolean_t vdev_cache_read(zio_t *zio);
|
||||
extern void vdev_cache_write(zio_t *zio);
|
||||
extern void vdev_cache_purge(vdev_t *vd);
|
||||
|
||||
extern void vdev_queue_init(vdev_t *vd);
|
||||
extern void vdev_queue_fini(vdev_t *vd);
|
||||
extern zio_t *vdev_queue_io(zio_t *zio);
|
||||
|
|
|
@ -57,8 +57,6 @@ extern "C" {
|
|||
* Forward declarations that lots of things need.
|
||||
*/
|
||||
typedef struct vdev_queue vdev_queue_t;
|
||||
typedef struct vdev_cache vdev_cache_t;
|
||||
typedef struct vdev_cache_entry vdev_cache_entry_t;
|
||||
struct abd;
|
||||
|
||||
extern uint_t zfs_vdev_queue_depth_pct;
|
||||
|
@ -132,23 +130,6 @@ typedef const struct vdev_ops {
|
|||
/*
|
||||
* Virtual device properties
|
||||
*/
|
||||
struct vdev_cache_entry {
|
||||
struct abd *ve_abd;
|
||||
uint64_t ve_offset;
|
||||
clock_t ve_lastused;
|
||||
avl_node_t ve_offset_node;
|
||||
avl_node_t ve_lastused_node;
|
||||
uint32_t ve_hits;
|
||||
uint16_t ve_missed_update;
|
||||
zio_t *ve_fill_io;
|
||||
};
|
||||
|
||||
struct vdev_cache {
|
||||
avl_tree_t vc_offset_tree;
|
||||
avl_tree_t vc_lastused_tree;
|
||||
kmutex_t vc_lock;
|
||||
};
|
||||
|
||||
typedef struct vdev_queue_class {
|
||||
uint32_t vqc_active;
|
||||
|
||||
|
@ -443,7 +424,6 @@ struct vdev {
|
|||
boolean_t vdev_resilver_deferred; /* resilver deferred */
|
||||
boolean_t vdev_kobj_flag; /* kobj event record */
|
||||
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
|
||||
vdev_cache_t vdev_cache; /* physical block cache */
|
||||
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
|
||||
zio_t *vdev_probe_zio; /* root of current probe */
|
||||
vdev_aux_t vdev_label_aux; /* on-disk aux state */
|
||||
|
|
|
@ -73,13 +73,15 @@ int64_t zfs_refcount_count(zfs_refcount_t *);
|
|||
int64_t zfs_refcount_add(zfs_refcount_t *, const void *);
|
||||
int64_t zfs_refcount_remove(zfs_refcount_t *, const void *);
|
||||
/*
|
||||
* Note that (add|remove)_many add/remove one reference with "number" N,
|
||||
* _not_ make N references with "number" 1, which is what vanilla
|
||||
* zfs_refcount_(add|remove) would do if called N times.
|
||||
* Note that (add|remove)_many adds/removes one reference with "number" N,
|
||||
* _not_ N references with "number" 1, which is what (add|remove)_few does,
|
||||
* or what vanilla zfs_refcount_(add|remove) called N times would do.
|
||||
*
|
||||
* Attempting to remove a reference with number N when none exists is a
|
||||
* panic on debug kernels with reference_tracking enabled.
|
||||
*/
|
||||
void zfs_refcount_add_few(zfs_refcount_t *, uint64_t, const void *);
|
||||
void zfs_refcount_remove_few(zfs_refcount_t *, uint64_t, const void *);
|
||||
int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *);
|
||||
int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *);
|
||||
void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
|
||||
|
@ -108,6 +110,10 @@ typedef struct refcount {
|
|||
#define zfs_refcount_count(rc) atomic_load_64(&(rc)->rc_count)
|
||||
#define zfs_refcount_add(rc, holder) atomic_inc_64_nv(&(rc)->rc_count)
|
||||
#define zfs_refcount_remove(rc, holder) atomic_dec_64_nv(&(rc)->rc_count)
|
||||
#define zfs_refcount_add_few(rc, number, holder) \
|
||||
atomic_add_64(&(rc)->rc_count, number)
|
||||
#define zfs_refcount_remove_few(rc, number, holder) \
|
||||
atomic_add_64(&(rc)->rc_count, -number)
|
||||
#define zfs_refcount_add_many(rc, number, holder) \
|
||||
atomic_add_64_nv(&(rc)->rc_count, number)
|
||||
#define zfs_refcount_remove_many(rc, number, holder) \
|
||||
|
|
|
@ -158,6 +158,7 @@ extern "C" {
|
|||
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
|
||||
|
||||
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
|
||||
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/zfs_znode_impl.h>
|
||||
|
@ -280,7 +281,6 @@ extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
|
|||
extern void zfs_remove_op_tables(void);
|
||||
extern int zfs_create_op_tables(void);
|
||||
extern dev_t zfs_cmpldev(uint64_t);
|
||||
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
|
||||
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
|
||||
extern boolean_t zfs_get_vfs_flag_unmounted(objset_t *os);
|
||||
extern void zfs_znode_dmu_fini(znode_t *);
|
||||
|
|
|
@ -489,18 +489,22 @@ typedef struct zil_stats {
|
|||
* Transactions which have been allocated to the "normal"
|
||||
* (i.e. not slog) storage pool. Note that "bytes" accumulate
|
||||
* the actual log record sizes - which do not include the actual
|
||||
* data in case of indirect writes.
|
||||
* data in case of indirect writes. bytes <= write <= alloc.
|
||||
*/
|
||||
kstat_named_t zil_itx_metaslab_normal_count;
|
||||
kstat_named_t zil_itx_metaslab_normal_bytes;
|
||||
kstat_named_t zil_itx_metaslab_normal_write;
|
||||
kstat_named_t zil_itx_metaslab_normal_alloc;
|
||||
|
||||
/*
|
||||
* Transactions which have been allocated to the "slog" storage pool.
|
||||
* If there are no separate log devices, this is the same as the
|
||||
* "normal" pool.
|
||||
* "normal" pool. bytes <= write <= alloc.
|
||||
*/
|
||||
kstat_named_t zil_itx_metaslab_slog_count;
|
||||
kstat_named_t zil_itx_metaslab_slog_bytes;
|
||||
kstat_named_t zil_itx_metaslab_slog_write;
|
||||
kstat_named_t zil_itx_metaslab_slog_alloc;
|
||||
} zil_kstat_values_t;
|
||||
|
||||
typedef struct zil_sums {
|
||||
|
@ -515,8 +519,12 @@ typedef struct zil_sums {
|
|||
wmsum_t zil_itx_needcopy_bytes;
|
||||
wmsum_t zil_itx_metaslab_normal_count;
|
||||
wmsum_t zil_itx_metaslab_normal_bytes;
|
||||
wmsum_t zil_itx_metaslab_normal_write;
|
||||
wmsum_t zil_itx_metaslab_normal_alloc;
|
||||
wmsum_t zil_itx_metaslab_slog_count;
|
||||
wmsum_t zil_itx_metaslab_slog_bytes;
|
||||
wmsum_t zil_itx_metaslab_slog_write;
|
||||
wmsum_t zil_itx_metaslab_slog_alloc;
|
||||
} zil_sums_t;
|
||||
|
||||
#define ZIL_STAT_INCR(zil, stat, val) \
|
||||
|
|
|
@ -44,7 +44,7 @@ extern "C" {
|
|||
* must be held.
|
||||
*
|
||||
* After the lwb is "opened", it can transition into the "issued" state
|
||||
* via zil_lwb_write_issue(). Again, the zilog's "zl_issuer_lock" must
|
||||
* via zil_lwb_write_close(). Again, the zilog's "zl_issuer_lock" must
|
||||
* be held when making this transition.
|
||||
*
|
||||
* After the lwb's write zio completes, it transitions into the "write
|
||||
|
@ -93,20 +93,23 @@ typedef struct lwb {
|
|||
blkptr_t lwb_blk; /* on disk address of this log blk */
|
||||
boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */
|
||||
boolean_t lwb_slog; /* lwb_blk is on SLOG device */
|
||||
boolean_t lwb_indirect; /* do not postpone zil_lwb_commit() */
|
||||
int lwb_nused; /* # used bytes in buffer */
|
||||
int lwb_nfilled; /* # filled bytes in buffer */
|
||||
int lwb_sz; /* size of block and buffer */
|
||||
lwb_state_t lwb_state; /* the state of this lwb */
|
||||
char *lwb_buf; /* log write buffer */
|
||||
zio_t *lwb_write_zio; /* zio for the lwb buffer */
|
||||
zio_t *lwb_root_zio; /* root zio for lwb write and flushes */
|
||||
hrtime_t lwb_issued_timestamp; /* when was the lwb issued? */
|
||||
uint64_t lwb_issued_txg; /* the txg when the write is issued */
|
||||
uint64_t lwb_max_txg; /* highest txg in this lwb */
|
||||
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
|
||||
list_node_t lwb_issue_node; /* linkage of lwbs ready for issue */
|
||||
list_t lwb_itxs; /* list of itx's */
|
||||
list_t lwb_waiters; /* list of zil_commit_waiter's */
|
||||
avl_tree_t lwb_vdev_tree; /* vdevs to flush after lwb write */
|
||||
kmutex_t lwb_vdev_lock; /* protects lwb_vdev_tree */
|
||||
hrtime_t lwb_issued_timestamp; /* when was the lwb issued? */
|
||||
} lwb_t;
|
||||
|
||||
/*
|
||||
|
|
|
@ -190,7 +190,6 @@ typedef uint64_t zio_flag_t;
|
|||
#define ZIO_FLAG_SPECULATIVE (1ULL << 8)
|
||||
#define ZIO_FLAG_CONFIG_WRITER (1ULL << 9)
|
||||
#define ZIO_FLAG_DONT_RETRY (1ULL << 10)
|
||||
#define ZIO_FLAG_DONT_CACHE (1ULL << 11)
|
||||
#define ZIO_FLAG_NODATA (1ULL << 12)
|
||||
#define ZIO_FLAG_INDUCE_DAMAGE (1ULL << 13)
|
||||
#define ZIO_FLAG_IO_ALLOCATING (1ULL << 14)
|
||||
|
|
|
@ -83,7 +83,7 @@ const char *_umem_debug_init(void);
|
|||
const char *_umem_options_init(void);
|
||||
const char *_umem_logging_init(void);
|
||||
|
||||
__attribute__((alloc_size(1)))
|
||||
__attribute__((malloc, alloc_size(1)))
|
||||
static inline void *
|
||||
umem_alloc(size_t size, int flags)
|
||||
{
|
||||
|
@ -96,7 +96,7 @@ umem_alloc(size_t size, int flags)
|
|||
return (ptr);
|
||||
}
|
||||
|
||||
__attribute__((alloc_size(1)))
|
||||
__attribute__((malloc, alloc_size(1)))
|
||||
static inline void *
|
||||
umem_alloc_aligned(size_t size, size_t align, int flags)
|
||||
{
|
||||
|
@ -118,7 +118,7 @@ umem_alloc_aligned(size_t size, size_t align, int flags)
|
|||
return (ptr);
|
||||
}
|
||||
|
||||
__attribute__((alloc_size(1)))
|
||||
__attribute__((malloc, alloc_size(1)))
|
||||
static inline void *
|
||||
umem_zalloc(size_t size, int flags)
|
||||
{
|
||||
|
@ -188,6 +188,7 @@ umem_cache_destroy(umem_cache_t *cp)
|
|||
umem_free(cp, sizeof (umem_cache_t));
|
||||
}
|
||||
|
||||
__attribute__((malloc))
|
||||
static inline void *
|
||||
umem_cache_alloc(umem_cache_t *cp, int flags)
|
||||
{
|
||||
|
|
|
@ -135,7 +135,6 @@ nodist_libzpool_la_SOURCES = \
|
|||
module/zfs/uberblock.c \
|
||||
module/zfs/unique.c \
|
||||
module/zfs/vdev.c \
|
||||
module/zfs/vdev_cache.c \
|
||||
module/zfs/vdev_draid.c \
|
||||
module/zfs/vdev_draid_rand.c \
|
||||
module/zfs/vdev_indirect.c \
|
||||
|
|
|
@ -2028,21 +2028,6 @@ Max vdev I/O aggregation size.
|
|||
.It Sy zfs_vdev_aggregation_limit_non_rotating Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq uint
|
||||
Max vdev I/O aggregation size for non-rotating media.
|
||||
.
|
||||
.It Sy zfs_vdev_cache_bshift Ns = Ns Sy 16 Po 64 KiB Pc Pq uint
|
||||
Shift size to inflate reads to.
|
||||
.
|
||||
.It Sy zfs_vdev_cache_max Ns = Ns Sy 16384 Ns B Po 16 KiB Pc Pq uint
|
||||
Inflate reads smaller than this value to meet the
|
||||
.Sy zfs_vdev_cache_bshift
|
||||
size
|
||||
.Pq default Sy 64 KiB .
|
||||
.
|
||||
.It Sy zfs_vdev_cache_size Ns = Ns Sy 0 Pq uint
|
||||
Total size of the per-disk cache in bytes.
|
||||
.Pp
|
||||
Currently this feature is disabled, as it has been found to not be helpful
|
||||
for performance and in some cases harmful.
|
||||
.
|
||||
.It Sy zfs_vdev_mirror_rotating_inc Ns = Ns Sy 0 Pq int
|
||||
A number by which the balancing algorithm increments the load calculation for
|
||||
the purpose of selecting the least busy mirror member when an I/O operation
|
||||
|
|
|
@ -228,8 +228,10 @@ extensible_dataset
|
|||
filesystem_limits
|
||||
hole_birth
|
||||
large_blocks
|
||||
livelist
|
||||
lz4_compress
|
||||
spacemap_histogram
|
||||
zpool_checkpoint
|
||||
|
||||
.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev
|
||||
.Ed
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
|
||||
.\" Copyright (c) 2017 Intel Corporation.
|
||||
.\"
|
||||
.Dd October 7, 2020
|
||||
.Dd June 4, 2023
|
||||
.Dt ZDB 8
|
||||
.Os
|
||||
.
|
||||
|
@ -41,6 +41,13 @@
|
|||
.Ar poolname Ns Op Ar / Ns Ar dataset Ns | Ns Ar objset-ID
|
||||
.Op Ar object Ns | Ns Ar range Ns …
|
||||
.Nm
|
||||
.Fl B
|
||||
.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
|
||||
.Op Fl U Ar cache
|
||||
.Op Fl K Ar key
|
||||
.Ar poolname Ns Ar / Ns Ar objset-ID
|
||||
.Op Ar backup-flags
|
||||
.Nm
|
||||
.Fl C
|
||||
.Op Fl A
|
||||
.Op Fl U Ar cache
|
||||
|
@ -123,6 +130,22 @@ Display options:
|
|||
Display statistics regarding the number, size
|
||||
.Pq logical, physical and allocated
|
||||
and deduplication of blocks.
|
||||
.It Fl B , -backup
|
||||
Generate a backup stream, similar to
|
||||
.Nm zfs Cm send ,
|
||||
but for the numeric objset ID, and without opening the dataset.
|
||||
This can be useful in recovery scenarios if dataset metadata has become
|
||||
corrupted but the dataset itself is readable.
|
||||
The optional
|
||||
.Ar flags
|
||||
argument is a string of one or more of the letters
|
||||
.Sy e ,
|
||||
.Sy L ,
|
||||
.Sy c ,
|
||||
and
|
||||
.Sy w ,
|
||||
which correspond to the same flags in
|
||||
.Xr zfs-send 8 .
|
||||
.It Fl c , -checksum
|
||||
Verify the checksum of all metadata blocks while printing block statistics
|
||||
.Po see
|
||||
|
|
|
@ -234,14 +234,11 @@ if the volume is not sparse.
|
|||
Print verbose information about the created dataset.
|
||||
.El
|
||||
.El
|
||||
.Ss ZFS Volumes as Swap
|
||||
ZFS volumes may be used as swap devices.
|
||||
After creating the volume with the
|
||||
.Nm zfs Cm create Fl V
|
||||
enable the swap area using the
|
||||
.Xr swapon 8
|
||||
command.
|
||||
Swapping to files on ZFS filesystems is not supported.
|
||||
.Ss ZFS for Swap
|
||||
Swapping to a ZFS volume is prone to deadlock and not recommended.
|
||||
See OpenZFS FAQ.
|
||||
.Pp
|
||||
Swapping to a file on a ZFS filesystem is not supported.
|
||||
.
|
||||
.Sh EXAMPLES
|
||||
.\" These are, respectively, examples 1, 10 from zfs.8
|
||||
|
|
|
@ -456,7 +456,6 @@ ZIO_FLAG_CANFAIL:0x00000080
|
|||
ZIO_FLAG_SPECULATIVE:0x00000100
|
||||
ZIO_FLAG_CONFIG_WRITER:0x00000200
|
||||
ZIO_FLAG_DONT_RETRY:0x00000400
|
||||
ZIO_FLAG_DONT_CACHE:0x00000800
|
||||
ZIO_FLAG_NODATA:0x00001000
|
||||
ZIO_FLAG_INDUCE_DAMAGE:0x00002000
|
||||
|
||||
|
|
|
@ -34,6 +34,20 @@ ifeq ($(CONFIG_KASAN),y)
|
|||
ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than=
|
||||
endif
|
||||
|
||||
# Generated binary search code is particularly bad with this optimization.
|
||||
# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
|
||||
# is not affected when unrolling is done.
|
||||
# Disable it until the following upstream issue is resolved:
|
||||
# https://github.com/llvm/llvm-project/issues/62790
|
||||
ifeq ($(CONFIG_X86),y)
|
||||
ifeq ($(CONFIG_CC_IS_CLANG),y)
|
||||
CFLAGS_zfs/dsl_scan.o += -mllvm -x86-cmov-converter=false
|
||||
CFLAGS_zfs/metaslab.o += -mllvm -x86-cmov-converter=false
|
||||
CFLAGS_zfs/range_tree.o += -mllvm -x86-cmov-converter=false
|
||||
CFLAGS_zfs/zap_micro.o += -mllvm -x86-cmov-converter=false
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(KBUILD_EXTMOD),)
|
||||
@CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
|
||||
@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
|
||||
|
@ -368,7 +382,6 @@ ZFS_OBJS := \
|
|||
uberblock.o \
|
||||
unique.o \
|
||||
vdev.o \
|
||||
vdev_cache.o \
|
||||
vdev_draid.o \
|
||||
vdev_draid_rand.o \
|
||||
vdev_indirect.o \
|
||||
|
|
|
@ -308,7 +308,6 @@ SRCS+= abd.c \
|
|||
uberblock.c \
|
||||
unique.c \
|
||||
vdev.c \
|
||||
vdev_cache.c \
|
||||
vdev_draid.c \
|
||||
vdev_draid_rand.c \
|
||||
vdev_indirect.c \
|
||||
|
@ -400,6 +399,20 @@ beforeinstall:
|
|||
|
||||
.include <bsd.kmod.mk>
|
||||
|
||||
# Generated binary search code is particularly bad with this optimization.
|
||||
# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
|
||||
# is not affected when unrolling is done.
|
||||
# Disable it until the following upstream issue is resolved:
|
||||
# https://github.com/llvm/llvm-project/issues/62790
|
||||
.if ${CC} == "clang"
|
||||
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "amd64"
|
||||
CFLAGS.dsl_scan.c= -mllvm -x86-cmov-converter=false
|
||||
CFLAGS.metaslab.c= -mllvm -x86-cmov-converter=false
|
||||
CFLAGS.range_tree.c= -mllvm -x86-cmov-converter=false
|
||||
CFLAGS.zap_micro.c= -mllvm -x86-cmov-converter=false
|
||||
.endif
|
||||
.endif
|
||||
|
||||
CFLAGS.sysctl_os.c= -include ../zfs_config.h
|
||||
CFLAGS.xxhash.c+= -include ${SYSDIR}/sys/_null.h
|
||||
|
||||
|
|
|
@ -872,8 +872,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip,
|
|||
"Enable to bypass vdev_validate().");
|
||||
/* END CSTYLED */
|
||||
|
||||
/* vdev_cache.c */
|
||||
|
||||
/* vdev_mirror.c */
|
||||
|
||||
/* vdev_queue.c */
|
||||
|
|
|
@ -495,10 +495,8 @@ zfs_acl_release_nodes(zfs_acl_t *aclp)
|
|||
{
|
||||
zfs_acl_node_t *aclnode;
|
||||
|
||||
while ((aclnode = list_head(&aclp->z_acl))) {
|
||||
list_remove(&aclp->z_acl, aclnode);
|
||||
while ((aclnode = list_remove_head(&aclp->z_acl)))
|
||||
zfs_acl_node_free(aclnode);
|
||||
}
|
||||
aclp->z_acl_count = 0;
|
||||
aclp->z_acl_bytes = 0;
|
||||
}
|
||||
|
|
|
@ -2220,92 +2220,6 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
|
|||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read a property stored within the master node.
|
||||
*/
|
||||
int
|
||||
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
|
||||
{
|
||||
uint64_t *cached_copy = NULL;
|
||||
|
||||
/*
|
||||
* Figure out where in the objset_t the cached copy would live, if it
|
||||
* is available for the requested property.
|
||||
*/
|
||||
if (os != NULL) {
|
||||
switch (prop) {
|
||||
case ZFS_PROP_VERSION:
|
||||
cached_copy = &os->os_version;
|
||||
break;
|
||||
case ZFS_PROP_NORMALIZE:
|
||||
cached_copy = &os->os_normalization;
|
||||
break;
|
||||
case ZFS_PROP_UTF8ONLY:
|
||||
cached_copy = &os->os_utf8only;
|
||||
break;
|
||||
case ZFS_PROP_CASE:
|
||||
cached_copy = &os->os_casesensitivity;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
|
||||
*value = *cached_copy;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the property wasn't cached, look up the file system's value for
|
||||
* the property. For the version property, we look up a slightly
|
||||
* different string.
|
||||
*/
|
||||
const char *pname;
|
||||
int error = ENOENT;
|
||||
if (prop == ZFS_PROP_VERSION) {
|
||||
pname = ZPL_VERSION_STR;
|
||||
} else {
|
||||
pname = zfs_prop_to_name(prop);
|
||||
}
|
||||
|
||||
if (os != NULL) {
|
||||
ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
|
||||
}
|
||||
|
||||
if (error == ENOENT) {
|
||||
/* No value set, use the default value */
|
||||
switch (prop) {
|
||||
case ZFS_PROP_VERSION:
|
||||
*value = ZPL_VERSION;
|
||||
break;
|
||||
case ZFS_PROP_NORMALIZE:
|
||||
case ZFS_PROP_UTF8ONLY:
|
||||
*value = 0;
|
||||
break;
|
||||
case ZFS_PROP_CASE:
|
||||
*value = ZFS_CASE_SENSITIVE;
|
||||
break;
|
||||
case ZFS_PROP_ACLTYPE:
|
||||
*value = ZFS_ACLTYPE_NFSV4;
|
||||
break;
|
||||
default:
|
||||
return (error);
|
||||
}
|
||||
error = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If one of the methods for getting the property value above worked,
|
||||
* copy it into the objset_t's cache.
|
||||
*/
|
||||
if (error == 0 && cached_copy != NULL) {
|
||||
*cached_copy = *value;
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the corresponding vfs's unmounted flag is set.
|
||||
* Otherwise return false.
|
||||
|
|
|
@ -2069,6 +2069,93 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
|
|||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read a property stored within the master node.
|
||||
*/
|
||||
int
|
||||
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
|
||||
{
|
||||
uint64_t *cached_copy = NULL;
|
||||
|
||||
/*
|
||||
* Figure out where in the objset_t the cached copy would live, if it
|
||||
* is available for the requested property.
|
||||
*/
|
||||
if (os != NULL) {
|
||||
switch (prop) {
|
||||
case ZFS_PROP_VERSION:
|
||||
cached_copy = &os->os_version;
|
||||
break;
|
||||
case ZFS_PROP_NORMALIZE:
|
||||
cached_copy = &os->os_normalization;
|
||||
break;
|
||||
case ZFS_PROP_UTF8ONLY:
|
||||
cached_copy = &os->os_utf8only;
|
||||
break;
|
||||
case ZFS_PROP_CASE:
|
||||
cached_copy = &os->os_casesensitivity;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
|
||||
*value = *cached_copy;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the property wasn't cached, look up the file system's value for
|
||||
* the property. For the version property, we look up a slightly
|
||||
* different string.
|
||||
*/
|
||||
const char *pname;
|
||||
int error = ENOENT;
|
||||
if (prop == ZFS_PROP_VERSION) {
|
||||
pname = ZPL_VERSION_STR;
|
||||
} else {
|
||||
pname = zfs_prop_to_name(prop);
|
||||
}
|
||||
|
||||
if (os != NULL) {
|
||||
ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
|
||||
}
|
||||
|
||||
if (error == ENOENT) {
|
||||
/* No value set, use the default value */
|
||||
switch (prop) {
|
||||
case ZFS_PROP_VERSION:
|
||||
*value = ZPL_VERSION;
|
||||
break;
|
||||
case ZFS_PROP_NORMALIZE:
|
||||
case ZFS_PROP_UTF8ONLY:
|
||||
*value = 0;
|
||||
break;
|
||||
case ZFS_PROP_CASE:
|
||||
*value = ZFS_CASE_SENSITIVE;
|
||||
break;
|
||||
case ZFS_PROP_ACLTYPE:
|
||||
*value = ZFS_ACLTYPE_NFSV4;
|
||||
break;
|
||||
default:
|
||||
return (error);
|
||||
}
|
||||
error = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If one of the methods for getting the property value above worked,
|
||||
* copy it into the objset_t's cache.
|
||||
*/
|
||||
if (error == 0 && cached_copy != NULL) {
|
||||
*cached_copy = *value;
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
zfs_znode_update_vfs(znode_t *zp)
|
||||
|
|
|
@ -182,8 +182,11 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
|
|||
* of that infrastructure we are responsible for incrementing it.
|
||||
*/
|
||||
if (current->reclaim_state)
|
||||
#ifdef HAVE_RECLAIM_STATE_RECLAIMED
|
||||
current->reclaim_state->reclaimed += size >> PAGE_SHIFT;
|
||||
#else
|
||||
current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT;
|
||||
|
||||
#endif
|
||||
vfree(ptr);
|
||||
}
|
||||
|
||||
|
@ -1012,9 +1015,19 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
|
|||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
ASSERT(skc->skc_magic == SKC_MAGIC);
|
||||
ASSERT((skc->skc_flags & KMC_SLAB) == 0);
|
||||
might_sleep();
|
||||
|
||||
*obj = NULL;
|
||||
|
||||
/*
|
||||
* Since we can't sleep attempt an emergency allocation to satisfy
|
||||
* the request. The only alterative is to fail the allocation but
|
||||
* it's preferable try. The use of KM_NOSLEEP is expected to be rare.
|
||||
*/
|
||||
if (flags & KM_NOSLEEP)
|
||||
return (spl_emergency_alloc(skc, flags, obj));
|
||||
|
||||
might_sleep();
|
||||
|
||||
/*
|
||||
* Before allocating a new slab wait for any reaping to complete and
|
||||
* then return so the local magazine can be rechecked for new objects.
|
||||
|
|
|
@ -219,7 +219,11 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
|
|||
arc_reduce_target_size(ptob(sc->nr_to_scan));
|
||||
arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
|
||||
if (current->reclaim_state != NULL)
|
||||
#ifdef HAVE_RECLAIM_STATE_RECLAIMED
|
||||
current->reclaim_state->reclaimed += sc->nr_to_scan;
|
||||
#else
|
||||
current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We are experiencing memory pressure which the arc_evict_zthr was
|
||||
|
|
|
@ -493,10 +493,8 @@ zfs_acl_release_nodes(zfs_acl_t *aclp)
|
|||
{
|
||||
zfs_acl_node_t *aclnode;
|
||||
|
||||
while ((aclnode = list_head(&aclp->z_acl))) {
|
||||
list_remove(&aclp->z_acl, aclnode);
|
||||
while ((aclnode = list_remove_head(&aclp->z_acl)))
|
||||
zfs_acl_node_free(aclnode);
|
||||
}
|
||||
aclp->z_acl_count = 0;
|
||||
aclp->z_acl_bytes = 0;
|
||||
}
|
||||
|
|
|
@ -2052,91 +2052,6 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
|
|||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read a property stored within the master node.
|
||||
*/
|
||||
int
|
||||
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
|
||||
{
|
||||
uint64_t *cached_copy = NULL;
|
||||
|
||||
/*
|
||||
* Figure out where in the objset_t the cached copy would live, if it
|
||||
* is available for the requested property.
|
||||
*/
|
||||
if (os != NULL) {
|
||||
switch (prop) {
|
||||
case ZFS_PROP_VERSION:
|
||||
cached_copy = &os->os_version;
|
||||
break;
|
||||
case ZFS_PROP_NORMALIZE:
|
||||
cached_copy = &os->os_normalization;
|
||||
break;
|
||||
case ZFS_PROP_UTF8ONLY:
|
||||
cached_copy = &os->os_utf8only;
|
||||
break;
|
||||
case ZFS_PROP_CASE:
|
||||
cached_copy = &os->os_casesensitivity;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
|
||||
*value = *cached_copy;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the property wasn't cached, look up the file system's value for
|
||||
* the property. For the version property, we look up a slightly
|
||||
* different string.
|
||||
*/
|
||||
const char *pname;
|
||||
int error = ENOENT;
|
||||
if (prop == ZFS_PROP_VERSION)
|
||||
pname = ZPL_VERSION_STR;
|
||||
else
|
||||
pname = zfs_prop_to_name(prop);
|
||||
|
||||
if (os != NULL) {
|
||||
ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
|
||||
}
|
||||
|
||||
if (error == ENOENT) {
|
||||
/* No value set, use the default value */
|
||||
switch (prop) {
|
||||
case ZFS_PROP_VERSION:
|
||||
*value = ZPL_VERSION;
|
||||
break;
|
||||
case ZFS_PROP_NORMALIZE:
|
||||
case ZFS_PROP_UTF8ONLY:
|
||||
*value = 0;
|
||||
break;
|
||||
case ZFS_PROP_CASE:
|
||||
*value = ZFS_CASE_SENSITIVE;
|
||||
break;
|
||||
case ZFS_PROP_ACLTYPE:
|
||||
*value = ZFS_ACLTYPE_OFF;
|
||||
break;
|
||||
default:
|
||||
return (error);
|
||||
}
|
||||
error = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If one of the methods for getting the property value above worked,
|
||||
* copy it into the objset_t's cache.
|
||||
*/
|
||||
if (error == 0 && cached_copy != NULL) {
|
||||
*cached_copy = *value;
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the corresponding vfs's unmounted flag is set.
|
||||
* Otherwise return false.
|
||||
|
|
|
@ -2254,6 +2254,91 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
|
|||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read a property stored within the master node.
|
||||
*/
|
||||
int
|
||||
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
|
||||
{
|
||||
uint64_t *cached_copy = NULL;
|
||||
|
||||
/*
|
||||
* Figure out where in the objset_t the cached copy would live, if it
|
||||
* is available for the requested property.
|
||||
*/
|
||||
if (os != NULL) {
|
||||
switch (prop) {
|
||||
case ZFS_PROP_VERSION:
|
||||
cached_copy = &os->os_version;
|
||||
break;
|
||||
case ZFS_PROP_NORMALIZE:
|
||||
cached_copy = &os->os_normalization;
|
||||
break;
|
||||
case ZFS_PROP_UTF8ONLY:
|
||||
cached_copy = &os->os_utf8only;
|
||||
break;
|
||||
case ZFS_PROP_CASE:
|
||||
cached_copy = &os->os_casesensitivity;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
|
||||
*value = *cached_copy;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the property wasn't cached, look up the file system's value for
|
||||
* the property. For the version property, we look up a slightly
|
||||
* different string.
|
||||
*/
|
||||
const char *pname;
|
||||
int error = ENOENT;
|
||||
if (prop == ZFS_PROP_VERSION)
|
||||
pname = ZPL_VERSION_STR;
|
||||
else
|
||||
pname = zfs_prop_to_name(prop);
|
||||
|
||||
if (os != NULL) {
|
||||
ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
|
||||
}
|
||||
|
||||
if (error == ENOENT) {
|
||||
/* No value set, use the default value */
|
||||
switch (prop) {
|
||||
case ZFS_PROP_VERSION:
|
||||
*value = ZPL_VERSION;
|
||||
break;
|
||||
case ZFS_PROP_NORMALIZE:
|
||||
case ZFS_PROP_UTF8ONLY:
|
||||
*value = 0;
|
||||
break;
|
||||
case ZFS_PROP_CASE:
|
||||
*value = ZFS_CASE_SENSITIVE;
|
||||
break;
|
||||
case ZFS_PROP_ACLTYPE:
|
||||
*value = ZFS_ACLTYPE_OFF;
|
||||
break;
|
||||
default:
|
||||
return (error);
|
||||
}
|
||||
error = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If one of the methods for getting the property value above worked,
|
||||
* copy it into the objset_t's cache.
|
||||
*/
|
||||
if (error == 0 && cached_copy != NULL) {
|
||||
*cached_copy = *value;
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
EXPORT_SYMBOL(zfs_create_fs);
|
||||
EXPORT_SYMBOL(zfs_obj_to_path);
|
||||
|
|
|
@ -965,7 +965,7 @@ static void l2arc_hdr_restore(const l2arc_log_ent_phys_t *le,
|
|||
l2arc_dev_t *dev);
|
||||
|
||||
/* L2ARC persistence write I/O routines. */
|
||||
static void l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
|
||||
static uint64_t l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
|
||||
l2arc_write_callback_t *cb);
|
||||
|
||||
/* L2ARC persistence auxiliary routines. */
|
||||
|
@ -6106,8 +6106,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
|||
asize, abd,
|
||||
ZIO_CHECKSUM_OFF,
|
||||
l2arc_read_done, cb, priority,
|
||||
zio_flags | ZIO_FLAG_DONT_CACHE |
|
||||
ZIO_FLAG_CANFAIL |
|
||||
zio_flags | ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_DONT_PROPAGATE |
|
||||
ZIO_FLAG_DONT_RETRY, B_FALSE);
|
||||
acb->acb_zio_head = rzio;
|
||||
|
@ -7866,8 +7865,7 @@ arc_fini(void)
|
|||
taskq_destroy(arc_prune_taskq);
|
||||
|
||||
mutex_enter(&arc_prune_mtx);
|
||||
while ((p = list_head(&arc_prune_list)) != NULL) {
|
||||
list_remove(&arc_prune_list, p);
|
||||
while ((p = list_remove_head(&arc_prune_list)) != NULL) {
|
||||
zfs_refcount_remove(&p->p_refcnt, &arc_prune_list);
|
||||
zfs_refcount_destroy(&p->p_refcnt);
|
||||
kmem_free(p, sizeof (*p));
|
||||
|
@ -8175,7 +8173,7 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
|
|||
static uint64_t
|
||||
l2arc_write_size(l2arc_dev_t *dev)
|
||||
{
|
||||
uint64_t size, dev_size, tsize;
|
||||
uint64_t size;
|
||||
|
||||
/*
|
||||
* Make sure our globals have meaningful values in case the user
|
||||
|
@ -8192,35 +8190,45 @@ l2arc_write_size(l2arc_dev_t *dev)
|
|||
if (arc_warm == B_FALSE)
|
||||
size += l2arc_write_boost;
|
||||
|
||||
/*
|
||||
* Make sure the write size does not exceed the size of the cache
|
||||
* device. This is important in l2arc_evict(), otherwise infinite
|
||||
* iteration can occur.
|
||||
*/
|
||||
dev_size = dev->l2ad_end - dev->l2ad_start;
|
||||
|
||||
/* We need to add in the worst case scenario of log block overhead. */
|
||||
tsize = size + l2arc_log_blk_overhead(size, dev);
|
||||
size += l2arc_log_blk_overhead(size, dev);
|
||||
if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
|
||||
/*
|
||||
* Trim ahead of the write size 64MB or (l2arc_trim_ahead/100)
|
||||
* times the writesize, whichever is greater.
|
||||
*/
|
||||
tsize += MAX(64 * 1024 * 1024,
|
||||
(tsize * l2arc_trim_ahead) / 100);
|
||||
size += MAX(64 * 1024 * 1024,
|
||||
(size * l2arc_trim_ahead) / 100);
|
||||
}
|
||||
|
||||
if (tsize >= dev_size) {
|
||||
/*
|
||||
* Make sure the write size does not exceed the size of the cache
|
||||
* device. This is important in l2arc_evict(), otherwise infinite
|
||||
* iteration can occur.
|
||||
*/
|
||||
if (size > dev->l2ad_end - dev->l2ad_start) {
|
||||
cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
|
||||
"plus the overhead of log blocks (persistent L2ARC, "
|
||||
"%llu bytes) exceeds the size of the cache device "
|
||||
"(guid %llu), resetting them to the default (%d)",
|
||||
(u_longlong_t)l2arc_log_blk_overhead(size, dev),
|
||||
(u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
|
||||
|
||||
size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
|
||||
|
||||
if (l2arc_trim_ahead > 1) {
|
||||
cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
|
||||
l2arc_trim_ahead = 1;
|
||||
}
|
||||
|
||||
if (arc_warm == B_FALSE)
|
||||
size += l2arc_write_boost;
|
||||
|
||||
size += l2arc_log_blk_overhead(size, dev);
|
||||
if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
|
||||
size += MAX(64 * 1024 * 1024,
|
||||
(size * l2arc_trim_ahead) / 100);
|
||||
}
|
||||
}
|
||||
|
||||
return (size);
|
||||
|
@ -8319,20 +8327,14 @@ l2arc_dev_get_next(void)
|
|||
static void
|
||||
l2arc_do_free_on_write(void)
|
||||
{
|
||||
list_t *buflist;
|
||||
l2arc_data_free_t *df, *df_prev;
|
||||
l2arc_data_free_t *df;
|
||||
|
||||
mutex_enter(&l2arc_free_on_write_mtx);
|
||||
buflist = l2arc_free_on_write;
|
||||
|
||||
for (df = list_tail(buflist); df; df = df_prev) {
|
||||
df_prev = list_prev(buflist, df);
|
||||
while ((df = list_remove_head(l2arc_free_on_write)) != NULL) {
|
||||
ASSERT3P(df->l2df_abd, !=, NULL);
|
||||
abd_free(df->l2df_abd);
|
||||
list_remove(buflist, df);
|
||||
kmem_free(df, sizeof (l2arc_data_free_t));
|
||||
}
|
||||
|
||||
mutex_exit(&l2arc_free_on_write_mtx);
|
||||
}
|
||||
|
||||
|
@ -8845,7 +8847,7 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
|
|||
|
||||
top:
|
||||
rerun = B_FALSE;
|
||||
if (dev->l2ad_hand >= (dev->l2ad_end - distance)) {
|
||||
if (dev->l2ad_hand + distance > dev->l2ad_end) {
|
||||
/*
|
||||
* When there is no space to accommodate upcoming writes,
|
||||
* evict to the end. Then bump the write and evict hands
|
||||
|
@ -9039,7 +9041,7 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
|
|||
*/
|
||||
ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end);
|
||||
if (!dev->l2ad_first)
|
||||
ASSERT3U(dev->l2ad_hand, <, dev->l2ad_evict);
|
||||
ASSERT3U(dev->l2ad_hand, <=, dev->l2ad_evict);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9299,7 +9301,13 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||
uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
|
||||
psize);
|
||||
|
||||
if ((write_asize + asize) > target_sz) {
|
||||
/*
|
||||
* If the allocated size of this buffer plus the max
|
||||
* size for the pending log block exceeds the evicted
|
||||
* target size, terminate writing buffers for this run.
|
||||
*/
|
||||
if (write_asize + asize +
|
||||
sizeof (l2arc_log_blk_phys_t) > target_sz) {
|
||||
full = B_TRUE;
|
||||
mutex_exit(hash_lock);
|
||||
break;
|
||||
|
@ -9413,8 +9421,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|||
* arcstat_l2_{size,asize} kstats are updated
|
||||
* internally.
|
||||
*/
|
||||
if (l2arc_log_blk_insert(dev, hdr))
|
||||
l2arc_log_blk_commit(dev, pio, cb);
|
||||
if (l2arc_log_blk_insert(dev, hdr)) {
|
||||
/*
|
||||
* l2ad_hand will be adjusted in
|
||||
* l2arc_log_blk_commit().
|
||||
*/
|
||||
write_asize +=
|
||||
l2arc_log_blk_commit(dev, pio, cb);
|
||||
}
|
||||
|
||||
zio_nowait(wzio);
|
||||
}
|
||||
|
@ -10173,8 +10187,7 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev)
|
|||
err = zio_wait(zio_read_phys(NULL, dev->l2ad_vdev,
|
||||
VDEV_LABEL_START_SIZE, l2dhdr_asize, abd,
|
||||
ZIO_CHECKSUM_LABEL, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
|
||||
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
|
||||
ZIO_FLAG_SPECULATIVE, B_FALSE));
|
||||
|
||||
abd_free(abd);
|
||||
|
@ -10494,11 +10507,10 @@ l2arc_log_blk_fetch(vdev_t *vd, const l2arc_log_blkptr_t *lbp,
|
|||
cb = kmem_zalloc(sizeof (l2arc_read_callback_t), KM_SLEEP);
|
||||
cb->l2rcb_abd = abd_get_from_buf(lb, asize);
|
||||
pio = zio_root(vd->vdev_spa, l2arc_blk_fetch_done, cb,
|
||||
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
|
||||
ZIO_FLAG_DONT_RETRY);
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY);
|
||||
(void) zio_nowait(zio_read_phys(pio, vd, lbp->lbp_daddr, asize,
|
||||
cb->l2rcb_abd, ZIO_CHECKSUM_OFF, NULL, NULL,
|
||||
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
|
||||
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY, B_FALSE));
|
||||
|
||||
return (pio);
|
||||
|
@ -10564,7 +10576,7 @@ l2arc_dev_hdr_update(l2arc_dev_t *dev)
|
|||
* This function allocates some memory to temporarily hold the serialized
|
||||
* buffer to be written. This is then released in l2arc_write_done.
|
||||
*/
|
||||
static void
|
||||
static uint64_t
|
||||
l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
|
||||
{
|
||||
l2arc_log_blk_phys_t *lb = &dev->l2ad_log_blk;
|
||||
|
@ -10675,6 +10687,8 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
|
|||
dev->l2ad_log_ent_idx = 0;
|
||||
dev->l2ad_log_blk_payload_asize = 0;
|
||||
dev->l2ad_log_blk_payload_start = 0;
|
||||
|
||||
return (asize);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -65,9 +65,8 @@ bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
|
|||
bplist_entry_t *bpe;
|
||||
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
while ((bpe = list_head(&bpl->bpl_list))) {
|
||||
while ((bpe = list_remove_head(&bpl->bpl_list))) {
|
||||
bplist_iterate_last_removed = bpe;
|
||||
list_remove(&bpl->bpl_list, bpe);
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
func(arg, &bpe->bpe_blk, tx);
|
||||
kmem_free(bpe, sizeof (*bpe));
|
||||
|
@ -82,10 +81,7 @@ bplist_clear(bplist_t *bpl)
|
|||
bplist_entry_t *bpe;
|
||||
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
while ((bpe = list_head(&bpl->bpl_list))) {
|
||||
bplist_iterate_last_removed = bpe;
|
||||
list_remove(&bpl->bpl_list, bpe);
|
||||
while ((bpe = list_remove_head(&bpl->bpl_list)))
|
||||
kmem_free(bpe, sizeof (*bpe));
|
||||
}
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
}
|
||||
|
|
|
@ -193,14 +193,20 @@ zfs_btree_leaf_free(zfs_btree_t *tree, void *ptr)
|
|||
|
||||
void
|
||||
zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
|
||||
size_t size)
|
||||
bt_find_in_buf_f bt_find_in_buf, size_t size)
|
||||
{
|
||||
zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE);
|
||||
zfs_btree_create_custom(tree, compar, bt_find_in_buf, size,
|
||||
BTREE_LEAF_SIZE);
|
||||
}
|
||||
|
||||
static void *
|
||||
zfs_btree_find_in_buf(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,
|
||||
const void *value, zfs_btree_index_t *where);
|
||||
|
||||
void
|
||||
zfs_btree_create_custom(zfs_btree_t *tree,
|
||||
int (*compar) (const void *, const void *),
|
||||
bt_find_in_buf_f bt_find_in_buf,
|
||||
size_t size, size_t lsize)
|
||||
{
|
||||
size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems);
|
||||
|
@ -208,6 +214,8 @@ zfs_btree_create_custom(zfs_btree_t *tree,
|
|||
ASSERT3U(size, <=, esize / 2);
|
||||
memset(tree, 0, sizeof (*tree));
|
||||
tree->bt_compar = compar;
|
||||
tree->bt_find_in_buf = (bt_find_in_buf == NULL) ?
|
||||
zfs_btree_find_in_buf : bt_find_in_buf;
|
||||
tree->bt_elem_size = size;
|
||||
tree->bt_leaf_size = lsize;
|
||||
tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
|
||||
|
@ -303,7 +311,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
|
|||
* element in the last leaf, it's in the last leaf or
|
||||
* it's not in the tree.
|
||||
*/
|
||||
void *d = zfs_btree_find_in_buf(tree,
|
||||
void *d = tree->bt_find_in_buf(tree,
|
||||
last_leaf->btl_elems +
|
||||
last_leaf->btl_hdr.bth_first * size,
|
||||
last_leaf->btl_hdr.bth_count, value, &idx);
|
||||
|
@ -327,7 +335,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
|
|||
for (node = (zfs_btree_core_t *)tree->bt_root; depth < tree->bt_height;
|
||||
node = (zfs_btree_core_t *)node->btc_children[child], depth++) {
|
||||
ASSERT3P(node, !=, NULL);
|
||||
void *d = zfs_btree_find_in_buf(tree, node->btc_elems,
|
||||
void *d = tree->bt_find_in_buf(tree, node->btc_elems,
|
||||
node->btc_hdr.bth_count, value, &idx);
|
||||
EQUIV(d != NULL, !idx.bti_before);
|
||||
if (d != NULL) {
|
||||
|
@ -347,7 +355,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
|
|||
*/
|
||||
zfs_btree_leaf_t *leaf = (depth == 0 ?
|
||||
(zfs_btree_leaf_t *)tree->bt_root : (zfs_btree_leaf_t *)node);
|
||||
void *d = zfs_btree_find_in_buf(tree, leaf->btl_elems +
|
||||
void *d = tree->bt_find_in_buf(tree, leaf->btl_elems +
|
||||
leaf->btl_hdr.bth_first * size,
|
||||
leaf->btl_hdr.bth_count, value, &idx);
|
||||
|
||||
|
@ -671,7 +679,7 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
|
|||
zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
|
||||
zfs_btree_index_t idx;
|
||||
ASSERT(zfs_btree_is_core(par_hdr));
|
||||
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
|
||||
VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
|
||||
par_hdr->bth_count, buf, &idx), ==, NULL);
|
||||
ASSERT(idx.bti_before);
|
||||
uint32_t offset = idx.bti_offset;
|
||||
|
@ -897,7 +905,7 @@ zfs_btree_find_parent_idx(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
|
|||
}
|
||||
zfs_btree_index_t idx;
|
||||
zfs_btree_core_t *parent = hdr->bth_parent;
|
||||
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
|
||||
VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
|
||||
parent->btc_hdr.bth_count, buf, &idx), ==, NULL);
|
||||
ASSERT(idx.bti_before);
|
||||
ASSERT3U(idx.bti_offset, <=, parent->btc_hdr.bth_count);
|
||||
|
|
|
@ -49,8 +49,12 @@ static dataset_kstat_values_t empty_dataset_kstats = {
|
|||
{ "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_normal_write", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_normal_alloc", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 }
|
||||
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_slog_write", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_slog_alloc", KSTAT_DATA_UINT64 }
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -1755,9 +1755,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
|
|||
taskq_wait(dmu_objset_pool(os)->dp_sync_taskq);
|
||||
|
||||
list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
|
||||
while ((dr = list_head(list)) != NULL) {
|
||||
while ((dr = list_remove_head(list)) != NULL) {
|
||||
ASSERT0(dr->dr_dbuf->db_level);
|
||||
list_remove(list, dr);
|
||||
zio_nowait(dr->dr_zio);
|
||||
}
|
||||
|
||||
|
|
|
@ -1371,8 +1371,8 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
|
|||
dnode_t *dn;
|
||||
abd_t *abd = rrd->abd;
|
||||
zio_cksum_t bp_cksum = bp->blk_cksum;
|
||||
zio_flag_t flags = ZIO_FLAG_SPECULATIVE |
|
||||
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL;
|
||||
zio_flag_t flags = ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_RETRY |
|
||||
ZIO_FLAG_CANFAIL;
|
||||
|
||||
if (rwa->raw)
|
||||
flags |= ZIO_FLAG_RAW;
|
||||
|
|
|
@ -1955,7 +1955,7 @@ setup_featureflags(struct dmu_send_params *dspp, objset_t *os,
|
|||
{
|
||||
dsl_dataset_t *to_ds = dspp->to_ds;
|
||||
dsl_pool_t *dp = dspp->dp;
|
||||
#ifdef _KERNEL
|
||||
|
||||
if (dmu_objset_type(os) == DMU_OST_ZFS) {
|
||||
uint64_t version;
|
||||
if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0)
|
||||
|
@ -1964,7 +1964,6 @@ setup_featureflags(struct dmu_send_params *dspp, objset_t *os,
|
|||
if (version >= ZPL_VERSION_SA)
|
||||
*featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* raw sends imply large_block_ok */
|
||||
if ((dspp->rawok || dspp->large_block_ok) &&
|
||||
|
@ -2793,6 +2792,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
|
|||
}
|
||||
|
||||
if (err == 0) {
|
||||
owned = B_TRUE;
|
||||
err = zap_lookup(dspp.dp->dp_meta_objset,
|
||||
dspp.to_ds->ds_object,
|
||||
DS_FIELD_RESUME_TOGUID, 8, 1,
|
||||
|
@ -2806,21 +2806,24 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
|
|||
sizeof (dspp.saved_toname),
|
||||
dspp.saved_toname);
|
||||
}
|
||||
if (err != 0)
|
||||
/* Only disown if there was an error in the lookups */
|
||||
if (owned && (err != 0))
|
||||
dsl_dataset_disown(dspp.to_ds, dsflags, FTAG);
|
||||
|
||||
kmem_strfree(name);
|
||||
} else {
|
||||
err = dsl_dataset_own(dspp.dp, tosnap, dsflags,
|
||||
FTAG, &dspp.to_ds);
|
||||
if (err == 0)
|
||||
owned = B_TRUE;
|
||||
}
|
||||
owned = B_TRUE;
|
||||
} else {
|
||||
err = dsl_dataset_hold_flags(dspp.dp, tosnap, dsflags, FTAG,
|
||||
&dspp.to_ds);
|
||||
}
|
||||
|
||||
if (err != 0) {
|
||||
/* Note: dsl dataset is not owned at this point */
|
||||
dsl_pool_rele(dspp.dp, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
|
|
@ -1396,8 +1396,7 @@ dmu_tx_do_callbacks(list_t *cb_list, int error)
|
|||
{
|
||||
dmu_tx_callback_t *dcb;
|
||||
|
||||
while ((dcb = list_tail(cb_list)) != NULL) {
|
||||
list_remove(cb_list, dcb);
|
||||
while ((dcb = list_remove_tail(cb_list)) != NULL) {
|
||||
dcb->dcb_func(dcb->dcb_data, error);
|
||||
kmem_free(dcb, sizeof (dmu_tx_callback_t));
|
||||
}
|
||||
|
|
|
@ -520,8 +520,7 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
|
|||
issued = pf_end - pf_start + ipf_end - ipf_start;
|
||||
if (issued > 1) {
|
||||
/* More references on top of taken in dmu_zfetch_prepare(). */
|
||||
for (int i = 0; i < issued - 1; i++)
|
||||
zfs_refcount_add(&zs->zs_refs, NULL);
|
||||
zfs_refcount_add_few(&zs->zs_refs, issued - 1, NULL);
|
||||
} else if (issued == 0) {
|
||||
/* Some other thread has done our work, so drop the ref. */
|
||||
if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
|
||||
|
|
|
@ -3782,8 +3782,7 @@ snaplist_destroy(list_t *l, const void *tag)
|
|||
if (l == NULL || !list_link_active(&l->list_head))
|
||||
return;
|
||||
|
||||
while ((snap = list_tail(l)) != NULL) {
|
||||
list_remove(l, snap);
|
||||
while ((snap = list_remove_tail(l)) != NULL) {
|
||||
dsl_dataset_rele(snap->ds, tag);
|
||||
kmem_free(snap, sizeof (*snap));
|
||||
}
|
||||
|
|
|
@ -1490,7 +1490,7 @@ dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
|
|||
if (tr_cookie == NULL)
|
||||
return;
|
||||
|
||||
while ((tr = list_head(tr_list)) != NULL) {
|
||||
while ((tr = list_remove_head(tr_list)) != NULL) {
|
||||
if (tr->tr_ds) {
|
||||
mutex_enter(&tr->tr_ds->dd_lock);
|
||||
ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
|
||||
|
@ -1500,7 +1500,6 @@ dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
|
|||
} else {
|
||||
arc_tempreserve_clear(tr->tr_size);
|
||||
}
|
||||
list_remove(tr_list, tr);
|
||||
kmem_free(tr, sizeof (struct tempreserve));
|
||||
}
|
||||
|
||||
|
|
|
@ -234,7 +234,7 @@ static int zfs_resilver_disable_defer = B_FALSE;
|
|||
static int zfs_free_bpobj_enabled = 1;
|
||||
|
||||
/* Error blocks to be scrubbed in one txg. */
|
||||
uint_t zfs_scrub_error_blocks_per_txg = 1 << 12;
|
||||
static uint_t zfs_scrub_error_blocks_per_txg = 1 << 12;
|
||||
|
||||
/* the order has to match pool_scan_type */
|
||||
static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = {
|
||||
|
@ -3437,10 +3437,8 @@ scan_io_queues_run_one(void *arg)
|
|||
* If we were suspended in the middle of processing,
|
||||
* requeue any unfinished sios and exit.
|
||||
*/
|
||||
while ((sio = list_head(&sio_list)) != NULL) {
|
||||
list_remove(&sio_list, sio);
|
||||
while ((sio = list_remove_head(&sio_list)) != NULL)
|
||||
scan_io_queue_insert_impl(queue, sio);
|
||||
}
|
||||
|
||||
queue->q_zio = NULL;
|
||||
mutex_exit(q_lock);
|
||||
|
@ -4877,6 +4875,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
|
|||
* with single operation. Plus it makes scrubs more sequential and reduces
|
||||
* chances that minor extent change move it within the B-tree.
|
||||
*/
|
||||
__attribute__((always_inline)) inline
|
||||
static int
|
||||
ext_size_compare(const void *x, const void *y)
|
||||
{
|
||||
|
@ -4885,13 +4884,17 @@ ext_size_compare(const void *x, const void *y)
|
|||
return (TREE_CMP(*a, *b));
|
||||
}
|
||||
|
||||
ZFS_BTREE_FIND_IN_BUF_FUNC(ext_size_find_in_buf, uint64_t,
|
||||
ext_size_compare)
|
||||
|
||||
static void
|
||||
ext_size_create(range_tree_t *rt, void *arg)
|
||||
{
|
||||
(void) rt;
|
||||
zfs_btree_t *size_tree = arg;
|
||||
|
||||
zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
|
||||
zfs_btree_create(size_tree, ext_size_compare, ext_size_find_in_buf,
|
||||
sizeof (uint64_t));
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -148,8 +148,7 @@ zfs_zevent_drain(zevent_t *ev)
|
|||
list_remove(&zevent_list, ev);
|
||||
|
||||
/* Remove references to this event in all private file data */
|
||||
while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
|
||||
list_remove(&ev->ev_ze_list, ze);
|
||||
while ((ze = list_remove_head(&ev->ev_ze_list)) != NULL) {
|
||||
ze->ze_zevent = NULL;
|
||||
ze->ze_dropped++;
|
||||
}
|
||||
|
|
|
@ -1342,6 +1342,7 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
|
|||
* Comparison function for the private size-ordered tree using 32-bit
|
||||
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
|
||||
*/
|
||||
__attribute__((always_inline)) inline
|
||||
static int
|
||||
metaslab_rangesize32_compare(const void *x1, const void *x2)
|
||||
{
|
||||
|
@ -1352,16 +1353,15 @@ metaslab_rangesize32_compare(const void *x1, const void *x2)
|
|||
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
|
||||
|
||||
int cmp = TREE_CMP(rs_size1, rs_size2);
|
||||
if (likely(cmp))
|
||||
return (cmp);
|
||||
|
||||
return (TREE_CMP(r1->rs_start, r2->rs_start));
|
||||
return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
|
||||
}
|
||||
|
||||
/*
|
||||
* Comparison function for the private size-ordered tree using 64-bit
|
||||
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
|
||||
*/
|
||||
__attribute__((always_inline)) inline
|
||||
static int
|
||||
metaslab_rangesize64_compare(const void *x1, const void *x2)
|
||||
{
|
||||
|
@ -1372,11 +1372,10 @@ metaslab_rangesize64_compare(const void *x1, const void *x2)
|
|||
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
|
||||
|
||||
int cmp = TREE_CMP(rs_size1, rs_size2);
|
||||
if (likely(cmp))
|
||||
return (cmp);
|
||||
|
||||
return (TREE_CMP(r1->rs_start, r2->rs_start));
|
||||
return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
|
||||
}
|
||||
|
||||
typedef struct metaslab_rt_arg {
|
||||
zfs_btree_t *mra_bt;
|
||||
uint32_t mra_floor_shift;
|
||||
|
@ -1412,6 +1411,13 @@ metaslab_size_tree_full_load(range_tree_t *rt)
|
|||
range_tree_walk(rt, metaslab_size_sorted_add, &arg);
|
||||
}
|
||||
|
||||
|
||||
ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize32_in_buf,
|
||||
range_seg32_t, metaslab_rangesize32_compare)
|
||||
|
||||
ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize64_in_buf,
|
||||
range_seg64_t, metaslab_rangesize64_compare)
|
||||
|
||||
/*
|
||||
* Create any block allocator specific components. The current allocators
|
||||
* rely on using both a size-ordered range_tree_t and an array of uint64_t's.
|
||||
|
@ -1424,19 +1430,22 @@ metaslab_rt_create(range_tree_t *rt, void *arg)
|
|||
|
||||
size_t size;
|
||||
int (*compare) (const void *, const void *);
|
||||
bt_find_in_buf_f bt_find;
|
||||
switch (rt->rt_type) {
|
||||
case RANGE_SEG32:
|
||||
size = sizeof (range_seg32_t);
|
||||
compare = metaslab_rangesize32_compare;
|
||||
bt_find = metaslab_rt_find_rangesize32_in_buf;
|
||||
break;
|
||||
case RANGE_SEG64:
|
||||
size = sizeof (range_seg64_t);
|
||||
compare = metaslab_rangesize64_compare;
|
||||
bt_find = metaslab_rt_find_rangesize64_in_buf;
|
||||
break;
|
||||
default:
|
||||
panic("Invalid range seg type %d", rt->rt_type);
|
||||
}
|
||||
zfs_btree_create(size_tree, compare, size);
|
||||
zfs_btree_create(size_tree, compare, bt_find, size);
|
||||
mrap->mra_floor_shift = metaslab_by_size_min_shift;
|
||||
}
|
||||
|
||||
|
@ -5641,8 +5650,7 @@ metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, int allocator,
|
|||
* We reserve the slots individually so that we can unreserve
|
||||
* them individually when an I/O completes.
|
||||
*/
|
||||
for (int d = 0; d < slots; d++)
|
||||
zfs_refcount_add(&mca->mca_alloc_slots, zio);
|
||||
zfs_refcount_add_few(&mca->mca_alloc_slots, slots, zio);
|
||||
zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
@ -5656,8 +5664,7 @@ metaslab_class_throttle_unreserve(metaslab_class_t *mc, int slots,
|
|||
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
|
||||
|
||||
ASSERT(mc->mc_alloc_throttle_enabled);
|
||||
for (int d = 0; d < slots; d++)
|
||||
zfs_refcount_remove(&mca->mca_alloc_slots, zio);
|
||||
zfs_refcount_remove_few(&mca->mca_alloc_slots, slots, zio);
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
|
@ -151,6 +151,7 @@ range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
|
|||
rt->rt_histogram[idx]--;
|
||||
}
|
||||
|
||||
__attribute__((always_inline)) inline
|
||||
static int
|
||||
range_tree_seg32_compare(const void *x1, const void *x2)
|
||||
{
|
||||
|
@ -163,6 +164,7 @@ range_tree_seg32_compare(const void *x1, const void *x2)
|
|||
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
|
||||
}
|
||||
|
||||
__attribute__((always_inline)) inline
|
||||
static int
|
||||
range_tree_seg64_compare(const void *x1, const void *x2)
|
||||
{
|
||||
|
@ -175,6 +177,7 @@ range_tree_seg64_compare(const void *x1, const void *x2)
|
|||
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
|
||||
}
|
||||
|
||||
__attribute__((always_inline)) inline
|
||||
static int
|
||||
range_tree_seg_gap_compare(const void *x1, const void *x2)
|
||||
{
|
||||
|
@ -187,6 +190,15 @@ range_tree_seg_gap_compare(const void *x1, const void *x2)
|
|||
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
|
||||
}
|
||||
|
||||
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg32_find_in_buf, range_seg32_t,
|
||||
range_tree_seg32_compare)
|
||||
|
||||
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg64_find_in_buf, range_seg64_t,
|
||||
range_tree_seg64_compare)
|
||||
|
||||
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg_gap_find_in_buf, range_seg_gap_t,
|
||||
range_tree_seg_gap_compare)
|
||||
|
||||
range_tree_t *
|
||||
range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
|
||||
void *arg, uint64_t start, uint64_t shift, uint64_t gap)
|
||||
|
@ -197,23 +209,27 @@ range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
|
|||
ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES);
|
||||
size_t size;
|
||||
int (*compare) (const void *, const void *);
|
||||
bt_find_in_buf_f bt_find;
|
||||
switch (type) {
|
||||
case RANGE_SEG32:
|
||||
size = sizeof (range_seg32_t);
|
||||
compare = range_tree_seg32_compare;
|
||||
bt_find = range_tree_seg32_find_in_buf;
|
||||
break;
|
||||
case RANGE_SEG64:
|
||||
size = sizeof (range_seg64_t);
|
||||
compare = range_tree_seg64_compare;
|
||||
bt_find = range_tree_seg64_find_in_buf;
|
||||
break;
|
||||
case RANGE_SEG_GAP:
|
||||
size = sizeof (range_seg_gap_t);
|
||||
compare = range_tree_seg_gap_compare;
|
||||
bt_find = range_tree_seg_gap_find_in_buf;
|
||||
break;
|
||||
default:
|
||||
panic("Invalid range seg type %d", type);
|
||||
}
|
||||
zfs_btree_create(&rt->rt_root, compare, size);
|
||||
zfs_btree_create(&rt->rt_root, compare, bt_find, size);
|
||||
|
||||
rt->rt_ops = ops;
|
||||
rt->rt_gap = gap;
|
||||
|
|
|
@ -88,14 +88,11 @@ zfs_refcount_destroy_many(zfs_refcount_t *rc, uint64_t number)
|
|||
reference_t *ref;
|
||||
|
||||
ASSERT3U(rc->rc_count, ==, number);
|
||||
while ((ref = list_head(&rc->rc_list))) {
|
||||
list_remove(&rc->rc_list, ref);
|
||||
while ((ref = list_remove_head(&rc->rc_list)))
|
||||
kmem_cache_free(reference_cache, ref);
|
||||
}
|
||||
list_destroy(&rc->rc_list);
|
||||
|
||||
while ((ref = list_head(&rc->rc_removed))) {
|
||||
list_remove(&rc->rc_removed, ref);
|
||||
while ((ref = list_remove_head(&rc->rc_removed))) {
|
||||
kmem_cache_free(reference_history_cache, ref->ref_removed);
|
||||
kmem_cache_free(reference_cache, ref);
|
||||
}
|
||||
|
@ -151,6 +148,15 @@ zfs_refcount_add(zfs_refcount_t *rc, const void *holder)
|
|||
return (zfs_refcount_add_many(rc, 1, holder));
|
||||
}
|
||||
|
||||
void
|
||||
zfs_refcount_add_few(zfs_refcount_t *rc, uint64_t number, const void *holder)
|
||||
{
|
||||
if (!rc->rc_tracked)
|
||||
(void) zfs_refcount_add_many(rc, number, holder);
|
||||
else for (; number > 0; number--)
|
||||
(void) zfs_refcount_add(rc, holder);
|
||||
}
|
||||
|
||||
int64_t
|
||||
zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number,
|
||||
const void *holder)
|
||||
|
@ -204,6 +210,15 @@ zfs_refcount_remove(zfs_refcount_t *rc, const void *holder)
|
|||
return (zfs_refcount_remove_many(rc, 1, holder));
|
||||
}
|
||||
|
||||
void
|
||||
zfs_refcount_remove_few(zfs_refcount_t *rc, uint64_t number, const void *holder)
|
||||
{
|
||||
if (!rc->rc_tracked)
|
||||
(void) zfs_refcount_remove_many(rc, number, holder);
|
||||
else for (; number > 0; number--)
|
||||
(void) zfs_refcount_remove(rc, holder);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src)
|
||||
{
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
* Copyright 2017 Joyent, Inc.
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
|
||||
* Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
@ -1608,16 +1609,16 @@ spa_unload_log_sm_metadata(spa_t *spa)
|
|||
{
|
||||
void *cookie = NULL;
|
||||
spa_log_sm_t *sls;
|
||||
log_summary_entry_t *e;
|
||||
|
||||
while ((sls = avl_destroy_nodes(&spa->spa_sm_logs_by_txg,
|
||||
&cookie)) != NULL) {
|
||||
VERIFY0(sls->sls_mscount);
|
||||
kmem_free(sls, sizeof (spa_log_sm_t));
|
||||
}
|
||||
|
||||
for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
|
||||
e != NULL; e = list_head(&spa->spa_log_summary)) {
|
||||
while ((e = list_remove_head(&spa->spa_log_summary)) != NULL) {
|
||||
VERIFY0(e->lse_mscount);
|
||||
list_remove(&spa->spa_log_summary, e);
|
||||
kmem_free(e, sizeof (log_summary_entry_t));
|
||||
}
|
||||
|
||||
|
@ -6874,9 +6875,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
|
|||
if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
|
||||
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
|
||||
|
||||
if (dsl_scan_resilvering(spa_get_dsl(spa)))
|
||||
if (dsl_scan_resilvering(spa_get_dsl(spa)) ||
|
||||
dsl_scan_resilver_scheduled(spa_get_dsl(spa))) {
|
||||
return (spa_vdev_exit(spa, NULL, txg,
|
||||
ZFS_ERR_RESILVER_IN_PROGRESS));
|
||||
}
|
||||
} else {
|
||||
if (vdev_rebuild_active(rvd))
|
||||
return (spa_vdev_exit(spa, NULL, txg,
|
||||
|
|
|
@ -814,8 +814,7 @@ spa_remove(spa_t *spa)
|
|||
if (spa->spa_root)
|
||||
spa_strfree(spa->spa_root);
|
||||
|
||||
while ((dp = list_head(&spa->spa_config_list)) != NULL) {
|
||||
list_remove(&spa->spa_config_list, dp);
|
||||
while ((dp = list_remove_head(&spa->spa_config_list)) != NULL) {
|
||||
if (dp->scd_path != NULL)
|
||||
spa_strfree(dp->scd_path);
|
||||
kmem_free(dp, sizeof (spa_config_dirent_t));
|
||||
|
@ -2439,7 +2438,6 @@ spa_init(spa_mode_t mode)
|
|||
zio_init();
|
||||
dmu_init();
|
||||
zil_init();
|
||||
vdev_cache_stat_init();
|
||||
vdev_mirror_stat_init();
|
||||
vdev_raidz_math_init();
|
||||
vdev_file_init();
|
||||
|
@ -2463,7 +2461,6 @@ spa_fini(void)
|
|||
spa_evict_all();
|
||||
|
||||
vdev_file_fini();
|
||||
vdev_cache_stat_fini();
|
||||
vdev_mirror_stat_fini();
|
||||
vdev_raidz_math_fini();
|
||||
chksum_fini();
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
* Copyright (c) 2017, Intel Corporation.
|
||||
* Copyright (c) 2019, Datto Inc. All rights reserved.
|
||||
* Copyright (c) 2021, Klara Inc.
|
||||
* Copyright [2021] Hewlett Packard Enterprise Development LP
|
||||
* Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
@ -715,7 +715,6 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
|||
offsetof(struct vdev, vdev_dtl_node));
|
||||
vd->vdev_stat.vs_timestamp = gethrtime();
|
||||
vdev_queue_init(vd);
|
||||
vdev_cache_init(vd);
|
||||
|
||||
return (vd);
|
||||
}
|
||||
|
@ -1096,7 +1095,6 @@ vdev_free(vdev_t *vd)
|
|||
* Clean up vdev structure.
|
||||
*/
|
||||
vdev_queue_fini(vd);
|
||||
vdev_cache_fini(vd);
|
||||
|
||||
if (vd->vdev_path)
|
||||
spa_strfree(vd->vdev_path);
|
||||
|
@ -1720,8 +1718,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
|
|||
vps = kmem_zalloc(sizeof (*vps), KM_SLEEP);
|
||||
|
||||
vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
|
||||
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
|
||||
ZIO_FLAG_TRYHARD;
|
||||
ZIO_FLAG_DONT_AGGREGATE | ZIO_FLAG_TRYHARD;
|
||||
|
||||
if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
|
||||
/*
|
||||
|
@ -2612,8 +2609,6 @@ vdev_close(vdev_t *vd)
|
|||
|
||||
vd->vdev_ops->vdev_op_close(vd);
|
||||
|
||||
vdev_cache_purge(vd);
|
||||
|
||||
/*
|
||||
* We record the previous state before we close it, so that if we are
|
||||
* doing a reopen(), we don't generate FMA ereports if we notice that
|
||||
|
@ -2699,6 +2694,17 @@ vdev_reopen(vdev_t *vd)
|
|||
(void) vdev_validate(vd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Recheck if resilver is still needed and cancel any
|
||||
* scheduled resilver if resilver is unneeded.
|
||||
*/
|
||||
if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) &&
|
||||
spa->spa_async_tasks & SPA_ASYNC_RESILVER) {
|
||||
mutex_enter(&spa->spa_async_lock);
|
||||
spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER;
|
||||
mutex_exit(&spa->spa_async_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reassess parent vdev's health.
|
||||
*/
|
||||
|
|
|
@ -1,436 +0,0 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/kstat.h>
|
||||
#include <sys/abd.h>
|
||||
|
||||
/*
|
||||
* Virtual device read-ahead caching.
|
||||
*
|
||||
* This file implements a simple LRU read-ahead cache. When the DMU reads
|
||||
* a given block, it will often want other, nearby blocks soon thereafter.
|
||||
* We take advantage of this by reading a larger disk region and caching
|
||||
* the result. In the best case, this can turn 128 back-to-back 512-byte
|
||||
* reads into a single 64k read followed by 127 cache hits; this reduces
|
||||
* latency dramatically. In the worst case, it can turn an isolated 512-byte
|
||||
* read into a 64k read, which doesn't affect latency all that much but is
|
||||
* terribly wasteful of bandwidth. A more intelligent version of the cache
|
||||
* could keep track of access patterns and not do read-ahead unless it sees
|
||||
* at least two temporally close I/Os to the same region. Currently, only
|
||||
* metadata I/O is inflated. A further enhancement could take advantage of
|
||||
* more semantic information about the I/O. And it could use something
|
||||
* faster than an AVL tree; that was chosen solely for convenience.
|
||||
*
|
||||
* There are five cache operations: allocate, fill, read, write, evict.
|
||||
*
|
||||
* (1) Allocate. This reserves a cache entry for the specified region.
|
||||
* We separate the allocate and fill operations so that multiple threads
|
||||
* don't generate I/O for the same cache miss.
|
||||
*
|
||||
* (2) Fill. When the I/O for a cache miss completes, the fill routine
|
||||
* places the data in the previously allocated cache entry.
|
||||
*
|
||||
* (3) Read. Read data from the cache.
|
||||
*
|
||||
* (4) Write. Update cache contents after write completion.
|
||||
*
|
||||
* (5) Evict. When allocating a new entry, we evict the oldest (LRU) entry
|
||||
* if the total cache size exceeds zfs_vdev_cache_size.
|
||||
*/
|
||||
|
||||
/*
|
||||
* These tunables are for performance analysis.
|
||||
*/
|
||||
/*
|
||||
* All i/os smaller than zfs_vdev_cache_max will be turned into
|
||||
* 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
|
||||
* track buffer). At most zfs_vdev_cache_size bytes will be kept in each
|
||||
* vdev's vdev_cache.
|
||||
*
|
||||
* TODO: Note that with the current ZFS code, it turns out that the
|
||||
* vdev cache is not helpful, and in some cases actually harmful. It
|
||||
* is better if we disable this. Once some time has passed, we should
|
||||
* actually remove this to simplify the code. For now we just disable
|
||||
* it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11
|
||||
* has made these same changes.
|
||||
*/
|
||||
static uint_t zfs_vdev_cache_max = 1 << 14; /* 16KB */
|
||||
static uint_t zfs_vdev_cache_size = 0;
|
||||
static uint_t zfs_vdev_cache_bshift = 16;
|
||||
|
||||
#define VCBS (1 << zfs_vdev_cache_bshift) /* 64KB */
|
||||
|
||||
static kstat_t *vdc_ksp = NULL;
|
||||
|
||||
typedef struct vdc_stats {
|
||||
kstat_named_t vdc_stat_delegations;
|
||||
kstat_named_t vdc_stat_hits;
|
||||
kstat_named_t vdc_stat_misses;
|
||||
} vdc_stats_t;
|
||||
|
||||
static vdc_stats_t vdc_stats = {
|
||||
{ "delegations", KSTAT_DATA_UINT64 },
|
||||
{ "hits", KSTAT_DATA_UINT64 },
|
||||
{ "misses", KSTAT_DATA_UINT64 }
|
||||
};
|
||||
|
||||
#define VDCSTAT_BUMP(stat) atomic_inc_64(&vdc_stats.stat.value.ui64);
|
||||
|
||||
static inline int
|
||||
vdev_cache_offset_compare(const void *a1, const void *a2)
|
||||
{
|
||||
const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
|
||||
const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
|
||||
|
||||
return (TREE_CMP(ve1->ve_offset, ve2->ve_offset));
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_cache_lastused_compare(const void *a1, const void *a2)
|
||||
{
|
||||
const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
|
||||
const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
|
||||
|
||||
int cmp = TREE_CMP(ve1->ve_lastused, ve2->ve_lastused);
|
||||
if (likely(cmp))
|
||||
return (cmp);
|
||||
|
||||
/*
|
||||
* Among equally old entries, sort by offset to ensure uniqueness.
|
||||
*/
|
||||
return (vdev_cache_offset_compare(a1, a2));
|
||||
}
|
||||
|
||||
/*
|
||||
* Evict the specified entry from the cache.
|
||||
*/
|
||||
static void
|
||||
vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&vc->vc_lock));
|
||||
ASSERT3P(ve->ve_fill_io, ==, NULL);
|
||||
ASSERT3P(ve->ve_abd, !=, NULL);
|
||||
|
||||
avl_remove(&vc->vc_lastused_tree, ve);
|
||||
avl_remove(&vc->vc_offset_tree, ve);
|
||||
abd_free(ve->ve_abd);
|
||||
kmem_free(ve, sizeof (vdev_cache_entry_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an entry in the cache. At the point we don't have the data,
|
||||
* we're just creating a placeholder so that multiple threads don't all
|
||||
* go off and read the same blocks.
|
||||
*/
|
||||
static vdev_cache_entry_t *
|
||||
vdev_cache_allocate(zio_t *zio)
|
||||
{
|
||||
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
|
||||
uint64_t offset = P2ALIGN(zio->io_offset, VCBS);
|
||||
vdev_cache_entry_t *ve;
|
||||
|
||||
ASSERT(MUTEX_HELD(&vc->vc_lock));
|
||||
|
||||
if (zfs_vdev_cache_size == 0)
|
||||
return (NULL);
|
||||
|
||||
/*
|
||||
* If adding a new entry would exceed the cache size,
|
||||
* evict the oldest entry (LRU).
|
||||
*/
|
||||
if ((avl_numnodes(&vc->vc_lastused_tree) << zfs_vdev_cache_bshift) >
|
||||
zfs_vdev_cache_size) {
|
||||
ve = avl_first(&vc->vc_lastused_tree);
|
||||
if (ve->ve_fill_io != NULL)
|
||||
return (NULL);
|
||||
ASSERT3U(ve->ve_hits, !=, 0);
|
||||
vdev_cache_evict(vc, ve);
|
||||
}
|
||||
|
||||
ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP);
|
||||
ve->ve_offset = offset;
|
||||
ve->ve_lastused = ddi_get_lbolt();
|
||||
ve->ve_abd = abd_alloc_for_io(VCBS, B_TRUE);
|
||||
|
||||
avl_add(&vc->vc_offset_tree, ve);
|
||||
avl_add(&vc->vc_lastused_tree, ve);
|
||||
|
||||
return (ve);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio)
|
||||
{
|
||||
uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
|
||||
|
||||
ASSERT(MUTEX_HELD(&vc->vc_lock));
|
||||
ASSERT3P(ve->ve_fill_io, ==, NULL);
|
||||
|
||||
if (ve->ve_lastused != ddi_get_lbolt()) {
|
||||
avl_remove(&vc->vc_lastused_tree, ve);
|
||||
ve->ve_lastused = ddi_get_lbolt();
|
||||
avl_add(&vc->vc_lastused_tree, ve);
|
||||
}
|
||||
|
||||
ve->ve_hits++;
|
||||
abd_copy_off(zio->io_abd, ve->ve_abd, 0, cache_phase, zio->io_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill a previously allocated cache entry with data.
|
||||
*/
|
||||
static void
|
||||
vdev_cache_fill(zio_t *fio)
|
||||
{
|
||||
vdev_t *vd = fio->io_vd;
|
||||
vdev_cache_t *vc = &vd->vdev_cache;
|
||||
vdev_cache_entry_t *ve = fio->io_private;
|
||||
zio_t *pio;
|
||||
|
||||
ASSERT3U(fio->io_size, ==, VCBS);
|
||||
|
||||
/*
|
||||
* Add data to the cache.
|
||||
*/
|
||||
mutex_enter(&vc->vc_lock);
|
||||
|
||||
ASSERT3P(ve->ve_fill_io, ==, fio);
|
||||
ASSERT3U(ve->ve_offset, ==, fio->io_offset);
|
||||
ASSERT3P(ve->ve_abd, ==, fio->io_abd);
|
||||
|
||||
ve->ve_fill_io = NULL;
|
||||
|
||||
/*
|
||||
* Even if this cache line was invalidated by a missed write update,
|
||||
* any reads that were queued up before the missed update are still
|
||||
* valid, so we can satisfy them from this line before we evict it.
|
||||
*/
|
||||
zio_link_t *zl = NULL;
|
||||
while ((pio = zio_walk_parents(fio, &zl)) != NULL)
|
||||
vdev_cache_hit(vc, ve, pio);
|
||||
|
||||
if (fio->io_error || ve->ve_missed_update)
|
||||
vdev_cache_evict(vc, ve);
|
||||
|
||||
mutex_exit(&vc->vc_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read data from the cache. Returns B_TRUE cache hit, B_FALSE on miss.
|
||||
*/
|
||||
boolean_t
|
||||
vdev_cache_read(zio_t *zio)
|
||||
{
|
||||
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
|
||||
vdev_cache_entry_t *ve, ve_search;
|
||||
uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS);
|
||||
zio_t *fio;
|
||||
uint64_t cache_phase __maybe_unused = P2PHASE(zio->io_offset, VCBS);
|
||||
|
||||
ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
|
||||
|
||||
if (zfs_vdev_cache_size == 0)
|
||||
return (B_FALSE);
|
||||
|
||||
if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
|
||||
return (B_FALSE);
|
||||
|
||||
if (zio->io_size > zfs_vdev_cache_max)
|
||||
return (B_FALSE);
|
||||
|
||||
/*
|
||||
* If the I/O straddles two or more cache blocks, don't cache it.
|
||||
*/
|
||||
if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS))
|
||||
return (B_FALSE);
|
||||
|
||||
ASSERT3U(cache_phase + zio->io_size, <=, VCBS);
|
||||
|
||||
mutex_enter(&vc->vc_lock);
|
||||
|
||||
ve_search.ve_offset = cache_offset;
|
||||
ve = avl_find(&vc->vc_offset_tree, &ve_search, NULL);
|
||||
|
||||
if (ve != NULL) {
|
||||
if (ve->ve_missed_update) {
|
||||
mutex_exit(&vc->vc_lock);
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
if ((fio = ve->ve_fill_io) != NULL) {
|
||||
zio_vdev_io_bypass(zio);
|
||||
zio_add_child(zio, fio);
|
||||
mutex_exit(&vc->vc_lock);
|
||||
VDCSTAT_BUMP(vdc_stat_delegations);
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
vdev_cache_hit(vc, ve, zio);
|
||||
zio_vdev_io_bypass(zio);
|
||||
|
||||
mutex_exit(&vc->vc_lock);
|
||||
VDCSTAT_BUMP(vdc_stat_hits);
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
ve = vdev_cache_allocate(zio);
|
||||
|
||||
if (ve == NULL) {
|
||||
mutex_exit(&vc->vc_lock);
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
fio = zio_vdev_delegated_io(zio->io_vd, cache_offset,
|
||||
ve->ve_abd, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW,
|
||||
ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve);
|
||||
|
||||
ve->ve_fill_io = fio;
|
||||
zio_vdev_io_bypass(zio);
|
||||
zio_add_child(zio, fio);
|
||||
|
||||
mutex_exit(&vc->vc_lock);
|
||||
zio_nowait(fio);
|
||||
VDCSTAT_BUMP(vdc_stat_misses);
|
||||
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update cache contents upon write completion.
|
||||
*/
|
||||
void
|
||||
vdev_cache_write(zio_t *zio)
|
||||
{
|
||||
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
|
||||
vdev_cache_entry_t *ve, ve_search;
|
||||
uint64_t io_start = zio->io_offset;
|
||||
uint64_t io_end = io_start + zio->io_size;
|
||||
uint64_t min_offset = P2ALIGN(io_start, VCBS);
|
||||
uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
|
||||
avl_index_t where;
|
||||
|
||||
ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
|
||||
|
||||
mutex_enter(&vc->vc_lock);
|
||||
|
||||
ve_search.ve_offset = min_offset;
|
||||
ve = avl_find(&vc->vc_offset_tree, &ve_search, &where);
|
||||
|
||||
if (ve == NULL)
|
||||
ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER);
|
||||
|
||||
while (ve != NULL && ve->ve_offset < max_offset) {
|
||||
uint64_t start = MAX(ve->ve_offset, io_start);
|
||||
uint64_t end = MIN(ve->ve_offset + VCBS, io_end);
|
||||
|
||||
if (ve->ve_fill_io != NULL) {
|
||||
ve->ve_missed_update = 1;
|
||||
} else {
|
||||
abd_copy_off(ve->ve_abd, zio->io_abd,
|
||||
start - ve->ve_offset, start - io_start,
|
||||
end - start);
|
||||
}
|
||||
ve = AVL_NEXT(&vc->vc_offset_tree, ve);
|
||||
}
|
||||
mutex_exit(&vc->vc_lock);
|
||||
}
|
||||
|
||||
void
|
||||
vdev_cache_purge(vdev_t *vd)
|
||||
{
|
||||
vdev_cache_t *vc = &vd->vdev_cache;
|
||||
vdev_cache_entry_t *ve;
|
||||
|
||||
mutex_enter(&vc->vc_lock);
|
||||
while ((ve = avl_first(&vc->vc_offset_tree)) != NULL)
|
||||
vdev_cache_evict(vc, ve);
|
||||
mutex_exit(&vc->vc_lock);
|
||||
}
|
||||
|
||||
void
|
||||
vdev_cache_init(vdev_t *vd)
|
||||
{
|
||||
vdev_cache_t *vc = &vd->vdev_cache;
|
||||
|
||||
mutex_init(&vc->vc_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
avl_create(&vc->vc_offset_tree, vdev_cache_offset_compare,
|
||||
sizeof (vdev_cache_entry_t),
|
||||
offsetof(struct vdev_cache_entry, ve_offset_node));
|
||||
|
||||
avl_create(&vc->vc_lastused_tree, vdev_cache_lastused_compare,
|
||||
sizeof (vdev_cache_entry_t),
|
||||
offsetof(struct vdev_cache_entry, ve_lastused_node));
|
||||
}
|
||||
|
||||
void
|
||||
vdev_cache_fini(vdev_t *vd)
|
||||
{
|
||||
vdev_cache_t *vc = &vd->vdev_cache;
|
||||
|
||||
vdev_cache_purge(vd);
|
||||
|
||||
avl_destroy(&vc->vc_offset_tree);
|
||||
avl_destroy(&vc->vc_lastused_tree);
|
||||
|
||||
mutex_destroy(&vc->vc_lock);
|
||||
}
|
||||
|
||||
void
|
||||
vdev_cache_stat_init(void)
|
||||
{
|
||||
vdc_ksp = kstat_create("zfs", 0, "vdev_cache_stats", "misc",
|
||||
KSTAT_TYPE_NAMED, sizeof (vdc_stats) / sizeof (kstat_named_t),
|
||||
KSTAT_FLAG_VIRTUAL);
|
||||
if (vdc_ksp != NULL) {
|
||||
vdc_ksp->ks_data = &vdc_stats;
|
||||
kstat_install(vdc_ksp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vdev_cache_stat_fini(void)
|
||||
{
|
||||
if (vdc_ksp != NULL) {
|
||||
kstat_delete(vdc_ksp);
|
||||
vdc_ksp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_max, UINT, ZMOD_RW,
|
||||
"Inflate reads small than max");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_size, UINT, ZMOD_RD,
|
||||
"Total size of the per-disk cache");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_bshift, UINT, ZMOD_RW,
|
||||
"Shift size to inflate reads too");
|
|
@ -293,17 +293,16 @@ vdev_indirect_map_free(zio_t *zio)
|
|||
indirect_vsd_t *iv = zio->io_vsd;
|
||||
|
||||
indirect_split_t *is;
|
||||
while ((is = list_head(&iv->iv_splits)) != NULL) {
|
||||
while ((is = list_remove_head(&iv->iv_splits)) != NULL) {
|
||||
for (int c = 0; c < is->is_children; c++) {
|
||||
indirect_child_t *ic = &is->is_child[c];
|
||||
if (ic->ic_data != NULL)
|
||||
abd_free(ic->ic_data);
|
||||
}
|
||||
list_remove(&iv->iv_splits, is);
|
||||
|
||||
indirect_child_t *ic;
|
||||
while ((ic = list_head(&is->is_unique_child)) != NULL)
|
||||
list_remove(&is->is_unique_child, ic);
|
||||
while ((ic = list_remove_head(&is->is_unique_child)) != NULL)
|
||||
;
|
||||
|
||||
list_destroy(&is->is_unique_child);
|
||||
|
||||
|
@ -1659,8 +1658,8 @@ vdev_indirect_splits_damage(indirect_vsd_t *iv, zio_t *zio)
|
|||
for (indirect_split_t *is = list_head(&iv->iv_splits);
|
||||
is != NULL; is = list_next(&iv->iv_splits, is)) {
|
||||
indirect_child_t *ic;
|
||||
while ((ic = list_head(&is->is_unique_child)) != NULL)
|
||||
list_remove(&is->is_unique_child, ic);
|
||||
while ((ic = list_remove_head(&is->is_unique_child)) != NULL)
|
||||
;
|
||||
|
||||
is->is_unique_children = 0;
|
||||
}
|
||||
|
|
|
@ -748,8 +748,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
|
|||
|
||||
aio = zio_vdev_delegated_io(first->io_vd, first->io_offset,
|
||||
abd, size, first->io_type, zio->io_priority,
|
||||
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
|
||||
vdev_queue_agg_io_done, NULL);
|
||||
flags | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL);
|
||||
aio->io_timestamp = first->io_timestamp;
|
||||
|
||||
nio = first;
|
||||
|
@ -907,7 +906,7 @@ vdev_queue_io(zio_t *zio)
|
|||
ASSERT(zio->io_priority == ZIO_PRIORITY_TRIM);
|
||||
}
|
||||
|
||||
zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
|
||||
zio->io_flags |= ZIO_FLAG_DONT_QUEUE;
|
||||
zio->io_timestamp = gethrtime();
|
||||
|
||||
mutex_enter(&vq->vq_lock);
|
||||
|
|
|
@ -285,6 +285,7 @@ zap_byteswap(void *buf, size_t size)
|
|||
}
|
||||
}
|
||||
|
||||
__attribute__((always_inline)) inline
|
||||
static int
|
||||
mze_compare(const void *arg1, const void *arg2)
|
||||
{
|
||||
|
@ -295,6 +296,9 @@ mze_compare(const void *arg1, const void *arg2)
|
|||
(uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
|
||||
}
|
||||
|
||||
ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf, mzap_ent_t,
|
||||
mze_compare)
|
||||
|
||||
static void
|
||||
mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
|
||||
{
|
||||
|
@ -461,7 +465,7 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
|
|||
* 62 entries before we have to add 2KB B-tree core node.
|
||||
*/
|
||||
zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
|
||||
sizeof (mzap_ent_t), 512);
|
||||
mze_find_in_buf, sizeof (mzap_ent_t), 512);
|
||||
|
||||
zap_name_t *zn = zap_name_alloc(zap);
|
||||
for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
|
||||
|
|
|
@ -1522,9 +1522,8 @@ zfs_ereport_fini(void)
|
|||
{
|
||||
recent_events_node_t *entry;
|
||||
|
||||
while ((entry = list_head(&recent_events_list)) != NULL) {
|
||||
while ((entry = list_remove_head(&recent_events_list)) != NULL) {
|
||||
avl_remove(&recent_events_tree, entry);
|
||||
list_remove(&recent_events_list, entry);
|
||||
kmem_free(entry, sizeof (*entry));
|
||||
}
|
||||
avl_destroy(&recent_events_tree);
|
||||
|
|
|
@ -699,19 +699,15 @@ zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
|
|||
zfs_fuid_t *zfuid;
|
||||
zfs_fuid_domain_t *zdomain;
|
||||
|
||||
while ((zfuid = list_head(&fuidp->z_fuids)) != NULL) {
|
||||
list_remove(&fuidp->z_fuids, zfuid);
|
||||
while ((zfuid = list_remove_head(&fuidp->z_fuids)) != NULL)
|
||||
kmem_free(zfuid, sizeof (zfs_fuid_t));
|
||||
}
|
||||
|
||||
if (fuidp->z_domain_table != NULL)
|
||||
kmem_free(fuidp->z_domain_table,
|
||||
(sizeof (char *)) * fuidp->z_domain_cnt);
|
||||
|
||||
while ((zdomain = list_head(&fuidp->z_domains)) != NULL) {
|
||||
list_remove(&fuidp->z_domains, zdomain);
|
||||
while ((zdomain = list_remove_head(&fuidp->z_domains)) != NULL)
|
||||
kmem_free(zdomain, sizeof (zfs_fuid_domain_t));
|
||||
}
|
||||
|
||||
kmem_free(fuidp, sizeof (zfs_fuid_info_t));
|
||||
}
|
||||
|
|
|
@ -87,8 +87,7 @@ zfs_onexit_destroy(zfs_onexit_t *zo)
|
|||
zfs_onexit_action_node_t *ap;
|
||||
|
||||
mutex_enter(&zo->zo_lock);
|
||||
while ((ap = list_head(&zo->zo_actions)) != NULL) {
|
||||
list_remove(&zo->zo_actions, ap);
|
||||
while ((ap = list_remove_head(&zo->zo_actions)) != NULL) {
|
||||
mutex_exit(&zo->zo_lock);
|
||||
ap->za_func(ap->za_data);
|
||||
kmem_free(ap, sizeof (zfs_onexit_action_node_t));
|
||||
|
|
|
@ -116,8 +116,12 @@ static zil_kstat_values_t zil_stats = {
|
|||
{ "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_normal_write", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_normal_alloc", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_slog_write", KSTAT_DATA_UINT64 },
|
||||
{ "zil_itx_metaslab_slog_alloc", KSTAT_DATA_UINT64 },
|
||||
};
|
||||
|
||||
static zil_sums_t zil_sums_global;
|
||||
|
@ -146,6 +150,9 @@ static uint64_t zil_slog_bulk = 768 * 1024;
|
|||
static kmem_cache_t *zil_lwb_cache;
|
||||
static kmem_cache_t *zil_zcw_cache;
|
||||
|
||||
static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
|
||||
static itx_t *zil_itx_clone(itx_t *oitx);
|
||||
|
||||
static int
|
||||
zil_bp_compare(const void *x1, const void *x2)
|
||||
{
|
||||
|
@ -241,11 +248,10 @@ zil_kstats_global_update(kstat_t *ksp, int rw)
|
|||
*/
|
||||
static int
|
||||
zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
|
||||
blkptr_t *nbp, void *dst, char **end)
|
||||
blkptr_t *nbp, char **begin, char **end, arc_buf_t **abuf)
|
||||
{
|
||||
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
|
||||
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||
arc_buf_t *abuf = NULL;
|
||||
zbookmark_phys_t zb;
|
||||
int error;
|
||||
|
||||
|
@ -262,7 +268,7 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
|
|||
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
|
||||
|
||||
error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
|
||||
&abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
|
||||
abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
|
||||
|
||||
if (error == 0) {
|
||||
zio_cksum_t cksum = bp->blk_cksum;
|
||||
|
@ -277,23 +283,23 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
|
|||
*/
|
||||
cksum.zc_word[ZIL_ZC_SEQ]++;
|
||||
|
||||
uint64_t size = BP_GET_LSIZE(bp);
|
||||
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
|
||||
zil_chain_t *zilc = abuf->b_data;
|
||||
zil_chain_t *zilc = (*abuf)->b_data;
|
||||
char *lr = (char *)(zilc + 1);
|
||||
uint64_t len = zilc->zc_nused - sizeof (zil_chain_t);
|
||||
|
||||
if (memcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
|
||||
sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) {
|
||||
sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) ||
|
||||
zilc->zc_nused < sizeof (*zilc) ||
|
||||
zilc->zc_nused > size) {
|
||||
error = SET_ERROR(ECKSUM);
|
||||
} else {
|
||||
ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE);
|
||||
memcpy(dst, lr, len);
|
||||
*end = (char *)dst + len;
|
||||
*begin = lr;
|
||||
*end = lr + zilc->zc_nused - sizeof (*zilc);
|
||||
*nbp = zilc->zc_next_blk;
|
||||
}
|
||||
} else {
|
||||
char *lr = abuf->b_data;
|
||||
uint64_t size = BP_GET_LSIZE(bp);
|
||||
char *lr = (*abuf)->b_data;
|
||||
zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1;
|
||||
|
||||
if (memcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
|
||||
|
@ -301,15 +307,11 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
|
|||
(zilc->zc_nused > (size - sizeof (*zilc)))) {
|
||||
error = SET_ERROR(ECKSUM);
|
||||
} else {
|
||||
ASSERT3U(zilc->zc_nused, <=,
|
||||
SPA_OLD_MAXBLOCKSIZE);
|
||||
memcpy(dst, lr, zilc->zc_nused);
|
||||
*end = (char *)dst + zilc->zc_nused;
|
||||
*begin = lr;
|
||||
*end = lr + zilc->zc_nused;
|
||||
*nbp = zilc->zc_next_blk;
|
||||
}
|
||||
}
|
||||
|
||||
arc_buf_destroy(abuf, &abuf);
|
||||
}
|
||||
|
||||
return (error);
|
||||
|
@ -375,8 +377,12 @@ zil_sums_init(zil_sums_t *zs)
|
|||
wmsum_init(&zs->zil_itx_needcopy_bytes, 0);
|
||||
wmsum_init(&zs->zil_itx_metaslab_normal_count, 0);
|
||||
wmsum_init(&zs->zil_itx_metaslab_normal_bytes, 0);
|
||||
wmsum_init(&zs->zil_itx_metaslab_normal_write, 0);
|
||||
wmsum_init(&zs->zil_itx_metaslab_normal_alloc, 0);
|
||||
wmsum_init(&zs->zil_itx_metaslab_slog_count, 0);
|
||||
wmsum_init(&zs->zil_itx_metaslab_slog_bytes, 0);
|
||||
wmsum_init(&zs->zil_itx_metaslab_slog_write, 0);
|
||||
wmsum_init(&zs->zil_itx_metaslab_slog_alloc, 0);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -393,8 +399,12 @@ zil_sums_fini(zil_sums_t *zs)
|
|||
wmsum_fini(&zs->zil_itx_needcopy_bytes);
|
||||
wmsum_fini(&zs->zil_itx_metaslab_normal_count);
|
||||
wmsum_fini(&zs->zil_itx_metaslab_normal_bytes);
|
||||
wmsum_fini(&zs->zil_itx_metaslab_normal_write);
|
||||
wmsum_fini(&zs->zil_itx_metaslab_normal_alloc);
|
||||
wmsum_fini(&zs->zil_itx_metaslab_slog_count);
|
||||
wmsum_fini(&zs->zil_itx_metaslab_slog_bytes);
|
||||
wmsum_fini(&zs->zil_itx_metaslab_slog_write);
|
||||
wmsum_fini(&zs->zil_itx_metaslab_slog_alloc);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -422,10 +432,18 @@ zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums)
|
|||
wmsum_value(&zil_sums->zil_itx_metaslab_normal_count);
|
||||
zs->zil_itx_metaslab_normal_bytes.value.ui64 =
|
||||
wmsum_value(&zil_sums->zil_itx_metaslab_normal_bytes);
|
||||
zs->zil_itx_metaslab_normal_write.value.ui64 =
|
||||
wmsum_value(&zil_sums->zil_itx_metaslab_normal_write);
|
||||
zs->zil_itx_metaslab_normal_alloc.value.ui64 =
|
||||
wmsum_value(&zil_sums->zil_itx_metaslab_normal_alloc);
|
||||
zs->zil_itx_metaslab_slog_count.value.ui64 =
|
||||
wmsum_value(&zil_sums->zil_itx_metaslab_slog_count);
|
||||
zs->zil_itx_metaslab_slog_bytes.value.ui64 =
|
||||
wmsum_value(&zil_sums->zil_itx_metaslab_slog_bytes);
|
||||
zs->zil_itx_metaslab_slog_write.value.ui64 =
|
||||
wmsum_value(&zil_sums->zil_itx_metaslab_slog_write);
|
||||
zs->zil_itx_metaslab_slog_alloc.value.ui64 =
|
||||
wmsum_value(&zil_sums->zil_itx_metaslab_slog_alloc);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -445,7 +463,6 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
|
|||
uint64_t blk_count = 0;
|
||||
uint64_t lr_count = 0;
|
||||
blkptr_t blk, next_blk = {{{{0}}}};
|
||||
char *lrbuf, *lrp;
|
||||
int error = 0;
|
||||
|
||||
/*
|
||||
|
@ -463,13 +480,13 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
|
|||
* If the log has been claimed, stop if we encounter a sequence
|
||||
* number greater than the highest claimed sequence number.
|
||||
*/
|
||||
lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE);
|
||||
zil_bp_tree_init(zilog);
|
||||
|
||||
for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) {
|
||||
uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
|
||||
int reclen;
|
||||
char *end = NULL;
|
||||
char *lrp, *end;
|
||||
arc_buf_t *abuf = NULL;
|
||||
|
||||
if (blk_seq > claim_blk_seq)
|
||||
break;
|
||||
|
@ -485,8 +502,10 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
|
|||
break;
|
||||
|
||||
error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
|
||||
lrbuf, &end);
|
||||
&lrp, &end, &abuf);
|
||||
if (error != 0) {
|
||||
if (abuf)
|
||||
arc_buf_destroy(abuf, &abuf);
|
||||
if (claimed) {
|
||||
char name[ZFS_MAX_DATASET_NAME_LEN];
|
||||
|
||||
|
@ -499,7 +518,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
|
|||
break;
|
||||
}
|
||||
|
||||
for (lrp = lrbuf; lrp < end; lrp += reclen) {
|
||||
for (; lrp < end; lrp += reclen) {
|
||||
lr_t *lr = (lr_t *)lrp;
|
||||
reclen = lr->lrc_reclen;
|
||||
ASSERT3U(reclen, >=, sizeof (lr_t));
|
||||
|
@ -513,6 +532,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
|
|||
max_lr_seq = lr->lrc_seq;
|
||||
lr_count++;
|
||||
}
|
||||
arc_buf_destroy(abuf, &abuf);
|
||||
}
|
||||
done:
|
||||
zilog->zl_parse_error = error;
|
||||
|
@ -522,7 +542,6 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
|
|||
zilog->zl_parse_lr_count = lr_count;
|
||||
|
||||
zil_bp_tree_fini(zilog);
|
||||
zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
@ -747,20 +766,21 @@ zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg,
|
|||
lwb->lwb_blk = *bp;
|
||||
lwb->lwb_fastwrite = fastwrite;
|
||||
lwb->lwb_slog = slog;
|
||||
lwb->lwb_indirect = B_FALSE;
|
||||
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
|
||||
lwb->lwb_nused = lwb->lwb_nfilled = sizeof (zil_chain_t);
|
||||
lwb->lwb_sz = BP_GET_LSIZE(bp);
|
||||
} else {
|
||||
lwb->lwb_nused = lwb->lwb_nfilled = 0;
|
||||
lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t);
|
||||
}
|
||||
lwb->lwb_state = LWB_STATE_CLOSED;
|
||||
lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp));
|
||||
lwb->lwb_max_txg = txg;
|
||||
lwb->lwb_write_zio = NULL;
|
||||
lwb->lwb_root_zio = NULL;
|
||||
lwb->lwb_issued_timestamp = 0;
|
||||
lwb->lwb_issued_txg = 0;
|
||||
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
|
||||
lwb->lwb_nused = sizeof (zil_chain_t);
|
||||
lwb->lwb_sz = BP_GET_LSIZE(bp);
|
||||
} else {
|
||||
lwb->lwb_nused = 0;
|
||||
lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t);
|
||||
}
|
||||
lwb->lwb_max_txg = txg;
|
||||
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
list_insert_tail(&zilog->zl_lwb_list, lwb);
|
||||
|
@ -1373,9 +1393,14 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
|
|||
zil_commit_waiter_t *zcw;
|
||||
itx_t *itx;
|
||||
uint64_t txg;
|
||||
list_t itxs, waiters;
|
||||
|
||||
spa_config_exit(zilog->zl_spa, SCL_STATE, lwb);
|
||||
|
||||
list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
|
||||
list_create(&waiters, sizeof (zil_commit_waiter_t),
|
||||
offsetof(zil_commit_waiter_t, zcw_node));
|
||||
|
||||
hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp;
|
||||
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
|
@ -1384,9 +1409,6 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
|
|||
|
||||
lwb->lwb_root_zio = NULL;
|
||||
|
||||
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
|
||||
lwb->lwb_state = LWB_STATE_FLUSH_DONE;
|
||||
|
||||
if (zilog->zl_last_lwb_opened == lwb) {
|
||||
/*
|
||||
* Remember the highest committed log sequence number
|
||||
|
@ -1397,13 +1419,21 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
|
|||
zilog->zl_commit_lr_seq = zilog->zl_lr_seq;
|
||||
}
|
||||
|
||||
while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
|
||||
zil_itx_destroy(itx);
|
||||
list_move_tail(&itxs, &lwb->lwb_itxs);
|
||||
list_move_tail(&waiters, &lwb->lwb_waiters);
|
||||
|
||||
while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
|
||||
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
|
||||
lwb->lwb_state = LWB_STATE_FLUSH_DONE;
|
||||
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
|
||||
while ((itx = list_remove_head(&itxs)) != NULL)
|
||||
zil_itx_destroy(itx);
|
||||
list_destroy(&itxs);
|
||||
|
||||
while ((zcw = list_remove_head(&waiters)) != NULL) {
|
||||
mutex_enter(&zcw->zcw_lock);
|
||||
|
||||
ASSERT3P(zcw->zcw_lwb, ==, lwb);
|
||||
zcw->zcw_lwb = NULL;
|
||||
/*
|
||||
* We expect any ZIO errors from child ZIOs to have been
|
||||
|
@ -1428,8 +1458,7 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
|
|||
|
||||
mutex_exit(&zcw->zcw_lock);
|
||||
}
|
||||
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
list_destroy(&waiters);
|
||||
|
||||
mutex_enter(&zilog->zl_lwb_io_lock);
|
||||
txg = lwb->lwb_issued_txg;
|
||||
|
@ -1666,46 +1695,41 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
|
|||
EQUIV(lwb->lwb_root_zio == NULL, lwb->lwb_state == LWB_STATE_CLOSED);
|
||||
EQUIV(lwb->lwb_root_zio != NULL, lwb->lwb_state == LWB_STATE_OPENED);
|
||||
|
||||
if (lwb->lwb_root_zio != NULL)
|
||||
return;
|
||||
|
||||
lwb->lwb_root_zio = zio_root(zilog->zl_spa,
|
||||
zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL);
|
||||
|
||||
abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf,
|
||||
BP_GET_LSIZE(&lwb->lwb_blk));
|
||||
|
||||
if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
|
||||
prio = ZIO_PRIORITY_SYNC_WRITE;
|
||||
else
|
||||
prio = ZIO_PRIORITY_ASYNC_WRITE;
|
||||
|
||||
SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
|
||||
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
|
||||
lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
|
||||
|
||||
/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
if (lwb->lwb_root_zio == NULL) {
|
||||
abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf,
|
||||
BP_GET_LSIZE(&lwb->lwb_blk));
|
||||
|
||||
if (!lwb->lwb_fastwrite) {
|
||||
metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
|
||||
lwb->lwb_fastwrite = 1;
|
||||
}
|
||||
|
||||
if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
|
||||
prio = ZIO_PRIORITY_SYNC_WRITE;
|
||||
else
|
||||
prio = ZIO_PRIORITY_ASYNC_WRITE;
|
||||
|
||||
lwb->lwb_root_zio = zio_root(zilog->zl_spa,
|
||||
zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL);
|
||||
ASSERT3P(lwb->lwb_root_zio, !=, NULL);
|
||||
|
||||
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio,
|
||||
zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd,
|
||||
BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb,
|
||||
prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
|
||||
ASSERT3P(lwb->lwb_write_zio, !=, NULL);
|
||||
|
||||
lwb->lwb_state = LWB_STATE_OPENED;
|
||||
|
||||
zil_lwb_set_zio_dependency(zilog, lwb);
|
||||
zilog->zl_last_lwb_opened = lwb;
|
||||
if (!lwb->lwb_fastwrite) {
|
||||
metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
|
||||
lwb->lwb_fastwrite = 1;
|
||||
}
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
|
||||
ASSERT3P(lwb->lwb_root_zio, !=, NULL);
|
||||
ASSERT3P(lwb->lwb_write_zio, !=, NULL);
|
||||
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
|
||||
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0,
|
||||
&lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
|
||||
zil_lwb_write_done, lwb, prio,
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
|
||||
|
||||
lwb->lwb_state = LWB_STATE_OPENED;
|
||||
|
||||
zil_lwb_set_zio_dependency(zilog, lwb);
|
||||
zilog->zl_last_lwb_opened = lwb;
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1736,11 +1760,11 @@ static const struct {
|
|||
static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
|
||||
|
||||
/*
|
||||
* Start a log block write and advance to the next log block.
|
||||
* Calls are serialized.
|
||||
* Close the log block for being issued and allocate the next one.
|
||||
* Has to be called under zl_issuer_lock to chain more lwbs.
|
||||
*/
|
||||
static lwb_t *
|
||||
zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
|
||||
zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)
|
||||
{
|
||||
lwb_t *nlwb = NULL;
|
||||
zil_chain_t *zilc;
|
||||
|
@ -1748,7 +1772,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
|
|||
blkptr_t *bp;
|
||||
dmu_tx_t *tx;
|
||||
uint64_t txg;
|
||||
uint64_t zil_blksz, wsz;
|
||||
uint64_t zil_blksz;
|
||||
int i, error;
|
||||
boolean_t slog;
|
||||
|
||||
|
@ -1757,16 +1781,17 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
|
|||
ASSERT3P(lwb->lwb_write_zio, !=, NULL);
|
||||
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
|
||||
|
||||
if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
|
||||
zilc = (zil_chain_t *)lwb->lwb_buf;
|
||||
bp = &zilc->zc_next_blk;
|
||||
} else {
|
||||
zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
|
||||
bp = &zilc->zc_next_blk;
|
||||
/*
|
||||
* If this lwb includes indirect writes, we have to commit before
|
||||
* creating the transaction, otherwise we may end up in dead lock.
|
||||
*/
|
||||
if (lwb->lwb_indirect) {
|
||||
for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
|
||||
itx = list_next(&lwb->lwb_itxs, itx))
|
||||
zil_lwb_commit(zilog, lwb, itx);
|
||||
lwb->lwb_nused = lwb->lwb_nfilled;
|
||||
}
|
||||
|
||||
ASSERT(lwb->lwb_nused <= lwb->lwb_sz);
|
||||
|
||||
/*
|
||||
* Allocate the next block and save its address in this block
|
||||
* before writing it in order to establish the log chain.
|
||||
|
@ -1814,19 +1839,18 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
|
|||
zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz;
|
||||
for (i = 0; i < ZIL_PREV_BLKS; i++)
|
||||
zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]);
|
||||
DTRACE_PROBE3(zil__block__size, zilog_t *, zilog,
|
||||
uint64_t, zil_blksz,
|
||||
uint64_t, zilog->zl_prev_blks[zilog->zl_prev_rotor]);
|
||||
zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
|
||||
|
||||
if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2)
|
||||
zilc = (zil_chain_t *)lwb->lwb_buf;
|
||||
else
|
||||
zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
|
||||
bp = &zilc->zc_next_blk;
|
||||
BP_ZERO(bp);
|
||||
error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, zil_blksz, &slog);
|
||||
if (slog) {
|
||||
ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
|
||||
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes,
|
||||
lwb->lwb_nused);
|
||||
} else {
|
||||
ZIL_STAT_BUMP(zilog, zil_itx_metaslab_normal_count);
|
||||
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_bytes,
|
||||
lwb->lwb_nused);
|
||||
}
|
||||
if (error == 0) {
|
||||
ASSERT3U(bp->blk_birth, ==, txg);
|
||||
bp->blk_cksum = lwb->lwb_blk.blk_cksum;
|
||||
|
@ -1838,17 +1862,47 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
|
|||
nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE);
|
||||
}
|
||||
|
||||
lwb->lwb_state = LWB_STATE_ISSUED;
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
/*
|
||||
* If there was an allocation failure then nlwb will be null which
|
||||
* forces a txg_wait_synced().
|
||||
*/
|
||||
return (nlwb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Finalize previously closed block and issue the write zio.
|
||||
* Does not require locking.
|
||||
*/
|
||||
static void
|
||||
zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
|
||||
{
|
||||
zil_chain_t *zilc;
|
||||
int wsz;
|
||||
|
||||
/* Actually fill the lwb with the data if not yet. */
|
||||
if (!lwb->lwb_indirect) {
|
||||
for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
|
||||
itx = list_next(&lwb->lwb_itxs, itx))
|
||||
zil_lwb_commit(zilog, lwb, itx);
|
||||
lwb->lwb_nused = lwb->lwb_nfilled;
|
||||
}
|
||||
|
||||
if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
|
||||
/* For Slim ZIL only write what is used. */
|
||||
wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t);
|
||||
ASSERT3U(wsz, <=, lwb->lwb_sz);
|
||||
wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, int);
|
||||
ASSERT3S(wsz, <=, lwb->lwb_sz);
|
||||
zio_shrink(lwb->lwb_write_zio, wsz);
|
||||
wsz = lwb->lwb_write_zio->io_size;
|
||||
|
||||
zilc = (zil_chain_t *)lwb->lwb_buf;
|
||||
} else {
|
||||
wsz = lwb->lwb_sz;
|
||||
zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
|
||||
}
|
||||
|
||||
zilc->zc_pad = 0;
|
||||
zilc->zc_nused = lwb->lwb_nused;
|
||||
zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum;
|
||||
|
@ -1858,22 +1912,28 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
|
|||
*/
|
||||
memset(lwb->lwb_buf + lwb->lwb_nused, 0, wsz - lwb->lwb_nused);
|
||||
|
||||
if (lwb->lwb_slog) {
|
||||
ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
|
||||
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes,
|
||||
lwb->lwb_nused);
|
||||
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_write,
|
||||
wsz);
|
||||
ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_alloc,
|
||||
BP_GET_LSIZE(&lwb->lwb_blk));
|
||||
} else {
|
||||
ZIL_STAT_BUMP(zilog, zil_itx_metaslab_normal_count);
|
||||
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_bytes,
|
||||
lwb->lwb_nused);
|
||||
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_write,
|
||||
wsz);
|
||||
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
|
||||
BP_GET_LSIZE(&lwb->lwb_blk));
|
||||
}
|
||||
spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER);
|
||||
|
||||
zil_lwb_add_block(lwb, &lwb->lwb_blk);
|
||||
lwb->lwb_issued_timestamp = gethrtime();
|
||||
lwb->lwb_state = LWB_STATE_ISSUED;
|
||||
|
||||
zio_nowait(lwb->lwb_root_zio);
|
||||
zio_nowait(lwb->lwb_write_zio);
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
/*
|
||||
* If there was an allocation failure then nlwb will be null which
|
||||
* forces a txg_wait_synced().
|
||||
*/
|
||||
return (nlwb);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1909,13 +1969,19 @@ zil_max_copied_data(zilog_t *zilog)
|
|||
sizeof (lr_write_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* Estimate space needed in the lwb for the itx. Allocate more lwbs or
|
||||
* split the itx as needed, but don't touch the actual transaction data.
|
||||
* Has to be called under zl_issuer_lock to call zil_lwb_write_close()
|
||||
* to chain more lwbs.
|
||||
*/
|
||||
static lwb_t *
|
||||
zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
|
||||
zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
|
||||
{
|
||||
lr_t *lrcb, *lrc;
|
||||
lr_write_t *lrwb, *lrw;
|
||||
char *lr_buf;
|
||||
uint64_t dlen, dnow, dpad, lwb_sp, reclen, txg, max_log_data;
|
||||
itx_t *citx;
|
||||
lr_t *lr, *clr;
|
||||
lr_write_t *lrw;
|
||||
uint64_t dlen, dnow, lwb_sp, reclen, max_log_data;
|
||||
|
||||
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
|
||||
ASSERT3P(lwb, !=, NULL);
|
||||
|
@ -1923,8 +1989,8 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
|
|||
|
||||
zil_lwb_write_open(zilog, lwb);
|
||||
|
||||
lrc = &itx->itx_lr;
|
||||
lrw = (lr_write_t *)lrc;
|
||||
lr = &itx->itx_lr;
|
||||
lrw = (lr_write_t *)lr;
|
||||
|
||||
/*
|
||||
* A commit itx doesn't represent any on-disk state; instead
|
||||
|
@ -1938,24 +2004,23 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
|
|||
*
|
||||
* For more details, see the comment above zil_commit().
|
||||
*/
|
||||
if (lrc->lrc_txtype == TX_COMMIT) {
|
||||
if (lr->lrc_txtype == TX_COMMIT) {
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
zil_commit_waiter_link_lwb(itx->itx_private, lwb);
|
||||
itx->itx_private = NULL;
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
list_insert_tail(&lwb->lwb_itxs, itx);
|
||||
return (lwb);
|
||||
}
|
||||
|
||||
if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
|
||||
if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
|
||||
dlen = P2ROUNDUP_TYPED(
|
||||
lrw->lr_length, sizeof (uint64_t), uint64_t);
|
||||
dpad = dlen - lrw->lr_length;
|
||||
} else {
|
||||
dlen = dpad = 0;
|
||||
dlen = 0;
|
||||
}
|
||||
reclen = lrc->lrc_reclen;
|
||||
reclen = lr->lrc_reclen;
|
||||
zilog->zl_cur_used += (reclen + dlen);
|
||||
txg = lrc->lrc_txg;
|
||||
|
||||
cont:
|
||||
/*
|
||||
|
@ -1968,7 +2033,8 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
|
|||
lwb_sp < zil_max_waste_space(zilog) &&
|
||||
(dlen % max_log_data == 0 ||
|
||||
lwb_sp < reclen + dlen % max_log_data))) {
|
||||
lwb = zil_lwb_write_issue(zilog, lwb);
|
||||
list_insert_tail(ilwbs, lwb);
|
||||
lwb = zil_lwb_write_close(zilog, lwb);
|
||||
if (lwb == NULL)
|
||||
return (NULL);
|
||||
zil_lwb_write_open(zilog, lwb);
|
||||
|
@ -1987,19 +2053,99 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
|
|||
}
|
||||
|
||||
dnow = MIN(dlen, lwb_sp - reclen);
|
||||
lr_buf = lwb->lwb_buf + lwb->lwb_nused;
|
||||
memcpy(lr_buf, lrc, reclen);
|
||||
lrcb = (lr_t *)lr_buf; /* Like lrc, but inside lwb. */
|
||||
lrwb = (lr_write_t *)lrcb; /* Like lrw, but inside lwb. */
|
||||
if (dlen > dnow) {
|
||||
ASSERT3U(lr->lrc_txtype, ==, TX_WRITE);
|
||||
ASSERT3U(itx->itx_wr_state, ==, WR_NEED_COPY);
|
||||
citx = zil_itx_clone(itx);
|
||||
clr = &citx->itx_lr;
|
||||
lr_write_t *clrw = (lr_write_t *)clr;
|
||||
clrw->lr_length = dnow;
|
||||
lrw->lr_offset += dnow;
|
||||
lrw->lr_length -= dnow;
|
||||
} else {
|
||||
citx = itx;
|
||||
clr = lr;
|
||||
}
|
||||
|
||||
/*
|
||||
* We're actually making an entry, so update lrc_seq to be the
|
||||
* log record sequence number. Note that this is generally not
|
||||
* equal to the itx sequence number because not all transactions
|
||||
* are synchronous, and sometimes spa_sync() gets there first.
|
||||
*/
|
||||
clr->lrc_seq = ++zilog->zl_lr_seq;
|
||||
|
||||
lwb->lwb_nused += reclen + dnow;
|
||||
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
|
||||
ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
|
||||
|
||||
zil_lwb_add_txg(lwb, lr->lrc_txg);
|
||||
list_insert_tail(&lwb->lwb_itxs, citx);
|
||||
|
||||
dlen -= dnow;
|
||||
if (dlen > 0) {
|
||||
zilog->zl_cur_used += reclen;
|
||||
goto cont;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have to really issue all queued LWBs before we may have to
|
||||
* wait for a txg sync. Otherwise we may end up in a dead lock.
|
||||
*/
|
||||
if (lr->lrc_txtype == TX_WRITE) {
|
||||
boolean_t frozen = lr->lrc_txg > spa_freeze_txg(zilog->zl_spa);
|
||||
if (frozen || itx->itx_wr_state == WR_INDIRECT) {
|
||||
lwb_t *tlwb;
|
||||
while ((tlwb = list_remove_head(ilwbs)) != NULL)
|
||||
zil_lwb_write_issue(zilog, tlwb);
|
||||
}
|
||||
if (itx->itx_wr_state == WR_INDIRECT)
|
||||
lwb->lwb_indirect = B_TRUE;
|
||||
if (frozen)
|
||||
txg_wait_synced(zilog->zl_dmu_pool, lr->lrc_txg);
|
||||
}
|
||||
|
||||
return (lwb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill the actual transaction data into the lwb, following zil_lwb_assign().
|
||||
* Does not require locking.
|
||||
*/
|
||||
static void
|
||||
zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
|
||||
{
|
||||
lr_t *lr, *lrb;
|
||||
lr_write_t *lrw, *lrwb;
|
||||
char *lr_buf;
|
||||
uint64_t dlen, reclen;
|
||||
|
||||
lr = &itx->itx_lr;
|
||||
lrw = (lr_write_t *)lr;
|
||||
|
||||
if (lr->lrc_txtype == TX_COMMIT)
|
||||
return;
|
||||
|
||||
if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
|
||||
dlen = P2ROUNDUP_TYPED(
|
||||
lrw->lr_length, sizeof (uint64_t), uint64_t);
|
||||
} else {
|
||||
dlen = 0;
|
||||
}
|
||||
reclen = lr->lrc_reclen;
|
||||
ASSERT3U(reclen + dlen, <=, lwb->lwb_nused - lwb->lwb_nfilled);
|
||||
|
||||
lr_buf = lwb->lwb_buf + lwb->lwb_nfilled;
|
||||
memcpy(lr_buf, lr, reclen);
|
||||
lrb = (lr_t *)lr_buf; /* Like lr, but inside lwb. */
|
||||
lrwb = (lr_write_t *)lrb; /* Like lrw, but inside lwb. */
|
||||
|
||||
ZIL_STAT_BUMP(zilog, zil_itx_count);
|
||||
|
||||
/*
|
||||
* If it's a write, fetch the data or get its blkptr as appropriate.
|
||||
*/
|
||||
if (lrc->lrc_txtype == TX_WRITE) {
|
||||
if (txg > spa_freeze_txg(zilog->zl_spa))
|
||||
txg_wait_synced(zilog->zl_dmu_pool, txg);
|
||||
if (lr->lrc_txtype == TX_WRITE) {
|
||||
if (itx->itx_wr_state == WR_COPIED) {
|
||||
ZIL_STAT_BUMP(zilog, zil_itx_copied_count);
|
||||
ZIL_STAT_INCR(zilog, zil_itx_copied_bytes,
|
||||
|
@ -2010,14 +2156,10 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
|
|||
|
||||
if (itx->itx_wr_state == WR_NEED_COPY) {
|
||||
dbuf = lr_buf + reclen;
|
||||
lrcb->lrc_reclen += dnow;
|
||||
if (lrwb->lr_length > dnow)
|
||||
lrwb->lr_length = dnow;
|
||||
lrw->lr_offset += dnow;
|
||||
lrw->lr_length -= dnow;
|
||||
lrb->lrc_reclen += dlen;
|
||||
ZIL_STAT_BUMP(zilog, zil_itx_needcopy_count);
|
||||
ZIL_STAT_INCR(zilog, zil_itx_needcopy_bytes,
|
||||
dnow);
|
||||
dlen);
|
||||
} else {
|
||||
ASSERT3S(itx->itx_wr_state, ==, WR_INDIRECT);
|
||||
dbuf = NULL;
|
||||
|
@ -2044,9 +2186,11 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
|
|||
error = zilog->zl_get_data(itx->itx_private,
|
||||
itx->itx_gen, lrwb, dbuf, lwb,
|
||||
lwb->lwb_write_zio);
|
||||
if (dbuf != NULL && error == 0 && dnow == dlen)
|
||||
if (dbuf != NULL && error == 0) {
|
||||
/* Zero any padding bytes in the last block. */
|
||||
memset((char *)dbuf + lrwb->lr_length, 0, dpad);
|
||||
memset((char *)dbuf + lrwb->lr_length, 0,
|
||||
dlen - lrwb->lr_length);
|
||||
}
|
||||
|
||||
/*
|
||||
* Typically, the only return values we should see from
|
||||
|
@ -2074,39 +2218,26 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
|
|||
error);
|
||||
zfs_fallthrough;
|
||||
case EIO:
|
||||
txg_wait_synced(zilog->zl_dmu_pool, txg);
|
||||
if (lwb->lwb_indirect) {
|
||||
txg_wait_synced(zilog->zl_dmu_pool,
|
||||
lr->lrc_txg);
|
||||
} else {
|
||||
lwb->lwb_write_zio->io_error = error;
|
||||
}
|
||||
zfs_fallthrough;
|
||||
case ENOENT:
|
||||
zfs_fallthrough;
|
||||
case EEXIST:
|
||||
zfs_fallthrough;
|
||||
case EALREADY:
|
||||
return (lwb);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We're actually making an entry, so update lrc_seq to be the
|
||||
* log record sequence number. Note that this is generally not
|
||||
* equal to the itx sequence number because not all transactions
|
||||
* are synchronous, and sometimes spa_sync() gets there first.
|
||||
*/
|
||||
lrcb->lrc_seq = ++zilog->zl_lr_seq;
|
||||
lwb->lwb_nused += reclen + dnow;
|
||||
|
||||
zil_lwb_add_txg(lwb, txg);
|
||||
|
||||
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
|
||||
ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
|
||||
|
||||
dlen -= dnow;
|
||||
if (dlen > 0) {
|
||||
zilog->zl_cur_used += reclen;
|
||||
goto cont;
|
||||
}
|
||||
|
||||
return (lwb);
|
||||
lwb->lwb_nfilled += reclen + dlen;
|
||||
ASSERT3S(lwb->lwb_nfilled, <=, lwb->lwb_nused);
|
||||
ASSERT0(P2PHASE(lwb->lwb_nfilled, sizeof (uint64_t)));
|
||||
}
|
||||
|
||||
itx_t *
|
||||
|
@ -2131,6 +2262,16 @@ zil_itx_create(uint64_t txtype, size_t olrsize)
|
|||
return (itx);
|
||||
}
|
||||
|
||||
static itx_t *
|
||||
zil_itx_clone(itx_t *oitx)
|
||||
{
|
||||
itx_t *itx = zio_data_buf_alloc(oitx->itx_size);
|
||||
memcpy(itx, oitx, oitx->itx_size);
|
||||
itx->itx_callback = NULL;
|
||||
itx->itx_callback_data = NULL;
|
||||
return (itx);
|
||||
}
|
||||
|
||||
void
|
||||
zil_itx_destroy(itx_t *itx)
|
||||
{
|
||||
|
@ -2162,7 +2303,7 @@ zil_itxg_clean(void *arg)
|
|||
/*
|
||||
* In the general case, commit itxs will not be found
|
||||
* here, as they'll be committed to an lwb via
|
||||
* zil_lwb_commit(), and free'd in that function. Having
|
||||
* zil_lwb_assign(), and free'd in that function. Having
|
||||
* said that, it is still possible for commit itxs to be
|
||||
* found here, due to the following race:
|
||||
*
|
||||
|
@ -2561,7 +2702,7 @@ zil_commit_writer_stall(zilog_t *zilog)
|
|||
* lwb will be issued to the zio layer to be written to disk.
|
||||
*/
|
||||
static void
|
||||
zil_process_commit_list(zilog_t *zilog)
|
||||
zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
|
||||
{
|
||||
spa_t *spa = zilog->zl_spa;
|
||||
list_t nolwb_itxs;
|
||||
|
@ -2663,18 +2804,23 @@ zil_process_commit_list(zilog_t *zilog)
|
|||
*/
|
||||
if (frozen || !synced || lrc->lrc_txtype == TX_COMMIT) {
|
||||
if (lwb != NULL) {
|
||||
lwb = zil_lwb_commit(zilog, itx, lwb);
|
||||
|
||||
if (lwb == NULL)
|
||||
lwb = zil_lwb_assign(zilog, lwb, itx, ilwbs);
|
||||
if (lwb == NULL) {
|
||||
list_insert_tail(&nolwb_itxs, itx);
|
||||
else
|
||||
list_insert_tail(&lwb->lwb_itxs, itx);
|
||||
} else if ((zcw->zcw_lwb != NULL &&
|
||||
zcw->zcw_lwb != lwb) || zcw->zcw_done) {
|
||||
/*
|
||||
* Our lwb is done, leave the rest of
|
||||
* itx list to somebody else who care.
|
||||
*/
|
||||
first = B_FALSE;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (lrc->lrc_txtype == TX_COMMIT) {
|
||||
zil_commit_waiter_link_nolwb(
|
||||
itx->itx_private, &nolwb_waiters);
|
||||
}
|
||||
|
||||
list_insert_tail(&nolwb_itxs, itx);
|
||||
}
|
||||
} else {
|
||||
|
@ -2690,6 +2836,8 @@ zil_process_commit_list(zilog_t *zilog)
|
|||
* the ZIL write pipeline; see the comment within
|
||||
* zil_commit_writer_stall() for more details.
|
||||
*/
|
||||
while ((lwb = list_remove_head(ilwbs)) != NULL)
|
||||
zil_lwb_write_issue(zilog, lwb);
|
||||
zil_commit_writer_stall(zilog);
|
||||
|
||||
/*
|
||||
|
@ -2735,13 +2883,13 @@ zil_process_commit_list(zilog_t *zilog)
|
|||
* on the system, such that this function will be
|
||||
* immediately called again (not necessarily by the same
|
||||
* thread) and this lwb's zio will be issued via
|
||||
* zil_lwb_commit(). This way, the lwb is guaranteed to
|
||||
* zil_lwb_assign(). This way, the lwb is guaranteed to
|
||||
* be "full" when it is issued to disk, and we'll make
|
||||
* use of the lwb's size the best we can.
|
||||
*
|
||||
* 2. If there isn't sufficient ZIL activity occurring on
|
||||
* the system, such that this lwb's zio isn't issued via
|
||||
* zil_lwb_commit(), zil_commit_waiter() will issue the
|
||||
* zil_lwb_assign(), zil_commit_waiter() will issue the
|
||||
* lwb's zio. If this occurs, the lwb is not guaranteed
|
||||
* to be "full" by the time its zio is issued, and means
|
||||
* the size of the lwb was "too large" given the amount
|
||||
|
@ -2773,10 +2921,15 @@ zil_process_commit_list(zilog_t *zilog)
|
|||
zfs_commit_timeout_pct / 100;
|
||||
if (sleep < zil_min_commit_timeout ||
|
||||
lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
|
||||
lwb = zil_lwb_write_issue(zilog, lwb);
|
||||
list_insert_tail(ilwbs, lwb);
|
||||
lwb = zil_lwb_write_close(zilog, lwb);
|
||||
zilog->zl_cur_used = 0;
|
||||
if (lwb == NULL)
|
||||
if (lwb == NULL) {
|
||||
while ((lwb = list_remove_head(ilwbs))
|
||||
!= NULL)
|
||||
zil_lwb_write_issue(zilog, lwb);
|
||||
zil_commit_writer_stall(zilog);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2799,9 +2952,13 @@ zil_process_commit_list(zilog_t *zilog)
|
|||
static void
|
||||
zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
|
||||
{
|
||||
list_t ilwbs;
|
||||
lwb_t *lwb;
|
||||
|
||||
ASSERT(!MUTEX_HELD(&zilog->zl_lock));
|
||||
ASSERT(spa_writeable(zilog->zl_spa));
|
||||
|
||||
list_create(&ilwbs, sizeof (lwb_t), offsetof(lwb_t, lwb_issue_node));
|
||||
mutex_enter(&zilog->zl_issuer_lock);
|
||||
|
||||
if (zcw->zcw_lwb != NULL || zcw->zcw_done) {
|
||||
|
@ -2828,10 +2985,13 @@ zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
|
|||
|
||||
zil_get_commit_list(zilog);
|
||||
zil_prune_commit_list(zilog);
|
||||
zil_process_commit_list(zilog);
|
||||
zil_process_commit_list(zilog, zcw, &ilwbs);
|
||||
|
||||
out:
|
||||
mutex_exit(&zilog->zl_issuer_lock);
|
||||
while ((lwb = list_remove_head(&ilwbs)) != NULL)
|
||||
zil_lwb_write_issue(zilog, lwb);
|
||||
list_destroy(&ilwbs);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -2858,7 +3018,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
|
|||
return;
|
||||
|
||||
/*
|
||||
* In order to call zil_lwb_write_issue() we must hold the
|
||||
* In order to call zil_lwb_write_close() we must hold the
|
||||
* zilog's "zl_issuer_lock". We can't simply acquire that lock,
|
||||
* since we're already holding the commit waiter's "zcw_lock",
|
||||
* and those two locks are acquired in the opposite order
|
||||
|
@ -2876,8 +3036,10 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
|
|||
* the waiter is marked "done"), so without this check we could
|
||||
* wind up with a use-after-free error below.
|
||||
*/
|
||||
if (zcw->zcw_done)
|
||||
if (zcw->zcw_done) {
|
||||
lwb = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ASSERT3P(lwb, ==, zcw->zcw_lwb);
|
||||
|
||||
|
@ -2896,15 +3058,17 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
|
|||
* if it's ISSUED or OPENED, and block any other threads that might
|
||||
* attempt to issue this lwb. For that reason we hold the
|
||||
* zl_issuer_lock when checking the lwb_state; we must not call
|
||||
* zil_lwb_write_issue() if the lwb had already been issued.
|
||||
* zil_lwb_write_close() if the lwb had already been issued.
|
||||
*
|
||||
* See the comment above the lwb_state_t structure definition for
|
||||
* more details on the lwb states, and locking requirements.
|
||||
*/
|
||||
if (lwb->lwb_state == LWB_STATE_ISSUED ||
|
||||
lwb->lwb_state == LWB_STATE_WRITE_DONE ||
|
||||
lwb->lwb_state == LWB_STATE_FLUSH_DONE)
|
||||
lwb->lwb_state == LWB_STATE_FLUSH_DONE) {
|
||||
lwb = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
|
||||
|
||||
|
@ -2914,7 +3078,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
|
|||
* since we've reached the commit waiter's timeout and it still
|
||||
* hasn't been issued.
|
||||
*/
|
||||
lwb_t *nlwb = zil_lwb_write_issue(zilog, lwb);
|
||||
lwb_t *nlwb = zil_lwb_write_close(zilog, lwb);
|
||||
|
||||
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
|
||||
|
||||
|
@ -2934,7 +3098,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
|
|||
|
||||
if (nlwb == NULL) {
|
||||
/*
|
||||
* When zil_lwb_write_issue() returns NULL, this
|
||||
* When zil_lwb_write_close() returns NULL, this
|
||||
* indicates zio_alloc_zil() failed to allocate the
|
||||
* "next" lwb on-disk. When this occurs, the ZIL write
|
||||
* pipeline must be stalled; see the comment within the
|
||||
|
@ -2956,12 +3120,16 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
|
|||
* lock, which occurs prior to calling dmu_tx_commit()
|
||||
*/
|
||||
mutex_exit(&zcw->zcw_lock);
|
||||
zil_lwb_write_issue(zilog, lwb);
|
||||
lwb = NULL;
|
||||
zil_commit_writer_stall(zilog);
|
||||
mutex_enter(&zcw->zcw_lock);
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_exit(&zilog->zl_issuer_lock);
|
||||
if (lwb)
|
||||
zil_lwb_write_issue(zilog, lwb);
|
||||
ASSERT(MUTEX_HELD(&zcw->zcw_lock));
|
||||
}
|
||||
|
||||
|
@ -2976,7 +3144,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
|
|||
* waited "long enough" and the lwb is still in the "open" state.
|
||||
*
|
||||
* Given a sufficient amount of itxs being generated and written using
|
||||
* the ZIL, the lwb's zio will be issued via the zil_lwb_commit()
|
||||
* the ZIL, the lwb's zio will be issued via the zil_lwb_assign()
|
||||
* function. If this does not occur, this secondary responsibility will
|
||||
* ensure the lwb is issued even if there is not other synchronous
|
||||
* activity on the system.
|
||||
|
@ -3656,7 +3824,7 @@ zil_close(zilog_t *zilog)
|
|||
/*
|
||||
* zl_lwb_max_issued_txg may be larger than lwb_max_txg. It depends
|
||||
* on the time when the dmu_tx transaction is assigned in
|
||||
* zil_lwb_write_issue().
|
||||
* zil_lwb_write_close().
|
||||
*/
|
||||
mutex_enter(&zilog->zl_lwb_io_lock);
|
||||
txg = MAX(zilog->zl_lwb_max_issued_txg, txg);
|
||||
|
|
|
@ -1617,12 +1617,6 @@ zio_read_bp_init(zio_t *zio)
|
|||
ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy);
|
||||
}
|
||||
|
||||
if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
|
||||
zio->io_flags |= ZIO_FLAG_DONT_CACHE;
|
||||
|
||||
if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
|
||||
zio->io_flags |= ZIO_FLAG_DONT_CACHE;
|
||||
|
||||
if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL)
|
||||
zio->io_pipeline = ZIO_DDT_READ_PIPELINE;
|
||||
|
||||
|
@ -3955,9 +3949,6 @@ zio_vdev_io_start(zio_t *zio)
|
|||
zio->io_type == ZIO_TYPE_WRITE ||
|
||||
zio->io_type == ZIO_TYPE_TRIM)) {
|
||||
|
||||
if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
|
||||
return (zio);
|
||||
|
||||
if ((zio = vdev_queue_io(zio)) == NULL)
|
||||
return (NULL);
|
||||
|
||||
|
@ -3994,9 +3985,6 @@ zio_vdev_io_done(zio_t *zio)
|
|||
vd->vdev_ops != &vdev_draid_spare_ops) {
|
||||
vdev_queue_io_done(zio);
|
||||
|
||||
if (zio->io_type == ZIO_TYPE_WRITE)
|
||||
vdev_cache_write(zio);
|
||||
|
||||
if (zio_injection_enabled && zio->io_error == 0)
|
||||
zio->io_error = zio_handle_device_injections(vd, zio,
|
||||
EIO, EILSEQ);
|
||||
|
@ -4106,8 +4094,7 @@ zio_vdev_io_assess(zio_t *zio)
|
|||
ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_QUEUE)); /* not a leaf */
|
||||
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_BYPASS)); /* not a leaf */
|
||||
zio->io_error = 0;
|
||||
zio->io_flags |= ZIO_FLAG_IO_RETRY |
|
||||
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE;
|
||||
zio->io_flags |= ZIO_FLAG_IO_RETRY | ZIO_FLAG_DONT_AGGREGATE;
|
||||
zio->io_stage = ZIO_STAGE_VDEV_IO_START >> 1;
|
||||
zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE,
|
||||
zio_requeue_io_start_cut_in_line);
|
||||
|
|
|
@ -1203,8 +1203,7 @@ zvol_create_minors_recursive(const char *name)
|
|||
* Prefetch is completed, we can do zvol_os_create_minor
|
||||
* sequentially.
|
||||
*/
|
||||
while ((job = list_head(&minors_list)) != NULL) {
|
||||
list_remove(&minors_list, job);
|
||||
while ((job = list_remove_head(&minors_list)) != NULL) {
|
||||
if (!job->error)
|
||||
(void) zvol_os_create_minor(job->name);
|
||||
kmem_strfree(job->name);
|
||||
|
@ -1311,10 +1310,8 @@ zvol_remove_minors_impl(const char *name)
|
|||
rw_exit(&zvol_state_lock);
|
||||
|
||||
/* Drop zvol_state_lock before calling zvol_free() */
|
||||
while ((zv = list_head(&free_list)) != NULL) {
|
||||
list_remove(&free_list, zv);
|
||||
while ((zv = list_remove_head(&free_list)) != NULL)
|
||||
zvol_os_free(zv);
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove minor for this specific volume only */
|
||||
|
|
|
@ -128,7 +128,7 @@ tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
|
|||
'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress',
|
||||
'zdb_display_block', 'zdb_encrypted', 'zdb_label_checksum',
|
||||
'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_objset_id',
|
||||
'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2']
|
||||
'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2', 'zdb_backup']
|
||||
pre =
|
||||
post =
|
||||
tags = ['functional', 'cli_root', 'zdb']
|
||||
|
@ -472,7 +472,8 @@ tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
|
|||
tags = ['functional', 'cli_root', 'zpool_replace']
|
||||
|
||||
[tests/functional/cli_root/zpool_resilver]
|
||||
tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
|
||||
tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart',
|
||||
'zpool_resilver_concurrent']
|
||||
tags = ['functional', 'cli_root', 'zpool_resilver']
|
||||
|
||||
[tests/functional/cli_root/zpool_scrub]
|
||||
|
|
|
@ -25,3 +25,8 @@ tags = ['functional']
|
|||
[tests/functional/cli_root/zfs_jail:FreeBSD]
|
||||
tests = ['zfs_jail_001_pos']
|
||||
tags = ['functional', 'cli_root', 'zfs_jail']
|
||||
|
||||
[tests/functional/pam:FreeBSD]
|
||||
tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive',
|
||||
'pam_short_password']
|
||||
tags = ['functional', 'pam']
|
||||
|
|
|
@ -140,7 +140,8 @@ tests = ['umount_unlinked_drain']
|
|||
tags = ['functional', 'mount']
|
||||
|
||||
[tests/functional/pam:Linux]
|
||||
tests = ['pam_basic', 'pam_nounmount', 'pam_short_password']
|
||||
tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive',
|
||||
'pam_short_password']
|
||||
tags = ['functional', 'pam']
|
||||
|
||||
[tests/functional/procfs:Linux]
|
||||
|
|
|
@ -152,6 +152,7 @@ known = {
|
|||
['FAIL', rewind_reason],
|
||||
'cli_user/misc/zfs_share_001_neg': ['SKIP', na_reason],
|
||||
'cli_user/misc/zfs_unshare_001_neg': ['SKIP', na_reason],
|
||||
'pool_checkpoint/checkpoint_discard_busy': ['SKIP', 12053],
|
||||
'privilege/setup': ['SKIP', na_reason],
|
||||
'refreserv/refreserv_004_pos': ['FAIL', known_reason],
|
||||
'rootpool/setup': ['SKIP', na_reason],
|
||||
|
@ -163,6 +164,8 @@ if sys.platform.startswith('freebsd'):
|
|||
known.update({
|
||||
'cli_root/zfs_receive/receive-o-x_props_override':
|
||||
['FAIL', known_reason],
|
||||
'cli_root/zpool_resilver/zpool_resilver_concurrent':
|
||||
['SKIP', na_reason],
|
||||
'cli_root/zpool_wait/zpool_wait_trim_basic': ['SKIP', trim_reason],
|
||||
'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
|
||||
'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
|
||||
|
@ -277,6 +280,8 @@ elif sys.platform.startswith('linux'):
|
|||
'mmp/mmp_inactive_import': ['FAIL', known_reason],
|
||||
'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621],
|
||||
'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason],
|
||||
'zvol/zvol_misc/zvol_misc_fua': ['SKIP', 14872],
|
||||
'zvol/zvol_misc/zvol_misc_trim': ['SKIP', 14872],
|
||||
'idmap_mount/idmap_mount_001': ['SKIP', idmap_reason],
|
||||
'idmap_mount/idmap_mount_002': ['SKIP', idmap_reason],
|
||||
'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason],
|
||||
|
|
|
@ -501,7 +501,7 @@ main(int argc, char *argv[])
|
|||
srandom(seed);
|
||||
|
||||
zfs_btree_init();
|
||||
zfs_btree_create(&bt, zfs_btree_compare, sizeof (uint64_t));
|
||||
zfs_btree_create(&bt, zfs_btree_compare, NULL, sizeof (uint64_t));
|
||||
|
||||
/*
|
||||
* This runs the named negative test. None of them should
|
||||
|
|
|
@ -572,6 +572,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
|||
functional/cli_root/zdb/zdb_006_pos.ksh \
|
||||
functional/cli_root/zdb/zdb_args_neg.ksh \
|
||||
functional/cli_root/zdb/zdb_args_pos.ksh \
|
||||
functional/cli_root/zdb/zdb_backup.ksh \
|
||||
functional/cli_root/zdb/zdb_block_size_histogram.ksh \
|
||||
functional/cli_root/zdb/zdb_checksum.ksh \
|
||||
functional/cli_root/zdb/zdb_decompress.ksh \
|
||||
|
@ -1142,6 +1143,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
|||
functional/cli_root/zpool_resilver/setup.ksh \
|
||||
functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh \
|
||||
functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh \
|
||||
functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh \
|
||||
functional/cli_root/zpool_scrub/cleanup.ksh \
|
||||
functional/cli_root/zpool_scrub/setup.ksh \
|
||||
functional/cli_root/zpool_scrub/zpool_scrub_001_neg.ksh \
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
#!/bin/ksh
|
||||
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2023, Klara Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
write_count=8
|
||||
blksize=131072
|
||||
|
||||
tmpfile=$TEST_BASE_DIR/tmpfile
|
||||
|
||||
function cleanup
|
||||
{
|
||||
datasetexists $TESTPOOL && destroy_pool $TESTPOOL
|
||||
rm $tmpfile.1 $tmpfile.2
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_assert "Verify that zfs send and zdb -B produce the same stream"
|
||||
|
||||
verify_runnable "global"
|
||||
verify_disk_count "$DISKS" 2
|
||||
|
||||
default_mirror_setup_noexit $DISKS
|
||||
file_write -o create -w -f $TESTDIR/file -b $blksize -c $write_count
|
||||
|
||||
snap=$TESTPOOL/$TESTFS@snap
|
||||
log_must zfs snapshot $snap
|
||||
typeset -i objsetid=$(zfs get -Ho value objsetid $snap)
|
||||
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
log_must eval "zfs send -ecL $snap > $tmpfile.1"
|
||||
log_must eval "zdb -B $TESTPOOL/$objsetid ecL > $tmpfile.2"
|
||||
|
||||
typeset sum1=$(cat $tmpfile.1 | md5sum)
|
||||
typeset sum2=$(cat $tmpfile.2 | md5sum)
|
||||
|
||||
log_must test "$sum1" = "$sum2"
|
||||
|
||||
log_pass "zfs send and zdb -B produce the same stream"
|
|
@ -0,0 +1,101 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Verify 'zpool clear' doesn't cause concurrent resilvers
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create N(10) virtual disk files.
|
||||
# 2. Create draid pool based on the virtual disk files.
|
||||
# 3. Fill the filesystem with directories and files.
|
||||
# 4. Force-fault 2 vdevs and verify distributed spare is kicked in.
|
||||
# 5. Free the distributed spare by replacing the faulty drive.
|
||||
# 6. Run zpool clear and verify that it does not initiate 2 resilvers
|
||||
# concurrently while distributed spare gets kicked in.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
typeset -ir devs=10
|
||||
typeset -ir nparity=1
|
||||
typeset -ir ndata=8
|
||||
typeset -ir dspare=1
|
||||
|
||||
function cleanup
|
||||
{
|
||||
poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
|
||||
|
||||
for i in {0..$devs}; do
|
||||
log_must rm -f "$BASEDIR/vdev$i"
|
||||
done
|
||||
|
||||
for dir in $BASEDIR; do
|
||||
if [[ -d $dir ]]; then
|
||||
log_must rm -rf $dir
|
||||
fi
|
||||
done
|
||||
|
||||
zed_stop
|
||||
zed_cleanup
|
||||
}
|
||||
|
||||
log_assert "Verify zpool clear on draid pool doesn't cause concurrent resilvers"
|
||||
log_onexit cleanup
|
||||
|
||||
setup_test_env $TESTPOOL draid${nparity}:${ndata}d:${dspare}s $devs
|
||||
|
||||
# ZED needed for sequential resilver
|
||||
zed_setup
|
||||
log_must zed_start
|
||||
|
||||
log_must zpool offline -f $TESTPOOL $BASEDIR/vdev5
|
||||
log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
|
||||
log_must zpool wait -t resilver $TESTPOOL
|
||||
log_must zpool offline -f $TESTPOOL $BASEDIR/vdev6
|
||||
|
||||
log_must zpool labelclear -f $BASEDIR/vdev5
|
||||
log_must zpool labelclear -f $BASEDIR/vdev6
|
||||
|
||||
log_must zpool replace -w $TESTPOOL $BASEDIR/vdev5
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
log_must zpool events -c
|
||||
log_must zpool clear $TESTPOOL
|
||||
log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
|
||||
log_must zpool wait -t resilver $TESTPOOL
|
||||
log_must zpool wait -t scrub $TESTPOOL
|
||||
|
||||
nof_resilver=$(zpool events | grep -c resilver_start)
|
||||
if [ $nof_resilver = 1 ] ; then
|
||||
log_must verify_pool $TESTPOOL
|
||||
log_pass "zpool clear on draid pool doesn't cause concurrent resilvers"
|
||||
else
|
||||
log_fail "FAIL: sequential and healing resilver initiated concurrently"
|
||||
fi
|
|
@ -25,7 +25,7 @@
|
|||
is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing"
|
||||
|
||||
set -A args "" "-s \",\"" "-v" \
|
||||
"-f time,zcwc,zimnb,zimsb"
|
||||
"-f time,cwc,imnb,imsb"
|
||||
|
||||
log_assert "zilstat generates output and doesn't return an error code"
|
||||
|
||||
|
|
|
@ -25,5 +25,6 @@
|
|||
rmconfig
|
||||
destroy_pool $TESTPOOL
|
||||
del_user ${username}
|
||||
del_user ${username}rec
|
||||
del_group pamtestgroup
|
||||
log_must rm -rf "$runstatedir" $TESTDIRS
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
. $STF_SUITE/tests/functional/pam/utilities.kshlib
|
||||
|
||||
if [ -n "$ASAN_OPTIONS" ]; then
|
||||
export LD_PRELOAD=$(ldd "$(command -v zfs)" | awk '/libasan\.so/ {print $3}')
|
||||
fi
|
||||
|
||||
log_mustnot ismounted "$TESTPOOL/pam/${username}"
|
||||
keystatus unavailable
|
||||
|
||||
genconfig "homes=$TESTPOOL/pam runstatedir=${runstatedir}"
|
||||
|
||||
printf "testpass\nsecondpass\nsecondpass\n" | pamtester -v ${pamservice} ${username} chauthtok
|
||||
|
||||
log_mustnot ismounted "$TESTPOOL/pam/${username}"
|
||||
keystatus unavailable
|
||||
|
||||
echo "secondpass" | pamtester ${pamservice} ${username} open_session
|
||||
references 1
|
||||
log_must ismounted "$TESTPOOL/pam/${username}"
|
||||
keystatus available
|
||||
|
||||
printf "secondpass\ntestpass\ntestpass\n" | pamtester -v ${pamservice} ${username} chauthtok
|
||||
|
||||
log_must ismounted "$TESTPOOL/pam/${username}"
|
||||
log_must ismounted "$TESTPOOL/pam/${username}"
|
||||
keystatus available
|
||||
|
||||
log_must pamtester ${pamservice} ${username} close_session
|
||||
references 0
|
||||
log_mustnot ismounted "$TESTPOOL/pam/${username}"
|
||||
keystatus unavailable
|
||||
|
||||
log_pass "done."
|
72
sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_recursive.ksh
Executable file
72
sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_recursive.ksh
Executable file
|
@ -0,0 +1,72 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
. $STF_SUITE/tests/functional/pam/utilities.kshlib
|
||||
|
||||
if [ -n "$ASAN_OPTIONS" ]; then
|
||||
export LD_PRELOAD=$(ldd "$(command -v zfs)" | awk '/libasan\.so/ {print $3}')
|
||||
fi
|
||||
|
||||
username="${username}rec"
|
||||
|
||||
# Set up a deeper hierarchy, a mountpoint that doesn't interfere with other tests,
|
||||
# and a user which references that mountpoint
|
||||
log_must zfs create "$TESTPOOL/pampam"
|
||||
log_must zfs create -o mountpoint="$TESTDIR/rec" "$TESTPOOL/pampam/pam"
|
||||
echo "recurpass" | zfs create -o encryption=aes-256-gcm -o keyformat=passphrase \
|
||||
-o keylocation=prompt "$TESTPOOL/pampam/pam/${username}"
|
||||
log_must zfs unmount "$TESTPOOL/pampam/pam/${username}"
|
||||
log_must zfs unload-key "$TESTPOOL/pampam/pam/${username}"
|
||||
log_must add_user pamtestgroup ${username} "$TESTDIR/rec"
|
||||
|
||||
function keystatus {
|
||||
log_must [ "$(get_prop keystatus "$TESTPOOL/pampam/pam/${username}")" = "$1" ]
|
||||
}
|
||||
|
||||
log_mustnot ismounted "$TESTPOOL/pampam/pam/${username}"
|
||||
keystatus unavailable
|
||||
|
||||
function test_session {
|
||||
echo "recurpass" | pamtester ${pamservice} ${username} open_session
|
||||
references 1
|
||||
log_must ismounted "$TESTPOOL/pampam/pam/${username}"
|
||||
keystatus available
|
||||
|
||||
log_must pamtester ${pamservice} ${username} close_session
|
||||
references 0
|
||||
log_mustnot ismounted "$TESTPOOL/pampam/pam/${username}"
|
||||
keystatus unavailable
|
||||
}
|
||||
|
||||
genconfig "homes=$TESTPOOL/pampam/pam prop_mountpoint runstatedir=${runstatedir}"
|
||||
test_session
|
||||
|
||||
genconfig "homes=$TESTPOOL/pampam recursive_homes prop_mountpoint runstatedir=${runstatedir}"
|
||||
test_session
|
||||
|
||||
genconfig "homes=$TESTPOOL recursive_homes prop_mountpoint runstatedir=${runstatedir}"
|
||||
test_session
|
||||
|
||||
genconfig "homes=* recursive_homes prop_mountpoint runstatedir=${runstatedir}"
|
||||
test_session
|
||||
|
||||
log_pass "done."
|
|
@ -52,7 +52,7 @@ log_must ismounted "$TESTPOOL/pam/${username}"
|
|||
keystatus available
|
||||
|
||||
# Change user and dataset password to short one.
|
||||
printf "short\nshort\n" | pamtester ${pamservice} ${username} chauthtok
|
||||
printf "testpass\nshort\nshort\n" | pamtester -v ${pamservice} ${username} chauthtok
|
||||
|
||||
# Unmount and unload key.
|
||||
log_must pamtester ${pamservice} ${username} close_session
|
||||
|
|
|
@ -38,6 +38,8 @@
|
|||
|
||||
verify_runnable "global"
|
||||
|
||||
log_unsupported "Skipping, issue https://github.com/openzfs/zfs/issues/12053"
|
||||
|
||||
function test_cleanup
|
||||
{
|
||||
# reset memory limit to 16M
|
||||
|
|
|
@ -45,6 +45,15 @@ fi
|
|||
|
||||
if ! is_linux ; then
|
||||
log_unsupported "Only linux supports dd with oflag=dsync for FUA writes"
|
||||
else
|
||||
if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then
|
||||
log_unsupported "Disabled while issue #14872 is being worked"
|
||||
fi
|
||||
|
||||
# Disabled for the CentOS 9 kernel
|
||||
if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then
|
||||
log_unsupported "Disabled while issue #14872 is being worked"
|
||||
fi
|
||||
fi
|
||||
|
||||
typeset datafile1="$(mktemp zvol_misc_fua1.XXXXXX)"
|
||||
|
|
|
@ -44,6 +44,15 @@
|
|||
verify_runnable "global"
|
||||
|
||||
if is_linux ; then
|
||||
if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then
|
||||
log_unsupported "Disabled while issue #14872 is being worked"
|
||||
fi
|
||||
|
||||
# Disabled for the CentOS 9 kernel
|
||||
if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then
|
||||
log_unsupported "Disabled while issue #14872 is being worked"
|
||||
fi
|
||||
|
||||
# We need '--force' here since the prior tests may leave a filesystem
|
||||
# on the zvol, and blkdiscard will see that filesystem and print a
|
||||
# warning unless you force it.
|
||||
|
@ -123,7 +132,6 @@ log_must zfs set compression=off $TESTPOOL/$TESTVOL
|
|||
# Remove old data from previous tests
|
||||
log_must $trimcmd $zvolpath
|
||||
|
||||
|
||||
set_blk_mq 1
|
||||
log_must_busy zpool export $TESTPOOL
|
||||
log_must zpool import $TESTPOOL
|
||||
|
|
|
@ -295,7 +295,6 @@ SRCS+= abd.c \
|
|||
uberblock.c \
|
||||
unique.c \
|
||||
vdev.c \
|
||||
vdev_cache.c \
|
||||
vdev_draid.c \
|
||||
vdev_draid_rand.c \
|
||||
vdev_indirect.c \
|
||||
|
|
|
@ -653,6 +653,9 @@
|
|||
/* qat is enabled and existed */
|
||||
/* #undef HAVE_QAT */
|
||||
|
||||
/* struct reclaim_state has reclaimed */
|
||||
/* #undef HAVE_RECLAIM_STATE_RECLAIMED */
|
||||
|
||||
/* register_shrinker is vararg */
|
||||
/* #undef HAVE_REGISTER_SHRINKER_VARARG */
|
||||
|
||||
|
@ -1048,7 +1051,7 @@
|
|||
/* #undef ZFS_IS_GPL_COMPATIBLE */
|
||||
|
||||
/* Define the project alias string. */
|
||||
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gad0a55461"
|
||||
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gfeff9dfed"
|
||||
|
||||
/* Define the project author. */
|
||||
#define ZFS_META_AUTHOR "OpenZFS"
|
||||
|
@ -1057,7 +1060,7 @@
|
|||
/* #undef ZFS_META_DATA */
|
||||
|
||||
/* Define the maximum compatible kernel version. */
|
||||
#define ZFS_META_KVER_MAX "6.2"
|
||||
#define ZFS_META_KVER_MAX "6.3"
|
||||
|
||||
/* Define the minimum compatible kernel version. */
|
||||
#define ZFS_META_KVER_MIN "3.10"
|
||||
|
@ -1078,7 +1081,7 @@
|
|||
#define ZFS_META_NAME "zfs"
|
||||
|
||||
/* Define the project release. */
|
||||
#define ZFS_META_RELEASE "FreeBSD_gad0a55461"
|
||||
#define ZFS_META_RELEASE "FreeBSD_gfeff9dfed"
|
||||
|
||||
/* Define the project version. */
|
||||
#define ZFS_META_VERSION "2.1.99"
|
||||
|
|
|
@ -1 +1 @@
|
|||
#define ZFS_META_GITREV "zfs-2.1.99-1955-gad0a55461"
|
||||
#define ZFS_META_GITREV "zfs-2.1.99-1993-gfeff9dfed"
|
||||
|
|
Loading…
Reference in a new issue