Notable upstream pull request merges:
 #15635 eff77a802 ZIL: Improve next log block size prediction
 #15675 6930ecbb7 spa: make read/write queues configurable
 #15677 5a4915660 Don't panic on unencrypted block in encrypted dataset
 #15716 07e95b467 Fix the FreeBSD userspace build

Obtained from:  OpenZFS
OpenZFS commit: 233d34e47e
This commit is contained in:
Martin Matuska 2023-12-27 22:51:21 +01:00
commit b356da806b
70 changed files with 3564 additions and 317 deletions

View file

@ -124,3 +124,24 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
{
return (check_file_generic(file, force, isspare));
}
/*
 * FreeBSD stub for querying the enclosure slot power state of a vdev.
 *
 * Both arguments are ignored and -1 is always returned.
 */
int
zpool_power_current_state(zpool_handle_t *zhp, char *vdev)
{
(void) zhp;
(void) vdev;
/* Enclosure slot power not supported on FreeBSD yet */
return (-1);
}
/*
 * FreeBSD stub for turning the enclosure slot of a vdev on or off.
 *
 * All arguments are ignored and ENOTSUP is always returned.
 */
int
zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on)
{
(void) zhp;
(void) vdev;
(void) turn_on;
/* Enclosure slot power not supported on FreeBSD yet */
return (ENOTSUP);
}

View file

@ -416,3 +416,258 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
{
return (check_file_generic(file, force, isspare));
}
/*
 * Read from a sysfs file and return an allocated string.  Removes
 * the newline from the end of the string if there is one.
 *
 * A file that yields no data produces an empty string.
 *
 * Returns a string on success (which must be freed), or NULL on error.
 */
static char *zpool_sysfs_gets(char *path)
{
	int fd;
	struct stat statbuf;
	char *buf = NULL;
	ssize_t count = 0;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return (NULL);

	if (fstat(fd, &statbuf) != 0) {
		close(fd);
		return (NULL);
	}

	/* One extra zeroed byte guarantees NUL termination. */
	buf = calloc(statbuf.st_size + 1, sizeof (*buf));
	if (buf == NULL) {
		close(fd);
		return (NULL);
	}

	/*
	 * Note, we can read fewer bytes than st_size, and that's ok.  Sysfs
	 * files will report their size is 4k even if they only return a small
	 * string.
	 */
	count = read(fd, buf, statbuf.st_size);
	close(fd);
	if (count < 0) {
		/* Error doing read() */
		free(buf);
		return (NULL);
	}

	/*
	 * Remove trailing newline.  Only look at the last byte when at
	 * least one byte was read; otherwise buf[count - 1] would index
	 * before the start of the buffer.
	 */
	if (count > 0 && buf[count - 1] == '\n')
		buf[count - 1] = '\0';

	return (buf);
}
/*
 * Write a string to a sysfs file.
 *
 * Returns 0 on success, non-zero otherwise:
 *	-1: the file could not be opened for writing
 *	-2: the string could not be written or flushed
 */
static int zpool_sysfs_puts(char *path, char *str)
{
	FILE *file;

	file = fopen(path, "w");
	if (!file) {
		return (-1);
	}

	if (fputs(str, file) < 0) {
		fclose(file);
		return (-2);
	}

	/*
	 * stdio buffers the data, so the underlying write() normally happens
	 * when the stream is flushed by fclose().  A failed flush means the
	 * value never reached the file and must be reported as an error.
	 */
	if (fclose(file) != 0)
		return (-2);

	return (0);
}
/*
 * Refresh the enclosure sysfs path stored in a vdev nvlist, using the
 * vdev's own ZPOOL_CONFIG_PATH entry as the device path.
 */
static void
rescan_vdev_config_dev_sysfs_path(nvlist_t *vdev_nv)
{
	const char *dev_path;

	dev_path = fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH);
	update_vdev_config_dev_sysfs_path(vdev_nv, dev_path,
	    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
}
/*
 * Translate a power string into a state number.
 *
 * "off" and "0" map to 0, "on" and "1" map to 1.  Any other string is
 * unrecognized and maps to -1.
 */
static int zpool_power_parse_value(char *str)
{
	static const struct {
		const char *word;
		int state;
	} known[] = {
		{ "off", 0 },
		{ "0", 0 },
		{ "on", 1 },
		{ "1", 1 },
	};

	for (size_t i = 0; i < sizeof (known) / sizeof (known[0]); i++) {
		if (strcmp(str, known[i].word) == 0)
			return (known[i].state);
	}

	return (-1);
}
/*
 * Build the sysfs path of the power control file for a vdev.
 *
 * The vdev's enclosure sysfs directory is refreshed and looked up first.
 * Two candidate files inside it are then probed with access():
 *
 *	<dir>/power_status	(must be writable)
 *	<dir>/power		(must be readable and writable)
 *
 * Example returned strings:
 *
 * /sys/class/enclosure/0:0:122:0/10/power_status
 * /sys/bus/pci/slots/10/power
 *
 * Returns allocated string on success (which must be freed), NULL on failure.
 */
static char *
zpool_power_sysfs_path(zpool_handle_t *zhp, char *vdev)
{
	const char *enc_dir = NULL;
	char *ctl_path = NULL;
	nvlist_t *nv;

	nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL);
	if (nv == NULL)
		return (NULL);

	/* Make sure we're getting the updated enclosure sysfs path */
	rescan_vdev_config_dev_sysfs_path(nv);

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
	    &enc_dir) != 0)
		return (NULL);

	/* First candidate: the 'power_status' file */
	if (asprintf(&ctl_path, "%s/power_status", enc_dir) == -1)
		return (NULL);
	if (access(ctl_path, W_OK) == 0)
		return (ctl_path);

	/* No usable 'power_status' file, maybe it's NVMe? */
	free(ctl_path);
	ctl_path = NULL;
	if (asprintf(&ctl_path, "%s/power", enc_dir) == -1)
		return (NULL);
	if (access(ctl_path, R_OK | W_OK) == 0)
		return (ctl_path);

	/* Not NVMe either */
	free(ctl_path);
	return (NULL);
}
/*
 * Given a path to a sysfs power control file, return B_TRUE if you should use
 * "on/off" words to control it, or B_FALSE otherwise ("0/1" to control).
 *
 * The decision is made on the path's suffix: a path ending in "power_status"
 * takes words.  Paths shorter than that suffix cannot match and yield
 * B_FALSE.
 */
static boolean_t
zpool_power_use_word(char *sysfs_path)
{
	static const char suffix[] = "power_status";
	const size_t suffix_len = sizeof (suffix) - 1;
	const size_t path_len = strlen(sysfs_path);

	/*
	 * Guard the subtraction below: with unsigned lengths a short path
	 * would wrap around and index far outside the string.
	 */
	if (path_len < suffix_len)
		return (B_FALSE);

	if (strcmp(&sysfs_path[path_len - suffix_len], suffix) == 0)
		return (B_TRUE);

	return (B_FALSE);
}
/*
 * Read the sysfs power control value for a vdev and translate it.
 *
 * Returns:
 *  0 - Power is off
 *  1 - Power is on
 * -1 - Error or unsupported
 */
int
zpool_power_current_state(zpool_handle_t *zhp, char *vdev)
{
	int state = -1;
	char *text;
	char *ctl_path;

	ctl_path = zpool_power_sysfs_path(zhp, vdev);
	if (ctl_path == NULL)
		return (-1);

	text = zpool_sysfs_gets(ctl_path);
	if (text != NULL) {
		state = zpool_power_parse_value(text);
		free(text);
	}

	free(ctl_path);
	return (state);
}
/*
 * Turn on or off the slot to a device
 *
 * Device path is the full path to the device (like /dev/sda or /dev/sda1).
 *
 * The current state is read first; if it already matches the request nothing
 * is written.  Otherwise the new value is written to the sysfs power control
 * file and the state is polled until it matches or the timeout expires.
 *
 * Return code:
 * 0:		Success
 * ENOTSUP:	Power control not supported for OS
 * EBADSLT:	Couldn't read current power state
 * ENOENT:	No sysfs path to power control
 * EIO:		Couldn't write sysfs power value
 * EBADE:	Sysfs power value didn't change
 */
int
zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on)
{
	const int wanted = (int)turn_on;
	char *ctl_path;
	const char *word;
	int state;
	int write_rc;
	int timeout_ms;
	int polls;

	state = zpool_power_current_state(zhp, vdev);
	if (state == -1)
		return (EBADSLT);

	/* Already correct value? */
	if (state == wanted)
		return (0);

	ctl_path = zpool_power_sysfs_path(zhp, vdev);
	if (ctl_path == NULL)
		return (ENOENT);

	/* Pick the spelling this particular control file expects */
	if (zpool_power_use_word(ctl_path))
		word = turn_on ? "on" : "off";
	else
		word = turn_on ? "1" : "0";

	write_rc = zpool_sysfs_puts(ctl_path, (char *)word);
	free(ctl_path);
	if (write_rc != 0)
		return (EIO);

	/*
	 * Wait for the sysfs power value to change after writing it.  The
	 * wait defaults to 30 seconds and is polled in 200ms steps, with at
	 * least one poll.
	 */
	timeout_ms = zpool_getenv_int("ZPOOL_POWER_ON_SLOT_TIMEOUT_MS", 30000);
	polls = MAX(1, timeout_ms / 200);
	for (int i = 0; i < polls; i++) {
		if (zpool_power_current_state(zhp, vdev) == wanted)
			return (0);	/* success */

		fsleep(0.200);		/* 200ms */
	}

	/* sysfs value never changed */
	return (EBADE);
}

View file

@ -554,6 +554,10 @@ for_each_vdev_run_cb(void *zhp_data, nvlist_t *nv, void *cb_vcdl)
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
return (1);
/* Make sure we're getting the updated enclosure sysfs path */
update_vdev_config_dev_sysfs_path(nv, path,
ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
&vdev_enc_sysfs_path);

View file

@ -353,7 +353,7 @@ get_usage(zpool_help_t idx)
return (gettext("\tattach [-fsw] [-o property=value] "
"<pool> <device> <new-device>\n"));
case HELP_CLEAR:
return (gettext("\tclear [-nF] <pool> [device]\n"));
return (gettext("\tclear [[--power]|[-nF]] <pool> [device]\n"));
case HELP_CREATE:
return (gettext("\tcreate [-fnd] [-o property=value] ... \n"
"\t [-O file-system-property=value] ... \n"
@ -389,9 +389,11 @@ get_usage(zpool_help_t idx)
"[-T d|u] [pool] ... \n"
"\t [interval [count]]\n"));
case HELP_OFFLINE:
return (gettext("\toffline [-f] [-t] <pool> <device> ...\n"));
return (gettext("\toffline [--power]|[[-f][-t]] <pool> "
"<device> ...\n"));
case HELP_ONLINE:
return (gettext("\tonline [-e] <pool> <device> ...\n"));
return (gettext("\tonline [--power][-e] <pool> <device> "
"...\n"));
case HELP_REPLACE:
return (gettext("\treplace [-fsw] [-o property=value] "
"<pool> <device> [new-device]\n"));
@ -410,7 +412,7 @@ get_usage(zpool_help_t idx)
return (gettext("\ttrim [-dw] [-r <rate>] [-c | -s] <pool> "
"[<device> ...]\n"));
case HELP_STATUS:
return (gettext("\tstatus [-c [script1,script2,...]] "
return (gettext("\tstatus [--power] [-c [script1,script2,...]] "
"[-igLpPstvxD] [-T d|u] [pool] ... \n"
"\t [interval [count]]\n"));
case HELP_UPGRADE:
@ -516,6 +518,77 @@ print_vdev_prop_cb(int prop, void *cb)
return (ZPROP_CONT);
}
/*
 * Map a leaf vdev name (for example 'L5') to the ZPOOL_CONFIG_PATH string
 * stored in its config (for example '/dev/disk/by-vdev/L5').
 *
 * Returns NULL when the vdev cannot be found in the pool.
 */
static const char *
vdev_name_to_path(zpool_handle_t *zhp, char *vdev)
{
	nvlist_t *nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL);

	return (nv == NULL ? NULL :
	    fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH));
}
/*
 * Power on the slot for a vdev.  Thin wrapper around zpool_power() with
 * turn_on set to B_TRUE; returns whatever zpool_power() returns.
 */
static int
zpool_power_on(zpool_handle_t *zhp, char *vdev)
{
return (zpool_power(zhp, vdev, B_TRUE));
}
/*
 * Power on the slot for a vdev and, if that succeeded, wait for its disk
 * path via zpool_disk_wait().
 *
 * Returns the zpool_power_on() error code, or 0 on success.  The result of
 * the wait itself is not reported.
 */
static int
zpool_power_on_and_disk_wait(zpool_handle_t *zhp, char *vdev)
{
	int err = zpool_power_on(zhp, vdev);

	if (err == 0)
		zpool_disk_wait(vdev_name_to_path(zhp, vdev));

	return (err);
}
/*
 * Power on every real leaf vdev of a pool, then wait for each device path.
 *
 * Returns 0 on success.  If powering on any vdev fails, its error code is
 * returned immediately and no waiting is done.
 */
static int
zpool_power_on_pool_and_wait_for_devices(zpool_handle_t *zhp)
{
	nvlist_t *leaf;
	const char *dev_path = NULL;
	int err;

	/* Power up all the devices first */
	FOR_EACH_REAL_LEAF_VDEV(zhp, leaf) {
		dev_path = fnvlist_lookup_string(leaf, ZPOOL_CONFIG_PATH);
		if (dev_path == NULL)
			continue;

		err = zpool_power_on(zhp, (char *)dev_path);
		if (err != 0)
			return (err);
	}

	/*
	 * Wait for their devices to show up.  Since we powered them on
	 * at roughly the same time, they should all come online around
	 * the same time.
	 */
	FOR_EACH_REAL_LEAF_VDEV(zhp, leaf) {
		zpool_disk_wait(fnvlist_lookup_string(leaf,
		    ZPOOL_CONFIG_PATH));
	}

	return (0);
}
/*
 * Power off the slot for a vdev.  Thin wrapper around zpool_power() with
 * turn_on set to B_FALSE; returns whatever zpool_power() returns.
 */
static int
zpool_power_off(zpool_handle_t *zhp, char *vdev)
{
return (zpool_power(zhp, vdev, B_FALSE));
}
/*
* Display usage message. If we're inside a command, display only the usage for
* that command. Otherwise, iterate over the entire command table and display
@ -2093,6 +2166,7 @@ typedef struct status_cbdata {
boolean_t cb_print_vdev_init;
boolean_t cb_print_vdev_trim;
vdev_cmd_data_list_t *vcdl;
boolean_t cb_print_power;
} status_cbdata_t;
/* Return 1 if string is NULL, empty, or whitespace; return 0 otherwise. */
@ -2378,6 +2452,26 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
else
printf(" %5s", rbuf);
}
if (cb->cb_print_power) {
if (children == 0) {
/* Only leaf vdevs have physical slots */
switch (zpool_power_current_state(zhp, (char *)
fnvlist_lookup_string(nv,
ZPOOL_CONFIG_PATH))) {
case 0:
printf_color(ANSI_RED, " %5s",
gettext("off"));
break;
case 1:
printf(" %5s", gettext("on"));
break;
default:
printf(" %5s", "-");
}
} else {
printf(" %5s", "-");
}
}
}
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
@ -5429,19 +5523,6 @@ get_interval_count_filter_guids(int *argc, char **argv, float *interval,
interval, count);
}
/*
 * Floating point sleep(). Allows you to pass in a floating point value for
 * seconds.
 *
 * The whole-second part of 'sec' is stored in tv_sec and the fractional
 * remainder, scaled by NANOSEC, in tv_nsec before calling nanosleep().
 * The return value of nanosleep() is not checked.
 */
static void
fsleep(float sec)
{
struct timespec req;
/* Split into whole seconds and the sub-second remainder */
req.tv_sec = floor(sec);
req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC;
nanosleep(&req, NULL);
}
/*
* Terminal height, in rows. Returns -1 if stdout is not connected to a TTY or
* if we were unable to determine its size.
@ -6950,10 +7031,12 @@ zpool_do_split(int argc, char **argv)
return (ret);
}
#define POWER_OPT 1024
/*
* zpool online <pool> <device> ...
* zpool online [--power] <pool> <device> ...
*
* --power: Power on the enclosure slot to the drive (if possible)
*/
int
zpool_do_online(int argc, char **argv)
@ -6964,13 +7047,21 @@ zpool_do_online(int argc, char **argv)
int ret = 0;
vdev_state_t newstate;
int flags = 0;
boolean_t is_power_on = B_FALSE;
struct option long_options[] = {
{"power", no_argument, NULL, POWER_OPT},
{0, 0, 0, 0}
};
/* check options */
while ((c = getopt(argc, argv, "e")) != -1) {
while ((c = getopt_long(argc, argv, "e", long_options, NULL)) != -1) {
switch (c) {
case 'e':
flags |= ZFS_ONLINE_EXPAND;
break;
case POWER_OPT:
is_power_on = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@ -6978,6 +7069,9 @@ zpool_do_online(int argc, char **argv)
}
}
if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT"))
is_power_on = B_TRUE;
argc -= optind;
argv += optind;
@ -6999,6 +7093,18 @@ zpool_do_online(int argc, char **argv)
for (i = 1; i < argc; i++) {
vdev_state_t oldstate;
boolean_t avail_spare, l2cache;
int rc;
if (is_power_on) {
rc = zpool_power_on_and_disk_wait(zhp, argv[i]);
if (rc == ENOTSUP) {
(void) fprintf(stderr,
gettext("Power control not supported\n"));
}
if (rc != 0)
return (rc);
}
nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare,
&l2cache, NULL);
if (tgt == NULL) {
@ -7044,12 +7150,15 @@ zpool_do_online(int argc, char **argv)
}
/*
* zpool offline [-ft] <pool> <device> ...
* zpool offline [-ft]|[--power] <pool> <device> ...
*
*
* -f Force the device into a faulted state.
*
* -t Only take the device off-line temporarily. The offline/faulted
* state will not be persistent across reboots.
*
* --power Power off the enclosure slot to the drive (if possible)
*/
int
zpool_do_offline(int argc, char **argv)
@ -7060,9 +7169,15 @@ zpool_do_offline(int argc, char **argv)
int ret = 0;
boolean_t istmp = B_FALSE;
boolean_t fault = B_FALSE;
boolean_t is_power_off = B_FALSE;
struct option long_options[] = {
{"power", no_argument, NULL, POWER_OPT},
{0, 0, 0, 0}
};
/* check options */
while ((c = getopt(argc, argv, "ft")) != -1) {
while ((c = getopt_long(argc, argv, "ft", long_options, NULL)) != -1) {
switch (c) {
case 'f':
fault = B_TRUE;
@ -7070,6 +7185,9 @@ zpool_do_offline(int argc, char **argv)
case 't':
istmp = B_TRUE;
break;
case POWER_OPT:
is_power_off = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@ -7077,6 +7195,20 @@ zpool_do_offline(int argc, char **argv)
}
}
if (is_power_off && fault) {
(void) fprintf(stderr,
gettext("-0 and -f cannot be used together\n"));
usage(B_FALSE);
return (1);
}
if (is_power_off && istmp) {
(void) fprintf(stderr,
gettext("-0 and -t cannot be used together\n"));
usage(B_FALSE);
return (1);
}
argc -= optind;
argv += optind;
@ -7096,8 +7228,22 @@ zpool_do_offline(int argc, char **argv)
return (1);
for (i = 1; i < argc; i++) {
if (fault) {
uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]);
uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]);
if (is_power_off) {
/*
* Note: we have to power off first, then set REMOVED,
* or else zpool_vdev_set_removed_state() returns
* EAGAIN.
*/
ret = zpool_power_off(zhp, argv[i]);
if (ret != 0) {
(void) fprintf(stderr, "%s %s %d\n",
gettext("unable to power off slot for"),
argv[i], ret);
}
zpool_vdev_set_removed_state(zhp, guid, VDEV_AUX_NONE);
} else if (fault) {
vdev_aux_t aux;
if (istmp == B_FALSE) {
/* Force the fault to persist across imports */
@ -7120,7 +7266,7 @@ zpool_do_offline(int argc, char **argv)
}
/*
* zpool clear <pool> [device]
* zpool clear [-nF]|[--power] <pool> [device]
*
* Clear all errors associated with a pool or a particular device.
*/
@ -7132,13 +7278,20 @@ zpool_do_clear(int argc, char **argv)
boolean_t dryrun = B_FALSE;
boolean_t do_rewind = B_FALSE;
boolean_t xtreme_rewind = B_FALSE;
boolean_t is_power_on = B_FALSE;
uint32_t rewind_policy = ZPOOL_NO_REWIND;
nvlist_t *policy = NULL;
zpool_handle_t *zhp;
char *pool, *device;
struct option long_options[] = {
{"power", no_argument, NULL, POWER_OPT},
{0, 0, 0, 0}
};
/* check options */
while ((c = getopt(argc, argv, "FnX")) != -1) {
while ((c = getopt_long(argc, argv, "FnX", long_options,
NULL)) != -1) {
switch (c) {
case 'F':
do_rewind = B_TRUE;
@ -7149,6 +7302,9 @@ zpool_do_clear(int argc, char **argv)
case 'X':
xtreme_rewind = B_TRUE;
break;
case POWER_OPT:
is_power_on = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@ -7156,6 +7312,9 @@ zpool_do_clear(int argc, char **argv)
}
}
if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT"))
is_power_on = B_TRUE;
argc -= optind;
argv += optind;
@ -7196,6 +7355,14 @@ zpool_do_clear(int argc, char **argv)
return (1);
}
if (is_power_on) {
if (device == NULL) {
zpool_power_on_pool_and_wait_for_devices(zhp);
} else {
zpool_power_on_and_disk_wait(zhp, device);
}
}
if (zpool_clear(zhp, device, policy) != 0)
ret = 1;
@ -8908,6 +9075,10 @@ status_callback(zpool_handle_t *zhp, void *data)
printf_color(ANSI_BOLD, " %5s", gettext("SLOW"));
}
if (cbp->cb_print_power) {
printf_color(ANSI_BOLD, " %5s", gettext("POWER"));
}
if (cbp->vcdl != NULL)
print_cmd_columns(cbp->vcdl, 0);
@ -8954,8 +9125,8 @@ status_callback(zpool_handle_t *zhp, void *data)
}
/*
* zpool status [-c [script1,script2,...]] [-igLpPstvx] [-T d|u] [pool] ...
* [interval [count]]
* zpool status [-c [script1,script2,...]] [-igLpPstvx] [--power] [-T d|u] ...
* [pool] [interval [count]]
*
* -c CMD For each vdev, run command CMD
* -i Display vdev initialization status.
@ -8969,6 +9140,7 @@ status_callback(zpool_handle_t *zhp, void *data)
* -D Display dedup status (undocumented)
* -t Display vdev TRIM status.
* -T Display a timestamp in date(1) or Unix format
* --power Display vdev enclosure slot power status
*
* Describes the health status of all pools or some subset.
*/
@ -8982,8 +9154,14 @@ zpool_do_status(int argc, char **argv)
status_cbdata_t cb = { 0 };
char *cmd = NULL;
struct option long_options[] = {
{"power", no_argument, NULL, POWER_OPT},
{0, 0, 0, 0}
};
/* check options */
while ((c = getopt(argc, argv, "c:igLpPsvxDtT:")) != -1) {
while ((c = getopt_long(argc, argv, "c:igLpPsvxDtT:", long_options,
NULL)) != -1) {
switch (c) {
case 'c':
if (cmd != NULL) {
@ -9042,6 +9220,9 @@ zpool_do_status(int argc, char **argv)
case 'T':
get_timestamp_arg(*optarg);
break;
case POWER_OPT:
cb.cb_print_power = B_TRUE;
break;
case '?':
if (optopt == 'c') {
print_zpool_script_list("status");

View file

@ -138,6 +138,9 @@ int check_file(const char *file, boolean_t force, boolean_t isspare);
void after_zpool_upgrade(zpool_handle_t *zhp);
int check_file_generic(const char *file, boolean_t force, boolean_t isspare);
int zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on);
int zpool_power_current_state(zpool_handle_t *zhp, char *vdev);
#ifdef __cplusplus
}
#endif

View file

@ -2,12 +2,15 @@ dnl #
dnl # 4.9, current_time() added
dnl # 4.18, return type changed from timespec to timespec64
dnl #
dnl # Note that we don't care about the return type in this check. If we have
dnl # to implement a fallback, we'll know we're <4.9, which was timespec.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_CURRENT_TIME], [
ZFS_LINUX_TEST_SRC([current_time], [
#include <linux/fs.h>
], [
struct inode ip __attribute__ ((unused));
ip.i_atime = current_time(&ip);
(void) current_time(&ip);
])
])

View file

@ -52,6 +52,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [
memset(&ip, 0, sizeof(ip));
inode_set_ctime_to_ts(&ip, ts);
])
dnl #
dnl # 6.7 API change
dnl # i_atime/i_mtime no longer directly accessible, must use
dnl # inode_get_mtime(ip), inode_set_mtime*(ip) to
dnl # read/write.
dnl #
ZFS_LINUX_TEST_SRC([inode_get_atime], [
#include <linux/fs.h>
],[
struct inode ip;
memset(&ip, 0, sizeof(ip));
inode_get_atime(&ip);
])
ZFS_LINUX_TEST_SRC([inode_get_mtime], [
#include <linux/fs.h>
],[
struct inode ip;
memset(&ip, 0, sizeof(ip));
inode_get_mtime(&ip);
])
ZFS_LINUX_TEST_SRC([inode_set_atime_to_ts], [
#include <linux/fs.h>
],[
struct inode ip;
struct timespec64 ts = {0};
memset(&ip, 0, sizeof(ip));
inode_set_atime_to_ts(&ip, ts);
])
ZFS_LINUX_TEST_SRC([inode_set_mtime_to_ts], [
#include <linux/fs.h>
],[
struct inode ip;
struct timespec64 ts = {0};
memset(&ip, 0, sizeof(ip));
inode_set_mtime_to_ts(&ip, ts);
])
])
AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
@ -90,4 +132,40 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
],[
AC_MSG_RESULT(no)
])
AC_MSG_CHECKING([whether inode_get_atime() exists])
ZFS_LINUX_TEST_RESULT([inode_get_atime], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_INODE_GET_ATIME, 1,
[inode_get_atime() exists in linux/fs.h])
],[
AC_MSG_RESULT(no)
])
AC_MSG_CHECKING([whether inode_set_atime_to_ts() exists])
ZFS_LINUX_TEST_RESULT([inode_set_atime_to_ts], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_INODE_SET_ATIME_TO_TS, 1,
[inode_set_atime_to_ts() exists in linux/fs.h])
],[
AC_MSG_RESULT(no)
])
AC_MSG_CHECKING([whether inode_get_mtime() exists])
ZFS_LINUX_TEST_RESULT([inode_get_mtime], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_INODE_GET_MTIME, 1,
[inode_get_mtime() exists in linux/fs.h])
],[
AC_MSG_RESULT(no)
])
AC_MSG_CHECKING([whether inode_set_mtime_to_ts() exists])
ZFS_LINUX_TEST_RESULT([inode_set_mtime_to_ts], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_INODE_SET_MTIME_TO_TS, 1,
[inode_set_mtime_to_ts() exists in linux/fs.h])
],[
AC_MSG_RESULT(no)
])
])

View file

@ -19,12 +19,44 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK], [
],[])
])
dnl #
dnl # 6.7 API change
dnl # s_shrink is now a pointer.
dnl #
dnl # The test source below initializes super_block.s_shrink with the
dnl # address of a struct shrinker.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR], [
ZFS_LINUX_TEST_SRC([super_block_s_shrink_ptr], [
#include <linux/fs.h>
unsigned long shrinker_cb(struct shrinker *shrink,
struct shrink_control *sc) { return 0; }
static struct shrinker shrinker = {
.count_objects = shrinker_cb,
.scan_objects = shrinker_cb,
.seeks = DEFAULT_SEEKS,
};
static const struct super_block
sb __attribute__ ((unused)) = {
.s_shrink = &shrinker,
};
],[])
])
AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK], [
AC_MSG_CHECKING([whether super_block has s_shrink])
ZFS_LINUX_TEST_RESULT([super_block_s_shrink], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SUPER_BLOCK_S_SHRINK, 1,
[have super_block s_shrink])
],[
ZFS_LINUX_TEST_ERROR([sb->s_shrink()])
AC_MSG_RESULT(no)
AC_MSG_CHECKING([whether super_block has s_shrink pointer])
ZFS_LINUX_TEST_RESULT([super_block_s_shrink_ptr], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SUPER_BLOCK_S_SHRINK_PTR, 1,
[have super_block s_shrink pointer])
],[
AC_MSG_RESULT(no)
ZFS_LINUX_TEST_ERROR([sb->s_shrink()])
])
])
])
@ -96,6 +128,25 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [
])
])
dnl #
dnl # 6.7 API change
dnl # register_shrinker has been replaced by shrinker_register.
dnl #
dnl # The test source below calls shrinker_register() on a struct shrinker
dnl # that is set up with the split count_objects/scan_objects callbacks.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER], [
ZFS_LINUX_TEST_SRC([shrinker_register], [
#include <linux/shrinker.h>
unsigned long shrinker_cb(struct shrinker *shrink,
struct shrink_control *sc) { return 0; }
],[
struct shrinker cache_shrinker = {
.count_objects = shrinker_cb,
.scan_objects = shrinker_cb,
.seeks = DEFAULT_SEEKS,
};
shrinker_register(&cache_shrinker);
])
])
AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[
dnl #
dnl # 6.0 API change
@ -133,14 +184,36 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[
dnl # cs->shrink() is logically split in to
dnl # cs->count_objects() and cs->scan_objects()
dnl #
AC_MSG_CHECKING([if cs->count_objects callback exists])
AC_MSG_CHECKING(
[whether cs->count_objects callback exists])
ZFS_LINUX_TEST_RESULT(
[shrinker_cb_shrink_control_split],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1,
[cs->count_objects exists])
[shrinker_cb_shrink_control_split],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1,
[cs->count_objects exists])
],[
AC_MSG_RESULT(no)
AC_MSG_CHECKING(
[whether shrinker_register exists])
ZFS_LINUX_TEST_RESULT([shrinker_register], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SHRINKER_REGISTER, 1,
[shrinker_register exists])
dnl # We assume that the split shrinker
dnl # callback exists if
dnl # shrinker_register() exists,
dnl # because the latter is a much more
dnl # recent addition, and the macro
dnl # test for shrinker_register() only
dnl # works if the callback is split
AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK,
1, [cs->count_objects exists])
],[
AC_MSG_RESULT(no)
ZFS_LINUX_TEST_ERROR([shrinker])
])
])
])
])
@ -174,10 +247,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [
AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [
ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK
ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR
ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID
ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK
ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT
ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG
ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER
])
AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [

View file

@ -319,6 +319,9 @@ _LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *);
_LIBZFS_H int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
_LIBZFS_H int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);
_LIBZFS_H int zpool_vdev_set_removed_state(zpool_handle_t *, uint64_t,
vdev_aux_t);
_LIBZFS_H int zpool_vdev_clear(zpool_handle_t *, uint64_t);
_LIBZFS_H nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,

View file

@ -97,6 +97,7 @@ _LIBZUTIL_H int zpool_find_config(libpc_handle_t *, const char *, nvlist_t **,
_LIBZUTIL_H const char * const * zpool_default_search_paths(size_t *count);
_LIBZUTIL_H int zpool_read_label(int, nvlist_t **, int *);
_LIBZUTIL_H int zpool_label_disk_wait(const char *, int);
_LIBZUTIL_H int zpool_disk_wait(const char *);
struct udev_device;
@ -163,6 +164,8 @@ _LIBZUTIL_H void zfs_niceraw(uint64_t, char *, size_t);
_LIBZUTIL_H void zpool_dump_ddt(const ddt_stat_t *, const ddt_histogram_t *);
_LIBZUTIL_H int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***,
uint_t *);
_LIBZUTIL_H void fsleep(float sec);
_LIBZUTIL_H int zpool_getenv_int(const char *env, int default_val);
struct zfs_cmd;
@ -205,6 +208,60 @@ _LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...);
typedef int (*pool_vdev_iter_f)(void *, nvlist_t *, void *);
int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func,
void *data);
int for_each_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv, void *data);
int for_each_real_leaf_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv,
void *data);
/*
 * Often you'll want to iterate over all the vdevs in the pool, but don't want
 * to use for_each_vdev() since it requires a callback function.
 *
 * Instead you can use FOR_EACH_VDEV():
 *
 * zpool_handle_t *zhp // Assume this is initialized
 * nvlist_t *nv
 * ...
 * FOR_EACH_VDEV(zhp, nv) {
 * const char *path = NULL;
 * nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path);
 * printf("Looking at vdev %s\n", path);
 * }
 *
 * Note: FOR_EACH_VDEV runs in O(n^2) time where n = number of vdevs. However,
 * there's an upper limit of 256 vdevs per dRAID top-level vdevs (TLDs), 255 for
 * raidz2 TLDs, a real world limit of ~500 vdevs for mirrors, so this shouldn't
 * really be an issue.
 *
 * Here are some micro-benchmarks of a complete FOR_EACH_VDEV loop on a RAID0
 * pool:
 *
 * 100 vdevs = 0.7ms
 * 500 vdevs = 17ms
 * 750 vdevs = 40ms
 * 1000 vdevs = 82ms
 *
 * The '__nv += 0' at the end of the for() loop gets around a "comma or
 * semicolon followed by non-blank" checkstyle error. Note on most compilers
 * the '__nv += 0' can just be replaced with 'NULL', but gcc on Centos 7
 * will give a 'warning: statement with no effect' error if you do that.
 *
 * Note that the macro expands to a braced setup block followed by a for()
 * statement, so it is two statements and must not be used as the unbraced
 * body of an if/else/while.
 */
#define __FOR_EACH_VDEV(__zhp, __nv, __func) { \
__nv = zpool_get_config(__zhp, NULL); \
VERIFY0(nvlist_lookup_nvlist(__nv, ZPOOL_CONFIG_VDEV_TREE, &__nv)); \
} \
for (nvlist_t *__root_nv = __nv, *__state = (nvlist_t *)0; \
for_each_vdev_cb(&__state, __root_nv, __func, &__nv) == 1; \
__nv += 0)
#define FOR_EACH_VDEV(__zhp, __nv) \
__FOR_EACH_VDEV(__zhp, __nv, for_each_vdev_macro_helper_func)
/*
 * "real leaf" vdevs are leaf vdevs that are real devices (disks or files).
 * This excludes leaf vdevs like draid spares.
 */
#define FOR_EACH_REAL_LEAF_VDEV(__zhp, __nv) \
__FOR_EACH_VDEV(__zhp, __nv, for_each_real_leaf_vdev_macro_helper_func)
int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func,
void *data);
void update_vdevs_config_dev_sysfs_path(nvlist_t *config);

View file

@ -94,6 +94,12 @@
#define param_set_max_auto_ashift_args(var) \
CTLTYPE_UINT, NULL, 0, param_set_max_auto_ashift, "IU"
#define spa_taskq_read_param_set_args(var) \
CTLTYPE_STRING, NULL, 0, spa_taskq_read_param, "A"
#define spa_taskq_write_param_set_args(var) \
CTLTYPE_STRING, NULL, 0, spa_taskq_write_param, "A"
#define fletcher_4_param_set_args(var) \
CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"

View file

@ -29,12 +29,13 @@
/*
* Due to frequent changes in the shrinker API the following
* compatibility wrappers should be used. They are as follows:
* compatibility wrapper should be used.
*
* SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost);
* shrinker = spl_register_shrinker(name, countfunc, scanfunc, seek_cost);
* spl_unregister_shrinker(shrinker);
*
* SPL_SHRINKER_DECLARE is used to declare a shrinker with the name varname,
* which is passed to spl_register_shrinker()/spl_unregister_shrinker().
* spl_register_shrinker is used to create and register a shrinker with the
* given name.
* The countfunc returns the number of free-able objects.
* The scanfunc returns the number of objects that were freed.
* The callbacks can return SHRINK_STOP if further calls can't make any more
@ -57,57 +58,28 @@
* ...scan objects in the cache and reclaim them...
* }
*
* SPL_SHRINKER_DECLARE(my_shrinker, my_count, my_scan, DEFAULT_SEEKS);
* static struct shrinker *my_shrinker;
*
* void my_init_func(void) {
* spl_register_shrinker(&my_shrinker);
* my_shrinker = spl_register_shrinker("my-shrinker",
* my_count, my_scan, DEFAULT_SEEKS);
* }
*
* void my_fini_func(void) {
* spl_unregister_shrinker(my_shrinker);
* }
*/
#ifdef HAVE_REGISTER_SHRINKER_VARARG
#define spl_register_shrinker(x) register_shrinker(x, "zfs-arc-shrinker")
#else
#define spl_register_shrinker(x) register_shrinker(x)
#endif
#define spl_unregister_shrinker(x) unregister_shrinker(x)
typedef unsigned long (*spl_shrinker_cb)
(struct shrinker *, struct shrink_control *);
/*
* Linux 3.0 to 3.11 Shrinker API Compatibility.
*/
#if defined(HAVE_SINGLE_SHRINKER_CALLBACK)
#define SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost) \
static int \
__ ## varname ## _wrapper(struct shrinker *shrink, struct shrink_control *sc)\
{ \
if (sc->nr_to_scan != 0) { \
(void) scanfunc(shrink, sc); \
} \
return (countfunc(shrink, sc)); \
} \
\
static struct shrinker varname = { \
.shrink = __ ## varname ## _wrapper, \
.seeks = seek_cost, \
}
struct shrinker *spl_register_shrinker(const char *name,
spl_shrinker_cb countfunc, spl_shrinker_cb scanfunc, int seek_cost);
void spl_unregister_shrinker(struct shrinker *);
#ifndef SHRINK_STOP
/* 3.0-3.11 compatibility */
#define SHRINK_STOP (-1)
/*
* Linux 3.12 and later Shrinker API Compatibility.
*/
#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
#define SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost) \
static struct shrinker varname = { \
.count_objects = countfunc, \
.scan_objects = scanfunc, \
.seeks = seek_cost, \
}
#else
/*
* Linux 2.x to 2.6.22, or a newer shrinker API has been introduced.
*/
#error "Unknown shrinker callback"
#endif
#endif /* SPL_SHRINKER_H */

View file

@ -273,5 +273,25 @@ extern long zpl_ioctl_fideduperange(struct file *filp, void *arg);
#else
#define zpl_inode_set_ctime_to_ts(ip, ts) (ip->i_ctime = ts)
#endif
/*
 * Accessors for the inode atime and mtime.  Each zpl_inode_* macro maps to
 * the kernel's inode_get_*() / inode_set_*_to_ts() helper when the matching
 * HAVE_* symbol is defined, and otherwise reads or assigns the ip->i_atime /
 * ip->i_mtime field directly.
 */
#ifdef HAVE_INODE_GET_ATIME
#define zpl_inode_get_atime(ip) inode_get_atime(ip)
#else
#define zpl_inode_get_atime(ip) (ip->i_atime)
#endif
#ifdef HAVE_INODE_SET_ATIME_TO_TS
#define zpl_inode_set_atime_to_ts(ip, ts) inode_set_atime_to_ts(ip, ts)
#else
#define zpl_inode_set_atime_to_ts(ip, ts) (ip->i_atime = ts)
#endif
#ifdef HAVE_INODE_GET_MTIME
#define zpl_inode_get_mtime(ip) inode_get_mtime(ip)
#else
#define zpl_inode_get_mtime(ip) (ip->i_mtime)
#endif
#ifdef HAVE_INODE_SET_MTIME_TO_TS
#define zpl_inode_set_mtime_to_ts(ip, ts) inode_set_mtime_to_ts(ip, ts)
#else
#define zpl_inode_set_mtime_to_ts(ip, ts) (ip->i_mtime = ts)
#endif
#endif /* _SYS_ZPL_H */

View file

@ -182,7 +182,6 @@ typedef struct zil_vdev_node {
} zil_vdev_node_t;
#define ZIL_BURSTS 8
#define ZIL_PREV_BLKS 16
/*
* Stable storage intent log management structure. One per dataset.
@ -217,7 +216,9 @@ struct zilog {
uint64_t zl_parse_lr_count; /* number of log records parsed */
itxg_t zl_itxg[TXG_SIZE]; /* intent log txg chains */
list_t zl_itx_commit_list; /* itx list to be committed */
uint64_t zl_cur_used; /* current commit log size used */
uint64_t zl_cur_size; /* current burst full size */
uint64_t zl_cur_left; /* current burst remaining size */
uint64_t zl_cur_max; /* biggest record in current burst */
list_t zl_lwb_list; /* in-flight log write list */
avl_tree_t zl_bp_tree; /* track bps during log parse */
clock_t zl_replay_time; /* lbolt of when replay started */
@ -225,7 +226,8 @@ struct zilog {
zil_header_t zl_old_header; /* debugging aid */
uint_t zl_parallel; /* workload is multi-threaded */
uint_t zl_prev_rotor; /* rotor for zl_prev[] */
uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */
uint_t zl_prev_opt[ZIL_BURSTS]; /* optimal block size */
uint_t zl_prev_min[ZIL_BURSTS]; /* minimal first block size */
txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */
uint64_t zl_dirty_max_txg; /* highest txg used to dirty zilog */

View file

@ -179,6 +179,7 @@
<elf-symbol name='fletcher_4_native' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='fletcher_4_native_varsize' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='fletcher_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='fsleep' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='get_dataset_depth' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='get_system_hostid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='getexecname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -466,6 +467,7 @@
<elf-symbol name='zpool_disable_datasets_os' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_disable_volume_os' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_discard_checkpoint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_disk_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_dump_ddt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_enable_datasets' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_events_clear' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -497,6 +499,7 @@
<elf-symbol name='zpool_get_userprop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_get_vdev_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_get_vdev_prop_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_getenv_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_history_unpack' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_import' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_import_props' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -567,6 +570,7 @@
<elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_script_alloc_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_script_free_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_set_removed_state' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -1368,8 +1372,6 @@
<qualified-type-def type-id='0897719a' const='yes' id='c4a7b189'/>
<pointer-type-def type-id='c4a7b189' size-in-bits='64' id='36fca399'/>
<qualified-type-def type-id='36fca399' restrict='yes' id='37e4897b'/>
<qualified-type-def type-id='a9c79a1f' const='yes' id='cd087e36'/>
<pointer-type-def type-id='cd087e36' size-in-bits='64' id='e05e8614'/>
<qualified-type-def type-id='e05e8614' restrict='yes' id='0be2e71c'/>
<pointer-type-def type-id='8037c762' size-in-bits='64' id='d74a6869'/>
<qualified-type-def type-id='7292109c' restrict='yes' id='6942f6a4'/>
@ -6426,6 +6428,12 @@
<parameter type-id='9d774e0b' name='aux'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_vdev_set_removed_state' mangled-name='zpool_vdev_set_removed_state' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_set_removed_state'>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='9c313c2d' name='guid'/>
<parameter type-id='9d774e0b' name='aux'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_vdev_attach' mangled-name='zpool_vdev_attach' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_attach'>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='80f4b756' name='old_disk'/>
@ -7792,6 +7800,12 @@
<qualified-type-def type-id='d33f11cb' restrict='yes' id='5c53ba29'/>
<pointer-type-def type-id='ffa52b96' size-in-bits='64' id='76c8174b'/>
<pointer-type-def type-id='f3d87113' size-in-bits='64' id='0d2a0670'/>
<function-decl name='zpool_label_disk' mangled-name='zpool_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_label_disk'>
<parameter type-id='b0382bb3'/>
<parameter type-id='4c81de99'/>
<parameter type-id='80f4b756'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'>
<return type-id='26a90f95'/>
</function-decl>
@ -7801,6 +7815,10 @@
<function-decl name='libzfs_core_fini' visibility='default' binding='global' size-in-bits='64'>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='zfs_get_underlying_path' mangled-name='zfs_get_underlying_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_underlying_path'>
<parameter type-id='80f4b756'/>
<return type-id='26a90f95'/>
</function-decl>
<function-decl name='zpool_prop_unsupported' mangled-name='zpool_prop_unsupported' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_unsupported'>
<parameter type-id='80f4b756'/>
<return type-id='c19b74c3'/>
@ -7918,6 +7936,11 @@
<parameter type-id='b59d7dce'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='access' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/>
<parameter type-id='95e97e5e'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='dup2' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='95e97e5e'/>
<parameter type-id='95e97e5e'/>
@ -8085,6 +8108,37 @@
<parameter is-variadic='yes'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_vdev_script_alloc_env' mangled-name='zpool_vdev_script_alloc_env' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_script_alloc_env'>
<parameter type-id='80f4b756' name='pool_name'/>
<parameter type-id='80f4b756' name='vdev_path'/>
<parameter type-id='80f4b756' name='vdev_upath'/>
<parameter type-id='80f4b756' name='vdev_enc_sysfs_path'/>
<parameter type-id='80f4b756' name='opt_key'/>
<parameter type-id='80f4b756' name='opt_val'/>
<return type-id='9b23c9ad'/>
</function-decl>
<function-decl name='zpool_vdev_script_free_env' mangled-name='zpool_vdev_script_free_env' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_script_free_env'>
<parameter type-id='9b23c9ad' name='env'/>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='zpool_prepare_disk' mangled-name='zpool_prepare_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prepare_disk'>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='5ce45b60' name='vdev_nv'/>
<parameter type-id='80f4b756' name='prepare_str'/>
<parameter type-id='c0563f85' name='lines'/>
<parameter type-id='7292109c' name='lines_cnt'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_prepare_and_label_disk' mangled-name='zpool_prepare_and_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prepare_and_label_disk'>
<parameter type-id='b0382bb3' name='hdl'/>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='80f4b756' name='name'/>
<parameter type-id='5ce45b60' name='vdev_nv'/>
<parameter type-id='80f4b756' name='prepare_str'/>
<parameter type-id='c0563f85' name='lines'/>
<parameter type-id='7292109c' name='lines_cnt'/>
<return type-id='95e97e5e'/>
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libzfs/os/linux/libzfs_mount_os.c' language='LANG_C99'>
<pointer-type-def type-id='7359adad' size-in-bits='64' id='1d2c2b85'/>
@ -8274,12 +8328,6 @@
<parameter type-id='95e97e5e'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_label_disk' mangled-name='zpool_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_label_disk'>
<parameter type-id='b0382bb3' name='hdl'/>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='80f4b756' name='name'/>
<return type-id='95e97e5e'/>
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libzfs/os/linux/libzfs_util_os.c' language='LANG_C99'>
<typedef-decl name='nfds_t' type-id='7359adad' id='555eef66'/>
@ -8295,11 +8343,6 @@
</data-member>
</class-decl>
<pointer-type-def type-id='b440e872' size-in-bits='64' id='3ac36db0'/>
<function-decl name='access' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/>
<parameter type-id='95e97e5e'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='__poll_chk' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='3ac36db0'/>
<parameter type-id='555eef66'/>
@ -8384,10 +8427,6 @@
<parameter type-id='80f4b756' name='dev_name'/>
<return type-id='c19b74c3'/>
</function-decl>
<function-decl name='zfs_get_underlying_path' mangled-name='zfs_get_underlying_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_underlying_path'>
<parameter type-id='80f4b756' name='dev_name'/>
<return type-id='26a90f95'/>
</function-decl>
<function-decl name='is_mpath_whole_disk' mangled-name='is_mpath_whole_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='is_mpath_whole_disk'>
<parameter type-id='80f4b756' name='path'/>
<return type-id='c19b74c3'/>
@ -8523,6 +8562,10 @@
<parameter type-id='b59d7dce' name='buflen'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_disk_wait' mangled-name='zpool_disk_wait' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_disk_wait'>
<parameter type-id='80f4b756' name='path'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='update_vdev_config_dev_sysfs_path' mangled-name='update_vdev_config_dev_sysfs_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='update_vdev_config_dev_sysfs_path'>
<parameter type-id='5ce45b60' name='nv'/>
<parameter type-id='80f4b756' name='path'/>
@ -8548,6 +8591,9 @@
<parameter type-id='95e97e5e'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='clearenv' visibility='default' binding='global' size-in-bits='64'>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zfs_setproctitle_init' mangled-name='zfs_setproctitle_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_setproctitle_init'>
<parameter type-id='95e97e5e' name='argc'/>
<parameter type-id='9b23c9ad' name='argv'/>
@ -8778,6 +8824,7 @@
<array-type-def dimensions='1' type-id='853fd5dc' size-in-bits='32768' id='b505fc2f'>
<subrange length='64' type-id='7359adad' id='b10be967'/>
</array-type-def>
<type-decl name='float' size-in-bits='32' id='a6c45d85'/>
<class-decl name='ddt_stat' size-in-bits='512' is-struct='yes' visibility='default' id='65242dfe'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='dds_blocks' type-id='9c313c2d' visibility='default'/>
@ -8815,11 +8862,27 @@
<pointer-type-def type-id='ec92d602' size-in-bits='64' id='932720f8'/>
<qualified-type-def type-id='853fd5dc' const='yes' id='764c298c'/>
<pointer-type-def type-id='764c298c' size-in-bits='64' id='dfe59052'/>
<qualified-type-def type-id='a9c79a1f' const='yes' id='cd087e36'/>
<pointer-type-def type-id='cd087e36' size-in-bits='64' id='e05e8614'/>
<function-decl name='nanosleep' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='e05e8614'/>
<parameter type-id='3d83ba87'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_dump_ddt' mangled-name='zpool_dump_ddt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_dump_ddt'>
<parameter type-id='dfe59052' name='dds_total'/>
<parameter type-id='932720f8' name='ddh'/>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='fsleep' mangled-name='fsleep' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fsleep'>
<parameter type-id='a6c45d85' name='sec'/>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='zpool_getenv_int' mangled-name='zpool_getenv_int' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_getenv_int'>
<parameter type-id='80f4b756' name='env'/>
<parameter type-id='95e97e5e' name='default_val'/>
<return type-id='95e97e5e'/>
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='module/avl/avl.c' language='LANG_C99'>
<function-decl name='avl_last' mangled-name='avl_last' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_last'>

View file

@ -3036,6 +3036,9 @@ zpool_vdev_is_interior(const char *name)
return (B_FALSE);
}
/*
* Lookup the nvlist for a given vdev.
*/
nvlist_t *
zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
boolean_t *l2cache, boolean_t *log)
@ -3043,6 +3046,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
char *end;
nvlist_t *nvroot, *search, *ret;
uint64_t guid;
boolean_t __avail_spare, __l2cache, __log;
search = fnvlist_alloc();
@ -3058,6 +3062,18 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
nvroot = fnvlist_lookup_nvlist(zhp->zpool_config,
ZPOOL_CONFIG_VDEV_TREE);
/*
* User can pass NULL for avail_spare, l2cache, and log, but
* we still need to provide variables to vdev_to_nvlist_iter(), so
* just point them to junk variables here.
*/
if (!avail_spare)
avail_spare = &__avail_spare;
if (!l2cache)
l2cache = &__l2cache;
if (!log)
log = &__log;
*avail_spare = B_FALSE;
*l2cache = B_FALSE;
if (log != NULL)
@ -3313,21 +3329,23 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
}
/*
* Mark the given vdev degraded.
* Generic set vdev state function
*/
int
zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
static int
zpool_vdev_set_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux,
vdev_state_t state)
{
zfs_cmd_t zc = {"\0"};
char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
dgettext(TEXT_DOMAIN, "cannot set %s %llu"),
zpool_state_to_name(state, aux), (u_longlong_t)guid);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_guid = guid;
zc.zc_cookie = VDEV_STATE_DEGRADED;
zc.zc_cookie = state;
zc.zc_obj = aux;
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
@ -3336,6 +3354,27 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
return (zpool_standard_error(hdl, errno, errbuf));
}
/*
 * Put the vdev identified by 'guid' into the DEGRADED state.  'aux' is
 * handed through unchanged to the generic state-setting helper.
 */
int
zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
	const vdev_state_t target = VDEV_STATE_DEGRADED;

	return (zpool_vdev_set_state(zhp, guid, aux, target));
}
/*
 * Put the vdev identified by 'guid' into the REMOVED state, i.e. treat it
 * as if the device were no longer present.
 *
 * Not to be confused with zpool_vdev_remove(), which removes a device
 * (one that does exist) from the pool.
 */
int
zpool_vdev_set_removed_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
	const vdev_state_t target = VDEV_STATE_REMOVED;

	return (zpool_vdev_set_state(zhp, guid, aux, target));
}
/*
* Returns TRUE if the given nvlist is a vdev that was originally swapped in as
* a hot spare.

View file

@ -263,3 +263,11 @@ update_vdevs_config_dev_sysfs_path(nvlist_t *config)
{
(void) config;
}
/*
 * Stub: waiting for a disk to appear is not implemented here.  The path
 * argument is ignored and ENOTSUP is always returned.
 */
int
zpool_disk_wait(const char *path)
{
	(void) path;	/* intentionally unused */

	return (ENOTSUP);
}

View file

@ -170,25 +170,17 @@ zpool_open_func(void *arg)
if (rn->rn_labelpaths) {
const char *path = NULL;
const char *devid = NULL;
const char *env = NULL;
rdsk_node_t *slice;
avl_index_t where;
int timeout;
int error;
if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
return;
env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
timeout < 0) {
timeout = DISK_LABEL_WAIT;
}
/*
* Allow devlinks to stabilize so all paths are available.
*/
zpool_label_disk_wait(rn->rn_name, timeout);
zpool_disk_wait(rn->rn_name);
if (path != NULL) {
slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
@ -682,6 +674,20 @@ zpool_label_disk_wait(const char *path, int timeout_ms)
#endif /* HAVE_LIBUDEV */
}
/*
 * Convenience wrapper around zpool_label_disk_wait(): wait for the device
 * at 'path' using the timeout taken from the ZPOOL_IMPORT_UDEV_TIMEOUT_MS
 * environment variable, falling back to DISK_LABEL_WAIT.
 *
 * Returns whatever zpool_label_disk_wait() returns.
 */
int
zpool_disk_wait(const char *path)
{
	return (zpool_label_disk_wait(path,
	    zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS",
	    DISK_LABEL_WAIT)));
}
/*
* Encode the persistent devices strings
* used for the vdev disk label
@ -767,6 +773,10 @@ encode_device_strings(const char *path, vdev_dev_strs_t *ds,
* in the nvlist * (if applicable). Like:
* vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
*
* If an old path was in the nvlist, and the rescan can not find a new path,
* then keep the old path, since the disk may have been removed.
*
* path: The vdev path (value from ZPOOL_CONFIG_PATH)
* key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH)
*/
void
@ -774,6 +784,9 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
const char *key)
{
char *upath, *spath;
const char *oldpath = NULL;
(void) nvlist_lookup_string(nv, key, &oldpath);
/* Add enclosure sysfs path (if disk is in an enclosure). */
upath = zfs_get_underlying_path(path);
@ -782,7 +795,14 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
if (spath) {
(void) nvlist_add_string(nv, key, spath);
} else {
(void) nvlist_remove_all(nv, key);
/*
* We couldn't dynamically scan the disk's enclosure sysfs path.
* This could be because the disk went away. If there's an old
* enclosure sysfs path in the nvlist, then keep using it.
*/
if (!oldpath) {
(void) nvlist_remove_all(nv, key);
}
}
free(upath);

View file

@ -1898,6 +1898,104 @@ zpool_find_config(libpc_handle_t *hdl, const char *target, nvlist_t **configp,
return (0);
}
/*
 * Report whether 'nv' describes a leaf vdev, i.e. one whose
 * ZPOOL_CONFIG_CHILDREN array is absent or empty.
 * Note: draid spares are leaf vdevs.
 */
static boolean_t
vdev_is_leaf(nvlist_t *nv)
{
	nvlist_t **kids = NULL;
	uint_t nkids = 0;

	/* A failed lookup leaves nkids at 0, which also means "leaf". */
	(void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &kids, &nkids);

	return (nkids == 0);
}
/*
 * Report whether 'nv' is a leaf vdev backed by a real device, meaning its
 * ZPOOL_CONFIG_TYPE is either VDEV_TYPE_DISK or VDEV_TYPE_FILE.
 *
 * Returns B_FALSE for non-leaf vdevs, for leaf vdevs of any other type, and
 * for nvlists that carry no type string at all.
 */
static boolean_t
vdev_is_real_leaf(nvlist_t *nv)
{
	const char *type = NULL;

	if (!vdev_is_leaf(nv))
		return (B_FALSE);

	/*
	 * Never hand a NULL string to strcmp(): if the type is missing the
	 * vdev cannot be classified as a disk or file.
	 */
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    type == NULL)
		return (B_FALSE);

	if ((strcmp(type, VDEV_TYPE_DISK) == 0) ||
	    (strcmp(type, VDEV_TYPE_FILE) == 0)) {
		return (B_TRUE);
	}

	return (B_FALSE);
}
/*
 * Per-vdev callback behind the FOR_EACH_VDEV() macros.
 *
 * state:            Points to an (nvlist_t *) slot that is abused to hold
 *                   the small state machine value below.
 * nv:               The vdev nvlist_t currently being visited.
 * last_nv:          Points to an (nvlist_t *) slot holding the vdev that was
 *                   handed back on the previous iteration; it is updated
 *                   whenever a new vdev is selected.
 * real_leaves_only: When set, only vdevs accepted by vdev_is_real_leaf()
 *                   are selected.
 *
 * Returns 1 when 'nv' is the vdev selected for this iteration, 0 while the
 * search is still in progress.
 */
static int
__for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv,
    boolean_t real_leaves_only)
{
	enum {FIRST_NV = 0, NEXT_IS_MATCH = 1, STOP_LOOKING = 2};
	nvlist_t **statep = state;
	nvlist_t **lastp = last_nv;

	/*
	 * In the initial state every visited vdev is a candidate.  Otherwise
	 * we first have to walk up to the previously returned vdev; only the
	 * entries visited after it are candidates.
	 */
	if (*statep != (nvlist_t *)FIRST_NV) {
		if (nv == *lastp) {
			/* Found last_nv: the following entry is wanted. */
			*statep = (nvlist_t *)NEXT_IS_MATCH;
			return (0);
		}

		if (*statep != (nvlist_t *)NEXT_IS_MATCH)
			return (0);
	}

	/* Candidate entry: apply the optional "real leaf" filter. */
	if (real_leaves_only && !vdev_is_real_leaf(nv))
		return (0);

	/* Select this vdev and remember it for the next iteration. */
	*lastp = nv;
	*statep = (nvlist_t *)STOP_LOOKING;

	return (1);
}
/*
 * Iteration callback that selects every vdev (no leaf filtering).
 */
int
for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv)
{
	const boolean_t leaves_only = B_FALSE;

	return (__for_each_vdev_macro_helper_func(state, nv, last_nv,
	    leaves_only));
}
/*
 * Iteration callback that selects only real leaf vdevs (see
 * vdev_is_real_leaf()).
 */
int
for_each_real_leaf_vdev_macro_helper_func(void *state, nvlist_t *nv,
    void *last_nv)
{
	const boolean_t leaves_only = B_TRUE;

	return (__for_each_vdev_macro_helper_func(state, nv, last_nv,
	    leaves_only));
}
/*
* Internal function for iterating over the vdevs.
*

View file

@ -28,6 +28,7 @@
#include <string.h>
#include <sys/nvpair.h>
#include <sys/fs/zfs.h>
#include <math.h>
#include <libzutil.h>
@ -144,3 +145,33 @@ zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
*leftover = bytes_read;
return (0);
}
/*
 * sleep() for fractional durations: 'sec' is split into whole seconds and
 * a nanosecond remainder, which are then passed to nanosleep().
 */
void
fsleep(float sec)
{
	struct timespec delay;
	float frac;

	delay.tv_sec = floor(sec);
	frac = sec - (float)delay.tv_sec;
	delay.tv_nsec = frac * NANOSEC;

	(void) nanosleep(&delay, NULL);
}
/*
 * Look up environment variable 'env' and return its value as an integer.
 *
 * 'default_val' is returned instead when the variable is not set, when it
 * does not start with a decimal integer, or when the parsed value is
 * negative.
 */
int
zpool_getenv_int(const char *env, int default_val)
{
	const char *raw = getenv(env);
	int parsed;

	if (raw == NULL)
		return (default_val);

	if (sscanf(raw, "%d", &parsed) != 1)
		return (default_val);

	if (parsed < 0)
		return (default_val);

	return (parsed);
}

View file

@ -2297,6 +2297,16 @@ as the number of actual CPUs in the system divided by the
.Sy spa_num_allocators
value.
.
.It Sy zio_taskq_read Ns = Ns Sy fixed,1,8 null scale null Pq charp
Set the queue and thread configuration for the IO read queues.
This is an advanced debugging parameter.
Don't change this unless you understand what it does.
.
.It Sy zio_taskq_write Ns = Ns Sy sync fixed,1,5 scale fixed,1,5 Pq charp
Set the queue and thread configuration for the IO write queues.
This is an advanced debugging parameter.
Don't change this unless you understand what it does.
.
.It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint
Do not create zvol device nodes.
This may slightly improve startup time on

View file

@ -36,6 +36,7 @@
.Sh SYNOPSIS
.Nm zpool
.Cm clear
.Op Fl -power
.Ar pool
.Oo Ar device Oc Ns
.
@ -52,6 +53,16 @@ Pools with
enabled which have been suspended cannot be resumed.
While the pool was suspended, it may have been imported on
another host, and resuming I/O could result in pool damage.
.Bl -tag -width Ds
.It Fl -power
Power on the device's slot in the storage enclosure and wait for the device
to show up before attempting to clear errors.
This is done on all the devices specified.
Alternatively, you can set the
.Sy ZPOOL_AUTO_POWER_ON_SLOT
environment variable to always enable this behavior.
Note: This flag currently works on Linux only.
.El
.
.Sh SEE ALSO
.Xr zdb 8 ,

View file

@ -36,12 +36,13 @@
.Sh SYNOPSIS
.Nm zpool
.Cm offline
.Op Fl ft
.Op Fl Sy -power Ns | Ns Op Fl Sy ft
.Ar pool
.Ar device Ns
.Nm zpool
.Cm online
.Op Fl e
.Op Fl Sy -power
.Op Fl Sy e
.Ar pool
.Ar device Ns
.
@ -50,7 +51,7 @@
.It Xo
.Nm zpool
.Cm offline
.Op Fl ft
.Op Fl Sy -power Ns | Ns Op Fl Sy ft
.Ar pool
.Ar device Ns
.Xc
@ -60,6 +61,9 @@ While the
is offline, no attempt is made to read or write to the device.
This command is not applicable to spares.
.Bl -tag -width Ds
.It Fl -power
Power off the device's slot in the storage enclosure.
This flag currently works on Linux only.
.It Fl f
Force fault.
Instead of offlining the disk, put it into a faulted state.
@ -73,6 +77,7 @@ Upon reboot, the specified physical device reverts to its previous state.
.It Xo
.Nm zpool
.Cm online
.Op Fl -power
.Op Fl e
.Ar pool
.Ar device Ns
@ -80,6 +85,13 @@ Upon reboot, the specified physical device reverts to its previous state.
Brings the specified physical device online.
This command is not applicable to spares.
.Bl -tag -width Ds
.It Fl -power
Power on the device's slot in the storage enclosure and wait for the device
to show up before attempting to online it.
Alternatively, you can set the
.Sy ZPOOL_AUTO_POWER_ON_SLOT
environment variable to always enable this behavior.
This flag currently works on Linux only.
.It Fl e
Expand the device to use all available space.
If the device is part of a mirror or raidz then all devices must be expanded

View file

@ -57,6 +57,8 @@ and the estimated time to completion.
Both of these are only approximate, because the amount of data in the pool and
the other workloads on the system can change.
.Bl -tag -width Ds
.It Fl -power
Display vdev enclosure slot power status (on or off).
.It Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns
Run a script (or scripts) on each vdev and include the output as a new column
in the

View file

@ -444,7 +444,7 @@ rpool 14.6G 54.9G 4 55 250K 2.69M
.Ed
.
.Sh ENVIRONMENT VARIABLES
.Bl -tag -compact -width "ZPOOL_IMPORT_UDEV_TIMEOUT_MS"
.Bl -tag -compact -width "ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE"
.It Sy ZFS_ABORT
Cause
.Nm
@ -456,6 +456,23 @@ Use ANSI color in
and
.Nm zpool Cm iostat
output.
.It Sy ZPOOL_AUTO_POWER_ON_SLOT
Automatically attempt to turn on the enclosure slot power to a drive when
running the
.Nm zpool Cm online
or
.Nm zpool Cm clear
commands.
This has the same effect as passing the
.Fl -power
option to those commands.
.It Sy ZPOOL_POWER_ON_SLOT_TIMEOUT_MS
The maximum time in milliseconds to wait for a slot power sysfs value
to return the correct value after writing it.
For example, after writing "on" to the sysfs enclosure slot power_control file,
it can take some time for the enclosure to power up the slot and return
"on" if you read back the 'power_control' value.
Defaults to 30 seconds (30000ms) if not set.
.It Sy ZPOOL_IMPORT_PATH
The search path for devices or files to use with the pool.
This is a colon-separated list of directories in which

View file

@ -79,6 +79,7 @@ SPL_OBJS := \
spl-kstat.o \
spl-proc.o \
spl-procfs-list.o \
spl-shrinker.o \
spl-taskq.o \
spl-thread.o \
spl-trace.o \

View file

@ -0,0 +1,115 @@
/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*
* Solaris Porting Layer (SPL) Shrinker Implementation.
*/
#include <sys/kmem.h>
#include <sys/shrinker.h>
#ifdef HAVE_SINGLE_SHRINKER_CALLBACK
/*
 * 3.0-3.11: the kernel offers a single shrink() callback.  The shrinker is
 * embedded as the first member of a wrapper that also carries the separate
 * count and scan functions, so the callback can recover them from the
 * shrinker pointer it is given.
 */
struct spl_shrinker_wrap {
	struct shrinker shrinker;
	spl_shrinker_cb countfunc;
	spl_shrinker_cb scanfunc;
};

/*
 * Combined callback: run the scan function when a scan was requested
 * (nr_to_scan != 0), then always report the result of the count function.
 */
static int
spl_shrinker_single_cb(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct spl_shrinker_wrap *wrap = (struct spl_shrinker_wrap *)shrinker;
	struct shrinker *inner = &wrap->shrinker;

	if (sc->nr_to_scan != 0) {
		/* The scan result is intentionally discarded. */
		(void) wrap->scanfunc(inner, sc);
	}

	return (wrap->countfunc(inner, sc));
}
#endif
/*
 * Allocate a shrinker, install the given callbacks and seek cost, and
 * register it with the kernel.  Which kernel interface is used is selected
 * at build time through the HAVE_* macros.
 *
 * name:      Passed to shrinker_alloc() or to the vararg form of
 *            register_shrinker(); unused in the remaining configurations.
 * countfunc: Callback installed as count_objects (or stored in the wrapper
 *            for the single-callback API).
 * scanfunc:  Callback installed as scan_objects (or stored in the wrapper
 *            for the single-callback API).
 * seek_cost: Value stored in the shrinker's 'seeks' field.
 *
 * Returns the shrinker, or NULL if the allocation step produced NULL.  The
 * result is meant to be released with spl_unregister_shrinker().
 */
struct shrinker *
spl_register_shrinker(const char *name, spl_shrinker_cb countfunc,
spl_shrinker_cb scanfunc, int seek_cost)
{
struct shrinker *shrinker;
/* allocate shrinker */
#if defined(HAVE_SHRINKER_REGISTER)
/* 6.7: kernel will allocate the shrinker for us */
shrinker = shrinker_alloc(0, name);
#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
/* 3.12-6.6: we allocate the shrinker */
shrinker = kmem_zalloc(sizeof (struct shrinker), KM_SLEEP);
#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
/* 3.0-3.11: allocate a wrapper */
struct spl_shrinker_wrap *sw =
kmem_zalloc(sizeof (struct spl_shrinker_wrap), KM_SLEEP);
/* the embedded shrinker is what gets handed to the kernel and the caller */
shrinker = &sw->shrinker;
#else
/* 2.x-2.6.22, or a newer shrinker API has been introduced. */
#error "Unknown shrinker API"
#endif
if (shrinker == NULL)
return (NULL);
/* set callbacks */
#ifdef HAVE_SINGLE_SHRINKER_CALLBACK
/* single_cb dispatches to these two through the wrapper */
sw->countfunc = countfunc;
sw->scanfunc = scanfunc;
shrinker->shrink = spl_shrinker_single_cb;
#else
shrinker->count_objects = countfunc;
shrinker->scan_objects = scanfunc;
#endif
/* set params */
shrinker->seeks = seek_cost;
/* register with kernel */
/*
 * NOTE(review): the result of register_shrinker() is not examined here; if
 * it can fail on the target kernel the failure would go unnoticed and the
 * shrinker would still be returned -- confirm whether this is intended.
 */
#if defined(HAVE_SHRINKER_REGISTER)
shrinker_register(shrinker);
#elif defined(HAVE_REGISTER_SHRINKER_VARARG)
register_shrinker(shrinker, name);
#else
register_shrinker(shrinker);
#endif
return (shrinker);
}
EXPORT_SYMBOL(spl_register_shrinker);
/*
 * Unregister a shrinker obtained from spl_register_shrinker() and release
 * its memory, using the counterpart of whichever kernel interface was
 * selected at build time.
 */
void
spl_unregister_shrinker(struct shrinker *shrinker)
{
#if defined(HAVE_SHRINKER_REGISTER)
/* allocated by shrinker_alloc(), so the kernel releases it */
shrinker_free(shrinker);
#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
unregister_shrinker(shrinker);
kmem_free(shrinker, sizeof (struct shrinker));
#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
unregister_shrinker(shrinker);
/*
 * The shrinker is the first member of struct spl_shrinker_wrap, so this
 * pointer is also the start of the wrapper allocation; free the full
 * wrapper size.
 */
kmem_free(shrinker, sizeof (struct spl_shrinker_wrap));
#else
#error "Unknown shrinker API"
#endif
}
EXPORT_SYMBOL(spl_unregister_shrinker);

View file

@ -253,8 +253,7 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
return (sc->nr_to_scan);
}
SPL_SHRINKER_DECLARE(arc_shrinker,
arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS);
static struct shrinker *arc_shrinker = NULL;
int
arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
@ -357,14 +356,18 @@ arc_lowmem_init(void)
* reclaim from the arc. This is done to prevent kswapd from
* swapping out pages when it is preferable to shrink the arc.
*/
spl_register_shrinker(&arc_shrinker);
arc_shrinker = spl_register_shrinker("zfs-arc-shrinker",
arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS);
VERIFY(arc_shrinker);
arc_set_sys_free(allmem);
}
void
arc_lowmem_fini(void)
{
spl_unregister_shrinker(&arc_shrinker);
spl_unregister_shrinker(arc_shrinker);
arc_shrinker = NULL;
}
int

View file

@ -85,7 +85,7 @@ static blk_mode_t
#else
static fmode_t
#endif
vdev_bdev_mode(spa_mode_t spa_mode)
vdev_bdev_mode(spa_mode_t spa_mode, boolean_t exclusive)
{
#ifdef HAVE_BLK_MODE_T
blk_mode_t mode = 0;
@ -95,6 +95,9 @@ vdev_bdev_mode(spa_mode_t spa_mode)
if (spa_mode & SPA_MODE_WRITE)
mode |= BLK_OPEN_WRITE;
if (exclusive)
mode |= BLK_OPEN_EXCL;
#else
fmode_t mode = 0;
@ -103,6 +106,9 @@ vdev_bdev_mode(spa_mode_t spa_mode)
if (spa_mode & SPA_MODE_WRITE)
mode |= FMODE_WRITE;
if (exclusive)
mode |= FMODE_EXCL;
#endif
return (mode);
@ -225,10 +231,10 @@ vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder,
{
#ifdef HAVE_BLKDEV_GET_BY_PATH_4ARG
return (blkdev_get_by_path(path,
vdev_bdev_mode(mode) | BLK_OPEN_EXCL, holder, hops));
vdev_bdev_mode(mode, B_TRUE), holder, hops));
#else
return (blkdev_get_by_path(path,
vdev_bdev_mode(mode) | FMODE_EXCL, holder));
vdev_bdev_mode(mode, B_TRUE), holder));
#endif
}
@ -238,7 +244,7 @@ vdev_blkdev_put(struct block_device *bdev, spa_mode_t mode, void *holder)
#ifdef HAVE_BLKDEV_PUT_HOLDER
return (blkdev_put(bdev, holder));
#else
return (blkdev_put(bdev, vdev_bdev_mode(mode) | FMODE_EXCL));
return (blkdev_put(bdev, vdev_bdev_mode(mode, B_TRUE)));
#endif
}
@ -248,9 +254,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
{
struct block_device *bdev;
#ifdef HAVE_BLK_MODE_T
blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa), B_FALSE);
#else
fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa), B_FALSE);
#endif
hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
vdev_disk_t *vd;

View file

@ -520,8 +520,8 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
ip->i_uid = SUID_TO_KUID(0);
ip->i_gid = SGID_TO_KGID(0);
ip->i_blkbits = SPA_MINBLOCKSHIFT;
ip->i_atime = now;
ip->i_mtime = now;
zpl_inode_set_atime_to_ts(ip, now);
zpl_inode_set_mtime_to_ts(ip, now);
zpl_inode_set_ctime_to_ts(ip, now);
ip->i_fop = fops;
ip->i_op = ops;

View file

@ -1240,12 +1240,18 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
* and inode caches. This can occur when the ARC needs to free meta data
* blocks but can't because they are all pinned by entries in these caches.
*/
#if defined(HAVE_SUPER_BLOCK_S_SHRINK)
#define S_SHRINK(sb) (&(sb)->s_shrink)
#elif defined(HAVE_SUPER_BLOCK_S_SHRINK_PTR)
#define S_SHRINK(sb) ((sb)->s_shrink)
#endif
int
zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
{
zfsvfs_t *zfsvfs = sb->s_fs_info;
int error = 0;
struct shrinker *shrinker = &sb->s_shrink;
struct shrinker *shrinker = S_SHRINK(sb);
struct shrink_control sc = {
.nr_to_scan = nr_to_scan,
.gfp_mask = GFP_KERNEL,
@ -1257,7 +1263,7 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
#if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \
defined(SHRINK_CONTROL_HAS_NID) && \
defined(SHRINKER_NUMA_AWARE)
if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) {
if (shrinker->flags & SHRINKER_NUMA_AWARE) {
*objects = 0;
for_each_online_node(sc.nid) {
*objects += (*shrinker->scan_objects)(shrinker, &sc);

View file

@ -2438,15 +2438,17 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
zp->z_atime_dirty = B_FALSE;
ZFS_TIME_ENCODE(&ip->i_atime, atime);
inode_timespec_t tmp_atime;
ZFS_TIME_ENCODE(&tmp_atime, atime);
zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_atime);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
&atime, sizeof (atime));
}
if (mask & (ATTR_MTIME | ATTR_SIZE)) {
ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
ZTOI(zp)->i_mtime = zpl_inode_timestamp_truncate(
vap->va_mtime, ZTOI(zp));
zpl_inode_set_mtime_to_ts(ZTOI(zp),
zpl_inode_timestamp_truncate(vap->va_mtime, ZTOI(zp)));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
mtime, sizeof (mtime));
@ -3660,7 +3662,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
caddr_t va;
int err = 0;
uint64_t mtime[2], ctime[2];
inode_timespec_t tmp_ctime;
inode_timespec_t tmp_ts;
sa_bulk_attr_t bulk[3];
int cnt = 0;
struct address_space *mapping;
@ -3824,9 +3826,10 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
&zp->z_pflags, 8);
/* Preserve the mtime and ctime provided by the inode */
ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
tmp_ctime = zpl_inode_get_ctime(ip);
ZFS_TIME_ENCODE(&tmp_ctime, ctime);
tmp_ts = zpl_inode_get_mtime(ip);
ZFS_TIME_ENCODE(&tmp_ts, mtime);
tmp_ts = zpl_inode_get_ctime(ip);
ZFS_TIME_ENCODE(&tmp_ts, ctime);
zp->z_atime_dirty = B_FALSE;
zp->z_seq++;
@ -3880,7 +3883,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
zfsvfs_t *zfsvfs = ITOZSB(ip);
dmu_tx_t *tx;
uint64_t mode, atime[2], mtime[2], ctime[2];
inode_timespec_t tmp_ctime;
inode_timespec_t tmp_ts;
sa_bulk_attr_t bulk[4];
int error = 0;
int cnt = 0;
@ -3925,10 +3928,12 @@ zfs_dirty_inode(struct inode *ip, int flags)
SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
/* Preserve the mode, mtime and ctime provided by the inode */
ZFS_TIME_ENCODE(&ip->i_atime, atime);
ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
tmp_ctime = zpl_inode_get_ctime(ip);
ZFS_TIME_ENCODE(&tmp_ctime, ctime);
tmp_ts = zpl_inode_get_atime(ip);
ZFS_TIME_ENCODE(&tmp_ts, atime);
tmp_ts = zpl_inode_get_mtime(ip);
ZFS_TIME_ENCODE(&tmp_ts, mtime);
tmp_ts = zpl_inode_get_ctime(ip);
ZFS_TIME_ENCODE(&tmp_ts, ctime);
mode = ip->i_mode;
zp->z_mode = mode;
@ -3971,7 +3976,9 @@ zfs_inactive(struct inode *ip)
if (error) {
dmu_tx_abort(tx);
} else {
ZFS_TIME_ENCODE(&ip->i_atime, atime);
inode_timespec_t tmp_atime;
tmp_atime = zpl_inode_get_atime(ip);
ZFS_TIME_ENCODE(&tmp_atime, atime);
mutex_enter(&zp->z_lock);
(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
(void *)&atime, sizeof (atime), tx);

View file

@ -542,7 +542,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
uint64_t links;
uint64_t z_uid, z_gid;
uint64_t atime[2], mtime[2], ctime[2], btime[2];
inode_timespec_t tmp_ctime;
inode_timespec_t tmp_ts;
uint64_t projid = ZFS_DEFAULT_PROJID;
sa_bulk_attr_t bulk[12];
int count = 0;
@ -614,10 +614,12 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
if (zp->z_pflags & ZFS_XATTR)
zp->z_xattr_parent = parent;
ZFS_TIME_DECODE(&ip->i_atime, atime);
ZFS_TIME_DECODE(&ip->i_mtime, mtime);
ZFS_TIME_DECODE(&tmp_ctime, ctime);
zpl_inode_set_ctime_to_ts(ip, tmp_ctime);
ZFS_TIME_DECODE(&tmp_ts, atime);
zpl_inode_set_atime_to_ts(ip, tmp_ts);
ZFS_TIME_DECODE(&tmp_ts, mtime);
zpl_inode_set_mtime_to_ts(ip, tmp_ts);
ZFS_TIME_DECODE(&tmp_ts, ctime);
zpl_inode_set_ctime_to_ts(ip, tmp_ts);
ZFS_TIME_DECODE(&zp->z_btime, btime);
ip->i_ino = zp->z_id;
@ -1197,7 +1199,7 @@ zfs_rezget(znode_t *zp)
uint64_t gen;
uint64_t z_uid, z_gid;
uint64_t atime[2], mtime[2], ctime[2], btime[2];
inode_timespec_t tmp_ctime;
inode_timespec_t tmp_ts;
uint64_t projid = ZFS_DEFAULT_PROJID;
znode_hold_t *zh;
@ -1290,10 +1292,12 @@ zfs_rezget(znode_t *zp)
zfs_uid_write(ZTOI(zp), z_uid);
zfs_gid_write(ZTOI(zp), z_gid);
ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
ZFS_TIME_DECODE(&tmp_ctime, ctime);
zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
ZFS_TIME_DECODE(&tmp_ts, atime);
zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_ts);
ZFS_TIME_DECODE(&tmp_ts, mtime);
zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts);
ZFS_TIME_DECODE(&tmp_ts, ctime);
zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts);
ZFS_TIME_DECODE(&zp->z_btime, btime);
if ((uint32_t)gen != ZTOI(zp)->i_generation) {
@ -1401,22 +1405,24 @@ zfs_zinactive(znode_t *zp)
boolean_t
zfs_relatime_need_update(const struct inode *ip)
{
inode_timespec_t now, tmp_ctime;
inode_timespec_t now, tmp_atime, tmp_ts;
gethrestime(&now);
tmp_atime = zpl_inode_get_atime(ip);
/*
* In relatime mode, only update the atime if the previous atime
* is earlier than either the ctime or mtime or if at least a day
* has passed since the last update of atime.
*/
if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
tmp_ts = zpl_inode_get_mtime(ip);
if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0)
return (B_TRUE);
tmp_ctime = zpl_inode_get_ctime(ip);
if (zfs_compare_timespec(&tmp_ctime, &ip->i_atime) >= 0)
tmp_ts = zpl_inode_get_ctime(ip);
if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0)
return (B_TRUE);
if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60)
return (B_TRUE);
return (B_FALSE);
@ -1439,7 +1445,7 @@ void
zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
uint64_t ctime[2])
{
inode_timespec_t now, tmp_ctime;
inode_timespec_t now, tmp_ts;
gethrestime(&now);
@ -1447,7 +1453,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
if (flag & ATTR_MTIME) {
ZFS_TIME_ENCODE(&now, mtime);
ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime);
ZFS_TIME_DECODE(&tmp_ts, mtime);
zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts);
if (ZTOZSB(zp)->z_use_fuids) {
zp->z_pflags |= (ZFS_ARCHIVE |
ZFS_AV_MODIFIED);
@ -1456,8 +1463,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
if (flag & ATTR_CTIME) {
ZFS_TIME_ENCODE(&now, ctime);
ZFS_TIME_DECODE(&tmp_ctime, ctime);
zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
ZFS_TIME_DECODE(&tmp_ts, ctime);
zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts);
if (ZTOZSB(zp)->z_use_fuids)
zp->z_pflags |= ZFS_ARCHIVE;
}

View file

@ -526,7 +526,8 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
vap->va_ctime = ia->ia_ctime;
if (vap->va_mask & ATTR_ATIME)
ip->i_atime = zpl_inode_timestamp_truncate(ia->ia_atime, ip);
zpl_inode_set_atime_to_ts(ip,
zpl_inode_timestamp_truncate(ia->ia_atime, ip));
cookie = spl_fstrans_mark();
#ifdef HAVE_USERNS_IOPS_SETATTR

View file

@ -1634,8 +1634,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
*/
if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) {
spa_log_error(db->db_objset->os_spa, &zb, &bpp->blk_birth);
zfs_panic_recover("unencrypted block in encrypted "
"object set %llu", dmu_objset_id(db->db_objset));
err = SET_ERROR(EIO);
goto early_unlock;
}

View file

@ -1124,8 +1124,6 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (sta->os->os_encrypted &&
!BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
spa_log_error(spa, zb, &bp->blk_birth);
zfs_panic_recover("unencrypted block in encrypted "
"object set %llu", dmu_objset_id(sta->os));
return (SET_ERROR(EIO));
}

View file

@ -173,7 +173,7 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
* and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
* need to be handled with minimum delay.
*/
static const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
{ ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */
@ -1211,6 +1211,292 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
tqs->stqs_taskq = NULL;
}
#ifdef _KERNEL
/*
* The READ and WRITE rows of zio_taskqs are configurable at module load time
* by setting zio_taskq_read or zio_taskq_write.
*
* Example (the defaults for READ and WRITE)
* zio_taskq_read='fixed,1,8 null scale null'
* zio_taskq_write='sync fixed,1,5 scale fixed,1,5'
*
* Each sets the entire row at a time.
*
* 'fixed' is parameterised: fixed,Q,T where Q is number of taskqs, T is number
* of threads per taskq.
*
* 'null' can only be set on the high-priority queues (queue selection for
* high-priority queues will fall back to the regular queue if the high-pri
* is NULL).
*/
static const char *const modes[ZTI_NMODES] = {
"fixed", "scale", "sync", "null"
};
/*
 * Parse a taskq configuration string and, if it is valid, install it as row
 * `t` of zio_taskqs.
 *
 * The string must contain exactly ZIO_TASKQ_TYPES mode tokens. Each token is
 * one of the names in `modes`; "fixed" additionally takes two comma-separated
 * numbers ("fixed,Q,T": number of taskqs, then threads per taskq).
 *
 * `cfg` is modified in place: separators are overwritten with '\0' while
 * tokenising, so callers must pass a writable copy.
 *
 * Returns 0 on success. On any parse error SET_ERROR(EINVAL) is returned and
 * zio_taskqs is left untouched, because parsing happens into a local row that
 * is only copied out at the very end.
 *
 * NOTE(review): tokens are split on ' ' only, while leading whitespace is
 * skipped with isspace(). A tab or newline directly following a token stays
 * part of that token and makes the parse fail -- confirm this is intended.
 */
static int
spa_taskq_param_set(zio_type_t t, char *cfg)
{
int err = 0;
zio_taskq_info_t row[ZIO_TASKQ_TYPES] = {{0}};
char *next = cfg, *tok, *c;
/*
 * Parse out each element from the string and fill `row`. The entire
 * row has to be set at once, so any errors are flagged by just
 * breaking out of this loop early.
 */
uint_t q;
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
/* `next` is the start of the config; NULL means the string ran out */
if (next == NULL)
break;
/* Eat up leading space */
while (isspace(*next))
next++;
if (*next == '\0')
break;
/* Mode ends at space or end of string */
tok = next;
next = strchr(tok, ' ');
if (next != NULL) *next++ = '\0';
/* Parameters start after a comma; `c` is NULL if there are none */
c = strchr(tok, ',');
if (c != NULL) *c++ = '\0';
/* Match mode string */
uint_t mode;
for (mode = 0; mode < ZTI_NMODES; mode++)
if (strcmp(tok, modes[mode]) == 0)
break;
/* Unknown mode name: abandon the row */
if (mode == ZTI_NMODES)
break;
/*
 * Invalid canary: every successful case below overwrites row[q], so
 * if this value survives the switch the token was rejected.
 */
row[q].zti_mode = ZTI_NMODES;
/* Per-mode setup */
switch (mode) {
/*
 * FIXED is parameterised: number of queues, and number of
 * threads per queue.
 */
case ZTI_MODE_FIXED: {
/* No parameters? */
if (c == NULL || *c == '\0')
break;
/* Find next parameter; the second number must be present */
tok = c;
c = strchr(tok, ',');
if (c == NULL)
break;
/* Take digits and convert */
unsigned long long nq;
if (!(isdigit(*tok)))
break;
err = ddi_strtoull(tok, &tok, 10, &nq);
/* Must succeed and also end at the next param sep */
if (err != 0 || tok != c)
break;
/* Move past the comma */
tok++;
/* Need another number */
if (!(isdigit(*tok)))
break;
/* Remember start to make sure we moved */
c = tok;
/* Take digits */
unsigned long long ntpq;
err = ddi_strtoull(tok, &tok, 10, &ntpq);
/* Must succeed, moved forward, and consumed the whole token */
if (err != 0 || tok == c || *tok != '\0')
break;
/*
 * sanity; zero queues/threads make no sense, and
 * 16K is almost certainly more than anyone will ever
 * need and avoids silly numbers like UINT32_MAX
 */
if (nq == 0 || nq >= 16384 ||
ntpq == 0 || ntpq >= 16384)
break;
/* Threads-per-queue is passed first, queue count second */
const zio_taskq_info_t zti = ZTI_P(ntpq, nq);
row[q] = zti;
break;
}
case ZTI_MODE_SCALE: {
const zio_taskq_info_t zti = ZTI_SCALE;
row[q] = zti;
break;
}
case ZTI_MODE_SYNC: {
const zio_taskq_info_t zti = ZTI_SYNC;
row[q] = zti;
break;
}
case ZTI_MODE_NULL: {
/*
 * Can only null the high-priority queues; the general-
 * purpose ones have to exist.
 */
if (q != ZIO_TASKQ_ISSUE_HIGH &&
q != ZIO_TASKQ_INTERRUPT_HIGH)
break;
const zio_taskq_info_t zti = ZTI_NULL;
row[q] = zti;
break;
}
default:
break;
}
/* Ensure we set a mode; the canary is still in place on any failure */
if (row[q].zti_mode == ZTI_NMODES)
break;
}
/* Didn't get a full row, fail */
if (q < ZIO_TASKQ_TYPES)
return (SET_ERROR(EINVAL));
/* Eat trailing space */
if (next != NULL)
while (isspace(*next))
next++;
/* If there's anything left over then fail */
if (next != NULL && *next != '\0')
return (SET_ERROR(EINVAL));
/* Success! Copy it into the real config */
for (q = 0; q < ZIO_TASKQ_TYPES; q++)
zio_taskqs[t][q] = row[q];
return (0);
}
/*
 * Format row `t` of the live zio_taskqs table into `buf` as a
 * space-separated list of mode names, followed by a newline. Entries in
 * fixed mode are written as "<mode>,<zti_count>,<zti_value>"; every other
 * mode is written as its bare name.
 *
 * Returns the number of characters written, including the trailing newline
 * but not the terminating NUL. The caller must supply a large enough buffer;
 * no bound is checked here.
 */
static int
spa_taskq_param_get(zio_type_t t, char *buf)
{
	int len = 0;

	for (uint_t q = 0; q < ZIO_TASKQ_TYPES; q++) {
		const zio_taskq_info_t *info = &zio_taskqs[t][q];
		/* Every entry but the first is preceded by one space */
		const char *lead = (q == 0) ? "" : " ";

		if (info->zti_mode != ZTI_MODE_FIXED) {
			len += sprintf(buf + len, "%s%s", lead,
			    modes[info->zti_mode]);
			continue;
		}

		len += sprintf(buf + len, "%s%s,%u,%u", lead,
		    modes[info->zti_mode], info->zti_count,
		    info->zti_value);
	}

	buf[len] = '\n';
	buf[len + 1] = '\0';
	return (len + 1);
}
#ifdef __linux__
/*
 * Linux module-parameter "set" callback for the READ row of zio_taskqs.
 * The parser overwrites separators in its input, so it is handed a private
 * copy of `val`. The parser's error code is negated before being returned.
 */
static int
spa_taskq_read_param_set(const char *val, zfs_kernel_param_t *kp)
{
	/* Size of the duplicate; `val` itself is never modified */
	size_t copy_size = strlen(val) + 1;
	char *copy = kmem_strdup(val);
	int rc;

	rc = spa_taskq_param_set(ZIO_TYPE_READ, copy);
	kmem_free(copy, copy_size);

	return (-rc);
}
/*
 * Linux module-parameter "get" callback for the READ row of zio_taskqs.
 * Formats the current configuration into `buf` via spa_taskq_param_get() and
 * returns that function's result. `kp` is unused.
 */
static int
spa_taskq_read_param_get(char *buf, zfs_kernel_param_t *kp)
{
return (spa_taskq_param_get(ZIO_TYPE_READ, buf));
}
/*
 * Linux module-parameter "set" callback for the WRITE row of zio_taskqs.
 * The parser overwrites separators in its input, so it is handed a private
 * copy of `val`. The parser's error code is negated before being returned.
 */
static int
spa_taskq_write_param_set(const char *val, zfs_kernel_param_t *kp)
{
	/* Size of the duplicate; `val` itself is never modified */
	size_t copy_size = strlen(val) + 1;
	char *copy = kmem_strdup(val);
	int rc;

	rc = spa_taskq_param_set(ZIO_TYPE_WRITE, copy);
	kmem_free(copy, copy_size);

	return (-rc);
}
/*
 * Linux module-parameter "get" callback for the WRITE row of zio_taskqs.
 * Formats the current configuration into `buf` via spa_taskq_param_get() and
 * returns that function's result. `kp` is unused.
 */
static int
spa_taskq_write_param_get(char *buf, zfs_kernel_param_t *kp)
{
return (spa_taskq_param_get(ZIO_TYPE_WRITE, buf));
}
#else
#include <sys/sbuf.h>
/*
* On FreeBSD load-time parameters can be set up before malloc() is available,
* so we have to do all the parsing work on the stack.
*/
#define SPA_TASKQ_PARAM_MAX (128)
/*
 * FreeBSD sysctl handler for the READ row of zio_taskqs.
 *
 * With no new value supplied (req->newptr == NULL) the current configuration
 * is formatted and returned through an sbuf. Otherwise the new string is
 * fetched with sysctl_handle_string() and handed to spa_taskq_param_set().
 *
 * Returns 0 on success or an errno value from the sysctl/sbuf helpers or
 * from the parser.
 */
static int
spa_taskq_read_param(ZFS_MODULE_PARAM_ARGS)
{
	char buf[SPA_TASKQ_PARAM_MAX];
	int err = 0;

	/*
	 * Always render the live config into buf first. sysctl_handle_string()
	 * reads the buffer it is given in order to report the old value before
	 * it copies the new one in, so buf must hold a valid NUL-terminated
	 * string on the write path too, not uninitialised stack contents.
	 */
	int len = spa_taskq_param_get(ZIO_TYPE_READ, buf);

	if (req->newptr == NULL) {
		struct sbuf *s = sbuf_new_for_sysctl(NULL, NULL, len+1, req);
		sbuf_cpy(s, buf);
		err = sbuf_finish(s);
		sbuf_delete(s);
		return (err);
	}

	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
	if (err)
		return (err);
	return (spa_taskq_param_set(ZIO_TYPE_READ, buf));
}
/*
 * FreeBSD sysctl handler for the WRITE row of zio_taskqs.
 *
 * With no new value supplied (req->newptr == NULL) the current configuration
 * is formatted and returned through an sbuf. Otherwise the new string is
 * fetched with sysctl_handle_string() and handed to spa_taskq_param_set().
 *
 * Returns 0 on success or an errno value from the sysctl/sbuf helpers or
 * from the parser.
 */
static int
spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS)
{
	char buf[SPA_TASKQ_PARAM_MAX];
	int err = 0;

	/*
	 * Always render the live config into buf first. sysctl_handle_string()
	 * reads the buffer it is given in order to report the old value before
	 * it copies the new one in, so buf must hold a valid NUL-terminated
	 * string on the write path too, not uninitialised stack contents.
	 */
	int len = spa_taskq_param_get(ZIO_TYPE_WRITE, buf);

	if (req->newptr == NULL) {
		struct sbuf *s = sbuf_new_for_sysctl(NULL, NULL, len+1, req);
		sbuf_cpy(s, buf);
		err = sbuf_finish(s);
		sbuf_delete(s);
		return (err);
	}

	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
	if (err)
		return (err);
	return (spa_taskq_param_set(ZIO_TYPE_WRITE, buf));
}
#endif
#endif /* _KERNEL */
/*
* Dispatch a task to the appropriate taskq for the ZFS I/O type and priority.
* Note that a type may have multiple discrete taskqs to avoid lock contention
@ -10540,6 +10826,15 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT,
ZMOD_RW,
"Whether extra ALLOC blkptrs were added to a livelist entry while it "
"was being condensed");
#ifdef _KERNEL
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read,
spa_taskq_read_param_set, spa_taskq_read_param_get, ZMOD_RD,
"Configure IO queues for read IO");
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_write,
spa_taskq_write_param_set, spa_taskq_write_param_get, ZMOD_RD,
"Configure IO queues for write IO");
#endif
/* END CSTYLED */
ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_wr_iss_ncpus, UINT, ZMOD_RW,

View file

@ -144,6 +144,7 @@ static kmem_cache_t *zil_zcw_cache;
static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
static itx_t *zil_itx_clone(itx_t *oitx);
static uint64_t zil_max_waste_space(zilog_t *zilog);
static int
zil_bp_compare(const void *x1, const void *x2)
@ -1710,24 +1711,6 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
mutex_exit(&zilog->zl_lock);
}
/*
* Define a limited set of intent log block sizes.
*
* These must be a multiple of 4KB. Note only the amount used (again
* aligned to 4KB) actually gets written. However, we can't always just
* allocate SPA_OLD_MAXBLOCKSIZE as the slog space could be exhausted.
*/
static const struct {
uint64_t limit;
uint64_t blksz;
} zil_block_buckets[] = {
{ 4096, 4096 }, /* non TX_WRITE */
{ 8192 + 4096, 8192 + 4096 }, /* database */
{ 32768 + 4096, 32768 + 4096 }, /* NFS writes */
{ 65536 + 4096, 65536 + 4096 }, /* 64KB writes */
{ UINT64_MAX, SPA_OLD_MAXBLOCKSIZE}, /* > 128KB writes */
};
/*
* Maximum block size used by the ZIL. This is picked up when the ZIL is
* initialized. Otherwise this should not be used directly; see
@ -1735,6 +1718,91 @@ static const struct {
*/
static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
/*
 * Plan how a burst of `size` bytes is split between log blocks.
 *
 * Returns the planned per-block size and stores in *minsize the predicted
 * size of the first block, or 0 when no useful prediction can be made.
 */
static uint_t
zil_lwb_plan(zilog_t *zilog, uint64_t size, uint_t *minsize)
{
	uint_t max_data = zilog->zl_max_block_size - sizeof (zil_chain_t);

	/* Small bursts are written as-is in one block. */
	if (size <= max_data) {
		*minsize = size;
		return (size);
	}

	/*
	 * Big bursts use maximum blocks. The first block size is hard to
	 * predict, but it does not really matter.
	 */
	if (size > 8 * max_data) {
		*minsize = 0;
		return (max_data);
	}

	/*
	 * Medium bursts try to divide evenly to better utilize several SLOG
	 * VDEVs. The first block size is predicted assuming the worst case of
	 * maxing out the others.
	 */
	uint_t burst = size;
	uint_t nblocks = DIV_ROUND_UP(burst, max_data - sizeof (lr_write_t));
	uint_t per_block = DIV_ROUND_UP(burst, nblocks);
	uint_t slack = zil_max_waste_space(zilog);
	slack = MAX(slack, zilog->zl_cur_max);

	/*
	 * Fall back to maximum blocks if, due to large records or wasted
	 * space, nothing better can be predicted.
	 */
	if (per_block > max_data - slack) {
		*minsize = 0;
		return (max_data);
	}

	*minsize = MAX(burst - (max_data - slack) * (nblocks - 1), slack);
	return (per_block);
}
/*
 * Try to predict the next block size based on previous history. Make the
 * prediction sufficient for 7 of 8 previous bursts. Don't try to save if the
 * saving is less than 50%: extra writes may cost more, but we don't want a
 * single spike to badly affect our predictions.
 */
static uint_t
zil_lwb_predict(zilog_t *zilog)
{
	uint_t opt = UINT_MAX;
	uint_t cur_min = 0;

	/* If we are in the middle of a burst, take it into account also. */
	if (zilog->zl_cur_size > 0)
		opt = zil_lwb_plan(zilog, zilog->zl_cur_size, &cur_min);

	/* Find minimum optimal size. We don't need to go below that. */
	for (int i = 0; i < ZIL_BURSTS; i++)
		opt = MIN(opt, zilog->zl_prev_opt[i]);

	/* Find two biggest minimal first block sizes above the optimal. */
	uint_t largest = MAX(cur_min, opt);
	uint_t second = opt;
	for (int i = 0; i < ZIL_BURSTS; i++) {
		uint_t prev = zilog->zl_prev_min[i];

		if (prev >= largest) {
			second = largest;
			largest = prev;
		} else if (prev > second) {
			second = prev;
		}
	}

	/*
	 * If the second-largest size gives a 50% saving -- use it. It may
	 * cost us one additional write later, but the space saving is just
	 * too big.
	 */
	if (largest < second * 2)
		return (largest);
	return (second);
}
/*
* Close the log block for being issued and allocate the next one.
* Has to be called under zl_issuer_lock to chain more lwbs.
@ -1742,7 +1810,7 @@ static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
static lwb_t *
zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, lwb_state_t state)
{
int i;
uint64_t blksz, plan, plan2;
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
@ -1757,34 +1825,40 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, lwb_state_t state)
return (NULL);
/*
* Log blocks are pre-allocated. Here we select the size of the next
* block, based on size used in the last block.
* - first find the smallest bucket that will fit the block from a
* limited set of block sizes. This is because it's faster to write
* blocks allocated from the same metaslab as they are adjacent or
* close.
* - next find the maximum from the new suggested size and an array of
* previous sizes. This lessens a picket fence effect of wrongly
* guessing the size if we have a stream of say 2k, 64k, 2k, 64k
* requests.
*
* Note we only write what is used, but we can't just allocate
* the maximum block size because we can exhaust the available
* pool log space.
* Log blocks are pre-allocated. Here we select the size of the next
* block, based on what's left of this burst and the previous history.
* While we try to only write used part of the block, we can't just
* always allocate the maximum block size because we can exhaust all
* available pool log space, so we try to be reasonable.
*/
uint64_t zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t);
for (i = 0; zil_blksz > zil_block_buckets[i].limit; i++)
continue;
zil_blksz = MIN(zil_block_buckets[i].blksz, zilog->zl_max_block_size);
zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz;
for (i = 0; i < ZIL_PREV_BLKS; i++)
zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]);
DTRACE_PROBE3(zil__block__size, zilog_t *, zilog,
uint64_t, zil_blksz,
uint64_t, zilog->zl_prev_blks[zilog->zl_prev_rotor]);
zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
if (zilog->zl_cur_left > 0) {
/*
* We are in the middle of a burst and know how much is left.
* But if workload is multi-threaded there may be more soon.
* Try to predict what can it be and plan for the worst case.
*/
uint_t m;
plan = zil_lwb_plan(zilog, zilog->zl_cur_left, &m);
if (zilog->zl_parallel) {
plan2 = zil_lwb_plan(zilog, zilog->zl_cur_left +
zil_lwb_predict(zilog), &m);
if (plan < plan2)
plan = plan2;
}
} else {
/*
* The previous burst is done and we can only predict what
* will come next.
*/
plan = zil_lwb_predict(zilog);
}
blksz = plan + sizeof (zil_chain_t);
blksz = P2ROUNDUP_TYPED(blksz, ZIL_MIN_BLKSZ, uint64_t);
blksz = MIN(blksz, zilog->zl_max_block_size);
DTRACE_PROBE3(zil__block__size, zilog_t *, zilog, uint64_t, blksz,
uint64_t, plan);
return (zil_alloc_lwb(zilog, zil_blksz, NULL, 0, 0, state));
return (zil_alloc_lwb(zilog, blksz, NULL, 0, 0, state));
}
/*
@ -1835,7 +1909,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
int wsz = lwb->lwb_sz;
if (lwb->lwb_error == 0) {
abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf, lwb->lwb_sz);
if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
if (!lwb->lwb_slog || zilog->zl_cur_size <= zil_slog_bulk)
prio = ZIO_PRIORITY_SYNC_WRITE;
else
prio = ZIO_PRIORITY_ASYNC_WRITE;
@ -1996,6 +2070,42 @@ zil_max_copied_data(zilog_t *zilog)
return (MIN(max_data, zil_maxcopied));
}
/*
 * Return the log record length (lrc_reclen) of an itx, or 0 for a TX_COMMIT
 * itx.
 */
static uint64_t
zil_itx_record_size(itx_t *itx)
{
	lr_t *rec = &itx->itx_lr;

	if (rec->lrc_txtype != TX_COMMIT) {
		ASSERT3U(rec->lrc_reclen, >=, sizeof (lr_t));
		return (rec->lrc_reclen);
	}

	return (0);
}
/*
 * Return the number of data bytes that accompany the record of an itx: the
 * write length rounded up to a multiple of sizeof (uint64_t) for a TX_WRITE
 * itx in the WR_NEED_COPY state, and 0 for everything else.
 */
static uint64_t
zil_itx_data_size(itx_t *itx)
{
	lr_t *rec = &itx->itx_lr;

	if (rec->lrc_txtype != TX_WRITE || itx->itx_wr_state != WR_NEED_COPY)
		return (0);

	ASSERT3U(rec->lrc_reclen, ==, sizeof (lr_write_t));

	lr_write_t *wrec = (lr_write_t *)rec;
	return (P2ROUNDUP_TYPED(wrec->lr_length, sizeof (uint64_t),
	    uint64_t));
}
/*
 * Return the record length of an itx plus the size of any data that goes
 * with it (see zil_itx_data_size()), or 0 for a TX_COMMIT itx.
 */
static uint64_t
zil_itx_full_size(itx_t *itx)
{
	lr_t *rec = &itx->itx_lr;

	if (rec->lrc_txtype != TX_COMMIT) {
		ASSERT3U(rec->lrc_reclen, >=, sizeof (lr_t));
		return (rec->lrc_reclen + zil_itx_data_size(itx));
	}

	return (0);
}
/*
* Estimate space needed in the lwb for the itx. Allocate more lwbs or
* split the itx as needed, but don't touch the actual transaction data.
@ -2038,16 +2148,9 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
}
reclen = lr->lrc_reclen;
if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
ASSERT3U(reclen, ==, sizeof (lr_write_t));
dlen = P2ROUNDUP_TYPED(
lrw->lr_length, sizeof (uint64_t), uint64_t);
} else {
ASSERT3U(reclen, >=, sizeof (lr_t));
dlen = 0;
}
ASSERT3U(reclen, >=, sizeof (lr_t));
ASSERT3U(reclen, <=, zil_max_log_data(zilog, 0));
zilog->zl_cur_used += (reclen + dlen);
dlen = zil_itx_data_size(itx);
cont:
/*
@ -2088,6 +2191,7 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
clrw->lr_length = dnow;
lrw->lr_offset += dnow;
lrw->lr_length -= dnow;
zilog->zl_cur_left -= dnow;
} else {
citx = itx;
clr = lr;
@ -2109,10 +2213,8 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
list_insert_tail(&lwb->lwb_itxs, citx);
dlen -= dnow;
if (dlen > 0) {
zilog->zl_cur_used += reclen;
if (dlen > 0)
goto cont;
}
if (lr->lrc_txtype == TX_WRITE &&
lr->lrc_txg > spa_freeze_txg(zilog->zl_spa))
@ -2139,13 +2241,8 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
if (lr->lrc_txtype == TX_COMMIT)
return;
if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
dlen = P2ROUNDUP_TYPED(
lrw->lr_length, sizeof (uint64_t), uint64_t);
} else {
dlen = 0;
}
reclen = lr->lrc_reclen;
dlen = zil_itx_data_size(itx);
ASSERT3U(reclen + dlen, <=, lwb->lwb_nused - lwb->lwb_nfilled);
lr_buf = lwb->lwb_buf + lwb->lwb_nfilled;
@ -2576,6 +2673,7 @@ zil_get_commit_list(zilog_t *zilog)
ASSERT(zilog_is_dirty_in_txg(zilog, txg) ||
spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
list_t *sync_list = &itxg->itxg_itxs->i_sync_list;
itx_t *itx = NULL;
if (unlikely(zilog->zl_suspend > 0)) {
/*
* ZIL was just suspended, but we lost the race.
@ -2585,10 +2683,20 @@ zil_get_commit_list(zilog_t *zilog)
if (!list_is_empty(sync_list))
wtxg = MAX(wtxg, txg);
} else {
itx = list_head(sync_list);
list_move_tail(commit_list, sync_list);
}
mutex_exit(&itxg->itxg_lock);
while (itx != NULL) {
uint64_t s = zil_itx_full_size(itx);
zilog->zl_cur_size += s;
zilog->zl_cur_left += s;
s = zil_itx_record_size(itx);
zilog->zl_cur_max = MAX(zilog->zl_cur_max, s);
itx = list_next(commit_list, itx);
}
}
return (wtxg);
}
@ -2728,13 +2836,20 @@ static void
zil_burst_done(zilog_t *zilog)
{
if (!list_is_empty(&zilog->zl_itx_commit_list) ||
zilog->zl_cur_used == 0)
zilog->zl_cur_size == 0)
return;
if (zilog->zl_parallel)
zilog->zl_parallel--;
zilog->zl_cur_used = 0;
uint_t r = (zilog->zl_prev_rotor + 1) & (ZIL_BURSTS - 1);
zilog->zl_prev_rotor = r;
zilog->zl_prev_opt[r] = zil_lwb_plan(zilog, zilog->zl_cur_size,
&zilog->zl_prev_min[r]);
zilog->zl_cur_size = 0;
zilog->zl_cur_max = 0;
zilog->zl_cur_left = 0;
}
/*
@ -2867,6 +2982,8 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* itx list to somebody else who care.
*/
zilog->zl_parallel = ZIL_BURSTS;
zilog->zl_cur_left -=
zil_itx_full_size(itx);
break;
}
} else {
@ -2876,8 +2993,10 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
}
list_insert_tail(&nolwb_itxs, itx);
}
zilog->zl_cur_left -= zil_itx_full_size(itx);
} else {
ASSERT3S(lrc->lrc_txtype, !=, TX_COMMIT);
zilog->zl_cur_left -= zil_itx_full_size(itx);
zil_itx_destroy(itx);
}
}
@ -2960,9 +3079,9 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* of each individual itx.
*/
if (lwb->lwb_state == LWB_STATE_OPENED && !zilog->zl_parallel) {
zil_burst_done(zilog);
list_insert_tail(ilwbs, lwb);
lwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW);
zil_burst_done(zilog);
if (lwb == NULL) {
while ((lwb = list_remove_head(ilwbs)) != NULL)
zil_lwb_write_issue(zilog, lwb);
@ -3120,12 +3239,11 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
* since we've reached the commit waiter's timeout and it still
* hasn't been issued.
*/
zil_burst_done(zilog);
lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW);
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_CLOSED);
zil_burst_done(zilog);
if (nlwb == NULL) {
/*
* When zil_lwb_write_close() returns NULL, this
@ -3720,7 +3838,9 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
zilog->zl_dirty_max_txg = 0;
zilog->zl_last_lwb_opened = NULL;
zilog->zl_last_lwb_latency = 0;
zilog->zl_max_block_size = zil_maxblocksize;
zilog->zl_max_block_size = MIN(MAX(P2ALIGN_TYPED(zil_maxblocksize,
ZIL_MIN_BLKSZ, uint64_t), ZIL_MIN_BLKSZ),
spa_maxblocksize(dmu_objset_spa(os)));
mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zilog->zl_issuer_lock, NULL, MUTEX_DEFAULT, NULL);
@ -3740,6 +3860,11 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL);
cv_init(&zilog->zl_lwb_io_cv, NULL, CV_DEFAULT, NULL);
for (int i = 0; i < ZIL_BURSTS; i++) {
zilog->zl_prev_opt[i] = zilog->zl_max_block_size -
sizeof (zil_chain_t);
}
return (zilog);
}

View file

@ -16,6 +16,7 @@ dist_scripts_test_runner_include_DATA = \
scripts_runfilesdir = $(datadir)/$(PACKAGE)/runfiles
dist_scripts_runfiles_DATA = \
%D%/runfiles/bclone.run \
%D%/runfiles/common.run \
%D%/runfiles/freebsd.run \
%D%/runfiles/linux.run \

View file

@ -0,0 +1,46 @@
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# This run file contains all of the common functional tests. When
# adding a new test consider also adding it to the sanity.run file
# if the new test runs to completion in only a few seconds.
#
# Approximate run time: 5 hours
#
[DEFAULT]
pre = setup
quiet = False
pre_user = root
user = root
timeout = 28800
post_user = root
post = cleanup
failsafe_user = root
failsafe = callbacks/zfs_failsafe
outputdir = /var/tmp/test_results
tags = ['bclone']
[tests/functional/bclone]
tests = ['bclone_crossfs_corner_cases',
'bclone_crossfs_data',
'bclone_crossfs_embedded',
'bclone_crossfs_hole',
'bclone_diffprops_all',
'bclone_diffprops_checksum',
'bclone_diffprops_compress',
'bclone_diffprops_copies',
'bclone_diffprops_recordsize',
'bclone_prop_sync',
'bclone_samefs_corner_cases',
'bclone_samefs_data',
'bclone_samefs_embedded',
'bclone_samefs_hole']
tags = ['bclone']

View file

@ -53,6 +53,24 @@ tags = ['functional', 'arc']
tests = ['atime_001_pos', 'atime_002_neg', 'root_atime_off', 'root_atime_on']
tags = ['functional', 'atime']
[tests/functional/bclone]
tests = ['bclone_crossfs_corner_cases_limited',
'bclone_crossfs_data',
'bclone_crossfs_embedded',
'bclone_crossfs_hole',
'bclone_diffprops_all',
'bclone_diffprops_checksum',
'bclone_diffprops_compress',
'bclone_diffprops_copies',
'bclone_diffprops_recordsize',
'bclone_prop_sync',
'bclone_samefs_corner_cases_limited',
'bclone_samefs_data',
'bclone_samefs_embedded',
'bclone_samefs_hole']
tags = ['functional', 'bclone']
timeout = 7200
[tests/functional/bootfs]
tests = ['bootfs_001_pos', 'bootfs_002_neg', 'bootfs_003_pos',
'bootfs_004_neg', 'bootfs_005_neg', 'bootfs_006_pos', 'bootfs_007_pos',

View file

@ -263,13 +263,50 @@ if sys.platform.startswith('freebsd'):
'cli_root/zpool_import/zpool_import_012_pos': ['FAIL', known_reason],
'delegate/zfs_allow_003_pos': ['FAIL', known_reason],
'inheritance/inherit_001_pos': ['FAIL', 11829],
'resilver/resilver_restart_001': ['FAIL', known_reason],
'pool_checkpoint/checkpoint_big_rewind': ['FAIL', 12622],
'pool_checkpoint/checkpoint_indirect': ['FAIL', 12623],
'resilver/resilver_restart_001': ['FAIL', known_reason],
'snapshot/snapshot_002_pos': ['FAIL', '14831'],
})
elif sys.platform.startswith('linux'):
maybe.update({
'bclone/bclone_crossfs_corner_cases': ['SKIP', cfr_cross_reason],
'bclone/bclone_crossfs_corner_cases_limited':
['SKIP', cfr_cross_reason],
'bclone/bclone_crossfs_data': ['SKIP', cfr_cross_reason],
'bclone/bclone_crossfs_embedded': ['SKIP', cfr_cross_reason],
'bclone/bclone_crossfs_hole': ['SKIP', cfr_cross_reason],
'bclone/bclone_diffprops_all': ['SKIP', cfr_cross_reason],
'bclone/bclone_diffprops_checksum': ['SKIP', cfr_cross_reason],
'bclone/bclone_diffprops_compress': ['SKIP', cfr_cross_reason],
'bclone/bclone_diffprops_copies': ['SKIP', cfr_cross_reason],
'bclone/bclone_diffprops_recordsize': ['SKIP', cfr_cross_reason],
'bclone/bclone_prop_sync': ['SKIP', cfr_cross_reason],
'bclone/bclone_samefs_corner_cases': ['SKIP', cfr_reason],
'bclone/bclone_samefs_corner_cases_limited': ['SKIP', cfr_reason],
'bclone/bclone_samefs_data': ['SKIP', cfr_reason],
'bclone/bclone_samefs_embedded': ['SKIP', cfr_reason],
'bclone/bclone_samefs_hole': ['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange':
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_cross_dataset':
['SKIP', cfr_cross_reason],
'block_cloning/block_cloning_copyfilerange_fallback':
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_fallback_same_txg':
['SKIP', cfr_cross_reason],
'block_cloning/block_cloning_copyfilerange_partial':
['SKIP', cfr_reason],
'block_cloning/block_cloning_cross_enc_dataset':
['SKIP', cfr_cross_reason],
'block_cloning/block_cloning_disabled_copyfilerange':
['SKIP', cfr_reason],
'block_cloning/block_cloning_lwb_buffer_overflow':
['SKIP', cfr_reason],
'block_cloning/block_cloning_replay':
['SKIP', cfr_reason],
'block_cloning/block_cloning_replay_encrypted':
['SKIP', cfr_reason],
'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
'fault/auto_online_002_pos': ['FAIL', 11889],
@ -278,41 +315,21 @@ elif sys.platform.startswith('linux'):
'fault/auto_spare_multiple': ['FAIL', 11889],
'fault/auto_spare_shared': ['FAIL', 11889],
'fault/decompress_fault': ['FAIL', 11889],
'idmap_mount/idmap_mount_001': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_002': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_004': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_005': ['SKIP', idmap_reason],
'io/io_uring': ['SKIP', 'io_uring support required'],
'limits/filesystem_limit': ['SKIP', known_reason],
'limits/snapshot_limit': ['SKIP', known_reason],
'mmp/mmp_active_import': ['FAIL', known_reason],
'mmp/mmp_exported_import': ['FAIL', known_reason],
'mmp/mmp_inactive_import': ['FAIL', known_reason],
'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621],
'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason],
'zvol/zvol_misc/zvol_misc_fua': ['SKIP', 14872],
'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621],
'zvol/zvol_misc/zvol_misc_trim': ['SKIP', 14872],
'idmap_mount/idmap_mount_001': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_002': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_004': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_005': ['SKIP', idmap_reason],
'block_cloning/block_cloning_disabled_copyfilerange':
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange':
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_partial':
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_fallback':
['SKIP', cfr_reason],
'block_cloning/block_cloning_replay':
['SKIP', cfr_reason],
'block_cloning/block_cloning_replay_encrypted':
['SKIP', cfr_reason],
'block_cloning/block_cloning_lwb_buffer_overflow':
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_cross_dataset':
['SKIP', cfr_cross_reason],
'block_cloning/block_cloning_copyfilerange_fallback_same_txg':
['SKIP', cfr_cross_reason],
'block_cloning/block_cloning_cross_enc_dataset':
['SKIP', cfr_cross_reason],
'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason],
})
# Not all Github actions runners have scsi_debug module, so we may skip

View file

@ -2,6 +2,7 @@ scripts_zfs_tests_bindir = $(datadir)/$(PACKAGE)/zfs-tests/bin
scripts_zfs_tests_bin_PROGRAMS = %D%/chg_usr_exec
scripts_zfs_tests_bin_PROGRAMS += %D%/clonefile
scripts_zfs_tests_bin_PROGRAMS += %D%/cp_files
scripts_zfs_tests_bin_PROGRAMS += %D%/ctime
scripts_zfs_tests_bin_PROGRAMS += %D%/dir_rd_update
@ -119,7 +120,6 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/renameat2
scripts_zfs_tests_bin_PROGRAMS += %D%/xattrtest
scripts_zfs_tests_bin_PROGRAMS += %D%/zed_fd_spill-zedlet
scripts_zfs_tests_bin_PROGRAMS += %D%/idmap_util
scripts_zfs_tests_bin_PROGRAMS += %D%/clonefile
%C%_idmap_util_LDADD = libspl.la

View file

@ -59,6 +59,10 @@
#endif
#endif /* __NR_copy_file_range */
#ifdef __FreeBSD__
#define loff_t off_t
#endif
ssize_t
copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int)
__attribute__((weak));
@ -140,7 +144,7 @@ usage(void)
" FICLONERANGE:\n"
" clonefile -r <src> <dst> <soff> <doff> <len>\n"
" copy_file_range:\n"
" clonefile -f <src> <dst> <soff> <doff> <len>\n"
" clonefile -f <src> <dst> [<soff> <doff> <len | \"all\">]\n"
" FIDEDUPERANGE:\n"
" clonefile -d <src> <dst> <soff> <doff> <len>\n");
return (1);
@ -179,13 +183,29 @@ main(int argc, char **argv)
}
}
if (mode == CF_MODE_NONE || (argc-optind) < 2 ||
(mode != CF_MODE_CLONE && (argc-optind) < 5))
return (usage());
switch (mode) {
case CF_MODE_NONE:
return (usage());
case CF_MODE_CLONE:
if ((argc-optind) != 2)
return (usage());
break;
case CF_MODE_CLONERANGE:
case CF_MODE_DEDUPERANGE:
if ((argc-optind) != 5)
return (usage());
break;
case CF_MODE_COPYFILERANGE:
if ((argc-optind) != 2 && (argc-optind) != 5)
return (usage());
break;
default:
abort();
}
loff_t soff = 0, doff = 0;
size_t len = 0;
if (mode != CF_MODE_CLONE) {
size_t len = SSIZE_MAX;
if ((argc-optind) == 5) {
soff = strtoull(argv[optind+2], NULL, 10);
if (soff == ULLONG_MAX) {
fprintf(stderr, "invalid source offset");
@ -196,10 +216,15 @@ main(int argc, char **argv)
fprintf(stderr, "invalid dest offset");
return (1);
}
len = strtoull(argv[optind+4], NULL, 10);
if (len == ULLONG_MAX) {
fprintf(stderr, "invalid length");
return (1);
if (mode == CF_MODE_COPYFILERANGE &&
strcmp(argv[optind+4], "all") == 0) {
len = SSIZE_MAX;
} else {
len = strtoull(argv[optind+4], NULL, 10);
if (len == ULLONG_MAX) {
fprintf(stderr, "invalid length");
return (1);
}
}
}
@ -237,13 +262,15 @@ main(int argc, char **argv)
abort();
}
off_t spos = lseek(sfd, 0, SEEK_CUR);
off_t slen = lseek(sfd, 0, SEEK_END);
off_t dpos = lseek(dfd, 0, SEEK_CUR);
off_t dlen = lseek(dfd, 0, SEEK_END);
if (!quiet) {
off_t spos = lseek(sfd, 0, SEEK_CUR);
off_t slen = lseek(sfd, 0, SEEK_END);
off_t dpos = lseek(dfd, 0, SEEK_CUR);
off_t dlen = lseek(dfd, 0, SEEK_END);
fprintf(stderr, "file offsets: src=%lu/%lu; dst=%lu/%lu\n", spos, slen,
dpos, dlen);
fprintf(stderr, "file offsets: src=%lu/%lu; dst=%lu/%lu\n",
spos, slen, dpos, dlen);
}
close(dfd);
close(sfd);
@ -254,7 +281,8 @@ main(int argc, char **argv)
int
do_clone(int sfd, int dfd)
{
fprintf(stderr, "using FICLONE\n");
if (!quiet)
fprintf(stderr, "using FICLONE\n");
int err = ioctl(dfd, CF_FICLONE, sfd);
if (err < 0) {
fprintf(stderr, "ioctl(FICLONE): %s\n", strerror(errno));
@ -266,7 +294,8 @@ do_clone(int sfd, int dfd)
int
do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
{
fprintf(stderr, "using FICLONERANGE\n");
if (!quiet)
fprintf(stderr, "using FICLONERANGE\n");
cf_file_clone_range_t fcr = {
.src_fd = sfd,
.src_offset = soff,
@ -284,12 +313,22 @@ do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
int
do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
{
fprintf(stderr, "using copy_file_range\n");
if (!quiet)
fprintf(stderr, "using copy_file_range\n");
ssize_t copied = cf_copy_file_range(sfd, &soff, dfd, &doff, len, 0);
if (copied < 0) {
fprintf(stderr, "copy_file_range: %s\n", strerror(errno));
return (1);
}
if (len == SSIZE_MAX) {
struct stat sb;
if (fstat(sfd, &sb) < 0) {
fprintf(stderr, "fstat(sfd): %s\n", strerror(errno));
return (1);
}
len = sb.st_size;
}
if (copied != len) {
fprintf(stderr, "copy_file_range: copied less than requested: "
"requested=%lu; copied=%lu\n", len, copied);
@ -301,7 +340,8 @@ do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
int
do_deduperange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
{
fprintf(stderr, "using FIDEDUPERANGE\n");
if (!quiet)
fprintf(stderr, "using FIDEDUPERANGE\n");
char buf[sizeof (cf_file_dedupe_range_t)+
sizeof (cf_file_dedupe_range_info_t)] = {0};

View file

@ -98,7 +98,8 @@ export SYSTEM_FILES_COMMON='awk
uname
uniq
vmstat
wc'
wc
xargs'
export SYSTEM_FILES_FREEBSD='chflags
compress

View file

@ -123,10 +123,21 @@ function verify_ne # <a> <b> <type>
#
# $1 lower bound
# $2 upper bound
# [$3 how many]
function random_int_between
{
typeset -i min=$1
typeset -i max=$2
typeset -i count
typeset -i i
echo $(( (RANDOM % (max - min + 1)) + min ))
if [[ -z "$3" ]]; then
count=1
else
count=$3
fi
for (( i = 0; i < $count; i++ )); do
echo $(( (RANDOM % (max - min + 1)) + min ))
done
}

View file

@ -90,6 +90,9 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
functional/alloc_class/alloc_class.kshlib \
functional/atime/atime.cfg \
functional/atime/atime_common.kshlib \
functional/bclone/bclone.cfg \
functional/bclone/bclone_common.kshlib \
functional/bclone/bclone_corner_cases.kshlib \
functional/block_cloning/block_cloning.kshlib \
functional/cache/cache.cfg \
functional/cache/cache.kshlib \
@ -438,6 +441,24 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/atime/root_atime_on.ksh \
functional/atime/root_relatime_on.ksh \
functional/atime/setup.ksh \
functional/bclone/bclone_crossfs_corner_cases.ksh \
functional/bclone/bclone_crossfs_corner_cases_limited.ksh \
functional/bclone/bclone_crossfs_data.ksh \
functional/bclone/bclone_crossfs_embedded.ksh \
functional/bclone/bclone_crossfs_hole.ksh \
functional/bclone/bclone_diffprops_all.ksh \
functional/bclone/bclone_diffprops_checksum.ksh \
functional/bclone/bclone_diffprops_compress.ksh \
functional/bclone/bclone_diffprops_copies.ksh \
functional/bclone/bclone_diffprops_recordsize.ksh \
functional/bclone/bclone_prop_sync.ksh \
functional/bclone/bclone_samefs_corner_cases.ksh \
functional/bclone/bclone_samefs_corner_cases_limited.ksh \
functional/bclone/bclone_samefs_data.ksh \
functional/bclone/bclone_samefs_embedded.ksh \
functional/bclone/bclone_samefs_hole.ksh \
functional/bclone/cleanup.ksh \
functional/bclone/setup.ksh \
functional/block_cloning/cleanup.ksh \
functional/block_cloning/setup.ksh \
functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh \

View file

@ -0,0 +1,4 @@
- If dedup is enabled, block_cloning uses dedup.
- check the cases where block cloning is not supposed to work
- check block cloning between two different pools
- block cloning from a snapshot

View file

@ -0,0 +1,32 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
# Smallest block size assumed when rounding a file size up to its on-disk size.
# TODO: We should calculate that based on ashift.
export MINBLOCKSIZE=512
# Names of the source and destination datasets used by the bclone tests.
export TESTSRCFS="$TESTPOOL/$TESTFS/src"
export TESTDSTFS="$TESTPOOL/$TESTFS/dst"
# Directories handed to the tests as clone source and clone destination
# (presumably the mountpoints of the two datasets above -- confirm in setup.ksh).
export TESTSRCDIR="$TESTDIR/src"
export TESTDSTDIR="$TESTDIR/dst"

View file

@ -0,0 +1,280 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
# Pull in the shared bclone settings (MINBLOCKSIZE, TESTSRC*/TESTDST*).
. $STF_SUITE/tests/functional/bclone/bclone.cfg
# Record size of the test filesystem, queried once when this library is sourced.
export RECORDSIZE=$(zfs get -Hp -o value recordsize $TESTPOOL/$TESTFS)
# NOTE(review): not referenced by any function in this library; presumably
# consumed by individual tests -- confirm before removing.
MINBLKSIZE1=512
MINBLKSIZE2=1024
#
# Report the calling test as unsupported when running on a Linux kernel
# older than 4.5. Does nothing on other platforms.
#
function verify_block_cloning
{
	# Only Linux has a minimum kernel version requirement here.
	is_linux || return 0

	if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
		log_unsupported "copy_file_range not available before Linux 4.5"
	fi
}
#
# Report the calling test as unsupported when running on a Linux kernel
# older than 5.3. Does nothing on other platforms.
#
function verify_crossfs_block_cloning
{
	# Only Linux has a minimum kernel version requirement here.
	is_linux || return 0

	[[ $(linux_version) -ge $(linux_version "5.3") ]] || \
	    log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3"
}
# Unused.
#
# Print the size a file of the given length is expected to occupy in the
# dataset that holds the given directory.
#
# $1 file size in bytes
# $2 directory located on the dataset of interest
#
# Sizes up to one record are rounded up to a multiple of MINBLOCKSIZE,
# larger sizes to a multiple of the dataset's recordsize; the result is
# then multiplied by the dataset's "copies" property.
#
function size_to_dsize
{
	typeset -r nbytes=$1
	typeset -r where=$2
	typeset -r ds=$(df $where | tail -1 | awk '{print $1}')
	typeset -r recsz=$(get_prop recordsize $ds)
	typeset -r ncopies=$(get_prop copies $ds)
	typeset unit

	# Pick the allocation granularity that applies to this file size.
	if [[ $nbytes -gt $recsz ]]; then
		unit=$recsz
	else
		unit=$MINBLOCKSIZE
	fi

	echo $(( (((nbytes - 1) / unit + 1) * unit) * ncopies ))
}
#
# Fail the test if the SHA-256 digest of a clone differs from the expected one.
#
# $1 expected checksum (digest of the original file)
# $2 path of the clone to check
# $3 file size, used only in the failure message
#
function test_file_integrity
{
	typeset -r expected=$1
	typeset -r target=$2
	typeset -r size=$3
	typeset -r actual=$(sha256digest $target)

	[[ $expected == $actual ]] && return 0

	log_fail "Clone $target is corrupted with file size $size"
}
#
# Fail the test unless the given property of $TESTPOOL has the expected value.
#
# $1 pool property name
# $2 expected value
#
function verify_pool_prop_eq
{
	typeset -r name=$1
	typeset -r want=$2
	typeset -r got=$(get_pool_prop $name $TESTPOOL)

	[[ $got == $want ]] || \
	    log_fail "Pool property $name is incorrect: expected $want, got $got"
}
#
# Check the bcloneused, bclonesaved and bcloneratio pool properties.
#
# $1 size of the cloned data (0 when nothing is expected to be cloned)
# $2 number of references expected for that data
#
function verify_pool_props
{
	typeset -r cloned=$1
	typeset -r refs=$2
	typeset used saved shown

	# Work out the three expected values first, then verify them in the
	# order used, saved, ratio.
	if [[ $cloned -eq 0 ]]; then
		used=0
		saved=0
		shown=1.00
	else
		# A single reference means nothing is accounted as cloned.
		if [[ $refs -eq 1 ]]; then
			used=0
		else
			used=$cloned
		fi
		saved=$((cloned*(refs-1)))
		shown="${refs}.00"
	fi

	verify_pool_prop_eq bcloneused $used
	verify_pool_prop_eq bclonesaved $saved
	verify_pool_prop_eq bcloneratio "$shown"
}
# Function to test file copying and integrity check.
#
# $1 datatype: "random", "text" or "hole"
# $2 size of the original file in bytes
# $3 "true" when the data is expected to be stored embedded (no cloned
#    space is then expected), "false" otherwise
# $4 directory in which the original file is created
# $5 directory in which the clones are created
#
# Creates $4/original, clones it repeatedly with "clonefile -f" into
# $5/clone0 .. $5/clone9 in various orders relative to pool syncs and
# export/import cycles, and after each stage checks the clone checksums
# and the bclone* pool properties.
function bclone_test
{
typeset -r datatype=$1
typeset filesize=$2
typeset -r embedded=$3
typeset -r srcdir=$4
typeset -r dstdir=$5
typeset dsize
typeset -r original="${srcdir}/original"
typeset -r clone="${dstdir}/clone"
log_note "Testing file copy with datatype $datatype, file size $filesize, embedded $embedded"
# Create a test file with known content.
case $datatype in
random|text)
sync_pool $TESTPOOL
if [[ $datatype = "random" ]]; then
dd if=/dev/urandom of=$original bs=$filesize count=1 2>/dev/null
else
# Base64 turns every 3 input bytes into 4 output characters, so round
# the size down to a multiple of 4 and feed 3/4 of it to the encoder.
filesize=$(((filesize/4)*4))
dd if=/dev/urandom bs=$(((filesize/4)*3)) count=1 | \
openssl base64 -A > $original
fi
sync_pool $TESTPOOL
clonefile -f $original "${clone}-tmp"
sync_pool $TESTPOOL
# It is hard to predict block sizes that will be used,
# so just do one clone and take it from bcloneused.
# Note: from here on $filesize holds the bcloneused value rather than
# the requested size; below it is only used for dsize and in the
# messages printed by test_file_integrity.
filesize=$(zpool get -Hp -o value bcloneused $TESTPOOL)
if [[ $embedded = "false" ]]; then
log_must test $filesize -gt 0
fi
rm -f "${clone}-tmp"
sync_pool $TESTPOOL
dsize=$filesize
;;
hole)
log_must truncate_test -s $filesize -f $original
dsize=0
;;
*)
log_fail "Unknown datatype $datatype"
;;
esac
if [[ $embedded = "true" ]]; then
dsize=0
fi
typeset -r original_checksum=$(sha256digest $original)
sync_pool $TESTPOOL
# Create a first clone of the entire file.
clonefile -f $original "${clone}0"
# Try to clone the clone in the same transaction group.
clonefile -f "${clone}0" "${clone}2"
# Clone the original again...
clonefile -f $original "${clone}1"
# ...and overwrite it in the same transaction group.
clonefile -f $original "${clone}1"
# Clone the clone...
clonefile -f "${clone}1" "${clone}3"
sync_pool $TESTPOOL
# ...and overwrite in the new transaction group.
clonefile -f "${clone}1" "${clone}3"
sync_pool $TESTPOOL
# Test removal of the pending clones (before they are committed to disk).
clonefile -f $original "${clone}4"
clonefile -f "${clone}4" "${clone}5"
rm -f "${clone}4" "${clone}5"
# Clone into one file, but remove another file, but with the same data in
# the same transaction group.
clonefile -f $original "${clone}5"
sync_pool $TESTPOOL
clonefile -f $original "${clone}4"
rm -f "${clone}5"
test_file_integrity $original_checksum "${clone}4" $filesize
sync_pool $TESTPOOL
test_file_integrity $original_checksum "${clone}4" $filesize
clonefile -f "${clone}4" "${clone}5"
# Verify integrity of the cloned file before it is committed to disk.
test_file_integrity $original_checksum "${clone}5" $filesize
sync_pool $TESTPOOL
# Verify integrity in the new transaction group.
test_file_integrity $original_checksum "${clone}0" $filesize
test_file_integrity $original_checksum "${clone}1" $filesize
test_file_integrity $original_checksum "${clone}2" $filesize
test_file_integrity $original_checksum "${clone}3" $filesize
test_file_integrity $original_checksum "${clone}4" $filesize
test_file_integrity $original_checksum "${clone}5" $filesize
# Seven files hold the data at this point: the original and clone0-clone5.
verify_pool_props $dsize 7
# Clear cache and test after fresh import.
log_must zpool export $TESTPOOL
log_must zpool import $TESTPOOL
# Cloned uncached file.
clonefile -f $original "${clone}6"
# Cloned uncached clone.
clonefile -f "${clone}6" "${clone}7"
# Cache the file.
cat $original >/dev/null
clonefile -f $original "${clone}8"
clonefile -f "${clone}8" "${clone}9"
test_file_integrity $original_checksum "${clone}6" $filesize
test_file_integrity $original_checksum "${clone}7" $filesize
test_file_integrity $original_checksum "${clone}8" $filesize
test_file_integrity $original_checksum "${clone}9" $filesize
sync_pool $TESTPOOL
# Eleven files now: the original and clone0-clone9.
verify_pool_props $dsize 11
log_must zpool export $TESTPOOL
log_must zpool import $TESTPOOL
test_file_integrity $original_checksum "${clone}0" $filesize
test_file_integrity $original_checksum "${clone}1" $filesize
test_file_integrity $original_checksum "${clone}2" $filesize
test_file_integrity $original_checksum "${clone}3" $filesize
test_file_integrity $original_checksum "${clone}4" $filesize
test_file_integrity $original_checksum "${clone}5" $filesize
test_file_integrity $original_checksum "${clone}6" $filesize
test_file_integrity $original_checksum "${clone}7" $filesize
test_file_integrity $original_checksum "${clone}8" $filesize
test_file_integrity $original_checksum "${clone}9" $filesize
# Remove the original and the odd-numbered clones; six files remain.
rm -f $original
rm -f "${clone}1" "${clone}3" "${clone}5" "${clone}7"
sync_pool $TESTPOOL
test_file_integrity $original_checksum "${clone}0" $filesize
test_file_integrity $original_checksum "${clone}2" $filesize
test_file_integrity $original_checksum "${clone}4" $filesize
test_file_integrity $original_checksum "${clone}6" $filesize
test_file_integrity $original_checksum "${clone}8" $filesize
test_file_integrity $original_checksum "${clone}9" $filesize
verify_pool_props $dsize 6
# Leave clone6 as the only remaining file.
rm -f "${clone}0" "${clone}2" "${clone}4" "${clone}8" "${clone}9"
sync_pool $TESTPOOL
test_file_integrity $original_checksum "${clone}6" $filesize
verify_pool_props $dsize 1
# With the last file gone the same "ratio 1" expectations are checked
# again (bcloneused 0, bclonesaved 0, bcloneratio 1.00).
rm -f "${clone}6"
sync_pool $TESTPOOL
verify_pool_props $dsize 1
}

View file

@ -0,0 +1,315 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
#
# Print the digest of the first $HALFRECORDSIZE bytes of a file.
#
# $1 file to read
#
function first_half_checksum
{
	typeset -r src=$1

	# dd diagnostics are discarded; only the data reaches the digest.
	dd if=$src bs=$HALFRECORDSIZE count=1 2>/dev/null |
	    sha256digest
}
#
# Print the digest of the second $HALFRECORDSIZE-sized chunk of a file
# (the first chunk is skipped).
#
# $1 file to read
#
function second_half_checksum
{
	typeset -r src=$1

	# dd diagnostics are discarded; only the data reaches the digest.
	dd if=$src bs=$HALFRECORDSIZE count=1 skip=1 2>/dev/null |
	    sha256digest
}
#
# Prepare the shared state for the corner-case tests: export the sizes and
# file paths, create three one-record source files filled with random data,
# and export the per-half checksums the later checks compare against.
#
# $1 directory for the source files (orig0, orig1, orig2)
# $2 directory for the clone (clone0)
#
function bclone_corner_cases_init
{
	typeset -r srcdir=$1
	typeset -r dstdir=$2
	typeset src

	export RECORDSIZE=4096
	export HALFRECORDSIZE=$((RECORDSIZE / 2))

	export CLONE="$dstdir/clone0"
	export ORIG0="$srcdir/orig0"
	export ORIG1="$srcdir/orig1"
	export ORIG2="$srcdir/orig2"

	# Create source files.
	for src in "$ORIG0" "$ORIG1" "$ORIG2"; do
		log_must dd if=/dev/urandom of="$src" bs=$RECORDSIZE count=1
	done

	# Checksums of each half of every source file.
	export FIRST_HALF_ORIG0_CHECKSUM=$(first_half_checksum $ORIG0)
	export SECOND_HALF_ORIG0_CHECKSUM=$(second_half_checksum $ORIG0)
	export FIRST_HALF_ORIG1_CHECKSUM=$(first_half_checksum $ORIG1)
	export SECOND_HALF_ORIG1_CHECKSUM=$(second_half_checksum $ORIG1)
	export FIRST_HALF_ORIG2_CHECKSUM=$(first_half_checksum $ORIG2)
	export SECOND_HALF_ORIG2_CHECKSUM=$(second_half_checksum $ORIG2)

	# Checksum of half a record of zeros, expected after freeing the clone.
	export ZEROS_CHECKSUM=$(dd if=/dev/zero bs=$HALFRECORDSIZE count=1 | sha256digest)

	# Expected checksums of the clone; maintained by the test steps.
	export FIRST_HALF_CHECKSUM=""
	export SECOND_HALF_CHECKSUM=""
}
#
# Optionally read $CLONE so that it is cached before the next test step.
#
# $1 "cached" to read the file, "uncached" to do nothing
#
function cache_clone
{
	typeset -r mode=$1

	if [[ $mode == "cached" ]]; then
		# Read errors are ignored and dd diagnostics are discarded.
		dd if=$CLONE of=/dev/null bs=$RECORDSIZE 2>/dev/null
	elif [[ $mode != "uncached" ]]; then
		log_fail "invalid cached: $mode"
	fi
}
#
# Optionally create $CLONE ahead of the cloning steps.
#
# $1 one of:
#    "no"          - do not create the file
#    "small empty" - truncate_test to half a record
#    "full empty"  - truncate_test to a full record
#    "small data"  - half a record of random data
#    "full data"   - a full record of random data
#
function create_existing
{
	typeset -r kind=$1
	typeset size

	# First settle on the size (or bail out), then on how to fill it.
	case "$kind" in
	"no")
		return 0
		;;
	"small empty"|"small data")
		size=$HALFRECORDSIZE
		;;
	"full empty"|"full data")
		size=$RECORDSIZE
		;;
	*)
		log_fail "invalid existing: $kind"
		return
		;;
	esac

	if [[ $kind == *" empty" ]]; then
		log_must truncate_test -s $size -f $CLONE
	else
		log_must dd if=/dev/urandom of=$CLONE bs=$size count=1 \
		    2>/dev/null
	fi
}
#
# Optionally clone a source file onto $CLONE and record the checksums the
# clone is now expected to have.
#
# $1 "yes" to clone, "no" to do nothing
# $2 source file; must be $ORIG0 or $ORIG2
#
function create_clone
{
	typeset -r wanted=$1
	typeset -r src=$2

	case "$wanted" in
	"no")
		return 0
		;;
	"yes")
		;;
	*)
		log_fail "invalid clone: $wanted"
		return
		;;
	esac

	# The clone is attempted before the source name is validated.
	clonefile -f $src $CLONE

	if [[ $src == $ORIG0 ]]; then
		FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG0_CHECKSUM
		SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG0_CHECKSUM
	elif [[ $src == $ORIG2 ]]; then
		FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG2_CHECKSUM
		SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG2_CHECKSUM
	else
		log_fail "invalid file: $src"
	fi
}
#
# Optionally modify $CLONE and update the expected per-half checksums.
#
# $1 one of:
#    "no"          - leave the file alone
#    "free"        - truncate to 0, then back to a full record (all zeros)
#    "full"        - overwrite the whole record with $ORIG1
#    "first half"  - overwrite only the first half with $ORIG1's first half
#    "second half" - overwrite only the second half with $ORIG1's second half
#
function overwrite_clone
{
	typeset -r mode=$1

	if [[ $mode == "no" ]]; then
		return 0
	elif [[ $mode == "free" ]]; then
		log_must truncate_test -s 0 -f $CLONE
		log_must truncate_test -s $RECORDSIZE -f $CLONE
		FIRST_HALF_CHECKSUM=$ZEROS_CHECKSUM
		SECOND_HALF_CHECKSUM=$ZEROS_CHECKSUM
	elif [[ $mode == "full" ]]; then
		log_must dd if=$ORIG1 of=$CLONE bs=$RECORDSIZE count=1 2>/dev/null
		FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG1_CHECKSUM
		SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG1_CHECKSUM
	elif [[ $mode == "first half" ]]; then
		# conv=notrunc keeps the other half of the file in place.
		log_must dd if=$ORIG1 of=$CLONE bs=$HALFRECORDSIZE skip=0 seek=0 \
		    count=1 conv=notrunc 2>/dev/null
		FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG1_CHECKSUM
	elif [[ $mode == "second half" ]]; then
		# conv=notrunc keeps the other half of the file in place.
		log_must dd if=$ORIG1 of=$CLONE bs=$HALFRECORDSIZE skip=1 seek=1 \
		    count=1 conv=notrunc 2>/dev/null
		SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG1_CHECKSUM
	else
		log_fail "invalid overwrite: $mode"
	fi
}
#
# Optionally compare both halves of $CLONE with the expected checksums.
#
# $1 "yes" to compare, "no" to skip the check
#
# Returns 0 when the check is skipped or both halves match
# $FIRST_HALF_CHECKSUM / $SECOND_HALF_CHECKSUM, 1 on a mismatch.
#
function checksum_compare
{
	typeset -r wanted=$1

	case "$wanted" in
	"no")
		return 0
		;;
	"yes")
		;;
	*)
		log_fail "invalid compare: $wanted"
		return
		;;
	esac

	typeset -r first=$(first_half_checksum $CLONE)
	typeset -r second=$(second_half_checksum $CLONE)

	[[ $first == $FIRST_HALF_CHECKSUM && $second == $SECOND_HALF_CHECKSUM ]]
}
#
# Walk through the combinations of
#
#   (create) / (cache) / (clone) / (overwrite) / (read) /
#   (clone) / (overwrite) / (read) / read next txg
#
# and verify the content of $CLONE after each stage.
#
# $1 source directory, or the literal "count" to only print the number of
#    combinations that would be run
# $2 destination directory
# $3 optional: run only about this many randomly chosen combinations
#    (duplicates among the random picks are dropped, so fewer may run)
#
function bclone_corner_cases_test
{
	typeset cached existing
	typeset first_clone first_overwrite
	typeset read_after read_before
	typeset second_clone second_overwrite
	typeset desc
	typeset -r srcdir=$1
	typeset -r dstdir=$2
	typeset limit=$3
	typeset -i count=0

	if [[ $srcdir != "count" ]]; then
		if [[ -n "$limit" ]]; then
			# Turn the numeric limit into a space-separated list of
			# randomly chosen combination numbers.
			typeset -r total_count=$(bclone_corner_cases_test count)
			limit=$(random_int_between 1 $total_count $((limit*2)) | sort -nu | head -n $limit | xargs)
		fi
		bclone_corner_cases_init $srcdir $dstdir
	fi

	for existing in "no" "small empty" "full empty" "small data" "full data"; do
	for cached in "uncached" "cached"; do
	for first_clone in "no" "yes"; do
	for first_overwrite in "no" "free" "full" "first half" "second half"; do
	for read_before in "no" "yes"; do
	for second_clone in "no" "yes"; do
	for second_overwrite in "no" "free" "full" "first half" "second half"; do
	for read_after in "no" "yes"; do
		# Skip combinations that clone nothing at all.
		if [[ $first_clone = "no" ]] && \
		    [[ $second_clone = "no" ]]; then
			continue
		fi
		# Skip reading before anything has been cloned.
		if [[ $first_clone = "no" ]] && \
		    [[ $read_before = "yes" ]]; then
			continue
		fi
		# Skip a second read when there was no second clone.
		if [[ $second_clone = "no" ]] && \
		    [[ $read_before = "yes" ]] && \
		    [[ $read_after = "yes" ]]; then
			continue
		fi

		count=$((count+1))

		if [[ $srcdir = "count" ]]; then
			# Just counting.
			continue
		fi

		if [[ -n "$limit" ]]; then
			# Run only the combinations whose number was picked.
			if ! echo " $limit " | grep -q " $count "; then
				continue
			fi
		fi

		FIRST_HALF_CHECKSUM=""
		SECOND_HALF_CHECKSUM=""

		log_must zpool export $TESTPOOL
		log_must zpool import $TESTPOOL

		create_existing "$existing"

		log_must zpool export $TESTPOOL
		log_must zpool import $TESTPOOL

		cache_clone "$cached"

		create_clone "$first_clone" "$ORIG0"
		overwrite_clone "$first_overwrite"

		desc="existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before"
		if checksum_compare $read_before; then
			log_note "$desc"
		else
			log_fail "FAIL: $desc"
		fi

		create_clone "$second_clone" "$ORIG2"
		overwrite_clone "$second_overwrite"

		# Include second_overwrite so that a failing combination can
		# be identified from the log alone.
		desc="$desc / second_clone: $second_clone / second_overwrite: $second_overwrite / read_after: $read_after"
		if checksum_compare $read_after; then
			log_note "$desc"
		else
			log_fail "FAIL: $desc"
		fi

		log_must zpool export $TESTPOOL
		log_must zpool import $TESTPOOL

		# Always verify the final content after the re-import.
		desc="$desc / read_next_txg"
		if checksum_compare "yes"; then
			log_note "$desc"
		else
			log_fail "FAIL: $desc"
		fi

		rm -f "$CLONE"
	done
	done
	done
	done
	done
	done
	done
	done

	if [[ $srcdir = "count" ]]; then
		echo $count
	fi
}

View file

@ -0,0 +1,45 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
# Test script: run all block cloning corner-case combinations with the
# source files and the clone on two different datasets.
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib
verify_runnable "both"
# Bail out as unsupported on Linux kernels too old for (cross-filesystem)
# copy_file_range.
verify_block_cloning
verify_crossfs_block_cloning
log_assert "Verify various corner cases in block cloning across datasets"
# Disable compression to make sure we won't use embedded blocks.
# NOTE(review): $RECORDSIZE at this point is the value exported when
# bclone_common.kshlib was sourced; bclone_corner_cases_init only sets it to
# 4096 later, inside bclone_corner_cases_test -- confirm this is intended.
log_must zfs set compress=off $TESTSRCFS
log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS
log_must zfs set compress=off $TESTDSTFS
log_must zfs set recordsize=$RECORDSIZE $TESTDSTFS
# No limit argument: every combination is executed.
bclone_corner_cases_test $TESTSRCDIR $TESTDSTDIR
log_pass

View file

@ -0,0 +1,45 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
# Test script: run a random subset of the block cloning corner-case
# combinations with the source files and the clone on two different datasets.
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib
verify_runnable "both"
# Bail out as unsupported on Linux kernels too old for (cross-filesystem)
# copy_file_range.
verify_block_cloning
verify_crossfs_block_cloning
log_assert "Verify various corner cases in block cloning across datasets"
# Disable compression to make sure we won't use embedded blocks.
# NOTE(review): $RECORDSIZE at this point is the value exported when
# bclone_common.kshlib was sourced; bclone_corner_cases_init only sets it to
# 4096 later, inside bclone_corner_cases_test -- confirm this is intended.
log_must zfs set compress=off $TESTSRCFS
log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS
log_must zfs set compress=off $TESTDSTFS
log_must zfs set recordsize=$RECORDSIZE $TESTDSTFS
# Limit of 100: only randomly chosen combinations (at most 100) are executed.
bclone_corner_cases_test $TESTSRCDIR $TESTDSTDIR 100
log_pass

View file

@ -0,0 +1,46 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning
verify_crossfs_block_cloning

log_assert "Verify block cloning properly clones regular files across datasets"

# With compression off on both sides the data cannot end up in embedded
# blocks.
for dataset in "$TESTSRCFS" "$TESTDSTFS"; do
	log_must zfs set compress=off $dataset
done

# A few tiny sizes, then one below / exactly at / one above 512, 4096,
# 131072, 1048576 and 4194304.
typeset -a filesizes=(1 107 113 511 512 513 4095 4096 4097 \
    131071 131072 131073 1048575 1048576 1048577 4194303 4194304 4194305)

for filesize in "${filesizes[@]}"; do
	bclone_test random $filesize false $TESTSRCDIR $TESTDSTDIR
done

log_pass

View file

@ -0,0 +1,50 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning
verify_crossfs_block_cloning

log_assert "Verify block cloning properly clones small files (with embedded blocks) across datasets"

# ZLE compression is used on both datasets so that the maximum amount of data
# that fits into a BP is well defined.
for dataset in "$TESTSRCFS" "$TESTDSTFS"; do
	log_must zfs set compress=zle $dataset
done

# Test BP_IS_EMBEDDED().
# An embedded payload can hold at most 112 bytes. The buffer is first
# extended to 512 bytes and only then compressed, and 107 random bytes
# followed by 405 zeros compress to exactly 112 bytes with ZLE - hence 107 is
# the largest size exercised.
typeset -a filesizes=(1 2 4 8 16 32 64 96 107)

for filesize in "${filesizes[@]}"; do
	bclone_test random $filesize true $TESTSRCDIR $TESTDSTDIR
done

log_pass

View file

@ -0,0 +1,45 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning
verify_crossfs_block_cloning

log_assert "Verify block cloning properly clones sparse files (files with holes) across datasets"

# Test BP_IS_HOLE(). The compression setting is irrelevant for this case, so
# it is left untouched.
# Sizes: 1, then one below / exactly at / one above 512, 4096, 131072,
# 1048576 and 4194304.
typeset -a filesizes=(1 511 512 513 4095 4096 4097 131071 131072 131073 \
    1048575 1048576 1048577 4194303 4194304 4194305)

for filesize in "${filesizes[@]}"; do
	bclone_test hole $filesize false $TESTSRCDIR $TESTDSTDIR
done

log_pass

View file

@ -0,0 +1,86 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning
verify_crossfs_block_cloning

#
# Return every property overridden by this test to its inherited value, first
# on the source dataset and then on the destination dataset.
#
# Registered with log_onexit so the datasets are reset even when a step in
# the middle of the test fails; with the restore inlined before log_pass it
# was only reached on the success path and a failure left the overrides in
# place for whatever runs next.
#
function cleanup
{
	typeset prop

	for prop in checksum compress copies recordsize; do
		log_must zfs inherit $prop $TESTSRCFS
	done
	for prop in checksum compress copies recordsize; do
		log_must zfs inherit $prop $TESTDSTFS
	done
}

log_assert "Verify block cloning across datasets with different properties"
log_onexit cleanup

# Round 1: no checksum, no compression, single copy and 128K records on the
# source; fletcher2, lz4, three copies and 8K records on the destination.
log_must zfs set checksum=off $TESTSRCFS
log_must zfs set compress=off $TESTSRCFS
log_must zfs set copies=1 $TESTSRCFS
log_must zfs set recordsize=131072 $TESTSRCFS
log_must zfs set checksum=fletcher2 $TESTDSTFS
log_must zfs set compress=lz4 $TESTDSTFS
log_must zfs set copies=3 $TESTDSTFS
log_must zfs set recordsize=8192 $TESTDSTFS

# File size is a random multiple of 64 between 2*64=128 and 32767*64=2097088.
FILESIZE=$(random_int_between 2 32767)
FILESIZE=$((FILESIZE * 64))
bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR

# Round 2: sha256, zstd, two copies and 256K records on the source; no
# checksum, no compression, single copy and 128K records on the destination.
log_must zfs set checksum=sha256 $TESTSRCFS
log_must zfs set compress=zstd $TESTSRCFS
log_must zfs set copies=2 $TESTSRCFS
log_must zfs set recordsize=262144 $TESTSRCFS
log_must zfs set checksum=off $TESTDSTFS
log_must zfs set compress=off $TESTDSTFS
log_must zfs set copies=1 $TESTDSTFS
log_must zfs set recordsize=131072 $TESTDSTFS

FILESIZE=$(random_int_between 2 32767)
FILESIZE=$((FILESIZE * 64))
bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR

# Round 3: sha512, gzip, two copies and 512-byte records on the source;
# fletcher4, lzjb, three copies and 16K records on the destination.
log_must zfs set checksum=sha512 $TESTSRCFS
log_must zfs set compress=gzip $TESTSRCFS
log_must zfs set copies=2 $TESTSRCFS
log_must zfs set recordsize=512 $TESTSRCFS
log_must zfs set checksum=fletcher4 $TESTDSTFS
log_must zfs set compress=lzjb $TESTDSTFS
log_must zfs set copies=3 $TESTDSTFS
log_must zfs set recordsize=16384 $TESTDSTFS

FILESIZE=$(random_int_between 2 32767)
FILESIZE=$((FILESIZE * 64))
bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR

# The properties are restored by cleanup(), which runs when the test ends.
log_pass

View file

@ -0,0 +1,62 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
. $STF_SUITE/include/properties.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning
verify_crossfs_block_cloning

#
# Put the checksum property of both datasets back to its inherited value.
#
# Registered with log_onexit so it also runs when a step in the middle of the
# test fails; with the restore inlined before log_pass it was only reached on
# the success path.
#
function cleanup
{
	log_must zfs inherit checksum $TESTSRCFS
	log_must zfs inherit checksum $TESTDSTFS
}

log_assert "Verify block cloning across datasets with different checksum properties"
log_onexit cleanup

log_must zfs set compress=off $TESTSRCFS
log_must zfs set compress=off $TESTDSTFS

# Walk every ordered (source, destination) pair of checksum values.
for srcprop in "${checksum_prop_vals[@]}"; do
	for dstprop in "${checksum_prop_vals[@]}"; do
		# Only pairs with differing values are of interest. The
		# right-hand side is quoted so that it is compared literally
		# rather than being interpreted as a pattern.
		if [[ $srcprop == "$dstprop" ]]; then
			continue
		fi

		log_must zfs set checksum=$srcprop $TESTSRCFS
		log_must zfs set checksum=$dstprop $TESTDSTFS

		# 15*8=120, which is greater than 113, so we are sure the data
		# won't be embedded into BP.
		# 32767*8=262136, which is larger than a single default
		# recordsize of 131072.
		FILESIZE=$(random_int_between 15 32767)
		FILESIZE=$((FILESIZE * 8))
		bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR
	done
done

# The checksum property is restored by cleanup(), which runs when the test
# ends.
log_pass

View file

@ -0,0 +1,59 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
. $STF_SUITE/include/properties.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning
verify_crossfs_block_cloning

#
# Put the compress property of both datasets back to its inherited value.
#
# Registered with log_onexit so it also runs when a step in the middle of the
# test fails; with the restore inlined before log_pass it was only reached on
# the success path.
#
function cleanup
{
	log_must zfs inherit compress $TESTSRCFS
	log_must zfs inherit compress $TESTDSTFS
}

log_assert "Verify block cloning across datasets with different compression properties"
log_onexit cleanup

# Walk every ordered (source, destination) pair of compression values.
for srcprop in "${compress_prop_vals[@]}"; do
	for dstprop in "${compress_prop_vals[@]}"; do
		# Only pairs with differing values are of interest. The
		# right-hand side is quoted so that it is compared literally
		# rather than being interpreted as a pattern.
		if [[ $srcprop == "$dstprop" ]]; then
			continue
		fi

		log_must zfs set compress=$srcprop $TESTSRCFS
		log_must zfs set compress=$dstprop $TESTDSTFS

		# 15*8=120, which is greater than 113, so we are sure the data
		# won't be embedded into BP.
		# 32767*8=262136, which is larger than a single default
		# recordsize of 131072.
		FILESIZE=$(random_int_between 15 32767)
		FILESIZE=$((FILESIZE * 8))
		bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR
	done
done

# The compress property is restored by cleanup(), which runs when the test
# ends.
log_pass

View file

@ -0,0 +1,59 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
. $STF_SUITE/include/properties.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning
verify_crossfs_block_cloning

#
# Put the copies property of both datasets back to its inherited value.
#
# Registered with log_onexit so it also runs when a step in the middle of the
# test fails; with the restore inlined before log_pass it was only reached on
# the success path.
#
function cleanup
{
	log_must zfs inherit copies $TESTSRCFS
	log_must zfs inherit copies $TESTDSTFS
}

log_assert "Verify block cloning across datasets with different copies properties"
log_onexit cleanup

log_must zfs set compress=off $TESTSRCFS
log_must zfs set compress=off $TESTDSTFS

# Walk every ordered (source, destination) pair of copies values. Unlike the
# sibling property tests, pairs with equal values are exercised as well.
for srcprop in "${copies_prop_vals[@]}"; do
	for dstprop in "${copies_prop_vals[@]}"; do
		log_must zfs set copies=$srcprop $TESTSRCFS
		log_must zfs set copies=$dstprop $TESTDSTFS

		# 15*8=120, which is greater than 113, so we are sure the data
		# won't be embedded into BP.
		# 32767*8=262136, which is larger than a single default
		# recordsize of 131072.
		FILESIZE=$(random_int_between 15 32767)
		FILESIZE=$((FILESIZE * 8))
		bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR
	done
done

# The copies property is restored by cleanup(), which runs when the test ends.
log_pass

View file

@ -0,0 +1,65 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
. $STF_SUITE/include/properties.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning
verify_crossfs_block_cloning

#
# Put the recordsize property of both datasets back to its inherited value.
#
# Registered with log_onexit so it also runs when a step in the middle of the
# test fails; with the restore inlined before log_pass it was only reached on
# the success path.
#
function cleanup
{
	log_must zfs inherit recordsize $TESTSRCFS
	log_must zfs inherit recordsize $TESTDSTFS
}

log_assert "Verify block cloning across datasets with different recordsize properties"
log_onexit cleanup

log_must zfs set compress=off $TESTSRCFS
log_must zfs set compress=off $TESTDSTFS

# recsize_prop_vals[] array contains too many entries and the tests take too
# long. Let's use only a subset of them.
typeset -a bclone_recsize_prop_vals=('512' '4096' '131072' '1048576')

# Walk every ordered (source, destination) pair of the selected record sizes.
for srcprop in "${bclone_recsize_prop_vals[@]}"; do
	for dstprop in "${bclone_recsize_prop_vals[@]}"; do
		# Only pairs with differing values are of interest. The
		# right-hand side is quoted so that it is compared literally
		# rather than being interpreted as a pattern.
		if [[ $srcprop == "$dstprop" ]]; then
			continue
		fi

		log_must zfs set recordsize=$srcprop $TESTSRCFS
		log_must zfs set recordsize=$dstprop $TESTDSTFS

		# 2*64=128, which is greater than 113, so we are sure the data
		# won't be embedded into BP.
		# 32767*64=2097088, which is larger than the largest
		# recordsize (1MB).
		FILESIZE=$(random_int_between 2 32767)
		FILESIZE=$((FILESIZE * 64))
		bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR
	done
done

# The recordsize property is restored by cleanup(), which runs when the test
# ends.
log_pass

View file

@ -0,0 +1,66 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
. $STF_SUITE/include/properties.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning
verify_crossfs_block_cloning

#
# Put the sync property of both datasets back to its inherited value.
#
# Registered with log_onexit so it also runs when a step in the middle of the
# test fails; with the restore inlined before log_pass it was only reached on
# the success path.
#
function cleanup
{
	log_must zfs inherit sync $TESTSRCFS
	log_must zfs inherit sync $TESTDSTFS
}

log_assert "Verify block cloning with all sync property settings"
log_onexit cleanup

log_must zfs set compress=zle $TESTSRCFS
log_must zfs set compress=zle $TESTDSTFS

# Part 1: clone within the source dataset under each sync setting.
for prop in "${sync_prop_vals[@]}"; do
	log_must zfs set sync=$prop $TESTSRCFS

	# 32767*8=262136, which is larger than a single default recordsize of
	# 131072.
	FILESIZE=$(random_int_between 1 32767)
	FILESIZE=$((FILESIZE * 8))
	bclone_test random $FILESIZE false $TESTSRCDIR $TESTSRCDIR
done

# Part 2: clone across datasets for every (source, destination) combination
# of sync settings, equal pairs included.
for srcprop in "${sync_prop_vals[@]}"; do
	log_must zfs set sync=$srcprop $TESTSRCFS

	for dstprop in "${sync_prop_vals[@]}"; do
		log_must zfs set sync=$dstprop $TESTDSTFS

		# 32767*8=262136, which is larger than a single default
		# recordsize of 131072.
		FILESIZE=$(random_int_between 1 32767)
		FILESIZE=$((FILESIZE * 8))
		bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR
	done
done

# The sync property is restored by cleanup(), which runs when the test ends.
log_pass

View file

@ -0,0 +1,42 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib
# Same-dataset variant of the corner-case test: the source and the
# destination directory passed to bclone_corner_cases_test are both
# $TESTSRCDIR, so only $TESTSRCFS needs to be configured.
verify_runnable "both"
verify_block_cloning
log_assert "Verify various corner cases in block cloning within the same dataset"
# Disable compression to make sure we won't use embedded blocks.
log_must zfs set compress=off $TESTSRCFS
# NOTE(review): RECORDSIZE is not assigned in this script; presumably it is
# provided by bclone_corner_cases.kshlib sourced above - confirm.
log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS
bclone_corner_cases_test $TESTSRCDIR $TESTSRCDIR
log_pass

View file

@ -0,0 +1,42 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib
# Same-dataset variant of the corner-case test: the source and the
# destination directory passed to bclone_corner_cases_test are both
# $TESTSRCDIR, so only $TESTSRCFS needs to be configured.
verify_runnable "both"
verify_block_cloning
log_assert "Verify various corner cases in block cloning within the same dataset"
# Disable compression to make sure we won't use embedded blocks.
log_must zfs set compress=off $TESTSRCFS
# NOTE(review): RECORDSIZE is not assigned in this script; presumably it is
# provided by bclone_corner_cases.kshlib sourced above - confirm.
log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS
# NOTE(review): the trailing 100 is handed to bclone_corner_cases_test as its
# third argument; presumably it caps the number of cases run - confirm
# against bclone_corner_cases.kshlib.
bclone_corner_cases_test $TESTSRCDIR $TESTSRCDIR 100
log_pass

View file

@ -0,0 +1,44 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning

log_assert "Verify block cloning properly clones regular files within the same dataset"

# With compression off the data cannot end up in embedded blocks.
log_must zfs set compress=off $TESTSRCFS

# A few tiny sizes, then one below / exactly at / one above 512, 4096,
# 131072, 1048576 and 4194304.
typeset -a filesizes=(1 107 113 511 512 513 4095 4096 4097 \
    131071 131072 131073 1048575 1048576 1048577 4194303 4194304 4194305)

# Source and destination directory are the same: $TESTSRCDIR.
for filesize in "${filesizes[@]}"; do
	bclone_test random $filesize false $TESTSRCDIR $TESTSRCDIR
done

log_pass

View file

@ -0,0 +1,48 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning

log_assert "Verify block cloning properly clones small files (with embedded blocks) within the same dataset"

# ZLE compression is used so that the maximum amount of data that fits into
# a BP is well defined.
log_must zfs set compress=zle $TESTSRCFS

# Test BP_IS_EMBEDDED().
# An embedded payload can hold at most 112 bytes. The buffer is first
# extended to 512 bytes and only then compressed, and 107 random bytes
# followed by 405 zeros compress to exactly 112 bytes with ZLE - hence 107 is
# the largest size exercised.
typeset -a filesizes=(1 2 4 8 16 32 64 96 107)

# Source and destination directory are the same: $TESTSRCDIR.
for filesize in "${filesizes[@]}"; do
	bclone_test random $filesize true $TESTSRCDIR $TESTSRCDIR
done

log_pass

View file

@ -0,0 +1,44 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
verify_runnable "both"

verify_block_cloning

log_assert "Verify block cloning properly clones sparse files (files with holes) within the same dataset"

# Test BP_IS_HOLE(). The compression setting is irrelevant for this case, so
# it is left untouched.
# Sizes: 1, then one below / exactly at / one above 512, 4096, 131072,
# 1048576 and 4194304.
typeset -a filesizes=(1 511 512 513 4095 4096 4097 131071 131072 131073 \
    1048575 1048576 1048577 4194303 4194304 4194305)

# Source and destination directory are the same: $TESTSRCDIR.
for filesize in "${filesizes[@]}"; do
	bclone_test hole $filesize false $TESTSRCDIR $TESTSRCDIR
done

log_pass

View file

@ -0,0 +1,37 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2013 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone.cfg
# Remove the two datasets used by the block cloning tests (source first, then
# destination) before running the suite's default cleanup.
for dataset in "$TESTSRCFS" "$TESTDSTFS"; do
	log_must zfs destroy $dataset
done

default_cleanup

View file

@ -0,0 +1,45 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2023 by Pawel Jakub Dawidek
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/bclone/bclone.cfg
# The tests in this group cannot run without the clonefile helper on PATH.
command -v clonefile > /dev/null || \
    log_unsupported "clonefile program required to test block cloning"

# Use the first whitespace-separated entry of $DISKS for the test pool.
DISK=${DISKS%% *}
default_setup_noexit $DISK "true"

log_must zpool set feature@block_cloning=enabled $TESTPOOL

# Create the source dataset first, then the destination dataset.
for dataset in "$TESTSRCFS" "$TESTDSTFS"; do
	log_must zfs create $dataset
done

log_pass

View file

@ -44,28 +44,6 @@ function cleanup
done
}
#
# Get random number between min and max number (both bounds inclusive).
#
# $1 Minimal value
# $2 Maximal value
#
# Prints the chosen value on standard output. Returns non-zero and prints
# nothing when the minimal value is greater than the maximal value.
#
function random
{
	typeset -i min=$1
	typeset -i max=$2
	typeset -i value

	# An empty range has no valid answer; fail instead of looping.
	if ((min > max)); then
		return 1
	fi

	# Map the draw straight into [min, max]. Retrying "RANDOM % (max + 1)"
	# until it happens to be >= min never terminates when no draw can
	# reach min, and wastes iterations whenever min is close to max.
	((value = min + RANDOM % (max - min + 1)))

	echo $value
}
#
# Get the number of checksum errors for the pool.
#