OpenZFS 9330 - stack overflow when creating a deeply nested dataset

Datasets that are deeply nested (~100 levels) are impractical. We just
put a limit of 50 levels to newly created datasets. Existing datasets
should work without a problem.

The problem can be seen by attempting to create a dataset using the -p
option with many levels:

    panic[cpu0]/thread=ffffff01cd282c20: BAD TRAP: type=8 (#df Double fault) rp=ffffffff

    fffffffffbc3aa60 unix:die+100 ()
    fffffffffbc3ab70 unix:trap+157d ()
    ffffff00083d7020 unix:_patch_xrstorq_rbx+196 ()
    ffffff00083d7050 zfs:dbuf_rele+2e ()
    ...
    ffffff00083d7080 zfs:dsl_dir_close+32 ()
    ffffff00083d70b0 zfs:dsl_dir_evict+30 ()
    ffffff00083d70d0 zfs:dbuf_evict_user+4a ()
    ffffff00083d7100 zfs:dbuf_rele_and_unlock+87 ()
    ffffff00083d7130 zfs:dbuf_rele+2e ()
    ... The block above repeats once per directory in the ...
    ... create -p command, working towards the root ...
    ffffff00083db9f0 zfs:dsl_dataset_drop_ref+19 ()
    ffffff00083dba20 zfs:dsl_dataset_rele+42 ()
    ffffff00083dba70 zfs:dmu_objset_prefetch+e4 ()
    ffffff00083dbaa0 zfs:findfunc+23 ()
    ffffff00083dbb80 zfs:dmu_objset_find_spa+38c ()
    ffffff00083dbbc0 zfs:dmu_objset_find+40 ()
    ffffff00083dbc20 zfs:zfs_ioc_snapshot_list_next+4b ()
    ffffff00083dbcc0 zfs:zfsdev_ioctl+347 ()
    ffffff00083dbd00 genunix:cdev_ioctl+45 ()
    ffffff00083dbd40 specfs:spec_ioctl+5a ()
    ffffff00083dbdc0 genunix:fop_ioctl+7b ()
    ffffff00083dbec0 genunix:ioctl+18e ()
    ffffff00083dbf10 unix:brand_sys_sysenter+1c9 ()

Porting notes:
* Added zfs_max_dataset_nesting module option with documentation.
* Updated zfs_rename_014_neg.ksh for Linux.
* Increase the zfs.sh stack warning to 15K.  Enough time has passed
  that 16K can be reasonably assumed to be the default value.  It
  was increased in the 3.15 kernel released in June of 2014.

Authored by: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com>
Reviewed by: John Kennedy <john.kennedy@delphix.com>
Reviewed by: Matt Ahrens <matt@delphix.com>
Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Garrett D'Amore <garrett@damore.org>

OpenZFS-issue: https://www.illumos.org/issues/9330
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/757a75a
Closes #7681
This commit is contained in:
Serapheim Dimitropoulos 2016-09-12 08:15:20 -07:00 committed by Brian Behlendorf
parent 66df02497c
commit a7ed98d8b5
13 changed files with 259 additions and 23 deletions

View file

@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
*/
#ifndef _ZFS_NAMECHECK_H
@ -48,9 +48,13 @@ typedef enum {
#define ZFS_PERMSET_MAXLEN 64
extern int zfs_max_dataset_nesting;
int get_dataset_depth(const char *);
int pool_namecheck(const char *, namecheck_err_t *, char *);
int entity_namecheck(const char *, namecheck_err_t *, char *);
int dataset_namecheck(const char *, namecheck_err_t *, char *);
int dataset_nestcheck(const char *);
int mountpoint_namecheck(const char *, namecheck_err_t *);
int zfs_component_namecheck(const char *, namecheck_err_t *, char *);
int permset_namecheck(const char *, namecheck_err_t *, char *);

View file

@ -3583,8 +3583,22 @@ zfs_create_ancestors(libzfs_handle_t *hdl, const char *path)
{
int prefix;
char *path_copy;
char errbuf[1024];
int rc = 0;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot create '%s'"), path);
/*
* Check that we are not passing the nesting limit
* before we start creating any ancestors.
*/
if (dataset_nestcheck(path) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"maximum name nesting depth exceeded"));
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
}
if (check_parents(hdl, path, NULL, B_TRUE, &prefix) != 0)
return (-1);
@ -3623,6 +3637,12 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
if (!zfs_validate_name(hdl, path, type, B_TRUE))
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
if (dataset_nestcheck(path) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"maximum name nesting depth exceeded"));
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
}
/* validate parents exist */
if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0)
return (-1);
@ -4434,6 +4454,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive,
errbuf));
}
}
if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE))
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
} else {

View file

@ -1596,6 +1596,18 @@ in bytes.
Default value: \fB104,857,600\fR.
.RE
.sp
.ne 2
.na
\fBzfs_max_dataset_nesting\fR (int)
.ad
.RS 12n
The maximum depth of nested datasets. This value can be tuned temporarily to
fix existing datasets that exceed the predefined limit.
.sp
Default value: \fB50\fR.
.RE
.sp
.ne 2
.na

View file

@ -343,7 +343,8 @@ pool/{filesystem,volume,snapshot}
.Pp
where the maximum length of a dataset name is
.Dv MAXNAMELEN
.Pq 256 bytes .
.Pq 256 bytes
and the maximum amount of nesting allowed in a path is 50 levels deep.
.Pp
A dataset can be one of the following:
.Bl -tag -width "file system"

View file

@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
*/
/*
@ -34,8 +34,6 @@
* name is invalid. In the kernel, we only care whether it's valid or not.
* Each routine therefore takes a 'namecheck_err_t' which describes exactly why
* the name failed to validate.
*
* Each function returns 0 on success, -1 on error.
*/
#if !defined(_KERNEL)
@ -48,6 +46,14 @@
#include "zfs_namecheck.h"
#include "zfs_deleg.h"
/*
* Deeply nested datasets can overflow the stack, so we put a limit
* in the amount of nesting a path can have. zfs_max_dataset_nesting
* can be tuned temporarily to fix existing datasets that exceed our
* predefined limit.
*/
int zfs_max_dataset_nesting = 50;
static int
valid_char(char c)
{
@ -57,11 +63,36 @@ valid_char(char c)
c == '-' || c == '_' || c == '.' || c == ':' || c == ' ');
}
/*
* Looks at a path and returns its level of nesting (depth).
*/
int
get_dataset_depth(const char *path)
{
const char *loc = path;
int nesting = 0;
/*
* Keep track of nesting until you hit the end of the
* path or found the snapshot/bookmark seperator.
*/
for (int i = 0; loc[i] != '\0' &&
loc[i] != '@' &&
loc[i] != '#'; i++) {
if (loc[i] == '/')
nesting++;
}
return (nesting);
}
/*
* Snapshot names must be made up of alphanumeric characters plus the following
* characters:
*
* [-_.: ]
* [-_.: ]
*
* Returns 0 on success, -1 on error.
*/
int
zfs_component_namecheck(const char *path, namecheck_err_t *why, char *what)
@ -97,6 +128,8 @@ zfs_component_namecheck(const char *path, namecheck_err_t *why, char *what)
* Permissions set name must start with the letter '@' followed by the
* same character restrictions as snapshot names, except that the name
* cannot exceed 64 characters.
*
* Returns 0 on success, -1 on error.
*/
int
permset_namecheck(const char *path, namecheck_err_t *why, char *what)
@ -118,29 +151,41 @@ permset_namecheck(const char *path, namecheck_err_t *why, char *what)
return (zfs_component_namecheck(&path[1], why, what));
}
/*
* Dataset paths should not be deeper than zfs_max_dataset_nesting
* in terms of nesting.
*
* Returns 0 on success, -1 on error.
*/
int
dataset_nestcheck(const char *path)
{
return ((get_dataset_depth(path) < zfs_max_dataset_nesting) ? 0 : -1);
}
/*
* Entity names must be of the following form:
*
* [component/]*[component][(@|#)component]?
* [component/]*[component][(@|#)component]?
*
* Where each component is made up of alphanumeric characters plus the following
* characters:
*
* [-_.:%]
* [-_.:%]
*
* We allow '%' here as we use that character internally to create unique
* names for temporary clones (for online recv).
*
* Returns 0 on success, -1 on error.
*/
int
entity_namecheck(const char *path, namecheck_err_t *why, char *what)
{
const char *start, *end;
int found_delim;
const char *end;
/*
* Make sure the name is not too long.
*/
if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
@ -160,8 +205,8 @@ entity_namecheck(const char *path, namecheck_err_t *why, char *what)
return (-1);
}
start = path;
found_delim = 0;
const char *start = path;
boolean_t found_delim = B_FALSE;
for (;;) {
/* Find the end of this component */
end = start;
@ -196,7 +241,7 @@ entity_namecheck(const char *path, namecheck_err_t *why, char *what)
return (-1);
}
found_delim = 1;
found_delim = B_TRUE;
}
/* Zero-length components are not allowed */
@ -248,6 +293,8 @@ dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
* mountpoint names must be of the following form:
*
* /[component][/]*[component][/]
*
* Returns 0 on success, -1 on error.
*/
int
mountpoint_namecheck(const char *path, namecheck_err_t *why)
@ -292,6 +339,8 @@ mountpoint_namecheck(const char *path, namecheck_err_t *why)
* dataset names, with the additional restriction that the pool name must begin
* with a letter. The pool names 'raidz' and 'mirror' are also reserved names
* that cannot be used.
*
* Returns 0 on success, -1 on error.
*/
int
pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
@ -351,4 +400,10 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
EXPORT_SYMBOL(pool_namecheck);
EXPORT_SYMBOL(dataset_namecheck);
EXPORT_SYMBOL(zfs_component_namecheck);
EXPORT_SYMBOL(dataset_nestcheck);
EXPORT_SYMBOL(get_dataset_depth);
EXPORT_SYMBOL(zfs_max_dataset_nesting);
module_param(zfs_max_dataset_nesting, int, 0644);
MODULE_PARM_DESC(zfs_max_dataset_nesting, "Maximum depth of nested datasets");
#endif

View file

@ -60,6 +60,7 @@
#include <sys/spa_impl.h>
#include <sys/dmu_send.h>
#include <sys/zfs_project.h>
#include "zfs_namecheck.h"
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
@ -1098,6 +1099,9 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
if (strlen(doca->doca_name) >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
if (dataset_nestcheck(doca->doca_name) != 0)
return (SET_ERROR(ENAMETOOLONG));
error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
if (error != 0)
return (error);

View file

@ -1854,16 +1854,28 @@ typedef struct dsl_dir_rename_arg {
cred_t *ddra_cred;
} dsl_dir_rename_arg_t;
typedef struct dsl_valid_rename_arg {
int char_delta;
int nest_delta;
} dsl_valid_rename_arg_t;
/* ARGSUSED */
static int
dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
int *deltap = arg;
dsl_valid_rename_arg_t *dvra = arg;
char namebuf[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(ds, namebuf);
if (strlen(namebuf) + *deltap >= ZFS_MAX_DATASET_NAME_LEN)
ASSERT3U(strnlen(namebuf, ZFS_MAX_DATASET_NAME_LEN),
<, ZFS_MAX_DATASET_NAME_LEN);
int namelen = strlen(namebuf) + dvra->char_delta;
int depth = get_dataset_depth(namebuf) + dvra->nest_delta;
if (namelen >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
if (dvra->nest_delta > 0 && depth >= zfs_max_dataset_nesting)
return (SET_ERROR(ENAMETOOLONG));
return (0);
}
@ -1874,9 +1886,9 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
dsl_dir_rename_arg_t *ddra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd, *newparent;
dsl_valid_rename_arg_t dvra;
const char *mynewname;
int error;
int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname);
/* target dir should exist */
error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
@ -1905,10 +1917,19 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
return (SET_ERROR(EEXIST));
}
ASSERT3U(strnlen(ddra->ddra_newname, ZFS_MAX_DATASET_NAME_LEN),
<, ZFS_MAX_DATASET_NAME_LEN);
ASSERT3U(strnlen(ddra->ddra_oldname, ZFS_MAX_DATASET_NAME_LEN),
<, ZFS_MAX_DATASET_NAME_LEN);
dvra.char_delta = strlen(ddra->ddra_newname)
- strlen(ddra->ddra_oldname);
dvra.nest_delta = get_dataset_depth(ddra->ddra_newname)
- get_dataset_depth(ddra->ddra_oldname);
/* if the name length is growing, validate child name lengths */
if (delta > 0) {
if (dvra.char_delta > 0 || dvra.nest_delta > 0) {
error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
&delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
&dvra, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
if (error != 0) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);

View file

@ -201,7 +201,7 @@ stack_clear() {
stack_check() {
STACK_MAX_SIZE=/sys/kernel/debug/tracing/stack_max_size
STACK_TRACE=/sys/kernel/debug/tracing/stack_trace
STACK_LIMIT=7600
STACK_LIMIT=15362
if [ -e "$STACK_MAX_SIZE" ]; then
STACK_SIZE=$(cat "$STACK_MAX_SIZE")

View file

@ -209,7 +209,7 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos',
'zfs_rename_007_pos', 'zfs_rename_008_pos', 'zfs_rename_009_neg',
'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg',
'zfs_rename_013_pos', 'zfs_rename_encrypted_child',
'zfs_rename_013_pos', 'zfs_rename_014_neg', 'zfs_rename_encrypted_child',
'zfs_rename_to_encrypted']
tags = ['functional', 'cli_root', 'zfs_rename']

View file

@ -25,7 +25,7 @@
#
#
# Copyright (c) 2012 by Delphix. All rights reserved.
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
#
export BYND_MAX_NAME="byondmaxnamelength\
@ -38,6 +38,12 @@ export BYND_MAX_NAME="byondmaxnamelength\
012345678901234567890123456789\
012345678901234567890123456789"
export BYND_NEST_LIMIT="a/a/a/\
a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/\
a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/\
a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/\
a/a"
# There're 3 different prompt messages while create
# a volume that great than 1TB on 32-bit
# - volume size exceeds limit for this system. (happy gate)

View file

@ -43,6 +43,7 @@
# *Beyond maximal name length.
# *Same property set multiple times via '-o property=value'
# *Volume's property set on filesystem
# *Exceeding maximum name nesting
#
# STRATEGY:
# 1. Create an array of <filesystem> arguments
@ -89,7 +90,7 @@ set -A args "$TESTPOOL/" "$TESTPOOL//blah" "$TESTPOOL/@blah" \
"$TESTPOOL/blah*blah" "$TESTPOOL/blah blah" \
"-s $TESTPOOL/$TESTFS1" "-b 1092 $TESTPOOL/$TESTFS1" \
"-b 64k $TESTPOOL/$TESTFS1" "-s -b 32k $TESTPOOL/$TESTFS1" \
"$TESTPOOL/$BYND_MAX_NAME"
"$TESTPOOL/$BYND_MAX_NAME" "$TESTPOOL/$BYND_NEST_LIMIT"
log_assert "Verify 'zfs create <filesystem>' fails with bad <filesystem> argument."

View file

@ -15,6 +15,7 @@ dist_pkgdata_SCRIPTS = \
zfs_rename_011_pos.ksh \
zfs_rename_012_neg.ksh \
zfs_rename_013_pos.ksh \
zfs_rename_014_neg.ksh \
zfs_rename_encrypted_child.ksh \
zfs_rename_to_encrypted.ksh

View file

@ -0,0 +1,110 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2016 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# zfs rename should work on existing datasets that exceed
# zfs_max_dataset_nesting (our nesting limit) except in the
# scenario that we try to rename it to something deeper
# than it already is.
#
# STRATEGY:
# 1. Create a set of ZFS datasets within our nesting limit.
# 2. Try renaming one of them on top of the other so its
# children pass the limit - it should fail.
# 3. Increase the nesting limit.
# 4. Check that renaming a dataset on top of the other
# cannot exceed the new nesting limit but can exceed
# the old one.
# 5. Bring back the old nesting limit so you can simulate
# the scenario of existing datasets that exceed our
# nesting limit.
# 6. Make sure that 'zfs rename' can work only if we are
# trying to keep existing datasets that exceed the limit
# at the same nesting level or less. Making it even
# deeper should not work.
#
verify_runnable "both"
dsA01="a"
dsA02="a/a"
dsA49=$(printf 'a/%.0s' {1..48})"a"
dsB01="b"
dsB49=$(printf 'b/%.0s' {1..48})"b"
dsC01="c"
dsC16=$(printf 'c/%.0s' {1..15})"c"
dsB16A=$(printf 'b/%.0s' {1..16})"a"
dsB15A=$(printf 'b/%.0s' {1..15})"a"
dsB15A47A=$(printf 'b/%.0s' {1..15})$(printf 'a/%.0s' {1..47})"a"
dsB15A47C=$(printf 'b/%.0s' {1..15})$(printf 'a/%.0s' {1..47})"c"
dsB15A40B=$(printf 'b/%.0s' {1..15})$(printf 'a/%.0s' {1..40})"b"
dsB15A47B=$(printf 'b/%.0s' {1..15})$(printf 'a/%.0s' {1..47})"b"
function nesting_cleanup
{
log_must zfs destroy -fR $TESTPOOL/$dsA01
log_must zfs destroy -fR $TESTPOOL/$dsB01
log_must zfs destroy -fR $TESTPOOL/$dsC01
# If the test fails after increasing the limit and
# before resetting it, it will be left at the modified
# value for the remaining tests. That's the reason
# we reset it again here just in case.
log_must set_tunable_impl zfs_max_dataset_nesting 50 Z zcommon
}
log_onexit nesting_cleanup
log_must zfs create -p $TESTPOOL/$dsA49
log_must zfs create -p $TESTPOOL/$dsB49
log_must zfs create -p $TESTPOOL/$dsC16
log_mustnot zfs rename $TESTPOOL/$dsA02 $TESTPOOL/$dsB15A
# extend limit
log_must set_tunable_impl zfs_max_dataset_nesting 64 Z zcommon
log_mustnot zfs rename $TESTPOOL/$dsA02 $TESTPOOL/$dsB16A
log_must zfs rename $TESTPOOL/$dsA02 $TESTPOOL/$dsB15A
# bring back old limit
log_must set_tunable_impl zfs_max_dataset_nesting 50 Z zcommon
log_mustnot zfs rename $TESTPOOL/$dsC01 $TESTPOOL/$dsB15A47C
log_must zfs rename $TESTPOOL/$dsB15A47A $TESTPOOL/$dsB15A47B
log_must zfs rename $TESTPOOL/$dsB15A47B $TESTPOOL/$dsB15A40B
log_pass "Verify 'zfs rename' limits datasets so they don't pass " \
"the nesting limit. For existing ones that do, it should " \
"not allow them to grow anymore."