cp: fix some cases with infinite recursion

As noted in the PR, cp -R has some surprising behavior.  Typically, when
you `cp -R foo bar` where both foo and bar exist, foo is cleanly copied
to foo/bar.  When you `cp -R foo foo` (where foo clearly exists), cp(1)
goes a little off the rails as it creates foo/foo, then discovers that
and creates foo/foo/foo, so on and so forth, until it eventually fails.

POSIX doesn't seem to disallow this behavior, but it isn't very useful.
GNU cp(1) will detect the recursion and squash it, but emit a message in
the process that it has done so.

This change seemingly follows the GNU behavior, but it currently doesn't
warn about the situation -- the author feels that the final product is
about what one might expect from doing this and thus, doesn't need a
warning.  The author doesn't feel strongly about this.

PR:		235438
Reviewed by:	bapt
Sponsored by:	Klara, Inc.
Differential Revision:	https://reviews.freebsd.org/D33944
This commit is contained in:
Kyle Evans 2022-01-27 12:02:17 -06:00
parent 6abb5043a6
commit 848263aad1
2 changed files with 147 additions and 5 deletions

View file

@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <fts.h>
@ -91,7 +92,7 @@ volatile sig_atomic_t info;
enum op { FILE_TO_FILE, FILE_TO_DIR, DIR_TO_DNE };
static int copy(char *[], enum op, int);
static int copy(char *[], enum op, int, struct stat *);
static void siginfo(int __unused);
int
@ -259,18 +260,26 @@ main(int argc, char *argv[])
*/
type = FILE_TO_DIR;
exit (copy(argv, type, fts_options));
/*
* For DIR_TO_DNE, we could provide copy() with the to_stat we've
* already allocated on the stack here that isn't being used for
* anything. Not doing so, though, simplifies later logic a little bit
* as we need to skip checking root_stat on the first iteration and
* ensure that we set it with the first mkdir().
*/
exit (copy(argv, type, fts_options, (type == DIR_TO_DNE ? NULL :
&to_stat)));
}
static int
copy(char *argv[], enum op type, int fts_options)
copy(char *argv[], enum op type, int fts_options, struct stat *root_stat)
{
struct stat to_stat;
struct stat created_root_stat, to_stat;
FTS *ftsp;
FTSENT *curr;
int base = 0, dne, badcp, rval;
size_t nlen;
char *p, *target_mid;
char *p, *recurse_path, *target_mid;
mode_t mask, mode;
/*
@ -280,6 +289,7 @@ copy(char *argv[], enum op type, int fts_options)
mask = ~umask(0777);
umask(~mask);
recurse_path = NULL;
if ((ftsp = fts_open(argv, fts_options, NULL)) == NULL)
err(1, "fts_open");
for (badcp = rval = 0; errno = 0, (curr = fts_read(ftsp)) != NULL;
@ -300,6 +310,47 @@ copy(char *argv[], enum op type, int fts_options)
;
}
if (curr->fts_info == FTS_D && type != FILE_TO_FILE &&
root_stat != NULL &&
root_stat->st_dev == curr->fts_statp->st_dev &&
root_stat->st_ino == curr->fts_statp->st_ino) {
assert(recurse_path == NULL);
if (curr->fts_level > FTS_ROOTLEVEL) {
/*
* If the recursion isn't at the immediate
* level, we can just not traverse into this
* directory.
*/
fts_set(ftsp, curr, FTS_SKIP);
continue;
} else {
const char *slash;
/*
* Grab the last path component and double it,
* to make life easier later and ensure that
* we work even with fts_level == 0 is a couple
* of components deep in fts_path. No path
* separators are fine and expected in the
* common case, though.
*/
slash = strrchr(curr->fts_path, '/');
if (slash != NULL)
slash++;
else
slash = curr->fts_path;
if (asprintf(&recurse_path, "%s/%s",
curr->fts_path, slash) == -1)
err(1, "asprintf");
}
}
if (recurse_path != NULL &&
strcmp(curr->fts_path, recurse_path) == 0) {
fts_set(ftsp, curr, FTS_SKIP);
continue;
}
/*
* If we are in case (2) or (3) above, we need to append the
* source name to the target name.
@ -448,6 +499,19 @@ copy(char *argv[], enum op type, int fts_options)
if (mkdir(to.p_path,
curr->fts_statp->st_mode | S_IRWXU) < 0)
err(1, "%s", to.p_path);
/*
* First DNE with a NULL root_stat is the root
* path, so set root_stat. We can't really
* tell in all cases if the target path is
* within the src path, so we just stat() the
* first directory we created and use that.
*/
if (root_stat == NULL &&
stat(to.p_path, &created_root_stat) == -1) {
err(1, "stat");
} else if (root_stat == NULL) {
root_stat = &created_root_stat;
}
} else if (!S_ISDIR(to_stat.st_mode)) {
errno = ENOTDIR;
err(1, "%s", to.p_path);
@ -493,6 +557,7 @@ copy(char *argv[], enum op type, int fts_options)
if (errno)
err(1, "fts_read");
fts_close(ftsp);
free(recurse_path);
return (rval);
}

View file

@ -57,8 +57,85 @@ chrdev_body()
check_size trunc 0
}
atf_test_case matching_srctgt
matching_srctgt_body()
{
# PR235438: `cp -R foo foo` would previously infinitely recurse and
# eventually error out.
mkdir foo
echo "qux" > foo/bar
cp foo/bar foo/zoo
atf_check cp -R foo foo
atf_check -o inline:"qux\n" cat foo/foo/bar
atf_check -o inline:"qux\n" cat foo/foo/zoo
atf_check -e not-empty -s not-exit:0 stat foo/foo/foo
}
atf_test_case matching_srctgt_contained
matching_srctgt_contained_body()
{
# Let's do the same thing, except we'll try to recursively copy foo into
# one of its subdirectories.
mkdir foo
echo "qux" > foo/bar
mkdir foo/loo
mkdir foo/moo
mkdir foo/roo
cp foo/bar foo/zoo
atf_check cp -R foo foo/moo
atf_check -o inline:"qux\n" cat foo/moo/foo/bar
atf_check -o inline:"qux\n" cat foo/moo/foo/zoo
atf_check -e not-empty -s not-exit:0 stat foo/moo/foo/moo
}
atf_test_case matching_srctgt_link
matching_srctgt_link_body()
{
mkdir foo
echo "qux" > foo/bar
cp foo/bar foo/zoo
atf_check ln -s foo roo
atf_check cp -RH roo foo
atf_check -o inline:"qux\n" cat foo/roo/bar
atf_check -o inline:"qux\n" cat foo/roo/zoo
}
atf_test_case matching_srctgt_nonexistent
matching_srctgt_nonexistent_body()
{
# We'll copy foo to a nonexistent subdirectory; ideally, we would
# skip just the directory and end up with a layout like;
#
# foo/
# bar
# dne/
# bar
# zoo
# zoo
#
mkdir foo
echo "qux" > foo/bar
cp foo/bar foo/zoo
atf_check cp -R foo foo/dne
atf_check -o inline:"qux\n" cat foo/dne/bar
atf_check -o inline:"qux\n" cat foo/dne/zoo
atf_check -e not-empty -s not-exit:0 stat foo/dne/foo
}
atf_init_test_cases()
{
atf_add_test_case basic
atf_add_test_case chrdev
atf_add_test_case matching_srctgt
atf_add_test_case matching_srctgt_contained
atf_add_test_case matching_srctgt_link
atf_add_test_case matching_srctgt_nonexistent
}