From 2cd85c40a9f396bb24f7861c832acd52e61c4780 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 7 Feb 2009 16:08:27 +0100 Subject: [PATCH 1/5] Make test-path-utils more robust against incorrect use Previously, this test utility happily returned with exit code 0 if garbage was thrown at it. Now it reports failure if an unknown function name was given on the command line. Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- test-path-utils.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test-path-utils.c b/test-path-utils.c index 2c0f5a37e8b..7e6fc8deaf8 100644 --- a/test-path-utils.c +++ b/test-path-utils.c @@ -7,6 +7,7 @@ int main(int argc, char **argv) int rv = normalize_absolute_path(buf, argv[2]); assert(strlen(buf) == rv); puts(buf); + return 0; } if (argc >= 2 && !strcmp(argv[1], "make_absolute_path")) { @@ -15,12 +16,16 @@ int main(int argc, char **argv) argc--; argv++; } + return 0; } if (argc == 4 && !strcmp(argv[1], "longest_ancestor_length")) { int len = longest_ancestor_length(argv[2], argv[3]); printf("%d\n", len); + return 0; } - return 0; + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], + argv[1] ? argv[1] : "(there was none)"); + return 1; } From f3cad0ad82e24966bf7bcc8a47670c54c30e4b18 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 7 Feb 2009 16:08:28 +0100 Subject: [PATCH 2/5] Move sanitary_path_copy() to path.c and rename it to normalize_path_copy() This function and normalize_absolute_path() do almost the same thing. The former already works on Windows, but the latter crashes. In subsequent changes we will remove normalize_absolute_path(). Here we make the replacement function reusable. On the way we rename it to reflect that it does some path normalization. Apart from that this is only moving around code. Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- cache.h | 1 + path.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ setup.c | 88 +-------------------------------------------------------- 3 files changed, 88 insertions(+), 87 deletions(-) diff --git a/cache.h b/cache.h index 42f2f2754b9..65897e7b2cf 100644 --- a/cache.h +++ b/cache.h @@ -548,6 +548,7 @@ const char *make_absolute_path(const char *path); const char *make_nonrelative_path(const char *path); const char *make_relative_path(const char *abs, const char *base); int normalize_absolute_path(char *buf, const char *path); +int normalize_path_copy(char *dst, const char *src); int longest_ancestor_length(const char *path, const char *prefix_list); /* Read and unpack a sha1 file into memory, write memory to a sha1 file */ diff --git a/path.c b/path.c index a074aea6492..820eab5ac32 100644 --- a/path.c +++ b/path.c @@ -415,6 +415,92 @@ int normalize_absolute_path(char *buf, const char *path) return dst - buf; } +int normalize_path_copy(char *dst, const char *src) +{ + char *dst0; + + if (has_dos_drive_prefix(src)) { + *dst++ = *src++; + *dst++ = *src++; + } + dst0 = dst; + + if (is_dir_sep(*src)) { + *dst++ = '/'; + while (is_dir_sep(*src)) + src++; + } + + for (;;) { + char c = *src; + + /* + * A path component that begins with . could be + * special: + * (1) "." and ends -- ignore and terminate. + * (2) "./" -- ignore them, eat slash and continue. + * (3) ".." and ends -- strip one and terminate. + * (4) "../" -- strip one, eat slash and continue. + */ + if (c == '.') { + if (!src[1]) { + /* (1) */ + src++; + } else if (is_dir_sep(src[1])) { + /* (2) */ + src += 2; + while (is_dir_sep(*src)) + src++; + continue; + } else if (src[1] == '.') { + if (!src[2]) { + /* (3) */ + src += 2; + goto up_one; + } else if (is_dir_sep(src[2])) { + /* (4) */ + src += 3; + while (is_dir_sep(*src)) + src++; + goto up_one; + } + } + } + + /* copy up to the next '/', and eat all '/' */ + while ((c = *src++) != '\0' && !is_dir_sep(c)) + *dst++ = c; + if (is_dir_sep(c)) { + *dst++ = '/'; + while (is_dir_sep(c)) + c = *src++; + src--; + } else if (!c) + break; + continue; + + up_one: + /* + * dst0..dst is prefix portion, and dst[-1] is '/'; + * go up one level. + */ + dst -= 2; /* go past trailing '/' if any */ + if (dst < dst0) + return -1; + while (1) { + if (dst <= dst0) + break; + c = *dst--; + if (c == '/') { /* MinGW: cannot be '\\' anymore */ + dst += 2; + break; + } + } + } + *dst = '\0'; + return 0; +} + /* * path = Canonical absolute path * prefix_list = Colon-separated list of absolute paths diff --git a/setup.c b/setup.c index 78a8041ff0d..59735c14506 100644 --- a/setup.c +++ b/setup.c @@ -4,92 +4,6 @@ static int inside_git_dir = -1; static int inside_work_tree = -1; -static int sanitary_path_copy(char *dst, const char *src) -{ - char *dst0; - - if (has_dos_drive_prefix(src)) { - *dst++ = *src++; - *dst++ = *src++; - } - dst0 = dst; - - if (is_dir_sep(*src)) { - *dst++ = '/'; - while (is_dir_sep(*src)) - src++; - } - - for (;;) { - char c = *src; - - /* - * A path component that begins with . could be - * special: - * (1) "." and ends -- ignore and terminate. - * (2) "./" -- ignore them, eat slash and continue. - * (3) ".." and ends -- strip one and terminate. - * (4) "../" -- strip one, eat slash and continue. - */ - if (c == '.') { - if (!src[1]) { - /* (1) */ - src++; - } else if (is_dir_sep(src[1])) { - /* (2) */ - src += 2; - while (is_dir_sep(*src)) - src++; - continue; - } else if (src[1] == '.') { - if (!src[2]) { - /* (3) */ - src += 2; - goto up_one; - } else if (is_dir_sep(src[2])) { - /* (4) */ - src += 3; - while (is_dir_sep(*src)) - src++; - goto up_one; - } - } - } - - /* copy up to the next '/', and eat all '/' */ - while ((c = *src++) != '\0' && !is_dir_sep(c)) - *dst++ = c; - if (is_dir_sep(c)) { - *dst++ = '/'; - while (is_dir_sep(c)) - c = *src++; - src--; - } else if (!c) - break; - continue; - - up_one: - /* - * dst0..dst is prefix portion, and dst[-1] is '/'; - * go up one level. - */ - dst -= 2; /* go past trailing '/' if any */ - if (dst < dst0) - return -1; - while (1) { - if (dst <= dst0) - break; - c = *dst--; - if (c == '/') { /* MinGW: cannot be '\\' anymore */ - dst += 2; - break; - } - } - } - *dst = '\0'; - return 0; -} - const char *prefix_path(const char *prefix, int len, const char *path) { const char *orig = path; @@ -101,7 +15,7 @@ const char *prefix_path(const char *prefix, int len, const char *path) memcpy(sanitized, prefix, len); strcpy(sanitized + len, path); } - if (sanitary_path_copy(sanitized, sanitized)) + if (normalize_path_copy(sanitized, sanitized)) goto error_out; if (is_absolute_path(orig)) { const char *work_tree = get_git_work_tree(); From 43a7ddb55d82d5c6f0c4d2cbe408a1df71d58ef3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 7 Feb 2009 16:08:29 +0100 Subject: [PATCH 3/5] Fix GIT_CEILING_DIRECTORIES on Windows Using git with GIT_CEILING_DIRECTORIES crashed on Windows due to a failed assertion in normalize_absolute_path(): This function expects absolute paths to start with a slash, while on Windows they can start with a drive letter or a backslash. This fixes it by using the alternative, normalize_path_copy() instead, which can handle Windows-style paths just fine. Secondly, the portability macro PATH_SEP is used instead of expecting colons to be used as path list delimiter. The test script t1504 is also changed to help MSYS's bash recognize some program arguments as path list. (MSYS's bash must translate POSIX-style path lists to Windows-style path lists, and the heuristic did not catch some cases.) Signed-off-by: Rene Scharfe Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- path.c | 11 ++++++----- t/t1504-ceiling-dirs.sh | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/path.c b/path.c index 820eab5ac32..79c5c104c37 100644 --- a/path.c +++ b/path.c @@ -524,15 +524,16 @@ int longest_ancestor_length(const char *path, const char *prefix_list) return -1; for (colon = ceil = prefix_list; *colon; ceil = colon+1) { - for (colon = ceil; *colon && *colon != ':'; colon++); + for (colon = ceil; *colon && *colon != PATH_SEP; colon++); len = colon - ceil; if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil)) continue; strlcpy(buf, ceil, len+1); - len = normalize_absolute_path(buf, buf); - /* Strip "trailing slashes" from "/". */ - if (len == 1) - len = 0; + if (normalize_path_copy(buf, buf) < 0) + continue; + len = strlen(buf); + if (len > 0 && buf[len-1] == '/') + buf[--len] = '\0'; if (!strncmp(path, buf, len) && path[len] == '/' && diff --git a/t/t1504-ceiling-dirs.sh b/t/t1504-ceiling-dirs.sh index 91b704a3a4c..e377d48902c 100755 --- a/t/t1504-ceiling-dirs.sh +++ b/t/t1504-ceiling-dirs.sh @@ -93,13 +93,13 @@ GIT_CEILING_DIRECTORIES="$TRASH_ROOT/subdi" test_prefix subdir_ceil_at_subdi_slash "sub/dir/" -GIT_CEILING_DIRECTORIES="foo:$TRASH_ROOT/sub" +GIT_CEILING_DIRECTORIES="/foo:$TRASH_ROOT/sub" test_fail second_of_two -GIT_CEILING_DIRECTORIES="$TRASH_ROOT/sub:bar" +GIT_CEILING_DIRECTORIES="$TRASH_ROOT/sub:/bar" test_fail first_of_two -GIT_CEILING_DIRECTORIES="foo:$TRASH_ROOT/sub:bar" +GIT_CEILING_DIRECTORIES="/foo:$TRASH_ROOT/sub:/bar" test_fail second_of_three From f42302b49333d035a323f5d80fb9562d375b17f1 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 7 Feb 2009 16:08:30 +0100 Subject: [PATCH 4/5] Test and fix normalize_path_copy() This changes the test-path-utils utility to invoke normalize_path_copy() instead of normalize_absolute_path() because the latter is about to be removed. The test cases in t0060 are adjusted in two regards: - normalize_path_copy() more often leaves a trailing slash in the result. This has no negative side effects because the new user of this function, longest_ancester_length(), already accounts for this behavior. - The function can fail. The tests uncover a flaw in normalize_path_copy(): If there are sufficiently many '..' path components so that the root is reached, such as in "/d1/s1/../../d2", then the leading slash was lost. This manifested itself that (assuming there is a repository at /tmp/foo) $ git add /d1/../tmp/foo/some-file reported 'pathspec is outside repository'. This is now fixed. Moreover, the test case descriptions of t0060 now include the test data and expected outcome. Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- path.c | 16 +++++----------- t/t0060-path-utils.sh | 33 +++++++++++++++++---------------- test-path-utils.c | 7 ++++--- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/path.c b/path.c index 79c5c104c37..6b7be5b869f 100644 --- a/path.c +++ b/path.c @@ -484,18 +484,12 @@ int normalize_path_copy(char *dst, const char *src) * dst0..dst is prefix portion, and dst[-1] is '/'; * go up one level. */ - dst -= 2; /* go past trailing '/' if any */ - if (dst < dst0) + dst--; /* go to trailing '/' */ + if (dst <= dst0) return -1; - while (1) { - if (dst <= dst0) - break; - c = *dst--; - if (c == '/') { /* MinGW: cannot be '\\' anymore */ - dst += 2; - break; - } - } + /* Windows: dst[-1] cannot be backslash anymore */ + while (dst0 < dst && dst[-1] != '/') + dst--; } *dst = '\0'; return 0; diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 6e7501f352e..4ed1f0b4dde 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -8,36 +8,37 @@ test_description='Test various path utilities' . ./test-lib.sh norm_abs() { - test_expect_success "normalize absolute" \ - "test \$(test-path-utils normalize_absolute_path '$1') = '$2'" + test_expect_success "normalize absolute: $1 => $2" \ + "test \"\$(test-path-utils normalize_path_copy '$1')\" = '$2'" } ancestor() { - test_expect_success "longest ancestor" \ - "test \$(test-path-utils longest_ancestor_length '$1' '$2') = '$3'" + test_expect_success "longest ancestor: $1 $2 => $3" \ + "test \"\$(test-path-utils longest_ancestor_length '$1' '$2')\" = '$3'" } -norm_abs "" / +norm_abs "" "" norm_abs / / norm_abs // / norm_abs /// / norm_abs /. / norm_abs /./ / -norm_abs /./.. / -norm_abs /../. / -norm_abs /./../.// / +norm_abs /./.. ++failed++ +norm_abs /../. ++failed++ +norm_abs /./../.// ++failed++ norm_abs /dir/.. / norm_abs /dir/sub/../.. / +norm_abs /dir/sub/../../.. ++failed++ norm_abs /dir /dir -norm_abs /dir// /dir +norm_abs /dir// /dir/ norm_abs /./dir /dir -norm_abs /dir/. /dir -norm_abs /dir///./ /dir -norm_abs /dir//sub/.. /dir -norm_abs /dir/sub/../ /dir -norm_abs //dir/sub/../. /dir -norm_abs /dir/s1/../s2/ /dir/s2 -norm_abs /d1/s1///s2/..//../s3/ /d1/s3 +norm_abs /dir/. /dir/ +norm_abs /dir///./ /dir/ +norm_abs /dir//sub/.. /dir/ +norm_abs /dir/sub/../ /dir/ +norm_abs //dir/sub/../. /dir/ +norm_abs /dir/s1/../s2/ /dir/s2/ +norm_abs /d1/s1///s2/..//../s3/ /d1/s3/ norm_abs /d1/s1//../s2/../../d2 /d2 norm_abs /d1/.../d2 /d1/.../d2 norm_abs /d1/..././../d2 /d1/d2 diff --git a/test-path-utils.c b/test-path-utils.c index 7e6fc8deaf8..5168a8e3dfd 100644 --- a/test-path-utils.c +++ b/test-path-utils.c @@ -2,10 +2,11 @@ int main(int argc, char **argv) { - if (argc == 3 && !strcmp(argv[1], "normalize_absolute_path")) { + if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) { char *buf = xmalloc(PATH_MAX + 1); - int rv = normalize_absolute_path(buf, argv[2]); - assert(strlen(buf) == rv); + int rv = normalize_path_copy(buf, argv[2]); + if (rv) + buf = "++failed++"; puts(buf); return 0; } From f2a782b8ba189b5ed51d18aa3eb93a670c220018 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 7 Feb 2009 16:08:31 +0100 Subject: [PATCH 5/5] Remove unused normalize_absolute_path() This function is now superseded by normalize_path_copy(). Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- cache.h | 1 - path.c | 51 ++++++--------------------------------------------- 2 files changed, 6 insertions(+), 46 deletions(-) diff --git a/cache.h b/cache.h index 65897e7b2cf..39f0d6fea4b 100644 --- a/cache.h +++ b/cache.h @@ -547,7 +547,6 @@ static inline int is_absolute_path(const char *path) const char *make_absolute_path(const char *path); const char *make_nonrelative_path(const char *path); const char *make_relative_path(const char *abs, const char *base); -int normalize_absolute_path(char *buf, const char *path); int normalize_path_copy(char *dst, const char *src); int longest_ancestor_length(const char *path, const char *prefix_list); diff --git a/path.c b/path.c index 6b7be5b869f..4b9107fed10 100644 --- a/path.c +++ b/path.c @@ -363,58 +363,19 @@ const char *make_relative_path(const char *abs, const char *base) } /* - * path = absolute path - * buf = buffer of at least max(2, strlen(path)+1) bytes - * It is okay if buf == path, but they should not overlap otherwise. + * It is okay if dst == src, but they should not overlap otherwise. * - * Performs the following normalizations on path, storing the result in buf: - * - Removes trailing slashes. - * - Removes empty components. + * Performs the following normalizations on src, storing the result in dst: + * - Ensures that components are separated by '/' (Windows only) + * - Squashes sequences of '/'. * - Removes "." components. * - Removes ".." components, and the components the precede them. - * "" and paths that contain only slashes are normalized to "/". - * Returns the length of the output. + * Returns failure (non-zero) if a ".." component appears as first path + * component anytime during the normalization. Otherwise, returns success (0). * * Note that this function is purely textual. It does not follow symlinks, * verify the existence of the path, or make any system calls. */ -int normalize_absolute_path(char *buf, const char *path) -{ - const char *comp_start = path, *comp_end = path; - char *dst = buf; - int comp_len; - assert(buf); - assert(path); - - while (*comp_start) { - assert(*comp_start == '/'); - while (*++comp_end && *comp_end != '/') - ; /* nothing */ - comp_len = comp_end - comp_start; - - if (!strncmp("/", comp_start, comp_len) || - !strncmp("/.", comp_start, comp_len)) - goto next; - - if (!strncmp("/..", comp_start, comp_len)) { - while (dst > buf && *--dst != '/') - ; /* nothing */ - goto next; - } - - memmove(dst, comp_start, comp_len); - dst += comp_len; - next: - comp_start = comp_end; - } - - if (dst == buf) - *dst++ = '/'; - - *dst = '\0'; - return dst - buf; -} - int normalize_path_copy(char *dst, const char *src) { char *dst0;