From 84069fcc140480fb76dc204ebce7819c2c87ec7c Mon Sep 17 00:00:00 2001 From: Atharva Raykar Date: Tue, 6 Jul 2021 23:49:34 +0530 Subject: [PATCH 01/51] t7400: test failure to add submodule in tracked path Add a test to ensure failure on adding a submodule to a directory with tracked contents in the index. As we are going to refactor and port to C some parts of `git submodule add`, let's add a test to help ensure no regression is introduced. Signed-off-by: Atharva Raykar Mentored-by: Christian Couder Based-on-patch-by: Shourya Shukla Mentored-by: Shourya Shukla Signed-off-by: Junio C Hamano --- t/t7400-submodule-basic.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index a924fdb7a6..7aa7fefdfa 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -196,6 +196,17 @@ test_expect_success 'submodule add to .gitignored path with --force' ' ) ' +test_expect_success 'submodule add to path with tracked content fails' ' + ( + cd addtest && + echo "'\''dir-tracked'\'' already exists in the index" >expect && + mkdir dir-tracked && + test_commit foo dir-tracked/bar && + test_must_fail git submodule add "$submodurl" dir-tracked >actual 2>&1 && + test_cmp expect actual + ) +' + test_expect_success 'submodule add to reconfigure existing submodule with --force' ' ( cd addtest-ignore && From 0008d12284ef916b53341c2aadccdc017ff07c7a Mon Sep 17 00:00:00 2001 From: Atharva Raykar Date: Sat, 10 Jul 2021 13:17:59 +0530 Subject: [PATCH 02/51] submodule: prefix die messages with 'fatal' The standard `die()` function that is used in C code prefixes all the messages passed to it with 'fatal: '. This does not happen with the `die` used in 'git-submodule.sh'. Let's prefix each of the shell die messages with 'fatal: ' so that when they are converted to C code, the error messages stay the same as before the conversion. Note that the shell version of `die` exits with error code 1, while the C version exits with error code 128. In practice, this does not change any behaviour, as no functionality in 'submodule add' and 'submodule update' relies on the value of the exit code. Signed-off-by: Atharva Raykar Mentored-by: Christian Couder Mentored-by: Shourya Shukla Signed-off-by: Junio C Hamano --- git-submodule.sh | 38 ++++++++++++++++++------------------- t/t7400-submodule-basic.sh | 4 ++-- t/t7406-submodule-update.sh | 10 +++++----- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/git-submodule.sh b/git-submodule.sh index 4678378424..69bcb4fab2 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -147,7 +147,7 @@ cmd_add() if ! git submodule--helper config --check-writeable >/dev/null 2>&1 then - die "$(eval_gettext "please make sure that the .gitmodules file is in the working tree")" + die "fatal: $(eval_gettext "please make sure that the .gitmodules file is in the working tree")" fi if test -n "$reference_path" @@ -176,7 +176,7 @@ cmd_add() case "$repo" in ./*|../*) test -z "$wt_prefix" || - die "$(gettext "Relative path can only be used from the toplevel of the working tree")" + die "fatal: $(gettext "Relative path can only be used from the toplevel of the working tree")" # dereference source url relative to parent's url realrepo=$(git submodule--helper resolve-relative-url "$repo") || exit @@ -186,7 +186,7 @@ cmd_add() realrepo=$repo ;; *) - die "$(eval_gettext "repo URL: '\$repo' must be absolute or begin with ./|../")" + die "fatal: $(eval_gettext "repo URL: '\$repo' must be absolute or begin with ./|../")" ;; esac @@ -205,17 +205,17 @@ cmd_add() if test -z "$force" then git ls-files --error-unmatch "$sm_path" > /dev/null 2>&1 && - die "$(eval_gettext "'\$sm_path' already exists in the index")" + die "fatal: $(eval_gettext "'\$sm_path' already exists in the index")" else git ls-files -s "$sm_path" | sane_grep -v "^160000" > /dev/null 2>&1 && - die "$(eval_gettext "'\$sm_path' already exists in the index and is not a submodule")" + die "fatal: $(eval_gettext "'\$sm_path' already exists in the index and is not a submodule")" fi if test -d "$sm_path" && test -z $(git -C "$sm_path" rev-parse --show-cdup 2>/dev/null) then git -C "$sm_path" rev-parse --verify -q HEAD >/dev/null || - die "$(eval_gettext "'\$sm_path' does not have a commit checked out")" + die "fatal: $(eval_gettext "'\$sm_path' does not have a commit checked out")" fi if test -z "$force" @@ -238,7 +238,7 @@ cmd_add() if ! git submodule--helper check-name "$sm_name" then - die "$(eval_gettext "'$sm_name' is not a valid submodule name")" + die "fatal: $(eval_gettext "'$sm_name' is not a valid submodule name")" fi # perhaps the path exists and is already a git repo, else clone it @@ -281,7 +281,7 @@ or you are unsure what this means choose another name with the '--name' option." git config submodule."$sm_name".url "$realrepo" git add --no-warn-embedded-repo $force "$sm_path" || - die "$(eval_gettext "Failed to add submodule '\$sm_path'")" + die "fatal: $(eval_gettext "Failed to add submodule '\$sm_path'")" git submodule--helper config submodule."$sm_name".path "$sm_path" && git submodule--helper config submodule."$sm_name".url "$repo" && @@ -290,7 +290,7 @@ or you are unsure what this means choose another name with the '--name' option." git submodule--helper config submodule."$sm_name".branch "$branch" fi && git add --force .gitmodules || - die "$(eval_gettext "Failed to register submodule '\$sm_path'")" + die "fatal: $(eval_gettext "Failed to register submodule '\$sm_path'")" # NEEDSWORK: In a multi-working-tree world, this needs to be # set in the per-worktree config. @@ -565,7 +565,7 @@ cmd_update() else subsha1=$(sanitize_submodule_env; cd "$sm_path" && git rev-parse --verify HEAD) || - die "$(eval_gettext "Unable to find current revision in submodule path '\$displaypath'")" + die "fatal: $(eval_gettext "Unable to find current revision in submodule path '\$displaypath'")" fi if test -n "$remote" @@ -575,12 +575,12 @@ cmd_update() then # Fetch remote before determining tracking $sha1 fetch_in_submodule "$sm_path" $depth || - die "$(eval_gettext "Unable to fetch in submodule path '\$sm_path'")" + die "fatal: $(eval_gettext "Unable to fetch in submodule path '\$sm_path'")" fi remote_name=$(sanitize_submodule_env; cd "$sm_path" && git submodule--helper print-default-remote) sha1=$(sanitize_submodule_env; cd "$sm_path" && git rev-parse --verify "${remote_name}/${branch}") || - die "$(eval_gettext "Unable to find current \${remote_name}/\${branch} revision in submodule path '\$sm_path'")" + die "fatal: $(eval_gettext "Unable to find current \${remote_name}/\${branch} revision in submodule path '\$sm_path'")" fi if test "$subsha1" != "$sha1" || test -n "$force" @@ -604,36 +604,36 @@ cmd_update() # not be reachable from any of the refs is_tip_reachable "$sm_path" "$sha1" || fetch_in_submodule "$sm_path" "$depth" "$sha1" || - die "$(eval_gettext "Fetched in submodule path '\$displaypath', but it did not contain \$sha1. Direct fetching of that commit failed.")" + die "fatal: $(eval_gettext "Fetched in submodule path '\$displaypath', but it did not contain \$sha1. Direct fetching of that commit failed.")" fi must_die_on_failure= case "$update_module" in checkout) command="git checkout $subforce -q" - die_msg="$(eval_gettext "Unable to checkout '\$sha1' in submodule path '\$displaypath'")" + die_msg="fatal: $(eval_gettext "Unable to checkout '\$sha1' in submodule path '\$displaypath'")" say_msg="$(eval_gettext "Submodule path '\$displaypath': checked out '\$sha1'")" ;; rebase) command="git rebase ${GIT_QUIET:+--quiet}" - die_msg="$(eval_gettext "Unable to rebase '\$sha1' in submodule path '\$displaypath'")" + die_msg="fatal: $(eval_gettext "Unable to rebase '\$sha1' in submodule path '\$displaypath'")" say_msg="$(eval_gettext "Submodule path '\$displaypath': rebased into '\$sha1'")" must_die_on_failure=yes ;; merge) command="git merge ${GIT_QUIET:+--quiet}" - die_msg="$(eval_gettext "Unable to merge '\$sha1' in submodule path '\$displaypath'")" + die_msg="fatal: $(eval_gettext "Unable to merge '\$sha1' in submodule path '\$displaypath'")" say_msg="$(eval_gettext "Submodule path '\$displaypath': merged in '\$sha1'")" must_die_on_failure=yes ;; !*) command="${update_module#!}" - die_msg="$(eval_gettext "Execution of '\$command \$sha1' failed in submodule path '\$displaypath'")" + die_msg="fatal: $(eval_gettext "Execution of '\$command \$sha1' failed in submodule path '\$displaypath'")" say_msg="$(eval_gettext "Submodule path '\$displaypath': '\$command \$sha1'")" must_die_on_failure=yes ;; *) - die "$(eval_gettext "Invalid update mode '$update_module' for submodule path '$path'")" + die "fatal: $(eval_gettext "Invalid update mode '$update_module' for submodule path '$path'")" esac if (sanitize_submodule_env; cd "$sm_path" && $command "$sha1") @@ -660,7 +660,7 @@ cmd_update() res=$? if test $res -gt 0 then - die_msg="$(eval_gettext "Failed to recurse into submodule path '\$displaypath'")" + die_msg="fatal: $(eval_gettext "Failed to recurse into submodule path '\$displaypath'")" if test $res -ne 2 then err="${err};$die_msg" diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index 7aa7fefdfa..cb1b8e35db 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -51,7 +51,7 @@ test_expect_success 'submodule update aborts on missing gitmodules url' ' test_expect_success 'add aborts on repository with no commits' ' cat >expect <<-\EOF && - '"'repo-no-commits'"' does not have a commit checked out + fatal: '"'repo-no-commits'"' does not have a commit checked out EOF git init repo-no-commits && test_must_fail git submodule add ../a ./repo-no-commits 2>actual && @@ -199,7 +199,7 @@ test_expect_success 'submodule add to .gitignored path with --force' ' test_expect_success 'submodule add to path with tracked content fails' ' ( cd addtest && - echo "'\''dir-tracked'\'' already exists in the index" >expect && + echo "fatal: '\''dir-tracked'\'' already exists in the index" >expect && mkdir dir-tracked && test_commit foo dir-tracked/bar && test_must_fail git submodule add "$submodurl" dir-tracked >actual 2>&1 && diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index f4f61fe554..11cccbb333 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -448,7 +448,7 @@ test_expect_success 'fsck detects command in .gitmodules' ' ' cat << EOF >expect -Execution of 'false $submodulesha1' failed in submodule path 'submodule' +fatal: Execution of 'false $submodulesha1' failed in submodule path 'submodule' EOF test_expect_success 'submodule update - command in .git/config catches failure' ' @@ -465,7 +465,7 @@ test_expect_success 'submodule update - command in .git/config catches failure' ' cat << EOF >expect -Execution of 'false $submodulesha1' failed in submodule path '../submodule' +fatal: Execution of 'false $submodulesha1' failed in submodule path '../submodule' EOF test_expect_success 'submodule update - command in .git/config catches failure -- subdirectory' ' @@ -484,7 +484,7 @@ test_expect_success 'submodule update - command in .git/config catches failure - test_expect_success 'submodule update - command run for initial population of submodule' ' cat >expect <<-EOF && - Execution of '\''false $submodulesha1'\'' failed in submodule path '\''submodule'\'' + fatal: Execution of '\''false $submodulesha1'\'' failed in submodule path '\''submodule'\'' EOF rm -rf super/submodule && test_must_fail git -C super submodule update 2>actual && @@ -493,8 +493,8 @@ test_expect_success 'submodule update - command run for initial population of su ' cat << EOF >expect -Execution of 'false $submodulesha1' failed in submodule path '../super/submodule' -Failed to recurse into submodule path '../super' +fatal: Execution of 'false $submodulesha1' failed in submodule path '../super/submodule' +fatal: Failed to recurse into submodule path '../super' EOF test_expect_success 'recursive submodule update - command in .git/config catches failure -- subdirectory' ' From a98b02c11289879868dd0d5f80e894d8d01dc73b Mon Sep 17 00:00:00 2001 From: Atharva Raykar Date: Sat, 10 Jul 2021 13:18:00 +0530 Subject: [PATCH 03/51] submodule--helper: refactor module_clone() Separate out the core logic of module_clone() from the flag parsing---this way we can call the equivalent of the `submodule--helper clone` subcommand directly within C, without needing to push arguments in a strvec. Signed-off-by: Atharva Raykar Mentored-by: Christian Couder Mentored-by: Shourya Shukla Suggested-by: Junio C Hamano Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 241 +++++++++++++++++++----------------- 1 file changed, 128 insertions(+), 113 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index d55f6262e9..ae246a35f9 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1658,45 +1658,20 @@ static int module_deinit(int argc, const char **argv, const char *prefix) return 0; } -static int clone_submodule(const char *path, const char *gitdir, const char *url, - const char *depth, struct string_list *reference, int dissociate, - int quiet, int progress, int single_branch) -{ - struct child_process cp = CHILD_PROCESS_INIT; - - strvec_push(&cp.args, "clone"); - strvec_push(&cp.args, "--no-checkout"); - if (quiet) - strvec_push(&cp.args, "--quiet"); - if (progress) - strvec_push(&cp.args, "--progress"); - if (depth && *depth) - strvec_pushl(&cp.args, "--depth", depth, NULL); - if (reference->nr) { - struct string_list_item *item; - for_each_string_list_item(item, reference) - strvec_pushl(&cp.args, "--reference", - item->string, NULL); - } - if (dissociate) - strvec_push(&cp.args, "--dissociate"); - if (gitdir && *gitdir) - strvec_pushl(&cp.args, "--separate-git-dir", gitdir, NULL); - if (single_branch >= 0) - strvec_push(&cp.args, single_branch ? - "--single-branch" : - "--no-single-branch"); - - strvec_push(&cp.args, "--"); - strvec_push(&cp.args, url); - strvec_push(&cp.args, path); - - cp.git_cmd = 1; - prepare_submodule_repo_env(&cp.env_array); - cp.no_stdin = 1; - - return run_command(&cp); -} +struct module_clone_data { + const char *prefix; + const char *path; + const char *name; + const char *url; + const char *depth; + struct string_list reference; + unsigned int quiet: 1; + unsigned int progress: 1; + unsigned int dissociate: 1; + unsigned int require_init: 1; + int single_branch; +}; +#define MODULE_CLONE_DATA_INIT { .reference = STRING_LIST_INIT_NODUP, .single_branch = -1 } struct submodule_alternate_setup { const char *submodule_name; @@ -1802,37 +1777,128 @@ static void prepare_possible_alternates(const char *sm_name, free(error_strategy); } +static int clone_submodule(struct module_clone_data *clone_data) +{ + char *p, *sm_gitdir; + char *sm_alternate = NULL, *error_strategy = NULL; + struct strbuf sb = STRBUF_INIT; + struct child_process cp = CHILD_PROCESS_INIT; + + strbuf_addf(&sb, "%s/modules/%s", get_git_dir(), clone_data->name); + sm_gitdir = absolute_pathdup(sb.buf); + strbuf_reset(&sb); + + if (!is_absolute_path(clone_data->path)) { + strbuf_addf(&sb, "%s/%s", get_git_work_tree(), clone_data->path); + clone_data->path = strbuf_detach(&sb, NULL); + } else { + clone_data->path = xstrdup(clone_data->path); + } + + if (validate_submodule_git_dir(sm_gitdir, clone_data->name) < 0) + die(_("refusing to create/use '%s' in another submodule's " + "git dir"), sm_gitdir); + + if (!file_exists(sm_gitdir)) { + if (safe_create_leading_directories_const(sm_gitdir) < 0) + die(_("could not create directory '%s'"), sm_gitdir); + + prepare_possible_alternates(clone_data->name, &clone_data->reference); + + strvec_push(&cp.args, "clone"); + strvec_push(&cp.args, "--no-checkout"); + if (clone_data->quiet) + strvec_push(&cp.args, "--quiet"); + if (clone_data->progress) + strvec_push(&cp.args, "--progress"); + if (clone_data->depth && *(clone_data->depth)) + strvec_pushl(&cp.args, "--depth", clone_data->depth, NULL); + if (clone_data->reference.nr) { + struct string_list_item *item; + for_each_string_list_item(item, &clone_data->reference) + strvec_pushl(&cp.args, "--reference", + item->string, NULL); + } + if (clone_data->dissociate) + strvec_push(&cp.args, "--dissociate"); + if (sm_gitdir && *sm_gitdir) + strvec_pushl(&cp.args, "--separate-git-dir", sm_gitdir, NULL); + if (clone_data->single_branch >= 0) + strvec_push(&cp.args, clone_data->single_branch ? + "--single-branch" : + "--no-single-branch"); + + strvec_push(&cp.args, "--"); + strvec_push(&cp.args, clone_data->url); + strvec_push(&cp.args, clone_data->path); + + cp.git_cmd = 1; + prepare_submodule_repo_env(&cp.env_array); + cp.no_stdin = 1; + + if(run_command(&cp)) + die(_("clone of '%s' into submodule path '%s' failed"), + clone_data->url, clone_data->path); + } else { + if (clone_data->require_init && !access(clone_data->path, X_OK) && + !is_empty_dir(clone_data->path)) + die(_("directory not empty: '%s'"), clone_data->path); + if (safe_create_leading_directories_const(clone_data->path) < 0) + die(_("could not create directory '%s'"), clone_data->path); + strbuf_addf(&sb, "%s/index", sm_gitdir); + unlink_or_warn(sb.buf); + strbuf_reset(&sb); + } + + connect_work_tree_and_git_dir(clone_data->path, sm_gitdir, 0); + + p = git_pathdup_submodule(clone_data->path, "config"); + if (!p) + die(_("could not get submodule directory for '%s'"), clone_data->path); + + /* setup alternateLocation and alternateErrorStrategy in the cloned submodule if needed */ + git_config_get_string("submodule.alternateLocation", &sm_alternate); + if (sm_alternate) + git_config_set_in_file(p, "submodule.alternateLocation", + sm_alternate); + git_config_get_string("submodule.alternateErrorStrategy", &error_strategy); + if (error_strategy) + git_config_set_in_file(p, "submodule.alternateErrorStrategy", + error_strategy); + + free(sm_alternate); + free(error_strategy); + + strbuf_release(&sb); + free(sm_gitdir); + free(p); + return 0; +} + static int module_clone(int argc, const char **argv, const char *prefix) { - const char *name = NULL, *url = NULL, *depth = NULL; - int quiet = 0; - int progress = 0; - char *p, *path = NULL, *sm_gitdir; - struct strbuf sb = STRBUF_INIT; - struct string_list reference = STRING_LIST_INIT_NODUP; - int dissociate = 0, require_init = 0; - char *sm_alternate = NULL, *error_strategy = NULL; - int single_branch = -1; + int dissociate = 0, quiet = 0, progress = 0, require_init = 0; + struct module_clone_data clone_data = MODULE_CLONE_DATA_INIT; struct option module_clone_options[] = { - OPT_STRING(0, "prefix", &prefix, + OPT_STRING(0, "prefix", &clone_data.prefix, N_("path"), N_("alternative anchor for relative paths")), - OPT_STRING(0, "path", &path, + OPT_STRING(0, "path", &clone_data.path, N_("path"), N_("where the new submodule will be cloned to")), - OPT_STRING(0, "name", &name, + OPT_STRING(0, "name", &clone_data.name, N_("string"), N_("name of the new submodule")), - OPT_STRING(0, "url", &url, + OPT_STRING(0, "url", &clone_data.url, N_("string"), N_("url where to clone the submodule from")), - OPT_STRING_LIST(0, "reference", &reference, + OPT_STRING_LIST(0, "reference", &clone_data.reference, N_("repo"), N_("reference repository")), OPT_BOOL(0, "dissociate", &dissociate, N_("use --reference only while cloning")), - OPT_STRING(0, "depth", &depth, + OPT_STRING(0, "depth", &clone_data.depth, N_("string"), N_("depth for shallow clones")), OPT__QUIET(&quiet, "Suppress output for cloning a submodule"), @@ -1840,7 +1906,7 @@ static int module_clone(int argc, const char **argv, const char *prefix) N_("force cloning progress")), OPT_BOOL(0, "require-init", &require_init, N_("disallow cloning into non-empty directory")), - OPT_BOOL(0, "single-branch", &single_branch, + OPT_BOOL(0, "single-branch", &clone_data.single_branch, N_("clone only one branch, HEAD or --branch")), OPT_END() }; @@ -1856,67 +1922,16 @@ static int module_clone(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, module_clone_options, git_submodule_helper_usage, 0); - if (argc || !url || !path || !*path) + clone_data.dissociate = !!dissociate; + clone_data.quiet = !!quiet; + clone_data.progress = !!progress; + clone_data.require_init = !!require_init; + + if (argc || !clone_data.url || !clone_data.path || !*(clone_data.path)) usage_with_options(git_submodule_helper_usage, module_clone_options); - strbuf_addf(&sb, "%s/modules/%s", get_git_dir(), name); - sm_gitdir = absolute_pathdup(sb.buf); - strbuf_reset(&sb); - - if (!is_absolute_path(path)) { - strbuf_addf(&sb, "%s/%s", get_git_work_tree(), path); - path = strbuf_detach(&sb, NULL); - } else - path = xstrdup(path); - - if (validate_submodule_git_dir(sm_gitdir, name) < 0) - die(_("refusing to create/use '%s' in another submodule's " - "git dir"), sm_gitdir); - - if (!file_exists(sm_gitdir)) { - if (safe_create_leading_directories_const(sm_gitdir) < 0) - die(_("could not create directory '%s'"), sm_gitdir); - - prepare_possible_alternates(name, &reference); - - if (clone_submodule(path, sm_gitdir, url, depth, &reference, dissociate, - quiet, progress, single_branch)) - die(_("clone of '%s' into submodule path '%s' failed"), - url, path); - } else { - if (require_init && !access(path, X_OK) && !is_empty_dir(path)) - die(_("directory not empty: '%s'"), path); - if (safe_create_leading_directories_const(path) < 0) - die(_("could not create directory '%s'"), path); - strbuf_addf(&sb, "%s/index", sm_gitdir); - unlink_or_warn(sb.buf); - strbuf_reset(&sb); - } - - connect_work_tree_and_git_dir(path, sm_gitdir, 0); - - p = git_pathdup_submodule(path, "config"); - if (!p) - die(_("could not get submodule directory for '%s'"), path); - - /* setup alternateLocation and alternateErrorStrategy in the cloned submodule if needed */ - git_config_get_string("submodule.alternateLocation", &sm_alternate); - if (sm_alternate) - git_config_set_in_file(p, "submodule.alternateLocation", - sm_alternate); - git_config_get_string("submodule.alternateErrorStrategy", &error_strategy); - if (error_strategy) - git_config_set_in_file(p, "submodule.alternateErrorStrategy", - error_strategy); - - free(sm_alternate); - free(error_strategy); - - strbuf_release(&sb); - free(sm_gitdir); - free(path); - free(p); + clone_submodule(&clone_data); return 0; } From 8c8195e9c3e21922673575828977845b613c3491 Mon Sep 17 00:00:00 2001 From: Atharva Raykar Date: Sat, 10 Jul 2021 13:18:01 +0530 Subject: [PATCH 04/51] submodule--helper: introduce add-clone subcommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's add a new "add-clone" subcommand to `git submodule--helper` with the goal of converting part of the shell code in git-submodule.sh related to `git submodule add` into C code. This new subcommand clones the repository that is to be added, and checks out to the appropriate branch. This is meant to be a faithful conversion that leaves the behaviour of 'cmd_add()' script unchanged. Signed-off-by: Atharva Raykar Mentored-by: Christian Couder Mentored-by: Shourya Shukla Based-on-patch-by: Shourya Shukla Based-on-patch-by: Prathamesh Chavan Helped-by: Đoàn Trần Công Danh Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 177 ++++++++++++++++++++++++++++++++++++ git-submodule.sh | 38 +------- 2 files changed, 178 insertions(+), 37 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index ae246a35f9..6d52a73a57 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -2760,6 +2760,182 @@ static int module_set_branch(int argc, const char **argv, const char *prefix) return !!ret; } +struct add_data { + const char *prefix; + const char *branch; + const char *reference_path; + const char *sm_path; + const char *sm_name; + const char *repo; + const char *realrepo; + int depth; + unsigned int force: 1; + unsigned int quiet: 1; + unsigned int progress: 1; + unsigned int dissociate: 1; +}; +#define ADD_DATA_INIT { .depth = -1 } + +static void show_fetch_remotes(FILE *output, const char *sm_name, const char *git_dir_path) +{ + struct child_process cp_remote = CHILD_PROCESS_INIT; + struct strbuf sb_remote_out = STRBUF_INIT; + + cp_remote.git_cmd = 1; + strvec_pushf(&cp_remote.env_array, + "GIT_DIR=%s", git_dir_path); + strvec_push(&cp_remote.env_array, "GIT_WORK_TREE=."); + strvec_pushl(&cp_remote.args, "remote", "-v", NULL); + if (!capture_command(&cp_remote, &sb_remote_out, 0)) { + char *next_line; + char *line = sb_remote_out.buf; + while ((next_line = strchr(line, '\n')) != NULL) { + size_t len = next_line - line; + if (strip_suffix_mem(line, &len, " (fetch)")) + fprintf(output, " %.*s\n", (int)len, line); + line = next_line + 1; + } + } + + strbuf_release(&sb_remote_out); +} + +static int add_submodule(const struct add_data *add_data) +{ + char *submod_gitdir_path; + struct module_clone_data clone_data = MODULE_CLONE_DATA_INIT; + + /* perhaps the path already exists and is already a git repo, else clone it */ + if (is_directory(add_data->sm_path)) { + struct strbuf sm_path = STRBUF_INIT; + strbuf_addstr(&sm_path, add_data->sm_path); + submod_gitdir_path = xstrfmt("%s/.git", add_data->sm_path); + if (is_nonbare_repository_dir(&sm_path)) + printf(_("Adding existing repo at '%s' to the index\n"), + add_data->sm_path); + else + die(_("'%s' already exists and is not a valid git repo"), + add_data->sm_path); + strbuf_release(&sm_path); + free(submod_gitdir_path); + } else { + struct child_process cp = CHILD_PROCESS_INIT; + submod_gitdir_path = xstrfmt(".git/modules/%s", add_data->sm_name); + + if (is_directory(submod_gitdir_path)) { + if (!add_data->force) { + fprintf(stderr, _("A git directory for '%s' is found " + "locally with remote(s):"), + add_data->sm_name); + show_fetch_remotes(stderr, add_data->sm_name, + submod_gitdir_path); + free(submod_gitdir_path); + die(_("If you want to reuse this local git " + "directory instead of cloning again from\n" + " %s\n" + "use the '--force' option. If the local git " + "directory is not the correct repo\n" + "or if you are unsure what this means, choose " + "another name with the '--name' option.\n"), + add_data->realrepo); + } else { + printf(_("Reactivating local git directory for " + "submodule '%s'\n"), add_data->sm_name); + } + } + free(submod_gitdir_path); + + clone_data.prefix = add_data->prefix; + clone_data.path = add_data->sm_path; + clone_data.name = add_data->sm_name; + clone_data.url = add_data->realrepo; + clone_data.quiet = add_data->quiet; + clone_data.progress = add_data->progress; + if (add_data->reference_path) + string_list_append(&clone_data.reference, + xstrdup(add_data->reference_path)); + clone_data.dissociate = add_data->dissociate; + if (add_data->depth >= 0) + clone_data.depth = xstrfmt("%d", add_data->depth); + + if (clone_submodule(&clone_data)) + return -1; + + prepare_submodule_repo_env(&cp.env_array); + cp.git_cmd = 1; + cp.dir = add_data->sm_path; + strvec_pushl(&cp.args, "checkout", "-f", "-q", NULL); + + if (add_data->branch) { + strvec_pushl(&cp.args, "-B", add_data->branch, NULL); + strvec_pushf(&cp.args, "origin/%s", add_data->branch); + } + + if (run_command(&cp)) + die(_("unable to checkout submodule '%s'"), add_data->sm_path); + } + return 0; +} + +static int add_clone(int argc, const char **argv, const char *prefix) +{ + int force = 0, quiet = 0, dissociate = 0, progress = 0; + struct add_data add_data = ADD_DATA_INIT; + + struct option options[] = { + OPT_STRING('b', "branch", &add_data.branch, + N_("branch"), + N_("branch of repository to checkout on cloning")), + OPT_STRING(0, "prefix", &prefix, + N_("path"), + N_("alternative anchor for relative paths")), + OPT_STRING(0, "path", &add_data.sm_path, + N_("path"), + N_("where the new submodule will be cloned to")), + OPT_STRING(0, "name", &add_data.sm_name, + N_("string"), + N_("name of the new submodule")), + OPT_STRING(0, "url", &add_data.realrepo, + N_("string"), + N_("url where to clone the submodule from")), + OPT_STRING(0, "reference", &add_data.reference_path, + N_("repo"), + N_("reference repository")), + OPT_BOOL(0, "dissociate", &dissociate, + N_("use --reference only while cloning")), + OPT_INTEGER(0, "depth", &add_data.depth, + N_("depth for shallow clones")), + OPT_BOOL(0, "progress", &progress, + N_("force cloning progress")), + OPT__FORCE(&force, N_("allow adding an otherwise ignored submodule path"), + PARSE_OPT_NOCOMPLETE), + OPT__QUIET(&quiet, "suppress output for cloning a submodule"), + OPT_END() + }; + + const char *const usage[] = { + N_("git submodule--helper add-clone [...] " + "--url --path --name "), + NULL + }; + + argc = parse_options(argc, argv, prefix, options, usage, 0); + + if (argc != 0) + usage_with_options(usage, options); + + add_data.prefix = prefix; + add_data.progress = !!progress; + add_data.dissociate = !!dissociate; + add_data.force = !!force; + add_data.quiet = !!quiet; + + if (add_submodule(&add_data)) + return 1; + + return 0; +} + #define SUPPORT_SUPER_PREFIX (1<<0) struct cmd_struct { @@ -2772,6 +2948,7 @@ static struct cmd_struct commands[] = { {"list", module_list, 0}, {"name", module_name, 0}, {"clone", module_clone, 0}, + {"add-clone", add_clone, 0}, {"update-module-mode", module_update_module_mode, 0}, {"update-clone", update_clone, 0}, {"ensure-core-worktree", ensure_core_worktree, 0}, diff --git a/git-submodule.sh b/git-submodule.sh index 69bcb4fab2..053daf3724 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -241,43 +241,7 @@ cmd_add() die "fatal: $(eval_gettext "'$sm_name' is not a valid submodule name")" fi - # perhaps the path exists and is already a git repo, else clone it - if test -e "$sm_path" - then - if test -d "$sm_path"/.git || test -f "$sm_path"/.git - then - eval_gettextln "Adding existing repo at '\$sm_path' to the index" - else - die "$(eval_gettext "'\$sm_path' already exists and is not a valid git repo")" - fi - - else - if test -d ".git/modules/$sm_name" - then - if test -z "$force" - then - eval_gettextln >&2 "A git directory for '\$sm_name' is found locally with remote(s):" - GIT_DIR=".git/modules/$sm_name" GIT_WORK_TREE=. git remote -v | grep '(fetch)' | sed -e s,^," ", -e s,' (fetch)',, >&2 - die "$(eval_gettextln "\ -If you want to reuse this local git directory instead of cloning again from - \$realrepo -use the '--force' option. If the local git directory is not the correct repo -or you are unsure what this means choose another name with the '--name' option.")" - else - eval_gettextln "Reactivating local git directory for submodule '\$sm_name'." - fi - fi - git submodule--helper clone ${GIT_QUIET:+--quiet} ${progress:+"--progress"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit - ( - sanitize_submodule_env - cd "$sm_path" && - # ash fails to wordsplit ${branch:+-b "$branch"...} - case "$branch" in - '') git checkout -f -q ;; - ?*) git checkout -f -q -B "$branch" "origin/$branch" ;; - esac - ) || die "$(eval_gettext "Unable to checkout submodule '\$sm_path'")" - fi + git submodule--helper add-clone ${GIT_QUIET:+--quiet} ${force:+"--force"} ${progress:+"--progress"} ${branch:+--branch "$branch"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit git config submodule."$sm_name".url "$realrepo" git add --no-warn-embedded-repo $force "$sm_path" || From 0d53d1994670ce43964a34bdf1f68d26188bafc8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 02:13:02 +0000 Subject: [PATCH 05/51] p2000: add 'git checkout -' test and decrease depth As we increase our list of commands to test in p2000-sparse-operations.sh, we will want to have a slightly smaller test repository. Reduce the size by a factor of four by reducing the depth of the step that creates a big index around a moderately-sized repository. Also add a step to run 'git checkout -' on repeat. This requires having a previous location in the reflog, so add that to the initialization steps. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/perf/p2000-sparse-operations.sh | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index 94513c9774..f7f8c01210 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -6,7 +6,7 @@ test_description="test performance of Git operations using the index" test_perf_default_repo -SPARSE_CONE=f2/f4/f1 +SPARSE_CONE=f2/f4 test_expect_success 'setup repo and indexes' ' git reset --hard HEAD && @@ -27,7 +27,7 @@ test_expect_success 'setup repo and indexes' ' OLD_COMMIT=$(git rev-parse HEAD) && OLD_TREE=$(git rev-parse HEAD^{tree}) && - for i in $(test_seq 1 4) + for i in $(test_seq 1 3) do cat >in <<-EOF && 100755 blob $BLOB a @@ -43,14 +43,23 @@ test_expect_success 'setup repo and indexes' ' done && git sparse-checkout init --cone && - git branch -f wide $OLD_COMMIT && + git sparse-checkout set $SPARSE_CONE && + git checkout -b wide $OLD_COMMIT && + + for l2 in f1 f2 f3 f4 + do + echo more bogus >>$SPARSE_CONE/$l2/a && + git commit -a -m "edit $SPARSE_CONE/$l2/a" || return 1 + done && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v3 && ( cd full-index-v3 && git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && - git update-index --index-version=3 + git update-index --index-version=3 && + git checkout HEAD~4 ) && git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v4 && ( @@ -58,7 +67,8 @@ test_expect_success 'setup repo and indexes' ' git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && - git update-index --index-version=4 + git update-index --index-version=4 && + git checkout HEAD~4 ) && git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v3 && ( @@ -66,7 +76,8 @@ test_expect_success 'setup repo and indexes' ' git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && - git update-index --index-version=3 + git update-index --index-version=3 && + git checkout HEAD~4 ) && git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v4 && ( @@ -74,7 +85,8 @@ test_expect_success 'setup repo and indexes' ' git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && - git update-index --index-version=4 + git update-index --index-version=4 && + git checkout HEAD~4 ) ' @@ -97,5 +109,6 @@ test_perf_on_all git status test_perf_on_all git add -A test_perf_on_all git add . test_perf_on_all git commit -a -m A +test_perf_on_all git checkout -f - test_done From 11042ab9142bab44db75ba9ececf836053fdb92a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 02:13:03 +0000 Subject: [PATCH 06/51] p2000: compress repo names By using shorter names for the test repos, we will get a slightly more compressed performance summary without comprimising clarity. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/perf/p2000-sparse-operations.sh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index f7f8c01210..597626276f 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -52,36 +52,36 @@ test_expect_success 'setup repo and indexes' ' git commit -a -m "edit $SPARSE_CONE/$l2/a" || return 1 done && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v3 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-v3 && ( - cd full-index-v3 && + cd full-v3 && git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && git update-index --index-version=3 && git checkout HEAD~4 ) && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v4 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-v4 && ( - cd full-index-v4 && + cd full-v4 && git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && git update-index --index-version=4 && git checkout HEAD~4 ) && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v3 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-v3 && ( - cd sparse-index-v3 && + cd sparse-v3 && git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && git update-index --index-version=3 && git checkout HEAD~4 ) && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v4 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-v4 && ( - cd sparse-index-v4 && + cd sparse-v4 && git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && @@ -92,8 +92,8 @@ test_expect_success 'setup repo and indexes' ' test_perf_on_all () { command="$@" - for repo in full-index-v3 full-index-v4 \ - sparse-index-v3 sparse-index-v4 + for repo in full-v3 full-v4 \ + sparse-v3 sparse-v4 do test_perf "$command ($repo)" " ( From daa1acefc55bb6492c00519634e0a7622b3b6d69 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 02:13:04 +0000 Subject: [PATCH 07/51] commit: integrate with sparse-index Update 'git commit' to allow using the sparse-index in memory without expanding to a full one. The only place that had an ensure_full_index() call was in cache_tree_update(). The recursive algorithm for update_one() was already updated in 2de37c536 (cache-tree: integrate with sparse directory entries, 2021-03-03) to handle sparse directory entries in the index. Most of this change involves testing different command-line options that allow specifying which on-disk changes should be included in the commit. This includes no options (only take currently-staged changes), -a (take all tracked changes), and --include (take a list of specific changes). To simplify testing that these options do not expand the index, update the test that previously verified that 'git status' does not expand the index with a helper method, ensure_not_expanded(). This allows 'git commit' to operate much faster when the sparse-checkout cone is much smaller than the full list of files at HEAD. Here are the relevant lines from p2000-sparse-operations.sh: Test HEAD~1 HEAD ---------------------------------------------------------------------------------- 2000.14: git commit -a -m A (full-v3) 0.35(0.26+0.06) 0.36(0.28+0.07) +2.9% 2000.15: git commit -a -m A (full-v4) 0.32(0.26+0.05) 0.34(0.28+0.06) +6.3% 2000.16: git commit -a -m A (sparse-v3) 0.63(0.59+0.06) 0.04(0.05+0.05) -93.7% 2000.17: git commit -a -m A (sparse-v4) 0.64(0.59+0.08) 0.04(0.04+0.04) -93.8% It is important to compare the full-index case to the sparse-index case, so the improvement for index version v4 is actually an 88% improvement in this synthetic example. In a real repository with over two million files at HEAD and 60,000 files in the sparse-checkout definition, the time for 'git commit -a' went from 2.61 seconds to 134ms. I compared this to the result if the index only contained the paths in the sparse-checkout definition and found the theoretical optimum to be 120ms, so the out-of-cone paths only add a 12% overhead. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit.c | 3 ++ cache-tree.c | 2 - t/t1092-sparse-checkout-compatibility.sh | 47 ++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 12f51db158..0bc6489250 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1682,6 +1682,9 @@ int cmd_commit(int argc, const char **argv, const char *prefix) if (argc == 2 && !strcmp(argv[1], "-h")) usage_with_options(builtin_commit_usage, builtin_commit_options); + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + status_init_config(&s, git_commit_config); s.commit_template = 1; status_format = STATUS_FORMAT_NONE; /* Ignore status.short */ diff --git a/cache-tree.c b/cache-tree.c index 45e58666af..577b18d881 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -461,8 +461,6 @@ int cache_tree_update(struct index_state *istate, int flags) if (i) return i; - ensure_full_index(istate); - if (!istate->cache_tree) istate->cache_tree = cache_tree(); diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index cabbd42e33..d3e34d0aca 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -262,6 +262,34 @@ test_expect_success 'add, commit, checkout' ' test_all_match git checkout - ' +test_expect_success 'commit including unstaged changes' ' + init_repos && + + write_script edit-file <<-\EOF && + echo $1 >$2 + EOF + + run_on_all ../edit-file 1 a && + run_on_all ../edit-file 1 deep/a && + + test_all_match git commit -m "-a" -a && + test_all_match git status --porcelain=v2 && + + run_on_all ../edit-file 2 a && + run_on_all ../edit-file 2 deep/a && + + test_all_match git commit -m "--include" --include deep/a && + test_all_match git status --porcelain=v2 && + test_all_match git commit -m "--include" --include a && + test_all_match git status --porcelain=v2 && + + run_on_all ../edit-file 3 a && + run_on_all ../edit-file 3 deep/a && + + test_all_match git commit -m "--amend" -a --amend && + test_all_match git status --porcelain=v2 +' + test_expect_success 'status/add: outside sparse cone' ' init_repos && @@ -514,14 +542,25 @@ test_expect_success 'sparse-index is expanded and converted back' ' test_region index ensure_full_index trace2.txt ' -test_expect_success 'sparse-index is not expanded' ' - init_repos && - +ensure_not_expanded () { rm -f trace2.txt && echo >>sparse-index/untracked.txt && GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \ - git -C sparse-index status && + git -C sparse-index "$@" && test_region ! index ensure_full_index trace2.txt +} + +test_expect_success 'sparse-index is not expanded' ' + init_repos && + + ensure_not_expanded status && + ensure_not_expanded commit --allow-empty -m empty && + echo >>sparse-index/a && + ensure_not_expanded commit -a -m a && + echo >>sparse-index/a && + ensure_not_expanded commit --include a -m a && + echo >>sparse-index/deep/deeper1/a && + ensure_not_expanded commit --include deep/deeper1/a -m deeper ' # NEEDSWORK: a sparse-checkout behaves differently from a full checkout From f934f1b47fb56d18b2b81d9288590e03e9a0ed23 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 02:13:05 +0000 Subject: [PATCH 08/51] sparse-index: recompute cache-tree When some commands run with command_requires_full_index=1, then the index can get in a state where the in-memory cache tree is actually equal to the sparse index's cache tree instead of the full one. This results in incorrect entry_count values. By clearing the cache tree before converting to sparse, we avoid this issue. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- sparse-index.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sparse-index.c b/sparse-index.c index 53c8f711cc..c6b4feec41 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -170,6 +170,8 @@ int convert_to_sparse(struct index_state *istate) if (index_has_unmerged_entries(istate)) return 0; + /* Clear and recompute the cache-tree */ + cache_tree_free(&istate->cache_tree); if (cache_tree_update(istate, 0)) { warning(_("unable to update cache-tree, staying full")); return -1; From 1ba5f45132f987e630497c09b74af687bf4f863b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 02:13:06 +0000 Subject: [PATCH 09/51] checkout: stop expanding sparse indexes Previous changes did the necessary improvements to unpack-trees.c and diff-lib.c in order to modify a sparse index based on its comparision with a tree. The only remaining work is to remove some ensure_full_index() calls and add tests that verify that the index is not expanded in our interesting cases. Include 'switch' and 'restore' in these tests, as they share a base implementation with 'checkout'. Here are the relevant performance results from p2000-sparse-operations.sh: Test HEAD~1 HEAD -------------------------------------------------------------------------------- 2000.18: git checkout -f - (full-v3) 0.49(0.43+0.03) 0.47(0.39+0.05) -4.1% 2000.19: git checkout -f - (full-v4) 0.45(0.37+0.06) 0.42(0.37+0.05) -6.7% 2000.20: git checkout -f - (sparse-v3) 0.76(0.71+0.07) 0.04(0.03+0.04) -94.7% 2000.21: git checkout -f - (sparse-v4) 0.75(0.72+0.04) 0.05(0.06+0.04) -93.3% It is important to compare the full index case to the sparse index case, as the previous results for the sparse index were inflated by the index expansion. For index v4, this is an 88% improvement. On an internal repository with over two million paths at HEAD and a sparse-checkout definition containing ~60,000 of those paths, 'git checkout' went from 3.5s to 297ms with this change. The theoretical optimum where only those ~60,000 paths exist was 275ms, so the extra sparse directory entries contribute a 22ms overhead. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/checkout.c | 8 +++----- t/t1092-sparse-checkout-compatibility.sh | 10 +++++++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index f4cd7747d3..b5d477919a 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -378,9 +378,6 @@ static int checkout_worktree(const struct checkout_opts *opts, if (pc_workers > 1) init_parallel_checkout(); - /* TODO: audit for interaction with sparse-index. */ - ensure_full_index(&the_index); - for (pos = 0; pos < active_nr; pos++) { struct cache_entry *ce = active_cache[pos]; if (ce->ce_flags & CE_MATCHED) { @@ -530,8 +527,6 @@ static int checkout_paths(const struct checkout_opts *opts, * Make sure all pathspecs participated in locating the paths * to be checked out. */ - /* TODO: audit for interaction with sparse-index. */ - ensure_full_index(&the_index); for (pos = 0; pos < active_nr; pos++) if (opts->overlay_mode) mark_ce_for_checkout_overlay(active_cache[pos], @@ -1593,6 +1588,9 @@ static int checkout_main(int argc, const char **argv, const char *prefix, git_config(git_checkout_config, opts); + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + opts->track = BRANCH_TRACK_UNSPECIFIED; if (!opts->accept_pathspec && !opts->accept_ref) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index d3e34d0aca..fde3b41aba 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -560,7 +560,15 @@ test_expect_success 'sparse-index is not expanded' ' echo >>sparse-index/a && ensure_not_expanded commit --include a -m a && echo >>sparse-index/deep/deeper1/a && - ensure_not_expanded commit --include deep/deeper1/a -m deeper + ensure_not_expanded commit --include deep/deeper1/a -m deeper && + ensure_not_expanded checkout rename-out-to-out && + ensure_not_expanded checkout - && + ensure_not_expanded switch rename-out-to-out && + ensure_not_expanded switch - && + git -C sparse-index reset --hard && + ensure_not_expanded checkout rename-out-to-out -- deep/deeper1 && + git -C sparse-index reset --hard && + ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1 ' # NEEDSWORK: a sparse-checkout behaves differently from a full checkout From 785bf2088e54ed8a450edb5c7371286ff6405605 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 16 Jul 2021 05:22:31 +0000 Subject: [PATCH 10/51] merge-ort: resolve paths early when we have sufficient information When there are no directories involved at a given path, and all three sides have a file at that path, and two of the three sides of history match, we can immediately resolve the merge of that path in collect_merge_info() and do not need to wait until process_entries(). This is actually a very minor improvement: half the time when I run it, I see an improvement; the other half a slowdown. It seems to be in the range of noise. However, this idea serves as the beginning of some bigger optimizations coming in the following patches. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 8cfa0ccd3c..5704326973 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1023,6 +1023,43 @@ static int collect_merge_info_callback(int n, return mask; } + /* + * If the sides match, and all three paths are present and are + * files, then we can take either as the resolution. We can't do + * this with trees, because there may be rename sources from the + * merge_base. + */ + if (sides_match && filemask == 0x07) { + /* use side1 (== side2) version as resolution */ + setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+1, side1_null, 0, + filemask, dirmask, 1); + return mask; + } + + /* + * If side1 matches mbase and all three paths are present and are + * files, then we can use side2 as the resolution. We cannot + * necessarily do so this for trees, because there may be rename + * destinations within side2. + */ + if (side1_matches_mbase && filemask == 0x07) { + /* use side2 version as resolution */ + setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+2, side2_null, 0, + filemask, dirmask, 1); + return mask; + } + + /* Similar to above but swapping sides 1 and 2 */ + if (side2_matches_mbase && filemask == 0x07) { + /* use side1 version as resolution */ + setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+1, side1_null, 0, + filemask, dirmask, 1); + return mask; + } + /* * Gather additional information used in rename detection. */ From 528fc51b6d899e080a877f9420efcca86c98b35e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 16 Jul 2021 05:22:32 +0000 Subject: [PATCH 11/51] merge-ort: add some more explanations in collect_merge_info_callback() The previous patch possibly raises some questions about whether additional cases in collect_merge_info_callback() can be handled early. Add some explanations in the form of comments to help explain these better. While we're at it, add a few comments to denote what a few boolean '0' or '1' values stand for. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 5704326973..37aaac38b9 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1018,8 +1018,8 @@ static int collect_merge_info_callback(int n, if (side1_matches_mbase && side2_matches_mbase) { /* mbase, side1, & side2 all match; use mbase as resolution */ setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, - names, names+0, mbase_null, 0, - filemask, dirmask, 1); + names, names+0, mbase_null, 0 /* df_conflict */, + filemask, dirmask, 1 /* resolved */); return mask; } @@ -1061,14 +1061,24 @@ static int collect_merge_info_callback(int n, } /* - * Gather additional information used in rename detection. + * Sometimes we can tell that a source path need not be included in + * rename detection -- namely, whenever either + * side1_matches_mbase && side2_null + * or + * side2_matches_mbase && side1_null + * However, we call collect_rename_info() even in those cases, + * because exact renames are cheap and would let us remove both a + * source and destination path. We'll cull the unneeded sources + * later. */ collect_rename_info(opt, names, dirname, fullpath, filemask, dirmask, match_mask); /* - * Record information about the path so we can resolve later in - * process_entries. + * None of the special cases above matched, so we have a + * provisional conflict. (Rename detection might allow us to + * unconflict some more cases, but that comes later so all we can + * do now is record the different non-null file hashes.) */ setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, names, NULL, 0, df_conflict, filemask, dirmask, 0); From d478f5675923d02d2676bf2e47f0ccdd4dba0da8 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 16 Jul 2021 05:22:33 +0000 Subject: [PATCH 12/51] merge-ort: add data structures for allowable trivial directory resolves As noted a few commits ago, we can resolve individual files early if all three sides of the merge have a file at the path and two of the three sides match. We would really like to do the same thing with directories, because being able to do a trivial directory resolve means we don't have to recurse into the directory, potentially saving us a huge amount of time in both collect_merge_info() and process_entries(). Unfortunately, resolving directories early would mean missing any renames whose source or destination is underneath that directory. If we somehow knew there weren't any renames under the directory in question, then we could resolve it early. Sadly, it is impossible to determine whether there are renames under the directory in question without recursing into it, and this has traditionally kept us from ever implementing such an optimization. In commit f89b4f2bee ("merge-ort: skip rename detection entirely if possible", 2021-03-11), we added an additional reason that rename detection could be skipped entirely -- namely, if no *relevant* sources were present. Without completing collect_merge_info_callback(), we do not yet know if there are no relevant sources. However, we do know that if the current directory on one side matches the merge base, then every source file within that directory will not be RELEVANT_CONTENT, and a few simple checks can often let us rule out RELEVANT_LOCATION as well. This suggests we can just defer recursing into such directories until the end of collect_merge_info. Since the deferred directories are known to not add any relevant sources due to the above properties, then if there are no relevant sources after we've traversed all paths other than the deferred ones, then we know there are not any relevant sources. Under those conditions, rename detection is unnecessary, and that means we can resolve the deferred directories without recursing into them. Note that the logic for skipping rename detection was also modified further in commit 76e253793c ("merge-ort, diffcore-rename: employ cached renames when possible", 2021-01-30); in particular rename detection can be skipped if we already have cached renames for each relevant source. We can take advantage of this information as well with our deferral of recursing into directories where one side matches the merge base. Add some data structures that we will use to do these deferrals, with some lengthy comments explaining their purpose. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 37aaac38b9..15ce7f8c52 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -62,6 +62,53 @@ struct traversal_callback_data { struct name_entry names[3]; }; +struct deferred_traversal_data { + /* + * possible_trivial_merges: directories to be explored only when needed + * + * possible_trivial_merges is a map of directory names to + * dir_rename_mask. When we detect that a directory is unchanged on + * one side, we can sometimes resolve the directory without recursing + * into it. Renames are the only things that can prevent such an + * optimization. However, for rename sources: + * - If no parent directory needed directory rename detection, then + * no path under such a directory can be a relevant_source. + * and for rename destinations: + * - If no cached rename has a target path under the directory AND + * - If there are no unpaired relevant_sources elsewhere in the + * repository + * then we don't need any path under this directory for a rename + * destination. The only way to know the last item above is to defer + * handling such directories until the end of collect_merge_info(), + * in handle_deferred_entries(). + * + * For each we store dir_rename_mask, since that's the only bit of + * information we need, other than the path, to resume the recursive + * traversal. + */ + struct strintmap possible_trivial_merges; + + /* + * trivial_merges_okay: if trivial directory merges are okay + * + * See possible_trivial_merges above. The "no unpaired + * relevant_sources elsewhere in the repository" is a single boolean + * per merge side, which we store here. Note that while 0 means no, + * 1 only means "maybe" rather than "yes"; we optimistically set it + * to 1 initially and only clear when we determine it is unsafe to + * do trivial directory merges. + */ + unsigned trivial_merges_okay; + + /* + * target_dirs: ancestor directories of rename targets + * + * target_dirs contains all directory names that are an ancestor of + * any rename destination. + */ + struct strset target_dirs; +}; + struct rename_info { /* * All variables that are arrays of size 3 correspond to data tracked @@ -119,6 +166,8 @@ struct rename_info { */ struct strintmap relevant_sources[3]; + struct deferred_traversal_data deferred[3]; + /* * dir_rename_mask: * 0: optimization removing unmodified potential rename source okay @@ -501,6 +550,11 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, strmap_clear(&renames->dir_rename_count[i], 1); } } + for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) { + strintmap_func(&renames->deferred[i].possible_trivial_merges); + strset_func(&renames->deferred[i].target_dirs); + renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */ + } renames->cached_pairs_valid_side = 0; renames->dir_rename_mask = 0; @@ -4065,6 +4119,13 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) strset_init_with_options(&renames->cached_target_names[i], NULL, 0); } + for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) { + strintmap_init_with_options(&renames->deferred[i].possible_trivial_merges, + 0, NULL, 0); + strset_init_with_options(&renames->deferred[i].target_dirs, + NULL, 1); + renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */ + } /* * Although we initialize opt->priv->paths with strdup_strings=0, From e0ef578eae4aea91920ef394a0d38170b39777d1 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 16 Jul 2021 05:22:34 +0000 Subject: [PATCH 13/51] merge-ort: add a handle_deferred_entries() helper function In order to allow trivial directory resolution, we first need to be able to gather more information to determine if the optimization is safe. To enable that, we need a way of deferring the recursion into the directory until a later time. Naturally, deferring the entry into a subtree means that we need some function that will later recurse into the subdirectory exactly the same way that collect_merge_info_callback() would have done. Add a helper function that does this. For now this function is not used but a subsequent commit will change that. Future commits will also make the function sometimes resolve directories instead of traversing inside. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 15ce7f8c52..6f81795846 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1202,6 +1202,70 @@ static int collect_merge_info_callback(int n, return mask; } +MAYBE_UNUSED +static int handle_deferred_entries(struct merge_options *opt, + struct traverse_info *info) +{ + struct rename_info *renames = &opt->priv->renames; + struct hashmap_iter iter; + struct strmap_entry *entry; + int side, ret = 0; + + for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) { + renames->deferred[side].trivial_merges_okay = 0; + strintmap_for_each_entry(&renames->deferred[side].possible_trivial_merges, + &iter, entry) { + const char *path = entry->key; + unsigned dir_rename_mask = (intptr_t)entry->value; + struct conflict_info *ci; + unsigned dirmask; + struct tree_desc t[3]; + void *buf[3] = {NULL,}; + int i; + + ci = strmap_get(&opt->priv->paths, path); + VERIFY_CI(ci); + dirmask = ci->dirmask; + + info->name = path; + info->namelen = strlen(path); + info->pathlen = info->namelen + 1; + + for (i = 0; i < 3; i++, dirmask >>= 1) { + if (i == 1 && ci->match_mask == 3) + t[1] = t[0]; + else if (i == 2 && ci->match_mask == 5) + t[2] = t[0]; + else if (i == 2 && ci->match_mask == 6) + t[2] = t[1]; + else { + const struct object_id *oid = NULL; + if (dirmask & 1) + oid = &ci->stages[i].oid; + buf[i] = fill_tree_descriptor(opt->repo, + t+i, oid); + } + } + + ci->match_mask &= ci->filemask; + opt->priv->current_dir_name = path; + renames->dir_rename_mask = dir_rename_mask; + if (renames->dir_rename_mask == 0 || + renames->dir_rename_mask == 0x07) + ret = traverse_trees(NULL, 3, t, info); + else + ret = traverse_trees_wrapper(NULL, 3, t, info); + + for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) + free(buf[i]); + + if (ret < 0) + return ret; + } + } + return ret; +} + static int collect_merge_info(struct merge_options *opt, struct tree *merge_base, struct tree *side1, From 5e1ca57a7bbf19ac6c92a4e032ca05345dc622bd Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 16 Jul 2021 05:22:35 +0000 Subject: [PATCH 14/51] merge-ort: defer recursing into directories when merge base is matched When one side of history matches the merge base (including when the merge base has no entry for the given directory), have collect_merge_info_callback() defer recursing into the directory. To ensure those entries are eventually handled, add a call to handled_deferred_entries() in collect_merge_info() after traverse_trees() returns. Note that the condition in collect_merge_info_callback() may look more complicated than necessary at first glance; renames->trivial_merges_okay[side] is always true until handle_deferred_entries() is called, and possible_trivial_merges[side] is always empty right now (and in the future won't be filled until handle_deferred_entries() is called). However, when handle_deferred_entries() calls traverse_trees() for the relevant deferred directories, those traverse_trees() calls will once again end up in collect_merge_info_callback() for all the entries under those subdirectories. The extra conditions are there for such deferred cases and will be used more as we do more with those variables. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 6f81795846..7e624fcd36 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1147,8 +1147,36 @@ static int collect_merge_info_callback(int n, struct tree_desc t[3]; void *buf[3] = {NULL, NULL, NULL}; const char *original_dir_name; - int i, ret; + int i, ret, side; + /* + * Check for whether we can avoid recursing due to one side + * matching the merge base. The side that does NOT match is + * the one that might have a rename destination we need. + */ + assert(!side1_matches_mbase || !side2_matches_mbase); + side = side1_matches_mbase ? MERGE_SIDE2 : + side2_matches_mbase ? MERGE_SIDE1 : MERGE_BASE; + if (filemask == 0 && (dirmask == 2 || dirmask == 4)) { + /* + * Also defer recursing into new directories; set up a + * few variables to let us do so. + */ + ci->match_mask = (7 - dirmask); + side = dirmask / 2; + } + if (renames->dir_rename_mask != 0x07 && + side != MERGE_BASE && + renames->deferred[side].trivial_merges_okay && + !strset_contains(&renames->deferred[side].target_dirs, + pi.string)) { + strintmap_set(&renames->deferred[side].possible_trivial_merges, + pi.string, renames->dir_rename_mask); + renames->dir_rename_mask = prev_dir_rename_mask; + return mask; + } + + /* We need to recurse */ ci->match_mask &= filemask; newinfo = *info; newinfo.prev = info; @@ -1202,7 +1230,6 @@ static int collect_merge_info_callback(int n, return mask; } -MAYBE_UNUSED static int handle_deferred_entries(struct merge_options *opt, struct traverse_info *info) { @@ -1291,6 +1318,8 @@ static int collect_merge_info(struct merge_options *opt, trace2_region_enter("merge", "traverse_trees", opt->repo); ret = traverse_trees(NULL, 3, t, &info); + if (ret == 0) + ret = handle_deferred_entries(opt, &info); trace2_region_leave("merge", "traverse_trees", opt->repo); return ret; From 7bee6c100431e5c24cf27b5a7cfc9f67c8c66751 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 16 Jul 2021 05:22:36 +0000 Subject: [PATCH 15/51] merge-ort: avoid recursing into directories when we don't need to MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This combines the work of the last several patches, and implements the conditions when we don't need to recurse into directories. It's perhaps easiest to see the logic by separating the fact that a directory might have both rename sources and rename destinations: * rename sources: only files present in the merge base can serve as rename sources, and only when one side deletes that file. When the tree on one side matches the merge base, that means every file within the subtree matches the merge base. This means that the skip-irrelevant-rename-detection optimization from before kicks in and we don't need any of these files as rename sources. * rename destinations: the tree that does not match the merge base might have newly added and hence unmatched destination files. This is what usually prevents us from doing trivial directory resolutions in the merge machinery. However, the fact that we have deferred recursing into this directory until the end means we know whether there are any unmatched relevant potential rename sources elsewhere in this merge. If there are no unmatched such relevant sources anywhere, then there is no need to look for unmatched potential rename destinations to match them with. This informs our algorithm: * Search through relevant_sources; if we have entries, they better all be reflected in cached_pairs or cached_irrelevant, otherwise they represent an unmatched potential rename source (causing the optimization to be disallowed). * For any relevant_source represented in cached_pairs, we do need to to make sure to get the destination for each source, meaning we need to recurse into any ancestor directories of those destinations. * Once we've recursed into all the rename destinations for any relevant_sources in cached_pairs, we can then do the trivial directory resolution for the remaining directories. For the testcases mentioned in commit 557ac0350d ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows: Before After no-renames: 5.235 s ± 0.042 s 205.1 ms ± 3.8 ms mega-renames: 9.419 s ± 0.107 s 1.564 s ± 0.010 s just-one-mega: 480.1 ms ± 3.9 ms 479.5 ms ± 3.9 ms Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 3 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 7e624fcd36..ad94b30a76 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1230,6 +1230,18 @@ static int collect_merge_info_callback(int n, return mask; } +static void resolve_trivial_directory_merge(struct conflict_info *ci, int side) +{ + VERIFY_CI(ci); + assert((side == 1 && ci->match_mask == 5) || + (side == 2 && ci->match_mask == 3)); + oidcpy(&ci->merged.result.oid, &ci->stages[side].oid); + ci->merged.result.mode = ci->stages[side].mode; + ci->merged.is_null = is_null_oid(&ci->stages[side].oid); + ci->match_mask = 0; + ci->merged.clean = 1; /* (ci->filemask == 0); */ +} + static int handle_deferred_entries(struct merge_options *opt, struct traverse_info *info) { @@ -1239,9 +1251,72 @@ static int handle_deferred_entries(struct merge_options *opt, int side, ret = 0; for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) { - renames->deferred[side].trivial_merges_okay = 0; - strintmap_for_each_entry(&renames->deferred[side].possible_trivial_merges, - &iter, entry) { + unsigned optimization_okay = 1; + struct strintmap copy; + + /* Loop over the set of paths we need to know rename info for */ + strset_for_each_entry(&renames->relevant_sources[side], + &iter, entry) { + char *rename_target, *dir, *dir_marker; + struct strmap_entry *e; + + /* + * If we don't know delete/rename info for this path, + * then we need to recurse into all trees to get all + * adds to make sure we have it. + */ + if (strset_contains(&renames->cached_irrelevant[side], + entry->key)) + continue; + e = strmap_get_entry(&renames->cached_pairs[side], + entry->key); + if (!e) { + optimization_okay = 0; + break; + } + + /* If this is a delete, we have enough info already */ + rename_target = e->value; + if (!rename_target) + continue; + + /* If we already walked the rename target, we're good */ + if (strmap_contains(&opt->priv->paths, rename_target)) + continue; + + /* + * Otherwise, we need to get a list of directories that + * will need to be recursed into to get this + * rename_target. + */ + dir = xstrdup(rename_target); + while ((dir_marker = strrchr(dir, '/'))) { + *dir_marker = '\0'; + if (strset_contains(&renames->deferred[side].target_dirs, + dir)) + break; + strset_add(&renames->deferred[side].target_dirs, + dir); + } + free(dir); + } + renames->deferred[side].trivial_merges_okay = optimization_okay; + /* + * We need to recurse into any directories in + * possible_trivial_merges[side] found in target_dirs[side]. + * But when we recurse, we may need to queue up some of the + * subdirectories for possible_trivial_merges[side]. Since + * we can't safely iterate through a hashmap while also adding + * entries, move the entries into 'copy', iterate over 'copy', + * and then we'll also iterate anything added into + * possible_trivial_merges[side] once this loop is done. + */ + copy = renames->deferred[side].possible_trivial_merges; + strintmap_init_with_options(&renames->deferred[side].possible_trivial_merges, + 0, + NULL, + 0); + strintmap_for_each_entry(©, &iter, entry) { const char *path = entry->key; unsigned dir_rename_mask = (intptr_t)entry->value; struct conflict_info *ci; @@ -1254,6 +1329,13 @@ static int handle_deferred_entries(struct merge_options *opt, VERIFY_CI(ci); dirmask = ci->dirmask; + if (optimization_okay && + !strset_contains(&renames->deferred[side].target_dirs, + path)) { + resolve_trivial_directory_merge(ci, side); + continue; + } + info->name = path; info->namelen = strlen(path); info->pathlen = info->namelen + 1; @@ -1289,6 +1371,20 @@ static int handle_deferred_entries(struct merge_options *opt, if (ret < 0) return ret; } + strintmap_clear(©); + strintmap_for_each_entry(&renames->deferred[side].possible_trivial_merges, + &iter, entry) { + const char *path = entry->key; + struct conflict_info *ci; + + ci = strmap_get(&opt->priv->paths, path); + VERIFY_CI(ci); + + assert(renames->deferred[side].trivial_merges_okay && + !strset_contains(&renames->deferred[side].target_dirs, + path)); + resolve_trivial_directory_merge(ci, side); + } } return ret; } From 8b09a900a1f1f00d4deb04f567994ae8f1804b5e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 16 Jul 2021 05:22:37 +0000 Subject: [PATCH 16/51] merge-ort: restart merge with cached renames to reduce process entry cost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merge algorithm mostly consists of the following three functions: collect_merge_info() detect_and_process_renames() process_entries() Prior to the trivial directory resolution optimization of the last half dozen commits, process_entries() was consistently the slowest, followed by collect_merge_info(), then detect_and_process_renames(). When the trivial directory resolution applies, it often dramatically decreases the amount of time spent in the two slower functions. Looking at the performance results in the previous commit, the trivial directory resolution optimization helps amazingly well when there are no relevant renames. It also helps really well when reapplying a long series of linear commits (such as in a rebase or cherry-pick), since the relevant renames may well be cached from the first reapplied commit. But when there are any relevant renames that are not cached (represented by the just-one-mega testcase), then the optimization does not help at all. Often, I noticed that when the optimization does not apply, it is because there are a handful of relevant sources -- maybe even only one. It felt frustrating to need to recurse into potentially hundreds or even thousands of directories just for a single rename, but it was needed for correctness. However, staring at this list of functions and noticing that process_entries() is the most expensive and knowing I could avoid it if I had cached renames suggested a simple idea: change collect_merge_info() detect_and_process_renames() process_entries() into collect_merge_info() detect_and_process_renames() collect_merge_info() detect_and_process_renames() process_entries() This may seem odd and look like more work. However, note that although we run collect_merge_info() twice, the second time we get to employ trivial directory resolves, which makes it much faster, so the increased time in collect_merge_info() is small. While we run detect_and_process_renames() again, all renames are cached so it's nearly a no-op (we don't call into diffcore_rename_extended() but we do have a little bit of data structure checking and fixing up). And the big payoff comes from the fact that process_entries(), will be much faster due to having far fewer entries to process. This restarting only makes sense if we can save recursing into enough directories to make it worth our while. Introduce a simple heuristic to guide this. Note that this heuristic uses a "wanted_factor" that I have virtually no actual real world data for, just some back-of-the-envelope quasi-scientific calculations that I included in some comments and then plucked a simple round number out of thin air. It could be that tweaking this number to make it either higher or lower improves the optimization. (There's slightly more here; when I first introduced this optimization, I used a factor of 10, because I was completely confident it was big enough to not cause slowdowns in special cases. I was certain it was higher than needed. Several months later, I added the rough calculations which make me think the optimal number is close to 2; but instead of pushing to the limit, I just bumped it to 3 to reduce the risk that there are special cases where this optimization can result in slowing down the code a little. If the ratio of path counts is below 3, we probably will only see minor performance improvements at best anyway.) Also, note that while the diffstat looks kind of long (nearly 100 lines), more than half of it is in two comments explaining how things work. For the testcases mentioned in commit 557ac0350d ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows: Before After no-renames: 205.1 ms ± 3.8 ms 204.2 ms ± 3.0 ms mega-renames: 1.564 s ± 0.010 s 1.076 s ± 0.015 s just-one-mega: 479.5 ms ± 3.9 ms 364.1 ms ± 7.0 ms Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 92 +++++++++++++++++++++++++++-- t/t6423-merge-rename-directories.sh | 2 +- 2 files changed, 87 insertions(+), 7 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index ad94b30a76..e75b524153 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -213,6 +213,7 @@ struct rename_info { * MERGE_SIDE2: cached data from side2 can be reused * MERGE_SIDE1: cached data from side1 can be reused * 0: no cached data can be reused + * -1: See redo_after_renames; both sides can be reused. */ int cached_pairs_valid_side; @@ -258,6 +259,28 @@ struct rename_info { */ struct strset cached_irrelevant[3]; + /* + * redo_after_renames: optimization flag for "restarting" the merge + * + * Sometimes it pays to detect renames, cache them, and then + * restart the merge operation from the beginning. The reason for + * this is that when we know where all the renames are, we know + * whether a certain directory has any paths under it affected -- + * and if a directory is not affected then it permits us to do + * trivial tree merging in more cases. Doing trivial tree merging + * prevents the need to run process_entry() on every path + * underneath trees that can be trivially merged, and + * process_entry() is more expensive than collect_merge_info() -- + * plus, the second collect_merge_info() will be much faster since + * it doesn't have to recurse into the relevant trees. + * + * Values for this flag: + * 0 = don't bother, not worth it (or conditions not yet checked) + * 1 = conditions for optimization met, optimization worthwhile + * 2 = we already did it (don't restart merge yet again) + */ + unsigned redo_after_renames; + /* * needed_limit: value needed for inexact rename detection to run * @@ -541,7 +564,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, strintmap_func(&renames->relevant_sources[i]); if (!reinitialize) assert(renames->cached_pairs_valid_side == 0); - if (i != renames->cached_pairs_valid_side) { + if (i != renames->cached_pairs_valid_side && + -1 != renames->cached_pairs_valid_side) { strset_func(&renames->cached_target_names[i]); strmap_func(&renames->cached_pairs[i], 1); strset_func(&renames->cached_irrelevant[i]); @@ -1249,7 +1273,9 @@ static int handle_deferred_entries(struct merge_options *opt, struct hashmap_iter iter; struct strmap_entry *entry; int side, ret = 0; + int path_count_before, path_count_after = 0; + path_count_before = strmap_get_size(&opt->priv->paths); for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) { unsigned optimization_okay = 1; struct strintmap copy; @@ -1385,7 +1411,32 @@ static int handle_deferred_entries(struct merge_options *opt, path)); resolve_trivial_directory_merge(ci, side); } + if (!optimization_okay || path_count_after) + path_count_after = strmap_get_size(&opt->priv->paths); } + if (path_count_after) { + /* + * The choice of wanted_factor here does not affect + * correctness, only performance. When the + * path_count_after / path_count_before + * ratio is high, redoing after renames is a big + * performance boost. I suspect that redoing is a wash + * somewhere near a value of 2, and below that redoing will + * slow things down. I applied a fudge factor and picked + * 3; see the commit message when this was introduced for + * back of the envelope calculations for this ratio. + */ + const int wanted_factor = 3; + + /* We should only redo collect_merge_info one time */ + assert(renames->redo_after_renames == 0); + + if (path_count_after / path_count_before >= wanted_factor) { + renames->redo_after_renames = 1; + renames->cached_pairs_valid_side = -1; + } + } else if (renames->redo_after_renames == 2) + renames->redo_after_renames = 0; return ret; } @@ -2828,8 +2879,8 @@ static int compare_pairs(const void *a_, const void *b_) } /* Call diffcore_rename() to update deleted/added pairs into rename pairs */ -static void detect_regular_renames(struct merge_options *opt, - unsigned side_index) +static int detect_regular_renames(struct merge_options *opt, + unsigned side_index) { struct diff_options diff_opts; struct rename_info *renames = &opt->priv->renames; @@ -2842,7 +2893,7 @@ static void detect_regular_renames(struct merge_options *opt, * side had directory renames. */ resolve_diffpair_statuses(&renames->pairs[side_index]); - return; + return 0; } partial_clear_dir_rename_count(&renames->dir_rename_count[side_index]); @@ -2868,6 +2919,8 @@ static void detect_regular_renames(struct merge_options *opt, trace2_region_leave("diff", "diffcore_rename", opt->repo); resolve_diffpair_statuses(&diff_queued_diff); + if (diff_opts.needed_rename_limit > 0) + renames->redo_after_renames = 0; if (diff_opts.needed_rename_limit > renames->needed_limit) renames->needed_limit = diff_opts.needed_rename_limit; @@ -2877,6 +2930,8 @@ static void detect_regular_renames(struct merge_options *opt, diff_queued_diff.nr = 0; diff_queued_diff.queue = NULL; diff_flush(&diff_opts); + + return 1; } /* @@ -2966,14 +3021,32 @@ static int detect_and_process_renames(struct merge_options *opt, struct diff_queue_struct combined; struct rename_info *renames = &opt->priv->renames; int need_dir_renames, s, clean = 1; + unsigned detection_run = 0; memset(&combined, 0, sizeof(combined)); if (!possible_renames(renames)) goto cleanup; trace2_region_enter("merge", "regular renames", opt->repo); - detect_regular_renames(opt, MERGE_SIDE1); - detect_regular_renames(opt, MERGE_SIDE2); + detection_run |= detect_regular_renames(opt, MERGE_SIDE1); + detection_run |= detect_regular_renames(opt, MERGE_SIDE2); + if (renames->redo_after_renames && detection_run) { + int i, side; + struct diff_filepair *p; + + /* Cache the renames, we found */ + for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) { + for (i = 0; i < renames->pairs[side].nr; ++i) { + p = renames->pairs[side].queue[i]; + possibly_cache_new_pair(renames, p, side, NULL); + } + } + + /* Restart the merge with the cached renames */ + renames->redo_after_renames = 2; + trace2_region_leave("merge", "regular renames", opt->repo); + goto cleanup; + } use_cached_pairs(opt, &renames->cached_pairs[1], &renames->pairs[1]); use_cached_pairs(opt, &renames->cached_pairs[2], &renames->pairs[2]); trace2_region_leave("merge", "regular renames", opt->repo); @@ -4403,6 +4476,7 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt, opt->subtree_shift); } +redo: trace2_region_enter("merge", "collect_merge_info", opt->repo); if (collect_merge_info(opt, merge_base, side1, side2) != 0) { /* @@ -4422,6 +4496,12 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt, result->clean = detect_and_process_renames(opt, merge_base, side1, side2); trace2_region_leave("merge", "renames", opt->repo); + if (opt->priv->renames.redo_after_renames == 2) { + trace2_region_enter("merge", "reset_maps", opt->repo); + clear_or_reinit_internal_opts(opt->priv, 1); + trace2_region_leave("merge", "reset_maps", opt->repo); + goto redo; + } trace2_region_enter("merge", "process_entries", opt->repo); process_entries(opt, &working_tree_oid); diff --git a/t/t6423-merge-rename-directories.sh b/t/t6423-merge-rename-directories.sh index 4af4fb0038..5b81a130e9 100755 --- a/t/t6423-merge-rename-directories.sh +++ b/t/t6423-merge-rename-directories.sh @@ -4797,7 +4797,7 @@ test_setup_12f () { ) } -test_expect_merge_algorithm failure failure '12f: Trivial directory resolve, caching, all kinds of fun' ' +test_expect_merge_algorithm failure success '12f: Trivial directory resolve, caching, all kinds of fun' ' test_setup_12f && ( cd 12f && From 70569fadceb8f2e766803c2f02b08e5fd4a9ee19 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 20 Jul 2021 20:14:40 +0000 Subject: [PATCH 17/51] t1092: document bad 'git checkout' behavior Add new branches to the test repo that demonstrate directory/file conflicts in different ways. Since the directory 'folder1/' has adjacent files 'folder1-', 'folder1.txt', and 'folder10' it causes searches for 'folder1/' to land in a different place in the index than a search for 'folder1'. This causes a change in behavior when working with the df-conflict-1 and df-conflict-2 branches, whose only difference is that the first uses 'folder1' as the conflict and the other uses 'folder2' which does not have these adjacent files. We can extend two tests that compare the behavior across different 'git checkout' commands, and we see already that the behavior will be different in some cases and not in others. The difference between the two test loops is that one uses 'git reset --hard' between iterations. Further, we isolate the behavior of creating a staged change within a directory and then checking out a branch where that directory is replaced with a file. A full checkout behaves differently across these two cases, while a sparse-checkout cone behaves consistently. In both cases, the behavior is wrong. In one case, the staged change is dropped entirely. The other case the staged change is kept, replacing the file at that location, but none of the other files in the directory are kept. Likely, the correct behavior in this case is to reject the checkout and report the conflict, leaving HEAD in its previous location. None of the cases behave this way currently. Use comments to demonstrate that the tested behavior is only a documentation of the current, incorrect behavior to ensure we do not _accidentally_ change it. Instead, we would prefer to change it on purpose with a future change. At this point, the sparse-index does not handle these 'git checkout' commands correctly. Or rather, it _does_ reject the 'git checkout' when we have the staged change, but for the wrong reason. It also rejects the 'git checkout' commands when there is no staged change and we want to replace a directory with a file. A fix for that unstaged case will follow in the next change, but that will make the sparse-index agree with the full checkout case in these documented incorrect behaviors. Helped-by: Elijah Newren Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1092-sparse-checkout-compatibility.sh | 142 ++++++++++++++++++++++- 1 file changed, 140 insertions(+), 2 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index fde3b41aba..79b4a8ce19 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -95,6 +95,25 @@ test_expect_success 'setup' ' git add . && git commit -m "rename deep/deeper1/... to folder1/..." && + git checkout -b df-conflict-1 base && + rm -rf folder1 && + echo content >folder1 && + git add . && + git commit -m "dir to file" && + + git checkout -b df-conflict-2 base && + rm -rf folder2 && + echo content >folder2 && + git add . && + git commit -m "dir to file" && + + git checkout -b fd-conflict base && + rm a && + mkdir a && + echo content >a/a && + git add . && + git commit -m "file to dir" && + git checkout -b deepest base && echo "updated deepest" >deep/deeper1/deepest/a && git commit -a -m "update deepest" && @@ -358,10 +377,16 @@ test_expect_success 'diff --staged' ' test_all_match git diff --staged ' +# NEEDSWORK: sparse-checkout behaves differently from full-checkout when +# running this test with 'df-conflict-2' after 'df-conflict-1'. test_expect_success 'diff with renames and conflicts' ' init_repos && - for branch in rename-out-to-out rename-out-to-in rename-in-to-out + for branch in rename-out-to-out \ + rename-out-to-in \ + rename-in-to-out \ + df-conflict-1 \ + fd-conflict do test_all_match git checkout rename-base && test_all_match git checkout $branch -- . && @@ -371,10 +396,15 @@ test_expect_success 'diff with renames and conflicts' ' done ' +# NEEDSWORK: the sparse-index fails to move HEAD across a directory/file +# conflict such as when checking out df-conflict-1 and df-conflict2. test_expect_success 'diff with directory/file conflicts' ' init_repos && - for branch in rename-out-to-out rename-out-to-in rename-in-to-out + for branch in rename-out-to-out \ + rename-out-to-in \ + rename-in-to-out \ + fd-conflict do git -C full-checkout reset --hard && test_sparse_match git reset --hard && @@ -606,4 +636,112 @@ test_expect_success 'add everything with deep new file' ' test_all_match git status --porcelain=v2 ' +# NEEDSWORK: 'git checkout' behaves incorrectly in the case of +# directory/file conflicts, even without sparse-checkout. Use this +# test only as a documentation of the incorrect behavior, not a +# measure of how it _should_ behave. +test_expect_success 'checkout behaves oddly with df-conflict-1' ' + init_repos && + + test_sparse_match git sparse-checkout disable && + + write_script edit-content <<-\EOF && + echo content >>folder1/larger-content + git add folder1 + EOF + + run_on_all ../edit-content && + test_all_match git status --porcelain=v2 && + + git -C sparse-checkout sparse-checkout init --cone && + git -C sparse-index sparse-checkout init --cone --sparse-index && + + test_all_match git status --porcelain=v2 && + + # This checkout command should fail, because we have a staged + # change to folder1/larger-content, but the destination changes + # folder1 to a file. + git -C full-checkout checkout df-conflict-1 \ + 1>full-checkout-out \ + 2>full-checkout-err && + git -C sparse-checkout checkout df-conflict-1 \ + 1>sparse-checkout-out \ + 2>sparse-checkout-err && + + # NEEDSWORK: the sparse-index case refuses to change HEAD here, + # but for the wrong reason. + test_must_fail git -C sparse-index checkout df-conflict-1 \ + 1>sparse-index-out \ + 2>sparse-index-err && + + # Instead, the checkout deletes the folder1 file and adds the + # folder1/larger-content file, leaving all other paths that were + # in folder1/ as deleted (without any warning). + cat >expect <<-EOF && + D folder1 + A folder1/larger-content + EOF + test_cmp expect full-checkout-out && + test_cmp expect sparse-checkout-out && + + # stderr: Switched to branch df-conflict-1 + test_cmp full-checkout-err sparse-checkout-err +' + +# NEEDSWORK: 'git checkout' behaves incorrectly in the case of +# directory/file conflicts, even without sparse-checkout. Use this +# test only as a documentation of the incorrect behavior, not a +# measure of how it _should_ behave. +test_expect_success 'checkout behaves oddly with df-conflict-2' ' + init_repos && + + test_sparse_match git sparse-checkout disable && + + write_script edit-content <<-\EOF && + echo content >>folder2/larger-content + git add folder2 + EOF + + run_on_all ../edit-content && + test_all_match git status --porcelain=v2 && + + git -C sparse-checkout sparse-checkout init --cone && + git -C sparse-index sparse-checkout init --cone --sparse-index && + + test_all_match git status --porcelain=v2 && + + # This checkout command should fail, because we have a staged + # change to folder1/larger-content, but the destination changes + # folder1 to a file. + git -C full-checkout checkout df-conflict-2 \ + 1>full-checkout-out \ + 2>full-checkout-err && + git -C sparse-checkout checkout df-conflict-2 \ + 1>sparse-checkout-out \ + 2>sparse-checkout-err && + + # NEEDSWORK: the sparse-index case refuses to change HEAD + # here, but for the wrong reason. + test_must_fail git -C sparse-index checkout df-conflict-2 \ + 1>sparse-index-out \ + 2>sparse-index-err && + + # The full checkout deviates from the df-conflict-1 case here! + # It drops the change to folder1/larger-content and leaves the + # folder1 path as-is on disk. + test_must_be_empty full-checkout-out && + + # In the sparse-checkout case, the checkout deletes the folder1 + # file and adds the folder1/larger-content file, leaving all other + # paths that were in folder1/ as deleted (without any warning). + cat >expect <<-EOF && + D folder2 + A folder2/larger-content + EOF + test_cmp expect sparse-checkout-out && + + # Switched to branch df-conflict-1 + test_cmp full-checkout-err sparse-checkout-err +' + test_done From e05cdb17e89a2d3257533d47350b3138bfce8737 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 20 Jul 2021 20:14:41 +0000 Subject: [PATCH 18/51] unpack-trees: resolve sparse-directory/file conflicts When running unpack_trees() with a sparse index, we attempt to operate on the index without expanding the sparse directory entries. Thus, we operate by manipulating entire directories and passing them to the unpack function. In the case of the 'git checkout' command, this is the twoway_merge() function. There are several cases in twoway_merge() that handle different situations. One new one to add is the case of a directory/file conflict where the directory is sparse. Before the sparse index, such a conflict would appear as a list of file additions and deletions. Now, twoway_merge() initializes 'current', 'oldtree', and 'newtree' from src[0], src[1], and src[2], then sets 'oldtree' to NULL because it is equal to the df_conflict_entry. The way to determine that we have a directory/file conflict is to test that 'current' and 'newtree' disagree on being sparse directory entries. When we are in this case, we want to resolve the situation by calling merged_entry(). This allows replacing the 'current' entry with the 'newtree' entry. This is important for cases where we want to run 'git checkout' across the conflict and have the new HEAD represent the new file type at that path. The first NEEDSWORK comment dropped in t1092 demonstrates this necessary behavior. However, we still are in a confusing state when 'current' corresponds to a staged change within a sparse directory that is not present at HEAD. This should be atypical, because it requires adding a change outside of the sparse-checkout cone, but it is possible. Since we are unable to determine that this is a staged change within twoway_merge(), we cannot add a case to reject the merge at this point. I believe this is due to the use of df_conflict_entry in the place of 'oldtree' instead of using the valud at HEAD, which would provide some perspective to this decision. Any change that would allow this differentiation for staged entries would need to involve information further up in unpack_trees(). That work should be done, sometime, because we are further confusing the behavior of a directory/file conflict when staging a change in the directory. The two cases 'checkout behaves oddly with df-conflict-?' in t1092 demonstrate that even without a sparse-checkout, Git is not consistent in its behavior. Neither of the two options seems correct, either. This change makes the sparse-index behave differently than the typcial sparse-checkout case, but it does match the full checkout behavior in the df-conflict-2 case. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1092-sparse-checkout-compatibility.sh | 24 ++++++++++++------------ unpack-trees.c | 11 +++++++++++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 79b4a8ce19..91e30d6ec2 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -396,14 +396,14 @@ test_expect_success 'diff with renames and conflicts' ' done ' -# NEEDSWORK: the sparse-index fails to move HEAD across a directory/file -# conflict such as when checking out df-conflict-1 and df-conflict2. test_expect_success 'diff with directory/file conflicts' ' init_repos && for branch in rename-out-to-out \ rename-out-to-in \ rename-in-to-out \ + df-conflict-1 \ + df-conflict-2 \ fd-conflict do git -C full-checkout reset --hard && @@ -667,10 +667,7 @@ test_expect_success 'checkout behaves oddly with df-conflict-1' ' git -C sparse-checkout checkout df-conflict-1 \ 1>sparse-checkout-out \ 2>sparse-checkout-err && - - # NEEDSWORK: the sparse-index case refuses to change HEAD here, - # but for the wrong reason. - test_must_fail git -C sparse-index checkout df-conflict-1 \ + git -C sparse-index checkout df-conflict-1 \ 1>sparse-index-out \ 2>sparse-index-err && @@ -684,7 +681,11 @@ test_expect_success 'checkout behaves oddly with df-conflict-1' ' test_cmp expect full-checkout-out && test_cmp expect sparse-checkout-out && + # The sparse-index reports no output + test_must_be_empty sparse-index-out && + # stderr: Switched to branch df-conflict-1 + test_cmp full-checkout-err sparse-checkout-err && test_cmp full-checkout-err sparse-checkout-err ' @@ -719,17 +720,15 @@ test_expect_success 'checkout behaves oddly with df-conflict-2' ' git -C sparse-checkout checkout df-conflict-2 \ 1>sparse-checkout-out \ 2>sparse-checkout-err && - - # NEEDSWORK: the sparse-index case refuses to change HEAD - # here, but for the wrong reason. - test_must_fail git -C sparse-index checkout df-conflict-2 \ + git -C sparse-index checkout df-conflict-2 \ 1>sparse-index-out \ 2>sparse-index-err && # The full checkout deviates from the df-conflict-1 case here! # It drops the change to folder1/larger-content and leaves the - # folder1 path as-is on disk. + # folder1 path as-is on disk. The sparse-index behaves the same. test_must_be_empty full-checkout-out && + test_must_be_empty sparse-index-out && # In the sparse-checkout case, the checkout deletes the folder1 # file and adds the folder1/larger-content file, leaving all other @@ -741,7 +740,8 @@ test_expect_success 'checkout behaves oddly with df-conflict-2' ' test_cmp expect sparse-checkout-out && # Switched to branch df-conflict-1 - test_cmp full-checkout-err sparse-checkout-err + test_cmp full-checkout-err sparse-checkout-err && + test_cmp full-checkout-err sparse-index-err ' test_done diff --git a/unpack-trees.c b/unpack-trees.c index 0a5135ab39..309c1352f5 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -2619,6 +2619,17 @@ int twoway_merge(const struct cache_entry * const *src, same(current, oldtree) && !same(current, newtree)) { /* 20 or 21 */ return merged_entry(newtree, current, o); + } else if (current && !oldtree && newtree && + S_ISSPARSEDIR(current->ce_mode) != S_ISSPARSEDIR(newtree->ce_mode) && + ce_stage(current) == 0) { + /* + * This case is a directory/file conflict across the sparse-index + * boundary. When we are changing from one path to another via + * 'git checkout', then we want to replace one entry with another + * via merged_entry(). If there are staged changes, then we should + * reject the merge instead. + */ + return merged_entry(newtree, current, o); } else return reject_merge(current, o); } From 4523dc8624cf3d494fcc3643a85b8dbb69858e07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 22 Jul 2021 14:11:23 +0200 Subject: [PATCH 19/51] SubmittingPatches: move discussion of Signed-off-by above "send" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the section discussing the addition of a SOB trailer above the section that discusses generating the patch itself. This makes sense as we don't want someone to go through the process of "git format-patch", only to realize late that they should have used "git commit -s" or equivalent. This is a move-only change, no lines here are being altered, only moved around. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- Documentation/SubmittingPatches | 158 ++++++++++++++++---------------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 3e215f4d80..07e2073155 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -167,6 +167,85 @@ or, on an older version of Git without support for --pretty=reference: git show -s --date=short --pretty='format:%h (%s, %ad)' .... +[[sign-off]] +=== Certify your work by adding your `Signed-off-by` trailer + +To improve tracking of who did what, we ask you to certify that you +wrote the patch or have the right to pass it on under the same license +as ours, by "signing off" your patch. Without sign-off, we cannot +accept your patches. + +If (and only if) you certify the below D-C-O: + +[[dco]] +.Developer's Certificate of Origin 1.1 +____ +By making a contribution to this project, I certify that: + +a. The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +b. The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +c. The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +d. I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +____ + +you add a "Signed-off-by" trailer to your commit, that looks like +this: + +.... + Signed-off-by: Random J Developer +.... + +This line can be added by Git if you run the git-commit command with +the -s option. + +Notice that you can place your own `Signed-off-by` trailer when +forwarding somebody else's patch with the above rules for +D-C-O. Indeed you are encouraged to do so. Do not forget to +place an in-body "From: " line at the beginning to properly attribute +the change to its true author (see (2) above). + +This procedure originally came from the Linux kernel project, so our +rule is quite similar to theirs, but what exactly it means to sign-off +your patch differs from project to project, so it may be different +from that of the project you are accustomed to. + +[[real-name]] +Also notice that a real name is used in the `Signed-off-by` trailer. Please +don't hide your real name. + +[[commit-trailers]] +If you like, you can put extra tags at the end: + +. `Reported-by:` is used to credit someone who found the bug that + the patch attempts to fix. +. `Acked-by:` says that the person who is more familiar with the area + the patch attempts to modify liked the patch. +. `Reviewed-by:`, unlike the other tags, can only be offered by the + reviewers themselves when they are completely satisfied with the + patch after a detailed analysis. +. `Tested-by:` is used to indicate that the person applied the patch + and found it to have the desired effect. + +You can also create your own tag or use one that's in common usage +such as "Thanks-to:", "Based-on-patch-by:", or "Mentored-by:". + [[git-tools]] === Generate your patch using Git tools out of your commits. @@ -302,85 +381,6 @@ Do not forget to add trailers such as `Acked-by:`, `Reviewed-by:` and `Tested-by:` lines as necessary to credit people who helped your patch, and "cc:" them when sending such a final version for inclusion. -[[sign-off]] -=== Certify your work by adding your `Signed-off-by` trailer - -To improve tracking of who did what, we ask you to certify that you -wrote the patch or have the right to pass it on under the same license -as ours, by "signing off" your patch. Without sign-off, we cannot -accept your patches. - -If (and only if) you certify the below D-C-O: - -[[dco]] -.Developer's Certificate of Origin 1.1 -____ -By making a contribution to this project, I certify that: - -a. The contribution was created in whole or in part by me and I - have the right to submit it under the open source license - indicated in the file; or - -b. The contribution is based upon previous work that, to the best - of my knowledge, is covered under an appropriate open source - license and I have the right under that license to submit that - work with modifications, whether created in whole or in part - by me, under the same open source license (unless I am - permitted to submit under a different license), as indicated - in the file; or - -c. The contribution was provided directly to me by some other - person who certified (a), (b) or (c) and I have not modified - it. - -d. I understand and agree that this project and the contribution - are public and that a record of the contribution (including all - personal information I submit with it, including my sign-off) is - maintained indefinitely and may be redistributed consistent with - this project or the open source license(s) involved. -____ - -you add a "Signed-off-by" trailer to your commit, that looks like -this: - -.... - Signed-off-by: Random J Developer -.... - -This line can be added by Git if you run the git-commit command with -the -s option. - -Notice that you can place your own `Signed-off-by` trailer when -forwarding somebody else's patch with the above rules for -D-C-O. Indeed you are encouraged to do so. Do not forget to -place an in-body "From: " line at the beginning to properly attribute -the change to its true author (see (2) above). - -This procedure originally came from the Linux kernel project, so our -rule is quite similar to theirs, but what exactly it means to sign-off -your patch differs from project to project, so it may be different -from that of the project you are accustomed to. - -[[real-name]] -Also notice that a real name is used in the `Signed-off-by` trailer. Please -don't hide your real name. - -[[commit-trailers]] -If you like, you can put extra tags at the end: - -. `Reported-by:` is used to credit someone who found the bug that - the patch attempts to fix. -. `Acked-by:` says that the person who is more familiar with the area - the patch attempts to modify liked the patch. -. `Reviewed-by:`, unlike the other tags, can only be offered by the - reviewers themselves when they are completely satisfied with the - patch after a detailed analysis. -. `Tested-by:` is used to indicate that the person applied the patch - and found it to have the desired effect. - -You can also create your own tag or use one that's in common usage -such as "Thanks-to:", "Based-on-patch-by:", or "Mentored-by:". - == Subsystems with dedicated maintainers Some parts of the system have dedicated maintainers with their own From f003a91f5c5695a44a94efe9171bc905fbfa27ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 22 Jul 2021 14:11:24 +0200 Subject: [PATCH 20/51] SubmittingPatches: replace discussion of Travis with GitHub Actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the discussion of Travis CI added in 0e5d028a7a0 (Documentation: add setup instructions for Travis CI, 2016-05-02) with something that covers the GitHub Actions added in 889cacb6897 (ci: configure GitHub Actions for CI/PR, 2020-04-11). The setup is trivial compared to using Travis, and it even works on Windows (that "hopefully soon" comment was probably out-of-date on Travis as well). Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- Documentation/SubmittingPatches | 47 +++++++++++---------------------- 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 07e2073155..e409022d93 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -74,10 +74,9 @@ the feature triggers the new behavior when it should, and to show the feature does not trigger when it shouldn't. After any code change, make sure that the entire test suite passes. -If you have an account at GitHub (and you can get one for free to work -on open source projects), you can use their Travis CI integration to -test your changes on Linux, Mac (and hopefully soon Windows). See -GitHub-Travis CI hints section for details. +Pushing to a fork of https://github.com/git/git will use their CI +integration to test your changes on Linux, Mac and Windows. See the +<> section for details. Do not forget to update the documentation to describe the updated behavior and make sure that the resulting documentation set formats @@ -449,13 +448,12 @@ their trees themselves. entitled "What's cooking in git.git" and "What's in git.git" giving the status of various proposed changes. -[[travis]] -== GitHub-Travis CI hints +== GitHub CI[[GHCI]]] -With an account at GitHub (you can get one for free to work on open -source projects), you can use Travis CI to test your changes on Linux, -Mac (and hopefully soon Windows). You can find a successful example -test build here: https://travis-ci.org/git/git/builds/120473209 +With an account at GitHub, you can use GitHub CI to test your changes +on Linux, Mac and Windows. See +https://github.com/git/git/actions/workflows/main.yml for examples of +recent CI runs. Follow these steps for the initial setup: @@ -463,31 +461,18 @@ Follow these steps for the initial setup: You can find detailed instructions how to fork here: https://help.github.com/articles/fork-a-repo/ -. Open the Travis CI website: https://travis-ci.org - -. Press the "Sign in with GitHub" button. - -. Grant Travis CI permissions to access your GitHub account. - You can find more information about the required permissions here: - https://docs.travis-ci.com/user/github-oauth-scopes - -. Open your Travis CI profile page: https://travis-ci.org/profile - -. Enable Travis CI builds for your Git fork. - -After the initial setup, Travis CI will run whenever you push new changes +After the initial setup, CI will run whenever you push new changes to your fork of Git on GitHub. You can monitor the test state of all your -branches here: https://travis-ci.org/____/git/branches +branches here: https://github.com//git/actions/workflows/main.yml If a branch did not pass all test cases then it is marked with a red -cross. In that case you can click on the failing Travis CI job and -scroll all the way down in the log. Find the line "<-- Click here to see -detailed test output!" and click on the triangle next to the log line -number to expand the detailed test output. Here is such a failing -example: https://travis-ci.org/git/git/jobs/122676187 +cross. In that case you can click on the failing job and navigate to +"ci/run-build-and-tests.sh" and/or "ci/print-test-failures.sh". You +can also download "Artifacts" which are tarred (or zipped) archives +with test data relevant for debugging. -Fix the problem and push your fix to your Git fork. This will trigger -a new Travis CI build to ensure all tests pass. +Then fix the problem and push your fix to your GitHub fork. This will +trigger a new CI build to ensure all tests pass. [[mua]] == MUA specific hints From 9938f30d13d20026dad2eed7a6b51de25768c858 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 23 Jul 2021 12:14:27 +0000 Subject: [PATCH 21/51] merge: add missing word "strategy" to a message The variable 'best_strategy' holds the name of the merge strategy that resulted in fewer conflicts, if several strategies were tried. When that's the case but the best strategy was not the first one tried, we inform the user which strategy was the "best" one before recreating the merge and leaving the conflicted files in the tree. This informational message is missing the word "strategy", so it shows something like: Using the recursive to prepare resolving by hand. Fix that. Signed-off-by: Philippe Blain Signed-off-by: Junio C Hamano --- builtin/merge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/merge.c b/builtin/merge.c index a8a843b1f5..74797b6c7a 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -1715,7 +1715,7 @@ int cmd_merge(int argc, const char **argv, const char *prefix) else { printf(_("Rewinding the tree to pristine...\n")); restore_state(&head_commit->object.oid, &stash); - printf(_("Using the %s to prepare resolving by hand.\n"), + printf(_("Using the %s strategy to prepare resolving by hand.\n"), best_strategy); try_merge_strategy(best_strategy, common, remoteheads, head_commit); From fd441eb612b28e2b0f512ad6d98859c9e82a2cbd Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 23 Jul 2021 12:14:28 +0000 Subject: [PATCH 22/51] Documentation: define 'MERGE_AUTOSTASH' The documentation for 'git merge --abort' and 'git merge --quit' both mention the special ref 'MERGE_AUTOSTASH', but this ref is not formally defined anywhere. Mention it in the description of the '--autostash' option for 'git merge'. Signed-off-by: Philippe Blain Signed-off-by: Junio C Hamano --- Documentation/merge-options.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/merge-options.txt b/Documentation/merge-options.txt index eb0aabd396..52565014c1 100644 --- a/Documentation/merge-options.txt +++ b/Documentation/merge-options.txt @@ -154,7 +154,8 @@ endif::git-pull[] --autostash:: --no-autostash:: Automatically create a temporary stash entry before the operation - begins, and apply it after the operation ends. This means + begins, record it in the special ref `MERGE_AUTOSTASH` + and apply it after the operation ends. This means that you can run the operation on a dirty worktree. However, use with care: the final stash application after a successful merge might result in non-trivial conflicts. From 12510bd5da6187690ae957d46b41f59276b0dadc Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 23 Jul 2021 12:14:29 +0000 Subject: [PATCH 23/51] merge: apply autostash if fast-forward fails Since 'git merge' learned '--autostash' in a03b55530a (merge: teach --autostash option, 2020-04-07), 'cmd_merge', in the fast-forward case, calls 'create_autostash' before calling 'checkout_fast_forward' if '--autostash' is given. However, if 'checkout_fast_forward' fails, the autostash is not applied to the working tree, nor saved in the stash list, since the code simply calls 'goto done'. Be more helpful to the user by applying the autostash in that case. An easy way to test a failing fast-forward is when we are merging a branch that has a tracked file that conflicts with an untracked file in the working tree. Signed-off-by: Philippe Blain Signed-off-by: Junio C Hamano --- builtin/merge.c | 1 + t/t7600-merge.sh | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/builtin/merge.c b/builtin/merge.c index 74797b6c7a..788a6b0cd5 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -1560,6 +1560,7 @@ int cmd_merge(int argc, const char **argv, const char *prefix) &head_commit->object.oid, &commit->object.oid, overwrite_ignore)) { + apply_autostash(git_path_merge_autostash(the_repository)); ret = 1; goto done; } diff --git a/t/t7600-merge.sh b/t/t7600-merge.sh index 1cbc9715a8..216113d353 100755 --- a/t/t7600-merge.sh +++ b/t/t7600-merge.sh @@ -122,6 +122,8 @@ test_expect_success 'setup' ' c0=$(git rev-parse HEAD) && cp file.1 file && git add file && + cp file.1 other && + git add other && test_tick && git commit -m "commit 1" && git tag c1 && @@ -711,6 +713,15 @@ test_expect_success 'fast-forward merge with --autostash' ' test_cmp result.1-5 file ' +test_expect_success 'failed fast-forward merge with --autostash' ' + git reset --hard c0 && + git merge-file file file.orig file.5 && + cp file.5 other && + test_must_fail git merge --autostash c1 2>err && + test_i18ngrep "Applied autostash." err && + test_cmp file.5 file +' + test_expect_success 'octopus merge with --autostash' ' git reset --hard c1 && git merge-file file file.orig file.3 && From e082631e51ebe2c7ee6756a3b45d10732a6480df Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 23 Jul 2021 12:14:30 +0000 Subject: [PATCH 24/51] merge: apply autostash if merge strategy fails Since 'git merge' learned '--autostash' in a03b55530a (merge: teach --autostash option, 2020-04-07), 'cmd_merge', once it is determined that we have to create a merge commit, calls 'create_autostash' if '--autostash' is given. As explained in a03b55530a, and made more abvious by the tests added in that commit, the autostash is then applied if the merge succeeds, either directly or by committing (after conflict resolution or if '--no-commit' was given), or if the merge is aborted with 'git merge --abort'. In some other cases, like the user calling 'git reset --merge' or 'git merge --quit', the autostash is not applied, but saved in the stash list. However, there exists a scenario that creates an autostash but does not apply nor save it to the stash list: if the chosen merge strategy completely fails to handle the merge, i.e. 'try_merge_strategy' returns 2. Apply the autostash in that case also. An easy way to test that is to try to merge more than two commits but explicitely ask for the 'recursive' merge strategy. Signed-off-by: Philippe Blain Signed-off-by: Junio C Hamano --- builtin/merge.c | 1 + t/t7600-merge.sh | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/builtin/merge.c b/builtin/merge.c index 788a6b0cd5..d44c14a21a 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -1709,6 +1709,7 @@ int cmd_merge(int argc, const char **argv, const char *prefix) else fprintf(stderr, _("Merge with strategy %s failed.\n"), use_strategies[0]->name); + apply_autostash(git_path_merge_autostash(the_repository)); ret = 2; goto done; } else if (best_strategy == wt_strategy) diff --git a/t/t7600-merge.sh b/t/t7600-merge.sh index 216113d353..2ef39d3088 100755 --- a/t/t7600-merge.sh +++ b/t/t7600-merge.sh @@ -732,6 +732,14 @@ test_expect_success 'octopus merge with --autostash' ' test_cmp result.1-3-5-9 file ' +test_expect_success 'failed merge (exit 2) with --autostash' ' + git reset --hard c1 && + git merge-file file file.orig file.5 && + test_must_fail git merge -s recursive --autostash c2 c3 2>err && + test_i18ngrep "Applied autostash." err && + test_cmp result.1-5 file +' + test_expect_success 'conflicted merge with --autostash, --abort restores stash' ' git reset --hard c3 && cp file.1 file && From bbe3165f825c8d9b6e4a6747a287147b4583c3c1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 23 Jul 2021 07:12:30 -0400 Subject: [PATCH 25/51] submodule: drop unused sm_name parameter from show_fetch_remotes() This parameter has not been used since the function was introduced in 8c8195e9c3 (submodule--helper: introduce add-clone subcommand, 2021-07-10). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 6d52a73a57..a592ecaec1 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -2776,7 +2776,7 @@ struct add_data { }; #define ADD_DATA_INIT { .depth = -1 } -static void show_fetch_remotes(FILE *output, const char *sm_name, const char *git_dir_path) +static void show_fetch_remotes(FILE *output, const char *git_dir_path) { struct child_process cp_remote = CHILD_PROCESS_INIT; struct strbuf sb_remote_out = STRBUF_INIT; @@ -2827,8 +2827,7 @@ static int add_submodule(const struct add_data *add_data) fprintf(stderr, _("A git directory for '%s' is found " "locally with remote(s):"), add_data->sm_name); - show_fetch_remotes(stderr, add_data->sm_name, - submod_gitdir_path); + show_fetch_remotes(stderr, submod_gitdir_path); free(submod_gitdir_path); die(_("If you want to reuse this local git " "directory instead of cloning again from\n" From 9fa62137314437162bf3e022f44c1fa8e5b43b50 Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:19 +0200 Subject: [PATCH 26/51] fmt-merge-msg: free newly allocated temporary strings when done origin starts off pointing to somewhere within line, which is owned by the caller. Later we might allocate a new string using xmemdupz() or xstrfmt(). To avoid leaking these new strings, we introduce a to_free pointer - which allows us to safely free the newly allocated string when we're done (we cannot just free origin directly as it might still be pointing to line). LSAN output from t0090: Direct leak of 8 byte(s) in 1 object(s) allocated from: #0 0x49a82d in malloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:145:3 #1 0xa71f49 in do_xmalloc wrapper.c:41:8 #2 0xa720b0 in do_xmallocz wrapper.c:75:8 #3 0xa720b0 in xmallocz wrapper.c:83:9 #4 0xa720b0 in xmemdupz wrapper.c:99:16 #5 0x8092ba in handle_line fmt-merge-msg.c:187:23 #6 0x8092ba in fmt_merge_msg fmt-merge-msg.c:666:7 #7 0x5ce2e6 in prepare_merge_message builtin/merge.c:1119:2 #8 0x5ce2e6 in collect_parents builtin/merge.c:1215:3 #9 0x5c9c1e in cmd_merge builtin/merge.c:1454:16 #10 0x4ce83e in run_builtin git.c:475:11 #11 0x4ccafe in handle_builtin git.c:729:3 #12 0x4cb01c in run_argv git.c:818:4 #13 0x4cb01c in cmd_main git.c:949:19 #14 0x6b3fad in main common-main.c:52:11 #15 0x7fb929620349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 8 byte(s) leaked in 1 allocation(s). Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- fmt-merge-msg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fmt-merge-msg.c b/fmt-merge-msg.c index 0f66818e0f..b969dc6ebb 100644 --- a/fmt-merge-msg.c +++ b/fmt-merge-msg.c @@ -108,6 +108,7 @@ static int handle_line(char *line, struct merge_parents *merge_parents) struct origin_data *origin_data; char *src; const char *origin, *tag_name; + char *to_free = NULL; struct src_data *src_data; struct string_list_item *item; int pulling_head = 0; @@ -183,12 +184,13 @@ static int handle_line(char *line, struct merge_parents *merge_parents) if (!strcmp(".", src) || !strcmp(src, origin)) { int len = strlen(origin); if (origin[0] == '\'' && origin[len - 1] == '\'') - origin = xmemdupz(origin + 1, len - 2); + origin = to_free = xmemdupz(origin + 1, len - 2); } else - origin = xstrfmt("%s of %s", origin, src); + origin = to_free = xstrfmt("%s of %s", origin, src); if (strcmp(".", src)) origin_data->is_local_branch = 0; string_list_append(&origins, origin)->util = origin_data; + free(to_free); return 0; } From 14c3dd817dbdb957e22ebc9f2e8d78a2f901ef7f Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:20 +0200 Subject: [PATCH 27/51] environment: move strbuf into block to plug leak realpath is only populated if we execute the git_work_tree_initialized block. However that block also causes us to return early, meaning we never actually release the strbuf in the case where we populated it. Therefore we move all strbuf related code into the block to guarantee that we can't leak it. LSAN output from t0095: Direct leak of 129 byte(s) in 1 object(s) allocated from: #0 0x49a9b9 in realloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:164:3 #1 0x78f585 in xrealloc wrapper.c:126:8 #2 0x713ff4 in strbuf_grow strbuf.c:98:2 #3 0x713ff4 in strbuf_getcwd strbuf.c:597:3 #4 0x4f0c18 in strbuf_realpath_1 abspath.c:99:7 #5 0x5ae4a4 in set_git_work_tree environment.c:259:3 #6 0x6fdd8a in setup_discovered_git_dir setup.c:931:2 #7 0x6fdd8a in setup_git_directory_gently setup.c:1235:12 #8 0x4cb50d in get_bloom_filter_for_commit t/helper/test-bloom.c:41:2 #9 0x4cb50d in cmd__bloom t/helper/test-bloom.c:95:3 #10 0x4caa1f in cmd_main t/helper/test-tool.c:124:11 #11 0x4caded in main common-main.c:52:11 #12 0x7f0869f02349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 129 byte(s) leaked in 1 allocation(s). It looks like this leak has existed since realpath was first added to set_git_work_tree() in: 3d7747e318 (real_path: remove unsafe API, 2020-03-10) Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- environment.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/environment.c b/environment.c index 2f27008424..d6b22ede7e 100644 --- a/environment.c +++ b/environment.c @@ -253,21 +253,20 @@ static int git_work_tree_initialized; */ void set_git_work_tree(const char *new_work_tree) { - struct strbuf realpath = STRBUF_INIT; - if (git_work_tree_initialized) { + struct strbuf realpath = STRBUF_INIT; + strbuf_realpath(&realpath, new_work_tree, 1); new_work_tree = realpath.buf; if (strcmp(new_work_tree, the_repository->worktree)) die("internal error: work tree has already been set\n" "Current worktree: %s\nNew worktree: %s", the_repository->worktree, new_work_tree); + strbuf_release(&realpath); return; } git_work_tree_initialized = 1; repo_set_worktree(the_repository, new_work_tree); - - strbuf_release(&realpath); } const char *get_git_work_tree(void) From edfc744918fb130213633c024597f2aa25ff5de1 Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:21 +0200 Subject: [PATCH 28/51] builtin/submodule--helper: release unused strbuf to avoid leak relative_url() populates sb. In the normal return path, its buffer is detached using strbuf_detach(). However the early return path does nothing with sb, which means that sb's memory is leaked - therefore we add a release to avoid this leak. The reset is also only necessary for the normal return path, hence we move it down to after the early-return to avoid unnecessary work. LSAN output from t0060: Direct leak of 121 byte(s) in 1 object(s) allocated from: #0 0x7f31246f28b0 in realloc (/usr/lib64/libasan.so.4+0xdc8b0) #1 0x98d7d6 in xrealloc wrapper.c:126 #2 0x909a60 in strbuf_grow strbuf.c:98 #3 0x90bf00 in strbuf_vaddf strbuf.c:401 #4 0x90c321 in strbuf_addf strbuf.c:335 #5 0x5cb78d in relative_url builtin/submodule--helper.c:182 #6 0x5cbe46 in resolve_relative_url_test builtin/submodule--helper.c:248 #7 0x410dcd in run_builtin git.c:475 #8 0x410dcd in handle_builtin git.c:729 #9 0x414087 in run_argv git.c:818 #10 0x414087 in cmd_main git.c:949 #11 0x40e9ec in main common-main.c:52 #12 0x7f3123c41349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 121 byte(s) leaked in 1 allocation(s). Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index f73963ad67..7e5b75971c 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -187,11 +187,13 @@ static char *relative_url(const char *remote_url, out = xstrdup(sb.buf + 2); else out = xstrdup(sb.buf); - strbuf_reset(&sb); - if (!up_path || !is_relative) + if (!up_path || !is_relative) { + strbuf_release(&sb); return out; + } + strbuf_reset(&sb); strbuf_addf(&sb, "%s%s", up_path, out); free(out); return strbuf_detach(&sb, NULL); From 2b2999460c7e907dc80593483e1c23ce00b6745c Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:22 +0200 Subject: [PATCH 29/51] builtin/for-each-repo: remove unnecessary argv copy to plug leak cmd_for_each_repo() copies argv into args (a strvec), which is later passed into run_command_on_repo(), which in turn copies that strvec onto the end of child.args. The initial copy is unnecessary (we never modify args). We therefore choose to just pass argv directly into run_command_on_repo(), which lets us avoid the copy and fixes the leak. LSAN output from t0068: Direct leak of 192 byte(s) in 1 object(s) allocated from: #0 0x7f63bd4ab8b0 in realloc (/usr/lib64/libasan.so.4+0xdc8b0) #1 0x98d7e6 in xrealloc wrapper.c:126 #2 0x916914 in strvec_push_nodup strvec.c:19 #3 0x916a6e in strvec_push strvec.c:26 #4 0x4be4eb in cmd_for_each_repo builtin/for-each-repo.c:49 #5 0x410dcd in run_builtin git.c:475 #6 0x410dcd in handle_builtin git.c:729 #7 0x414087 in run_argv git.c:818 #8 0x414087 in cmd_main git.c:949 #9 0x40e9ec in main common-main.c:52 #10 0x7f63bc9fa349 in __libc_start_main (/lib64/libc.so.6+0x24349) Indirect leak of 22 byte(s) in 2 object(s) allocated from: #0 0x7f63bd445e30 in __interceptor_strdup (/usr/lib64/libasan.so.4+0x76e30) #1 0x98d698 in xstrdup wrapper.c:29 #2 0x916a63 in strvec_push strvec.c:26 #3 0x4be4eb in cmd_for_each_repo builtin/for-each-repo.c:49 #4 0x410dcd in run_builtin git.c:475 #5 0x410dcd in handle_builtin git.c:729 #6 0x414087 in run_argv git.c:818 #7 0x414087 in cmd_main git.c:949 #8 0x40e9ec in main common-main.c:52 #9 0x7f63bc9fa349 in __libc_start_main (/lib64/libc.so.6+0x24349) See also discussion about the original implementation below - this code appears to have evolved from a callback explaining the double-strvec-copy pattern, but there's no strong reason to keep that now: https://lore.kernel.org/git/68bbeca5-314b-08ee-ef36-040e3f3814e9@gmail.com/ Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- builtin/for-each-repo.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/builtin/for-each-repo.c b/builtin/for-each-repo.c index 52be64a437..fd86e5a861 100644 --- a/builtin/for-each-repo.c +++ b/builtin/for-each-repo.c @@ -10,18 +10,16 @@ static const char * const for_each_repo_usage[] = { NULL }; -static int run_command_on_repo(const char *path, - void *cbdata) +static int run_command_on_repo(const char *path, int argc, const char ** argv) { int i; struct child_process child = CHILD_PROCESS_INIT; - struct strvec *args = (struct strvec *)cbdata; child.git_cmd = 1; strvec_pushl(&child.args, "-C", path, NULL); - for (i = 0; i < args->nr; i++) - strvec_push(&child.args, args->v[i]); + for (i = 0; i < argc; i++) + strvec_push(&child.args, argv[i]); return run_command(&child); } @@ -31,7 +29,6 @@ int cmd_for_each_repo(int argc, const char **argv, const char *prefix) static const char *config_key = NULL; int i, result = 0; const struct string_list *values; - struct strvec args = STRVEC_INIT; const struct option options[] = { OPT_STRING(0, "config", &config_key, N_("config"), @@ -45,9 +42,6 @@ int cmd_for_each_repo(int argc, const char **argv, const char *prefix) if (!config_key) die(_("missing --config=")); - for (i = 0; i < argc; i++) - strvec_push(&args, argv[i]); - values = repo_config_get_value_multi(the_repository, config_key); @@ -59,7 +53,7 @@ int cmd_for_each_repo(int argc, const char **argv, const char *prefix) return 0; for (i = 0; !result && i < values->nr; i++) - result = run_command_on_repo(values->items[i].string, &args); + result = run_command_on_repo(values->items[i].string, argc, argv); return result; } From 4e3250b7fb806f3e391239d680638ee44068ffe1 Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:23 +0200 Subject: [PATCH 30/51] diffcore-rename: move old_dir/new_dir definition to plug leak old_dir/new_dir are free()'d at the end of update_dir_rename_counts, however if we return early we'll never free those strings. Therefore we should move all new allocations after the possible early return, avoiding a leak. This seems like a fairly recent leak, that started happening since the early-return was added in: 1ad69eb0dc (diffcore-rename: compute dir_rename_counts in stages, 2021-02-27) LSAN output from t0022: Direct leak of 7 byte(s) in 1 object(s) allocated from: #0 0x486804 in strdup ../projects/compiler-rt/lib/asan/asan_interceptors.cpp:452:3 #1 0xa71e48 in xstrdup wrapper.c:29:14 #2 0x7db9c7 in update_dir_rename_counts diffcore-rename.c:464:12 #3 0x7db6ae in find_renames diffcore-rename.c:1062:3 #4 0x7d76c3 in diffcore_rename_extended diffcore-rename.c:1472:18 #5 0x7b4cfc in diffcore_std diff.c:6705:4 #6 0x855e46 in log_tree_diff_flush log-tree.c:846:2 #7 0x856574 in log_tree_diff log-tree.c:955:3 #8 0x856574 in log_tree_commit log-tree.c:986:10 #9 0x9a9c67 in print_commit_summary sequencer.c:1329:7 #10 0x52e623 in cmd_commit builtin/commit.c:1862:3 #11 0x4ce83e in run_builtin git.c:475:11 #12 0x4ccafe in handle_builtin git.c:729:3 #13 0x4cb01c in run_argv git.c:818:4 #14 0x4cb01c in cmd_main git.c:949:19 #15 0x6b3f3d in main common-main.c:52:11 #16 0x7fe397c7a349 in __libc_start_main (/lib64/libc.so.6+0x24349) Direct leak of 7 byte(s) in 1 object(s) allocated from: #0 0x486804 in strdup ../projects/compiler-rt/lib/asan/asan_interceptors.cpp:452:3 #1 0xa71e48 in xstrdup wrapper.c:29:14 #2 0x7db9bc in update_dir_rename_counts diffcore-rename.c:463:12 #3 0x7db6ae in find_renames diffcore-rename.c:1062:3 #4 0x7d76c3 in diffcore_rename_extended diffcore-rename.c:1472:18 #5 0x7b4cfc in diffcore_std diff.c:6705:4 #6 0x855e46 in log_tree_diff_flush log-tree.c:846:2 #7 0x856574 in log_tree_diff log-tree.c:955:3 #8 0x856574 in log_tree_commit log-tree.c:986:10 #9 0x9a9c67 in print_commit_summary sequencer.c:1329:7 #10 0x52e623 in cmd_commit builtin/commit.c:1862:3 #11 0x4ce83e in run_builtin git.c:475:11 #12 0x4ccafe in handle_builtin git.c:729:3 #13 0x4cb01c in run_argv git.c:818:4 #14 0x4cb01c in cmd_main git.c:949:19 #15 0x6b3f3d in main common-main.c:52:11 #16 0x7fe397c7a349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 14 byte(s) leaked in 2 allocation(s). Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- diffcore-rename.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/diffcore-rename.c b/diffcore-rename.c index 4ef0459cfb..15b4add556 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -448,9 +448,9 @@ static void update_dir_rename_counts(struct dir_rename_info *info, const char *oldname, const char *newname) { - char *old_dir = xstrdup(oldname); - char *new_dir = xstrdup(newname); - char new_dir_first_char = new_dir[0]; + char *old_dir; + char *new_dir; + const char new_dir_first_char = newname[0]; int first_time_in_loop = 1; if (!info->setup) @@ -475,6 +475,10 @@ static void update_dir_rename_counts(struct dir_rename_info *info, */ return; + + old_dir = xstrdup(oldname); + new_dir = xstrdup(newname); + while (1) { int drd_flag = NOT_RELEVANT; From d7cf4188e2e85faa00552b8616db31a17844df1b Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:24 +0200 Subject: [PATCH 31/51] ref-filter: also free head for ATOM_HEAD to avoid leak u.head is populated using resolve_refdup(), which returns a newly allocated string - hence we also need to free() it. Found while running t0041 with LSAN: Direct leak of 16 byte(s) in 1 object(s) allocated from: #0 0x486804 in strdup ../projects/compiler-rt/lib/asan/asan_interceptors.cpp:452:3 #1 0xa8be98 in xstrdup wrapper.c:29:14 #2 0x9481db in head_atom_parser ref-filter.c:549:17 #3 0x9408c7 in parse_ref_filter_atom ref-filter.c:703:30 #4 0x9400e3 in verify_ref_format ref-filter.c:974:8 #5 0x4f9e8b in print_ref_list builtin/branch.c:439:6 #6 0x4f9e8b in cmd_branch builtin/branch.c:757:3 #7 0x4ce83e in run_builtin git.c:475:11 #8 0x4ccafe in handle_builtin git.c:729:3 #9 0x4cb01c in run_argv git.c:818:4 #10 0x4cb01c in cmd_main git.c:949:19 #11 0x6bdc2d in main common-main.c:52:11 #12 0x7f96edf86349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 16 byte(s) leaked in 1 allocation(s). Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- ref-filter.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index 4db0e40ff4..f8bfd25ae4 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2225,8 +2225,12 @@ void ref_array_clear(struct ref_array *array) FREE_AND_NULL(array->items); array->nr = array->alloc = 0; - for (i = 0; i < used_atom_cnt; i++) - free((char *)used_atom[i].name); + for (i = 0; i < used_atom_cnt; i++) { + struct used_atom *atom = &used_atom[i]; + if (atom->atom_type == ATOM_HEAD) + free(atom->u.head); + free((char *)atom->name); + } FREE_AND_NULL(used_atom); used_atom_cnt = 0; From 8d833e933719cd4badb7ec7d042d6f7cc206247d Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:25 +0200 Subject: [PATCH 32/51] read-cache: call diff_setup_done to avoid leak repo_diff_setup() calls through to diff.c's static prep_parse_options(), which in turn allocates a new array into diff_opts.parseopts. diff_setup_done() is responsible for freeing that array, and has the benefit of verifying diff_opts too - hence we add a call to diff_setup_done() to avoid leaking parseopts. Output from the leak as found while running t0090 with LSAN: Direct leak of 7120 byte(s) in 1 object(s) allocated from: #0 0x49a82d in malloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:145:3 #1 0xa8bf89 in do_xmalloc wrapper.c:41:8 #2 0x7a7bae in prep_parse_options diff.c:5636:2 #3 0x7a7bae in repo_diff_setup diff.c:4611:2 #4 0x93716c in repo_index_has_changes read-cache.c:2518:3 #5 0x872233 in unclean merge-ort-wrappers.c:12:14 #6 0x872233 in merge_ort_recursive merge-ort-wrappers.c:53:6 #7 0x5d5b11 in try_merge_strategy builtin/merge.c:752:12 #8 0x5d0b6b in cmd_merge builtin/merge.c:1666:9 #9 0x4ce83e in run_builtin git.c:475:11 #10 0x4ccafe in handle_builtin git.c:729:3 #11 0x4cb01c in run_argv git.c:818:4 #12 0x4cb01c in cmd_main git.c:949:19 #13 0x6bdc2d in main common-main.c:52:11 #14 0x7f551eb51349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 7120 byte(s) leaked in 1 allocation(s) Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- read-cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/read-cache.c b/read-cache.c index ba2b012a6c..6030015846 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2499,6 +2499,7 @@ int repo_index_has_changes(struct repository *repo, opt.flags.exit_with_status = 1; if (!sb) opt.flags.quick = 1; + diff_setup_done(&opt); do_diff_cache(&cmp, &opt); diffcore_std(&opt); for (i = 0; sb && i < diff_queued_diff.nr; i++) { From 675ea4efdbe22076c4b0681f89c3047c9d04656a Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:26 +0200 Subject: [PATCH 33/51] convert: release strbuf to avoid leak apply_multi_file_filter and async_query_available_blobs both query subprocess output using subprocess_read_status, which writes data into the identically named filter_status strbuf. We add a strbuf_release to avoid leaking their contents. Leak output seen when running t0021 with LSAN: Direct leak of 24 byte(s) in 1 object(s) allocated from: #0 0x49ab49 in realloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:164:3 #1 0xa8c2b5 in xrealloc wrapper.c:126:8 #2 0x9ff99d in strbuf_grow strbuf.c:98:2 #3 0x9ff99d in strbuf_addbuf strbuf.c:304:2 #4 0xa101d6 in subprocess_read_status sub-process.c:45:5 #5 0x77793c in apply_multi_file_filter convert.c:886:8 #6 0x77793c in apply_filter convert.c:1042:10 #7 0x77a0b5 in convert_to_git_filter_fd convert.c:1492:7 #8 0x8b48cd in index_stream_convert_blob object-file.c:2156:2 #9 0x8b48cd in index_fd object-file.c:2248:9 #10 0x597411 in hash_fd builtin/hash-object.c:43:9 #11 0x596be1 in hash_object builtin/hash-object.c:59:2 #12 0x596be1 in cmd_hash_object builtin/hash-object.c:153:3 #13 0x4ce83e in run_builtin git.c:475:11 #14 0x4ccafe in handle_builtin git.c:729:3 #15 0x4cb01c in run_argv git.c:818:4 #16 0x4cb01c in cmd_main git.c:949:19 #17 0x6bdc2d in main common-main.c:52:11 #18 0x7f42acf79349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 24 byte(s) leaked in 1 allocation(s). Direct leak of 120 byte(s) in 5 object(s) allocated from: #0 0x49ab49 in realloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:164:3 #1 0xa8c295 in xrealloc wrapper.c:126:8 #2 0x9ff97d in strbuf_grow strbuf.c:98:2 #3 0x9ff97d in strbuf_addbuf strbuf.c:304:2 #4 0xa101b6 in subprocess_read_status sub-process.c:45:5 #5 0x775c73 in async_query_available_blobs convert.c:960:8 #6 0x80029d in finish_delayed_checkout entry.c:183:9 #7 0xa65d1e in check_updates unpack-trees.c:493:10 #8 0xa5f469 in unpack_trees unpack-trees.c:1747:8 #9 0x525971 in checkout builtin/clone.c:815:6 #10 0x525971 in cmd_clone builtin/clone.c:1409:8 #11 0x4ce83e in run_builtin git.c:475:11 #12 0x4ccafe in handle_builtin git.c:729:3 #13 0x4cb01c in run_argv git.c:818:4 #14 0x4cb01c in cmd_main git.c:949:19 #15 0x6bdc2d in main common-main.c:52:11 #16 0x7fa253fce349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 120 byte(s) leaked in 5 allocation(s). Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- convert.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/convert.c b/convert.c index fd9c84b025..0d6fb3410a 100644 --- a/convert.c +++ b/convert.c @@ -916,6 +916,7 @@ static int apply_multi_file_filter(const char *path, const char *src, size_t len else strbuf_swap(dst, &nbuf); strbuf_release(&nbuf); + strbuf_release(&filter_status); return !err; } @@ -966,6 +967,7 @@ int async_query_available_blobs(const char *cmd, struct string_list *available_p if (err) handle_filter_error(&filter_status, entry, 0); + strbuf_release(&filter_status); return !err; } From ed3c566d97f225e9552a6179c2a9328bdba6af73 Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:27 +0200 Subject: [PATCH 34/51] builtin/mv: free or UNLEAK multiple pointers at end of cmd_mv These leaks all happen at the end of cmd_mv, hence don't matter in any way. But we still fix the easy ones and squash the rest to get us closer to being able to run tests without leaks. LSAN output from t0050: Direct leak of 384 byte(s) in 1 object(s) allocated from: #0 0x49ab49 in realloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:164:3 #1 0xa8c015 in xrealloc wrapper.c:126:8 #2 0xa0a7e1 in add_entry string-list.c:44:2 #3 0xa0a7e1 in string_list_insert string-list.c:58:14 #4 0x5dac03 in cmd_mv builtin/mv.c:248:4 #5 0x4ce83e in run_builtin git.c:475:11 #6 0x4ccafe in handle_builtin git.c:729:3 #7 0x4cb01c in run_argv git.c:818:4 #8 0x4cb01c in cmd_main git.c:949:19 #9 0x6bd9ad in main common-main.c:52:11 #10 0x7fbfeffc4349 in __libc_start_main (/lib64/libc.so.6+0x24349) Direct leak of 16 byte(s) in 1 object(s) allocated from: #0 0x49a82d in malloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:145:3 #1 0xa8bd09 in do_xmalloc wrapper.c:41:8 #2 0x5dbc34 in internal_prefix_pathspec builtin/mv.c:32:2 #3 0x5da575 in cmd_mv builtin/mv.c:158:14 #4 0x4ce83e in run_builtin git.c:475:11 #5 0x4ccafe in handle_builtin git.c:729:3 #6 0x4cb01c in run_argv git.c:818:4 #7 0x4cb01c in cmd_main git.c:949:19 #8 0x6bd9ad in main common-main.c:52:11 #9 0x7fbfeffc4349 in __libc_start_main (/lib64/libc.so.6+0x24349) Direct leak of 16 byte(s) in 1 object(s) allocated from: #0 0x49a82d in malloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:145:3 #1 0xa8bd09 in do_xmalloc wrapper.c:41:8 #2 0x5dbc34 in internal_prefix_pathspec builtin/mv.c:32:2 #3 0x5da4e4 in cmd_mv builtin/mv.c:148:11 #4 0x4ce83e in run_builtin git.c:475:11 #5 0x4ccafe in handle_builtin git.c:729:3 #6 0x4cb01c in run_argv git.c:818:4 #7 0x4cb01c in cmd_main git.c:949:19 #8 0x6bd9ad in main common-main.c:52:11 #9 0x7fbfeffc4349 in __libc_start_main (/lib64/libc.so.6+0x24349) Direct leak of 8 byte(s) in 1 object(s) allocated from: #0 0x49a9a2 in calloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:154:3 #1 0xa8c119 in xcalloc wrapper.c:140:8 #2 0x5da585 in cmd_mv builtin/mv.c:159:22 #3 0x4ce83e in run_builtin git.c:475:11 #4 0x4ccafe in handle_builtin git.c:729:3 #5 0x4cb01c in run_argv git.c:818:4 #6 0x4cb01c in cmd_main git.c:949:19 #7 0x6bd9ad in main common-main.c:52:11 #8 0x7fbfeffc4349 in __libc_start_main (/lib64/libc.so.6+0x24349) Direct leak of 4 byte(s) in 1 object(s) allocated from: #0 0x49a9a2 in calloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:154:3 #1 0xa8c119 in xcalloc wrapper.c:140:8 #2 0x5da4f8 in cmd_mv builtin/mv.c:149:10 #3 0x4ce83e in run_builtin git.c:475:11 #4 0x4ccafe in handle_builtin git.c:729:3 #5 0x4cb01c in run_argv git.c:818:4 #6 0x4cb01c in cmd_main git.c:949:19 #7 0x6bd9ad in main common-main.c:52:11 #8 0x7fbfeffc4349 in __libc_start_main (/lib64/libc.so.6+0x24349) Indirect leak of 65 byte(s) in 1 object(s) allocated from: #0 0x49ab49 in realloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:164:3 #1 0xa8c015 in xrealloc wrapper.c:126:8 #2 0xa00226 in strbuf_grow strbuf.c:98:2 #3 0xa00226 in strbuf_vaddf strbuf.c:394:3 #4 0xa065c7 in xstrvfmt strbuf.c:981:2 #5 0xa065c7 in xstrfmt strbuf.c:991:8 #6 0x9e7ce7 in prefix_path_gently setup.c:115:15 #7 0x9e7fa6 in prefix_path setup.c:128:12 #8 0x5dbdbf in internal_prefix_pathspec builtin/mv.c:55:23 #9 0x5da575 in cmd_mv builtin/mv.c:158:14 #10 0x4ce83e in run_builtin git.c:475:11 #11 0x4ccafe in handle_builtin git.c:729:3 #12 0x4cb01c in run_argv git.c:818:4 #13 0x4cb01c in cmd_main git.c:949:19 #14 0x6bd9ad in main common-main.c:52:11 #15 0x7fbfeffc4349 in __libc_start_main (/lib64/libc.so.6+0x24349) Indirect leak of 65 byte(s) in 1 object(s) allocated from: #0 0x49ab49 in realloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:164:3 #1 0xa8c015 in xrealloc wrapper.c:126:8 #2 0xa00226 in strbuf_grow strbuf.c:98:2 #3 0xa00226 in strbuf_vaddf strbuf.c:394:3 #4 0xa065c7 in xstrvfmt strbuf.c:981:2 #5 0xa065c7 in xstrfmt strbuf.c:991:8 #6 0x9e7ce7 in prefix_path_gently setup.c:115:15 #7 0x9e7fa6 in prefix_path setup.c:128:12 #8 0x5dbdbf in internal_prefix_pathspec builtin/mv.c:55:23 #9 0x5da4e4 in cmd_mv builtin/mv.c:148:11 #10 0x4ce83e in run_builtin git.c:475:11 #11 0x4ccafe in handle_builtin git.c:729:3 #12 0x4cb01c in run_argv git.c:818:4 #13 0x4cb01c in cmd_main git.c:949:19 #14 0x6bd9ad in main common-main.c:52:11 #15 0x7fbfeffc4349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 558 byte(s) leaked in 7 allocation(s). Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- builtin/mv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/builtin/mv.c b/builtin/mv.c index 3fccdcb645..c2f96c8e89 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -303,5 +303,10 @@ int cmd_mv(int argc, const char **argv, const char *prefix) COMMIT_LOCK | SKIP_IF_UNCHANGED)) die(_("Unable to write new index file")); + string_list_clear(&src_for_dst, 0); + UNLEAK(source); + UNLEAK(dest_path); + free(submodule_gitfile); + free(modes); return 0; } From 8c05e42c7a0bdf08532188e1862c6f72f6db7c56 Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:28 +0200 Subject: [PATCH 35/51] builtin/merge: free found_ref when done merge_name() calls dwim_ref(), which allocates a new string into found_ref. Therefore add a free() to avoid leaking found_ref. LSAN output from t0021: Direct leak of 16 byte(s) in 1 object(s) allocated from: #0 0x486804 in strdup ../projects/compiler-rt/lib/asan/asan_interceptors.cpp:452:3 #1 0xa8beb8 in xstrdup wrapper.c:29:14 #2 0x954054 in expand_ref refs.c:671:12 #3 0x953cb6 in repo_dwim_ref refs.c:644:22 #4 0x5d3759 in dwim_ref refs.h:162:9 #5 0x5d3759 in merge_name builtin/merge.c:517:6 #6 0x5d3759 in collect_parents builtin/merge.c:1214:5 #7 0x5cf60d in cmd_merge builtin/merge.c:1458:16 #8 0x4ce83e in run_builtin git.c:475:11 #9 0x4ccafe in handle_builtin git.c:729:3 #10 0x4cb01c in run_argv git.c:818:4 #11 0x4cb01c in cmd_main git.c:949:19 #12 0x6bdbfd in main common-main.c:52:11 #13 0x7f0430502349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 16 byte(s) leaked in 1 allocation(s). Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- builtin/merge.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/builtin/merge.c b/builtin/merge.c index a8a843b1f5..7ad85c044a 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -503,7 +503,7 @@ static void merge_name(const char *remote, struct strbuf *msg) struct strbuf bname = STRBUF_INIT; struct merge_remote_desc *desc; const char *ptr; - char *found_ref; + char *found_ref = NULL; int len, early; strbuf_branchname(&bname, remote, 0); @@ -586,6 +586,7 @@ static void merge_name(const char *remote, struct strbuf *msg) strbuf_addf(msg, "%s\t\tcommit '%s'\n", oid_to_hex(&remote_head->object.oid), remote); cleanup: + free(found_ref); strbuf_release(&buf); strbuf_release(&bname); } From b54cf3a766e6e5041baa371a763b7bdc8a1a8a4b Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:29 +0200 Subject: [PATCH 36/51] builtin/rebase: fix options.strategy memory lifecycle - cmd_rebase populates rebase_options.strategy with newly allocated strings, hence we need to free those strings at the end of cmd_rebase to avoid a leak. - In some cases: get_replay_opts() is called, which prepares replay_opts using data from rebase_options. We used to simply copy the pointer from rebase_options.strategy, however that would now result in a double-free because sequencer_remove_state() is eventually used to free replay_opts.strategy. To avoid this we xstrdup() strategy when adding it to replay_opts. The original leak happens because we always populate rebase_options.strategy, but we don't always enter the path that calls get_replay_opts() and later sequencer_remove_state() - in other words we'd always allocate a new string into rebase_options.strategy but only sometimes did we free it. We now make sure that rebase_options and replay_opts both own their own copies of strategy, and each copy is free'd independently. This was first seen when running t0021 with LSAN, but t2012 helped catch the fact that we can't just free(options.strategy) at the end of cmd_rebase (as that can cause a double-free). LSAN output from t0021: LSAN output from t0021: Direct leak of 4 byte(s) in 1 object(s) allocated from: #0 0x486804 in strdup ../projects/compiler-rt/lib/asan/asan_interceptors.cpp:452:3 #1 0xa71eb8 in xstrdup wrapper.c:29:14 #2 0x61b1cc in cmd_rebase builtin/rebase.c:1779:22 #3 0x4ce83e in run_builtin git.c:475:11 #4 0x4ccafe in handle_builtin git.c:729:3 #5 0x4cb01c in run_argv git.c:818:4 #6 0x4cb01c in cmd_main git.c:949:19 #7 0x6b3fad in main common-main.c:52:11 #8 0x7f267b512349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 4 byte(s) leaked in 1 allocation(s). Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- builtin/rebase.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index 12f093121d..33e0961900 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -139,7 +139,7 @@ static struct replay_opts get_replay_opts(const struct rebase_options *opts) replay.ignore_date = opts->ignore_date; replay.gpg_sign = xstrdup_or_null(opts->gpg_sign_opt); if (opts->strategy) - replay.strategy = opts->strategy; + replay.strategy = xstrdup_or_null(opts->strategy); else if (!replay.strategy && replay.default_strategy) { replay.strategy = replay.default_strategy; replay.default_strategy = NULL; @@ -2109,6 +2109,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) free(options.head_name); free(options.gpg_sign_opt); free(options.cmd); + free(options.strategy); strbuf_release(&options.git_format_patch_opt); free(squash_onto_name); return ret; From 9a863b3358b84c627c8129defb9c127ec73e8e30 Mon Sep 17 00:00:00 2001 From: Andrzej Hunt Date: Sun, 25 Jul 2021 15:08:30 +0200 Subject: [PATCH 37/51] reset: clear_unpack_trees_porcelain to plug leak setup_unpack_trees_porcelain() populates various fields on unpack_tree_opts, we need to call clear_unpack_trees_porcelain() to avoid leaking them. Specifically, we used to leak unpack_tree_opts.msgs_to_free. We have to do this in leave_reset_head because there are multiple scenarios where unpack_tree_opts has already been configured, followed by a 'goto leave_reset_head'. But we can also 'goto leave_reset_head' prior to having initialised unpack_tree_opts via memset(..., 0, ...). Therefore we also move unpack_tree_opts initialisation to the start of reset_head(), and convert it to use brace initialisation - which guarantees that we can never clear an uninitialised unpack_tree_opts. clear_unpack_tree_opts() is always safe to call as long as unpack_tree_opts is at least zero-initialised, i.e. it does not depend on a previous call to setup_unpack_trees_porcelain(). LSAN output from t0021: Direct leak of 192 byte(s) in 1 object(s) allocated from: #0 0x49ab49 in realloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:164:3 #1 0xa721e5 in xrealloc wrapper.c:126:8 #2 0x9f7861 in strvec_push_nodup strvec.c:19:2 #3 0x9f7861 in strvec_pushf strvec.c:39:2 #4 0xa43e14 in setup_unpack_trees_porcelain unpack-trees.c:129:3 #5 0x97e011 in reset_head reset.c:53:2 #6 0x61dfa5 in cmd_rebase builtin/rebase.c:1991:9 #7 0x4ce83e in run_builtin git.c:475:11 #8 0x4ccafe in handle_builtin git.c:729:3 #9 0x4cb01c in run_argv git.c:818:4 #10 0x4cb01c in cmd_main git.c:949:19 #11 0x6b3f3d in main common-main.c:52:11 #12 0x7fa8addf3349 in __libc_start_main (/lib64/libc.so.6+0x24349) Indirect leak of 147 byte(s) in 1 object(s) allocated from: #0 0x49ab49 in realloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:164:3 #1 0xa721e5 in xrealloc wrapper.c:126:8 #2 0x9e8d54 in strbuf_grow strbuf.c:98:2 #3 0x9e8d54 in strbuf_vaddf strbuf.c:401:3 #4 0x9f7774 in strvec_pushf strvec.c:36:2 #5 0xa43e14 in setup_unpack_trees_porcelain unpack-trees.c:129:3 #6 0x97e011 in reset_head reset.c:53:2 #7 0x61dfa5 in cmd_rebase builtin/rebase.c:1991:9 #8 0x4ce83e in run_builtin git.c:475:11 #9 0x4ccafe in handle_builtin git.c:729:3 #10 0x4cb01c in run_argv git.c:818:4 #11 0x4cb01c in cmd_main git.c:949:19 #12 0x6b3f3d in main common-main.c:52:11 #13 0x7fa8addf3349 in __libc_start_main (/lib64/libc.so.6+0x24349) Indirect leak of 134 byte(s) in 1 object(s) allocated from: #0 0x49ab49 in realloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:164:3 #1 0xa721e5 in xrealloc wrapper.c:126:8 #2 0x9e8d54 in strbuf_grow strbuf.c:98:2 #3 0x9e8d54 in strbuf_vaddf strbuf.c:401:3 #4 0x9f7774 in strvec_pushf strvec.c:36:2 #5 0xa43fe4 in setup_unpack_trees_porcelain unpack-trees.c:168:3 #6 0x97e011 in reset_head reset.c:53:2 #7 0x61dfa5 in cmd_rebase builtin/rebase.c:1991:9 #8 0x4ce83e in run_builtin git.c:475:11 #9 0x4ccafe in handle_builtin git.c:729:3 #10 0x4cb01c in run_argv git.c:818:4 #11 0x4cb01c in cmd_main git.c:949:19 #12 0x6b3f3d in main common-main.c:52:11 #13 0x7fa8addf3349 in __libc_start_main (/lib64/libc.so.6+0x24349) Indirect leak of 130 byte(s) in 1 object(s) allocated from: #0 0x49ab49 in realloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:164:3 #1 0xa721e5 in xrealloc wrapper.c:126:8 #2 0x9e8d54 in strbuf_grow strbuf.c:98:2 #3 0x9e8d54 in strbuf_vaddf strbuf.c:401:3 #4 0x9f7774 in strvec_pushf strvec.c:36:2 #5 0xa43f20 in setup_unpack_trees_porcelain unpack-trees.c:150:3 #6 0x97e011 in reset_head reset.c:53:2 #7 0x61dfa5 in cmd_rebase builtin/rebase.c:1991:9 #8 0x4ce83e in run_builtin git.c:475:11 #9 0x4ccafe in handle_builtin git.c:729:3 #10 0x4cb01c in run_argv git.c:818:4 #11 0x4cb01c in cmd_main git.c:949:19 #12 0x6b3f3d in main common-main.c:52:11 #13 0x7fa8addf3349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 603 byte(s) leaked in 4 allocation(s). Signed-off-by: Andrzej Hunt Signed-off-by: Junio C Hamano --- reset.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reset.c b/reset.c index 4bea758053..79310ae071 100644 --- a/reset.c +++ b/reset.c @@ -21,7 +21,7 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, struct object_id head_oid; struct tree_desc desc[2] = { { NULL }, { NULL } }; struct lock_file lock = LOCK_INIT; - struct unpack_trees_options unpack_tree_opts; + struct unpack_trees_options unpack_tree_opts = { 0 }; struct tree *tree; const char *reflog_action; struct strbuf msg = STRBUF_INIT; @@ -49,7 +49,6 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, if (refs_only) goto reset_head_refs; - memset(&unpack_tree_opts, 0, sizeof(unpack_tree_opts)); setup_unpack_trees_porcelain(&unpack_tree_opts, action); unpack_tree_opts.head_idx = 1; unpack_tree_opts.src_index = r->index; @@ -134,6 +133,7 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, leave_reset_head: strbuf_release(&msg); rollback_lock_file(&lock); + clear_unpack_trees_porcelain(&unpack_tree_opts); while (nr) free((void *)desc[--nr].buffer); return ret; From f0b922473e246f4288669c2ed20a0b4b5d6dec13 Mon Sep 17 00:00:00 2001 From: Andrei Rybak Date: Thu, 29 Jul 2021 13:02:52 +0200 Subject: [PATCH 38/51] Documentation: render special characters correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three hyphens are rendered verbatim, so "--" has to be used to produce a dash. There is no double arrow ("<->" is rendered as "<→"), so a left and right arrow "<-->" have to be combined for that. So fix asciidoc output for special characters. This is similar to fixes in commit de82095a95 (doc hash-function-transition: fix asciidoc output, 2021-02-05). Signed-off-by: Andrei Rybak Acked-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/git-fetch.txt | 2 +- Documentation/gittutorial.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt index 9067c2079e..550c16ca61 100644 --- a/Documentation/git-fetch.txt +++ b/Documentation/git-fetch.txt @@ -133,7 +133,7 @@ remember to run that, set `fetch.prune` globally, or linkgit:git-config[1]. Here's where things get tricky and more specific. The pruning feature -doesn't actually care about branches, instead it'll prune local <-> +doesn't actually care about branches, instead it'll prune local <--> remote-references as a function of the refspec of the remote (see `` and <> above). diff --git a/Documentation/gittutorial.txt b/Documentation/gittutorial.txt index 59ef5cef1f..0e0b863105 100644 --- a/Documentation/gittutorial.txt +++ b/Documentation/gittutorial.txt @@ -322,7 +322,7 @@ initiating this "pull". If Bob's work conflicts with what Alice did since their histories forked, Alice will use her working tree and the index to resolve conflicts, and existing local changes will interfere with the conflict resolution process (Git will still perform the fetch but will -refuse to merge --- Alice will have to get rid of her local changes in +refuse to merge -- Alice will have to get rid of her local changes in some way and pull again when this happens). Alice can peek at what Bob did without merging first, using the "fetch" From 482e1488a9b549cbf7466f4ee5396f93bb4588d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 30 Jul 2021 18:18:14 +0200 Subject: [PATCH 39/51] t0001: fix broken not-quite getcwd(3) test in bed67874e2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With a54e938e5b (strbuf: support long paths w/o read rights in strbuf_getcwd() on FreeBSD, 2017-03-26) we had t0001 break on systems like OpenBSD and AIX whose getcwd(3) has standard (but not like glibc et al) behavior. This was partially fixed in bed67874e2 (t0001: skip test with restrictive permissions if getpwd(3) respects them, 2017-08-07). The problem with that fix is that while its analysis of the problem is correct, it doesn't actually call getcwd(3), instead it invokes "pwd -P". There is no guarantee that "pwd -P" is going to call getcwd(3), as opposed to e.g. being a shell built-in. On AIX under both bash and ksh this test breaks because "pwd -P" will happily display the current working directory, but getcwd(3) called by the "git init" we're testing here will fail to get it. I checked whether clobbering the $PWD environment variable would affect it, and it didn't. Presumably these shells keep track of their working directory internally. There's possible follow-up work here in teaching strbuf_getcwd() to get the working directory with whatever method "pwd" uses on these platforms. See [1] for a discussion of that, but let's take the easy way out here and just skip these tests by fixing the GETCWD_IGNORES_PERMS prerequisite to match the limitations of strbuf_getcwd(). 1. https://lore.kernel.org/git/b650bef5-d739-d98d-e9f1-fa292b6ce982@web.de/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- Makefile | 1 + t/helper/test-getcwd.c | 26 ++++++++++++++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + t/t0001-init.sh | 5 ++++- 5 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 t/helper/test-getcwd.c diff --git a/Makefile b/Makefile index c3565fc0f8..8c000ba48b 100644 --- a/Makefile +++ b/Makefile @@ -711,6 +711,7 @@ TEST_BUILTINS_OBJS += test-example-decorate.o TEST_BUILTINS_OBJS += test-fast-rebase.o TEST_BUILTINS_OBJS += test-genrandom.o TEST_BUILTINS_OBJS += test-genzeros.o +TEST_BUILTINS_OBJS += test-getcwd.o TEST_BUILTINS_OBJS += test-hash-speed.o TEST_BUILTINS_OBJS += test-hash.o TEST_BUILTINS_OBJS += test-hashmap.o diff --git a/t/helper/test-getcwd.c b/t/helper/test-getcwd.c new file mode 100644 index 0000000000..d680038a78 --- /dev/null +++ b/t/helper/test-getcwd.c @@ -0,0 +1,26 @@ +#include "test-tool.h" +#include "git-compat-util.h" +#include "parse-options.h" + +static const char *getcwd_usage[] = { + "test-tool getcwd", + NULL +}; + +int cmd__getcwd(int argc, const char **argv) +{ + struct option options[] = { + OPT_END() + }; + char *cwd; + + argc = parse_options(argc, argv, "test-tools", options, getcwd_usage, 0); + if (argc > 0) + usage_with_options(getcwd_usage, options); + + cwd = xgetcwd(); + puts(cwd); + free(cwd); + + return 0; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index c5bd0c6d4c..3c13cb19b5 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -33,6 +33,7 @@ static struct test_cmd cmds[] = { { "fast-rebase", cmd__fast_rebase }, { "genrandom", cmd__genrandom }, { "genzeros", cmd__genzeros }, + { "getcwd", cmd__getcwd }, { "hashmap", cmd__hashmap }, { "hash-speed", cmd__hash_speed }, { "index-version", cmd__index_version }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index e8069a3b22..e691a4d9e9 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -23,6 +23,7 @@ int cmd__example_decorate(int argc, const char **argv); int cmd__fast_rebase(int argc, const char **argv); int cmd__genrandom(int argc, const char **argv); int cmd__genzeros(int argc, const char **argv); +int cmd__getcwd(int argc, const char **argv); int cmd__hashmap(int argc, const char **argv); int cmd__hash_speed(int argc, const char **argv); int cmd__index_version(int argc, const char **argv); diff --git a/t/t0001-init.sh b/t/t0001-init.sh index acd662e403..df544bb321 100755 --- a/t/t0001-init.sh +++ b/t/t0001-init.sh @@ -356,7 +356,10 @@ test_lazy_prereq GETCWD_IGNORES_PERMS ' chmod 100 $base || BUG "cannot prepare $base" - (cd $base/dir && /bin/pwd -P) + ( + cd $base/dir && + test-tool getcwd + ) status=$? chmod 700 $base && From 74318423259b265cf95112355be254fa21d4a6f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 30 Jul 2021 21:06:58 +0200 Subject: [PATCH 40/51] use fspathhash() everywhere MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cf2dc1c238 (speed up alt_odb_usable() with many alternates, 2021-07-07) introduced the function fspathhash() for calculating path hashes while respecting the configuration option core.ignorecase. Call it instead of open-coding it; the resulting code is shorter and less repetitive. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 10 ++-------- dir.c | 13 +++---------- merge-recursive.c | 11 +++-------- 3 files changed, 8 insertions(+), 26 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index a4bdd7c494..8ba9f13787 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -380,10 +380,7 @@ static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *pat struct pattern_entry *e = xmalloc(sizeof(*e)); e->patternlen = path->len; e->pattern = strbuf_detach(path, NULL); - hashmap_entry_init(&e->ent, - ignore_case ? - strihash(e->pattern) : - strhash(e->pattern)); + hashmap_entry_init(&e->ent, fspathhash(e->pattern)); hashmap_add(&pl->recursive_hashmap, &e->ent); @@ -399,10 +396,7 @@ static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *pat e = xmalloc(sizeof(struct pattern_entry)); e->patternlen = newlen; e->pattern = xstrndup(oldpattern, newlen); - hashmap_entry_init(&e->ent, - ignore_case ? - strihash(e->pattern) : - strhash(e->pattern)); + hashmap_entry_init(&e->ent, fspathhash(e->pattern)); if (!hashmap_get_entry(&pl->parent_hashmap, e, ent, NULL)) hashmap_add(&pl->parent_hashmap, &e->ent); diff --git a/dir.c b/dir.c index 23b4417268..03c4d21267 100644 --- a/dir.c +++ b/dir.c @@ -782,9 +782,7 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern translated->pattern = truncated; translated->patternlen = given->patternlen - 2; hashmap_entry_init(&translated->ent, - ignore_case ? - strihash(translated->pattern) : - strhash(translated->pattern)); + fspathhash(translated->pattern)); if (!hashmap_get_entry(&pl->recursive_hashmap, translated, ent, NULL)) { @@ -813,9 +811,7 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern translated->pattern = dup_and_filter_pattern(given->pattern); translated->patternlen = given->patternlen; hashmap_entry_init(&translated->ent, - ignore_case ? - strihash(translated->pattern) : - strhash(translated->pattern)); + fspathhash(translated->pattern)); hashmap_add(&pl->recursive_hashmap, &translated->ent); @@ -845,10 +841,7 @@ static int hashmap_contains_path(struct hashmap *map, /* Check straight mapping */ p.pattern = pattern->buf; p.patternlen = pattern->len; - hashmap_entry_init(&p.ent, - ignore_case ? - strihash(p.pattern) : - strhash(p.pattern)); + hashmap_entry_init(&p.ent, fspathhash(p.pattern)); return !!hashmap_get_entry(map, &p, ent, NULL); } diff --git a/merge-recursive.c b/merge-recursive.c index 7008a90df5..3355d50e8a 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -61,11 +61,6 @@ static int path_hashmap_cmp(const void *cmp_data, return strcmp(a->path, key ? key : b->path); } -static unsigned int path_hash(const char *path) -{ - return ignore_case ? strihash(path) : strhash(path); -} - /* * For dir_rename_entry, directory names are stored as a full path from the * toplevel of the repository and do not include a trailing '/'. Also: @@ -463,7 +458,7 @@ static int save_files_dirs(const struct object_id *oid, strbuf_addstr(base, path); FLEX_ALLOC_MEM(entry, path, base->buf, base->len); - hashmap_entry_init(&entry->e, path_hash(entry->path)); + hashmap_entry_init(&entry->e, fspathhash(entry->path)); hashmap_add(&opt->priv->current_file_dir_set, &entry->e); strbuf_setlen(base, baselen); @@ -737,14 +732,14 @@ static char *unique_path(struct merge_options *opt, base_len = newpath.len; while (hashmap_get_from_hash(&opt->priv->current_file_dir_set, - path_hash(newpath.buf), newpath.buf) || + fspathhash(newpath.buf), newpath.buf) || (!opt->priv->call_depth && file_exists(newpath.buf))) { strbuf_setlen(&newpath, base_len); strbuf_addf(&newpath, "_%d", suffix++); } FLEX_ALLOC_MEM(entry, path, newpath.buf, newpath.len); - hashmap_entry_init(&entry->e, path_hash(entry->path)); + hashmap_entry_init(&entry->e, fspathhash(entry->path)); hashmap_add(&opt->priv->current_file_dir_set, &entry->e); return strbuf_detach(&newpath, NULL); } From 4da8b2fcd4f7b993ffd3ecf59d279c97fe23b0df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Sun, 1 Aug 2021 14:53:00 -0700 Subject: [PATCH 41/51] t7508: avoid non POSIX BRE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 24c30e0b6 (wt-status: tolerate dangling marks, 2020-09-01) adds a test that uses a BRE which breaks at least with OpenBSD's grep. switch to an ERE as it is done for similar checks and while at it, remove the now obsolete test_i18ngrep call. Signed-off-by: Carlo Marcelo Arenas Belón Signed-off-by: Junio C Hamano --- t/t7508-status.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t7508-status.sh b/t/t7508-status.sh index 2b72451ba3..05c6c02435 100755 --- a/t/t7508-status.sh +++ b/t/t7508-status.sh @@ -882,7 +882,7 @@ test_expect_success 'status shows detached HEAD properly after checking out non- git clone upstream downstream && git -C downstream checkout @{u} && git -C downstream status >actual && - test_i18ngrep "HEAD detached at [0-9a-f]\\+" actual + grep -E "HEAD detached at [0-9a-f]+" actual ' test_expect_success 'setup status submodule summary' ' From 3e7d4888e5b83f1ed75667ff557b8996f427adf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=C3=A9tous?= Date: Mon, 2 Aug 2021 21:07:30 +0000 Subject: [PATCH 42/51] mingw: align symlinks-related rmdir() behavior with Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When performing a rebase, rmdir() is called on the folder .git/logs. On Unix rmdir() exits without deleting anything in case .git/logs is a symbolic link but the equivalent functions on Windows (_rmdir, _wrmdir and RemoveDirectoryW) do not behave the same and remove the folder if it is symlinked even if it is not empty. This creates issues when folders in .git/ are symlinks which is especially the case when git-repo[1] is used: It replaces `.git/logs/` with a symlink. One such issue is that the _target_ of that symlink is removed e.g. during a `git rebase`, where `delete_reflog("REBASE_HEAD")` will not only try to remove `.git/logs/REBASE_HEAD` but then recursively try to remove the parent directories until an error occurs, a technique that obviously relies on `rmdir()` refusing to remove a symlink. This was reported in https://github.com/git-for-windows/git/issues/2967. This commit updates mingw_rmdir() so that its behavior is the same as Linux rmdir() in case of symbolic links. To verify that Git does not regress on the reported issue, this patch adds a regression test for the `git rebase` symptom, even if the same `rmdir()` behavior is quite likely to cause potential problems in other Git commands as well. [1]: git-repo is a python tool built on top of Git which helps manage many Git repositories. It stores all the .git/ folders in a central place by taking advantage of symbolic links. More information: https://gerrit.googlesource.com/git-repo/ Signed-off-by: Thomas Bétous Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/mingw.c | 21 +++++++++++++++++++++ t/t3400-rebase.sh | 10 ++++++++++ t/test-lib.sh | 6 ++++++ 3 files changed, 37 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index aa647b367b..9e0cd1e097 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -341,6 +341,27 @@ int mingw_rmdir(const char *pathname) { int ret, tries = 0; wchar_t wpathname[MAX_PATH]; + struct stat st; + + /* + * Contrary to Linux' `rmdir()`, Windows' _wrmdir() and _rmdir() + * (and `RemoveDirectoryW()`) will attempt to remove the target of a + * symbolic link (if it points to a directory). + * + * This behavior breaks the assumption of e.g. `remove_path()` which + * upon successful deletion of a file will attempt to remove its parent + * directories recursively until failure (which usually happens when + * the directory is not empty). + * + * Therefore, before calling `_wrmdir()`, we first check if the path is + * a symbolic link. If it is, we exit and return the same error as + * Linux' `rmdir()` would, i.e. `ENOTDIR`. + */ + if (!mingw_lstat(pathname, &st) && S_ISLNK(st.st_mode)) { + errno = ENOTDIR; + return -1; + } + if (xutftowcs_path(wpathname, pathname) < 0) return -1; diff --git a/t/t3400-rebase.sh b/t/t3400-rebase.sh index 0bb88aa982..23dbd3c82e 100755 --- a/t/t3400-rebase.sh +++ b/t/t3400-rebase.sh @@ -406,4 +406,14 @@ test_expect_success 'refuse to switch to branch checked out elsewhere' ' test_i18ngrep "already checked out" err ' +test_expect_success MINGW,SYMLINKS_WINDOWS 'rebase when .git/logs is a symlink' ' + git checkout main && + mv .git/logs actual_logs && + cmd //c "mklink /D .git\logs ..\actual_logs" && + git rebase -f HEAD^ && + test -L .git/logs && + rm .git/logs && + mv actual_logs .git/logs +' + test_done diff --git a/t/test-lib.sh b/t/test-lib.sh index adaf03543e..73f6d645b6 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1513,6 +1513,12 @@ test_lazy_prereq SYMLINKS ' ln -s x y && test -h y ' +test_lazy_prereq SYMLINKS_WINDOWS ' + # test whether symbolic links are enabled on Windows + test_have_prereq MINGW && + cmd //c "mklink y x" &> /dev/null && test -h y +' + test_lazy_prereq FILEMODE ' test "$(git config --bool core.filemode)" = true ' From 11c649b8911eb641a41169957dba525efe50eaa0 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 4 Aug 2021 19:24:20 +0700 Subject: [PATCH 43/51] diff: --pickaxe-all typofix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When I was fixing fuzzies as I updating po/id.po for 2.33.0 l10n round, I noticed a triple-dash typo (--pickaxe-all) at diff.c, which according to git-diff(1) manpage, the correct option name should be --pickaxe-all. Fix the typo. Signed-off-by: Bagas Sanjaya Acked-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- diff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diff.c b/diff.c index fe3abac79f..084f7b1bae 100644 --- a/diff.c +++ b/diff.c @@ -4636,7 +4636,7 @@ void diff_setup_done(struct diff_options *options) die(_("-G and --pickaxe-regex are mutually exclusive, use --pickaxe-regex with -S")); if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_ALL_OBJFIND_MASK)) - die(_("---pickaxe-all and --find-object are mutually exclusive, use --pickaxe-all with -G and -S")); + die(_("--pickaxe-all and --find-object are mutually exclusive, use --pickaxe-all with -G and -S")); /* * Most of the time we can say "there are changes" From e5a14ddd2d93da4d951fd63d4f78fe2410debe68 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 4 Aug 2021 13:09:13 -0700 Subject: [PATCH 44/51] The eighth batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.33.0.txt | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/Documentation/RelNotes/2.33.0.txt b/Documentation/RelNotes/2.33.0.txt index 7960c4f7ab..09cab31e7a 100644 --- a/Documentation/RelNotes/2.33.0.txt +++ b/Documentation/RelNotes/2.33.0.txt @@ -48,7 +48,7 @@ Performance, Internal Implementation, Development Support etc. reduce code duplication. * Repeated rename detections in a sequence of mergy operations have - been optimize out. + been optimized out for the 'ort' merge strategy. * Preliminary clean-up of tests before the main reftable changes hits the codebase. @@ -98,6 +98,11 @@ Performance, Internal Implementation, Development Support etc. * "git read-tree" had a codepath where blobs are fetched one-by-one from the promisor remote, which has been corrected to fetch in bulk. + * Rewrite of "git submodule" in C continues. + + * "git checkout" and "git commit" learn to work without unnecessarily + expanding sparse indexes. + Fixes since v2.32 ----------------- @@ -237,6 +242,14 @@ Fixes since v2.32 * A race between repacking and using pack bitmaps has been corrected. (merge dc1daacdcc jk/check-pack-valid-before-opening-bitmap later to maint). + * The local changes stashed by "git merge --autostash" were lost when + the merge failed in certain ways, which has been corrected. + + * Windows rmdir() equivalent behaves differently from POSIX ones in + that when used on a symbolic link that points at a directory, the + target directory gets removed, which has been corrected. + (merge 3e7d4888e5 tb/mingw-rmdir-symlink-to-directory later to maint). + * Other code cleanup, docfix, build fix, etc. (merge bfe35a6165 ah/doc-describe later to maint). (merge f302c1e4aa jc/clarify-revision-range later to maint). @@ -278,3 +291,5 @@ Fixes since v2.32 (merge ddcb189d9d tb/bitmap-type-filter-comment-fix later to maint). (merge 878b399734 pb/submodule-recurse-doc later to maint). (merge 734283855f jk/config-env-doc later to maint). + (merge 482e1488a9 ab/getcwd-test later to maint). + (merge f0b922473e ar/doc-markup-fix later to maint). From 390b44eb2bca5195d524629bccc98d8bbee74410 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Thu, 5 Aug 2021 14:48:25 -0500 Subject: [PATCH 45/51] test: fix for COLUMNS and bash 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since c49a177bec (test-lib.sh: set COLUMNS=80 for --verbose repeatability, 2021-06-29) multiple tests have been failing when using bash 5 because checkwinsize is enabled by default, therefore COLUMNS is reset using TIOCGWINSZ even for non-interactive shells. It's debatable whether or not bash should even be doing that, but for now we can avoid this undesirable behavior by disabling this option. Reported-by: Fabian Stelzer Signed-off-by: Felipe Contreras [jc: with SZEDER Gábor's suggestion to do this before setting COLUMNS] Signed-off-by: Junio C Hamano --- t/test-lib.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/t/test-lib.sh b/t/test-lib.sh index 4d4439a917..52701afaea 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -395,6 +395,12 @@ then verbose=t fi +# Since bash 5.0, checkwinsize is enabled by default which does +# update the COLUMNS variable every time a non-builtin command +# completes, even for non-interactive shells. +# Disable that since we are aiming for repeatability. +test -n "$BASH_VERSION" && shopt -u checkwinsize 2>/dev/null + # For repeatability, reset the environment to known value. # TERM is sanitized below, after saving color control sequences. LANG=C From 2d755dfac9aadab25c3e025b849252b8c0a61465 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 6 Aug 2021 12:53:06 -0700 Subject: [PATCH 46/51] Git 2.33-rc1 Signed-off-by: Junio C Hamano --- GIT-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index b1507e8037..504de6536b 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.33.0-rc0 +DEF_VER=v2.33.0-rc1 LF=' ' From 14825944d7c57b6acd7e1a05a4c6046965efc6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Sun, 8 Aug 2021 18:38:31 -0700 Subject: [PATCH 47/51] oidtree: avoid nested struct oidtree_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 92d8ed8ac1 (oidtree: a crit-bit tree for odb_loose_cache, 2021-07-07) adds a struct oidtree_node that contains only an n field with a struct cb_node. unfortunately, while building in pedantic mode witch clang 12 (as well as a similar error from gcc 11) it will show: oidtree.c:11:17: error: 'n' may not be nested in a struct due to flexible array member [-Werror,-Wflexible-array-extensions] struct cb_node n; ^ because of a constrain coded in ISO C 11 6.7.2.1¶3 that forbids using structs that contain a flexible array as part of another struct. use a strict cb_node directly instead. Signed-off-by: Carlo Marcelo Arenas Belón Signed-off-by: Junio C Hamano --- oidtree.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/oidtree.c b/oidtree.c index 7eb0e9ba05..580cab8ae2 100644 --- a/oidtree.c +++ b/oidtree.c @@ -6,11 +6,6 @@ #include "alloc.h" #include "hash.h" -struct oidtree_node { - /* n.k[] is used to store "struct object_id" */ - struct cb_node n; -}; - struct oidtree_iter_data { oidtree_iter fn; void *arg; @@ -35,13 +30,13 @@ void oidtree_clear(struct oidtree *ot) void oidtree_insert(struct oidtree *ot, const struct object_id *oid) { - struct oidtree_node *on; + struct cb_node *on; if (!oid->algo) BUG("oidtree_insert requires oid->algo"); on = mem_pool_alloc(&ot->mem_pool, sizeof(*on) + sizeof(*oid)); - oidcpy_with_padding((struct object_id *)on->n.k, oid); + oidcpy_with_padding((struct object_id *)on->k, oid); /* * n.b. Current callers won't get us duplicates, here. If a @@ -49,7 +44,7 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) * that won't be freed until oidtree_clear. Currently it's not * worth maintaining a free list */ - cb_insert(&ot->tree, &on->n, sizeof(*oid)); + cb_insert(&ot->tree, on, sizeof(*oid)); } From dd3c8a72a2eaecf0c752a37e1f4ba4de59818e93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Sun, 8 Aug 2021 18:38:32 -0700 Subject: [PATCH 48/51] object-store: avoid extra ';' from KHASH_INIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cf2dc1c238 (speed up alt_odb_usable() with many alternates, 2021-07-07) introduces a KHASH_INIT invocation with a trailing ';', which while commonly expected will trigger warnings with pedantic on both clang[-Wextra-semi] and gcc[-Wpedantic], because that macro has already a semicolon and is meant to be invoked without one. while fixing the macro would be a worthy solution (specially considering this is a common recurring problem), remove the extra ';' for now to minimize churn. Signed-off-by: Carlo Marcelo Arenas Belón Signed-off-by: Junio C Hamano --- object-store.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-store.h b/object-store.h index e679acc4c3..d24915ced1 100644 --- a/object-store.h +++ b/object-store.h @@ -34,7 +34,7 @@ struct object_directory { }; KHASH_INIT(odb_path_map, const char * /* key: odb_path */, - struct object_directory *, 1, fspathhash, fspatheq); + struct object_directory *, 1, fspathhash, fspatheq) void prepare_alt_odb(struct repository *r); char *compute_alternate_path(const char *path, struct strbuf *err); From 6a38e33331560c3e84123c567f95de72d1cfa506 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Thu, 5 Aug 2021 18:45:23 -0700 Subject: [PATCH 49/51] Revert 'diff-merges: let "-m" imply "-p"' This reverts commit f5bfcc823ba242a46e20fb6f71c9fbf7ebb222fe, which made "git log -m" imply "--patch" by default. The logic was that "-m", which makes diff generation for merges perform a diff against each parent, has no use unless I am viewing the diff, so we could save the user some typing by turning on display of the resulting diff automatically. That wasn't expected to adversely affect scripts because scripts would either be using a command like "git diff-tree" that already emits diffs by default or would be combining -m with a diff generation option such as --name-status. By saving typing for interactive use without adversely affecting scripts in the wild, it would be a pure improvement. The problem is that although diff generation options are only relevant for the displayed diff, a script author can imagine them affecting path limiting. For example, I might run git log -w --format=%H -- README hoping to list commits that edited README, excluding whitespace-only changes. In fact, a whitespace-only change is not TREESAME so the use of -w here has no effect (since we don't apply these diff generation flags to the diff_options struct rev_info::pruning used for this purpose), but the documentation suggests that it should work Suppose you specified foo as the . We shall call commits that modify foo !TREESAME, and the rest TREESAME. (In a diff filtered for foo, they look different and equal, respectively.) and a script author who has not tested whitespace-only changes wouldn't notice. Similarly, a script author could include git log -m --first-parent --format=%H -- README to filter the first-parent history for commits that modified README. The -m is a no-op but it reflects the script author's intent. For example, until 1e20a407fe2 (stash list: stop passing "-m" to "git log", 2021-05-21), "git stash list" did this. As a result, we can't safely change "-m" to imply "-p" without fear of breaking such scripts. Restore the previous behavior. Noticed because Rust's src/bootstrap/bootstrap.py made use of this same construct: https://github.com/rust-lang/rust/pull/87513. That script has been updated to omit the unnecessary "-m" option, but we can expect other scripts in the wild to have similar expectations. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 8 ++++---- diff-merges.c | 1 - t/t4013-diff-various.sh | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 2825783049..6d968b9012 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -49,9 +49,10 @@ ifdef::git-log[] --diff-merges=m::: -m::: This option makes diff output for merge commits to be shown in - the default format. The default format could be changed using + the default format. `-m` will produce the output only if `-p` + is given as well. The default format could be changed using `log.diffMerges` configuration parameter, which default value - is `separate`. `-m` implies `-p`. + is `separate`. + --diff-merges=first-parent::: --diff-merges=1::: @@ -61,8 +62,7 @@ ifdef::git-log[] --diff-merges=separate::: This makes merge commits show the full diff with respect to each of the parents. Separate log entry and diff is generated - for each parent. This is the format that `-m` produced - historically. + for each parent. + --diff-merges=combined::: --diff-merges=c::: diff --git a/diff-merges.c b/diff-merges.c index 0dfcaa1b11..d897fd8a29 100644 --- a/diff-merges.c +++ b/diff-merges.c @@ -107,7 +107,6 @@ int diff_merges_parse_opts(struct rev_info *revs, const char **argv) if (!strcmp(arg, "-m")) { set_to_default(revs); - revs->merges_imply_patch = 1; } else if (!strcmp(arg, "-c")) { set_combined(revs); revs->merges_imply_patch = 1; diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh index 7fadc985cc..e561a8e485 100755 --- a/t/t4013-diff-various.sh +++ b/t/t4013-diff-various.sh @@ -455,8 +455,8 @@ diff-tree --stat --compact-summary initial mode diff-tree -R --stat --compact-summary initial mode EOF -test_expect_success 'log -m matches log -m -p' ' - git log -m -p master >result && +test_expect_success 'log -m matches pure log' ' + git log master >result && process_diffs result >expected && git log -m >result && process_diffs result >actual && From 581a3bb155c157094ca486e3a10c4a9b70c8f650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 6 Aug 2021 19:53:47 +0200 Subject: [PATCH 50/51] object-file: use unsigned arithmetic with bit mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 33f379eee6 (make object_directory.loose_objects_subdir_seen a bitmap, 2021-07-07) replaced a wasteful 256-byte array with a 32-byte array and bit operations. The mask calculation shifts a literal 1 of type int left by anything between 0 and 31. UndefinedBehaviorSanitizer doesn't like that and reports: object-file.c:2477:18: runtime error: left shift of 1 by 31 places cannot be represented in type 'int' Make sure to use an unsigned 1 instead to avoid the issue. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index 35f3e7e9bb..dcb1859225 100644 --- a/object-file.c +++ b/object-file.c @@ -2463,7 +2463,7 @@ struct oidtree *odb_loose_cache(struct object_directory *odb, struct strbuf buf = STRBUF_INIT; size_t word_bits = bitsizeof(odb->loose_objects_subdir_seen[0]); size_t word_index = subdir_nr / word_bits; - size_t mask = 1 << (subdir_nr % word_bits); + size_t mask = 1u << (subdir_nr % word_bits); uint32_t *bitmap; if (subdir_nr < 0 || From 5d213e46bb7b880238ff5ea3914e940a50ae9369 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 11 Aug 2021 11:54:03 -0700 Subject: [PATCH 51/51] Git 2.33-rc2 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.33.0.txt | 12 ------------ GIT-VERSION-GEN | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/Documentation/RelNotes/2.33.0.txt b/Documentation/RelNotes/2.33.0.txt index 09cab31e7a..afa2663809 100644 --- a/Documentation/RelNotes/2.33.0.txt +++ b/Documentation/RelNotes/2.33.0.txt @@ -1,18 +1,6 @@ Git 2.33 Release Notes ====================== -Backward compatibility notes ----------------------------- - - * The "-m" option in "git log -m" that does not specify which format, - if any, of diff is desired did not have any visible effect; it now - implies some form of diff (by default "--patch") is produced. - - You can disable the diff output with "git log -m --no-patch", but - then there probably isn't much point in passing "-m" in the first - place ;-). - - Updates since Git 2.32 ---------------------- diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 504de6536b..98be1eb261 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.33.0-rc1 +DEF_VER=v2.33.0-rc2 LF=' '