From d3b2ff75fd7056bc88ea838e8ed5c772ba149e2c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 20 May 2024 16:14:30 -0700 Subject: [PATCH 1/5] setup: add an escape hatch for "no more default hash algorithm" change Partially revert c8aed5e8 (repository: stop setting SHA1 as the default object hash, 2024-05-07), to keep end-user systems still broken when we have a gap in our test coverage but yet give them an escape hatch to set the GIT_TEST_DEFAULT_HASH_ALGO environment variable to "sha1" in order to revert to the previous behaviour, in case we haven't done a thorough job in fixing the fallout from c8aed5e8. After we build confidence, we should remove the escape hatch support, but we are not there yet after only fixing three commands (hash-object, apply, and patch-id) in this series. Due to the way the end-user facing GIT_DEFAULT_HASH environment variable is used in our test suite, we unfortunately cannot reuse it for this purpose. Signed-off-by: Junio C Hamano --- repository.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/repository.c b/repository.c index 15c10015b0..c62e329878 100644 --- a/repository.c +++ b/repository.c @@ -19,6 +19,28 @@ static struct repository the_repo; struct repository *the_repository = &the_repo; +/* + * An escape hatch: if we hit a bug in the production code that fails + * to set an appropriate hash algorithm (most likely to happen when + * running outside a repository), we can tell the user who reported + * the crash to set the environment variable to "sha1" (all lowercase) + * to revert to the historical behaviour of defaulting to SHA-1.
+ */ +static void set_default_hash_algo(struct repository *repo) +{ + const char *hash_name; + int algo; + + hash_name = getenv("GIT_TEST_DEFAULT_HASH_ALGO"); + if (!hash_name) + return; + algo = hash_algo_by_name(hash_name); + if (algo == GIT_HASH_UNKNOWN) + return; + + repo_set_hash_algo(repo, algo); +} + void initialize_repository(struct repository *repo) { repo->objects = raw_object_store_new(); @@ -26,6 +48,28 @@ void initialize_repository(struct repository *repo) repo->parsed_objects = parsed_object_pool_new(); ALLOC_ARRAY(repo->index, 1); index_state_init(repo->index, repo); + + /* + * When a command runs inside a repository, it learns what + * hash algorithm is in use from the repository, but some + * commands are designed to work outside a repository, yet + * they want to access the_hash_algo, if only for the length + * of the hashed value to see if their input looks like a + * plausible hash value. + * + * We are in the process of identifying such code paths and + * giving them an appropriate default individually; any + * unconverted code paths that try to access the_hash_algo + * will thus fail. The end-users however have an escape hatch + * to set GIT_TEST_DEFAULT_HASH_ALGO environment variable to + * "sha1" to get back the old behaviour of defaulting to SHA-1. + * + * This escape hatch is deliberately kept unadvertised, so + * that they see crashes and we can get a report before + * telling them about it. + */ + if (repo == the_repository) + set_default_hash_algo(repo); } static void expand_base_dir(char **out, const char *in, From abece6e9704dd3139b56a37a8b1eb94d1ab0b8b5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 20 May 2024 16:14:31 -0700 Subject: [PATCH 2/5] t1517: test commands that are designed to be run outside repository A few commands, like "git apply" and "git patch-id", have been broken with a recent change to stop setting the default hash algorithm to SHA-1. Test them and fix them in later commits. 
Signed-off-by: Junio C Hamano --- t/t1517-outside-repo.sh | 59 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100755 t/t1517-outside-repo.sh diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh new file mode 100755 index 0000000000..389974d9fb --- /dev/null +++ b/t/t1517-outside-repo.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +test_description='check random commands outside repo' + +TEST_PASSES_SANITIZE_LEAK=true +. ./test-lib.sh + +test_expect_success 'set up a non-repo directory and test file' ' + GIT_CEILING_DIRECTORIES=$(pwd) && + export GIT_CEILING_DIRECTORIES && + mkdir non-repo && + ( + cd non-repo && + # confirm that git does not find a repo + test_must_fail git rev-parse --git-dir + ) && + test_write_lines one two three four >nums && + git add nums && + cp nums nums.old && + test_write_lines five >>nums && + git diff >sample.patch +' + +test_expect_failure 'compute a patch-id outside repository (uses SHA-1)' ' + nongit env GIT_DEFAULT_HASH=sha1 \ + git patch-id patch-id.expect && + nongit \ + git patch-id patch-id.actual && + test_cmp patch-id.expect patch-id.actual +' + +test_expect_failure 'hash-object outside repository (uses SHA-1)' ' + nongit env GIT_DEFAULT_HASH=sha1 \ + git hash-object --stdin hash.expect && + nongit \ + git hash-object --stdin hash.actual && + test_cmp hash.expect hash.actual +' + +test_expect_failure 'apply a patch outside repository' ' + ( + cd non-repo && + cp ../nums.old nums && + git apply ../sample.patch + ) && + test_cmp nums non-repo/nums +' + +test_expect_success 'grep outside repository' ' + git grep --cached two >expect && + ( + cd non-repo && + cp ../nums.old nums && + git grep --no-index two >../actual + ) && + test_cmp expect actual +' + +test_done From 4a1c95931f5f0dc541925703b7d9ad9409cd4067 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 May 2024 16:14:32 -0700 Subject: [PATCH 3/5] builtin/patch-id: fix uninitialized hash function MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In c8aed5e8da (repository: stop setting SHA1 as the default object hash, 2024-05-07), we have adapted `initialize_repository()` to no longer set up a default hash function. As this function is also used to set up `the_repository`, the consequence is that `the_hash_algo` will now by default be a `NULL` pointer unless the hash algorithm was configured properly. This is done as a mechanism to detect cases where we may be using the wrong hash function by accident. This change now causes git-patch-id(1) to segfault when it's run outside of a repository. As this command can read diffs from stdin, it does not necessarily need a repository, but then relies on `the_hash_algo` to compute the patch ID itself. It is somewhat dubious that git-patch-id(1) relies on `the_hash_algo` in the first place. Quoting its manpage: A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a patch, with line numbers ignored. As such, it’s "reasonably stable", but at the same time also reasonably unique, i.e., two patches that have the same "patch ID" are almost guaranteed to be the same thing. We explicitly document patch IDs to be using SHA-1. Furthermore, patch IDs are supposed to be stable for the most part. But even with the same input, the patch IDs will now be different depending on the repo's configured object hash. Work around the issue by setting up SHA-1 when there was no startup repository for now. This is arguably not the correct fix, but for now we would rather focus on getting the segfault fixed.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/patch-id.c | 13 +++++++++++++ t/t1517-outside-repo.sh | 2 +- t/t4204-patch-id.sh | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/builtin/patch-id.c b/builtin/patch-id.c index 3894d2b970..583099cacf 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -5,6 +5,7 @@ #include "hash.h" #include "hex.h" #include "parse-options.h" +#include "setup.h" static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result) { @@ -237,6 +238,18 @@ int cmd_patch_id(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, builtin_patch_id_options, patch_id_usage, 0); + /* + * We rely on `the_hash_algo` to compute patch IDs. This is dubious as + * it means that the hash algorithm now depends on the object hash of + * the repository, even though git-patch-id(1) clearly defines that + * patch IDs always use SHA1. + * + * NEEDSWORK: This hack should be removed in favor of converting + * the code that computes patch IDs to always use SHA1. + */ + if (!the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + generate_id_list(opts ? opts > 1 : config.stable, opts ? 
opts == 3 : config.verbatim); return 0; diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh index 389974d9fb..278ef57b3a 100755 --- a/t/t1517-outside-repo.sh +++ b/t/t1517-outside-repo.sh @@ -21,7 +21,7 @@ test_expect_success 'set up a non-repo directory and test file' ' git diff >sample.patch ' -test_expect_failure 'compute a patch-id outside repository (uses SHA-1)' ' +test_expect_success 'compute a patch-id outside repository (uses SHA-1)' ' nongit env GIT_DEFAULT_HASH=sha1 \ git patch-id patch-id.expect && nongit \ diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index a7fa94ce0a..605faea0c7 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -310,4 +310,38 @@ test_expect_success 'patch-id handles diffs with one line of before/after' ' test_config patchid.stable true && calc_patch_id diffu1stable diff <<-\EOF && + diff --git a/bar b/bar + index bdaf90f..31051f6 100644 + --- a/bar + +++ b/bar + @@ -2 +2,2 @@ + b + +c + EOF + + git init --object-format=sha1 repo-sha1 && + git -C repo-sha1 patch-id patch-id-sha1 && + git init --object-format=sha256 repo-sha256 && + git -C repo-sha256 patch-id patch-id-sha256 && + test_cmp patch-id-sha1 patch-id-sha256 +' + +test_expect_success 'patch-id without repository' ' + cat >diff <<-\EOF && + diff --git a/bar b/bar + index bdaf90f..31051f6 100644 + --- a/bar + +++ b/bar + @@ -2 +2,2 @@ + b + +c + EOF + nongit git patch-id Date: Mon, 20 May 2024 16:14:33 -0700 Subject: [PATCH 4/5] builtin/hash-object: fix uninitialized hash function The git-hash-object(1) command allows users to hash an object even without a repository. Starting with c8aed5e8da (repository: stop setting SHA1 as the default object hash, 2024-05-07), this will make us hit an uninitialized hash function, which subsequently leads to a segfault. Fix this by falling back to SHA-1 explicitly when running outside of a Git repository. 
Users can use the GIT_DEFAULT_HASH environment variable to specify what hash algorithm they want, so arguably this code should not be needed. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/hash-object.c | 3 +++ t/t1007-hash-object.sh | 6 ++++++ t/t1517-outside-repo.sh | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 82ca6d2bfd..c767414a0c 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -123,6 +123,9 @@ int cmd_hash_object(int argc, const char **argv, const char *prefix) else prefix = setup_git_directory_gently(&nongit); + if (nongit && !the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + if (vpath && prefix) { vpath_free = prefix_filename(prefix, vpath); vpath = vpath_free; diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index 64aea38486..d73a5cc237 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -260,4 +260,10 @@ test_expect_success '--literally with extra-long type' ' echo example | git hash-object -t $t --literally --stdin ' +test_expect_success '--stdin outside of repository (uses SHA-1)' ' + nongit git hash-object --stdin actual && + echo "$(test_oid --hash=sha1 hello)" >expect && + test_cmp expect actual +' + test_done diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh index 278ef57b3a..2d8982d61a 100755 --- a/t/t1517-outside-repo.sh +++ b/t/t1517-outside-repo.sh @@ -29,7 +29,7 @@ test_expect_success 'compute a patch-id outside repository (uses SHA-1)' ' test_cmp patch-id.expect patch-id.actual ' -test_expect_failure 'hash-object outside repository (uses SHA-1)' ' +test_expect_success 'hash-object outside repository (uses SHA-1)' ' nongit env GIT_DEFAULT_HASH=sha1 \ git hash-object --stdin hash.expect && nongit \ From 4674ab682dc1a875fd29de8f4e9568196a88b97b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 20 May 2024 16:14:34 -0700 Subject: [PATCH 5/5] apply: fix uninitialized hash
function "git apply" can work outside a repository as a better "GNU patch", but when it does so, it still assumes that it can access the_hash_algo, which is no longer true in the new world order. Make sure we explicitly fall back to the SHA-1 algorithm for backward compatibility. It is of dubious value to make this configurable to other hash algorithms, as the code does not use the_hash_algo for hashing purposes when working outside a repository (which is how the_hash_algo is left as NULL)---it is only used to learn the max length of the hash when parsing the object names on the "index" line, but failing to parse the "index" line is not a hard failure, and the program does not support operations like applying binary patches and --3way fallback that require object access outside a repository. Signed-off-by: Junio C Hamano --- builtin/apply.c | 10 ++++++++++ t/t1517-outside-repo.sh | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/builtin/apply.c b/builtin/apply.c index 861a01910c..d623c52f78 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "gettext.h" #include "repository.h" +#include "hash.h" #include "apply.h" static const char * const apply_usage[] = { @@ -18,6 +19,15 @@ int cmd_apply(int argc, const char **argv, const char *prefix) if (init_apply_state(&state, the_repository, prefix)) exit(128); + /* + * We could redo the "apply.c" machinery to make this + * arbitrary fallback unnecessary, but it is dubious that it + * is worth the effort. + * cf.
https://lore.kernel.org/git/xmqqcypfcmn4.fsf@gitster.g/ + */ + if (!the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + argc = apply_parse_options(argc, argv, &state, &force_apply, &options, apply_usage); diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh index 2d8982d61a..557808ffa7 100755 --- a/t/t1517-outside-repo.sh +++ b/t/t1517-outside-repo.sh @@ -37,7 +37,7 @@ test_expect_success 'hash-object outside repository (uses SHA-1)' ' test_cmp hash.expect hash.actual ' -test_expect_failure 'apply a patch outside repository' ' +test_expect_success 'apply a patch outside repository' ' ( cd non-repo && cp ../nums.old nums &&