diff --git a/builtin/apply.c b/builtin/apply.c index 861a01910c..d623c52f78 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "gettext.h" #include "repository.h" +#include "hash.h" #include "apply.h" static const char * const apply_usage[] = { @@ -18,6 +19,15 @@ int cmd_apply(int argc, const char **argv, const char *prefix) if (init_apply_state(&state, the_repository, prefix)) exit(128); + /* + * We could to redo the "apply.c" machinery to make this + * arbitrary fallback unnecessary, but it is dubious that it + * is worth the effort. + * cf. https://lore.kernel.org/git/xmqqcypfcmn4.fsf@gitster.g/ + */ + if (!the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + argc = apply_parse_options(argc, argv, &state, &force_apply, &options, apply_usage); diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 82ca6d2bfd..c767414a0c 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -123,6 +123,9 @@ int cmd_hash_object(int argc, const char **argv, const char *prefix) else prefix = setup_git_directory_gently(&nongit); + if (nongit && !the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + if (vpath && prefix) { vpath_free = prefix_filename(prefix, vpath); vpath = vpath_free; diff --git a/builtin/patch-id.c b/builtin/patch-id.c index 3894d2b970..583099cacf 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -5,6 +5,7 @@ #include "hash.h" #include "hex.h" #include "parse-options.h" +#include "setup.h" static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result) { @@ -237,6 +238,18 @@ int cmd_patch_id(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, builtin_patch_id_options, patch_id_usage, 0); + /* + * We rely on `the_hash_algo` to compute patch IDs. This is dubious as + * it means that the hash algorithm now depends on the object hash of + * the repository, even though git-patch-id(1) clearly defines that + * patch IDs always use SHA1. + * + * NEEDSWORK: This hack should be removed in favor of converting + * the code that computes patch IDs to always use SHA1. + */ + if (!the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + generate_id_list(opts ? opts > 1 : config.stable, opts ? opts == 3 : config.verbatim); return 0; diff --git a/repository.c b/repository.c index 9f3014b3b5..deb68625e0 100644 --- a/repository.c +++ b/repository.c @@ -20,6 +20,28 @@ static struct repository the_repo; struct repository *the_repository = &the_repo; +/* + * An escape hatch: if we hit a bug in the production code that fails + * to set an appropriate hash algorithm (most likely to happen when + * running outside a repository), we can tell the user who reported + * the crash to set the environment variable to "sha1" (all lowercase) + * to revert to the historical behaviour of defaulting to SHA-1. + */ +static void set_default_hash_algo(struct repository *repo) +{ + const char *hash_name; + int algo; + + hash_name = getenv("GIT_TEST_DEFAULT_HASH_ALGO"); + if (!hash_name) + return; + algo = hash_algo_by_name(hash_name); + if (algo == GIT_HASH_UNKNOWN) + return; + + repo_set_hash_algo(repo, algo); +} + void initialize_repository(struct repository *repo) { repo->objects = raw_object_store_new(); @@ -27,6 +49,28 @@ void initialize_repository(struct repository *repo) repo->parsed_objects = parsed_object_pool_new(); ALLOC_ARRAY(repo->index, 1); index_state_init(repo->index, repo); + + /* + * When a command runs inside a repository, it learns what + * hash algorithm is in use from the repository, but some + * commands are designed to work outside a repository, yet + * they want to access the_hash_algo, if only for the length + * of the hashed value to see if their input looks like a + * plausible hash value. + * + * We are in the process of identifying such code paths and + * giving them an appropriate default individually; any + * unconverted code paths that try to access the_hash_algo + * will thus fail. The end-users however have an escape hatch + * to set GIT_TEST_DEFAULT_HASH_ALGO environment variable to + * "sha1" to get back the old behaviour of defaulting to SHA-1. + * + * This escape hatch is deliberately kept unadvertised, so + * that they see crashes and we can get a report before + * telling them about it. + */ + if (repo == the_repository) + set_default_hash_algo(repo); } static void expand_base_dir(char **out, const char *in, diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index 64aea38486..d73a5cc237 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -260,4 +260,10 @@ test_expect_success '--literally with extra-long type' ' echo example | git hash-object -t $t --literally --stdin ' +test_expect_success '--stdin outside of repository (uses SHA-1)' ' + nongit git hash-object --stdin actual && + echo "$(test_oid --hash=sha1 hello)" >expect && + test_cmp expect actual +' + test_done diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh new file mode 100755 index 0000000000..557808ffa7 --- /dev/null +++ b/t/t1517-outside-repo.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +test_description='check random commands outside repo' + +TEST_PASSES_SANITIZE_LEAK=true +. ./test-lib.sh + +test_expect_success 'set up a non-repo directory and test file' ' + GIT_CEILING_DIRECTORIES=$(pwd) && + export GIT_CEILING_DIRECTORIES && + mkdir non-repo && + ( + cd non-repo && + # confirm that git does not find a repo + test_must_fail git rev-parse --git-dir + ) && + test_write_lines one two three four >nums && + git add nums && + cp nums nums.old && + test_write_lines five >>nums && + git diff >sample.patch +' + +test_expect_success 'compute a patch-id outside repository (uses SHA-1)' ' + nongit env GIT_DEFAULT_HASH=sha1 \ + git patch-id patch-id.expect && + nongit \ + git patch-id patch-id.actual && + test_cmp patch-id.expect patch-id.actual +' + +test_expect_success 'hash-object outside repository (uses SHA-1)' ' + nongit env GIT_DEFAULT_HASH=sha1 \ + git hash-object --stdin hash.expect && + nongit \ + git hash-object --stdin hash.actual && + test_cmp hash.expect hash.actual +' + +test_expect_success 'apply a patch outside repository' ' + ( + cd non-repo && + cp ../nums.old nums && + git apply ../sample.patch + ) && + test_cmp nums non-repo/nums +' + +test_expect_success 'grep outside repository' ' + git grep --cached two >expect && + ( + cd non-repo && + cp ../nums.old nums && + git grep --no-index two >../actual + ) && + test_cmp expect actual +' + +test_done diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index a7fa94ce0a..605faea0c7 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -310,4 +310,38 @@ test_expect_success 'patch-id handles diffs with one line of before/after' ' test_config patchid.stable true && calc_patch_id diffu1stable diff <<-\EOF && + diff --git a/bar b/bar + index bdaf90f..31051f6 100644 + --- a/bar + +++ b/bar + @@ -2 +2,2 @@ + b + +c + EOF + + git init --object-format=sha1 repo-sha1 && + git -C repo-sha1 patch-id patch-id-sha1 && + git init --object-format=sha256 repo-sha256 && + git -C repo-sha256 patch-id patch-id-sha256 && + test_cmp patch-id-sha1 patch-id-sha256 +' + +test_expect_success 'patch-id without repository' ' + cat >diff <<-\EOF && + diff --git a/bar b/bar + index bdaf90f..31051f6 100644 + --- a/bar + +++ b/bar + @@ -2 +2,2 @@ + b + +c + EOF + nongit git patch-id