Merge branch 'ps/undecided-is-not-necessarily-sha1'

Before discovering the repository details, We used to assume SHA-1
as the "default" hash function, which has been corrected. Hopefully
this will smoke out codepaths that rely on such an unwarranted
assumptions.

* ps/undecided-is-not-necessarily-sha1:
  repository: stop setting SHA1 as the default object hash
  oss-fuzz/commit-graph: set up hash algorithm
  builtin/shortlog: don't set up revisions without repo
  builtin/diff: explicitly set hash algo when there is no repo
  builtin/bundle: abort "verify" early when there is no repository
  builtin/blame: don't access potentially unitialized `the_hash_algo`
  builtin/rev-parse: allow shortening to more than 40 hex characters
  remote-curl: fix parsing of detached SHA256 heads
  attr: fix BUG() when parsing attrs outside of repo
  attr: don't recompute default attribute source
  parse-options-cb: only abbreviate hashes when hash algo is known
  path: move `validate_headref()` to its only user
  path: harden validation of HEAD with non-standard hashes
This commit is contained in:
Junio C Hamano 2024-05-30 14:15:10 -07:00
commit a60c21b720
17 changed files with 168 additions and 92 deletions

31
attr.c
View file

@ -1205,15 +1205,16 @@ static void collect_some_attrs(struct index_state *istate,
}
static const char *default_attr_source_tree_object_name;
static int ignore_bad_attr_tree;
void set_git_attr_source(const char *tree_object_name)
{
default_attr_source_tree_object_name = xstrdup(tree_object_name);
}
static void compute_default_attr_source(struct object_id *attr_source)
static int compute_default_attr_source(struct object_id *attr_source)
{
int ignore_bad_attr_tree = 0;
if (!default_attr_source_tree_object_name)
default_attr_source_tree_object_name = getenv(GIT_ATTR_SOURCE_ENVIRONMENT);
@ -1222,22 +1223,34 @@ static void compute_default_attr_source(struct object_id *attr_source)
ignore_bad_attr_tree = 1;
}
if (!default_attr_source_tree_object_name || !is_null_oid(attr_source))
return;
if (!default_attr_source_tree_object_name)
return 0;
if (!startup_info->have_repository) {
if (!ignore_bad_attr_tree)
die(_("cannot use --attr-source or GIT_ATTR_SOURCE without repo"));
return 0;
}
if (repo_get_oid_treeish(the_repository,
default_attr_source_tree_object_name,
attr_source) && !ignore_bad_attr_tree)
die(_("bad --attr-source or GIT_ATTR_SOURCE"));
attr_source)) {
if (!ignore_bad_attr_tree)
die(_("bad --attr-source or GIT_ATTR_SOURCE"));
return 0;
}
return 1;
}
static struct object_id *default_attr_source(void)
{
static struct object_id attr_source;
static int has_attr_source = -1;
if (is_null_oid(&attr_source))
compute_default_attr_source(&attr_source);
if (is_null_oid(&attr_source))
if (has_attr_source < 0)
has_attr_source = compute_default_attr_source(&attr_source);
if (!has_attr_source)
return NULL;
return &attr_source;
}

View file

@ -915,7 +915,6 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
struct range_set ranges;
unsigned int range_i;
long anchor;
const int hexsz = the_hash_algo->hexsz;
long num_lines = 0;
const char *str_usage = cmd_is_annotate ? annotate_usage : blame_usage;
const char **opt_usage = cmd_is_annotate ? annotate_opt_usage : blame_opt_usage;
@ -973,11 +972,11 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
} else if (show_progress < 0)
show_progress = isatty(2);
if (0 < abbrev && abbrev < hexsz)
if (0 < abbrev && abbrev < (int)the_hash_algo->hexsz)
/* one more abbrev length is needed for the boundary commit */
abbrev++;
else if (!abbrev)
abbrev = hexsz;
abbrev = the_hash_algo->hexsz;
if (revs_file && read_ancestry(revs_file))
die_errno("reading graft file '%s' failed", revs_file);

View file

@ -140,6 +140,11 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
builtin_bundle_verify_usage, options, &bundle_file);
/* bundle internals use argv[1] as further parameters */
if (!startup_info->have_repository) {
ret = error(_("need a repository to verify a bundle"));
goto cleanup;
}
if ((bundle_fd = open_bundle(bundle_file, &header, &name)) < 0) {
ret = 1;
goto cleanup;

View file

@ -465,6 +465,15 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
no_index = DIFF_NO_INDEX_IMPLICIT;
}
/*
* When operating outside of a Git repository we need to have a hash
* algorithm at hand so that we can generate the blob hashes. We
* default to SHA1 here, but may eventually want to change this to be
* configurable via a command line option.
*/
if (nongit)
repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
init_diff_ui_defaults();
git_config(git_diff_ui_config, NULL);
prefix = precompose_argv_prefix(argc, argv, prefix);

View file

@ -691,7 +691,6 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
const char *name = NULL;
struct object_context unused;
struct strbuf buf = STRBUF_INIT;
const int hexsz = the_hash_algo->hexsz;
int seen_end_of_options = 0;
enum format_type format = FORMAT_DEFAULT;
@ -867,8 +866,8 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
abbrev = strtoul(arg, NULL, 10);
if (abbrev < MINIMUM_ABBREV)
abbrev = MINIMUM_ABBREV;
else if (hexsz <= abbrev)
abbrev = hexsz;
else if ((int)the_hash_algo->hexsz <= abbrev)
abbrev = the_hash_algo->hexsz;
continue;
}
if (!strcmp(arg, "--sq")) {

View file

@ -435,7 +435,7 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix)
usage_with_options(shortlog_usage, options);
}
if (setup_revisions(argc, argv, &rev, NULL) != 1) {
if (!nongit && setup_revisions(argc, argv, &rev, NULL) != 1) {
error(_("unrecognized argument: %s"), argv[1]);
usage_with_options(shortlog_usage, options);
}

View file

@ -19,6 +19,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
* touching the disk to keep the individual fuzz-test cases as fast as
* possible.
*/
repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
the_repository->settings.commit_graph_generation_version = 2;
the_repository->settings.commit_graph_read_changed_paths = 1;
g = parse_commit_graph(&the_repository->settings, (void *)data, size);

View file

@ -7,6 +7,7 @@
#include "environment.h"
#include "gettext.h"
#include "object-name.h"
#include "setup.h"
#include "string-list.h"
#include "strvec.h"
#include "oid-array.h"
@ -29,7 +30,7 @@ int parse_opt_abbrev_cb(const struct option *opt, const char *arg, int unset)
opt->long_name);
if (v && v < MINIMUM_ABBREV)
v = MINIMUM_ABBREV;
else if (v > the_hash_algo->hexsz)
else if (startup_info->have_repository && v > the_hash_algo->hexsz)
v = the_hash_algo->hexsz;
}
*(int *)(opt->value) = v;

53
path.c
View file

@ -5,7 +5,6 @@
#include "abspath.h"
#include "environment.h"
#include "gettext.h"
#include "hex.h"
#include "repository.h"
#include "strbuf.h"
#include "string-list.h"
@ -647,58 +646,6 @@ void strbuf_git_common_path(struct strbuf *sb,
va_end(args);
}
int validate_headref(const char *path)
{
struct stat st;
char buffer[256];
const char *refname;
struct object_id oid;
int fd;
ssize_t len;
if (lstat(path, &st) < 0)
return -1;
/* Make sure it is a "refs/.." symlink */
if (S_ISLNK(st.st_mode)) {
len = readlink(path, buffer, sizeof(buffer)-1);
if (len >= 5 && !memcmp("refs/", buffer, 5))
return 0;
return -1;
}
/*
* Anything else, just open it and try to see if it is a symbolic ref.
*/
fd = open(path, O_RDONLY);
if (fd < 0)
return -1;
len = read_in_full(fd, buffer, sizeof(buffer)-1);
close(fd);
if (len < 0)
return -1;
buffer[len] = '\0';
/*
* Is it a symbolic ref?
*/
if (skip_prefix(buffer, "ref:", &refname)) {
while (isspace(*refname))
refname++;
if (starts_with(refname, "refs/"))
return 0;
}
/*
* Is this a detached HEAD?
*/
if (!get_oid_hex(buffer, &oid))
return 0;
return -1;
}
static struct passwd *getpw_str(const char *username, size_t len)
{
struct passwd *pw;

1
path.h
View file

@ -173,7 +173,6 @@ const char *git_path_fetch_head(struct repository *r);
const char *git_path_shallow(struct repository *r);
int ends_with_path_components(const char *path, const char *components);
int validate_headref(const char *ref);
int calc_shared_perm(int mode);
int adjust_shared_perm(const char *path);

View file

@ -266,12 +266,23 @@ static struct ref *parse_git_refs(struct discovery *heads, int for_push)
return list;
}
/*
* Try to detect the hash algorithm used by the remote repository when using
* the dumb HTTP transport. As dumb transports cannot tell us the object hash
* directly have to derive it from the advertised ref lengths.
*/
static const struct git_hash_algo *detect_hash_algo(struct discovery *heads)
{
const char *p = memchr(heads->buf, '\t', heads->len);
int algo;
/*
* In case the remote has no refs we have no way to reliably determine
* the object hash used by that repository. In that case we simply fall
* back to SHA1, which may or may not be correct.
*/
if (!p)
return the_hash_algo;
return &hash_algos[GIT_HASH_SHA1];
algo = hash_algo_by_length((p - heads->buf) / 2);
if (algo == GIT_HASH_UNKNOWN)
@ -295,6 +306,12 @@ static struct ref *parse_info_refs(struct discovery *heads)
"is this a git repository?",
transport_anonymize_url(url.buf));
/*
* Set the repository's hash algo to whatever we have just detected.
* This ensures that we can correctly parse the remote references.
*/
repo_set_hash_algo(the_repository, hash_algo_by_ptr(options.hash_algo));
data = heads->buf;
start = NULL;
mid = data;

View file

@ -26,26 +26,6 @@ void initialize_repository(struct repository *repo)
repo->parsed_objects = parsed_object_pool_new();
ALLOC_ARRAY(repo->index, 1);
index_state_init(repo->index, repo);
/*
* Unfortunately, we need to keep this hack around for the time being:
*
* - Not setting up the hash algorithm for `the_repository` leads to
* crashes because `the_hash_algo` is a macro that expands to
* `the_repository->hash_algo`. So if Git commands try to access
* `the_hash_algo` without a Git directory we crash.
*
* - Setting up the hash algorithm to be SHA1 by default breaks other
* commands when running with SHA256.
*
* This is another point in case why having global state is a bad idea.
* Eventually, we should remove this hack and stop setting the hash
* algorithm in this function altogether. Instead, it should only ever
* be set via our repository setup procedures. But that requires more
* work.
*/
if (repo == the_repository)
repo_set_hash_algo(repo, GIT_HASH_SHA1);
}
static void expand_base_dir(char **out, const char *in,

53
setup.c
View file

@ -4,6 +4,7 @@
#include "environment.h"
#include "exec-cmd.h"
#include "gettext.h"
#include "hex.h"
#include "object-name.h"
#include "refs.h"
#include "repository.h"
@ -342,6 +343,58 @@ int get_common_dir_noenv(struct strbuf *sb, const char *gitdir)
return ret;
}
static int validate_headref(const char *path)
{
struct stat st;
char buffer[256];
const char *refname;
struct object_id oid;
int fd;
ssize_t len;
if (lstat(path, &st) < 0)
return -1;
/* Make sure it is a "refs/.." symlink */
if (S_ISLNK(st.st_mode)) {
len = readlink(path, buffer, sizeof(buffer)-1);
if (len >= 5 && !memcmp("refs/", buffer, 5))
return 0;
return -1;
}
/*
* Anything else, just open it and try to see if it is a symbolic ref.
*/
fd = open(path, O_RDONLY);
if (fd < 0)
return -1;
len = read_in_full(fd, buffer, sizeof(buffer)-1);
close(fd);
if (len < 0)
return -1;
buffer[len] = '\0';
/*
* Is it a symbolic ref?
*/
if (skip_prefix(buffer, "ref:", &refname)) {
while (isspace(*refname))
refname++;
if (starts_with(refname, "refs/"))
return 0;
}
/*
* Is this a detached HEAD?
*/
if (get_oid_hex_any(buffer, &oid) != GIT_HASH_UNKNOWN)
return 0;
return -1;
}
/*
* Test if it looks like we're at a git directory.
* We want to see:

View file

@ -434,6 +434,21 @@ test_expect_success 'precedence of --attr-source, GIT_ATTR_SOURCE, then attr.tre
)
'
test_expect_success 'diff without repository with attr source' '
mkdir -p "$TRASH_DIRECTORY/outside/nongit" &&
(
cd "$TRASH_DIRECTORY/outside/nongit" &&
GIT_CEILING_DIRECTORIES="$TRASH_DIRECTORY/outside" &&
export GIT_CEILING_DIRECTORIES &&
touch file &&
cat >expect <<-EOF &&
fatal: cannot use --attr-source or GIT_ATTR_SOURCE without repo
EOF
test_must_fail env GIT_ATTR_SOURCE=HEAD git grep --no-index foo file 2>err &&
test_cmp expect err
)
'
test_expect_success 'bare repository: with --source' '
(
cd bare.git &&

View file

@ -176,6 +176,23 @@ test_expect_success 'long options' '
test_cmp expect output
'
test_expect_success 'abbreviate to something longer than SHA1 length' '
cat >expect <<-EOF &&
boolean: 0
integer: 0
magnitude: 0
timestamp: 0
string: (not set)
abbrev: 100
verbose: -1
quiet: 0
dry run: no
file: (not set)
EOF
test-tool parse-options --abbrev=100 >output &&
test_cmp expect output
'
test_expect_success 'missing required value' '
cat >expect <<-\EOF &&
error: switch `s'\'' requires a value

View file

@ -304,4 +304,10 @@ test_expect_success 'rev-parse --bisect includes bad, excludes good' '
test_cmp expect actual
'
test_expect_success '--short= truncates to the actual hash length' '
git rev-parse HEAD >expect &&
git rev-parse --short=100 HEAD >actual &&
test_cmp expect actual
'
test_done

View file

@ -55,6 +55,21 @@ test_expect_success 'list refs from outside any repository' '
test_cmp expect actual
'
test_expect_success 'list detached HEAD from outside any repository' '
git clone --mirror "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" \
"$HTTPD_DOCUMENT_ROOT_PATH/repo-detached.git" &&
git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo-detached.git" \
update-ref --no-deref HEAD refs/heads/main &&
git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo-detached.git" update-server-info &&
cat >expect <<-EOF &&
$(git rev-parse main) HEAD
$(git rev-parse main) refs/heads/main
EOF
nongit git ls-remote "$HTTPD_URL/dumb/repo-detached.git" >actual &&
test_cmp expect actual
'
test_expect_success 'create password-protected repository' '
mkdir -p "$HTTPD_DOCUMENT_ROOT_PATH/auth/dumb/" &&
cp -Rf "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" \