git/builtin/rev-list.c
Christian Couder 7b644c8c5a rev-list: allow missing tips with --missing=[print|allow*]
In 9830926c7d (rev-list: add commit object support in `--missing`
option, 2023-10-27) we fixed the `--missing` option in `git rev-list`
so that it works with with missing commits, not just blobs/trees.

Unfortunately, such a command would still fail with a "fatal: bad
object <oid>" if it is passed a missing commit, blob or tree as an
argument (before the rev walking even begins).

When such a command is used to find the dependencies of some objects,
for example the dependencies of quarantined objects (see the
"QUARANTINE ENVIRONMENT" section in the git-receive-pack(1)
documentation), it would be better if the command would instead
consider such missing objects, especially commits, in the same way as
other missing objects.

If, for example `--missing=print` is used, it would be nice for some
use cases if the missing tips passed as arguments were reported in
the same way as other missing objects instead of the command just
failing.

We could introduce a new option to make it work like this, but most
users are likely to prefer the command to have this behavior as the
default one. Introducing a new option would require another dumb loop
to look for that option early, which isn't nice.

Also we made `git rev-list` work with missing commits very recently
and the command is most often passed commits as arguments. So let's
consider this as a bug fix related to these recent changes.

While at it let's add a NEEDSWORK comment to say that we should get
rid of the existing ugly dumb loops that parse the
`--exclude-promisor-objects` and `--missing=...` options early.

Helped-by: Linus Arver <linusa@google.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-02-14 09:39:14 -08:00

816 lines
21 KiB
C

#include "builtin.h"
#include "config.h"
#include "commit.h"
#include "diff.h"
#include "environment.h"
#include "gettext.h"
#include "hex.h"
#include "revision.h"
#include "list-objects.h"
#include "list-objects-filter-options.h"
#include "object.h"
#include "object-name.h"
#include "object-file.h"
#include "object-store-ll.h"
#include "pack-bitmap.h"
#include "log-tree.h"
#include "graph.h"
#include "bisect.h"
#include "progress.h"
#include "reflog-walk.h"
#include "oidset.h"
#include "packfile.h"
static const char rev_list_usage[] =
"git rev-list [<options>] <commit>... [--] [<path>...]\n"
"\n"
" limiting output:\n"
" --max-count=<n>\n"
" --max-age=<epoch>\n"
" --min-age=<epoch>\n"
" --sparse\n"
" --no-merges\n"
" --min-parents=<n>\n"
" --no-min-parents\n"
" --max-parents=<n>\n"
" --no-max-parents\n"
" --remove-empty\n"
" --all\n"
" --branches\n"
" --tags\n"
" --remotes\n"
" --stdin\n"
" --exclude-hidden=[fetch|receive|uploadpack]\n"
" --quiet\n"
" ordering output:\n"
" --topo-order\n"
" --date-order\n"
" --reverse\n"
" formatting output:\n"
" --parents\n"
" --children\n"
" --objects | --objects-edge\n"
" --disk-usage[=human]\n"
" --unpacked\n"
" --header | --pretty\n"
" --[no-]object-names\n"
" --abbrev=<n> | --no-abbrev\n"
" --abbrev-commit\n"
" --left-right\n"
" --count\n"
" special purpose:\n"
" --bisect\n"
" --bisect-vars\n"
" --bisect-all"
;
static struct progress *progress;
static unsigned progress_counter;
static struct oidset omitted_objects;
static int arg_print_omitted; /* print objects omitted by filter */
static struct oidset missing_objects;
enum missing_action {
MA_ERROR = 0, /* fail if any missing objects are encountered */
MA_ALLOW_ANY, /* silently allow ALL missing objects */
MA_PRINT, /* print ALL missing objects in special section */
MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
static enum missing_action arg_missing_action;
/* display only the oid of each object encountered */
static int arg_show_object_names = 1;
#define DEFAULT_OIDSET_SIZE (16*1024)
static int show_disk_usage;
static off_t total_disk_usage;
static int human_readable;
static off_t get_object_disk_usage(struct object *obj)
{
off_t size;
struct object_info oi = OBJECT_INFO_INIT;
oi.disk_sizep = &size;
if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
die(_("unable to get disk usage of %s"), oid_to_hex(&obj->oid));
return size;
}
static inline void finish_object__ma(struct object *obj)
{
/*
* Whether or not we try to dynamically fetch missing objects
* from the server, we currently DO NOT have the object. We
* can either print, allow (ignore), or conditionally allow
* (ignore) them.
*/
switch (arg_missing_action) {
case MA_ERROR:
die("missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
return;
case MA_ALLOW_ANY:
return;
case MA_PRINT:
oidset_insert(&missing_objects, &obj->oid);
return;
case MA_ALLOW_PROMISOR:
if (is_promisor_object(&obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
return;
default:
BUG("unhandled missing_action");
return;
}
}
static void finish_commit(struct commit *commit)
{
free_commit_list(commit->parents);
commit->parents = NULL;
free_commit_buffer(the_repository->parsed_objects,
commit);
}
static void show_commit(struct commit *commit, void *data)
{
struct rev_list_info *info = data;
struct rev_info *revs = info->revs;
display_progress(progress, ++progress_counter);
if (revs->do_not_die_on_missing_objects &&
oidset_contains(&revs->missing_commits, &commit->object.oid)) {
finish_object__ma(&commit->object);
return;
}
if (show_disk_usage)
total_disk_usage += get_object_disk_usage(&commit->object);
if (info->flags & REV_LIST_QUIET) {
finish_commit(commit);
return;
}
graph_show_commit(revs->graph);
if (revs->count) {
if (commit->object.flags & PATCHSAME)
revs->count_same++;
else if (commit->object.flags & SYMMETRIC_LEFT)
revs->count_left++;
else
revs->count_right++;
finish_commit(commit);
return;
}
if (info->show_timestamp)
printf("%"PRItime" ", commit->date);
if (info->header_prefix)
fputs(info->header_prefix, stdout);
if (revs->include_header) {
if (!revs->graph)
fputs(get_revision_mark(revs, commit), stdout);
if (revs->abbrev_commit && revs->abbrev)
fputs(repo_find_unique_abbrev(the_repository, &commit->object.oid, revs->abbrev),
stdout);
else
fputs(oid_to_hex(&commit->object.oid), stdout);
}
if (revs->print_parents) {
struct commit_list *parents = commit->parents;
while (parents) {
printf(" %s", oid_to_hex(&parents->item->object.oid));
parents = parents->next;
}
}
if (revs->children.name) {
struct commit_list *children;
children = lookup_decoration(&revs->children, &commit->object);
while (children) {
printf(" %s", oid_to_hex(&children->item->object.oid));
children = children->next;
}
}
show_decorations(revs, commit);
if (revs->commit_format == CMIT_FMT_ONELINE)
putchar(' ');
else if (revs->include_header)
putchar('\n');
if (revs->verbose_header) {
struct strbuf buf = STRBUF_INIT;
struct pretty_print_context ctx = {0};
ctx.abbrev = revs->abbrev;
ctx.date_mode = revs->date_mode;
ctx.date_mode_explicit = revs->date_mode_explicit;
ctx.fmt = revs->commit_format;
ctx.output_encoding = get_log_output_encoding();
ctx.color = revs->diffopt.use_color;
pretty_print_commit(&ctx, commit, &buf);
if (buf.len) {
if (revs->commit_format != CMIT_FMT_ONELINE)
graph_show_oneline(revs->graph);
graph_show_commit_msg(revs->graph, stdout, &buf);
/*
* Add a newline after the commit message.
*
* Usually, this newline produces a blank
* padding line between entries, in which case
* we need to add graph padding on this line.
*
* However, the commit message may not end in a
* newline. In this case the newline simply
* ends the last line of the commit message,
* and we don't need any graph output. (This
* always happens with CMIT_FMT_ONELINE, and it
* happens with CMIT_FMT_USERFORMAT when the
* format doesn't explicitly end in a newline.)
*/
if (buf.len && buf.buf[buf.len - 1] == '\n')
graph_show_padding(revs->graph);
putchar(info->hdr_termination);
} else {
/*
* If the message buffer is empty, just show
* the rest of the graph output for this
* commit.
*/
if (graph_show_remainder(revs->graph))
putchar('\n');
if (revs->commit_format == CMIT_FMT_ONELINE)
putchar('\n');
}
strbuf_release(&buf);
} else {
if (graph_show_remainder(revs->graph))
putchar('\n');
}
maybe_flush_or_die(stdout, "stdout");
finish_commit(commit);
}
static int finish_object(struct object *obj, const char *name UNUSED,
void *cb_data)
{
struct rev_list_info *info = cb_data;
if (oid_object_info_extended(the_repository, &obj->oid, NULL, 0) < 0) {
finish_object__ma(obj);
return 1;
}
if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
parse_object(the_repository, &obj->oid);
return 0;
}
static void show_object(struct object *obj, const char *name, void *cb_data)
{
struct rev_list_info *info = cb_data;
struct rev_info *revs = info->revs;
if (finish_object(obj, name, cb_data))
return;
display_progress(progress, ++progress_counter);
if (show_disk_usage)
total_disk_usage += get_object_disk_usage(obj);
if (info->flags & REV_LIST_QUIET)
return;
if (revs->count) {
/*
* The object count is always accumulated in the .count_right
* field for traversal that is not a left-right traversal,
* and cmd_rev_list() made sure that a .count request that
* wants to count non-commit objects, which is handled by
* the show_object() callback, does not ask for .left_right.
*/
revs->count_right++;
return;
}
if (arg_show_object_names)
show_object_with_name(stdout, obj, name);
else
printf("%s\n", oid_to_hex(&obj->oid));
}
static void show_edge(struct commit *commit)
{
printf("-%s\n", oid_to_hex(&commit->object.oid));
}
static void print_var_str(const char *var, const char *val)
{
printf("%s='%s'\n", var, val);
}
static void print_var_int(const char *var, int val)
{
printf("%s=%d\n", var, val);
}
static int show_bisect_vars(struct rev_list_info *info, int reaches, int all)
{
int cnt, flags = info->flags;
char hex[GIT_MAX_HEXSZ + 1] = "";
struct commit_list *tried;
struct rev_info *revs = info->revs;
if (!revs->commits)
return 1;
revs->commits = filter_skipped(revs->commits, &tried,
flags & BISECT_SHOW_ALL,
NULL, NULL);
/*
* revs->commits can reach "reaches" commits among
* "all" commits. If it is good, then there are
* (all-reaches) commits left to be bisected.
* On the other hand, if it is bad, then the set
* to bisect is "reaches".
* A bisect set of size N has (N-1) commits further
* to test, as we already know one bad one.
*/
cnt = all - reaches;
if (cnt < reaches)
cnt = reaches;
if (revs->commits)
oid_to_hex_r(hex, &revs->commits->item->object.oid);
if (flags & BISECT_SHOW_ALL) {
traverse_commit_list(revs, show_commit, show_object, info);
printf("------\n");
}
print_var_str("bisect_rev", hex);
print_var_int("bisect_nr", cnt - 1);
print_var_int("bisect_good", all - reaches - 1);
print_var_int("bisect_bad", reaches - 1);
print_var_int("bisect_all", all);
print_var_int("bisect_steps", estimate_bisect_steps(all));
return 0;
}
static int show_object_fast(
const struct object_id *oid,
enum object_type type UNUSED,
int exclude UNUSED,
uint32_t name_hash UNUSED,
struct packed_git *found_pack UNUSED,
off_t found_offset UNUSED)
{
fprintf(stdout, "%s\n", oid_to_hex(oid));
return 1;
}
static void print_disk_usage(off_t size)
{
struct strbuf sb = STRBUF_INIT;
if (human_readable)
strbuf_humanise_bytes(&sb, size);
else
strbuf_addf(&sb, "%"PRIuMAX, (uintmax_t)size);
puts(sb.buf);
strbuf_release(&sb);
}
static inline int parse_missing_action_value(const char *value)
{
if (!strcmp(value, "error")) {
arg_missing_action = MA_ERROR;
return 1;
}
if (!strcmp(value, "allow-any")) {
arg_missing_action = MA_ALLOW_ANY;
fetch_if_missing = 0;
return 1;
}
if (!strcmp(value, "print")) {
arg_missing_action = MA_PRINT;
fetch_if_missing = 0;
return 1;
}
if (!strcmp(value, "allow-promisor")) {
arg_missing_action = MA_ALLOW_PROMISOR;
fetch_if_missing = 0;
return 1;
}
return 0;
}
static int try_bitmap_count(struct rev_info *revs,
int filter_provided_objects)
{
uint32_t commit_count = 0,
tag_count = 0,
tree_count = 0,
blob_count = 0;
int max_count;
struct bitmap_index *bitmap_git;
/* This function only handles counting, not general traversal. */
if (!revs->count)
return -1;
/*
* A bitmap result can't know left/right, etc, because we don't
* actually traverse.
*/
if (revs->left_right || revs->cherry_mark)
return -1;
/*
* If we're counting reachable objects, we can't handle a max count of
* commits to traverse, since we don't know which objects go with which
* commit.
*/
if (revs->max_count >= 0 &&
(revs->tag_objects || revs->tree_objects || revs->blob_objects))
return -1;
/*
* This must be saved before doing any walking, since the revision
* machinery will count it down to zero while traversing.
*/
max_count = revs->max_count;
bitmap_git = prepare_bitmap_walk(revs, filter_provided_objects);
if (!bitmap_git)
return -1;
count_bitmap_commit_list(bitmap_git, &commit_count,
revs->tree_objects ? &tree_count : NULL,
revs->blob_objects ? &blob_count : NULL,
revs->tag_objects ? &tag_count : NULL);
if (max_count >= 0 && max_count < commit_count)
commit_count = max_count;
printf("%d\n", commit_count + tree_count + blob_count + tag_count);
free_bitmap_index(bitmap_git);
return 0;
}
static int try_bitmap_traversal(struct rev_info *revs,
int filter_provided_objects)
{
struct bitmap_index *bitmap_git;
/*
* We can't use a bitmap result with a traversal limit, since the set
* of commits we'd get would be essentially random.
*/
if (revs->max_count >= 0)
return -1;
bitmap_git = prepare_bitmap_walk(revs, filter_provided_objects);
if (!bitmap_git)
return -1;
traverse_bitmap_commit_list(bitmap_git, revs, &show_object_fast);
free_bitmap_index(bitmap_git);
return 0;
}
static int try_bitmap_disk_usage(struct rev_info *revs,
int filter_provided_objects)
{
struct bitmap_index *bitmap_git;
off_t size_from_bitmap;
if (!show_disk_usage)
return -1;
bitmap_git = prepare_bitmap_walk(revs, filter_provided_objects);
if (!bitmap_git)
return -1;
size_from_bitmap = get_disk_usage_from_bitmap(bitmap_git, revs);
print_disk_usage(size_from_bitmap);
return 0;
}
int cmd_rev_list(int argc, const char **argv, const char *prefix)
{
struct rev_info revs;
struct rev_list_info info;
struct setup_revision_opt s_r_opt = {
.allow_exclude_promisor_objects = 1,
};
int i;
int bisect_list = 0;
int bisect_show_vars = 0;
int bisect_find_all = 0;
int use_bitmap_index = 0;
int filter_provided_objects = 0;
const char *show_progress = NULL;
int ret = 0;
if (argc == 2 && !strcmp(argv[1], "-h"))
usage(rev_list_usage);
git_config(git_default_config, NULL);
repo_init_revisions(the_repository, &revs, prefix);
revs.abbrev = DEFAULT_ABBREV;
revs.commit_format = CMIT_FMT_UNSPECIFIED;
revs.include_header = 1;
/*
* Scan the argument list before invoking setup_revisions(), so that we
* know if fetch_if_missing needs to be set to 0.
*
* "--exclude-promisor-objects" acts as a pre-filter on missing objects
* by not crossing the boundary from realized objects to promisor
* objects.
*
* Let "--missing" to conditionally set fetch_if_missing.
*/
/*
* NEEDSWORK: These loops that attempt to find presence of
* options without understanding that the options they are
* skipping are broken (e.g., it would not know "--grep
* --exclude-promisor-objects" is not triggering
* "--exclude-promisor-objects" option). We really need
* setup_revisions() to have a mechanism to allow and disallow
* some sets of options for different commands (like rev-list,
* replay, etc). Such a mechanism should do an early parsing
* of options and be able to manage the `--missing=...` and
* `--exclude-promisor-objects` options below.
*/
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, "--exclude-promisor-objects")) {
fetch_if_missing = 0;
revs.exclude_promisor_objects = 1;
break;
}
}
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
if (skip_prefix(arg, "--missing=", &arg)) {
if (revs.exclude_promisor_objects)
die(_("options '%s' and '%s' cannot be used together"), "--exclude-promisor-objects", "--missing");
if (parse_missing_action_value(arg))
break;
}
}
if (arg_missing_action)
revs.do_not_die_on_missing_objects = 1;
argc = setup_revisions(argc, argv, &revs, &s_r_opt);
memset(&info, 0, sizeof(info));
info.revs = &revs;
if (revs.bisect)
bisect_list = 1;
if (revs.diffopt.flags.quick)
info.flags |= REV_LIST_QUIET;
for (i = 1 ; i < argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, "--header")) {
revs.verbose_header = 1;
continue;
}
if (!strcmp(arg, "--timestamp")) {
info.show_timestamp = 1;
continue;
}
if (!strcmp(arg, "--bisect")) {
bisect_list = 1;
continue;
}
if (!strcmp(arg, "--bisect-all")) {
bisect_list = 1;
bisect_find_all = 1;
info.flags |= BISECT_SHOW_ALL;
revs.show_decorations = 1;
continue;
}
if (!strcmp(arg, "--bisect-vars")) {
bisect_list = 1;
bisect_show_vars = 1;
continue;
}
if (!strcmp(arg, "--use-bitmap-index")) {
use_bitmap_index = 1;
continue;
}
if (!strcmp(arg, "--test-bitmap")) {
test_bitmap_walk(&revs);
goto cleanup;
}
if (skip_prefix(arg, "--progress=", &arg)) {
show_progress = arg;
continue;
}
if (!strcmp(arg, "--filter-provided-objects")) {
filter_provided_objects = 1;
continue;
}
if (!strcmp(arg, "--filter-print-omitted")) {
arg_print_omitted = 1;
continue;
}
if (!strcmp(arg, "--exclude-promisor-objects"))
continue; /* already handled above */
if (skip_prefix(arg, "--missing=", &arg))
continue; /* already handled above */
if (!strcmp(arg, ("--no-object-names"))) {
arg_show_object_names = 0;
continue;
}
if (!strcmp(arg, ("--object-names"))) {
arg_show_object_names = 1;
continue;
}
if (!strcmp(arg, ("--commit-header"))) {
revs.include_header = 1;
continue;
}
if (!strcmp(arg, ("--no-commit-header"))) {
revs.include_header = 0;
continue;
}
if (skip_prefix(arg, "--disk-usage", &arg)) {
if (*arg == '=') {
if (!strcmp(++arg, "human")) {
human_readable = 1;
} else
die(_("invalid value for '%s': '%s', the only allowed format is '%s'"),
"--disk-usage=<format>", arg, "human");
} else if (*arg) {
/*
* Arguably should goto a label to continue chain of ifs?
* Doesn't matter unless we try to add --disk-usage-foo
* afterwards.
*/
usage(rev_list_usage);
}
show_disk_usage = 1;
info.flags |= REV_LIST_QUIET;
continue;
}
usage(rev_list_usage);
}
if (revs.commit_format != CMIT_FMT_USERFORMAT)
revs.include_header = 1;
if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
/* The command line has a --pretty */
info.hdr_termination = '\n';
if (revs.commit_format == CMIT_FMT_ONELINE || !revs.include_header)
info.header_prefix = "";
else
info.header_prefix = "commit ";
}
else if (revs.verbose_header)
/* Only --header was specified */
revs.commit_format = CMIT_FMT_RAW;
if ((!revs.commits && reflog_walk_empty(revs.reflog_info) &&
(!(revs.tag_objects || revs.tree_objects || revs.blob_objects) &&
!revs.pending.nr) &&
!revs.rev_input_given && !revs.read_from_stdin) ||
revs.diff)
usage(rev_list_usage);
if (revs.show_notes)
die(_("rev-list does not support display of notes"));
if (revs.count &&
(revs.tag_objects || revs.tree_objects || revs.blob_objects) &&
(revs.left_right || revs.cherry_mark))
die(_("marked counting and '%s' cannot be used together"), "--objects");
save_commit_buffer = (revs.verbose_header ||
revs.grep_filter.pattern_list ||
revs.grep_filter.header_list);
if (bisect_list)
revs.limited = 1;
if (show_progress)
progress = start_delayed_progress(show_progress, 0);
if (use_bitmap_index) {
if (!try_bitmap_count(&revs, filter_provided_objects))
goto cleanup;
if (!try_bitmap_disk_usage(&revs, filter_provided_objects))
goto cleanup;
if (!try_bitmap_traversal(&revs, filter_provided_objects))
goto cleanup;
}
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
if (revs.tree_objects)
mark_edges_uninteresting(&revs, show_edge, 0);
if (bisect_list) {
int reaches, all;
unsigned bisect_flags = 0;
if (bisect_find_all)
bisect_flags |= FIND_BISECTION_ALL;
if (revs.first_parent_only)
bisect_flags |= FIND_BISECTION_FIRST_PARENT_ONLY;
find_bisection(&revs.commits, &reaches, &all, bisect_flags);
if (bisect_show_vars) {
ret = show_bisect_vars(&info, reaches, all);
goto cleanup;
}
}
if (filter_provided_objects) {
struct commit_list *c;
for (i = 0; i < revs.pending.nr; i++) {
struct object_array_entry *pending = revs.pending.objects + i;
pending->item->flags |= NOT_USER_GIVEN;
}
for (c = revs.commits; c; c = c->next)
c->item->object.flags |= NOT_USER_GIVEN;
}
if (arg_print_omitted)
oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
if (arg_missing_action == MA_PRINT) {
oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
/* Add missing tips */
oidset_insert_from_set(&missing_objects, &revs.missing_commits);
oidset_clear(&revs.missing_commits);
}
traverse_commit_list_filtered(
&revs, show_commit, show_object, &info,
(arg_print_omitted ? &omitted_objects : NULL));
if (arg_print_omitted) {
struct oidset_iter iter;
struct object_id *oid;
oidset_iter_init(&omitted_objects, &iter);
while ((oid = oidset_iter_next(&iter)))
printf("~%s\n", oid_to_hex(oid));
oidset_clear(&omitted_objects);
}
if (arg_missing_action == MA_PRINT) {
struct oidset_iter iter;
struct object_id *oid;
oidset_iter_init(&missing_objects, &iter);
while ((oid = oidset_iter_next(&iter)))
printf("?%s\n", oid_to_hex(oid));
oidset_clear(&missing_objects);
}
stop_progress(&progress);
if (revs.count) {
if (revs.left_right && revs.cherry_mark)
printf("%d\t%d\t%d\n", revs.count_left, revs.count_right, revs.count_same);
else if (revs.left_right)
printf("%d\t%d\n", revs.count_left, revs.count_right);
else if (revs.cherry_mark)
printf("%d\t%d\n", revs.count_left + revs.count_right, revs.count_same);
else
printf("%d\n", revs.count_left + revs.count_right);
}
if (show_disk_usage)
print_disk_usage(total_disk_usage);
cleanup:
release_revisions(&revs);
return ret;
}