git/builtin/for-each-ref.c
Taylor Blau 8255dd8a3d builtin/for-each-ref.c: add --exclude option
When using `for-each-ref`, it is sometimes convenient for the caller to
be able to exclude certain parts of the references.

For example, if there are many `refs/__hidden__/*` references, the
caller may want to emit all references *except* the hidden ones.
Currently, the only way to do this is to post-process the output, like:

    $ git for-each-ref --format='%(refname)' | grep -v '^refs/hidden/'

Which is do-able, but requires processing a potentially large quantity
of references.

Teach `git for-each-ref` a new `--exclude=<pattern>` option, which
excludes references from the results if they match one or more excluded
patterns.

This patch provides a naive implementation where the `ref_filter` still
sees all references (including ones that it will discard) and is left to
check whether each reference matches any excluded pattern(s) before
emitting them.

By culling out references we know the caller doesn't care about, we can
avoid allocating memory for their storage, as well as spending time
sorting the output (among other things). Even the naive implementation
provides a significant speed-up on a modified copy of linux.git (that
has a hidden ref pointing at each commit):

    $ hyperfine \
      'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' \
      'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/'
    Benchmark 1: git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"
      Time (mean ± σ):     820.1 ms ±   2.0 ms    [User: 703.7 ms, System: 152.0 ms]
      Range (min … max):   817.7 ms … 823.3 ms    10 runs

    Benchmark 2: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/
      Time (mean ± σ):     106.6 ms ±   1.1 ms    [User: 99.4 ms, System: 7.1 ms]
      Range (min … max):   104.7 ms … 109.1 ms    27 runs

    Summary
      'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' ran
        7.69 ± 0.08 times faster than 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"'

Subsequent patches will improve on this by avoiding visiting excluded
sections of the `packed-refs` file in certain cases.

Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-10 14:48:55 -07:00

128 lines
4.1 KiB
C

#include "builtin.h"
#include "cache.h"
#include "config.h"
#include "gettext.h"
#include "refs.h"
#include "object.h"
#include "parse-options.h"
#include "ref-filter.h"
#include "strvec.h"
#include "commit-reach.h"
static char const * const for_each_ref_usage[] = {
N_("git for-each-ref [<options>] [<pattern>]"),
N_("git for-each-ref [--points-at <object>]"),
N_("git for-each-ref [--merged [<commit>]] [--no-merged [<commit>]]"),
N_("git for-each-ref [--contains [<commit>]] [--no-contains [<commit>]]"),
NULL
};
int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
{
int i;
struct ref_sorting *sorting;
struct string_list sorting_options = STRING_LIST_INIT_DUP;
int maxcount = 0, icase = 0, omit_empty = 0;
struct ref_array array;
struct ref_filter filter = REF_FILTER_INIT;
struct ref_format format = REF_FORMAT_INIT;
struct strbuf output = STRBUF_INIT;
struct strbuf err = STRBUF_INIT;
int from_stdin = 0;
struct strvec vec = STRVEC_INIT;
struct option opts[] = {
OPT_BIT('s', "shell", &format.quote_style,
N_("quote placeholders suitably for shells"), QUOTE_SHELL),
OPT_BIT('p', "perl", &format.quote_style,
N_("quote placeholders suitably for perl"), QUOTE_PERL),
OPT_BIT(0 , "python", &format.quote_style,
N_("quote placeholders suitably for python"), QUOTE_PYTHON),
OPT_BIT(0 , "tcl", &format.quote_style,
N_("quote placeholders suitably for Tcl"), QUOTE_TCL),
OPT_BOOL(0, "omit-empty", &omit_empty,
N_("do not output a newline after empty formatted refs")),
OPT_GROUP(""),
OPT_INTEGER( 0 , "count", &maxcount, N_("show only <n> matched refs")),
OPT_STRING( 0 , "format", &format.format, N_("format"), N_("format to use for the output")),
OPT__COLOR(&format.use_color, N_("respect format colors")),
OPT_REF_FILTER_EXCLUDE(&filter),
OPT_REF_SORT(&sorting_options),
OPT_CALLBACK(0, "points-at", &filter.points_at,
N_("object"), N_("print only refs which points at the given object"),
parse_opt_object_name),
OPT_MERGED(&filter, N_("print only refs that are merged")),
OPT_NO_MERGED(&filter, N_("print only refs that are not merged")),
OPT_CONTAINS(&filter.with_commit, N_("print only refs which contain the commit")),
OPT_NO_CONTAINS(&filter.no_commit, N_("print only refs which don't contain the commit")),
OPT_BOOL(0, "ignore-case", &icase, N_("sorting and filtering are case insensitive")),
OPT_BOOL(0, "stdin", &from_stdin, N_("read reference patterns from stdin")),
OPT_END(),
};
memset(&array, 0, sizeof(array));
format.format = "%(objectname) %(objecttype)\t%(refname)";
git_config(git_default_config, NULL);
parse_options(argc, argv, prefix, opts, for_each_ref_usage, 0);
if (maxcount < 0) {
error("invalid --count argument: `%d'", maxcount);
usage_with_options(for_each_ref_usage, opts);
}
if (HAS_MULTI_BITS(format.quote_style)) {
error("more than one quoting style?");
usage_with_options(for_each_ref_usage, opts);
}
if (verify_ref_format(&format))
usage_with_options(for_each_ref_usage, opts);
sorting = ref_sorting_options(&sorting_options);
ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase);
filter.ignore_case = icase;
if (from_stdin) {
struct strbuf line = STRBUF_INIT;
if (argv[0])
die(_("unknown arguments supplied with --stdin"));
while (strbuf_getline(&line, stdin) != EOF)
strvec_push(&vec, line.buf);
strbuf_release(&line);
/* vec.v is NULL-terminated, just like 'argv'. */
filter.name_patterns = vec.v;
} else {
filter.name_patterns = argv;
}
filter.match_as_path = 1;
filter_refs(&array, &filter, FILTER_REFS_ALL);
filter_ahead_behind(the_repository, &format, &array);
ref_array_sort(sorting, &array);
if (!maxcount || array.nr < maxcount)
maxcount = array.nr;
for (i = 0; i < maxcount; i++) {
strbuf_reset(&err);
strbuf_reset(&output);
if (format_ref_array_item(array.items[i], &format, &output, &err))
die("%s", err.buf);
fwrite(output.buf, 1, output.len, stdout);
if (output.len || !omit_empty)
putchar('\n');
}
strbuf_release(&err);
strbuf_release(&output);
ref_array_clear(&array);
ref_filter_clear(&filter);
ref_sorting_release(sorting);
strvec_clear(&vec);
return 0;
}