cat-file: add --buffer option

We use a direct write() to output the results of --batch and
--batch-check. This is good for processes feeding the input
and reading the output interactively, but it introduces
measurable overhead if you do not want this feature. For
example, on linux.git:

  $ git rev-list --objects --all | cut -d' ' -f1 >objects
  $ time git cat-file --batch-check='%(objectsize)' \
          <objects >/dev/null
  real    0m5.440s
  user    0m5.060s
  sys     0m0.384s

This patch adds an option to use regular stdio buffering:

  $ time git cat-file --batch-check='%(objectsize)' \
          --buffer <objects >/dev/null
  real    0m4.975s
  user    0m4.888s
  sys     0m0.092s

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jeff King 2015-06-22 06:45:17 -04:00 committed by Junio C Hamano
parent bfd155943e
commit fc4937c372
2 changed files with 26 additions and 7 deletions

View file

@ -69,6 +69,13 @@ OPTIONS
not be combined with any other options or arguments. See the
section `BATCH OUTPUT` below for details.
--buffer::
Normally batch output is flushed after each object is output, so
that a process can interactively read and write from
`cat-file`. With this option, the output uses normal stdio
buffering; this is much more efficient when invoking
`--batch-check` on a large number of objects.
--allow-unknown-type::
Allow -s or -t to query broken/corrupt objects of unknown type.

View file

@ -14,6 +14,7 @@ struct batch_options {
int enabled;
int follow_symlinks;
int print_contents;
int buffer_output;
const char *format;
};
@ -211,14 +212,25 @@ static size_t expand_format(struct strbuf *sb, const char *start, void *data)
return end - start + 1;
}
static void print_object_or_die(int fd, struct expand_data *data)
static void batch_write(struct batch_options *opt, const void *data, int len)
{
if (opt->buffer_output) {
if (fwrite(data, 1, len, stdout) != len)
die_errno("unable to write to stdout");
} else
write_or_die(1, data, len);
}
static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
{
const unsigned char *sha1 = data->sha1;
assert(data->info.typep);
if (data->type == OBJ_BLOB) {
if (stream_blob_to_fd(fd, sha1, NULL, 0) < 0)
if (opt->buffer_output)
fflush(stdout);
if (stream_blob_to_fd(1, sha1, NULL, 0) < 0)
die("unable to stream %s to stdout", sha1_to_hex(sha1));
}
else {
@ -234,12 +246,11 @@ static void print_object_or_die(int fd, struct expand_data *data)
if (data->info.sizep && size != data->size)
die("object %s changed size!?", sha1_to_hex(sha1));
write_or_die(fd, contents, size);
batch_write(opt, contents, size);
free(contents);
}
}
static int batch_one_object(const char *obj_name, struct batch_options *opt,
struct expand_data *data)
{
@ -294,12 +305,12 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt,
strbuf_expand(&buf, opt->format, expand_format, data);
strbuf_addch(&buf, '\n');
write_or_die(1, buf.buf, buf.len);
batch_write(opt, buf.buf, buf.len);
strbuf_release(&buf);
if (opt->print_contents) {
print_object_or_die(1, data);
write_or_die(1, "\n", 1);
print_object_or_die(opt, data);
batch_write(opt, "\n", 1);
}
return 0;
}
@ -415,6 +426,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
N_("for blob objects, run textconv on object's content"), 'c'),
OPT_BOOL(0, "allow-unknown-type", &unknown_type,
N_("allow -s and -t to work with broken/corrupt objects")),
OPT_BOOL(0, "buffer", &batch.buffer_output, N_("buffer --batch output")),
{ OPTION_CALLBACK, 0, "batch", &batch, "format",
N_("show info and content of objects fed from the standard input"),
PARSE_OPT_OPTARG, batch_option_callback },