cat-file: sort and de-dup output of --batch-all-objects

The sorting we could probably live without, but printing
duplicates is just a hassle for the user, who must then
de-dup themselves (or risk a wrong answer if they are doing
something like counting objects with a particular property).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jeff King 2015-06-22 07:06:32 -04:00 committed by Junio C Hamano
parent 6a951937ae
commit 3115ee45c8
3 changed files with 18 additions and 12 deletions

View file

@ -74,8 +74,7 @@ OPTIONS
requested batch operation on all objects in the repository and
any alternate object stores (not just reachable objects).
Requires `--batch` or `--batch-check` be specified. Note that
the order of the objects is unspecified, and there may be
duplicate entries.
the objects are visited in order sorted by their hashes.
--buffer::
Normally batch output is flushed after each object is output, so

View file

@ -9,6 +9,7 @@
#include "userdiff.h"
#include "streaming.h"
#include "tree-walk.h"
#include "sha1-array.h"
struct batch_options {
int enabled;
@ -324,19 +325,19 @@ struct object_cb_data {
struct expand_data *expand;
};
/*
 * Per-object callback: stores the given sha1 into data->expand->sha1 via
 * hashcpy() and then calls batch_object_write() for that entry with a NULL
 * first argument.
 *
 * NOTE(review): this span reads like a diff hunk whose +/- markers were
 * stripped, so two versions of the function are interleaved. The two-line
 * "static int ... struct object_cb_data *data" signature and the trailing
 * "return 0;" look like the removed version; the "static void ... void
 * *vdata" signature and the vdata-to-data assignment look like the added
 * version (the shape sha1_array_for_each_unique() is called with further
 * down). Confirm against the actual commit before treating this as
 * compilable code.
 */
static int batch_object_cb(const unsigned char *sha1,
struct object_cb_data *data)
static void batch_object_cb(const unsigned char sha1[20], void *vdata)
{
struct object_cb_data *data = vdata;
hashcpy(data->expand->sha1, sha1);
batch_object_write(NULL, data->opt, data->expand);
return 0;
}
/*
 * Callback passed to for_each_loose_object(). The path argument is not used
 * by either visible body.
 *
 * NOTE(review): two versions appear interleaved here (diff markers seem to
 * have been stripped). The line that forwards straight to batch_object_cb()
 * looks like the removed version, where data was the object_cb_data. The
 * sha1_array_append() call followed by "return 0;" looks like the added
 * version, where data is presumably the struct sha1_array being collected
 * for later sorted, de-duplicated output. Confirm against the actual
 * commit.
 */
static int batch_loose_object(const unsigned char *sha1,
const char *path,
void *data)
{
return batch_object_cb(sha1, data);
sha1_array_append(data, sha1);
return 0;
}
static int batch_packed_object(const unsigned char *sha1,
@ -344,7 +345,8 @@ static int batch_packed_object(const unsigned char *sha1,
uint32_t pos,
void *data)
{
return batch_object_cb(sha1, data);
sha1_array_append(data, sha1);
return 0;
}
static int batch_objects(struct batch_options *opt)
@ -375,11 +377,17 @@ static int batch_objects(struct batch_options *opt)
data.info.typep = &data.type;
if (opt->all_objects) {
struct sha1_array sa = SHA1_ARRAY_INIT;
struct object_cb_data cb;
for_each_loose_object(batch_loose_object, &sa, 0);
for_each_packed_object(batch_packed_object, &sa, 0);
cb.opt = opt;
cb.expand = &data;
for_each_loose_object(batch_loose_object, &cb, 0);
for_each_packed_object(batch_packed_object, &cb, 0);
sha1_array_for_each_unique(&sa, batch_object_cb, &cb);
sha1_array_clear(&sa);
return 0;
}

View file

@ -548,7 +548,7 @@ test_expect_success 'git cat-file --batch --follow-symlink returns correct sha a
'
test_expect_success 'cat-file --batch-all-objects shows all objects' '
# make new repos so we now the full set of objects; we will
# make new repos so we know the full set of objects; we will
# also make sure that there are some packed and some loose
# objects, some referenced and some not, and that there are
# some available only via alternates.
@ -569,8 +569,7 @@ test_expect_success 'cat-file --batch-all-objects shows all objects' '
) >>expect.unsorted &&
sort <expect.unsorted >expect &&
git -C all-two cat-file --batch-all-objects \
--batch-check="%(objectname)" >actual.unsorted &&
sort <actual.unsorted >actual &&
--batch-check="%(objectname)" >actual &&
test_cmp expect actual
'