Merge branch 'sg/commit-graph-cleanups' into master

The changed-path Bloom filter is improved using ideas from an
independent implementation.

* sg/commit-graph-cleanups:
  commit-graph: simplify write_commit_graph_file() #2
  commit-graph: simplify write_commit_graph_file() #1
  commit-graph: simplify parse_commit_graph() #2
  commit-graph: simplify parse_commit_graph() #1
  commit-graph: clean up #includes
  diff.h: drop diff_tree_oid() & friends' return value
  commit-slab: add a function to deep free entries on the slab
  commit-graph-format.txt: all multi-byte numbers are in network byte order
  commit-graph: fix parsing the Chunk Lookup table
  tree-walk.c: don't match submodule entries for 'submod/anything'
This commit is contained in:
Junio C Hamano 2020-07-30 13:20:30 -07:00
commit de6dda0dc3
13 changed files with 117 additions and 108 deletions

View file

@ -32,7 +32,7 @@ the body into "chunks" and provide a binary lookup table at the beginning
of the body. The header includes certain values, such as number of chunks
and hash type.
All 4-byte numbers are in network order.
All multi-byte numbers are in network byte order.
HEADER:

View file

@ -1,7 +1,5 @@
#include "cache.h"
#include "config.h"
#include "dir.h"
#include "git-compat-util.h"
#include "config.h"
#include "lockfile.h"
#include "pack.h"
#include "packfile.h"
@ -285,8 +283,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
const unsigned char *data, *chunk_lookup;
uint32_t i;
struct commit_graph *graph;
uint64_t last_chunk_offset;
uint32_t last_chunk_id;
uint64_t next_chunk_offset;
uint32_t graph_signature;
unsigned char graph_version, hash_version;
@ -326,24 +323,26 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
graph->data = graph_map;
graph->data_len = graph_size;
last_chunk_id = 0;
last_chunk_offset = 8;
if (graph_size < GRAPH_HEADER_SIZE +
(graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH +
GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
error(_("commit-graph file is too small to hold %u chunks"),
graph->num_chunks);
free(graph);
return NULL;
}
chunk_lookup = data + 8;
next_chunk_offset = get_be64(chunk_lookup + 4);
for (i = 0; i < graph->num_chunks; i++) {
uint32_t chunk_id;
uint64_t chunk_offset;
uint64_t chunk_offset = next_chunk_offset;
int chunk_repeated = 0;
if (data + graph_size - chunk_lookup <
GRAPH_CHUNKLOOKUP_WIDTH) {
error(_("commit-graph chunk lookup table entry missing; file may be incomplete"));
goto free_and_return;
}
chunk_id = get_be32(chunk_lookup + 0);
chunk_offset = get_be64(chunk_lookup + 4);
chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
next_chunk_offset = get_be64(chunk_lookup + 4);
if (chunk_offset > graph_size - the_hash_algo->rawsz) {
error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
@ -362,8 +361,11 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
case GRAPH_CHUNKID_OIDLOOKUP:
if (graph->chunk_oid_lookup)
chunk_repeated = 1;
else
else {
graph->chunk_oid_lookup = data + chunk_offset;
graph->num_commits = (next_chunk_offset - chunk_offset)
/ graph->hash_len;
}
break;
case GRAPH_CHUNKID_DATA:
@ -417,15 +419,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
goto free_and_return;
}
if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
{
graph->num_commits = (chunk_offset - last_chunk_offset)
/ graph->hash_len;
}
last_chunk_id = chunk_id;
last_chunk_offset = chunk_offset;
}
if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
@ -1586,17 +1579,22 @@ static int write_graph_chunk_base(struct hashfile *f,
return 0;
}
struct chunk_info {
uint32_t id;
uint64_t size;
};
static int write_commit_graph_file(struct write_commit_graph_context *ctx)
{
uint32_t i;
int fd;
struct hashfile *f;
struct lock_file lk = LOCK_INIT;
uint32_t chunk_ids[MAX_NUM_CHUNKS + 1];
uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1];
struct chunk_info chunks[MAX_NUM_CHUNKS + 1];
const unsigned hashsz = the_hash_algo->rawsz;
struct strbuf progress_title = STRBUF_INIT;
int num_chunks = 3;
uint64_t chunk_offset;
struct object_id file_hash;
const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@ -1644,51 +1642,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
}
chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
chunk_ids[2] = GRAPH_CHUNKID_DATA;
chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
chunks[0].size = GRAPH_FANOUT_SIZE;
chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
chunks[1].size = hashsz * ctx->commits.nr;
chunks[2].id = GRAPH_CHUNKID_DATA;
chunks[2].size = (hashsz + 16) * ctx->commits.nr;
if (ctx->num_extra_edges) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES;
chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
chunks[num_chunks].size = 4 * ctx->num_extra_edges;
num_chunks++;
}
if (ctx->changed_paths) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES;
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
num_chunks++;
chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA;
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
chunks[num_chunks].size = sizeof(uint32_t) * 3
+ ctx->total_bloom_filter_data_size;
num_chunks++;
}
if (ctx->num_commit_graphs_after > 1) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE;
chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
num_chunks++;
}
chunk_ids[num_chunks] = 0;
chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr;
chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr;
num_chunks = 3;
if (ctx->num_extra_edges) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
4 * ctx->num_extra_edges;
num_chunks++;
}
if (ctx->changed_paths) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
sizeof(uint32_t) * ctx->commits.nr;
num_chunks++;
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size;
num_chunks++;
}
if (ctx->num_commit_graphs_after > 1) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
hashsz * (ctx->num_commit_graphs_after - 1);
num_chunks++;
}
chunks[num_chunks].id = 0;
chunks[num_chunks].size = 0;
hashwrite_be32(f, GRAPH_SIGNATURE);
@ -1697,13 +1678,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
hashwrite_u8(f, num_chunks);
hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
for (i = 0; i <= num_chunks; i++) {
uint32_t chunk_write[3];
chunk_write[0] = htonl(chunk_ids[i]);
chunk_write[1] = htonl(chunk_offsets[i] >> 32);
chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
chunk_write[0] = htonl(chunks[i].id);
chunk_write[1] = htonl(chunk_offset >> 32);
chunk_write[2] = htonl(chunk_offset & 0xffffffff);
hashwrite(f, chunk_write, 12);
chunk_offset += chunks[i].size;
}
if (ctx->report_progress) {

View file

@ -2,9 +2,6 @@
#define COMMIT_GRAPH_H
#include "git-compat-util.h"
#include "repository.h"
#include "string-list.h"
#include "cache.h"
#include "object-store.h"
#include "oidset.h"
@ -23,6 +20,9 @@ void git_test_write_commit_graph_or_die(void);
struct commit;
struct bloom_filter_settings;
struct repository;
struct raw_object_store;
struct string_list;
char *get_commit_graph_filename(struct object_directory *odb);
int open_commit_graph(const char *graph_file, int *fd, struct stat *st);

View file

@ -32,6 +32,7 @@ struct slabname { \
void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
void init_ ##slabname(struct slabname *s); \
void clear_ ##slabname(struct slabname *s); \
void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
elemtype *slabname## _at(struct slabname *s, const struct commit *c); \
elemtype *slabname## _peek(struct slabname *s, const struct commit *c)

View file

@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s) \
FREE_AND_NULL(s->slab); \
} \
\
scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
{ \
unsigned int i; \
for (i = 0; i < s->slab_count; i++) { \
unsigned int j; \
if (!s->slab[i]) \
continue; \
for (j = 0; j < s->slab_size; j++) \
free_fn(&s->slab[i][j * s->stride]); \
} \
clear_ ##slabname(s); \
} \
\
scope elemtype *slabname## _at_peek(struct slabname *s, \
const struct commit *c, \
int add_if_missing) \

View file

@ -47,6 +47,16 @@
*
* Call this function before the slab falls out of scope to avoid
* leaking memory.
*
* - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
*
* Empties the slab, similar to clear_indegree(), but in addition it
* calls the given 'free_fn' for each slab entry to release any
* additional memory that might be owned by the entry (but not the
* entry itself!).
* Note that 'free_fn' might be called even for entries for which no
* indegree_at() call has been made; in this case 'free_fn' is invoked
* with a pointer to a zero-initialized location.
*/
#define define_commit_slab(slabname, elemtype) \

10
diff.h
View file

@ -431,11 +431,11 @@ struct combine_diff_path *diff_tree_paths(
struct combine_diff_path *p, const struct object_id *oid,
const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt);
int diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
const char *base, struct diff_options *opt);
int diff_root_tree_oid(const struct object_id *new_oid, const char *base,
struct diff_options *opt);
void diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
const char *base, struct diff_options *opt);
void diff_root_tree_oid(const struct object_id *new_oid, const char *base,
struct diff_options *opt);
struct combine_diff_path {
struct combine_diff_path *next;

View file

@ -791,9 +791,7 @@ static int rev_compare_tree(struct rev_info *revs,
tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0;
if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "",
&revs->pruning) < 0)
return REV_TREE_DIFFERENT;
diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning);
if (!nth_parent)
if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
@ -804,7 +802,6 @@ static int rev_compare_tree(struct rev_info *revs,
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
{
int retval;
struct tree *t1 = get_commit_tree(commit);
if (!t1)
@ -812,9 +809,9 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0;
retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
return retval >= 0 && (tree_difference == REV_TREE_SAME);
return tree_difference == REV_TREE_SAME;
}
struct treesame_state {

View file

@ -110,6 +110,10 @@ void rollback_shallow_file(struct repository *r, struct shallow_lock *lk)
* supports a "valid" flag.
*/
define_commit_slab(commit_depth, int *);
static void free_depth_in_slab(int **ptr)
{
FREE_AND_NULL(*ptr);
}
struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
int shallow_flag, int not_shallow_flag)
{
@ -176,15 +180,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
}
}
}
for (i = 0; i < depths.slab_count; i++) {
int j;
if (!depths.slab[i])
continue;
for (j = 0; j < depths.slab_size; j++)
free(depths.slab[i][j]);
}
clear_commit_depth(&depths);
deep_clear_commit_depth(&depths, free_depth_in_slab);
return result;
}

View file

@ -125,7 +125,9 @@ test_expect_success 'setup submodules' '
test_expect_success 'diff-tree ignores trailing slash on submodule path' '
git diff --name-only HEAD^ HEAD submod >expect &&
git diff --name-only HEAD^ HEAD submod/ >actual &&
test_cmp expect actual
test_cmp expect actual &&
git diff --name-only HEAD^ HEAD -- submod/whatever >actual &&
test_must_be_empty actual
'
test_expect_success 'diff multiple wildcard pathspecs' '

View file

@ -529,7 +529,7 @@ test_expect_success 'detect bad hash version' '
'
test_expect_success 'detect low chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
"missing the .* chunk"
'
@ -615,7 +615,8 @@ test_expect_success 'detect invalid checksum hash' '
test_expect_success 'detect incorrect chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \
"chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET
"commit-graph file is too small to hold [0-9]* chunks" \
$GRAPH_CHUNK_LOOKUP_OFFSET
'
test_expect_success 'git fsck (checks commit-graph)' '

View file

@ -29,9 +29,9 @@ static struct combine_diff_path *ll_diff_tree_paths(
struct combine_diff_path *p, const struct object_id *oid,
const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt);
static int ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt);
static void ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt);
/*
* Compare two tree entries, taking into account only path/S_ISDIR(mode),
@ -679,9 +679,9 @@ static void try_to_follow_renames(const struct object_id *old_oid,
q->nr = 1;
}
static int ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt)
static void ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt)
{
struct combine_diff_path phead, *p;
pathchange_fn_t pathchange_old = opt->pathchange;
@ -697,29 +697,27 @@ static int ll_diff_tree_oid(const struct object_id *old_oid,
}
opt->pathchange = pathchange_old;
return 0;
}
int diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
const char *base_str, struct diff_options *opt)
void diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
const char *base_str, struct diff_options *opt)
{
struct strbuf base;
int retval;
strbuf_init(&base, PATH_MAX);
strbuf_addstr(&base, base_str);
retval = ll_diff_tree_oid(old_oid, new_oid, &base, opt);
ll_diff_tree_oid(old_oid, new_oid, &base, opt);
if (!*base_str && opt->flags.follow_renames && diff_might_be_rename())
try_to_follow_renames(old_oid, new_oid, &base, opt);
strbuf_release(&base);
return retval;
}
int diff_root_tree_oid(const struct object_id *new_oid, const char *base, struct diff_options *opt)
void diff_root_tree_oid(const struct object_id *new_oid,
const char *base,
struct diff_options *opt)
{
return diff_tree_oid(NULL, new_oid, base, opt);
diff_tree_oid(NULL, new_oid, base, opt);
}

View file

@ -851,7 +851,14 @@ static int match_entry(const struct pathspec_item *item,
if (matchlen > pathlen) {
if (match[pathlen] != '/')
return 0;
if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
/*
* Reject non-directories as partial pathnames, except
* when match is a submodule with a trailing slash and
* nothing else (to handle 'submod/' and 'submod'
* uniformly).
*/
if (!S_ISDIR(entry->mode) &&
(!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1))
return 0;
}