Merge branch 'ds/commit-graph-lockfile-fix'

Update to ds/generation-numbers topic.

* ds/commit-graph-lockfile-fix:
  commit-graph: fix UX issue when .lock file exists
  commit-graph.txt: update design document
  merge: check config before loading commits
  commit: use generation number in remove_redundant()
  commit: add short-circuit to paint_down_to_common()
  commit: use generation numbers for in_merge_bases()
  ref-filter: use generation number for --contains
  commit-graph: always load commit-graph information
  commit: use generations in paint_down_to_common()
  commit-graph: compute generation numbers
  commit: add generation number to struct commit
  ref-filter: fix outdated comment on in_commit_list
This commit is contained in:
Junio C Hamano 2018-06-25 13:22:36 -07:00
commit a856e7d69f
11 changed files with 209 additions and 56 deletions

View file

@ -77,6 +77,29 @@ in the commit graph. We can treat these commits as having "infinite"
generation number and walk until reaching commits with known generation
number.
We use the macro GENERATION_NUMBER_INFINITY = 0xFFFFFFFF to mark commits not
in the commit-graph file. If a commit-graph file was written by a version
of Git that did not compute generation numbers, then those commits will
have generation number represented by the macro GENERATION_NUMBER_ZERO = 0.
Since the commit-graph file is closed under reachability, we can guarantee
the following weaker condition on all commits:
If A and B are commits with generation numbers N amd M, respectively,
and N < M, then A cannot reach B.
Note how the strict inequality differs from the inequality when we have
fully-computed generation numbers. Using strict inequality may result in
walking a few extra commits, but the simplicity in dealing with commits
with generation number *_INFINITY or *_ZERO is valuable.
We use the macro GENERATION_NUMBER_MAX = 0x3FFFFFFF to for commits whose
generation numbers are computed to be at least this value. We limit at
this value since it is the largest value that can be stored in the
commit-graph file using the 30 bits available to generation numbers. This
presents another case where a commit can have generation number equal to
that of a parent.
Design Details
--------------
@ -98,18 +121,14 @@ Future Work
- The 'commit-graph' subcommand does not have a "verify" mode that is
necessary for integration with fsck.
- The file format includes room for precomputed generation numbers. These
are not currently computed, so all generation numbers will be marked as
0 (or "uncomputed"). A later patch will include this calculation.
- After computing and storing generation numbers, we must make graph
walks aware of generation numbers to gain the performance benefits they
enable. This will mostly be accomplished by swapping a commit-date-ordered
priority queue with one ordered by generation number. The following
operations are important candidates:
- paint_down_to_common()
- 'log --topo-order'
- 'tag --merged'
- Currently, parse_commit_gently() requires filling in the root tree
object for a commit. This passes through lookup_tree() and consequently

View file

@ -94,6 +94,7 @@ void *alloc_commit_node(void)
c->object.type = OBJ_COMMIT;
c->index = alloc_commit_index();
c->graph_pos = COMMIT_NOT_FROM_GRAPH;
c->generation = GENERATION_NUMBER_INFINITY;
return c;
}

View file

@ -1186,14 +1186,15 @@ int cmd_merge(int argc, const char **argv, const char *prefix)
branch = branch_to_free = resolve_refdup("HEAD", 0, &head_oid, NULL);
if (branch)
skip_prefix(branch, "refs/heads/", &branch);
init_diff_ui_defaults();
git_config(git_merge_config, NULL);
if (!branch || is_null_oid(&head_oid))
head_commit = NULL;
else
head_commit = lookup_commit_or_die(&head_oid, "HEAD");
init_diff_ui_defaults();
git_config(git_merge_config, NULL);
if (branch_mergeoptions)
parse_branch_merge_options(branch_mergeoptions);
argc = parse_options(argc, argv, prefix, builtin_merge_options,

View file

@ -1,5 +1,6 @@
#include "cache.h"
#include "config.h"
#include "dir.h"
#include "git-compat-util.h"
#include "lockfile.h"
#include "pack.h"
@ -248,6 +249,13 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g,
return &commit_list_insert(c, pptr)->next;
}
static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)
{
const unsigned char *commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos;
item->graph_pos = pos;
item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
}
static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos)
{
uint32_t edge_value;
@ -265,6 +273,8 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin
date_low = get_be32(commit_data + g->hash_len + 12);
item->date = (timestamp_t)((date_high << 32) | date_low);
item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
pptr = &item->parents;
edge_value = get_be32(commit_data + g->hash_len);
@ -293,31 +303,40 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin
return 1;
}
static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
{
if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
*pos = item->graph_pos;
return 1;
} else {
return bsearch_graph(g, &(item->object.oid), pos);
}
}
int parse_commit_in_graph(struct commit *item)
{
uint32_t pos;
if (!core_commit_graph)
return 0;
if (item->object.parsed)
return 1;
prepare_commit_graph();
if (commit_graph) {
uint32_t pos;
int found;
if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
pos = item->graph_pos;
found = 1;
} else {
found = bsearch_graph(commit_graph, &(item->object.oid), &pos);
}
if (found)
return fill_commit_in_graph(item, commit_graph, pos);
}
if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
return fill_commit_in_graph(item, commit_graph, pos);
return 0;
}
void load_commit_graph_info(struct commit *item)
{
uint32_t pos;
if (!core_commit_graph)
return;
prepare_commit_graph();
if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
fill_commit_graph_info(item, commit_graph, pos);
}
static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c)
{
struct object_id oid;
@ -440,6 +459,8 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
else
packedDate[0] = 0;
packedDate[0] |= htonl((*list)->generation << 2);
packedDate[1] = htonl((*list)->date);
hashwrite(f, packedDate, 8);
@ -572,6 +593,45 @@ static void close_reachable(struct packed_oid_list *oids)
}
}
static void compute_generation_numbers(struct packed_commit_list* commits)
{
int i;
struct commit_list *list = NULL;
for (i = 0; i < commits->nr; i++) {
if (commits->list[i]->generation != GENERATION_NUMBER_INFINITY &&
commits->list[i]->generation != GENERATION_NUMBER_ZERO)
continue;
commit_list_insert(commits->list[i], &list);
while (list) {
struct commit *current = list->item;
struct commit_list *parent;
int all_parents_computed = 1;
uint32_t max_generation = 0;
for (parent = current->parents; parent; parent = parent->next) {
if (parent->item->generation == GENERATION_NUMBER_INFINITY ||
parent->item->generation == GENERATION_NUMBER_ZERO) {
all_parents_computed = 0;
commit_list_insert(parent->item, &list);
break;
} else if (parent->item->generation > max_generation) {
max_generation = parent->item->generation;
}
}
if (all_parents_computed) {
current->generation = max_generation + 1;
pop_commit(&list);
if (current->generation > GENERATION_NUMBER_MAX)
current->generation = GENERATION_NUMBER_MAX;
}
}
}
}
void write_commit_graph(const char *obj_dir,
const char **pack_indexes,
int nr_packs,
@ -584,7 +644,6 @@ void write_commit_graph(const char *obj_dir,
struct hashfile *f;
uint32_t i, count_distinct = 0;
char *graph_name;
int fd;
struct lock_file lk = LOCK_INIT;
uint32_t chunk_ids[5];
uint64_t chunk_offsets[5];
@ -695,24 +754,14 @@ void write_commit_graph(const char *obj_dir,
if (commits.nr >= GRAPH_PARENT_MISSING)
die(_("too many commits to write graph"));
compute_generation_numbers(&commits);
graph_name = get_commit_graph_filename(obj_dir);
fd = hold_lock_file_for_update(&lk, graph_name, 0);
if (fd < 0) {
struct strbuf folder = STRBUF_INIT;
strbuf_addstr(&folder, graph_name);
strbuf_setlen(&folder, strrchr(folder.buf, '/') - folder.buf);
if (mkdir(folder.buf, 0777) < 0)
die_errno(_("cannot mkdir %s"), folder.buf);
strbuf_release(&folder);
fd = hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR);
if (fd < 0)
die_errno("unable to create '%s'", graph_name);
}
if (safe_create_leading_directories(graph_name))
die_errno(_("unable to create leading directories of %s"),
graph_name);
hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR);
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
hashwrite_be32(f, GRAPH_SIGNATURE);

View file

@ -17,6 +17,14 @@ char *get_commit_graph_filename(const char *obj_dir);
*/
int parse_commit_in_graph(struct commit *item);
/*
* It is possible that we loaded commit contents from the commit buffer,
* but we also want to ensure the commit-graph content is correctly
* checked and filled. Fill the graph_pos and generation members of
* the given commit.
*/
void load_commit_graph_info(struct commit *item);
struct tree *get_commit_tree_in_graph(const struct commit *c);
struct commit_graph {

View file

@ -344,7 +344,7 @@ const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep)
return ret;
}
int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size)
int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph)
{
const char *tail = buffer;
const char *bufptr = buffer;
@ -399,6 +399,9 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s
}
item->date = parse_commit_date(bufptr, tail);
if (check_graph)
load_commit_graph_info(item);
return 0;
}
@ -425,7 +428,7 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing)
return error("Object %s not a commit",
oid_to_hex(&item->object.oid));
}
ret = parse_commit_buffer(item, buffer, size);
ret = parse_commit_buffer(item, buffer, size, 0);
if (save_commit_buffer && !ret) {
set_commit_buffer(item, buffer, size);
return 0;
@ -653,6 +656,24 @@ static int compare_commits_by_author_date(const void *a_, const void *b_,
return 0;
}
int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused)
{
const struct commit *a = a_, *b = b_;
/* newer commits first */
if (a->generation < b->generation)
return 1;
else if (a->generation > b->generation)
return -1;
/* use date as a heuristic when generations are equal */
if (a->date < b->date)
return 1;
else if (a->date > b->date)
return -1;
return 0;
}
int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused)
{
const struct commit *a = a_, *b = b_;
@ -800,11 +821,14 @@ static int queue_has_nonstale(struct prio_queue *queue)
}
/* all input commits in one and twos[] must have been parsed! */
static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos)
static struct commit_list *paint_down_to_common(struct commit *one, int n,
struct commit **twos,
int min_generation)
{
struct prio_queue queue = { compare_commits_by_commit_date };
struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
struct commit_list *result = NULL;
int i;
uint32_t last_gen = GENERATION_NUMBER_INFINITY;
one->object.flags |= PARENT1;
if (!n) {
@ -823,6 +847,15 @@ static struct commit_list *paint_down_to_common(struct commit *one, int n, struc
struct commit_list *parents;
int flags;
if (commit->generation > last_gen)
BUG("bad generation skip %8x > %8x at %s",
commit->generation, last_gen,
oid_to_hex(&commit->object.oid));
last_gen = commit->generation;
if (commit->generation < min_generation)
break;
flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
if (flags == (PARENT1 | PARENT2)) {
if (!(commit->object.flags & RESULT)) {
@ -871,7 +904,7 @@ static struct commit_list *merge_bases_many(struct commit *one, int n, struct co
return NULL;
}
list = paint_down_to_common(one, n, twos);
list = paint_down_to_common(one, n, twos, 0);
while (list) {
struct commit *commit = pop_commit(&list);
@ -929,6 +962,7 @@ static int remove_redundant(struct commit **array, int cnt)
parse_commit(array[i]);
for (i = 0; i < cnt; i++) {
struct commit_list *common;
uint32_t min_generation = array[i]->generation;
if (redundant[i])
continue;
@ -937,8 +971,12 @@ static int remove_redundant(struct commit **array, int cnt)
continue;
filled_index[filled] = j;
work[filled++] = array[j];
if (array[j]->generation < min_generation)
min_generation = array[j]->generation;
}
common = paint_down_to_common(array[i], filled, work);
common = paint_down_to_common(array[i], filled, work,
min_generation);
if (array[i]->object.flags & PARENT2)
redundant[i] = 1;
for (j = 0; j < filled; j++)
@ -1048,14 +1086,21 @@ int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit *
{
struct commit_list *bases;
int ret = 0, i;
uint32_t min_generation = GENERATION_NUMBER_INFINITY;
if (parse_commit(commit))
return ret;
for (i = 0; i < nr_reference; i++)
for (i = 0; i < nr_reference; i++) {
if (parse_commit(reference[i]))
return ret;
if (reference[i]->generation < min_generation)
min_generation = reference[i]->generation;
}
bases = paint_down_to_common(commit, nr_reference, reference);
if (commit->generation > min_generation)
return ret;
bases = paint_down_to_common(commit, nr_reference, reference, commit->generation);
if (commit->object.flags & PARENT2)
ret = 1;
clear_commit_marks(commit, all_flags);

View file

@ -10,6 +10,9 @@
#include "pretty.h"
#define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF
#define GENERATION_NUMBER_INFINITY 0xFFFFFFFF
#define GENERATION_NUMBER_MAX 0x3FFFFFFF
#define GENERATION_NUMBER_ZERO 0
struct commit_list {
struct commit *item;
@ -33,6 +36,7 @@ struct commit {
*/
struct tree *maybe_tree;
uint32_t graph_pos;
uint32_t generation;
unsigned int index;
};
@ -72,7 +76,7 @@ struct commit *lookup_commit_reference_by_name(const char *name);
*/
struct commit *lookup_commit_or_die(const struct object_id *oid, const char *ref_name);
int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size);
int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph);
int parse_commit_gently(struct commit *item, int quiet_on_missing);
static inline int parse_commit(struct commit *item)
{
@ -341,6 +345,7 @@ extern int remove_signature(struct strbuf *buf);
extern int check_commit_signature(const struct commit *commit, struct signature_check *sigc);
int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused);
int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused);
LAST_ARG_MUST_BE_NULL
extern int run_commit_hook(int editor_is_used, const char *index_file, const char *name, ...);

View file

@ -210,7 +210,7 @@ struct object *parse_object_buffer(const struct object_id *oid, enum object_type
} else if (type == OBJ_COMMIT) {
struct commit *commit = lookup_commit(oid);
if (commit) {
if (parse_commit_buffer(commit, buffer, size))
if (parse_commit_buffer(commit, buffer, size, 1))
return NULL;
if (!get_cached_commit_buffer(commit, NULL)) {
set_commit_buffer(commit, buffer, size);

View file

@ -16,6 +16,7 @@
#include "trailer.h"
#include "wt-status.h"
#include "commit-slab.h"
#include "commit-graph.h"
static struct ref_msg {
const char *gone;
@ -1662,12 +1663,13 @@ static int in_commit_list(const struct commit_list *want, struct commit *c)
}
/*
* Test whether the candidate or one of its parents is contained in the list.
* Test whether the candidate is contained in the list.
* Do not recurse to find out, though, but return -1 if inconclusive.
*/
static enum contains_result contains_test(struct commit *candidate,
const struct commit_list *want,
struct contains_cache *cache)
struct contains_cache *cache,
uint32_t cutoff)
{
enum contains_result *cached = contains_cache_at(cache, candidate);
@ -1683,6 +1685,10 @@ static enum contains_result contains_test(struct commit *candidate,
/* Otherwise, we don't know; prepare to recurse */
parse_commit_or_die(candidate);
if (candidate->generation < cutoff)
return CONTAINS_NO;
return CONTAINS_UNKNOWN;
}
@ -1698,8 +1704,18 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
struct contains_cache *cache)
{
struct contains_stack contains_stack = { 0, 0, NULL };
enum contains_result result = contains_test(candidate, want, cache);
enum contains_result result;
uint32_t cutoff = GENERATION_NUMBER_INFINITY;
const struct commit_list *p;
for (p = want; p; p = p->next) {
struct commit *c = p->item;
load_commit_graph_info(c);
if (c->generation < cutoff)
cutoff = c->generation;
}
result = contains_test(candidate, want, cache, cutoff);
if (result != CONTAINS_UNKNOWN)
return result;
@ -1717,7 +1733,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
* If we just popped the stack, parents->item has been marked,
* therefore contains_test will return a meaningful yes/no.
*/
else switch (contains_test(parents->item, want, cache)) {
else switch (contains_test(parents->item, want, cache, cutoff)) {
case CONTAINS_YES:
*contains_cache_at(cache, commit) = CONTAINS_YES;
contains_stack.nr--;
@ -1731,7 +1747,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
}
}
free(contains_stack.contains_stack);
return contains_test(candidate, want, cache);
return contains_test(candidate, want, cache, cutoff);
}
static int commit_contains(struct ref_filter *filter, struct commit *commit,

View file

@ -1801,7 +1801,7 @@ static void check_commit(const void *buf, size_t size)
{
struct commit c;
memset(&c, 0, sizeof(c));
if (parse_commit_buffer(&c, buf, size))
if (parse_commit_buffer(&c, buf, size, 0))
die("corrupt commit");
}

View file

@ -221,4 +221,13 @@ test_expect_success 'write graph in bare repo' '
graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 merge/1
graph_git_behavior 'bare repo with graph, commit 8 vs merge 2' bare commits/8 merge/2
test_expect_success 'perform fast-forward merge in full repo' '
cd "$TRASH_DIRECTORY/full" &&
git checkout -b merge-5-to-8 commits/5 &&
git merge commits/8 &&
git show-ref -s merge-5-to-8 >output &&
git show-ref -s commits/8 >expect &&
test_cmp expect output
'
test_done