diff --git a/Documentation/Makefile b/Documentation/Makefile index a3868a462f..dc65759cb1 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -51,6 +51,7 @@ MAN7_TXT += gitdiffcore.txt MAN7_TXT += giteveryday.txt MAN7_TXT += gitfaq.txt MAN7_TXT += gitglossary.txt +MAN7_TXT += gitpacking.txt MAN7_TXT += gitnamespaces.txt MAN7_TXT += gitremote-helpers.txt MAN7_TXT += gitrevisions.txt diff --git a/Documentation/config.txt b/Documentation/config.txt index 2d2d06d7ee..8c0b3ed807 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -384,6 +384,8 @@ include::config/apply.txt[] include::config/attr.txt[] +include::config/bitmap-pseudo-merge.txt[] + include::config/blame.txt[] include::config/branch.txt[] diff --git a/Documentation/config/bitmap-pseudo-merge.txt b/Documentation/config/bitmap-pseudo-merge.txt new file mode 100644 index 0000000000..1f264eca99 --- /dev/null +++ b/Documentation/config/bitmap-pseudo-merge.txt @@ -0,0 +1,91 @@ +NOTE: The configuration options in `bitmapPseudoMerge.*` are considered +EXPERIMENTAL and may be subject to change or be removed entirely in the +future. For more information about the pseudo-merge bitmap feature, see +the "Pseudo-merge bitmaps" section of linkgit:gitpacking[7]. + +bitmapPseudoMerge..pattern:: + Regular expression used to match reference names. Commits + pointed to by references matching this pattern (and meeting + the below criteria, like `bitmapPseudoMerge..sampleRate` + and `bitmapPseudoMerge..threshold`) will be considered + for inclusion in a pseudo-merge bitmap. ++ +Commits are grouped into pseudo-merge groups based on whether or not +any reference(s) that point at a given commit match the pattern, which +is an extended regular expression. ++ +Within a pseudo-merge group, commits may be further grouped into +sub-groups based on the capture groups in the pattern. These +sub-groupings are formed from the regular expressions by concatenating +any capture groups from the regular expression, with a '-' dash in +between. ++ +For example, if the pattern is `refs/tags/`, then all tags (provided +they meet the below criteria) will be considered candidates for the +same pseudo-merge group. However, if the pattern is instead +`refs/remotes/([0-9])+/tags/`, then tags from different remotes will +be grouped into separate pseudo-merge groups, based on the remote +number. + +bitmapPseudoMerge..decay:: + Determines the rate at which consecutive pseudo-merge bitmap + groups decrease in size. Must be non-negative. This parameter + can be thought of as `k` in the function `f(n) = C * n^-k`, + where `f(n)` is the size of the `n`th group. ++ +Setting the decay rate equal to `0` will cause all groups to be the +same size. Setting the decay rate equal to `1` will cause the `n`th +group to be `1/n` the size of the initial group. Higher values of the +decay rate cause consecutive groups to shrink at an increasing rate. +The default is `1`. ++ +If all groups are the same size, it is possible that groups containing +newer commits will be able to be used less often than earlier groups, +since it is more likely that the references pointing at newer commits +will be updated more often than a reference pointing at an old commit. + +bitmapPseudoMerge..sampleRate:: + Determines the proportion of non-bitmapped commits (among + reference tips) which are selected for inclusion in an + unstable pseudo-merge bitmap. Must be between `0` and `1` + (inclusive). The default is `1`. + +bitmapPseudoMerge..threshold:: + Determines the minimum age of non-bitmapped commits (among + reference tips, as above) which are candidates for inclusion + in an unstable pseudo-merge bitmap. The default is + `1.week.ago`. + +bitmapPseudoMerge..maxMerges:: + Determines the maximum number of pseudo-merge commits among + which commits may be distributed. ++ +For pseudo-merge groups whose pattern does not contain any capture +groups, this setting is applied for all commits matching the regular +expression. For patterns that have one or more capture groups, this +setting is applied for each distinct capture group. ++ +For example, if your capture group is `refs/tags/`, then this setting +will distribute all tags into a maximum of `maxMerges` pseudo-merge +commits. However, if your capture group is, say, +`refs/remotes/([0-9]+)/tags/`, then this setting will be applied to +each remote's set of tags individually. ++ +Must be non-negative. The default value is 64. + +bitmapPseudoMerge..stableThreshold:: + Determines the minimum age of commits (among reference tips, + as above, however stable commits are still considered + candidates even when they have been covered by a bitmap) which + are candidates for a stable a pseudo-merge bitmap. The default + is `1.month.ago`. ++ +Setting this threshold to a smaller value (e.g., 1.week.ago) will cause +more stable groups to be generated (which impose a one-time generation +cost) but those groups will likely become stale over time. Using a +larger value incurs the opposite penalty (fewer stable groups which are +more useful). + +bitmapPseudoMerge..stableSize:: + Determines the size (in number of commits) of a stable + psuedo-merge bitmap. The default is `512`. diff --git a/Documentation/gitpacking.txt b/Documentation/gitpacking.txt new file mode 100644 index 0000000000..4a6fcba6f7 --- /dev/null +++ b/Documentation/gitpacking.txt @@ -0,0 +1,189 @@ +gitpacking(7) +============= + +NAME +---- +gitpacking - Advanced concepts related to packing in Git + +SYNOPSIS +-------- +gitpacking + +DESCRIPTION +----------- + +This document aims to describe some advanced concepts related to packing +in Git. + +Many concepts are currently described scattered between manual pages of +various Git commands, including linkgit:git-pack-objects[1], +linkgit:git-repack[1], and others, as well as linkgit:gitformat-pack[5], +and parts of the `Documentation/technical` tree. + +There are many aspects of packing in Git that are not covered in this +document that instead live in the aforementioned areas. Over time, those +scattered bits may coalesce into this document. + +== Pseudo-merge bitmaps + +NOTE: Pseudo-merge bitmaps are considered an experimental feature, so +the configuration and many of the ideas are subject to change. + +=== Background + +Reachability bitmaps are most efficient when we have on-disk stored +bitmaps for one or more of the starting points of a traversal. For this +reason, Git prefers storing bitmaps for commits at the tips of refs, +because traversals tend to start with those points. + +But if you have a large number of refs, it's not feasible to store a +bitmap for _every_ ref tip. It takes up space, and just OR-ing all of +those bitmaps together is expensive. + +One way we can deal with that is to create bitmaps that represent +_groups_ of refs. When a traversal asks about the entire group, then we +can use this single bitmap instead of considering each ref individually. +Because these bitmaps represent the set of objects which would be +reachable in a hypothetical merge of all of the commits, we call them +pseudo-merge bitmaps. + +=== Overview + +A "pseudo-merge bitmap" is used to refer to a pair of bitmaps, as +follows: + +Commit bitmap:: + + A bitmap whose set bits describe the set of commits included in the + pseudo-merge's "merge" bitmap (as below). + +Merge bitmap:: + + A bitmap whose set bits describe the reachability closure over the set + of commits in the pseudo-merge's "commits" bitmap (as above). An + identical bitmap would be generated for an octopus merge with the same + set of parents as described in the commits bitmap. + +Pseudo-merge bitmaps can accelerate bitmap traversals when all commits +for a given pseudo-merge are listed on either side of the traversal, +either directly (by explicitly asking for them as part of the `HAVES` +or `WANTS`) or indirectly (by encountering them during a fill-in +traversal). + +=== Use-cases + +For example, suppose there exists a pseudo-merge bitmap with a large +number of commits, all of which are listed in the `WANTS` section of +some bitmap traversal query. When pseudo-merge bitmaps are enabled, the +bitmap machinery can quickly determine there is a pseudo-merge which +satisfies some subset of the wanted objects on either side of the query. +Then, we can inflate the EWAH-compressed bitmap, and `OR` it in to the +resulting bitmap. By contrast, without pseudo-merge bitmaps, we would +have to repeat the decompression and `OR`-ing step over a potentially +large number of individual bitmaps, which can take proportionally more +time. + +Another benefit of pseudo-merges arises when there is some combination +of (a) a large number of references, with (b) poor bitmap coverage, and +(c) deep, nested trees, making fill-in traversal relatively expensive. +For example, suppose that there are a large enough number of tags where +bitmapping each of the tags individually is infeasible. Without +pseudo-merge bitmaps, computing the result of, say, `git rev-list +--use-bitmap-index --count --objects --tags` would likely require a +large amount of fill-in traversal. But when a large quantity of those +tags are stored together in a pseudo-merge bitmap, the bitmap machinery +can take advantage of the fact that we only care about the union of +objects reachable from all of those tags, and answer the query much +faster. + +=== Configuration + +Reference tips are grouped into different pseudo-merge groups according +to two criteria. A reference name matches one or more of the defined +pseudo-merge patterns, and optionally one or more capture groups within +that pattern which further partition the group. + +Within a group, commits may be considered "stable", or "unstable" +depending on their age. These are adjusted by setting the +`bitmapPseudoMerge..stableThreshold` and +`bitmapPseudoMerge..threshold` configuration values, respectively. + +All stable commits are grouped into pseudo-merges of equal size +(`bitmapPseudoMerge..stableSize`). If the `stableSize` +configuration is set to, say, 100, then the first 100 commits (ordered +by committer date) which are older than the `stableThreshold` value will +form one group, the next 100 commits will form another group, and so on. + +Among unstable commits, the pseudo-merge machinery will attempt to +combine older commits into large groups as opposed to newer commits +which will appear in smaller groups. This is based on the heuristic that +references whose tip commit is older are less likely to be modified to +point at a different commit than a reference whose tip commit is newer. + +The size of groups is determined by a power-law decay function, and the +decay parameter roughly corresponds to "k" in `f(n) = C*n^(-k/100)`, +where `f(n)` describes the size of the `n`-th pseudo-merge group. The +sample rate controls what percentage of eligible commits are considered +as candidates. The threshold parameter indicates the minimum age (so as +to avoid including too-recent commits in a pseudo-merge group, making it +less likely to be valid). The "maxMerges" parameter sets an upper-bound +on the number of pseudo-merge commits an individual group + +The "stable"-related parameters control "stable" pseudo-merge groups, +comprised of a fixed number of commits which are older than the +configured "stable threshold" value and may be grouped together in +chunks of "stableSize" in order of age. + +The exact configuration for pseudo-merges is as follows: + +include::config/bitmap-pseudo-merge.txt[] + +=== Examples + +Suppose that you have a repository with a large number of references, +and you want a bare-bones configuration of pseudo-merge bitmaps that +will enhance bitmap coverage of the `refs/` namespace. You may start +wiht a configuration like so: + + [bitmapPseudoMerge "all"] + pattern = "refs/" + threshold = now + stableThreshold = never + sampleRate = 100 + maxMerges = 64 + +This will create pseudo-merge bitmaps for all references, regardless of +their age, and group them into 64 pseudo-merge commits. + +If you wanted to separate tags from branches when generating +pseudo-merge commits, you would instead define the pattern with a +capture group, like so: + + [bitmapPseudoMerge "all"] + pattern = "refs/(heads/tags)/" + +Suppose instead that you are working in a fork-network repository, with +each fork specified by some numeric ID, and whose refs reside in +`refs/virtual/NNN/` (where `NNN` is the numeric ID corresponding to some +fork) in the network. In this instance, you may instead write something +like: + + [bitmapPseudoMerge "all"] + pattern = "refs/virtual/([0-9]+)/(heads|tags)/" + threshold = now + stableThreshold = never + sampleRate = 100 + maxMerges = 64 + +Which would generate pseudo-merge group identifiers like "1234-heads", +and "5678-tags" (for branches in fork "1234", and tags in remote "5678", +respectively). + +SEE ALSO +-------- +linkgit:git-pack-objects[1] +linkgit:git-repack[1] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/technical/bitmap-format.txt b/Documentation/technical/bitmap-format.txt index f5d200939b..bfb0ec7beb 100644 --- a/Documentation/technical/bitmap-format.txt +++ b/Documentation/technical/bitmap-format.txt @@ -255,3 +255,144 @@ triplet is - xor_row (4 byte integer, network byte order): :: The position of the triplet whose bitmap is used to compress this one, or `0xffffffff` if no such bitmap exists. + +Pseudo-merge bitmaps +-------------------- + +If the `BITMAP_OPT_PSEUDO_MERGES` flag is set, a variable number of +bytes (preceding the name-hash cache, commit lookup table, and trailing +checksum) of the `.bitmap` file is used to store pseudo-merge bitmaps. + +For more information on what pseudo-merges are, why they are useful, and +how to configure them, see the information in linkgit:gitpacking[7]. + +=== File format + +If enabled, pseudo-merge bitmaps are stored in an optional section at +the end of a `.bitmap` file. The format is as follows: + +.... ++-------------------------------------------+ +| .bitmap File | ++-------------------------------------------+ +| | +| Pseudo-merge bitmaps (Variable Length) | +| +---------------------------+ | +| | commits_bitmap (EWAH) | | +| +---------------------------+ | +| | merge_bitmap (EWAH) | | +| +---------------------------+ | +| | ++-------------------------------------------+ +| | +| Lookup Table | +| +---------------------------+ | +| | commit_pos (4 bytes) | | +| +---------------------------+ | +| | offset (8 bytes) | | +| +------------+--------------+ | +| | +| Offset Cases: | +| ------------- | +| | +| 1. MSB Unset: single pseudo-merge bitmap | +| + offset to pseudo-merge bitmap | +| | +| 2. MSB Set: multiple pseudo-merges | +| + offset to extended lookup table | +| | ++-------------------------------------------+ +| | +| Extended Lookup Table (Optional) | +| +----+----------+----------+----------+ | +| | N | Offset 1 | .... | Offset N | | +| +----+----------+----------+----------+ | +| | | 8 bytes | .... | 8 bytes | | +| +----+----------+----------+----------+ | +| | ++-------------------------------------------+ +| | +| Pseudo-merge position table | +| +----+----------+----------+----------+ | +| | N | Offset 1 | .... | Offset N | | +| +----+----------+----------+----------+ | +| | | 8 bytes | .... | 8 bytes | | +| +----+----------+----------+----------+ | +| | ++-------------------------------------------+ +| | +| Pseudo-merge Metadata | +| +-----------------------------------+ | +| | # pseudo-merges (4 bytes) | | +| +-----------------------------------+ | +| | # commits (4 bytes) | | +| +-----------------------------------+ | +| | Lookup offset (8 bytes) | | +| +-----------------------------------+ | +| | Extension size (8 bytes) | | +| +-----------------------------------+ | +| | ++-------------------------------------------+ +.... + +* One or more pseudo-merge bitmaps, each containing: + + ** `commits_bitmap`, an EWAH-compressed bitmap describing the set of + commits included in the this psuedo-merge. + + ** `merge_bitmap`, an EWAH-compressed bitmap describing the union of + the set of objects reachable from all commits listed in the + `commits_bitmap`. + +* A lookup table, mapping pseudo-merged commits to the pseudo-merges + they belong to. Entries appear in increasing order of each commit's + bit position. Each entry is 12 bytes wide, and is comprised of the + following: + + ** `commit_pos`, a 4-byte unsigned value (in network byte-order) + containing the bit position for this commit. + + ** `offset`, an 8-byte unsigned value (also in network byte-order) + containing either one of two possible offsets, depending on whether or + not the most-significant bit is set. + + *** If unset (i.e. `offset & ((uint64_t)1<<63) == 0`), the offset + (relative to the beginning of the `.bitmap` file) at which the + pseudo-merge bitmap for this commit can be read. This indicates + only a single pseudo-merge bitmap contains this commit. + + *** If set (i.e. `offset & ((uint64_t)1<<63) != 0`), the offset + (again relative to the beginning of the `.bitmap` file) at which + the extended offset table can be located describing the set of + pseudo-merge bitmaps which contain this commit. This indicates + that multiple pseudo-merge bitmaps contain this commit. + +* An (optional) extended lookup table (written if and only if there is + at least one commit which appears in more than one pseudo-merge). + There are as many entries as commits which appear in multiple + pseudo-merges. Each entry contains the following: + + ** `N`, a 4-byte unsigned value equal to the number of pseudo-merges + which contain a given commit. + + ** An array of `N` 8-byte unsigned values, each of which is + interpreted as an offset (relative to the beginning of the + `.bitmap` file) at which a pseudo-merge bitmap for this commit can + be read. These values occur in no particular order. + +* Positions for all pseudo-merges, each stored as an 8-byte unsigned + value (in network byte-order) containing the offset (relative to the + beginning of the `.bitmap` file) of each consecutive pseudo-merge. + +* A 4-byte unsigned value (in network byte-order) equal to the number of + pseudo-merges. + +* A 4-byte unsigned value (in network byte-order) equal to the number of + unique commits which appear in any pseudo-merge. + +* An 8-byte unsigned value (in network byte-order) equal to the number + of bytes between the start of the pseudo-merge section and the + beginning of the lookup table. + +* An 8-byte unsigned value (in network byte-order) equal to the number + of bytes in the pseudo-merge section (including this field). diff --git a/Makefile b/Makefile index 83bd9d13af..3eab701b10 100644 --- a/Makefile +++ b/Makefile @@ -1103,6 +1103,7 @@ LIB_OBJS += prompt.o LIB_OBJS += protocol.o LIB_OBJS += protocol-caps.o LIB_OBJS += prune-packed.o +LIB_OBJS += pseudo-merge.o LIB_OBJS += quote.o LIB_OBJS += range-diff.o LIB_OBJS += reachable.o diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 638f5c57f0..dd97fdd5d2 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1341,7 +1341,8 @@ static void write_pack_file(void) hash_to_hex(hash)); if (write_bitmap_index) { - bitmap_writer_init(&bitmap_writer); + bitmap_writer_init(&bitmap_writer, + the_repository); bitmap_writer_set_checksum(&bitmap_writer, hash); bitmap_writer_build_type_index(&bitmap_writer, &to_pack, written_list, nr_written); diff --git a/config.c b/config.c index 8fc7ecf921..716d3d0ebf 100644 --- a/config.c +++ b/config.c @@ -1244,6 +1244,15 @@ ssize_t git_config_ssize_t(const char *name, const char *value, return ret; } +double git_config_double(const char *name, const char *value, + const struct key_value_info *kvi) +{ + double ret; + if (!git_parse_double(value, &ret)) + die_bad_number(name, value, kvi); + return ret; +} + static const struct fsync_component_name { const char *name; enum fsync_component component_bits; diff --git a/config.h b/config.h index 8620aa731b..54b47dec9e 100644 --- a/config.h +++ b/config.h @@ -261,6 +261,13 @@ unsigned long git_config_ulong(const char *, const char *, ssize_t git_config_ssize_t(const char *, const char *, const struct key_value_info *); +/** + * Identically to `git_config_double`, but for double-precision floating point + * values. + */ +double git_config_double(const char *, const char *, + const struct key_value_info *); + /** * Same as `git_config_bool`, except that integers are returned as-is, and * an `is_bool` flag is unset. diff --git a/ewah/bitmap.c b/ewah/bitmap.c index ac7e0af622..55928dada8 100644 --- a/ewah/bitmap.c +++ b/ewah/bitmap.c @@ -138,6 +138,49 @@ void bitmap_or(struct bitmap *self, const struct bitmap *other) self->words[i] |= other->words[i]; } +int ewah_bitmap_is_subset(struct ewah_bitmap *self, struct bitmap *other) +{ + struct ewah_iterator it; + eword_t word; + size_t i; + + ewah_iterator_init(&it, self); + + for (i = 0; i < other->word_alloc; i++) { + if (!ewah_iterator_next(&word, &it)) { + /* + * If we reached the end of `self`, and haven't + * rejected `self` as a possible subset of + * `other` yet, then we are done and `self` is + * indeed a subset of `other`. + */ + return 1; + } + if (word & ~other->words[i]) { + /* + * Otherwise, compare the next two pairs of + * words. If the word from `self` has bit(s) not + * in the word from `other`, `self` is not a + * subset of `other`. + */ + return 0; + } + } + + /* + * If we got to this point, there may be zero or more words + * remaining in `self`, with no remaining words left in `other`. + * If there are any bits set in the remaining word(s) in `self`, + * then `self` is not a subset of `other`. + */ + while (ewah_iterator_next(&word, &it)) + if (word) + return 0; + + /* `self` is definitely a subset of `other` */ + return 1; +} + void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other) { size_t original_size = self->word_alloc; @@ -169,6 +212,20 @@ size_t bitmap_popcount(struct bitmap *self) return count; } +size_t ewah_bitmap_popcount(struct ewah_bitmap *self) +{ + struct ewah_iterator it; + eword_t word; + size_t count = 0; + + ewah_iterator_init(&it, self); + + while (ewah_iterator_next(&word, &it)) + count += ewah_bit_popcount64(word); + + return count; +} + int bitmap_is_empty(struct bitmap *self) { size_t i; @@ -204,6 +261,25 @@ int bitmap_equals(struct bitmap *self, struct bitmap *other) return 1; } +int bitmap_equals_ewah(struct bitmap *self, struct ewah_bitmap *other) +{ + struct ewah_iterator it; + eword_t word; + size_t i = 0; + + ewah_iterator_init(&it, other); + + while (ewah_iterator_next(&word, &it)) + if (word != (i < self->word_alloc ? self->words[i++] : 0)) + return 0; + + for (; i < self->word_alloc; i++) + if (self->words[i]) + return 0; + + return 1; +} + int bitmap_is_subset(struct bitmap *self, struct bitmap *other) { size_t common_size, i; diff --git a/ewah/ewok.h b/ewah/ewok.h index c11d76c6f3..5e357e2493 100644 --- a/ewah/ewok.h +++ b/ewah/ewok.h @@ -179,7 +179,14 @@ void bitmap_unset(struct bitmap *self, size_t pos); int bitmap_get(struct bitmap *self, size_t pos); void bitmap_free(struct bitmap *self); int bitmap_equals(struct bitmap *self, struct bitmap *other); +int bitmap_equals_ewah(struct bitmap *self, struct ewah_bitmap *other); + +/* + * Both `bitmap_is_subset()` and `ewah_bitmap_is_subset()` return 1 if the set + * of bits in 'self' are a subset of the bits in 'other'. Returns 0 otherwise. + */ int bitmap_is_subset(struct bitmap *self, struct bitmap *other); +int ewah_bitmap_is_subset(struct ewah_bitmap *self, struct bitmap *other); struct ewah_bitmap * bitmap_to_ewah(struct bitmap *bitmap); struct bitmap *ewah_to_bitmap(struct ewah_bitmap *ewah); @@ -189,6 +196,7 @@ void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other); void bitmap_or(struct bitmap *self, const struct bitmap *other); size_t bitmap_popcount(struct bitmap *self); +size_t ewah_bitmap_popcount(struct ewah_bitmap *self); int bitmap_is_empty(struct bitmap *self); #endif diff --git a/midx-write.c b/midx-write.c index 0125aa4dc9..a941e363b7 100644 --- a/midx-write.c +++ b/midx-write.c @@ -823,7 +823,7 @@ static int write_midx_bitmap(const char *midx_name, for (i = 0; i < pdata->nr_objects; i++) index[i] = &pdata->objects[i].idx; - bitmap_writer_init(&writer); + bitmap_writer_init(&writer, the_repository); bitmap_writer_show_progress(&writer, flags & MIDX_PROGRESS); bitmap_writer_build_type_index(&writer, pdata, index, pdata->nr_objects); diff --git a/object.h b/object.h index 73b4ec3904..669b6a18fa 100644 --- a/object.h +++ b/object.h @@ -81,7 +81,7 @@ void object_array_init(struct object_array *array); * reflog.c: 10--12 * builtin/show-branch.c: 0-------------------------------------------26 * builtin/unpack-objects.c: 2021 - * pack-bitmap.h: 22 + * pack-bitmap.h: 2122 */ #define FLAG_BITS 28 diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 6cae670412..6e8060f8a0 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -17,6 +17,12 @@ #include "trace2.h" #include "tree.h" #include "tree-walk.h" +#include "pseudo-merge.h" +#include "oid-array.h" +#include "config.h" +#include "alloc.h" +#include "refs.h" +#include "strmap.h" struct bitmapped_commit { struct commit *commit; @@ -25,16 +31,39 @@ struct bitmapped_commit { int flags; int xor_offset; uint32_t commit_pos; + unsigned pseudo_merge : 1; }; -void bitmap_writer_init(struct bitmap_writer *writer) +static inline int bitmap_writer_nr_selected_commits(struct bitmap_writer *writer) +{ + return writer->selected_nr - writer->pseudo_merges_nr; +} + +void bitmap_writer_init(struct bitmap_writer *writer, struct repository *r) { memset(writer, 0, sizeof(struct bitmap_writer)); + if (writer->bitmaps) + BUG("bitmap writer already initialized"); + writer->bitmaps = kh_init_oid_map(); + writer->pseudo_merge_commits = kh_init_oid_map(); + + string_list_init_dup(&writer->pseudo_merge_groups); + + load_pseudo_merges_from_config(&writer->pseudo_merge_groups); +} + +static void free_pseudo_merge_commit_idx(struct pseudo_merge_commit_idx *idx) +{ + if (!idx) + return; + free(idx->pseudo_merge); + free(idx); } void bitmap_writer_free(struct bitmap_writer *writer) { uint32_t i; + struct pseudo_merge_commit_idx *idx; if (!writer) return; @@ -46,6 +75,10 @@ void bitmap_writer_free(struct bitmap_writer *writer) kh_destroy_oid_map(writer->bitmaps); + kh_foreach_value(writer->pseudo_merge_commits, idx, + free_pseudo_merge_commit_idx(idx)); + kh_destroy_oid_map(writer->pseudo_merge_commits); + for (i = 0; i < writer->selected_nr; i++) { struct bitmapped_commit *bc = &writer->selected[i]; if (bc->write_as != bc->bitmap) @@ -121,22 +154,41 @@ void bitmap_writer_build_type_index(struct bitmap_writer *writer, } } +int bitmap_writer_has_bitmapped_object_id(struct bitmap_writer *writer, + const struct object_id *oid) +{ + return kh_get_oid_map(writer->bitmaps, *oid) != kh_end(writer->bitmaps); +} + /** * Compute the actual bitmaps */ -static inline void push_bitmapped_commit(struct bitmap_writer *writer, - struct commit *commit) +void bitmap_writer_push_commit(struct bitmap_writer *writer, + struct commit *commit, unsigned pseudo_merge) { if (writer->selected_nr >= writer->selected_alloc) { writer->selected_alloc = (writer->selected_alloc + 32) * 2; REALLOC_ARRAY(writer->selected, writer->selected_alloc); } + if (!pseudo_merge) { + int hash_ret; + khiter_t hash_pos = kh_put_oid_map(writer->bitmaps, + commit->object.oid, + &hash_ret); + + if (!hash_ret) + die(_("duplicate entry when writing bitmap index: %s"), + oid_to_hex(&commit->object.oid)); + kh_value(writer->bitmaps, hash_pos) = NULL; + } + writer->selected[writer->selected_nr].commit = commit; writer->selected[writer->selected_nr].bitmap = NULL; writer->selected[writer->selected_nr].write_as = NULL; writer->selected[writer->selected_nr].flags = 0; + writer->selected[writer->selected_nr].pseudo_merge = pseudo_merge; writer->selected_nr++; } @@ -167,16 +219,20 @@ static void compute_xor_offsets(struct bitmap_writer *writer) while (next < writer->selected_nr) { struct bitmapped_commit *stored = &writer->selected[next]; - int best_offset = 0; struct ewah_bitmap *best_bitmap = stored->bitmap; struct ewah_bitmap *test_xor; + if (stored->pseudo_merge) + goto next; + for (i = 1; i <= MAX_XOR_OFFSET_SEARCH; ++i) { int curr = next - i; if (curr < 0) break; + if (writer->selected[curr].pseudo_merge) + continue; test_xor = ewah_pool_new(); ewah_xor(writer->selected[curr].bitmap, stored->bitmap, test_xor); @@ -192,6 +248,7 @@ static void compute_xor_offsets(struct bitmap_writer *writer) } } +next: stored->xor_offset = best_offset; stored->write_as = best_bitmap; @@ -204,7 +261,8 @@ struct bb_commit { struct bitmap *commit_mask; struct bitmap *bitmap; unsigned selected:1, - maximal:1; + maximal:1, + pseudo_merge:1; unsigned idx; /* within selected array */ }; @@ -242,17 +300,18 @@ static void bitmap_builder_init(struct bitmap_builder *bb, revs.first_parent_only = 1; for (i = 0; i < writer->selected_nr; i++) { - struct commit *c = writer->selected[i].commit; - struct bb_commit *ent = bb_data_at(&bb->data, c); + struct bitmapped_commit *bc = &writer->selected[i]; + struct bb_commit *ent = bb_data_at(&bb->data, bc->commit); ent->selected = 1; ent->maximal = 1; + ent->pseudo_merge = bc->pseudo_merge; ent->idx = i; ent->commit_mask = bitmap_new(); bitmap_set(ent->commit_mask, i); - add_pending_object(&revs, &c->object, ""); + add_pending_object(&revs, &bc->commit->object, ""); } if (prepare_revision_walk(&revs)) @@ -410,6 +469,7 @@ static int fill_bitmap_tree(struct bitmap_writer *writer, } static int reused_bitmaps_nr; +static int reused_pseudo_merge_bitmaps_nr; static int fill_bitmap_commit(struct bitmap_writer *writer, struct bb_commit *ent, @@ -431,8 +491,13 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, struct commit *c = prio_queue_get(queue); if (old_bitmap && mapping) { - struct ewah_bitmap *old = bitmap_for_commit(old_bitmap, c); + struct ewah_bitmap *old; struct bitmap *remapped = bitmap_new(); + + if (commit->object.flags & BITMAP_PSEUDO_MERGE) + old = pseudo_merge_bitmap_for_commit(old_bitmap, c); + else + old = bitmap_for_commit(old_bitmap, c); /* * If this commit has an old bitmap, then translate that * bitmap and add its bits to this one. No need to walk @@ -441,7 +506,10 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, if (old && !rebuild_bitmap(mapping, old, remapped)) { bitmap_or(ent->bitmap, remapped); bitmap_free(remapped); - reused_bitmaps_nr++; + if (commit->object.flags & BITMAP_PSEUDO_MERGE) + reused_pseudo_merge_bitmaps_nr++; + else + reused_bitmaps_nr++; continue; } bitmap_free(remapped); @@ -451,12 +519,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, * Mark ourselves and queue our tree. The commit * walk ensures we cover all parents. */ - pos = find_object_pos(writer, &c->object.oid, &found); - if (!found) - return -1; - bitmap_set(ent->bitmap, pos); - prio_queue_put(tree_queue, - repo_get_commit_tree(the_repository, c)); + if (!(c->object.flags & BITMAP_PSEUDO_MERGE)) { + pos = find_object_pos(writer, &c->object.oid, &found); + if (!found) + return -1; + bitmap_set(ent->bitmap, pos); + prio_queue_put(tree_queue, + repo_get_commit_tree(the_repository, c)); + } for (p = c->parents; p; p = p->next) { pos = find_object_pos(writer, &p->item->object.oid, @@ -483,14 +553,17 @@ static void store_selected(struct bitmap_writer *writer, { struct bitmapped_commit *stored = &writer->selected[ent->idx]; khiter_t hash_pos; - int hash_ret; stored->bitmap = bitmap_to_ewah(ent->bitmap); - hash_pos = kh_put_oid_map(writer->bitmaps, commit->object.oid, &hash_ret); - if (hash_ret == 0) - die("Duplicate entry when writing index: %s", + if (ent->pseudo_merge) + return; + + hash_pos = kh_get_oid_map(writer->bitmaps, commit->object.oid); + if (hash_pos == kh_end(writer->bitmaps)) + die(_("attempted to store non-selected commit: '%s'"), oid_to_hex(&commit->object.oid)); + kh_value(writer->bitmaps, hash_pos) = stored; } @@ -506,7 +579,6 @@ int bitmap_writer_build(struct bitmap_writer *writer, uint32_t *mapping; int closed = 1; /* until proven otherwise */ - writer->bitmaps = kh_init_oid_map(); writer->to_pack = to_pack; if (writer->show_progress) @@ -567,6 +639,9 @@ int bitmap_writer_build(struct bitmap_writer *writer, the_repository); trace2_data_intmax("pack-bitmap-write", the_repository, "building_bitmaps_reused", reused_bitmaps_nr); + trace2_data_intmax("pack-bitmap-write", the_repository, + "building_bitmaps_pseudo_merge_reused", + reused_pseudo_merge_bitmaps_nr); stop_progress(&writer->progress); @@ -619,7 +694,7 @@ void bitmap_writer_select_commits(struct bitmap_writer *writer, if (indexed_commits_nr < 100) { for (i = 0; i < indexed_commits_nr; ++i) - push_bitmapped_commit(writer, indexed_commits[i]); + bitmap_writer_push_commit(writer, indexed_commits[i], 0); return; } @@ -652,13 +727,15 @@ void bitmap_writer_select_commits(struct bitmap_writer *writer, } } - push_bitmapped_commit(writer, chosen); + bitmap_writer_push_commit(writer, chosen, 0); i += next + 1; display_progress(writer->progress, i); } stop_progress(&writer->progress); + + select_pseudo_merges(writer, indexed_commits, indexed_commits_nr); } @@ -689,8 +766,11 @@ static void write_selected_commits_v1(struct bitmap_writer *writer, { int i; - for (i = 0; i < writer->selected_nr; ++i) { + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); ++i) { struct bitmapped_commit *stored = &writer->selected[i]; + if (stored->pseudo_merge) + BUG("unexpected pseudo-merge among selected: %s", + oid_to_hex(&stored->commit->object.oid)); if (offsets) offsets[i] = hashfile_total(f); @@ -703,6 +783,130 @@ static void write_selected_commits_v1(struct bitmap_writer *writer, } } +static void write_pseudo_merges(struct bitmap_writer *writer, + struct hashfile *f) +{ + struct oid_array commits = OID_ARRAY_INIT; + struct bitmap **commits_bitmap = NULL; + off_t *pseudo_merge_ofs = NULL; + off_t start, table_start, next_ext; + + uint32_t base = bitmap_writer_nr_selected_commits(writer); + size_t i, j = 0; + + CALLOC_ARRAY(commits_bitmap, writer->pseudo_merges_nr); + CALLOC_ARRAY(pseudo_merge_ofs, writer->pseudo_merges_nr); + + for (i = 0; i < writer->pseudo_merges_nr; i++) { + struct bitmapped_commit *merge = &writer->selected[base + i]; + struct commit_list *p; + + if (!merge->pseudo_merge) + BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i); + + commits_bitmap[i] = bitmap_new(); + + for (p = merge->commit->parents; p; p = p->next) + bitmap_set(commits_bitmap[i], + find_object_pos(writer, &p->item->object.oid, + NULL)); + } + + start = hashfile_total(f); + + for (i = 0; i < writer->pseudo_merges_nr; i++) { + struct ewah_bitmap *commits_ewah = bitmap_to_ewah(commits_bitmap[i]); + + pseudo_merge_ofs[i] = hashfile_total(f); + + dump_bitmap(f, commits_ewah); + dump_bitmap(f, writer->selected[base+i].write_as); + + ewah_free(commits_ewah); + } + + next_ext = st_add(hashfile_total(f), + st_mult(kh_size(writer->pseudo_merge_commits), + sizeof(uint64_t))); + + table_start = hashfile_total(f); + + commits.alloc = kh_size(writer->pseudo_merge_commits); + CALLOC_ARRAY(commits.oid, commits.alloc); + + for (i = kh_begin(writer->pseudo_merge_commits); i != kh_end(writer->pseudo_merge_commits); i++) { + if (!kh_exist(writer->pseudo_merge_commits, i)) + continue; + oid_array_append(&commits, &kh_key(writer->pseudo_merge_commits, i)); + } + + oid_array_sort(&commits); + + /* write lookup table (non-extended) */ + for (i = 0; i < commits.nr; i++) { + int hash_pos; + struct pseudo_merge_commit_idx *c; + + hash_pos = kh_get_oid_map(writer->pseudo_merge_commits, + commits.oid[i]); + if (hash_pos == kh_end(writer->pseudo_merge_commits)) + BUG("could not find pseudo-merge commit %s", + oid_to_hex(&commits.oid[i])); + + c = kh_value(writer->pseudo_merge_commits, hash_pos); + + hashwrite_be32(f, find_object_pos(writer, &commits.oid[i], + NULL)); + if (c->nr == 1) + hashwrite_be64(f, pseudo_merge_ofs[c->pseudo_merge[0]]); + else if (c->nr > 1) { + if (next_ext & ((uint64_t)1<<63)) + die(_("too many pseudo-merges")); + hashwrite_be64(f, next_ext | ((uint64_t)1<<63)); + next_ext = st_add3(next_ext, + sizeof(uint32_t), + st_mult(c->nr, sizeof(uint64_t))); + } else + BUG("expected commit '%s' to have at least one " + "pseudo-merge", oid_to_hex(&commits.oid[i])); + } + + /* write lookup table (extended) */ + for (i = 0; i < commits.nr; i++) { + int hash_pos; + struct pseudo_merge_commit_idx *c; + + hash_pos = kh_get_oid_map(writer->pseudo_merge_commits, + commits.oid[i]); + if (hash_pos == kh_end(writer->pseudo_merge_commits)) + BUG("could not find pseudo-merge commit %s", + oid_to_hex(&commits.oid[i])); + + c = kh_value(writer->pseudo_merge_commits, hash_pos); + if (c->nr == 1) + continue; + + hashwrite_be32(f, c->nr); + for (j = 0; j < c->nr; j++) + hashwrite_be64(f, pseudo_merge_ofs[c->pseudo_merge[j]]); + } + + /* write positions for all pseudo merges */ + for (i = 0; i < writer->pseudo_merges_nr; i++) + hashwrite_be64(f, pseudo_merge_ofs[i]); + + hashwrite_be32(f, writer->pseudo_merges_nr); + hashwrite_be32(f, kh_size(writer->pseudo_merge_commits)); + hashwrite_be64(f, table_start - start); + hashwrite_be64(f, hashfile_total(f) - start + sizeof(uint64_t)); + + for (i = 0; i < writer->pseudo_merges_nr; i++) + bitmap_free(commits_bitmap[i]); + + free(pseudo_merge_ofs); + free(commits_bitmap); +} + static int table_cmp(const void *_va, const void *_vb, void *_data) { struct bitmap_writer *writer = _data; @@ -723,10 +927,10 @@ static void write_lookup_table(struct bitmap_writer *writer, struct hashfile *f, uint32_t i; uint32_t *table, *table_inv; - ALLOC_ARRAY(table, writer->selected_nr); - ALLOC_ARRAY(table_inv, writer->selected_nr); + ALLOC_ARRAY(table, bitmap_writer_nr_selected_commits(writer)); + ALLOC_ARRAY(table_inv, bitmap_writer_nr_selected_commits(writer)); - for (i = 0; i < writer->selected_nr; i++) + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) table[i] = i; /* @@ -734,16 +938,16 @@ static void write_lookup_table(struct bitmap_writer *writer, struct hashfile *f, * bitmap corresponds to j'th bitmapped commit (among the selected * commits) in lex order of OIDs. */ - QSORT_S(table, writer->selected_nr, table_cmp, writer); + QSORT_S(table, bitmap_writer_nr_selected_commits(writer), table_cmp, writer); /* table_inv helps us discover that relationship (i'th bitmap * to j'th commit by j = table_inv[i]) */ - for (i = 0; i < writer->selected_nr; i++) + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) table_inv[table[i]] = i; trace2_region_enter("pack-bitmap-write", "writing_lookup_table", the_repository); - for (i = 0; i < writer->selected_nr; i++) { + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) { struct bitmapped_commit *selected = &writer->selected[table[i]]; uint32_t xor_offset = selected->xor_offset; uint32_t xor_row; @@ -810,12 +1014,15 @@ void bitmap_writer_finish(struct bitmap_writer *writer, int fd = odb_mkstemp(&tmp_file, "pack/tmp_bitmap_XXXXXX"); + if (writer->pseudo_merges_nr) + options |= BITMAP_OPT_PSEUDO_MERGES; + f = hashfd(fd, tmp_file.buf); memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)); header.version = htons(default_version); header.options = htons(flags | options); - header.entry_count = htonl(writer->selected_nr); + header.entry_count = htonl(bitmap_writer_nr_selected_commits(writer)); hashcpy(header.checksum, writer->pack_checksum); hashwrite(f, &header, sizeof(header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz); @@ -827,7 +1034,7 @@ void bitmap_writer_finish(struct bitmap_writer *writer, if (options & BITMAP_OPT_LOOKUP_TABLE) CALLOC_ARRAY(offsets, index_nr); - for (i = 0; i < writer->selected_nr; i++) { + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) { struct bitmapped_commit *stored = &writer->selected[i]; int commit_pos = oid_pos(&stored->commit->object.oid, index, index_nr, oid_access); @@ -839,6 +1046,9 @@ void bitmap_writer_finish(struct bitmap_writer *writer, write_selected_commits_v1(writer, f, offsets); + if (options & BITMAP_OPT_PSEUDO_MERGES) + write_pseudo_merges(writer, f); + if (options & BITMAP_OPT_LOOKUP_TABLE) write_lookup_table(writer, f, offsets); diff --git a/pack-bitmap.c b/pack-bitmap.c index 1d6b7f2826..afe41f6ba6 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -20,6 +20,7 @@ #include "list-objects-filter-options.h" #include "midx.h" #include "config.h" +#include "pseudo-merge.h" /* * An entry on the bitmap index, representing the bitmap for a given @@ -86,6 +87,9 @@ struct bitmap_index { */ unsigned char *table_lookup; + /* This contains the pseudo-merge cache within 'map' (if found). */ + struct pseudo_merge_map pseudo_merges; + /* * Extended index. * @@ -110,6 +114,13 @@ struct bitmap_index { unsigned int version; }; +static int pseudo_merges_satisfied_nr; +static int pseudo_merges_cascades_nr; +static int existing_bitmaps_hits_nr; +static int existing_bitmaps_misses_nr; +static int roots_with_bitmaps_nr; +static int roots_without_bitmaps_nr; + static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st) { struct ewah_bitmap *parent; @@ -129,17 +140,13 @@ static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st) return composed; } -/* - * Read a bitmap from the current read position on the mmaped - * index, and increase the read position accordingly - */ -static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) +struct ewah_bitmap *read_bitmap(const unsigned char *map, + size_t map_size, size_t *map_pos) { struct ewah_bitmap *b = ewah_pool_new(); - ssize_t bitmap_size = ewah_read_mmap(b, - index->map + index->map_pos, - index->map_size - index->map_pos); + ssize_t bitmap_size = ewah_read_mmap(b, map + *map_pos, + map_size - *map_pos); if (bitmap_size < 0) { error(_("failed to load bitmap index (corrupted?)")); @@ -147,10 +154,20 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) return NULL; } - index->map_pos += bitmap_size; + *map_pos += bitmap_size; + return b; } +/* + * Read a bitmap from the current read position on the mmaped + * index, and increase the read position accordingly + */ +static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) +{ + return read_bitmap(index->map, index->map_size, &index->map_pos); +} + static uint32_t bitmap_num_objects(struct bitmap_index *index) { if (index->midx) @@ -199,6 +216,46 @@ static int load_bitmap_header(struct bitmap_index *index) index->table_lookup = (void *)(index_end - table_size); index_end -= table_size; } + + if (flags & BITMAP_OPT_PSEUDO_MERGES) { + unsigned char *pseudo_merge_ofs; + size_t table_size; + uint32_t i; + + if (sizeof(table_size) > index_end - index->map - header_size) + return error(_("corrupted bitmap index file (too short to fit pseudo-merge table header)")); + + table_size = get_be64(index_end - 8); + if (table_size > index_end - index->map - header_size) + return error(_("corrupted bitmap index file (too short to fit pseudo-merge table)")); + + if (git_env_bool("GIT_TEST_USE_PSEUDO_MERGES", 1)) { + const unsigned char *ext = (index_end - table_size); + + index->pseudo_merges.map = index->map; + index->pseudo_merges.map_size = index->map_size; + index->pseudo_merges.commits = ext + get_be64(index_end - 16); + index->pseudo_merges.commits_nr = get_be32(index_end - 20); + index->pseudo_merges.nr = get_be32(index_end - 24); + + if (st_add(st_mult(index->pseudo_merges.nr, + sizeof(uint64_t)), + 24) > table_size) + return error(_("corrupted bitmap index file, pseudo-merge table too short")); + + CALLOC_ARRAY(index->pseudo_merges.v, + index->pseudo_merges.nr); + + pseudo_merge_ofs = index_end - 24 - + (index->pseudo_merges.nr * sizeof(uint64_t)); + for (i = 0; i < index->pseudo_merges.nr; i++) { + index->pseudo_merges.v[i].at = get_be64(pseudo_merge_ofs); + pseudo_merge_ofs += sizeof(uint64_t); + } + } + + index_end -= table_size; + } } index->entry_count = ntohl(header->entry_count); @@ -960,6 +1017,22 @@ static void show_commit(struct commit *commit UNUSED, { } +static unsigned apply_pseudo_merges_for_commit_1(struct bitmap_index *bitmap_git, + struct bitmap *result, + struct commit *commit, + uint32_t commit_pos) +{ + int ret; + + ret = apply_pseudo_merges_for_commit(&bitmap_git->pseudo_merges, + result, commit, commit_pos); + + if (ret) + pseudo_merges_satisfied_nr += ret; + + return ret; +} + static int add_to_include_set(struct bitmap_index *bitmap_git, struct include_data *data, struct commit *commit, @@ -975,11 +1048,19 @@ static int add_to_include_set(struct bitmap_index *bitmap_git, partial = bitmap_for_commit(bitmap_git, commit); if (partial) { + existing_bitmaps_hits_nr++; + bitmap_or_ewah(data->base, partial); return 0; } + existing_bitmaps_misses_nr++; + bitmap_set(data->base, bitmap_pos); + if (apply_pseudo_merges_for_commit_1(bitmap_git, data->base, commit, + bitmap_pos)) + return 0; + return 1; } @@ -1030,8 +1111,12 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git, { struct ewah_bitmap *or_with = bitmap_for_commit(bitmap_git, commit); - if (!or_with) + if (!or_with) { + existing_bitmaps_misses_nr++; return 0; + } + + existing_bitmaps_hits_nr++; if (!*base) *base = ewah_to_bitmap(or_with); @@ -1105,6 +1190,20 @@ static void show_boundary_object(struct object *object UNUSED, BUG("should not be called"); } +static unsigned cascade_pseudo_merges_1(struct bitmap_index *bitmap_git, + struct bitmap *result, + struct bitmap *roots) +{ + int ret = cascade_pseudo_merges(&bitmap_git->pseudo_merges, + result, roots); + if (ret) { + pseudo_merges_cascades_nr++; + pseudo_merges_satisfied_nr += ret; + } + + return ret; +} + static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, struct rev_info *revs, struct object_list *roots) @@ -1114,6 +1213,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, unsigned int i; unsigned int tmp_blobs, tmp_trees, tmp_tags; int any_missing = 0; + int existing_bitmaps = 0; cb.bitmap_git = bitmap_git; cb.base = bitmap_new(); @@ -1121,6 +1221,25 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, revs->ignore_missing_links = 1; + if (bitmap_git->pseudo_merges.nr) { + struct bitmap *roots_bitmap = bitmap_new(); + struct object_list *objects = NULL; + + for (objects = roots; objects; objects = objects->next) { + struct object *object = objects->item; + int pos; + + pos = bitmap_position(bitmap_git, &object->oid); + if (pos < 0) + continue; + + bitmap_set(roots_bitmap, pos); + } + + if (!cascade_pseudo_merges_1(bitmap_git, cb.base, roots_bitmap)) + bitmap_free(roots_bitmap); + } + /* * OR in any existing reachability bitmaps among `roots` into * `cb.base`. @@ -1132,8 +1251,10 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, continue; if (add_commit_to_bitmap(bitmap_git, &cb.base, - (struct commit *)object)) + (struct commit *)object)) { + existing_bitmaps = 1; continue; + } any_missing = 1; } @@ -1141,6 +1262,9 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, if (!any_missing) goto cleanup; + if (existing_bitmaps) + cascade_pseudo_merges_1(bitmap_git, cb.base, NULL); + tmp_blobs = revs->blob_objects; tmp_trees = revs->tree_objects; tmp_tags = revs->blob_objects; @@ -1196,6 +1320,44 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, return cb.base; } +struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git, + struct commit *commit) +{ + struct commit_list *p; + struct bitmap *parents; + struct pseudo_merge *match = NULL; + + if (!bitmap_git->pseudo_merges.nr) + return NULL; + + parents = bitmap_new(); + + for (p = commit->parents; p; p = p->next) { + int pos = bitmap_position(bitmap_git, &p->item->object.oid); + if (pos < 0 || pos >= bitmap_num_objects(bitmap_git)) + goto done; + + bitmap_set(parents, pos); + } + + match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges, + parents); + +done: + bitmap_free(parents); + if (match) + return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match); + + return NULL; +} + +static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git) +{ + uint32_t i; + for (i = 0; i < bitmap_git->pseudo_merges.nr; i++) + bitmap_git->pseudo_merges.v[i].satisfied = 0; +} + static struct bitmap *find_objects(struct bitmap_index *bitmap_git, struct rev_info *revs, struct object_list *roots, @@ -1203,9 +1365,32 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, { struct bitmap *base = NULL; int needs_walk = 0; + unsigned existing_bitmaps = 0; struct object_list *not_mapped = NULL; + unsatisfy_all_pseudo_merges(bitmap_git); + + if (bitmap_git->pseudo_merges.nr) { + struct bitmap *roots_bitmap = bitmap_new(); + struct object_list *objects = NULL; + + for (objects = roots; objects; objects = objects->next) { + struct object *object = objects->item; + int pos; + + pos = bitmap_position(bitmap_git, &object->oid); + if (pos < 0) + continue; + + bitmap_set(roots_bitmap, pos); + } + + base = bitmap_new(); + if (!cascade_pseudo_merges_1(bitmap_git, base, roots_bitmap)) + bitmap_free(roots_bitmap); + } + /* * Go through all the roots for the walk. The ones that have bitmaps * on the bitmap index will be `or`ed together to form an initial @@ -1216,11 +1401,21 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, */ while (roots) { struct object *object = roots->item; + roots = roots->next; + if (base) { + int pos = bitmap_position(bitmap_git, &object->oid); + if (pos > 0 && bitmap_get(base, pos)) { + object->flags |= SEEN; + continue; + } + } + if (object->type == OBJ_COMMIT && add_commit_to_bitmap(bitmap_git, &base, (struct commit *)object)) { object->flags |= SEEN; + existing_bitmaps = 1; continue; } @@ -1236,6 +1431,9 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, roots = not_mapped; + if (existing_bitmaps) + cascade_pseudo_merges_1(bitmap_git, base, NULL); + /* * Let's iterate through all the roots that don't have bitmaps to * check if we can determine them to be reachable from the existing @@ -1256,8 +1454,12 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, object->flags &= ~UNINTERESTING; add_pending_object(revs, object, ""); needs_walk = 1; + + roots_without_bitmaps_nr++; } else { object->flags |= SEEN; + + roots_with_bitmaps_nr++; } } @@ -1820,6 +2022,19 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, object_list_free(&wants); object_list_free(&haves); + trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied", + pseudo_merges_satisfied_nr); + trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades", + pseudo_merges_cascades_nr); + trace2_data_intmax("bitmap", the_repository, "bitmap/hits", + existing_bitmaps_hits_nr); + trace2_data_intmax("bitmap", the_repository, "bitmap/misses", + existing_bitmaps_misses_nr); + trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap", + roots_with_bitmaps_nr); + trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap", + roots_without_bitmaps_nr); + return bitmap_git; cleanup: @@ -2410,6 +2625,132 @@ int test_bitmap_hashes(struct repository *r) return 0; } +static void bit_pos_to_object_id(struct bitmap_index *bitmap_git, + uint32_t bit_pos, + struct object_id *oid) +{ + uint32_t index_pos; + + if (bitmap_is_midx(bitmap_git)) + index_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos); + else + index_pos = pack_pos_to_index(bitmap_git->pack, bit_pos); + + nth_bitmap_object_oid(bitmap_git, oid, index_pos); +} + +int test_bitmap_pseudo_merges(struct repository *r) +{ + struct bitmap_index *bitmap_git; + uint32_t i; + + bitmap_git = prepare_bitmap_git(r); + if (!bitmap_git || !bitmap_git->pseudo_merges.nr) + goto cleanup; + + for (i = 0; i < bitmap_git->pseudo_merges.nr; i++) { + struct pseudo_merge *merge; + struct ewah_bitmap *commits_bitmap, *merge_bitmap; + + merge = use_pseudo_merge(&bitmap_git->pseudo_merges, + &bitmap_git->pseudo_merges.v[i]); + commits_bitmap = merge->commits; + merge_bitmap = pseudo_merge_bitmap(&bitmap_git->pseudo_merges, + merge); + + printf("at=%"PRIuMAX", commits=%"PRIuMAX", objects=%"PRIuMAX"\n", + (uintmax_t)merge->at, + (uintmax_t)ewah_bitmap_popcount(commits_bitmap), + (uintmax_t)ewah_bitmap_popcount(merge_bitmap)); + } + +cleanup: + free_bitmap_index(bitmap_git); + return 0; +} + +static void dump_ewah_object_ids(struct bitmap_index *bitmap_git, + struct ewah_bitmap *bitmap) + +{ + struct ewah_iterator it; + eword_t word; + uint32_t pos = 0; + + ewah_iterator_init(&it, bitmap); + + while (ewah_iterator_next(&word, &it)) { + struct object_id oid; + uint32_t offset; + + for (offset = 0; offset < BITS_IN_EWORD; offset++) { + if (!(word >> offset)) + break; + + offset += ewah_bit_ctz64(word >> offset); + + bit_pos_to_object_id(bitmap_git, pos + offset, &oid); + printf("%s\n", oid_to_hex(&oid)); + } + pos += BITS_IN_EWORD; + } +} + +int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n) +{ + struct bitmap_index *bitmap_git; + struct pseudo_merge *merge; + int ret = 0; + + bitmap_git = prepare_bitmap_git(r); + if (!bitmap_git || !bitmap_git->pseudo_merges.nr) + goto cleanup; + + if (n >= bitmap_git->pseudo_merges.nr) { + ret = error(_("pseudo-merge index out of range " + "(%"PRIu32" >= %"PRIuMAX")"), + n, (uintmax_t)bitmap_git->pseudo_merges.nr); + goto cleanup; + } + + merge = use_pseudo_merge(&bitmap_git->pseudo_merges, + &bitmap_git->pseudo_merges.v[n]); + dump_ewah_object_ids(bitmap_git, merge->commits); + +cleanup: + free_bitmap_index(bitmap_git); + return ret; +} + +int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n) +{ + struct bitmap_index *bitmap_git; + struct pseudo_merge *merge; + int ret = 0; + + bitmap_git = prepare_bitmap_git(r); + if (!bitmap_git || !bitmap_git->pseudo_merges.nr) + goto cleanup; + + if (n >= bitmap_git->pseudo_merges.nr) { + ret = error(_("pseudo-merge index out of range " + "(%"PRIu32" >= %"PRIuMAX")"), + n, (uintmax_t)bitmap_git->pseudo_merges.nr); + goto cleanup; + } + + merge = use_pseudo_merge(&bitmap_git->pseudo_merges, + &bitmap_git->pseudo_merges.v[n]); + + dump_ewah_object_ids(bitmap_git, + pseudo_merge_bitmap(&bitmap_git->pseudo_merges, + merge)); + +cleanup: + free_bitmap_index(bitmap_git); + return ret; +} + int rebuild_bitmap(const uint32_t *reposition, struct ewah_bitmap *source, struct bitmap *dest) @@ -2516,6 +2857,7 @@ void free_bitmap_index(struct bitmap_index *b) */ close_midx_revindex(b->midx); } + free_pseudo_merge_map(&b->pseudo_merges); free(b); } diff --git a/pack-bitmap.h b/pack-bitmap.h index 3091095f33..1171e6d989 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -21,6 +21,7 @@ struct bitmap_disk_header { unsigned char checksum[GIT_MAX_RAWSZ]; }; +#define BITMAP_PSEUDO_MERGE (1u<<21) #define NEEDS_BITMAP (1u<<22) /* @@ -36,6 +37,7 @@ enum pack_bitmap_opts { BITMAP_OPT_FULL_DAG = 0x1, BITMAP_OPT_HASH_CACHE = 0x4, BITMAP_OPT_LOOKUP_TABLE = 0x10, + BITMAP_OPT_PSEUDO_MERGES = 0x20, }; enum pack_bitmap_flags { @@ -71,6 +73,9 @@ void traverse_bitmap_commit_list(struct bitmap_index *, void test_bitmap_walk(struct rev_info *revs); int test_bitmap_commits(struct repository *r); int test_bitmap_hashes(struct repository *r); +int test_bitmap_pseudo_merges(struct repository *r); +int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n); +int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n); #define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \ "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL" @@ -109,12 +114,16 @@ struct bitmap_writer { struct bitmapped_commit *selected; unsigned int selected_nr, selected_alloc; + struct string_list pseudo_merge_groups; + kh_oid_map_t *pseudo_merge_commits; /* oid -> pseudo merge(s) */ + uint32_t pseudo_merges_nr; + struct progress *progress; int show_progress; unsigned char pack_checksum[GIT_MAX_RAWSZ]; }; -void bitmap_writer_init(struct bitmap_writer *writer); +void bitmap_writer_init(struct bitmap_writer *writer, struct repository *r); void bitmap_writer_show_progress(struct bitmap_writer *writer, int show); void bitmap_writer_set_checksum(struct bitmap_writer *writer, const unsigned char *sha1); @@ -122,6 +131,10 @@ void bitmap_writer_build_type_index(struct bitmap_writer *writer, struct packing_data *to_pack, struct pack_idx_entry **index, uint32_t index_nr); +int bitmap_writer_has_bitmapped_object_id(struct bitmap_writer *writer, + const struct object_id *oid); +void bitmap_writer_push_commit(struct bitmap_writer *writer, + struct commit *commit, unsigned pseudo_merge); uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, struct packing_data *mapping); int rebuild_bitmap(const uint32_t *reposition, @@ -129,6 +142,8 @@ int rebuild_bitmap(const uint32_t *reposition, struct bitmap *dest); struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, struct commit *commit); +struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git, + struct commit *commit); void bitmap_writer_select_commits(struct bitmap_writer *writer, struct commit **indexed_commits, unsigned int indexed_commits_nr); @@ -150,4 +165,6 @@ int bitmap_is_preferred_refname(struct repository *r, const char *refname); int verify_bitmap_files(struct repository *r); +struct ewah_bitmap *read_bitmap(const unsigned char *map, + size_t map_size, size_t *map_pos); #endif diff --git a/parse.c b/parse.c index 42d691a0fb..7a60a4f816 100644 --- a/parse.c +++ b/parse.c @@ -125,6 +125,35 @@ int git_parse_ssize_t(const char *value, ssize_t *ret) return 1; } +int git_parse_double(const char *value, double *ret) +{ + char *end; + double val; + uintmax_t factor; + + if (!value || !*value) { + errno = EINVAL; + return 0; + } + + errno = 0; + val = strtod(value, &end); + if (errno == ERANGE) + return 0; + if (end == value) { + errno = EINVAL; + return 0; + } + factor = get_unit_factor(end); + if (!factor) { + errno = EINVAL; + return 0; + } + val *= factor; + *ret = val; + return 1; +} + int git_parse_maybe_bool_text(const char *value) { if (!value) diff --git a/parse.h b/parse.h index 07d2193d69..6bb9a54d9a 100644 --- a/parse.h +++ b/parse.h @@ -6,6 +6,7 @@ int git_parse_ssize_t(const char *, ssize_t *); int git_parse_ulong(const char *, unsigned long *); int git_parse_int(const char *value, int *ret); int git_parse_int64(const char *value, int64_t *ret); +int git_parse_double(const char *value, double *ret); /** * Same as `git_config_bool`, except that it returns -1 on error rather diff --git a/pseudo-merge.c b/pseudo-merge.c new file mode 100644 index 0000000000..e3e0393f11 --- /dev/null +++ b/pseudo-merge.c @@ -0,0 +1,757 @@ +#include "git-compat-util.h" +#include "pseudo-merge.h" +#include "date.h" +#include "oid-array.h" +#include "strbuf.h" +#include "config.h" +#include "string-list.h" +#include "refs.h" +#include "pack-bitmap.h" +#include "commit.h" +#include "alloc.h" +#include "progress.h" +#include "hex.h" + +#define DEFAULT_PSEUDO_MERGE_DECAY 1.0 +#define DEFAULT_PSEUDO_MERGE_MAX_MERGES 64 +#define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 1 +#define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago") +#define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago") +#define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 512 + +static double gitexp(double base, int exp) +{ + double result = 1; + while (1) { + if (exp % 2) + result *= base; + exp >>= 1; + if (!exp) + break; + base *= base; + } + return result; +} + +static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group *group, + const struct pseudo_merge_matches *matches, + uint32_t i) +{ + double C = 0.0f; + uint32_t n; + + /* + * The size of pseudo-merge groups decays according to a power series, + * which looks like: + * + * f(n) = C * n^-k + * + * , where 'n' is the n-th pseudo-merge group, 'f(n)' is its size, 'k' + * is the decay rate, and 'C' is a scaling value. + * + * The value of C depends on the number of groups, decay rate, and total + * number of commits. It is computed such that if there are M and N + * total groups and commits, respectively, that: + * + * N = f(0) + f(1) + ... f(M-1) + * + * Rearranging to isolate C, we get: + * + * N = \sum_{n=1}^M C / n^k + * + * N / C = \sum_{n=1}^M n^-k + * + * C = N / \sum_{n=1}^M n^-k + * + * For example, if we have a decay rate of 'k' being equal to 1.5, 'N' + * total commits equal to 10,000, and 'M' being equal to 6 groups, then + * the (rounded) group sizes are: + * + * { 5469, 1934, 1053, 684, 489, 372 } + * + * increasing the number of total groups, say to 10, scales the group + * sizes appropriately: + * + * { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 } + */ + for (n = 0; n < group->max_merges; n++) + C += 1.0 / gitexp(n + 1, group->decay); + C = matches->unstable_nr / C; + + return (uint32_t)((C / gitexp(i + 1, group->decay)) + 0.5); +} + +static void pseudo_merge_group_init(struct pseudo_merge_group *group) +{ + memset(group, 0, sizeof(struct pseudo_merge_group)); + + strmap_init_with_options(&group->matches, NULL, 0); + + group->decay = DEFAULT_PSEUDO_MERGE_DECAY; + group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES; + group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE; + group->threshold = DEFAULT_PSEUDO_MERGE_THRESHOLD; + group->stable_threshold = DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD; + group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE; +} + +static int pseudo_merge_config(const char *var, const char *value, + const struct config_context *ctx, + void *cb_data) +{ + struct string_list *list = cb_data; + struct string_list_item *item; + struct pseudo_merge_group *group; + struct strbuf buf = STRBUF_INIT; + const char *sub, *key; + size_t sub_len; + int ret = 0; + + if (parse_config_key(var, "bitmappseudomerge", &sub, &sub_len, &key)) + goto done; + + if (!sub_len) + goto done; + + strbuf_add(&buf, sub, sub_len); + + item = string_list_lookup(list, buf.buf); + if (!item) { + item = string_list_insert(list, buf.buf); + + item->util = xmalloc(sizeof(struct pseudo_merge_group)); + pseudo_merge_group_init(item->util); + } + + group = item->util; + + if (!strcmp(key, "pattern")) { + struct strbuf re = STRBUF_INIT; + + free(group->pattern); + if (*value != '^') + strbuf_addch(&re, '^'); + strbuf_addstr(&re, value); + + group->pattern = xcalloc(1, sizeof(regex_t)); + if (regcomp(group->pattern, re.buf, REG_EXTENDED)) + die(_("failed to load pseudo-merge regex for %s: '%s'"), + sub, re.buf); + + strbuf_release(&re); + } else if (!strcmp(key, "decay")) { + group->decay = git_config_double(var, value, ctx->kvi); + if (group->decay < 0) { + warning(_("%s must be non-negative, using default"), var); + group->decay = DEFAULT_PSEUDO_MERGE_DECAY; + } + } else if (!strcmp(key, "samplerate")) { + group->sample_rate = git_config_double(var, value, ctx->kvi); + if (!(0 <= group->sample_rate && group->sample_rate <= 1)) { + warning(_("%s must be between 0 and 1, using default"), var); + group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE; + } + } else if (!strcmp(key, "threshold")) { + if (git_config_expiry_date(&group->threshold, var, value)) { + ret = -1; + goto done; + } + } else if (!strcmp(key, "maxmerges")) { + group->max_merges = git_config_int(var, value, ctx->kvi); + if (group->max_merges < 0) { + warning(_("%s must be non-negative, using default"), var); + group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES; + } + } else if (!strcmp(key, "stablethreshold")) { + if (git_config_expiry_date(&group->stable_threshold, var, value)) { + ret = -1; + goto done; + } + } else if (!strcmp(key, "stablesize")) { + group->stable_size = git_config_int(var, value, ctx->kvi); + if (group->stable_size <= 0) { + warning(_("%s must be positive, using default"), var); + group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE; + } + } + +done: + strbuf_release(&buf); + + return ret; +} + +void load_pseudo_merges_from_config(struct string_list *list) +{ + struct string_list_item *item; + + git_config(pseudo_merge_config, list); + + for_each_string_list_item(item, list) { + struct pseudo_merge_group *group = item->util; + if (!group->pattern) + die(_("pseudo-merge group '%s' missing required pattern"), + item->string); + if (group->threshold < group->stable_threshold) + die(_("pseudo-merge group '%s' has unstable threshold " + "before stable one"), item->string); + } +} + +static int find_pseudo_merge_group_for_ref(const char *refname, + const struct object_id *oid, + int flags UNUSED, + void *_data) +{ + struct bitmap_writer *writer = _data; + struct object_id peeled; + struct commit *c; + uint32_t i; + int has_bitmap; + + if (!peel_iterated_oid(the_repository, oid, &peeled)) + oid = &peeled; + + c = lookup_commit(the_repository, oid); + if (!c) + return 0; + + has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, oid); + + for (i = 0; i < writer->pseudo_merge_groups.nr; i++) { + struct pseudo_merge_group *group; + struct pseudo_merge_matches *matches; + struct strbuf group_name = STRBUF_INIT; + regmatch_t captures[16]; + size_t j; + + group = writer->pseudo_merge_groups.items[i].util; + if (regexec(group->pattern, refname, ARRAY_SIZE(captures), + captures, 0)) + continue; + + if (captures[ARRAY_SIZE(captures) - 1].rm_so != -1) + warning(_("pseudo-merge regex from config has too many capture " + "groups (max=%"PRIuMAX")"), + (uintmax_t)ARRAY_SIZE(captures) - 2); + + for (j = !!group->pattern->re_nsub; j < ARRAY_SIZE(captures); j++) { + regmatch_t *match = &captures[j]; + if (match->rm_so == -1) + continue; + + if (group_name.len) + strbuf_addch(&group_name, '-'); + + strbuf_add(&group_name, refname + match->rm_so, + match->rm_eo - match->rm_so); + } + + matches = strmap_get(&group->matches, group_name.buf); + if (!matches) { + matches = xcalloc(1, sizeof(*matches)); + strmap_put(&group->matches, strbuf_detach(&group_name, NULL), + matches); + } + + if (c->date <= group->stable_threshold) { + ALLOC_GROW(matches->stable, matches->stable_nr + 1, + matches->stable_alloc); + matches->stable[matches->stable_nr++] = c; + } else if (c->date <= group->threshold && !has_bitmap) { + ALLOC_GROW(matches->unstable, matches->unstable_nr + 1, + matches->unstable_alloc); + matches->unstable[matches->unstable_nr++] = c; + } + + strbuf_release(&group_name); + } + + return 0; +} + +static struct commit *push_pseudo_merge(struct pseudo_merge_group *group) +{ + struct commit *merge; + + ALLOC_GROW(group->merges, group->merges_nr + 1, group->merges_alloc); + + merge = alloc_commit_node(the_repository); + merge->object.parsed = 1; + merge->object.flags |= BITMAP_PSEUDO_MERGE; + + group->merges[group->merges_nr++] = merge; + + return merge; +} + +static struct pseudo_merge_commit_idx *pseudo_merge_idx(kh_oid_map_t *pseudo_merge_commits, + const struct object_id *oid) + +{ + struct pseudo_merge_commit_idx *pmc; + int hash_ret; + khiter_t hash_pos = kh_put_oid_map(pseudo_merge_commits, *oid, + &hash_ret); + + if (hash_ret) { + CALLOC_ARRAY(pmc, 1); + kh_value(pseudo_merge_commits, hash_pos) = pmc; + } else { + pmc = kh_value(pseudo_merge_commits, hash_pos); + } + + return pmc; +} + +#define MIN_PSEUDO_MERGE_SIZE 8 + +static void select_pseudo_merges_1(struct bitmap_writer *writer, + struct pseudo_merge_group *group, + struct pseudo_merge_matches *matches) +{ + uint32_t i, j; + uint32_t stable_merges_nr; + + if (!matches->stable_nr && !matches->unstable_nr) + return; /* all tips in this group already have bitmaps */ + + stable_merges_nr = matches->stable_nr / group->stable_size; + if (matches->stable_nr % group->stable_size) + stable_merges_nr++; + + /* make stable_merges_nr pseudo merges for stable commits */ + for (i = 0, j = 0; i < stable_merges_nr; i++) { + struct commit *merge; + struct commit_list **p; + + merge = push_pseudo_merge(group); + p = &merge->parents; + + /* + * For each pseudo-merge created above, add parents to the + * allocated commit node from the stable set of commits + * (un-bitmapped, newer than the stable threshold). + */ + do { + struct commit *c; + struct pseudo_merge_commit_idx *pmc; + + if (j >= matches->stable_nr) + break; + + c = matches->stable[j++]; + /* + * Here and below, make sure that we keep our mapping of + * commits -> pseudo-merge(s) which include the key'd + * commit up-to-date. + */ + pmc = pseudo_merge_idx(writer->pseudo_merge_commits, + &c->object.oid); + + ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc); + + pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr; + p = commit_list_append(c, p); + } while (j % group->stable_size); + + bitmap_writer_push_commit(writer, merge, 1); + writer->pseudo_merges_nr++; + } + + /* make up to group->max_merges pseudo merges for unstable commits */ + for (i = 0, j = 0; i < group->max_merges; i++) { + struct commit *merge; + struct commit_list **p; + uint32_t size, end; + + merge = push_pseudo_merge(group); + p = &merge->parents; + + size = pseudo_merge_group_size(group, matches, i); + end = size < MIN_PSEUDO_MERGE_SIZE ? matches->unstable_nr : j + size; + + /* + * For each pseudo-merge commit created above, add parents to + * the allocated commit node from the unstable set of commits + * (newer than the stable threshold). + * + * Account for the sample rate, since not every candidate from + * the set of stable commits will be included as a pseudo-merge + * parent. + */ + for (; j < end && j < matches->unstable_nr; j++) { + struct commit *c = matches->unstable[j]; + struct pseudo_merge_commit_idx *pmc; + + if (j % (uint32_t)(1.0 / group->sample_rate)) + continue; + + pmc = pseudo_merge_idx(writer->pseudo_merge_commits, + &c->object.oid); + + ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc); + + pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr; + p = commit_list_append(c, p); + } + + bitmap_writer_push_commit(writer, merge, 1); + writer->pseudo_merges_nr++; + if (end >= matches->unstable_nr) + break; + } +} + +static int commit_date_cmp(const void *va, const void *vb) +{ + timestamp_t a = (*(const struct commit **)va)->date; + timestamp_t b = (*(const struct commit **)vb)->date; + + if (a < b) + return -1; + else if (a > b) + return 1; + return 0; +} + +static void sort_pseudo_merge_matches(struct pseudo_merge_matches *matches) +{ + QSORT(matches->stable, matches->stable_nr, commit_date_cmp); + QSORT(matches->unstable, matches->unstable_nr, commit_date_cmp); +} + +void select_pseudo_merges(struct bitmap_writer *writer, + struct commit **commits, size_t commits_nr) +{ + struct progress *progress = NULL; + uint32_t i; + + if (!writer->pseudo_merge_groups.nr) + return; + + if (writer->show_progress) + progress = start_progress("Selecting pseudo-merge commits", + writer->pseudo_merge_groups.nr); + + refs_for_each_ref(get_main_ref_store(the_repository), + find_pseudo_merge_group_for_ref, writer); + + for (i = 0; i < writer->pseudo_merge_groups.nr; i++) { + struct pseudo_merge_group *group; + struct hashmap_iter iter; + struct strmap_entry *e; + + group = writer->pseudo_merge_groups.items[i].util; + strmap_for_each_entry(&group->matches, &iter, e) { + struct pseudo_merge_matches *matches = e->value; + + sort_pseudo_merge_matches(matches); + + select_pseudo_merges_1(writer, group, matches); + } + + display_progress(progress, i + 1); + } + + stop_progress(&progress); +} + +void free_pseudo_merge_map(struct pseudo_merge_map *pm) +{ + uint32_t i; + for (i = 0; i < pm->nr; i++) { + ewah_pool_free(pm->v[i].commits); + ewah_pool_free(pm->v[i].bitmap); + } + free(pm->v); +} + +struct pseudo_merge_commit_ext { + uint32_t nr; + const unsigned char *ptr; +}; + +static int pseudo_merge_ext_at(const struct pseudo_merge_map *pm, + struct pseudo_merge_commit_ext *ext, size_t at) +{ + if (at >= pm->map_size) + return error(_("extended pseudo-merge read out-of-bounds " + "(%"PRIuMAX" >= %"PRIuMAX")"), + (uintmax_t)at, (uintmax_t)pm->map_size); + if (at + 4 >= pm->map_size) + return error(_("extended pseudo-merge entry is too short " + "(%"PRIuMAX" >= %"PRIuMAX")"), + (uintmax_t)(at + 4), (uintmax_t)pm->map_size); + + ext->nr = get_be32(pm->map + at); + ext->ptr = pm->map + at + sizeof(uint32_t); + + return 0; +} + +struct ewah_bitmap *pseudo_merge_bitmap(const struct pseudo_merge_map *pm, + struct pseudo_merge *merge) +{ + if (!merge->loaded_commits) + BUG("cannot use unloaded pseudo-merge bitmap"); + + if (!merge->loaded_bitmap) { + size_t at = merge->bitmap_at; + + merge->bitmap = read_bitmap(pm->map, pm->map_size, &at); + merge->loaded_bitmap = 1; + } + + return merge->bitmap; +} + +struct pseudo_merge *use_pseudo_merge(const struct pseudo_merge_map *pm, + struct pseudo_merge *merge) +{ + if (!merge->loaded_commits) { + size_t pos = merge->at; + + merge->commits = read_bitmap(pm->map, pm->map_size, &pos); + merge->bitmap_at = pos; + merge->loaded_commits = 1; + } + return merge; +} + +static struct pseudo_merge *pseudo_merge_at(const struct pseudo_merge_map *pm, + struct object_id *oid, + size_t want) +{ + size_t lo = 0; + size_t hi = pm->nr; + + while (lo < hi) { + size_t mi = lo + (hi - lo) / 2; + size_t got = pm->v[mi].at; + + if (got == want) + return use_pseudo_merge(pm, &pm->v[mi]); + else if (got < want) + hi = mi; + else + lo = mi + 1; + } + + warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX), + oid_to_hex(oid), (uintmax_t)want); + + return NULL; +} + +struct pseudo_merge_commit { + uint32_t commit_pos; + uint64_t pseudo_merge_ofs; +}; + +#define PSEUDO_MERGE_COMMIT_RAWSZ (sizeof(uint32_t)+sizeof(uint64_t)) + +static void read_pseudo_merge_commit_at(struct pseudo_merge_commit *merge, + const unsigned char *at) +{ + merge->commit_pos = get_be32(at); + merge->pseudo_merge_ofs = get_be64(at + sizeof(uint32_t)); +} + +static int nth_pseudo_merge_ext(const struct pseudo_merge_map *pm, + struct pseudo_merge_commit_ext *ext, + struct pseudo_merge_commit *merge, + uint32_t n) +{ + size_t ofs; + + if (n >= ext->nr) + return error(_("extended pseudo-merge lookup out-of-bounds " + "(%"PRIu32" >= %"PRIu32")"), n, ext->nr); + + ofs = get_be64(ext->ptr + st_mult(n, sizeof(uint64_t))); + if (ofs >= pm->map_size) + return error(_("out-of-bounds read: (%"PRIuMAX" >= %"PRIuMAX")"), + (uintmax_t)ofs, (uintmax_t)pm->map_size); + + read_pseudo_merge_commit_at(merge, pm->map + ofs); + + return 0; +} + +static unsigned apply_pseudo_merge(const struct pseudo_merge_map *pm, + struct pseudo_merge *merge, + struct bitmap *result, + struct bitmap *roots) +{ + if (merge->satisfied) + return 0; + + if (!ewah_bitmap_is_subset(merge->commits, roots ? roots : result)) + return 0; + + bitmap_or_ewah(result, pseudo_merge_bitmap(pm, merge)); + if (roots) + bitmap_or_ewah(roots, pseudo_merge_bitmap(pm, merge)); + merge->satisfied = 1; + + return 1; +} + +static int pseudo_merge_commit_cmp(const void *va, const void *vb) +{ + struct pseudo_merge_commit merge; + uint32_t key = *(uint32_t*)va; + + read_pseudo_merge_commit_at(&merge, vb); + + if (key < merge.commit_pos) + return -1; + if (key > merge.commit_pos) + return 1; + return 0; +} + +static struct pseudo_merge_commit *find_pseudo_merge(const struct pseudo_merge_map *pm, + uint32_t pos) +{ + if (!pm->commits_nr) + return NULL; + + return bsearch(&pos, pm->commits, pm->commits_nr, + PSEUDO_MERGE_COMMIT_RAWSZ, pseudo_merge_commit_cmp); +} + +int apply_pseudo_merges_for_commit(const struct pseudo_merge_map *pm, + struct bitmap *result, + struct commit *commit, uint32_t commit_pos) +{ + struct pseudo_merge *merge; + struct pseudo_merge_commit *merge_commit; + int ret = 0; + + merge_commit = find_pseudo_merge(pm, commit_pos); + if (!merge_commit) + return 0; + + if (merge_commit->pseudo_merge_ofs & ((uint64_t)1<<63)) { + struct pseudo_merge_commit_ext ext = { 0 }; + off_t ofs = merge_commit->pseudo_merge_ofs & ~((uint64_t)1<<63); + uint32_t i; + + if (pseudo_merge_ext_at(pm, &ext, ofs) < -1) { + warning(_("could not read extended pseudo-merge table " + "for commit %s"), + oid_to_hex(&commit->object.oid)); + return ret; + } + + for (i = 0; i < ext.nr; i++) { + if (nth_pseudo_merge_ext(pm, &ext, merge_commit, i) < 0) + return ret; + + merge = pseudo_merge_at(pm, &commit->object.oid, + merge_commit->pseudo_merge_ofs); + + if (!merge) + return ret; + + if (apply_pseudo_merge(pm, merge, result, NULL)) + ret++; + } + } else { + merge = pseudo_merge_at(pm, &commit->object.oid, + merge_commit->pseudo_merge_ofs); + + if (!merge) + return ret; + + if (apply_pseudo_merge(pm, merge, result, NULL)) + ret++; + } + + if (ret) + cascade_pseudo_merges(pm, result, NULL); + + return ret; +} + +int cascade_pseudo_merges(const struct pseudo_merge_map *pm, + struct bitmap *result, + struct bitmap *roots) +{ + unsigned any_satisfied; + int ret = 0; + + do { + struct pseudo_merge *merge; + uint32_t i; + + any_satisfied = 0; + + for (i = 0; i < pm->nr; i++) { + merge = use_pseudo_merge(pm, &pm->v[i]); + if (apply_pseudo_merge(pm, merge, result, roots)) { + any_satisfied |= 1; + ret++; + } + } + } while (any_satisfied); + + return ret; +} + +struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm, + struct bitmap *parents) +{ + struct pseudo_merge *match = NULL; + size_t i; + + if (!pm->nr) + return NULL; + + /* + * NOTE: this loop is quadratic in the worst-case (where no + * matching pseudo-merge bitmaps are found), but in practice + * this is OK for a few reasons: + * + * - Rejecting pseudo-merge bitmaps that do not match the + * given commit is done quickly (i.e. `bitmap_equals_ewah()` + * returns early when we know the two bitmaps aren't equal. + * + * - Already matched pseudo-merge bitmaps (which we track with + * the `->satisfied` bit here) are skipped as potential + * candidates. + * + * - The number of pseudo-merges should be small (in the + * hundreds for most repositories). + * + * If in the future this semi-quadratic behavior does become a + * problem, another approach would be to keep track of which + * pseudo-merges are still "viable" after enumerating the + * pseudo-merge commit's parents: + * + * - A pseudo-merge bitmap becomes non-viable when the bit(s) + * corresponding to one or more parent(s) of the given + * commit are not set in a candidate pseudo-merge's commits + * bitmap. + * + * - After processing all bits, enumerate the remaining set of + * viable pseudo-merge bitmaps, and check that their + * popcount() matches the number of parents in the given + * commit. + */ + for (i = 0; i < pm->nr; i++) { + struct pseudo_merge *candidate = use_pseudo_merge(pm, &pm->v[i]); + if (!candidate || candidate->satisfied) + continue; + if (!bitmap_equals_ewah(parents, candidate->commits)) + continue; + + match = candidate; + match->satisfied = 1; + break; + } + + return match; +} diff --git a/pseudo-merge.h b/pseudo-merge.h new file mode 100644 index 0000000000..2aca01d056 --- /dev/null +++ b/pseudo-merge.h @@ -0,0 +1,216 @@ +#ifndef PSEUDO_MERGE_H +#define PSEUDO_MERGE_H + +#include "git-compat-util.h" +#include "strmap.h" +#include "khash.h" +#include "ewah/ewok.h" + +struct commit; +struct string_list; +struct bitmap_index; +struct bitmap_writer; + +/* + * A pseudo-merge group tracks the set of non-bitmapped reference tips + * that match the given pattern. + * + * Within those matches, they are further segmented by separating + * consecutive capture groups with '-' dash character capture groups + * with '-' dash characters. + * + * Those groups are then ordered by committer date and partitioned + * into individual pseudo-merge(s) according to the decay, max_merges, + * sample_rate, and threshold parameters. + */ +struct pseudo_merge_group { + regex_t *pattern; + + /* capture group(s) -> struct pseudo_merge_matches */ + struct strmap matches; + + /* + * The individual pseudo-merge(s) that are generated from the + * above array of matches, partitioned according to the below + * parameters. + */ + struct commit **merges; + size_t merges_nr; + size_t merges_alloc; + + /* + * Pseudo-merge grouping parameters. See git-config(1) for + * more information. + */ + double decay; + int max_merges; + double sample_rate; + int stable_size; + timestamp_t threshold; + timestamp_t stable_threshold; +}; + +struct pseudo_merge_matches { + struct commit **stable; + struct commit **unstable; + size_t stable_nr, stable_alloc; + size_t unstable_nr, unstable_alloc; +}; + +/* + * Read the repository's configuration: + * + * - bitmapPseudoMerge..pattern + * - bitmapPseudoMerge..decay + * - bitmapPseudoMerge..sampleRate + * - bitmapPseudoMerge..threshold + * - bitmapPseudoMerge..maxMerges + * - bitmapPseudoMerge..stableThreshold + * - bitmapPseudoMerge..stableSize + * + * and populates the given `list` with pseudo-merge groups. String + * entry keys are the pseudo-merge group names, and the values are + * pointers to the pseudo_merge_group structure itself. + */ +void load_pseudo_merges_from_config(struct string_list *list); + +/* + * A pseudo-merge commit index (pseudo_merge_commit_idx) maps a + * particular (non-pseudo-merge) commit to the list of pseudo-merge(s) + * it appears in. + */ +struct pseudo_merge_commit_idx { + uint32_t *pseudo_merge; + size_t nr, alloc; +}; + +/* + * Selects pseudo-merges from a list of commits, populating the given + * string_list of pseudo-merge groups. + * + * Populates the pseudo_merge_commits map with a commit_idx + * corresponding to each commit in the list. Counts the total number + * of pseudo-merges generated. + * + * Optionally shows a progress meter. + */ +void select_pseudo_merges(struct bitmap_writer *writer, + struct commit **commits, size_t commits_nr); + +/* + * Represents a serialized view of a file containing pseudo-merge(s) + * (see Documentation/technical/bitmap-format.txt for a specification + * of the format). + */ +struct pseudo_merge_map { + /* + * An array of pseudo-merge(s), lazily loaded from the .bitmap + * file. + */ + struct pseudo_merge *v; + size_t nr; + size_t commits_nr; + + /* + * Pointers into a memory-mapped view of the .bitmap file: + * + * - map: the beginning of the .bitmap file + * - commits: the beginning of the pseudo-merge commit index + * - map_size: the size of the .bitmap file + */ + const unsigned char *map; + const unsigned char *commits; + + size_t map_size; +}; + +/* + * An individual pseudo-merge, storing a pair of lazily-loaded + * bitmaps: + * + * - commits: the set of commit(s) that are part of the pseudo-merge + * - bitmap: the set of object(s) reachable from the above set of + * commits. + * + * The `at` and `bitmap_at` fields are used to store the locations of + * each of the above bitmaps in the .bitmap file. + */ +struct pseudo_merge { + struct ewah_bitmap *commits; + struct ewah_bitmap *bitmap; + + off_t at; + off_t bitmap_at; + + /* + * `satisfied` indicates whether the given pseudo-merge has been + * used. + * + * `loaded_commits` and `loaded_bitmap` indicate whether the + * respective bitmaps have been loaded and read from the + * .bitmap file. + */ + unsigned satisfied : 1, + loaded_commits : 1, + loaded_bitmap : 1; +}; + +/* + * Frees the given pseudo-merge map, releasing any memory held by (a) + * parsed EWAH bitmaps, or (b) the array of pseudo-merges itself. Does + * not free the memory-mapped view of the .bitmap file. + */ +void free_pseudo_merge_map(struct pseudo_merge_map *pm); + +/* + * Loads the bitmap corresponding to the given pseudo-merge from the + * map, if it has not already been loaded. + */ +struct ewah_bitmap *pseudo_merge_bitmap(const struct pseudo_merge_map *pm, + struct pseudo_merge *merge); + +/* + * Loads the pseudo-merge and its commits bitmap from the given + * pseudo-merge map, if it has not already been loaded. + */ +struct pseudo_merge *use_pseudo_merge(const struct pseudo_merge_map *pm, + struct pseudo_merge *merge); + +/* + * Applies pseudo-merge(s) containing the given commit to the bitmap + * "result". + * + * If any pseudo-merge(s) were satisfied, returns the number + * satisfied, otherwise returns 0. If any were satisfied, the + * remaining unsatisfied pseudo-merges are cascaded (see below). + */ +int apply_pseudo_merges_for_commit(const struct pseudo_merge_map *pm, + struct bitmap *result, + struct commit *commit, uint32_t commit_pos); + +/* + * Applies pseudo-merge(s) which are satisfied according to the + * current bitmap in result (or roots, see below). If any + * pseudo-merges were satisfied, repeat the process over unsatisfied + * pseudo-merge commits until no more pseudo-merges are satisfied. + * + * Result is the bitmap to which the pseudo-merge(s) are applied. + * Roots (if given) is a bitmap of the traversal tip(s) for either + * side of a reachability traversal. + * + * Roots may given instead of a populated results bitmap at the + * beginning of a traversal on either side where the reachability + * closure over tips is not yet known. + */ +int cascade_pseudo_merges(const struct pseudo_merge_map *pm, + struct bitmap *result, + struct bitmap *roots); + +/* + * Returns a pseudo-merge which contains the exact set of commits + * listed in the "parents" bitamp, or NULL if none could be found. + */ +struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm, + struct bitmap *parents); + +#endif diff --git a/t/helper/test-bitmap.c b/t/helper/test-bitmap.c index af43ee1cb5..6af2b42678 100644 --- a/t/helper/test-bitmap.c +++ b/t/helper/test-bitmap.c @@ -13,21 +13,41 @@ static int bitmap_dump_hashes(void) return test_bitmap_hashes(the_repository); } +static int bitmap_dump_pseudo_merges(void) +{ + return test_bitmap_pseudo_merges(the_repository); +} + +static int bitmap_dump_pseudo_merge_commits(uint32_t n) +{ + return test_bitmap_pseudo_merge_commits(the_repository, n); +} + +static int bitmap_dump_pseudo_merge_objects(uint32_t n) +{ + return test_bitmap_pseudo_merge_objects(the_repository, n); +} + int cmd__bitmap(int argc, const char **argv) { setup_git_directory(); - if (argc != 2) - goto usage; - - if (!strcmp(argv[1], "list-commits")) + if (argc == 2 && !strcmp(argv[1], "list-commits")) return bitmap_list_commits(); - if (!strcmp(argv[1], "dump-hashes")) + if (argc == 2 && !strcmp(argv[1], "dump-hashes")) return bitmap_dump_hashes(); + if (argc == 2 && !strcmp(argv[1], "dump-pseudo-merges")) + return bitmap_dump_pseudo_merges(); + if (argc == 3 && !strcmp(argv[1], "dump-pseudo-merge-commits")) + return bitmap_dump_pseudo_merge_commits(atoi(argv[2])); + if (argc == 3 && !strcmp(argv[1], "dump-pseudo-merge-objects")) + return bitmap_dump_pseudo_merge_objects(atoi(argv[2])); -usage: usage("\ttest-tool bitmap list-commits\n" - "\ttest-tool bitmap dump-hashes"); + "\ttest-tool bitmap dump-hashes\n" + "\ttest-tool bitmap dump-pseudo-merges\n" + "\ttest-tool bitmap dump-pseudo-merge-commits \n" + "\ttest-tool bitmap dump-pseudo-merge-objects "); return -1; } diff --git a/t/perf/p5333-pseudo-merge-bitmaps.sh b/t/perf/p5333-pseudo-merge-bitmaps.sh new file mode 100755 index 0000000000..2e8b1d2635 --- /dev/null +++ b/t/perf/p5333-pseudo-merge-bitmaps.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +test_description='pseudo-merge bitmaps' +. ./perf-lib.sh + +test_perf_large_repo + +test_expect_success 'setup' ' + git \ + -c bitmapPseudoMerge.all.pattern="refs/" \ + -c bitmapPseudoMerge.all.threshold=now \ + -c bitmapPseudoMerge.all.stableThreshold=never \ + -c bitmapPseudoMerge.all.maxMerges=64 \ + -c pack.writeBitmapLookupTable=true \ + repack -adb +' + +test_perf 'git rev-list --count --all --objects (no bitmaps)' ' + git rev-list --objects --all +' + +test_perf 'git rev-list --count --all --objects (no pseudo-merges)' ' + GIT_TEST_USE_PSEUDO_MERGES=0 \ + git rev-list --objects --all --use-bitmap-index +' + +test_perf 'git rev-list --count --all --objects (with pseudo-merges)' ' + GIT_TEST_USE_PSEUDO_MERGES=1 \ + git rev-list --objects --all --use-bitmap-index +' + +test_done diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh new file mode 100755 index 0000000000..f052f395a7 --- /dev/null +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -0,0 +1,393 @@ +#!/bin/sh + +test_description='pseudo-merge bitmaps' + +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 + +. ./test-lib.sh + +test_pseudo_merges () { + test-tool bitmap dump-pseudo-merges +} + +test_pseudo_merge_commits () { + test-tool bitmap dump-pseudo-merge-commits "$1" +} + +test_pseudo_merges_satisfied () { + test_trace2_data bitmap pseudo_merges_satisfied "$1" +} + +test_pseudo_merges_cascades () { + test_trace2_data bitmap pseudo_merges_cascades "$1" +} + +test_pseudo_merges_reused () { + test_trace2_data pack-bitmap-write building_bitmaps_pseudo_merge_reused "$1" +} + +tag_everything () { + git rev-list --all --no-object-names >in && + perl -lne ' + print "create refs/tags/" . $. . " " . $1 if /([0-9a-f]+)/ + ' expect && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --all --objects --use-bitmap-index >actual && + + test_pseudo_merges_satisfied 0 merges && + test_must_be_empty merges && + test_cmp expect actual +' + +test_expect_success 'pseudo-merges accurately represent their objects' ' + test_config bitmapPseudoMerge.test.pattern "refs/tags/" && + test_config bitmapPseudoMerge.test.maxMerges 8 && + test_config bitmapPseudoMerge.test.stableThreshold never && + + git repack -adb && + + test_pseudo_merges >merges && + test_line_count = 8 merges && + + for i in $(test_seq 0 $(($(wc -l commits && + + git rev-list --objects --no-object-names --stdin expect.raw && + test-tool bitmap dump-pseudo-merge-objects $i >actual.raw && + + sort -u expect && + sort -u actual && + + test_cmp expect actual || return 1 + done +' + +test_expect_success 'bitmap traversal with pseudo-merges' ' + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --all --objects --use-bitmap-index >actual && + git rev-list --count --all --objects >expect && + + test_pseudo_merges_satisfied 8 trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --all --objects --use-bitmap-index >actual && + git rev-list --count --all --objects >expect && + + test_pseudo_merges_satisfied 8 merges && + test_line_count = 1 merges && + test_pseudo_merge_commits 0 >commits && + + test-tool bitmap list-commits >bitmaps && + bitmaps_nr="$(wc -l expect && + + test $(cat expect) -eq $(wc -l merges && + test_line_count = 1 merges && + + test_pseudo_merge_commits 0 >oids && + git cat-file --batch commits && + + test $(wc -l in && + git update-ref --stdin merges && + merges_nr="$(wc -l oids && + git cat-file --batch commits && + + expect="$(grep -c "^committer.*$old +0000$" commits)" && + actual="$(wc -l oids && + git cat-file --batch commits && + test $(wc -l err && + + cat >expect <<-EOF && + fatal: pseudo-merge group ${SQ}test${SQ} has unstable threshold before stable one + EOF + + test_cmp expect err +' + +test_expect_success 'pseudo-merge pattern with capture groups' ' + git init pseudo-merge-captures && + ( + cd pseudo-merge-captures && + + test_commit_bulk 128 && + tag_everything && + + for r in $(test_seq 8) + do + test_commit_bulk 16 && + + git rev-list HEAD~16.. >in && + + perl -lne "print \"create refs/remotes/$r/tags/\$. \$_\"" refs && + + test_pseudo_merges >merges && + for m in $(test_seq 0 $(($(wc -l oids && + grep -f oids refs | + perl -lne "print \$1 if /refs\/remotes\/([0-9]+)/" | + sort -u || return 1 + done >remotes && + + test $(wc -l merges && + test_line_count = 2 merges && + + test_pseudo_merge_commits 0 >commits-0.raw && + test_pseudo_merge_commits 1 >commits-1.raw && + + sort commits-0.raw >commits-0 && + sort commits-1.raw >commits-1 && + + comm -12 commits-0 commits-1 >overlap && + + test_line_count -gt 0 overlap + ) +' + +test_expect_success 'pseudo-merge overlap traversal' ' + ( + cd pseudo-merge-overlap && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --all --objects --use-bitmap-index >actual && + git rev-list --count --all --objects >expect && + + test_pseudo_merges_satisfied 2 trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --all --objects --use-bitmap-index >actual && + git rev-list --count --all --objects >expect && + + test_pseudo_merges_satisfied 2 merges && + test_line_count = 2 merges && + + test_pseudo_merge_commits 0 >stable-oids.before && + test_pseudo_merge_commits 1 >unstable-oids.before && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt git \ + -c bitmapPseudoMerge.test.pattern="refs/tags/" \ + -c bitmapPseudoMerge.test.maxMerges=2 \ + -c bitmapPseudoMerge.test.threshold=now \ + -c bitmapPseudoMerge.test.stableThreshold=$(($unstable - 1)) \ + -c bitmapPseudoMerge.test.stableSize=512 \ + repack -adb && + + test_pseudo_merges_reused 1 merges && + test_line_count = 3 merges && + + test_pseudo_merge_commits 0 >stable-oids.after && + for i in 1 2 + do + test_pseudo_merge_commits $i || return 1 + done >unstable-oids.after && + + sort -u expect && + sort -u actual && + test_cmp expect actual && + + sort -u expect && + sort -u actual && + test_cmp expect actual + ) +' + +test_done diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 862d80c974..427b375b39 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -458,6 +458,7 @@ test_commit_bulk () { indir=. ref=HEAD n=1 + notick= message='commit %s' filename='%s.t' contents='content %s' @@ -488,6 +489,9 @@ test_commit_bulk () { filename="${1#--*=}-%s.t" contents="${1#--*=} %s" ;; + --notick) + notick=yes + ;; -*) BUG "invalid test_commit_bulk option: $1" ;; @@ -507,7 +511,10 @@ test_commit_bulk () { while test "$total" -gt 0 do - test_tick && + if test -z "$notick" + then + test_tick + fi && echo "commit $ref" printf 'author %s <%s> %s\n' \ "$GIT_AUTHOR_NAME" \