git/builtin-rev-list.c

748 lines
17 KiB
C
Raw Normal View History

#include "cache.h"
#include "refs.h"
#include "tag.h"
#include "commit.h"
#include "tree.h"
#include "blob.h"
#include "tree-walk.h"
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
#include "builtin.h"
#include "log-tree.h"
#include "graph.h"
/* bits #0-15 in revision.h */
#define COUNTED (1u<<16)
static const char rev_list_usage[] =
"git rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
" limiting output:\n"
" --max-count=nr\n"
" --max-age=epoch\n"
" --min-age=epoch\n"
" --sparse\n"
" --no-merges\n"
" --remove-empty\n"
" --all\n"
" --branches\n"
" --tags\n"
" --remotes\n"
" --stdin\n"
" --quiet\n"
" ordering output:\n"
" --topo-order\n"
" --date-order\n"
" --reverse\n"
" formatting output:\n"
" --parents\n"
" --children\n"
" --objects | --objects-edge\n"
" --unpacked\n"
" --header | --pretty\n"
" --abbrev=nr | --no-abbrev\n"
" --abbrev-commit\n"
" --left-right\n"
" special purpose:\n"
" --bisect\n"
" --bisect-vars\n"
" --bisect-all"
;
static struct rev_info revs;
static int bisect_list;
static int show_timestamp;
static int hdr_termination;
Log message printout cleanups On Sun, 16 Apr 2006, Junio C Hamano wrote: > > In the mid-term, I am hoping we can drop the generate_header() > callchain _and_ the custom code that formats commit log in-core, > found in cmd_log_wc(). Ok, this was nastier than expected, just because the dependencies between the different log-printing stuff were absolutely _everywhere_, but here's a patch that does exactly that. The patch is not very easy to read, and the "--patch-with-stat" thing is still broken (it does not call the "show_log()" thing properly for merges). That's not a new bug. In the new world order it _should_ do something like if (rev->logopt) show_log(rev, rev->logopt, "---\n"); but it doesn't. I haven't looked at the --with-stat logic, so I left it alone. That said, this patch removes more lines than it adds, and in particular, the "cmd_log_wc()" loop is now a very clean: while ((commit = get_revision(rev)) != NULL) { log_tree_commit(rev, commit); free(commit->buffer); commit->buffer = NULL; } so it doesn't get much prettier than this. All the complexity is entirely hidden in log-tree.c, and any code that needs to flush the log literally just needs to do the "if (rev->logopt) show_log(...)" incantation. I had to make the combined_diff() logic take a "struct rev_info" instead of just a "struct diff_options", but that part is pretty clean. This does change "git whatchanged" from using "diff-tree" as the commit descriptor to "commit", and I changed one of the tests to reflect that new reality. Otherwise everything still passes, and my other tests look fine too. Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-17 18:59:32 +00:00
static const char *header_prefix;
static void finish_commit(struct commit *commit);
static void show_commit(struct commit *commit)
{
graph_show_commit(revs.graph);
if (show_timestamp)
printf("%lu ", commit->date);
Log message printout cleanups On Sun, 16 Apr 2006, Junio C Hamano wrote: > > In the mid-term, I am hoping we can drop the generate_header() > callchain _and_ the custom code that formats commit log in-core, > found in cmd_log_wc(). Ok, this was nastier than expected, just because the dependencies between the different log-printing stuff were absolutely _everywhere_, but here's a patch that does exactly that. The patch is not very easy to read, and the "--patch-with-stat" thing is still broken (it does not call the "show_log()" thing properly for merges). That's not a new bug. In the new world order it _should_ do something like if (rev->logopt) show_log(rev, rev->logopt, "---\n"); but it doesn't. I haven't looked at the --with-stat logic, so I left it alone. That said, this patch removes more lines than it adds, and in particular, the "cmd_log_wc()" loop is now a very clean: while ((commit = get_revision(rev)) != NULL) { log_tree_commit(rev, commit); free(commit->buffer); commit->buffer = NULL; } so it doesn't get much prettier than this. All the complexity is entirely hidden in log-tree.c, and any code that needs to flush the log literally just needs to do the "if (rev->logopt) show_log(...)" incantation. I had to make the combined_diff() logic take a "struct rev_info" instead of just a "struct diff_options", but that part is pretty clean. This does change "git whatchanged" from using "diff-tree" as the commit descriptor to "commit", and I changed one of the tests to reflect that new reality. Otherwise everything still passes, and my other tests look fine too. Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-17 18:59:32 +00:00
if (header_prefix)
fputs(header_prefix, stdout);
if (!revs.graph) {
if (commit->object.flags & BOUNDARY)
putchar('-');
else if (commit->object.flags & UNINTERESTING)
putchar('^');
else if (revs.left_right) {
if (commit->object.flags & SYMMETRIC_LEFT)
putchar('<');
else
putchar('>');
}
}
if (revs.abbrev_commit && revs.abbrev)
fputs(find_unique_abbrev(commit->object.sha1, revs.abbrev),
stdout);
else
fputs(sha1_to_hex(commit->object.sha1), stdout);
if (revs.print_parents) {
struct commit_list *parents = commit->parents;
while (parents) {
printf(" %s", sha1_to_hex(parents->item->object.sha1));
parents = parents->next;
}
}
if (revs.children.name) {
struct commit_list *children;
children = lookup_decoration(&revs.children, &commit->object);
while (children) {
printf(" %s", sha1_to_hex(children->item->object.sha1));
children = children->next;
}
}
show_decorations(&revs, commit);
if (revs.commit_format == CMIT_FMT_ONELINE)
putchar(' ');
else
putchar('\n');
Add "--show-all" revision walker flag for debugging It's really not very easy to visualize the commit walker, because - on purpose - it obvously doesn't show the uninteresting commits! This adds a "--show-all" flag to the revision walker, which will make it show uninteresting commits too, and they'll have a '^' in front of them (it also fixes a logic error for !verbose_header for boundary commits - we should show the '-' even if left_right isn't shown). A separate patch to gitk to teach it the new '^' was sent to paulus. With the change in place, it actually is interesting even for the cases that git doesn't have any problems with, ie for the kernel you can do: gitk -d --show-all v2.6.24.. and you see just how far down it has to parse things to see it all. The use of "-d" is a good idea, since the date-ordered toposort is much better at showing why it goes deep down (ie the date of some of those commits after 2.6.24 is much older, because they were merged from trees that weren't rebased). So I think this is a useful feature even for non-debugging - just to visualize what git does internally more. When it actually breaks out due to the "everybody_uninteresting()" case, it adds the uninteresting commits (both the one it's looking at now, and the list of pending ones) to the list This way, we really list *all* the commits we've looked at. Because we now end up listing commits we may not even have been parsed at all "show_log" and "show_commit" need to protect against commits that don't have a commit buffer entry. That second part is debatable just how it should work. Maybe we shouldn't show such entries at all (with this patch those entries do get shown, they just don't get any message shown with them). But I think this is a useful case. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-09 22:02:07 +00:00
if (revs.verbose_header && commit->buffer) {
struct strbuf buf = STRBUF_INIT;
pretty_print_commit(revs.commit_format, commit,
&buf, revs.abbrev, NULL, NULL,
revs.date_mode, 0);
if (revs.graph) {
if (buf.len) {
if (revs.commit_format != CMIT_FMT_ONELINE)
graph_show_oneline(revs.graph);
graph_show_commit_msg(revs.graph, &buf);
/*
* Add a newline after the commit message.
*
* Usually, this newline produces a blank
* padding line between entries, in which case
* we need to add graph padding on this line.
*
* However, the commit message may not end in a
* newline. In this case the newline simply
* ends the last line of the commit message,
* and we don't need any graph output. (This
* always happens with CMIT_FMT_ONELINE, and it
* happens with CMIT_FMT_USERFORMAT when the
* format doesn't explicitly end in a newline.)
*/
if (buf.len && buf.buf[buf.len - 1] == '\n')
graph_show_padding(revs.graph);
putchar('\n');
} else {
/*
* If the message buffer is empty, just show
* the rest of the graph output for this
* commit.
*/
if (graph_show_remainder(revs.graph))
putchar('\n');
}
} else {
if (buf.len)
printf("%s%c", buf.buf, hdr_termination);
}
strbuf_release(&buf);
} else {
if (graph_show_remainder(revs.graph))
putchar('\n');
}
maybe_flush_or_die(stdout, "stdout");
finish_commit(commit);
}
static void finish_commit(struct commit *commit)
{
if (commit->parents) {
free_commit_list(commit->parents);
commit->parents = NULL;
}
free(commit->buffer);
commit->buffer = NULL;
[PATCH] Modify git-rev-list to linearise the commit history in merge order. This patch linearises the GIT commit history graph into merge order which is defined by invariants specified in Documentation/git-rev-list.txt. The linearisation produced by this patch is superior in an objective sense to that produced by the existing git-rev-list implementation in that the linearisation produced is guaranteed to have the minimum number of discontinuities, where a discontinuity is defined as an adjacent pair of commits in the output list which are not related in a direct child-parent relationship. With this patch a graph like this: a4 --- | \ \ | b4 | |/ | | a3 | | | | | a2 | | | | c3 | | | | | c2 | b3 | | | /| | b2 | | | c1 | | / | b1 a1 | | | a0 | | / root Sorts like this: = a4 | c3 | c2 | c1 ^ b4 | b3 | b2 | b1 ^ a3 | a2 | a1 | a0 = root Instead of this: = a4 | c3 ^ b4 | a3 ^ c2 ^ b3 ^ a2 ^ b2 ^ c1 ^ a1 ^ b1 ^ a0 = root A test script, t/t6000-rev-list.sh, includes a test which demonstrates that the linearisation produced by --merge-order has less discontinuities than the linearisation produced by git-rev-list without the --merge-order flag specified. To see this, do the following: cd t ./t6000-rev-list.sh cd trash cat actual-default-order cat actual-merge-order The existing behaviour of git-rev-list is preserved, by default. To obtain the modified behaviour, specify --merge-order or --merge-order --show-breaks on the command line. This version of the patch has been tested on the git repository and also on the linux-2.6 repository and has reasonable performance on both - ~50-100% slower than the original algorithm. This version of the patch has incorporated a functional equivalent of the Linus' output limiting algorithm into the merge-order algorithm itself. This operates per the notes associated with Linus' commit 337cb3fb8da45f10fe9a0c3cf571600f55ead2ce. This version has incorporated Linus' feedback regarding proposed changes to rev-list.c. (see: [PATCH] Factor out filtering in rev-list.c) This version has improved the way sort_first_epoch marks commits as uninteresting. For more details about this change, refer to Documentation/git-rev-list.txt and http://blackcubes.dyndns.org/epoch/. Signed-off-by: Jon Seymour <jon.seymour@gmail.com> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-06 15:39:40 +00:00
}
static void finish_object(struct object_array_entry *p)
{
if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1))
die("missing blob object '%s'", sha1_to_hex(p->item->sha1));
}
static void show_object(struct object_array_entry *p)
{
/* An object with name "foo\n0000000..." can be used to
* confuse downstream "git pack-objects" very badly.
*/
const char *ep = strchr(p->name, '\n');
Make sure quickfetch is not fooled with a previous, incomplete fetch. This updates git-rev-list --objects to be a bit more careful when listing a blob object to make sure the blob actually exists, and uses it to make sure the quick-fetch optimization we introduced earlier is not fooled by a previous incomplete fetch. The quick-fetch optimization works by running this command: git rev-list --objects <<commit-list>> --not --all where <<commit-list>> is a list of commits that we are going to fetch from the other side. If there is any object missing to complete the <<commit-list>>, the rev-list would fail and die (say, the commit was in our repository, but its tree wasn't -- then it will barf while trying to list the blobs the tree contains because it cannot read that tree). Usually we do not have the objects (otherwise why would we fetching?), but in one important special case we do: when the remote repository is used as an alternate object store (i.e. pointed by .git/objects/info/alternates). We could check .git/objects/info/alternates to see if the remote we are interacting with is one of them (or is used as an alternate, recursively, by one of them), but that check is more cumbersome than it is worth. The above check however did not catch missing blob, because object listing code did not read nor check blob objects, knowing that blobs do not contain any further references to other objects. This commit fixes it with practically unmeasurable overhead. I've benched this with git rev-list --objects --all >/dev/null in the kernel repository, with three different implementations of the "check-blob". - Checking with has_sha1_file() has negligible (unmeasurable) performance penalty. - Checking with sha1_object_info() makes it somewhat slower, perhaps by 5%. - Checking with read_sha1_file() to cause a fully re-validation is prohibitively expensive (about 4 times as much runtime). In my original patch, I had this as a command line option, but the overhead is small enough that it is not really worth it. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-16 07:42:29 +00:00
finish_object(p);
if (ep) {
printf("%s %.*s\n", sha1_to_hex(p->item->sha1),
(int) (ep - p->name),
p->name);
}
else
printf("%s %s\n", sha1_to_hex(p->item->sha1), p->name);
}
static void show_edge(struct commit *commit)
{
printf("-%s\n", sha1_to_hex(commit->object.sha1));
}
/*
* This is a truly stupid algorithm, but it's only
* used for bisection, and we just don't care enough.
*
* We care just barely enough to avoid recursing for
* non-merge entries.
*/
static int count_distance(struct commit_list *entry)
{
int nr = 0;
while (entry) {
struct commit *commit = entry->item;
struct commit_list *p;
if (commit->object.flags & (UNINTERESTING | COUNTED))
break;
if (!(commit->object.flags & TREESAME))
nr++;
commit->object.flags |= COUNTED;
p = commit->parents;
entry = p;
if (p) {
p = p->next;
while (p) {
nr += count_distance(p);
p = p->next;
}
}
}
return nr;
}
static void clear_distance(struct commit_list *list)
{
while (list) {
struct commit *commit = list->item;
commit->object.flags &= ~COUNTED;
list = list->next;
}
}
#define DEBUG_BISECT 0
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
static inline int weight(struct commit_list *elem)
{
return *((int*)(elem->item->util));
}
static inline void weight_set(struct commit_list *elem, int weight)
{
*((int*)(elem->item->util)) = weight;
}
static int count_interesting_parents(struct commit *commit)
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
{
struct commit_list *p;
int count;
for (count = 0, p = commit->parents; p; p = p->next) {
if (p->item->object.flags & UNINTERESTING)
continue;
count++;
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
}
return count;
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
}
static inline int halfway(struct commit_list *p, int nr)
{
/*
* Don't short-cut something we are not going to return!
*/
if (p->item->object.flags & TREESAME)
return 0;
if (DEBUG_BISECT)
return 0;
/*
* 2 and 3 are halfway of 5.
* 3 is halfway of 6 but 2 and 4 are not.
*/
switch (2 * weight(p) - nr) {
case -1: case 0: case 1:
return 1;
default:
return 0;
}
}
#if !DEBUG_BISECT
#define show_list(a,b,c,d) do { ; } while (0)
#else
static void show_list(const char *debug, int counted, int nr,
struct commit_list *list)
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
{
struct commit_list *p;
fprintf(stderr, "%s (%d/%d)\n", debug, counted, nr);
for (p = list; p; p = p->next) {
struct commit_list *pp;
struct commit *commit = p->item;
unsigned flags = commit->object.flags;
enum object_type type;
unsigned long size;
char *buf = read_sha1_file(commit->object.sha1, &type, &size);
char *ep, *sp;
fprintf(stderr, "%c%c%c ",
(flags & TREESAME) ? ' ' : 'T',
(flags & UNINTERESTING) ? 'U' : ' ',
(flags & COUNTED) ? 'C' : ' ');
if (commit->util)
fprintf(stderr, "%3d", weight(p));
else
fprintf(stderr, "---");
fprintf(stderr, " %.*s", 8, sha1_to_hex(commit->object.sha1));
for (pp = commit->parents; pp; pp = pp->next)
fprintf(stderr, " %.*s", 8,
sha1_to_hex(pp->item->object.sha1));
sp = strstr(buf, "\n\n");
if (sp) {
sp += 2;
for (ep = sp; *ep && *ep != '\n'; ep++)
;
fprintf(stderr, " %.*s", (int)(ep - sp), sp);
}
fprintf(stderr, "\n");
}
}
#endif /* DEBUG_BISECT */
static struct commit_list *best_bisection(struct commit_list *list, int nr)
{
struct commit_list *p, *best;
int best_distance = -1;
best = list;
for (p = list; p; p = p->next) {
int distance;
unsigned flags = p->item->object.flags;
if (flags & TREESAME)
continue;
distance = weight(p);
if (nr - distance < distance)
distance = nr - distance;
if (distance > best_distance) {
best = p;
best_distance = distance;
}
}
return best;
}
struct commit_dist {
struct commit *commit;
int distance;
};
static int compare_commit_dist(const void *a_, const void *b_)
{
struct commit_dist *a, *b;
a = (struct commit_dist *)a_;
b = (struct commit_dist *)b_;
if (a->distance != b->distance)
return b->distance - a->distance; /* desc sort */
return hashcmp(a->commit->object.sha1, b->commit->object.sha1);
}
static struct commit_list *best_bisection_sorted(struct commit_list *list, int nr)
{
struct commit_list *p;
struct commit_dist *array = xcalloc(nr, sizeof(*array));
int cnt, i;
for (p = list, cnt = 0; p; p = p->next) {
int distance;
unsigned flags = p->item->object.flags;
if (flags & TREESAME)
continue;
distance = weight(p);
if (nr - distance < distance)
distance = nr - distance;
array[cnt].commit = p->item;
array[cnt].distance = distance;
cnt++;
}
qsort(array, cnt, sizeof(*array), compare_commit_dist);
for (p = list, i = 0; i < cnt; i++) {
struct name_decoration *r = xmalloc(sizeof(*r) + 100);
struct object *obj = &(array[i].commit->object);
sprintf(r->name, "dist=%d", array[i].distance);
r->next = add_decoration(&name_decoration, obj, r);
p->item = array[i].commit;
p = p->next;
}
if (p)
p->next = NULL;
free(array);
return list;
}
/*
* zero or positive weight is the number of interesting commits it can
* reach, including itself. Especially, weight = 0 means it does not
* reach any tree-changing commits (e.g. just above uninteresting one
* but traversal is with pathspec).
*
* weight = -1 means it has one parent and its distance is yet to
* be computed.
*
* weight = -2 means it has more than one parent and its distance is
* unknown. After running count_distance() first, they will get zero
* or positive distance.
*/
static struct commit_list *do_find_bisection(struct commit_list *list,
int nr, int *weights,
int find_all)
{
int n, counted;
struct commit_list *p;
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
counted = 0;
for (n = 0, p = list; p; p = p->next) {
struct commit *commit = p->item;
unsigned flags = commit->object.flags;
p->item->util = &weights[n++];
switch (count_interesting_parents(commit)) {
case 0:
if (!(flags & TREESAME)) {
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
weight_set(p, 1);
counted++;
show_list("bisection 2 count one",
counted, nr, list);
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
}
/*
* otherwise, it is known not to reach any
* tree-changing commit and gets weight 0.
*/
break;
case 1:
weight_set(p, -1);
break;
default:
weight_set(p, -2);
break;
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
}
}
show_list("bisection 2 initialize", counted, nr, list);
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
/*
* If you have only one parent in the resulting set
* then you can reach one commit more than that parent
* can reach. So we do not have to run the expensive
* count_distance() for single strand of pearls.
*
* However, if you have more than one parents, you cannot
* just add their distance and one for yourself, since
* they usually reach the same ancestor and you would
* end up counting them twice that way.
*
* So we will first count distance of merges the usual
* way, and then fill the blanks using cheaper algorithm.
*/
for (p = list; p; p = p->next) {
if (p->item->object.flags & UNINTERESTING)
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
continue;
if (weight(p) != -2)
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
continue;
weight_set(p, count_distance(p));
clear_distance(list);
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
/* Does it happen to be at exactly half-way? */
if (!find_all && halfway(p, nr))
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
return p;
counted++;
}
show_list("bisection 2 count_distance", counted, nr, list);
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
while (counted < nr) {
for (p = list; p; p = p->next) {
struct commit_list *q;
unsigned flags = p->item->object.flags;
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
if (0 <= weight(p))
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
continue;
for (q = p->item->parents; q; q = q->next) {
if (q->item->object.flags & UNINTERESTING)
continue;
if (0 <= weight(q))
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
break;
}
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
if (!q)
continue;
/*
* weight for p is unknown but q is known.
* add one for p itself if p is to be counted,
* otherwise inherit it from q directly.
*/
if (!(flags & TREESAME)) {
weight_set(p, weight(q)+1);
counted++;
show_list("bisection 2 count one",
counted, nr, list);
}
else
weight_set(p, weight(q));
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
/* Does it happen to be at exactly half-way? */
if (!find_all && halfway(p, nr))
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
return p;
}
}
show_list("bisection 2 counted all", counted, nr, list);
if (!find_all)
return best_bisection(list, nr);
else
return best_bisection_sorted(list, nr);
}
static struct commit_list *find_bisection(struct commit_list *list,
int *reaches, int *all,
int find_all)
{
int nr, on_list;
struct commit_list *p, *best, *next, *last;
int *weights;
show_list("bisection 2 entry", 0, 0, list);
/*
* Count the number of total and tree-changing items on the
* list, while reversing the list.
*/
for (nr = on_list = 0, last = NULL, p = list;
p;
p = next) {
unsigned flags = p->item->object.flags;
next = p->next;
if (flags & UNINTERESTING)
continue;
p->next = last;
last = p;
if (!(flags & TREESAME))
nr++;
on_list++;
}
list = last;
show_list("bisection 2 sorted", 0, nr, list);
*all = nr;
weights = xcalloc(on_list, sizeof(*weights));
/* Do the real work of finding bisection commit. */
best = do_find_bisection(list, nr, weights, find_all);
if (best) {
if (!find_all)
best->next = NULL;
*reaches = weight(best);
}
git-rev-list --bisect: optimization This improves the performance of revision bisection. The idea is to avoid rather expensive count_distance() function, which counts the number of commits that are reachable from any given commit (including itself) in the set. When a commit has only one relevant parent commit, the number of commits the commit can reach is exactly the number of commits that the parent can reach plus one; instead of running count_distance() on commits that are on straight single strand of pearls, we can just add one to the parents' count. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' counts up plus one for the commit itself; that would overcount ancestors that are reachable from more than one parents. The algorithm used in the patch runs count_distance() on merge commits, and uses the util field of commit objects to remember them. After that, the number of commits reachable from each of the remaining commits is counted by finding a commit whose count is not yet known but the count for its (sole) parent is known, and adding one to the parent's count, until we assign numbers to everybody. Another small optimization is whenever we find a half-way commit (that is, a commit that can reach exactly half of the commits), we stop giving counts to remaining commits, as we will not find any better commit than we just found. The performance to bisect between v1.0.0 and v1.5.0 in git.git repository was improved by saying good and bad in turns from 3.68 seconds down to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22 seconds. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 05:16:24 +00:00
free(weights);
return best;
}
rev-list: estimate number of bisection step left This patch teaches "git rev-list --bisect-vars" to output an estimate of the number of bisection step left _after the current one_ along with the other variables it already outputs. This patch also makes "git-bisect.sh" display this number of steps left _after the current one_, along with the estimate of the number of revisions left to test (after the current one). Here is a table to help analyse what should be the best estimate for the number of bisect steps left. N : linear case --> probabilities --> best ------------------------------------------------------------- 1 : G-B --> 0 --> 0 2 : G-U1-B --> 0 --> 0 3 : G-U1-U2-B --> 0(1/3) 1(2/3) --> 1 4 : G-U1-U2-U3-B --> 1 --> 1 5 : G-U1-U2-U3-U4-B --> 1(3/5) 2(2/5) --> 1 6 : G-U1-U2-U3-U4-U5-B --> 1(2/6) 2(4/6) --> 2 7 : G-U1-U2-U3-U4-U5-U6-B --> 1(1/7) 2(6/7) --> 2 8 : G-U1-U2-U3-U4-U5-U6-U7-B --> 2 --> 2 9 : G-U1-U2-U3-U4-U5-U6-U7-U8-B --> 2(7/9) 3(2/9) --> 2 10: G-U1-U2-U3-U4-U5-U6-U7-U8-U9-B --> 2(6/10)3(4/10)--> 2 In the column "N", there is the number of revisions that could _now_ be the first bad commit we are looking for. The "linear case" column describes the linear history corresponding to the number in column N. G means good, B means bad, and Ux means unknown. Note that the first bad revision we are looking for can be any Ux or B. In the "probabilities" column, there are the different outcomes in number of steps with the odds of each outcome in parenthesis corresponding to the linear case. The "best" column gives the most accurate estimate among the different outcomes in the "probabilities" column. We have the following: best(2^n) == n - 1 and for any x between 0 included and 2^n excluded, the probability for n - 1 steps left looks like: P(2^n + x) == (2^n - x) / (2^n + x) and P(2^n + x) < 0.5 means 2^n < 3x So the algorithm used in this patch calculates 2^n and x, and then choose between returning n - 1 and n. Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-02-21 08:26:01 +00:00
static inline int log2i(int n)
{
int log2 = 0;
for (; n > 1; n >>= 1)
log2++;
return log2;
}
static inline int exp2i(int n)
{
return 1 << n;
}
/*
* Estimate the number of bisect steps left (after the current step)
*
* For any x between 0 included and 2^n excluded, the probability for
* n - 1 steps left looks like:
*
* P(2^n + x) == (2^n - x) / (2^n + x)
*
* and P(2^n + x) < 0.5 means 2^n < 3x
*/
static int estimate_bisect_steps(int all)
{
int n, x, e;
if (all < 3)
return 0;
n = log2i(all);
e = exp2i(n);
x = all - e;
return (e < 3 * x) ? n : n - 1;
}
int cmd_rev_list(int argc, const char **argv, const char *prefix)
{
struct commit_list *list;
int i;
int read_from_stdin = 0;
int bisect_show_vars = 0;
int bisect_find_all = 0;
int quiet = 0;
git_config(git_default_config, NULL);
init_revisions(&revs, prefix);
revs.abbrev = 0;
revs.commit_format = CMIT_FMT_UNSPECIFIED;
argc = setup_revisions(argc, argv, &revs, NULL);
quiet = DIFF_OPT_TST(&revs.diffopt, QUIET);
for (i = 1 ; i < argc; i++) {
2005-10-21 04:25:09 +00:00
const char *arg = argv[i];
if (!strcmp(arg, "--header")) {
revs.verbose_header = 1;
continue;
}
if (!strcmp(arg, "--timestamp")) {
show_timestamp = 1;
continue;
}
if (!strcmp(arg, "--bisect")) {
bisect_list = 1;
continue;
}
if (!strcmp(arg, "--bisect-all")) {
bisect_list = 1;
bisect_find_all = 1;
revs.show_decorations = 1;
continue;
}
if (!strcmp(arg, "--bisect-vars")) {
bisect_list = 1;
bisect_show_vars = 1;
continue;
}
if (!strcmp(arg, "--stdin")) {
if (read_from_stdin++)
die("--stdin given twice?");
read_revisions_from_stdin(&revs);
continue;
}
usage(rev_list_usage);
}
if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
/* The command line has a --pretty */
hdr_termination = '\n';
if (revs.commit_format == CMIT_FMT_ONELINE)
Log message printout cleanups On Sun, 16 Apr 2006, Junio C Hamano wrote: > > In the mid-term, I am hoping we can drop the generate_header() > callchain _and_ the custom code that formats commit log in-core, > found in cmd_log_wc(). Ok, this was nastier than expected, just because the dependencies between the different log-printing stuff were absolutely _everywhere_, but here's a patch that does exactly that. The patch is not very easy to read, and the "--patch-with-stat" thing is still broken (it does not call the "show_log()" thing properly for merges). That's not a new bug. In the new world order it _should_ do something like if (rev->logopt) show_log(rev, rev->logopt, "---\n"); but it doesn't. I haven't looked at the --with-stat logic, so I left it alone. That said, this patch removes more lines than it adds, and in particular, the "cmd_log_wc()" loop is now a very clean: while ((commit = get_revision(rev)) != NULL) { log_tree_commit(rev, commit); free(commit->buffer); commit->buffer = NULL; } so it doesn't get much prettier than this. All the complexity is entirely hidden in log-tree.c, and any code that needs to flush the log literally just needs to do the "if (rev->logopt) show_log(...)" incantation. I had to make the combined_diff() logic take a "struct rev_info" instead of just a "struct diff_options", but that part is pretty clean. This does change "git whatchanged" from using "diff-tree" as the commit descriptor to "commit", and I changed one of the tests to reflect that new reality. Otherwise everything still passes, and my other tests look fine too. Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-17 18:59:32 +00:00
header_prefix = "";
else
Log message printout cleanups On Sun, 16 Apr 2006, Junio C Hamano wrote: > > In the mid-term, I am hoping we can drop the generate_header() > callchain _and_ the custom code that formats commit log in-core, > found in cmd_log_wc(). Ok, this was nastier than expected, just because the dependencies between the different log-printing stuff were absolutely _everywhere_, but here's a patch that does exactly that. The patch is not very easy to read, and the "--patch-with-stat" thing is still broken (it does not call the "show_log()" thing properly for merges). That's not a new bug. In the new world order it _should_ do something like if (rev->logopt) show_log(rev, rev->logopt, "---\n"); but it doesn't. I haven't looked at the --with-stat logic, so I left it alone. That said, this patch removes more lines than it adds, and in particular, the "cmd_log_wc()" loop is now a very clean: while ((commit = get_revision(rev)) != NULL) { log_tree_commit(rev, commit); free(commit->buffer); commit->buffer = NULL; } so it doesn't get much prettier than this. All the complexity is entirely hidden in log-tree.c, and any code that needs to flush the log literally just needs to do the "if (rev->logopt) show_log(...)" incantation. I had to make the combined_diff() logic take a "struct rev_info" instead of just a "struct diff_options", but that part is pretty clean. This does change "git whatchanged" from using "diff-tree" as the commit descriptor to "commit", and I changed one of the tests to reflect that new reality. Otherwise everything still passes, and my other tests look fine too. Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-17 18:59:32 +00:00
header_prefix = "commit ";
}
else if (revs.verbose_header)
/* Only --header was specified */
revs.commit_format = CMIT_FMT_RAW;
list = revs.commits;
if ((!list &&
(!(revs.tag_objects||revs.tree_objects||revs.blob_objects) &&
Add "named object array" concept We've had this notion of a "object_list" for a long time, which eventually grew a "name" member because some users (notably git-rev-list) wanted to name each object as it is generated. That object_list is great for some things, but it isn't all that wonderful for others, and the "name" member is generally not used by everybody. This patch splits the users of the object_list array up into two: the traditional list users, who want the list-like format, and who don't actually use or want the name. And another class of users that really used the list as an extensible array, and generally wanted to name the objects. The patch is fairly straightforward, but it's also biggish. Most of it really just cleans things up: switching the revision parsing and listing over to the array makes things like the builtin-diff usage much simpler (we now see exactly how many members the array has, and we don't get the objects reversed from the order they were on the command line). One of the main reasons for doing this at all is that the malloc overhead of the simple object list was actually pretty high, and the array is just a lot denser. So this patch brings down memory usage by git-rev-list by just under 3% (on top of all the other memory use optimizations) on the mozilla archive. It does add more lines than it removes, and more importantly, it adds a whole new infrastructure for maintaining lists of objects, but on the other hand, the new dynamic array code is pretty obvious. The change to builtin-diff-tree.c shows a fairly good example of why an array interface is sometimes more natural, and just much simpler for everybody. Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 00:42:35 +00:00
!revs.pending.nr)) ||
revs.diff)
usage(rev_list_usage);
save_commit_buffer = revs.verbose_header ||
revs.grep_filter.pattern_list;
rev-list --bisect: limit list before bisecting. I noticed bisect does not work well without both good and bad. Running this script in git.git repository would give you quite different results: #!/bin/sh initial=e83c5163316f89bfbde7d9ab23ca2e25604af290 mid0=`git rev-list --bisect ^$initial --all` git rev-list $mid0 | wc -l git rev-list ^$mid0 --all | wc -l mid1=`git rev-list --bisect --all` git rev-list $mid1 | wc -l git rev-list ^$mid1 --all | wc -l The $initial commit is the very first commit you made. The first midpoint bisects things evenly as designed, but the latter does not. The reason I got interested in this was because I was wondering if something like the following would help people converting a huge repository from foreign SCM, or preparing a repository to be fetched over plain dumb HTTP only: #!/bin/sh N=4 P=.git/objects/pack bottom= while test 0 \< $N do N=$((N-1)) if test -z "$bottom" then newbottom=`git rev-list --bisect --all` else newbottom=`git rev-list --bisect ^$bottom --all` fi if test -z "$bottom" then rev_list="$newbottom" elif test 0 = $N then rev_list="^$bottom --all" else rev_list="^$bottom $newbottom" fi p=$(git rev-list --unpacked --objects $rev_list | git pack-objects $P/pack) git show-index <$P/pack-$p.idx | wc -l bottom=$newbottom done The idea is to pack older half of the history to one pack, then older half of the remaining history to another, to continue a few times, using finer granularity as we get closer to the tip. This may not matter, since for a truly huge history, running bisect number of times could be quite time consuming, and we might be better off running "git rev-list --all" once into a temporary file, and manually pick cut-off points from the resulting list of commits. After all we are talking about "approximately half" for such an usage, and older history does not matter much. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-14 22:57:32 +00:00
if (bisect_list)
revs.limited = 1;
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
if (revs.tree_objects)
mark_edges_uninteresting(revs.commits, &revs, show_edge);
if (bisect_list) {
int reaches = reaches, all = all;
revs.commits = find_bisection(revs.commits, &reaches, &all,
bisect_find_all);
if (bisect_show_vars) {
int cnt;
char hex[41];
if (!revs.commits)
return 1;
/*
* revs.commits can reach "reaches" commits among
* "all" commits. If it is good, then there are
* (all-reaches) commits left to be bisected.
* On the other hand, if it is bad, then the set
* to bisect is "reaches".
* A bisect set of size N has (N-1) commits further
* to test, as we already know one bad one.
*/
cnt = all - reaches;
if (cnt < reaches)
cnt = reaches;
strcpy(hex, sha1_to_hex(revs.commits->item->object.sha1));
if (bisect_find_all) {
traverse_commit_list(&revs, show_commit, show_object);
printf("------\n");
}
printf("bisect_rev=%s\n"
"bisect_nr=%d\n"
"bisect_good=%d\n"
"bisect_bad=%d\n"
rev-list: estimate number of bisection step left This patch teaches "git rev-list --bisect-vars" to output an estimate of the number of bisection step left _after the current one_ along with the other variables it already outputs. This patch also makes "git-bisect.sh" display this number of steps left _after the current one_, along with the estimate of the number of revisions left to test (after the current one). Here is a table to help analyse what should be the best estimate for the number of bisect steps left. N : linear case --> probabilities --> best ------------------------------------------------------------- 1 : G-B --> 0 --> 0 2 : G-U1-B --> 0 --> 0 3 : G-U1-U2-B --> 0(1/3) 1(2/3) --> 1 4 : G-U1-U2-U3-B --> 1 --> 1 5 : G-U1-U2-U3-U4-B --> 1(3/5) 2(2/5) --> 1 6 : G-U1-U2-U3-U4-U5-B --> 1(2/6) 2(4/6) --> 2 7 : G-U1-U2-U3-U4-U5-U6-B --> 1(1/7) 2(6/7) --> 2 8 : G-U1-U2-U3-U4-U5-U6-U7-B --> 2 --> 2 9 : G-U1-U2-U3-U4-U5-U6-U7-U8-B --> 2(7/9) 3(2/9) --> 2 10: G-U1-U2-U3-U4-U5-U6-U7-U8-U9-B --> 2(6/10)3(4/10)--> 2 In the column "N", there is the number of revisions that could _now_ be the first bad commit we are looking for. The "linear case" column describes the linear history corresponding to the number in column N. G means good, B means bad, and Ux means unknown. Note that the first bad revision we are looking for can be any Ux or B. In the "probabilities" column, there are the different outcomes in number of steps with the odds of each outcome in parenthesis corresponding to the linear case. The "best" column gives the most accurate estimate among the different outcomes in the "probabilities" column. We have the following: best(2^n) == n - 1 and for any x between 0 included and 2^n excluded, the probability for n - 1 steps left looks like: P(2^n + x) == (2^n - x) / (2^n + x) and P(2^n + x) < 0.5 means 2^n < 3x So the algorithm used in this patch calculates 2^n and x, and then choose between returning n - 1 and n. Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-02-21 08:26:01 +00:00
"bisect_all=%d\n"
"bisect_steps=%d\n",
hex,
cnt - 1,
all - reaches - 1,
reaches - 1,
rev-list: estimate number of bisection step left This patch teaches "git rev-list --bisect-vars" to output an estimate of the number of bisection step left _after the current one_ along with the other variables it already outputs. This patch also makes "git-bisect.sh" display this number of steps left _after the current one_, along with the estimate of the number of revisions left to test (after the current one). Here is a table to help analyse what should be the best estimate for the number of bisect steps left. N : linear case --> probabilities --> best ------------------------------------------------------------- 1 : G-B --> 0 --> 0 2 : G-U1-B --> 0 --> 0 3 : G-U1-U2-B --> 0(1/3) 1(2/3) --> 1 4 : G-U1-U2-U3-B --> 1 --> 1 5 : G-U1-U2-U3-U4-B --> 1(3/5) 2(2/5) --> 1 6 : G-U1-U2-U3-U4-U5-B --> 1(2/6) 2(4/6) --> 2 7 : G-U1-U2-U3-U4-U5-U6-B --> 1(1/7) 2(6/7) --> 2 8 : G-U1-U2-U3-U4-U5-U6-U7-B --> 2 --> 2 9 : G-U1-U2-U3-U4-U5-U6-U7-U8-B --> 2(7/9) 3(2/9) --> 2 10: G-U1-U2-U3-U4-U5-U6-U7-U8-U9-B --> 2(6/10)3(4/10)--> 2 In the column "N", there is the number of revisions that could _now_ be the first bad commit we are looking for. The "linear case" column describes the linear history corresponding to the number in column N. G means good, B means bad, and Ux means unknown. Note that the first bad revision we are looking for can be any Ux or B. In the "probabilities" column, there are the different outcomes in number of steps with the odds of each outcome in parenthesis corresponding to the linear case. The "best" column gives the most accurate estimate among the different outcomes in the "probabilities" column. We have the following: best(2^n) == n - 1 and for any x between 0 included and 2^n excluded, the probability for n - 1 steps left looks like: P(2^n + x) == (2^n - x) / (2^n + x) and P(2^n + x) < 0.5 means 2^n < 3x So the algorithm used in this patch calculates 2^n and x, and then choose between returning n - 1 and n. Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-02-21 08:26:01 +00:00
all,
estimate_bisect_steps(all));
return 0;
}
}
traverse_commit_list(&revs,
quiet ? finish_commit : show_commit,
quiet ? finish_object : show_object);
return 0;
}