git/builtin-reflog.c
Junio Hamano 24cb1bb198 Speed up reflog pruning of unreachable commits
Instead of doing the (potentially very expensive) "in_merge_base()"
check for each commit that might be pruned if it is unreachable, do a
preparatory reachability graph of the commit space, so that the common
case of being reachable can be tested directly.

[ Cleaned up a bit and tweaked to actually work.  - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-19 12:31:56 -07:00

717 lines
18 KiB
C

#include "cache.h"
#include "builtin.h"
#include "commit.h"
#include "refs.h"
#include "dir.h"
#include "tree-walk.h"
#include "diff.h"
#include "revision.h"
#include "reachable.h"
/*
* reflog expire
*/
static const char reflog_expire_usage[] =
"git reflog (show|expire) [--verbose] [--dry-run] [--stale-fix] [--expire=<time>] [--expire-unreachable=<time>] [--all] <refs>...";
static const char reflog_delete_usage[] =
"git reflog delete [--verbose] [--dry-run] [--rewrite] [--updateref] <refs>...";
static unsigned long default_reflog_expire;
static unsigned long default_reflog_expire_unreachable;
struct cmd_reflog_expire_cb {
struct rev_info revs;
int dry_run;
int stalefix;
int rewrite;
int updateref;
int verbose;
unsigned long expire_total;
unsigned long expire_unreachable;
int recno;
};
struct expire_reflog_cb {
FILE *newlog;
const char *ref;
struct commit *ref_commit;
struct cmd_reflog_expire_cb *cmd;
unsigned char last_kept_sha1[20];
};
struct collected_reflog {
unsigned char sha1[20];
char reflog[FLEX_ARRAY];
};
struct collect_reflog_cb {
struct collected_reflog **e;
int alloc;
int nr;
};
#define INCOMPLETE (1u<<10)
#define STUDYING (1u<<11)
#define REACHABLE (1u<<12)
static int tree_is_complete(const unsigned char *sha1)
{
struct tree_desc desc;
struct name_entry entry;
int complete;
struct tree *tree;
tree = lookup_tree(sha1);
if (!tree)
return 0;
if (tree->object.flags & SEEN)
return 1;
if (tree->object.flags & INCOMPLETE)
return 0;
if (!tree->buffer) {
enum object_type type;
unsigned long size;
void *data = read_sha1_file(sha1, &type, &size);
if (!data) {
tree->object.flags |= INCOMPLETE;
return 0;
}
tree->buffer = data;
tree->size = size;
}
init_tree_desc(&desc, tree->buffer, tree->size);
complete = 1;
while (tree_entry(&desc, &entry)) {
if (!has_sha1_file(entry.sha1) ||
(S_ISDIR(entry.mode) && !tree_is_complete(entry.sha1))) {
tree->object.flags |= INCOMPLETE;
complete = 0;
}
}
free(tree->buffer);
tree->buffer = NULL;
if (complete)
tree->object.flags |= SEEN;
return complete;
}
static int commit_is_complete(struct commit *commit)
{
struct object_array study;
struct object_array found;
int is_incomplete = 0;
int i;
/* early return */
if (commit->object.flags & SEEN)
return 1;
if (commit->object.flags & INCOMPLETE)
return 0;
/*
* Find all commits that are reachable and are not marked as
* SEEN. Then make sure the trees and blobs contained are
* complete. After that, mark these commits also as SEEN.
* If some of the objects that are needed to complete this
* commit are missing, mark this commit as INCOMPLETE.
*/
memset(&study, 0, sizeof(study));
memset(&found, 0, sizeof(found));
add_object_array(&commit->object, NULL, &study);
add_object_array(&commit->object, NULL, &found);
commit->object.flags |= STUDYING;
while (study.nr) {
struct commit *c;
struct commit_list *parent;
c = (struct commit *)study.objects[--study.nr].item;
if (!c->object.parsed && !parse_object(c->object.sha1))
c->object.flags |= INCOMPLETE;
if (c->object.flags & INCOMPLETE) {
is_incomplete = 1;
break;
}
else if (c->object.flags & SEEN)
continue;
for (parent = c->parents; parent; parent = parent->next) {
struct commit *p = parent->item;
if (p->object.flags & STUDYING)
continue;
p->object.flags |= STUDYING;
add_object_array(&p->object, NULL, &study);
add_object_array(&p->object, NULL, &found);
}
}
if (!is_incomplete) {
/*
* make sure all commits in "found" array have all the
* necessary objects.
*/
for (i = 0; i < found.nr; i++) {
struct commit *c =
(struct commit *)found.objects[i].item;
if (!tree_is_complete(c->tree->object.sha1)) {
is_incomplete = 1;
c->object.flags |= INCOMPLETE;
}
}
if (!is_incomplete) {
/* mark all found commits as complete, iow SEEN */
for (i = 0; i < found.nr; i++)
found.objects[i].item->flags |= SEEN;
}
}
/* clear flags from the objects we traversed */
for (i = 0; i < found.nr; i++)
found.objects[i].item->flags &= ~STUDYING;
if (is_incomplete)
commit->object.flags |= INCOMPLETE;
else {
/*
* If we come here, we have (1) traversed the ancestry chain
* from the "commit" until we reach SEEN commits (which are
* known to be complete), and (2) made sure that the commits
* encountered during the above traversal refer to trees that
* are complete. Which means that we know *all* the commits
* we have seen during this process are complete.
*/
for (i = 0; i < found.nr; i++)
found.objects[i].item->flags |= SEEN;
}
/* free object arrays */
free(study.objects);
free(found.objects);
return !is_incomplete;
}
static int keep_entry(struct commit **it, unsigned char *sha1)
{
struct commit *commit;
if (is_null_sha1(sha1))
return 1;
commit = lookup_commit_reference_gently(sha1, 1);
if (!commit)
return 0;
/*
* Make sure everything in this commit exists.
*
* We have walked all the objects reachable from the refs
* and cache earlier. The commits reachable by this commit
* must meet SEEN commits -- and then we should mark them as
* SEEN as well.
*/
if (!commit_is_complete(commit))
return 0;
*it = commit;
return 1;
}
static int unreachable(struct expire_reflog_cb *cb, struct commit *commit, unsigned char *sha1)
{
/*
* We may or may not have the commit yet - if not, look it
* up using the supplied sha1.
*/
if (!commit) {
if (is_null_sha1(sha1))
return 0;
commit = lookup_commit_reference_gently(sha1, 1);
/* Not a commit -- keep it */
if (!commit)
return 0;
}
/* Reachable from the current ref? Don't prune. */
if (commit->object.flags & REACHABLE)
return 0;
if (in_merge_bases(commit, &cb->ref_commit, 1))
return 0;
/* We can't reach it - prune it. */
return 1;
}
static void mark_reachable(struct commit *commit, unsigned long expire_limit)
{
/*
* We need to compute if commit on either side of an reflog
* entry is reachable from the tip of the ref for all entries.
* Mark commits that are reachable from the tip down to the
* time threashold first; we know a commit marked thusly is
* reachable from the tip without running in_merge_bases()
* at all.
*/
struct commit_list *pending = NULL;
commit_list_insert(commit, &pending);
while (pending) {
struct commit_list *entry = pending;
struct commit_list *parent;
pending = entry->next;
commit = entry->item;
free(entry);
if (commit->object.flags & REACHABLE)
continue;
if (parse_commit(commit))
continue;
commit->object.flags |= REACHABLE;
if (commit->date < expire_limit)
continue;
parent = commit->parents;
while (parent) {
commit = parent->item;
parent = parent->next;
if (commit->object.flags & REACHABLE)
continue;
commit_list_insert(commit, &pending);
}
}
}
static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
const char *email, unsigned long timestamp, int tz,
const char *message, void *cb_data)
{
struct expire_reflog_cb *cb = cb_data;
struct commit *old, *new;
if (timestamp < cb->cmd->expire_total)
goto prune;
if (cb->cmd->rewrite)
osha1 = cb->last_kept_sha1;
old = new = NULL;
if (cb->cmd->stalefix &&
(!keep_entry(&old, osha1) || !keep_entry(&new, nsha1)))
goto prune;
if (timestamp < cb->cmd->expire_unreachable) {
if (!cb->ref_commit)
goto prune;
if (unreachable(cb, old, osha1) || unreachable(cb, new, nsha1))
goto prune;
}
if (cb->cmd->recno && --(cb->cmd->recno) == 0)
goto prune;
if (cb->newlog) {
char sign = (tz < 0) ? '-' : '+';
int zone = (tz < 0) ? (-tz) : tz;
fprintf(cb->newlog, "%s %s %s %lu %c%04d\t%s",
sha1_to_hex(osha1), sha1_to_hex(nsha1),
email, timestamp, sign, zone,
message);
hashcpy(cb->last_kept_sha1, nsha1);
}
if (cb->cmd->verbose)
printf("keep %s", message);
return 0;
prune:
if (!cb->newlog || cb->cmd->verbose)
printf("%sprune %s", cb->newlog ? "" : "would ", message);
return 0;
}
static int expire_reflog(const char *ref, const unsigned char *sha1, int unused, void *cb_data)
{
struct cmd_reflog_expire_cb *cmd = cb_data;
struct expire_reflog_cb cb;
struct ref_lock *lock;
char *log_file, *newlog_path = NULL;
int status = 0;
memset(&cb, 0, sizeof(cb));
/*
* we take the lock for the ref itself to prevent it from
* getting updated.
*/
lock = lock_any_ref_for_update(ref, sha1, 0);
if (!lock)
return error("cannot lock ref '%s'", ref);
log_file = git_pathdup("logs/%s", ref);
if (!file_exists(log_file))
goto finish;
if (!cmd->dry_run) {
newlog_path = git_pathdup("logs/%s.lock", ref);
cb.newlog = fopen(newlog_path, "w");
}
cb.ref_commit = lookup_commit_reference_gently(sha1, 1);
cb.ref = ref;
cb.cmd = cmd;
if (cb.ref_commit)
mark_reachable(cb.ref_commit, cmd->expire_total);
for_each_reflog_ent(ref, expire_reflog_ent, &cb);
if (cb.ref_commit)
clear_commit_marks(cb.ref_commit, REACHABLE);
finish:
if (cb.newlog) {
if (fclose(cb.newlog)) {
status |= error("%s: %s", strerror(errno),
newlog_path);
unlink(newlog_path);
} else if (cmd->updateref &&
(write_in_full(lock->lock_fd,
sha1_to_hex(cb.last_kept_sha1), 40) != 40 ||
write_in_full(lock->lock_fd, "\n", 1) != 1 ||
close_ref(lock) < 0)) {
status |= error("Couldn't write %s",
lock->lk->filename);
unlink(newlog_path);
} else if (rename(newlog_path, log_file)) {
status |= error("cannot rename %s to %s",
newlog_path, log_file);
unlink(newlog_path);
} else if (cmd->updateref && commit_ref(lock)) {
status |= error("Couldn't set %s", lock->ref_name);
} else {
adjust_shared_perm(log_file);
}
}
free(newlog_path);
free(log_file);
unlock_ref(lock);
return status;
}
static int collect_reflog(const char *ref, const unsigned char *sha1, int unused, void *cb_data)
{
struct collected_reflog *e;
struct collect_reflog_cb *cb = cb_data;
size_t namelen = strlen(ref);
e = xmalloc(sizeof(*e) + namelen + 1);
hashcpy(e->sha1, sha1);
memcpy(e->reflog, ref, namelen + 1);
ALLOC_GROW(cb->e, cb->nr + 1, cb->alloc);
cb->e[cb->nr++] = e;
return 0;
}
static struct reflog_expire_cfg {
struct reflog_expire_cfg *next;
unsigned long expire_total;
unsigned long expire_unreachable;
size_t len;
char pattern[FLEX_ARRAY];
} *reflog_expire_cfg, **reflog_expire_cfg_tail;
static struct reflog_expire_cfg *find_cfg_ent(const char *pattern, size_t len)
{
struct reflog_expire_cfg *ent;
if (!reflog_expire_cfg_tail)
reflog_expire_cfg_tail = &reflog_expire_cfg;
for (ent = reflog_expire_cfg; ent; ent = ent->next)
if (ent->len == len &&
!memcmp(ent->pattern, pattern, len))
return ent;
ent = xcalloc(1, (sizeof(*ent) + len));
memcpy(ent->pattern, pattern, len);
ent->len = len;
*reflog_expire_cfg_tail = ent;
reflog_expire_cfg_tail = &(ent->next);
return ent;
}
static int parse_expire_cfg_value(const char *var, const char *value, unsigned long *expire)
{
if (!value)
return config_error_nonbool(var);
if (!strcmp(value, "never") || !strcmp(value, "false")) {
*expire = 0;
return 0;
}
*expire = approxidate(value);
return 0;
}
/* expiry timer slot */
#define EXPIRE_TOTAL 01
#define EXPIRE_UNREACH 02
static int reflog_expire_config(const char *var, const char *value, void *cb)
{
const char *lastdot = strrchr(var, '.');
unsigned long expire;
int slot;
struct reflog_expire_cfg *ent;
if (!lastdot || prefixcmp(var, "gc."))
return git_default_config(var, value, cb);
if (!strcmp(lastdot, ".reflogexpire")) {
slot = EXPIRE_TOTAL;
if (parse_expire_cfg_value(var, value, &expire))
return -1;
} else if (!strcmp(lastdot, ".reflogexpireunreachable")) {
slot = EXPIRE_UNREACH;
if (parse_expire_cfg_value(var, value, &expire))
return -1;
} else
return git_default_config(var, value, cb);
if (lastdot == var + 2) {
switch (slot) {
case EXPIRE_TOTAL:
default_reflog_expire = expire;
break;
case EXPIRE_UNREACH:
default_reflog_expire_unreachable = expire;
break;
}
return 0;
}
ent = find_cfg_ent(var + 3, lastdot - (var+3));
if (!ent)
return -1;
switch (slot) {
case EXPIRE_TOTAL:
ent->expire_total = expire;
break;
case EXPIRE_UNREACH:
ent->expire_unreachable = expire;
break;
}
return 0;
}
static void set_reflog_expiry_param(struct cmd_reflog_expire_cb *cb, int slot, const char *ref)
{
struct reflog_expire_cfg *ent;
if (slot == (EXPIRE_TOTAL|EXPIRE_UNREACH))
return; /* both given explicitly -- nothing to tweak */
for (ent = reflog_expire_cfg; ent; ent = ent->next) {
if (!fnmatch(ent->pattern, ref, 0)) {
if (!(slot & EXPIRE_TOTAL))
cb->expire_total = ent->expire_total;
if (!(slot & EXPIRE_UNREACH))
cb->expire_unreachable = ent->expire_unreachable;
return;
}
}
/*
* If unconfigured, make stash never expire
*/
if (!strcmp(ref, "refs/stash")) {
if (!(slot & EXPIRE_TOTAL))
cb->expire_total = 0;
if (!(slot & EXPIRE_UNREACH))
cb->expire_unreachable = 0;
return;
}
/* Nothing matched -- use the default value */
if (!(slot & EXPIRE_TOTAL))
cb->expire_total = default_reflog_expire;
if (!(slot & EXPIRE_UNREACH))
cb->expire_unreachable = default_reflog_expire_unreachable;
}
static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
{
struct cmd_reflog_expire_cb cb;
unsigned long now = time(NULL);
int i, status, do_all;
int explicit_expiry = 0;
git_config(reflog_expire_config, NULL);
save_commit_buffer = 0;
do_all = status = 0;
memset(&cb, 0, sizeof(cb));
if (!default_reflog_expire_unreachable)
default_reflog_expire_unreachable = now - 30 * 24 * 3600;
if (!default_reflog_expire)
default_reflog_expire = now - 90 * 24 * 3600;
cb.expire_total = default_reflog_expire;
cb.expire_unreachable = default_reflog_expire_unreachable;
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, "--dry-run") || !strcmp(arg, "-n"))
cb.dry_run = 1;
else if (!prefixcmp(arg, "--expire=")) {
cb.expire_total = approxidate(arg + 9);
explicit_expiry |= EXPIRE_TOTAL;
}
else if (!prefixcmp(arg, "--expire-unreachable=")) {
cb.expire_unreachable = approxidate(arg + 21);
explicit_expiry |= EXPIRE_UNREACH;
}
else if (!strcmp(arg, "--stale-fix"))
cb.stalefix = 1;
else if (!strcmp(arg, "--rewrite"))
cb.rewrite = 1;
else if (!strcmp(arg, "--updateref"))
cb.updateref = 1;
else if (!strcmp(arg, "--all"))
do_all = 1;
else if (!strcmp(arg, "--verbose"))
cb.verbose = 1;
else if (!strcmp(arg, "--")) {
i++;
break;
}
else if (arg[0] == '-')
usage(reflog_expire_usage);
else
break;
}
/*
* We can trust the commits and objects reachable from refs
* even in older repository. We cannot trust what's reachable
* from reflog if the repository was pruned with older git.
*/
if (cb.stalefix) {
init_revisions(&cb.revs, prefix);
if (cb.verbose)
printf("Marking reachable objects...");
mark_reachable_objects(&cb.revs, 0);
if (cb.verbose)
putchar('\n');
}
if (do_all) {
struct collect_reflog_cb collected;
int i;
memset(&collected, 0, sizeof(collected));
for_each_reflog(collect_reflog, &collected);
for (i = 0; i < collected.nr; i++) {
struct collected_reflog *e = collected.e[i];
set_reflog_expiry_param(&cb, explicit_expiry, e->reflog);
status |= expire_reflog(e->reflog, e->sha1, 0, &cb);
free(e);
}
free(collected.e);
}
for (; i < argc; i++) {
char *ref;
unsigned char sha1[20];
if (!dwim_log(argv[i], strlen(argv[i]), sha1, &ref)) {
status |= error("%s points nowhere!", argv[i]);
continue;
}
set_reflog_expiry_param(&cb, explicit_expiry, ref);
status |= expire_reflog(ref, sha1, 0, &cb);
}
return status;
}
static int count_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
const char *email, unsigned long timestamp, int tz,
const char *message, void *cb_data)
{
struct cmd_reflog_expire_cb *cb = cb_data;
if (!cb->expire_total || timestamp < cb->expire_total)
cb->recno++;
return 0;
}
static int cmd_reflog_delete(int argc, const char **argv, const char *prefix)
{
struct cmd_reflog_expire_cb cb;
int i, status = 0;
memset(&cb, 0, sizeof(cb));
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, "--dry-run") || !strcmp(arg, "-n"))
cb.dry_run = 1;
else if (!strcmp(arg, "--rewrite"))
cb.rewrite = 1;
else if (!strcmp(arg, "--updateref"))
cb.updateref = 1;
else if (!strcmp(arg, "--verbose"))
cb.verbose = 1;
else if (!strcmp(arg, "--")) {
i++;
break;
}
else if (arg[0] == '-')
usage(reflog_delete_usage);
else
break;
}
if (argc - i < 1)
return error("Nothing to delete?");
for ( ; i < argc; i++) {
const char *spec = strstr(argv[i], "@{");
unsigned char sha1[20];
char *ep, *ref;
int recno;
if (!spec) {
status |= error("Not a reflog: %s", argv[i]);
continue;
}
if (!dwim_log(argv[i], spec - argv[i], sha1, &ref)) {
status |= error("no reflog for '%s'", argv[i]);
continue;
}
recno = strtoul(spec + 2, &ep, 10);
if (*ep == '}') {
cb.recno = -recno;
for_each_reflog_ent(ref, count_reflog_ent, &cb);
} else {
cb.expire_total = approxidate(spec + 2);
for_each_reflog_ent(ref, count_reflog_ent, &cb);
cb.expire_total = 0;
}
status |= expire_reflog(ref, sha1, 0, &cb);
free(ref);
}
return status;
}
/*
* main "reflog"
*/
static const char reflog_usage[] =
"git reflog (expire | ...)";
int cmd_reflog(int argc, const char **argv, const char *prefix)
{
/* With no command, we default to showing it. */
if (argc < 2 || *argv[1] == '-')
return cmd_log_reflog(argc, argv, prefix);
if (!strcmp(argv[1], "show"))
return cmd_log_reflog(argc - 1, argv + 1, prefix);
if (!strcmp(argv[1], "expire"))
return cmd_reflog_expire(argc - 1, argv + 1, prefix);
if (!strcmp(argv[1], "delete"))
return cmd_reflog_delete(argc - 1, argv + 1, prefix);
/* Not a recognized reflog command..*/
usage(reflog_usage);
}