Patch summary (free-form text ahead of the first "diff --git" header):
- diff.h, diff.c, diff-tree-helper.c: struct diff_spec drops the name/sha1 union in favour of a plain blob_sha1[20]; prepare_temp_file() reuses the work-tree file when work_tree_matches() finds a matching cache entry; run_external_diff() now stops on a non-zero exit status, and the built-in diff command line ends in "||:" so that it returns 0.
- fsck-cache.c: new --cache option marks blobs named by the cache as used and reachable.
- git-apply-patch-script: creates, deletes and patches files in separate case arms and exits non-zero on failure.
- git-prune-script: adds -n (dry run) and explicit head arguments, runs git-fsck-cache with --cache, honours SHA1_FILE_DIRECTORY; file mode changes from 100755 to 100644.
- local-pull.c: gives up when link() fails with ENOENT; messages reworded.
- pull.c: report_missing() names the object type, the missing object and the commit being processed.
diff --git a/diff-tree-helper.c b/diff-tree-helper.c index 621f90ad09..a68328321a 100644 --- a/diff-tree-helper.c +++ b/diff-tree-helper.c @@ -35,7 +35,7 @@ static int parse_oneside_change(const char *cp, struct diff_spec *one, if (strncmp(cp, "\tblob\t", 6)) return -1; cp += 6; - if (get_sha1_hex(cp, one->u.sha1)) + if (get_sha1_hex(cp, one->blob_sha1)) return -1; cp += 40; if (*cp++ != '\t') @@ -83,13 +83,13 @@ static int parse_diff_tree_output(const char *buf, if (strncmp(cp, "\tblob\t", 6)) return -1; cp += 6; - if (get_sha1_hex(cp, old.u.sha1)) + if (get_sha1_hex(cp, old.blob_sha1)) return -1; cp += 40; if (strncmp(cp, "->", 2)) return -1; cp += 2; - if (get_sha1_hex(cp, new.u.sha1)) + if (get_sha1_hex(cp, new.blob_sha1)) return -1; cp += 40; if (*cp++ != '\t') diff --git a/diff.c b/diff.c index a4d2b2d726..8dfa624432 100644 --- a/diff.c +++ b/diff.c @@ -83,7 +83,7 @@ static void builtin_diff(const char *name, { int i, next_at; const char *diff_cmd = "diff -L'%s%s' -L'%s%s'"; - const char *diff_arg = "'%s' '%s'"; + const char *diff_arg = "'%s' '%s'||:"; /* "||:" is to return 0 */ const char *input_name_sq[2]; const char *path0[2]; const char *path1[2]; @@ -132,11 +132,50 @@ static void builtin_diff(const char *name, execlp("/bin/sh","sh", "-c", cmd, NULL); } +/* + * Given a name and sha1 pair, if the dircache tells us the file in + * the work tree has that object contents, return true, so that + * prepare_temp_file() does not have to inflate and extract. + */ +static int work_tree_matches(const char *name, const unsigned char *sha1) +{ + struct cache_entry *ce; + struct stat st; + int pos, len; + + /* We do not read the cache ourselves here, because the + * benchmark with my previous version that always reads cache + * shows that it makes things worse for diff-tree comparing + * two linux-2.6 kernel trees in an already checked out work + * tree. 
This is because most diff-tree comparison deals with + * only a small number of files, while reading the cache is + * expensive for a large project, and its cost outweighs the + * savings we get by not inflating the object to a temporary + * file. Practically, this code only helps when we are used + * by diff-cache --cached, which does read the cache before + * calling us. + */ + if (!active_cache) + return 0; + + len = strlen(name); + pos = cache_name_pos(name, len); + if (pos < 0) + return 0; + ce = active_cache[pos]; + if ((stat(name, &st) < 0) || + cache_match_stat(ce, &st) || + memcmp(sha1, ce->sha1, 20)) + return 0; + return 1; +} + static void prepare_temp_file(const char *name, struct diff_tempfile *temp, struct diff_spec *one) { static unsigned char null_sha1[20] = { 0, }; + int use_work_tree = 0; if (!one->file_valid) { not_a_valid_file: @@ -150,20 +189,22 @@ static void prepare_temp_file(const char *name, } if (one->sha1_valid && - !memcmp(one->u.sha1, null_sha1, sizeof(null_sha1))) { - one->sha1_valid = 0; - one->u.name = name; - } + (!memcmp(one->blob_sha1, null_sha1, sizeof(null_sha1)) || + work_tree_matches(name, one->blob_sha1))) + use_work_tree = 1; - if (!one->sha1_valid) { + if (!one->sha1_valid || use_work_tree) { struct stat st; - temp->name = one->u.name; + temp->name = name; if (stat(temp->name, &st) < 0) { if (errno == ENOENT) goto not_a_valid_file; die("stat(%s): %s", temp->name, strerror(errno)); } - strcpy(temp->hex, sha1_to_hex(null_sha1)); + if (!one->sha1_valid) + strcpy(temp->hex, sha1_to_hex(null_sha1)); + else + strcpy(temp->hex, sha1_to_hex(one->blob_sha1)); sprintf(temp->mode, "%06o", S_IFREG |ce_permissions(st.st_mode)); } @@ -173,10 +214,10 @@ static void prepare_temp_file(const char *name, char type[20]; unsigned long size; - blob = read_sha1_file(one->u.sha1, type, &size); + blob = read_sha1_file(one->blob_sha1, type, &size); if (!blob || strcmp(type, "blob")) die("unable to read blob object for %s (%s)", - name, 
sha1_to_hex(one->u.sha1)); + name, sha1_to_hex(one->blob_sha1)); strcpy(temp->tmp_path, ".diff_XXXXXX"); fd = mkstemp(temp->tmp_path); @@ -187,7 +228,7 @@ static void prepare_temp_file(const char *name, close(fd); free(blob); temp->name = temp->tmp_path; - strcpy(temp->hex, sha1_to_hex(one->u.sha1)); + strcpy(temp->hex, sha1_to_hex(one->blob_sha1)); temp->hex[40] = 0; sprintf(temp->mode, "%06o", one->mode); } @@ -261,16 +302,20 @@ void run_external_diff(const char *name, printf("* Unmerged path %s\n", name); exit(0); } - if (waitpid(pid, &status, 0) < 0 || !WIFEXITED(status)) { - /* We do not check the exit status because typically + if (waitpid(pid, &status, 0) < 0 || + !WIFEXITED(status) || WEXITSTATUS(status)) { + /* Earlier we did not check the exit status because * diff exits non-zero if files are different, and - * we are not interested in knowing that. We *knew* - * they are different and that's why we ran diff - * in the first place! However if it dies by a signal, - * we stop processing immediately. + * we are not interested in knowing that. It was a + * mistake which made it harder to quit a diff-* + * session that uses the git-apply-patch-script as + * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF + * should also exit non-zero only when it wants to + * abort the entire diff-* session. 
*/ remove_tempfile(); - die("external diff died unexpectedly.\n"); + fprintf(stderr, "external diff died, stopping at %s.\n", name); + exit(1); } remove_tempfile(); } @@ -282,7 +327,7 @@ void diff_addremove(int addremove, unsigned mode, char concatpath[PATH_MAX]; struct diff_spec spec[2], *one, *two; - memcpy(spec[0].u.sha1, sha1, 20); + memcpy(spec[0].blob_sha1, sha1, 20); spec[0].mode = mode; spec[0].sha1_valid = spec[0].file_valid = 1; spec[1].file_valid = 0; @@ -307,9 +352,9 @@ void diff_change(unsigned old_mode, unsigned new_mode, char concatpath[PATH_MAX]; struct diff_spec spec[2]; - memcpy(spec[0].u.sha1, old_sha1, 20); + memcpy(spec[0].blob_sha1, old_sha1, 20); spec[0].mode = old_mode; - memcpy(spec[1].u.sha1, new_sha1, 20); + memcpy(spec[1].blob_sha1, new_sha1, 20); spec[1].mode = new_mode; spec[0].sha1_valid = spec[0].file_valid = 1; spec[1].sha1_valid = spec[1].file_valid = 1; diff --git a/diff.h b/diff.h index 8f269b2772..0b76cc4281 100644 --- a/diff.h +++ b/diff.h @@ -20,15 +20,12 @@ extern void diff_unmerge(const char *path); /* These are for diff-tree-helper */ struct diff_spec { - union { - const char *name; /* path on the filesystem */ - unsigned char sha1[20]; /* blob object ID */ - } u; + unsigned char blob_sha1[20]; unsigned short mode; /* file mode */ - unsigned sha1_valid : 1; /* if true, use u.sha1 and trust mode. - * (however with a NULL SHA1, read them - * from the file!). - * if false, use u.name and read mode from + unsigned sha1_valid : 1; /* if true, use blob_sha1 and trust mode; + * however with a NULL SHA1, read them + * from the file system. + * if false, use the name and read mode from * the filesystem. 
*/ unsigned file_valid : 1; /* if false the file does not even exist */ diff --git a/fsck-cache.c b/fsck-cache.c index e328cec75d..301cc67b76 100644 --- a/fsck-cache.c +++ b/fsck-cache.c @@ -12,6 +12,7 @@ static int show_root = 0; static int show_tags = 0; static int show_unreachable = 0; +static int keep_cache_objects = 0; static unsigned char head_sha1[20]; static void check_connectivity(void) @@ -275,8 +276,12 @@ int main(int argc, char **argv) show_root = 1; continue; } + if (!strcmp(arg, "--cache")) { + keep_cache_objects = 1; + continue; + } if (*arg == '-') - usage("fsck-cache [--tags] [[--unreachable] *]"); + usage("fsck-cache [--tags] [[--unreachable] [--cache] *]"); } sha1_dir = getenv(DB_ENVIRONMENT) ? : DEFAULT_DB_ENVIRONMENT; @@ -309,12 +314,27 @@ int main(int argc, char **argv) error("expected sha1, got %s", arg); } - if (!heads) { + if (keep_cache_objects) { + int i; + read_cache(); + for (i = 0; i < active_nr; i++) { + struct blob *blob = lookup_blob(active_cache[i]->sha1); + struct object *obj; + if (!blob) + continue; + obj = &blob->object; + obj->used = 1; + mark_reachable(obj, REACHABLE); + } + } + + if (!heads && !keep_cache_objects) { if (show_unreachable) { - fprintf(stderr, "unable to do reachability without a head\n"); + fprintf(stderr, "unable to do reachability without a head nor --cache\n"); show_unreachable = 0; } - fprintf(stderr, "expect dangling commits - potential heads - due to lack of head information\n"); + if (!heads) + fprintf(stderr, "expect dangling commits - potential heads - due to lack of head information\n"); } check_connectivity(); diff --git a/git-apply-patch-script b/git-apply-patch-script index c28015aad3..dccad27061 100755 --- a/git-apply-patch-script +++ b/git-apply-patch-script @@ -19,40 +19,60 @@ then echo >&2 "Unresolved patch conflicts in the previous run found." exit 1 fi -# This will say "patching ..." so we do not say anything outselves. 
-diff -u -L "a/$name" -L "b/$name" "$tmp1" "$tmp2" | patch -p1 -test -f "$name.rej" || { - case "$mode1,$mode2" in - .,?x) - # newly created - case "$mode2" in - +x) - echo >&2 "created $name with mode +x." - chmod "$mode2" "$name" - ;; - -x) - echo >&2 "created $name." - ;; - esac - git-update-cache --add -- "$name" +case "$mode1,$mode2" in +.,?x) + # newly created + dir=$(dirname "$name") + case "$dir" in '' | .) ;; *) mkdir -p "$dir" esac || { + echo >&2 "cannot create leading path for $name." + exit 1 + } + if test -f "$name" + then + echo >&2 "file $name to be created already exists." + exit 1 + fi + cat "$tmp2" >"$name" || { + echo >&2 "cannot create $name." + exit 1 + } + case "$mode2" in + +x) + echo >&2 "created $name with mode +x." + chmod "$mode2" "$name" ;; - ?x,.) - # deleted - echo >&2 "deleted $name." - rm -f "$name" - git-update-cache --remove -- "$name" + -x) + echo >&2 "created $name." ;; - *) - # changed - case "$mode1,$mode2" in - "$mode2,$mode1") ;; - *) - echo >&2 "changing mode from $mode1 to $mode2." - chmod "$mode2" "$name" - ;; - esac - git-update-cache -- "$name" esac -} -exit 0 + git-update-cache --add -- "$name" + ;; +?x,.) + # deleted + echo >&2 "deleted $name." + rm -f "$name" || { + echo >&2 "cannot remove $name"; + exit 1 + } + git-update-cache --remove -- "$name" + ;; +*) + # changed + dir=$(dirname "$name") + case "$dir" in '' | .) ;; *) mkdir -p "$dir" esac || { + echo >&2 "cannot create leading path for $name." + exit 1 + } + # This will say "patching ..." so we do not say anything ourselves. + diff -u -L "a/$name" -L "b/$name" "$tmp1" "$tmp2" | patch -p1 || exit + + case "$mode1,$mode2" in + "$mode2,$mode1") ;; + *) + echo >&2 "changing mode from $mode1 to $mode2." 
+ chmod "$mode2" "$name" + ;; + esac + git-update-cache -- "$name" +esac diff --git a/git-prune-script b/git-prune-script old mode 100755 new mode 100644 index a24ba616dd..9ba89a5b9d --- a/git-prune-script +++ b/git-prune-script @@ -1,4 +1,36 @@ #!/bin/sh -REFS=$(cat .git/refs/*/*) -[ "$REFS" ] || exit 1 -git-fsck-cache --unreachable $REFS | grep unreachable | cut -d' ' -f3 | sed 's:^\(..\):.git/objects/\1/:' | xargs -r rm +dryrun= +while case "$#" in 0) break ;; esac +do + case "$1" in + -n) dryrun=echo ;; + --) break ;; + -*) echo >&2 "usage: git-prune-script [ -n ] [ heads... ]"; exit 1 ;; + *) break ;; + esac + shift; +done + +# Defaulting to include .git/refs/*/* may be debatable from the +# purist POV but power users can always give explicit parameters +# to the script anyway. + +case "$#" in +0) + x_40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]' + x_40="$x_40$x_40$x_40$x_40$x_40$x_40$x_40$x_40" + set x $(sed -ne "/^$x_40\$/p" .git/HEAD .git/refs/*/* 2>/dev/null) + shift ;; +esac + +git-fsck-cache --cache --unreachable "$@" | +sed -ne '/unreachable /{ + s/unreachable [^ ][^ ]* // + s|\(..\)|\1/|p +}' | { + case "$SHA1_FILE_DIRECTORY" in + '') cd .git/objects/ ;; + *) cd "$SHA1_FILE_DIRECTORY" ;; + esac || exit + xargs -r $dryrun rm -f +} diff --git a/local-pull.c b/local-pull.c index ea38d87dfd..4f52bca48c 100644 --- a/local-pull.c +++ b/local-pull.c @@ -39,12 +39,19 @@ int fetch(unsigned char *sha1) filename[object_name_start+1] = hex[1]; filename[object_name_start+2] = '/'; strcpy(filename + object_name_start + 3, hex + 2); - if (use_link && !link(filename, dest_filename)) { - say("Hardlinked %s.\n", hex); - return 0; + if (use_link) { + if (!link(filename, dest_filename)) { + say("link %s\n", hex); + return 0; + } + /* If we got ENOENT there is no point continuing. 
*/ + if (errno == ENOENT) { + fprintf(stderr, "does not exist %s\n", filename); + return -1; + } } if (use_symlink && !symlink(filename, dest_filename)) { - say("Symlinked %s.\n", hex); + say("symlink %s\n", hex); return 0; } if (use_filecopy) { @@ -54,13 +61,13 @@ int fetch(unsigned char *sha1) ifd = open(filename, O_RDONLY); if (ifd < 0 || fstat(ifd, &st) < 0) { close(ifd); - fprintf(stderr, "Cannot open %s\n", filename); + fprintf(stderr, "cannot open %s\n", filename); return -1; } map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, ifd, 0); close(ifd); if (-1 == (int)(long)map) { - fprintf(stderr, "Cannot mmap %s\n", filename); + fprintf(stderr, "cannot mmap %s\n", filename); return -1; } ofd = open(dest_filename, O_WRONLY | O_CREAT | O_EXCL, 0666); @@ -69,13 +76,13 @@ int fetch(unsigned char *sha1) munmap(map, st.st_size); close(ofd); if (status) - fprintf(stderr, "Cannot write %s (%ld bytes)\n", + fprintf(stderr, "cannot write %s (%ld bytes)\n", dest_filename, st.st_size); else - say("Copied %s.\n", hex); + say("copy %s\n", hex); return status; } - fprintf(stderr, "No copy method was provided to copy %s.\n", hex); + fprintf(stderr, "failed to copy %s with given copy methods.\n", hex); return -1; } diff --git a/pull.c b/pull.c index bbef245fc8..55f17c0a03 100644 --- a/pull.c +++ b/pull.c @@ -7,12 +7,31 @@ int get_tree = 0; int get_history = 0; int get_all = 0; +static unsigned char current_commit_sha1[20]; -static int make_sure_we_have_it(unsigned char *sha1) +static const char commitS[] = "commit"; +static const char treeS[] = "tree"; +static const char blobS[] = "blob"; + +static void report_missing(const char *what, const unsigned char *missing) { + char missing_hex[41]; + + strcpy(missing_hex, sha1_to_hex(missing)); + fprintf(stderr, + "Cannot obtain needed %s %s\nwhile processing commit %s.\n", + what, missing_hex, sha1_to_hex(current_commit_sha1)); +} + +static int make_sure_we_have_it(const char *what, unsigned char *sha1) +{ + int status; if 
(has_sha1_file(sha1)) return 0; - return fetch(sha1); + status = fetch(sha1); + if (status && what) + report_missing(what, sha1); + return status; } static int process_tree(unsigned char *sha1) @@ -24,7 +43,8 @@ static int process_tree(unsigned char *sha1) return -1; for (entries = tree->entries; entries; entries = entries->next) { - if (make_sure_we_have_it(entries->item.tree->object.sha1)) + const char *what = entries->directory ? treeS : blobS; + if (make_sure_we_have_it(what, entries->item.tree->object.sha1)) return -1; if (entries->directory) { if (process_tree(entries->item.tree->object.sha1)) @@ -38,14 +58,14 @@ static int process_commit(unsigned char *sha1) { struct commit *obj = lookup_commit(sha1); - if (make_sure_we_have_it(sha1)) + if (make_sure_we_have_it(commitS, sha1)) return -1; if (parse_commit(obj)) return -1; if (get_tree) { - if (make_sure_we_have_it(obj->tree->object.sha1)) + if (make_sure_we_have_it(treeS, obj->tree->object.sha1)) return -1; if (process_tree(obj->tree->object.sha1)) return -1; @@ -57,7 +77,8 @@ static int process_commit(unsigned char *sha1) for (; parents; parents = parents->next) { if (has_sha1_file(parents->item->object.sha1)) continue; - if (make_sure_we_have_it(parents->item->object.sha1)) { + if (make_sure_we_have_it(NULL, + parents->item->object.sha1)) { /* The server might not have it, and * we don't mind. */ @@ -65,6 +86,7 @@ static int process_commit(unsigned char *sha1) } if (process_commit(parents->item->object.sha1)) return -1; + memcpy(current_commit_sha1, sha1, 20); } } return 0; @@ -77,8 +99,9 @@ int pull(char *target) retval = get_sha1_hex(target, sha1); if (retval) return retval; - retval = make_sure_we_have_it(sha1); + retval = make_sure_we_have_it(commitS, sha1); if (retval) return retval; + memcpy(current_commit_sha1, sha1, 20); return process_commit(sha1); }