From b2d62f161ebfb628cbe32900213ac18f9aa5bcb4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 4 May 2005 01:26:24 -0700 Subject: [PATCH 1/6] Make git-*-pull say who wants them for missing objects. This patch updates pull.c, the engine that decides which objects are needed, given a commit to traverse from, to report which commit was calling for the object that cannot be retrieved from the remote side. This complements git-fsck-cache in that it checks the consistency of the remote repository for reachability. Signed-off-by: Junio C Hamano --- pull.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/pull.c b/pull.c index bbef245fc8..55f17c0a03 100644 --- a/pull.c +++ b/pull.c @@ -7,12 +7,31 @@ int get_tree = 0; int get_history = 0; int get_all = 0; +static unsigned char current_commit_sha1[20]; -static int make_sure_we_have_it(unsigned char *sha1) +static const char commitS[] = "commit"; +static const char treeS[] = "tree"; +static const char blobS[] = "blob"; + +static void report_missing(const char *what, const unsigned char *missing) { + char missing_hex[41]; + + strcpy(missing_hex, sha1_to_hex(missing));; + fprintf(stderr, + "Cannot obtain needed %s %s\nwhile processing commit %s.\n", + what, missing_hex, sha1_to_hex(current_commit_sha1)); +} + +static int make_sure_we_have_it(const char *what, unsigned char *sha1) +{ + int status; if (has_sha1_file(sha1)) return 0; - return fetch(sha1); + status = fetch(sha1); + if (status && what) + report_missing(what, sha1); + return status; } static int process_tree(unsigned char *sha1) @@ -24,7 +43,8 @@ static int process_tree(unsigned char *sha1) return -1; for (entries = tree->entries; entries; entries = entries->next) { - if (make_sure_we_have_it(entries->item.tree->object.sha1)) + const char *what = entries->directory ? 
treeS : blobS; + if (make_sure_we_have_it(what, entries->item.tree->object.sha1)) return -1; if (entries->directory) { if (process_tree(entries->item.tree->object.sha1)) @@ -38,14 +58,14 @@ static int process_commit(unsigned char *sha1) { struct commit *obj = lookup_commit(sha1); - if (make_sure_we_have_it(sha1)) + if (make_sure_we_have_it(commitS, sha1)) return -1; if (parse_commit(obj)) return -1; if (get_tree) { - if (make_sure_we_have_it(obj->tree->object.sha1)) + if (make_sure_we_have_it(treeS, obj->tree->object.sha1)) return -1; if (process_tree(obj->tree->object.sha1)) return -1; @@ -57,7 +77,8 @@ static int process_commit(unsigned char *sha1) for (; parents; parents = parents->next) { if (has_sha1_file(parents->item->object.sha1)) continue; - if (make_sure_we_have_it(parents->item->object.sha1)) { + if (make_sure_we_have_it(NULL, + parents->item->object.sha1)) { /* The server might not have it, and * we don't mind. */ @@ -65,6 +86,7 @@ static int process_commit(unsigned char *sha1) } if (process_commit(parents->item->object.sha1)) return -1; + memcpy(current_commit_sha1, sha1, 20); } } return 0; @@ -77,8 +99,9 @@ int pull(char *target) retval = get_sha1_hex(target, sha1); if (retval) return retval; - retval = make_sure_we_have_it(sha1); + retval = make_sure_we_have_it(commitS, sha1); if (retval) return retval; + memcpy(current_commit_sha1, sha1, 20); return process_commit(sha1); } From fd0ffd3ad12d6aa49b8ac2cce7728976d678cc63 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 4 May 2005 01:28:45 -0700 Subject: [PATCH 2/6] Short-cut error return path in git-local-pull. When git-local-pull with -l option gets ENOENT attempting to create a hard link, there is no point falling back to other copy methods. With this patch, git-local-pull detects such a case and gives up copying the file early. 
Signed-off-by: Junio C Hamano --- local-pull.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/local-pull.c b/local-pull.c index ea38d87dfd..4f52bca48c 100644 --- a/local-pull.c +++ b/local-pull.c @@ -39,12 +39,19 @@ int fetch(unsigned char *sha1) filename[object_name_start+1] = hex[1]; filename[object_name_start+2] = '/'; strcpy(filename + object_name_start + 3, hex + 2); - if (use_link && !link(filename, dest_filename)) { - say("Hardlinked %s.\n", hex); - return 0; + if (use_link) { + if (!link(filename, dest_filename)) { + say("link %s\n", hex); + return 0; + } + /* If we got ENOENT there is no point continuing. */ + if (errno == ENOENT) { + fprintf(stderr, "does not exist %s\n", filename); + return -1; + } } if (use_symlink && !symlink(filename, dest_filename)) { - say("Symlinked %s.\n", hex); + say("symlink %s\n", hex); return 0; } if (use_filecopy) { @@ -54,13 +61,13 @@ int fetch(unsigned char *sha1) ifd = open(filename, O_RDONLY); if (ifd < 0 || fstat(ifd, &st) < 0) { close(ifd); - fprintf(stderr, "Cannot open %s\n", filename); + fprintf(stderr, "cannot open %s\n", filename); return -1; } map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, ifd, 0); close(ifd); if (-1 == (int)(long)map) { - fprintf(stderr, "Cannot mmap %s\n", filename); + fprintf(stderr, "cannot mmap %s\n", filename); return -1; } ofd = open(dest_filename, O_WRONLY | O_CREAT | O_EXCL, 0666); @@ -69,13 +76,13 @@ int fetch(unsigned char *sha1) munmap(map, st.st_size); close(ofd); if (status) - fprintf(stderr, "Cannot write %s (%ld bytes)\n", + fprintf(stderr, "cannot write %s (%ld bytes)\n", dest_filename, st.st_size); else - say("Copied %s.\n", hex); + say("copy %s\n", hex); return status; } - fprintf(stderr, "No copy method was provided to copy %s.\n", hex); + fprintf(stderr, "failed to copy %s with given copy methods.\n", hex); return -1; } From ae7c0c92c0713307986bcd1fb54fa0694aae962a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 4 
May 2005 01:33:33 -0700 Subject: [PATCH 3/6] Git-prune-script loses blobs referenced from an uncommitted cache. (updated from the version posted to the GIT mailing list). When a new blob is registered with update-cache, and before the cache is written as a tree and committed, git-fsck-cache will find the blob unreachable. This patch adds a new flag, "--cache" to git-fsck-cache, with which it keeps such blobs from being considered "unreachable". The git-prune-script is updated to use this new flag. At the same time it adds .git/refs/*/* to the set of default locations to look for heads, which should be consistent with expectations from Cogito users. Without this fix, "diff-cache -p --cached" after git-prune-script has pruned the blob object will fail mysteriously and git-write-tree would also fail. Signed-off-by: Junio C Hamano --- fsck-cache.c | 28 ++++++++++++++++++++++++---- git-prune-script | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/fsck-cache.c b/fsck-cache.c index fb0d82f332..5db07e0f6d 100644 --- a/fsck-cache.c +++ b/fsck-cache.c @@ -12,6 +12,7 @@ static int show_root = 0; static int show_tags = 0; static int show_unreachable = 0; +static int keep_cache_objects = 0; static unsigned char head_sha1[20]; static void check_connectivity(void) @@ -275,8 +276,12 @@ int main(int argc, char **argv) show_root = 1; continue; } + if (!strcmp(arg, "--cache")) { + keep_cache_objects = 1; + continue; + } if (*arg == '-') - usage("fsck-cache [--tags] [[--unreachable] *]"); + usage("fsck-cache [--tags] [[--unreachable] [--cache] *]"); } sha1_dir = getenv(DB_ENVIRONMENT) ?
: DEFAULT_DB_ENVIRONMENT; @@ -311,12 +316,27 @@ int main(int argc, char **argv) error("expected sha1, got %s", arg); } - if (!heads) { + if (keep_cache_objects) { + int i; + read_cache(); + for (i = 0; i < active_nr; i++) { + struct blob *blob = lookup_blob(active_cache[i]->sha1); + struct object *obj; + if (!blob) + continue; + obj = &blob->object; + obj->used = 1; + mark_reachable(obj, REACHABLE); + } + } + + if (!heads && !keep_cache_objects) { if (show_unreachable) { - fprintf(stderr, "unable to do reachability without a head\n"); + fprintf(stderr, "unable to do reachability without a head nor --cache\n"); show_unreachable = 0; } - fprintf(stderr, "expect dangling commits - potential heads - due to lack of head information\n"); + if (!heads) + fprintf(stderr, "expect dangling commits - potential heads - due to lack of head information\n"); } check_connectivity(); diff --git a/git-prune-script b/git-prune-script index b6e927a749..30a989e8f4 100755 --- a/git-prune-script +++ b/git-prune-script @@ -1,2 +1,37 @@ #!/bin/sh -git-fsck-cache --unreachable $(cat .git/HEAD ) | grep unreachable | cut -d' ' -f3 | sed 's:^\(..\):.git/objects/\1/:' | xargs rm + +dryrun= +while case "$#" in 0) break ;; esac +do + case "$1" in + -n) dryrun=echo ;; + --) break ;; + -*) echo >&2 "usage: git-prune-script [ -n ] [ heads... ]"; exit 1 ;; + *) break ;; + esac + shift; +done + +# Defaulting to include .git/refs/*/* may be debatable from the +# purist POV but power users can always give explicit parameters +# to the script anyway. 
+ +case "$#" in +0) + x_40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]' + x_40="$x_40$x_40$x_40$x_40$x_40$x_40$x_40$x_40" + set x $(sed -ne "/^$x_40\$/p" .git/HEAD .git/refs/*/* 2>/dev/null) + shift ;; +esac + +git-fsck-cache --cache --unreachable "$@" | +sed -ne '/unreachable /{ + s/unreachable [^ ][^ ]* // + s|\(..\)|\1/|p +}' | { + case "$SHA1_FILE_DIRECTORY" in + '') cd .git/objects/ ;; + *) cd "$SHA1_FILE_DIRECTORY" ;; + esac || exit + xargs -r $dryrun rm -f +} From 6fa28064b0c9da9c2c8696b4eac28ee5beaee469 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 4 May 2005 01:38:06 -0700 Subject: [PATCH 4/6] Terminate diff-* on non-zero exit from GIT_EXTERNAL_DIFF (slightly updated from the version posted to the GIT mailing list with small bugfixes). This patch changes the git-apply-patch-script to exit non-zero when the patch cannot be applied. Previously, the external diff driver deliberately ignored the exit status of GIT_EXTERNAL_DIFF command, which was a design mistake. It now stops the processing when GIT_EXTERNAL_DIFF exits non-zero, so the damages from running git-diff-* with git-apply-patch-script between two wrong trees can be contained. The "diff" command line generated by the built-in driver is changed to always exit 0 in order to match this new behaviour. I know Pasky does not use GIT_EXTERNAL_DIFF yet, so this change should not break Cogito, either. 
Signed-off-by: Junio C Hamano --- diff.c | 20 ++++++----- git-apply-patch-script | 77 ++++++++++++++++++++++++------------------ 2 files changed, 56 insertions(+), 41 deletions(-) diff --git a/diff.c b/diff.c index a4d2b2d726..4a54688cbc 100644 --- a/diff.c +++ b/diff.c @@ -83,7 +83,7 @@ static void builtin_diff(const char *name, { int i, next_at; const char *diff_cmd = "diff -L'%s%s' -L'%s%s'"; - const char *diff_arg = "'%s' '%s'"; + const char *diff_arg = "'%s' '%s'||:"; /* "||:" is to return 0 */ const char *input_name_sq[2]; const char *path0[2]; const char *path1[2]; @@ -261,16 +261,20 @@ void run_external_diff(const char *name, printf("* Unmerged path %s\n", name); exit(0); } - if (waitpid(pid, &status, 0) < 0 || !WIFEXITED(status)) { - /* We do not check the exit status because typically + if (waitpid(pid, &status, 0) < 0 || + !WIFEXITED(status) || WEXITSTATUS(status)) { + /* Earlier we did not check the exit status because * diff exits non-zero if files are different, and - * we are not interested in knowing that. We *knew* - * they are different and that's why we ran diff - * in the first place! However if it dies by a signal, - * we stop processing immediately. + * we are not interested in knowing that. It was a + * mistake which made it harder to quit a diff-* + * session that uses the git-apply-patch-script as + * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF + * should also exit non-zero only when it wants to + * abort the entire diff-* session. */ remove_tempfile(); - die("external diff died unexpectedly.\n"); + fprintf(stderr, "external diff died, stopping at %s.\n", name); + exit(1); } remove_tempfile(); } diff --git a/git-apply-patch-script b/git-apply-patch-script index c28015aad3..29548ba6aa 100755 --- a/git-apply-patch-script +++ b/git-apply-patch-script @@ -19,40 +19,51 @@ then echo >&2 "Unresolved patch conflicts in the previous run found." exit 1 fi -# This will say "patching ..." so we do not say anything outselves. 
-diff -u -L "a/$name" -L "b/$name" "$tmp1" "$tmp2" | patch -p1 -test -f "$name.rej" || { - case "$mode1,$mode2" in - .,?x) - # newly created - case "$mode2" in - +x) - echo >&2 "created $name with mode +x." - chmod "$mode2" "$name" - ;; - -x) - echo >&2 "created $name." - ;; - esac - git-update-cache --add -- "$name" +case "$mode1,$mode2" in +.,?x) + # newly created + dir=$(dirname "$name") + case "$dir" in '' | .) ;; *) mkdir -p "$dir" esac || { + echo >&2 "cannot create leading path for $name." + exit 1 + } + case "$mode2" in + +x) + echo >&2 "created $name with mode +x." + chmod "$mode2" "$name" ;; - ?x,.) - # deleted - echo >&2 "deleted $name." - rm -f "$name" - git-update-cache --remove -- "$name" + -x) + echo >&2 "created $name." ;; - *) - # changed - case "$mode1,$mode2" in - "$mode2,$mode1") ;; - *) - echo >&2 "changing mode from $mode1 to $mode2." - chmod "$mode2" "$name" - ;; - esac - git-update-cache -- "$name" esac -} -exit 0 + git-update-cache --add -- "$name" + ;; +?x,.) + # deleted + echo >&2 "deleted $name." + rm -f "$name" || { + echo >&2 "cannot remove $name"; + exit 1 + } + git-update-cache --remove -- "$name" + ;; +*) + # changed + dir=$(dirname "$name") + case "$dir" in '' | .) ;; *) mkdir -p "$dir" esac || { + echo >&2 "cannot create leading path for $name." + exit 1 + } + # This will say "patching ..." so we do not say anything outselves. + diff -u -L "a/$name" -L "b/$name" "$tmp1" "$tmp2" | patch -p1 || exit + + case "$mode1,$mode2" in + "$mode2,$mode1") ;; + *) + echo >&2 "changing mode from $mode1 to $mode2." + chmod "$mode2" "$name" + ;; + esac + git-update-cache -- "$name" +esac From b46f0b6dfd09629645efc59cda76e7e4fad7ca39 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 4 May 2005 01:45:24 -0700 Subject: [PATCH 5/6] Optimize diff-cache -p --cached This patch optimizes "diff-cache -p --cached" by avoiding to inflate blobs into temporary files when the blob recorded in the cache matches the corresponding file in the work tree. 
The file in the work tree is passed as the comparison source in such a case instead. This optimization kicks in only when we have already read the cache, and this is deliberate. Especially, diff-tree does not use this code, because changes are contained in a small number of files relative to the project size most of the time, and reading cache is so expensive for a large project that the cost of reading it outweighs the savings by not inflating blobs. Also this patch cleans up the structure passed from diff clients by removing one unused structure member. Signed-off-by: Junio C Hamano --- diff-tree-helper.c | 6 ++--- diff.c | 67 +++++++++++++++++++++++++++++++++++++--------- diff.h | 13 ++++----- 3 files changed, 62 insertions(+), 24 deletions(-) diff --git a/diff-tree-helper.c b/diff-tree-helper.c index 621f90ad09..a68328321a 100644 --- a/diff-tree-helper.c +++ b/diff-tree-helper.c @@ -35,7 +35,7 @@ static int parse_oneside_change(const char *cp, struct diff_spec *one, if (strncmp(cp, "\tblob\t", 6)) return -1; cp += 6; - if (get_sha1_hex(cp, one->u.sha1)) + if (get_sha1_hex(cp, one->blob_sha1)) return -1; cp += 40; if (*cp++ != '\t') @@ -83,13 +83,13 @@ static int parse_diff_tree_output(const char *buf, if (strncmp(cp, "\tblob\t", 6)) return -1; cp += 6; - if (get_sha1_hex(cp, old.u.sha1)) + if (get_sha1_hex(cp, old.blob_sha1)) return -1; cp += 40; if (strncmp(cp, "->", 2)) return -1; cp += 2; - if (get_sha1_hex(cp, new.u.sha1)) + if (get_sha1_hex(cp, new.blob_sha1)) return -1; cp += 40; if (*cp++ != '\t') diff --git a/diff.c b/diff.c index 4a54688cbc..8dfa624432 100644 --- a/diff.c +++ b/diff.c @@ -132,11 +132,50 @@ static void builtin_diff(const char *name, execlp("/bin/sh","sh", "-c", cmd, NULL); } +/* + * Given a name and sha1 pair, if the dircache tells us the file in + * the work tree has that object contents, return true, so that + * prepare_temp_file() does not have to inflate and extract.
+ */ +static int work_tree_matches(const char *name, const unsigned char *sha1) +{ + struct cache_entry *ce; + struct stat st; + int pos, len; + + /* We do not read the cache ourselves here, because the + * benchmark with my previous version that always reads cache + * shows that it makes things worse for diff-tree comparing + * two linux-2.6 kernel trees in an already checked out work + * tree. This is because most diff-tree comparison deals with + * only a small number of files, while reading the cache is + * expensive for a large project, and its cost outweighs the + * savings we get by not inflating the object to a temporary + * file. Practically, this code only helps when we are used + * by diff-cache --cached, which does read the cache before + * calling us. + */ + if (!active_cache) + return 0; + + len = strlen(name); + pos = cache_name_pos(name, len); + if (pos < 0) + return 0; + ce = active_cache[pos]; + if ((stat(name, &st) < 0) || + cache_match_stat(ce, &st) || + memcmp(sha1, ce->sha1, 20)) + return 0; + return 1; +} + static void prepare_temp_file(const char *name, struct diff_tempfile *temp, struct diff_spec *one) { static unsigned char null_sha1[20] = { 0, }; + int use_work_tree = 0; if (!one->file_valid) { not_a_valid_file: @@ -150,20 +189,22 @@ static void prepare_temp_file(const char *name, } if (one->sha1_valid && - !memcmp(one->u.sha1, null_sha1, sizeof(null_sha1))) { - one->sha1_valid = 0; - one->u.name = name; - } + (!memcmp(one->blob_sha1, null_sha1, sizeof(null_sha1)) || + work_tree_matches(name, one->blob_sha1))) + use_work_tree = 1; - if (!one->sha1_valid) { + if (!one->sha1_valid || use_work_tree) { struct stat st; - temp->name = one->u.name; + temp->name = name; if (stat(temp->name, &st) < 0) { if (errno == ENOENT) goto not_a_valid_file; die("stat(%s): %s", temp->name, strerror(errno)); } - strcpy(temp->hex, sha1_to_hex(null_sha1)); + if (!one->sha1_valid) + strcpy(temp->hex, sha1_to_hex(null_sha1)); + else + strcpy(temp->hex, 
sha1_to_hex(one->blob_sha1)); sprintf(temp->mode, "%06o", S_IFREG |ce_permissions(st.st_mode)); } @@ -173,10 +214,10 @@ static void prepare_temp_file(const char *name, char type[20]; unsigned long size; - blob = read_sha1_file(one->u.sha1, type, &size); + blob = read_sha1_file(one->blob_sha1, type, &size); if (!blob || strcmp(type, "blob")) die("unable to read blob object for %s (%s)", - name, sha1_to_hex(one->u.sha1)); + name, sha1_to_hex(one->blob_sha1)); strcpy(temp->tmp_path, ".diff_XXXXXX"); fd = mkstemp(temp->tmp_path); @@ -187,7 +228,7 @@ static void prepare_temp_file(const char *name, close(fd); free(blob); temp->name = temp->tmp_path; - strcpy(temp->hex, sha1_to_hex(one->u.sha1)); + strcpy(temp->hex, sha1_to_hex(one->blob_sha1)); temp->hex[40] = 0; sprintf(temp->mode, "%06o", one->mode); } @@ -286,7 +327,7 @@ void diff_addremove(int addremove, unsigned mode, char concatpath[PATH_MAX]; struct diff_spec spec[2], *one, *two; - memcpy(spec[0].u.sha1, sha1, 20); + memcpy(spec[0].blob_sha1, sha1, 20); spec[0].mode = mode; spec[0].sha1_valid = spec[0].file_valid = 1; spec[1].file_valid = 0; @@ -311,9 +352,9 @@ void diff_change(unsigned old_mode, unsigned new_mode, char concatpath[PATH_MAX]; struct diff_spec spec[2]; - memcpy(spec[0].u.sha1, old_sha1, 20); + memcpy(spec[0].blob_sha1, old_sha1, 20); spec[0].mode = old_mode; - memcpy(spec[1].u.sha1, new_sha1, 20); + memcpy(spec[1].blob_sha1, new_sha1, 20); spec[1].mode = new_mode; spec[0].sha1_valid = spec[0].file_valid = 1; spec[1].sha1_valid = spec[1].file_valid = 1; diff --git a/diff.h b/diff.h index 8f269b2772..0b76cc4281 100644 --- a/diff.h +++ b/diff.h @@ -20,15 +20,12 @@ extern void diff_unmerge(const char *path); /* These are for diff-tree-helper */ struct diff_spec { - union { - const char *name; /* path on the filesystem */ - unsigned char sha1[20]; /* blob object ID */ - } u; + unsigned char blob_sha1[20]; unsigned short mode; /* file mode */ - unsigned sha1_valid : 1; /* if true, use u.sha1 and trust 
mode. - * (however with a NULL SHA1, read them - * from the file!). - * if false, use u.name and read mode from + unsigned sha1_valid : 1; /* if true, use blob_sha1 and trust mode; + * however with a NULL SHA1, read them + * from the file system. + * if false, use the name and read mode from * the filesystem. */ unsigned file_valid : 1; /* if false the file does not even exist */ From 8a9d32b7e9f3aebd9c322c4613c4608224d09a60 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 4 May 2005 10:45:36 -0700 Subject: [PATCH 6/6] When the patch tries to create a new file and the file exists, abort. This fixes an error introduced to git-apply-patch-script in the previous round. We do not invoke patch for create/delete case, so we need to be a bit careful about detecting conflicts like this. Signed-off-by: Junio C Hamano --- git-apply-patch-script | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/git-apply-patch-script b/git-apply-patch-script index 29548ba6aa..dccad27061 100755 --- a/git-apply-patch-script +++ b/git-apply-patch-script @@ -28,6 +28,15 @@ case "$mode1,$mode2" in echo >&2 "cannot create leading path for $name." exit 1 } + if test -f "$name" + then + echo >&2 "file $name to be created already exists." + exit 1 + fi + cat "$tmp2" >"$name" || { + echo >&2 "cannot create $name." + exit 1 + } case "$mode2" in +x) echo >&2 "created $name with mode +x."