From 200589abcb4d7f107ed9da85e43b9a7268016c23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 19 Sep 2020 23:23:32 +0200 Subject: [PATCH 1/3] archive: read short blobs in archive.c::write_archive_entry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Centralize reading of symlink destinations and the contents of regular files that are too small to be streamed. This reduces code duplication and allows future patches to add support for adding non-tracked files to archives. The backends are expected to stream blobs if buffer is NULL. object_file_to_archive() is only called from archive.c and thus no longer exported. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- archive-tar.c | 22 +++------------------- archive-zip.c | 22 +++++----------------- archive.c | 31 ++++++++++++++++++++++++------- archive.h | 7 ++----- 4 files changed, 34 insertions(+), 48 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 5ceec3684b..f1a1447ebd 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -242,13 +242,12 @@ static void write_extended_header(struct archiver_args *args, static int write_tar_entry(struct archiver_args *args, const struct object_id *oid, const char *path, size_t pathlen, - unsigned int mode) + unsigned int mode, + void *buffer, unsigned long size) { struct ustar_header header; struct strbuf ext_header = STRBUF_INIT; - unsigned int old_mode = mode; - unsigned long size, size_in_header; - void *buffer; + unsigned long size_in_header; int err = 0; memset(&header, 0, sizeof(header)); @@ -282,20 +281,6 @@ static int write_tar_entry(struct archiver_args *args, } else memcpy(header.name, path, pathlen); - if (S_ISREG(mode) && !args->convert && - oid_object_info(args->repo, oid, &size) == OBJ_BLOB && - size > big_file_threshold) - buffer = NULL; - else if (S_ISLNK(mode) || S_ISREG(mode)) { - enum object_type type; - buffer = object_file_to_archive(args, path, oid, old_mode, &type, 
&size); - if (!buffer) - return error(_("cannot read %s"), oid_to_hex(oid)); - } else { - buffer = NULL; - size = 0; - } - if (S_ISLNK(mode)) { if (size > sizeof(header.linkname)) { xsnprintf(header.linkname, sizeof(header.linkname), @@ -326,7 +311,6 @@ static int write_tar_entry(struct archiver_args *args, else err = stream_blocked(args->repo, oid); } - free(buffer); return err; } diff --git a/archive-zip.c b/archive-zip.c index e9f426298b..2961e01c75 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -285,7 +285,8 @@ static int entry_is_binary(struct index_state *istate, const char *path, static int write_zip_entry(struct archiver_args *args, const struct object_id *oid, const char *path, size_t pathlen, - unsigned int mode) + unsigned int mode, + void *buffer, unsigned long size) { struct zip_local_header header; uintmax_t offset = zip_offset; @@ -299,10 +300,8 @@ static int write_zip_entry(struct archiver_args *args, enum zip_method method; unsigned char *out; void *deflated = NULL; - void *buffer; struct git_istream *stream = NULL; unsigned long flags = 0; - unsigned long size; int is_binary = -1; const char *path_without_prefix = path + args->baselen; unsigned int creator_version = 0; @@ -328,13 +327,8 @@ static int write_zip_entry(struct archiver_args *args, method = ZIP_METHOD_STORE; attr2 = 16; out = NULL; - size = 0; compressed_size = 0; - buffer = NULL; } else if (S_ISREG(mode) || S_ISLNK(mode)) { - enum object_type type = oid_object_info(args->repo, oid, - &size); - method = ZIP_METHOD_STORE; attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : (mode & 0111) ? 
((mode) << 16) : 0; @@ -343,21 +337,16 @@ static int write_zip_entry(struct archiver_args *args, if (S_ISREG(mode) && args->compression_level != 0 && size > 0) method = ZIP_METHOD_DEFLATE; - if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && - size > big_file_threshold) { + if (!buffer) { + enum object_type type; stream = open_istream(args->repo, oid, &type, &size, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); flags |= ZIP_STREAM; - out = buffer = NULL; + out = NULL; } else { - buffer = object_file_to_archive(args, path, oid, mode, - &type, &size); - if (!buffer) - return error(_("cannot read %s"), - oid_to_hex(oid)); crc = crc32(crc, buffer, size); is_binary = entry_is_binary(args->repo->index, path_without_prefix, @@ -511,7 +500,6 @@ static int write_zip_entry(struct archiver_args *args, } free(deflated); - free(buffer); if (compressed_size > 0xffffffff || size > 0xffffffff || offset > 0xffffffff) { diff --git a/archive.c b/archive.c index 0de6048bfc..4fbe5329c5 100644 --- a/archive.c +++ b/archive.c @@ -70,10 +70,12 @@ static void format_subst(const struct commit *commit, free(to_free); } -void *object_file_to_archive(const struct archiver_args *args, - const char *path, const struct object_id *oid, - unsigned int mode, enum object_type *type, - unsigned long *sizep) +static void *object_file_to_archive(const struct archiver_args *args, + const char *path, + const struct object_id *oid, + unsigned int mode, + enum object_type *type, + unsigned long *sizep) { void *buffer; const struct commit *commit = args->convert ? 
args->commit : NULL; @@ -145,6 +147,9 @@ static int write_archive_entry(const struct object_id *oid, const char *base, write_archive_entry_fn_t write_entry = c->write_entry; int err; const char *path_without_prefix; + unsigned long size; + void *buffer; + enum object_type type; args->convert = 0; strbuf_reset(&path); @@ -167,7 +172,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base, if (S_ISDIR(mode) || S_ISGITLINK(mode)) { if (args->verbose) fprintf(stderr, "%.*s\n", (int)path.len, path.buf); - err = write_entry(args, oid, path.buf, path.len, mode); + err = write_entry(args, oid, path.buf, path.len, mode, NULL, 0); if (err) return err; return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0); @@ -175,7 +180,19 @@ static int write_archive_entry(const struct object_id *oid, const char *base, if (args->verbose) fprintf(stderr, "%.*s\n", (int)path.len, path.buf); - return write_entry(args, oid, path.buf, path.len, mode); + + /* Stream it? */ + if (S_ISREG(mode) && !args->convert && + oid_object_info(args->repo, oid, &size) == OBJ_BLOB && + size > big_file_threshold) + return write_entry(args, oid, path.buf, path.len, mode, NULL, size); + + buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size); + if (!buffer) + return error(_("cannot read %s"), oid_to_hex(oid)); + err = write_entry(args, oid, path.buf, path.len, mode, buffer, size); + free(buffer); + return err; } static void queue_directory(const unsigned char *sha1, @@ -265,7 +282,7 @@ int write_archive_entries(struct archiver_args *args, if (args->verbose) fprintf(stderr, "%.*s\n", (int)len, args->base); err = write_entry(args, &args->tree->object.oid, args->base, - len, 040777); + len, 040777, NULL, 0); if (err) return err; } diff --git a/archive.h b/archive.h index 3bd96bf6bb..d83b41a01f 100644 --- a/archive.h +++ b/archive.h @@ -49,12 +49,9 @@ void init_archivers(void); typedef int (*write_archive_entry_fn_t)(struct archiver_args *args, const struct object_id *oid, const 
char *path, size_t pathlen, - unsigned int mode); + unsigned int mode, + void *buffer, unsigned long size); int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry); -void *object_file_to_archive(const struct archiver_args *args, - const char *path, const struct object_id *oid, - unsigned int mode, enum object_type *type, - unsigned long *sizep); #endif /* ARCHIVE_H */ From 2947a7930d2864cfbc3f9815959cd6539e2ea9ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 19 Sep 2020 23:23:42 +0200 Subject: [PATCH 2/3] archive: add --add-file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow users to append non-tracked files. This simplifies the generation of source packages with a few extra files, e.g. containing version information. They get the same access times and user information as tracked files. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- Documentation/git-archive.txt | 6 +++ archive.c | 86 ++++++++++++++++++++++++++++++++++- archive.h | 2 + t/t5000-tar-tree.sh | 29 ++++++++++++ t/t5003-archive-zip.sh | 28 ++++++++++++ 5 files changed, 150 insertions(+), 1 deletion(-) diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt index cfa1e4ebe4..9f8172828d 100644 --- a/Documentation/git-archive.txt +++ b/Documentation/git-archive.txt @@ -55,6 +55,12 @@ OPTIONS --output=<file>:: Write the archive to <file> instead of stdout. +--add-file=<file>:: + Add a non-tracked file to the archive. Can be repeated to add + multiple files. The path of the file in the archive is built + by concatenating the value for `--prefix` (if any) and the + basename of <file>. + --worktree-attributes:: Look for attributes in .gitattributes files in the working tree as well (see <<ATTRIBUTES>>). 
diff --git a/archive.c b/archive.c index 4fbe5329c5..3c1541af9e 100644 --- a/archive.c +++ b/archive.c @@ -266,6 +266,11 @@ static int queue_or_write_archive_entry(const struct object_id *oid, stage, context); } +struct extra_file_info { + char *base; + struct stat stat; +}; + int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry) { @@ -273,6 +278,10 @@ int write_archive_entries(struct archiver_args *args, struct unpack_trees_options opts; struct tree_desc t; int err; + struct strbuf path_in_archive = STRBUF_INIT; + struct strbuf content = STRBUF_INIT; + struct object_id fake_oid = null_oid; + int i; if (args->baselen > 0 && args->base[args->baselen - 1] == '/') { size_t len = args->baselen; @@ -318,6 +327,33 @@ int write_archive_entries(struct archiver_args *args, free(context.bottom); context.bottom = next; } + + for (i = 0; i < args->extra_files.nr; i++) { + struct string_list_item *item = args->extra_files.items + i; + char *path = item->string; + struct extra_file_info *info = item->util; + + put_be64(fake_oid.hash, i + 1); + + strbuf_reset(&path_in_archive); + if (info->base) + strbuf_addstr(&path_in_archive, info->base); + strbuf_addstr(&path_in_archive, basename(path)); + + strbuf_reset(&content); + if (strbuf_read_file(&content, path, info->stat.st_size) < 0) + err = error_errno(_("could not read '%s'"), path); + else + err = write_entry(args, &fake_oid, path_in_archive.buf, + path_in_archive.len, + info->stat.st_mode, + content.buf, content.len); + if (err) + break; + } + strbuf_release(&path_in_archive); + strbuf_release(&content); + return err; } @@ -457,6 +493,42 @@ static void parse_treeish_arg(const char **argv, ar_args->time = archive_time; } +static void extra_file_info_clear(void *util, const char *str) +{ + struct extra_file_info *info = util; + free(info->base); + free(info); +} + +static int add_file_cb(const struct option *opt, const char *arg, int unset) +{ + struct archiver_args *args = opt->value; + 
const char **basep = (const char **)opt->defval; + const char *base = *basep; + char *path; + struct string_list_item *item; + struct extra_file_info *info; + + if (unset) { + string_list_clear_func(&args->extra_files, + extra_file_info_clear); + return 0; + } + + if (!arg) + return -1; + + path = prefix_filename(args->prefix, arg); + item = string_list_append_nodup(&args->extra_files, path); + item->util = info = xmalloc(sizeof(*info)); + info->base = xstrdup_or_null(base); + if (stat(path, &info->stat)) + die(_("File not found: %s"), path); + if (!S_ISREG(info->stat.st_mode)) + die(_("Not a regular file: %s"), path); + return 0; +} + #define OPT__COMPR(s, v, h, p) \ OPT_SET_INT_F(s, NULL, v, h, p, PARSE_OPT_NONEG) #define OPT__COMPR_HIDDEN(s, v, p) \ @@ -481,6 +553,9 @@ static int parse_archive_args(int argc, const char **argv, OPT_STRING(0, "format", &format, N_("fmt"), N_("archive format")), OPT_STRING(0, "prefix", &base, N_("prefix"), N_("prepend prefix to each pathname in the archive")), + { OPTION_CALLBACK, 0, "add-file", args, N_("file"), + N_("add untracked file to archive"), 0, add_file_cb, + (intptr_t)&base }, OPT_STRING('o', "output", &output, N_("file"), N_("write the archive to this file")), OPT_BOOL(0, "worktree-attributes", &worktree_attributes, @@ -515,6 +590,8 @@ static int parse_archive_args(int argc, const char **argv, die(_("Option --exec can only be used together with --remote")); if (output) die(_("Unexpected option --output")); + if (is_remote && args->extra_files.nr) + die(_("Options --add-file and --remote cannot be used together")); if (!base) base = ""; @@ -561,11 +638,14 @@ int write_archive(int argc, const char **argv, const char *prefix, { const struct archiver *ar = NULL; struct archiver_args args; + int rc; git_config_get_bool("uploadarchive.allowunreachable", &remote_allow_unreachable); git_config(git_default_config, NULL); args.repo = repo; + args.prefix = prefix; + string_list_init(&args.extra_files, 1); argc = 
parse_archive_args(argc, argv, &ar, &args, name_hint, remote); if (!startup_info->have_repository) { /* @@ -579,7 +659,11 @@ int write_archive(int argc, const char **argv, const char *prefix, parse_treeish_arg(argv, &args, prefix, remote); parse_pathspec_arg(argv + 1, &args); - return ar->write_archive(ar, &args); + rc = ar->write_archive(ar, &args); + + string_list_clear_func(&args.extra_files, extra_file_info_clear); + + return rc; } static int match_extension(const char *filename, const char *ext) diff --git a/archive.h b/archive.h index d83b41a01f..82b226011a 100644 --- a/archive.h +++ b/archive.h @@ -9,6 +9,7 @@ struct repository; struct archiver_args { struct repository *repo; const char *refname; + const char *prefix; const char *base; size_t baselen; struct tree *tree; @@ -20,6 +21,7 @@ struct archiver_args { unsigned int worktree_attributes : 1; unsigned int convert : 1; int compression_level; + struct string_list extra_files; }; /* main api */ diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 37655a237c..3ebb0d3b65 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -94,6 +94,16 @@ check_tar() { ' } +check_added() { + dir=$1 + path_in_fs=$2 + path_in_archive=$3 + + test_expect_success " validate extra file $path_in_archive" ' + diff -r $path_in_fs $dir/$path_in_archive + ' +} + test_expect_success 'setup' ' test_oid_cache <<-EOF obj sha1:19f9c8273ec45a8938e6999cb59b3ff66739902a @@ -164,6 +174,25 @@ test_expect_success 'git-archive --prefix=olde-' ' check_tar with_olde-prefix olde- +test_expect_success 'git archive --add-file' ' + echo untracked >untracked && + git archive --add-file=untracked HEAD >with_untracked.tar +' + +check_tar with_untracked +check_added with_untracked untracked untracked + +test_expect_success 'git archive --add-file twice' ' + echo untracked >untracked && + git archive --prefix=one/ --add-file=untracked \ + --prefix=two/ --add-file=untracked \ + --prefix= HEAD >with_untracked2.tar +' + +check_tar 
with_untracked2 +check_added with_untracked2 untracked one/untracked +check_added with_untracked2 untracked two/untracked + test_expect_success 'git archive on large files' ' test_config core.bigfilethreshold 1 && git archive HEAD >b3.tar && diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh index 3b76d2eb65..1e6d18b140 100755 --- a/t/t5003-archive-zip.sh +++ b/t/t5003-archive-zip.sh @@ -72,6 +72,16 @@ check_zip() { " } +check_added() { + dir=$1 + path_in_fs=$2 + path_in_archive=$3 + + test_expect_success UNZIP " validate extra file $path_in_archive" ' + diff -r $path_in_fs $dir/$path_in_archive + ' +} + test_expect_success \ 'populate workdir' \ 'mkdir a && @@ -188,4 +198,22 @@ test_expect_success 'git archive --format=zip on large files' ' check_zip large-compressed +test_expect_success 'git archive --format=zip --add-file' ' + echo untracked >untracked && + git archive --format=zip --add-file=untracked HEAD >with_untracked.zip +' + +check_zip with_untracked +check_added with_untracked untracked untracked + +test_expect_success 'git archive --format=zip --add-file twice' ' + echo untracked >untracked && + git archive --format=zip --prefix=one/ --add-file=untracked \ + --prefix=two/ --add-file=untracked \ + --prefix= HEAD >with_untracked2.zip +' +check_zip with_untracked2 +check_added with_untracked2 untracked one/untracked +check_added with_untracked2 untracked two/untracked + test_done From df368fae35ce23f5b373cd567595063997bdb61c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 19 Sep 2020 23:23:50 +0200 Subject: [PATCH 3/3] Makefile: use git-archive --add-file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add untracked files for the dist target directly using git archive instead of calling tar cr to append them. This reduces the dependency on external tools and gives the untracked files the same access times and user information as tracked ones, integrating them seamlessly. 
Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- Makefile | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index f1b1bc8aa0..56bf12b238 100644 --- a/Makefile +++ b/Makefile @@ -3045,32 +3045,29 @@ quick-install-html: # With GNU tar, "--mode=u+rwX,og+rX,og-w" would be a good idea, for example. TAR_DIST_EXTRA_OPTS = GIT_TARNAME = git-$(GIT_VERSION) +GIT_ARCHIVE_EXTRA_FILES = \ + --prefix=$(GIT_TARNAME)/ \ + --add-file=configure \ + --add-file=$(GIT_TARNAME)/version \ + --prefix=$(GIT_TARNAME)/git-gui/ \ + --add-file=$(GIT_TARNAME)/git-gui/version +ifdef DC_SHA1_SUBMODULE +GIT_ARCHIVE_EXTRA_FILES += \ + --prefix=$(GIT_TARNAME)/sha1collisiondetection/ \ + --add-file=sha1collisiondetection/LICENSE.txt \ + --prefix=$(GIT_TARNAME)/sha1collisiondetection/lib/ \ + --add-file=sha1collisiondetection/lib/sha1.c \ + --add-file=sha1collisiondetection/lib/sha1.h \ + --add-file=sha1collisiondetection/lib/ubc_check.c \ + --add-file=sha1collisiondetection/lib/ubc_check.h +endif dist: git-archive$(X) configure - ./git-archive --format=tar \ - --prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar @mkdir -p $(GIT_TARNAME) - @cp configure $(GIT_TARNAME) @echo $(GIT_VERSION) > $(GIT_TARNAME)/version @$(MAKE) -C git-gui TARDIR=../$(GIT_TARNAME)/git-gui dist-version - $(TAR) rf $(GIT_TARNAME).tar $(TAR_DIST_EXTRA_OPTS) \ - $(GIT_TARNAME)/configure \ - $(GIT_TARNAME)/version \ - $(GIT_TARNAME)/git-gui/version -ifdef DC_SHA1_SUBMODULE - @mkdir -p $(GIT_TARNAME)/sha1collisiondetection/lib - @cp sha1collisiondetection/LICENSE.txt \ - $(GIT_TARNAME)/sha1collisiondetection/ - @cp sha1collisiondetection/LICENSE.txt \ - $(GIT_TARNAME)/sha1collisiondetection/ - @cp sha1collisiondetection/lib/sha1.[ch] \ - $(GIT_TARNAME)/sha1collisiondetection/lib/ - @cp sha1collisiondetection/lib/ubc_check.[ch] \ - $(GIT_TARNAME)/sha1collisiondetection/lib/ - $(TAR) rf $(GIT_TARNAME).tar $(TAR_DIST_EXTRA_OPTS) \ - 
$(GIT_TARNAME)/sha1collisiondetection/LICENSE.txt \ - $(GIT_TARNAME)/sha1collisiondetection/lib/sha1.[ch] \ - $(GIT_TARNAME)/sha1collisiondetection/lib/ubc_check.[ch] -endif + ./git-archive --format=tar \ + $(GIT_ARCHIVE_EXTRA_FILES) \ + --prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar @$(RM) -r $(GIT_TARNAME) gzip -f -9 $(GIT_TARNAME).tar