From a83f7f51e172704a445f481d550a3cee962f1dc4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:28:57 +0200 Subject: [PATCH 01/12] setup: unset ref storage when reinitializing repository version When reinitializing a repository's version we may end up unsetting the hash algorithm when it matches the default hash algorithm. If we didn't do that then the previously configured value might remain intact. While the same issue exists for the ref storage extension, we don't do this here. This has been fine for the most part because it is not supported to re-initialize a repository with a different ref storage format anyway. We're about to introduce a new command to migrate ref storages though, so this is about to become an issue there. Prepare for this and unset the ref storage format when reinitializing a repository with the "files" format. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.c b/setup.c index 7975230ffb..8c84ec9d4b 100644 --- a/setup.c +++ b/setup.c @@ -2028,6 +2028,8 @@ void initialize_repository_version(int hash_algo, if (ref_storage_format != REF_STORAGE_FORMAT_FILES) git_config_set("extensions.refstorage", ref_storage_format_to_name(ref_storage_format)); + else if (reinit) + git_config_set_gently("extensions.refstorage", NULL); } static int is_reinit(void) From 318efb966bc9b246703152f77cadd4e407de7cd9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:01 +0200 Subject: [PATCH 02/12] refs: convert ref storage format to an enum The ref storage format is tracked as a simple unsigned integer, which makes it harder than necessary to discover what that integer actually is or where its values are defined. Convert the ref storage format to instead be an enum.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- builtin/init-db.c | 2 +- refs.c | 7 ++++--- refs.h | 10 ++++++++-- repository.c | 3 ++- repository.h | 10 ++++------ setup.c | 8 ++++---- setup.h | 9 +++++---- 8 files changed, 29 insertions(+), 22 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 1e07524c53..e808e02017 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -970,7 +970,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) int submodule_progress; int filter_submodules = 0; int hash_algo; - unsigned int ref_storage_format = REF_STORAGE_FORMAT_UNKNOWN; + enum ref_storage_format ref_storage_format = REF_STORAGE_FORMAT_UNKNOWN; const int do_not_override_repo_unix_permissions = -1; const char *template_dir; char *template_dir_dup = NULL; diff --git a/builtin/init-db.c b/builtin/init-db.c index 0170469b84..582dcf20f8 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -81,7 +81,7 @@ int cmd_init_db(int argc, const char **argv, const char *prefix) const char *ref_format = NULL; const char *initial_branch = NULL; int hash_algo = GIT_HASH_UNKNOWN; - unsigned int ref_storage_format = REF_STORAGE_FORMAT_UNKNOWN; + enum ref_storage_format ref_storage_format = REF_STORAGE_FORMAT_UNKNOWN; int init_shared_repository = -1; const struct option init_db_options[] = { OPT_STRING(0, "template", &template_dir, N_("template-directory"), diff --git a/refs.c b/refs.c index 31032588e0..e6db85a165 100644 --- a/refs.c +++ b/refs.c @@ -37,14 +37,15 @@ static const struct ref_storage_be *refs_backends[] = { [REF_STORAGE_FORMAT_REFTABLE] = &refs_be_reftable, }; -static const struct ref_storage_be *find_ref_storage_backend(unsigned int ref_storage_format) +static const struct ref_storage_be *find_ref_storage_backend( + enum ref_storage_format ref_storage_format) { if (ref_storage_format < ARRAY_SIZE(refs_backends)) return refs_backends[ref_storage_format]; return NULL; } -unsigned int 
ref_storage_format_by_name(const char *name) +enum ref_storage_format ref_storage_format_by_name(const char *name) { for (unsigned int i = 0; i < ARRAY_SIZE(refs_backends); i++) if (refs_backends[i] && !strcmp(refs_backends[i]->name, name)) @@ -52,7 +53,7 @@ unsigned int ref_storage_format_by_name(const char *name) return REF_STORAGE_FORMAT_UNKNOWN; } -const char *ref_storage_format_to_name(unsigned int ref_storage_format) +const char *ref_storage_format_to_name(enum ref_storage_format ref_storage_format) { const struct ref_storage_be *be = find_ref_storage_backend(ref_storage_format); if (!be) diff --git a/refs.h b/refs.h index fe7f0db35e..a7afa9bede 100644 --- a/refs.h +++ b/refs.h @@ -11,8 +11,14 @@ struct string_list; struct string_list_item; struct worktree; -unsigned int ref_storage_format_by_name(const char *name); -const char *ref_storage_format_to_name(unsigned int ref_storage_format); +enum ref_storage_format { + REF_STORAGE_FORMAT_UNKNOWN, + REF_STORAGE_FORMAT_FILES, + REF_STORAGE_FORMAT_REFTABLE, +}; + +enum ref_storage_format ref_storage_format_by_name(const char *name); +const char *ref_storage_format_to_name(enum ref_storage_format ref_storage_format); /* * Resolve a reference, recursively following symbolic refererences. 
diff --git a/repository.c b/repository.c index d29b0304fb..166863f852 100644 --- a/repository.c +++ b/repository.c @@ -124,7 +124,8 @@ void repo_set_compat_hash_algo(struct repository *repo, int algo) repo_read_loose_object_map(repo); } -void repo_set_ref_storage_format(struct repository *repo, unsigned int format) +void repo_set_ref_storage_format(struct repository *repo, + enum ref_storage_format format) { repo->ref_storage_format = format; } diff --git a/repository.h b/repository.h index 4bd8969005..a35cd77c35 100644 --- a/repository.h +++ b/repository.h @@ -1,6 +1,7 @@ #ifndef REPOSITORY_H #define REPOSITORY_H +#include "refs.h" #include "strmap.h" struct config_set; @@ -26,10 +27,6 @@ enum fetch_negotiation_setting { FETCH_NEGOTIATION_NOOP, }; -#define REF_STORAGE_FORMAT_UNKNOWN 0 -#define REF_STORAGE_FORMAT_FILES 1 -#define REF_STORAGE_FORMAT_REFTABLE 2 - struct repo_settings { int initialized; @@ -181,7 +178,7 @@ struct repository { const struct git_hash_algo *compat_hash_algo; /* Repository's reference storage format, as serialized on disk. */ - unsigned int ref_storage_format; + enum ref_storage_format ref_storage_format; /* A unique-id for tracing purposes. 
*/ int trace2_repo_id; @@ -220,7 +217,8 @@ void repo_set_gitdir(struct repository *repo, const char *root, void repo_set_worktree(struct repository *repo, const char *path); void repo_set_hash_algo(struct repository *repo, int algo); void repo_set_compat_hash_algo(struct repository *repo, int compat_algo); -void repo_set_ref_storage_format(struct repository *repo, unsigned int format); +void repo_set_ref_storage_format(struct repository *repo, + enum ref_storage_format format); void initialize_repository(struct repository *repo); RESULT_MUST_BE_USED int repo_init(struct repository *r, const char *gitdir, const char *worktree); diff --git a/setup.c b/setup.c index 8c84ec9d4b..b49ee3e95f 100644 --- a/setup.c +++ b/setup.c @@ -1997,7 +1997,7 @@ static int needs_work_tree_config(const char *git_dir, const char *work_tree) } void initialize_repository_version(int hash_algo, - unsigned int ref_storage_format, + enum ref_storage_format ref_storage_format, int reinit) { char repo_version_string[10]; @@ -2044,7 +2044,7 @@ static int is_reinit(void) return ret; } -void create_reference_database(unsigned int ref_storage_format, +void create_reference_database(enum ref_storage_format ref_storage_format, const char *initial_branch, int quiet) { struct strbuf err = STRBUF_INIT; @@ -2243,7 +2243,7 @@ static void validate_hash_algorithm(struct repository_format *repo_fmt, int hash } static void validate_ref_storage_format(struct repository_format *repo_fmt, - unsigned int format) + enum ref_storage_format format) { const char *name = getenv("GIT_DEFAULT_REF_FORMAT"); @@ -2263,7 +2263,7 @@ static void validate_ref_storage_format(struct repository_format *repo_fmt, int init_db(const char *git_dir, const char *real_git_dir, const char *template_dir, int hash, - unsigned int ref_storage_format, + enum ref_storage_format ref_storage_format, const char *initial_branch, int init_shared_repository, unsigned int flags) { diff --git a/setup.h b/setup.h index b3fd3bf45a..cd8dbc2497 100644 
--- a/setup.h +++ b/setup.h @@ -1,6 +1,7 @@ #ifndef SETUP_H #define SETUP_H +#include "refs.h" #include "string-list.h" int is_inside_git_dir(void); @@ -128,7 +129,7 @@ struct repository_format { int is_bare; int hash_algo; int compat_hash_algo; - unsigned int ref_storage_format; + enum ref_storage_format ref_storage_format; int sparse_index; char *work_tree; struct string_list unknown_extensions; @@ -192,13 +193,13 @@ const char *get_template_dir(const char *option_template); int init_db(const char *git_dir, const char *real_git_dir, const char *template_dir, int hash_algo, - unsigned int ref_storage_format, + enum ref_storage_format ref_storage_format, const char *initial_branch, int init_shared_repository, unsigned int flags); void initialize_repository_version(int hash_algo, - unsigned int ref_storage_format, + enum ref_storage_format ref_storage_format, int reinit); -void create_reference_database(unsigned int ref_storage_format, +void create_reference_database(enum ref_storage_format ref_storage_format, const char *initial_branch, int quiet); /* From 6e1683ace981b094d9adbd08de406bf59d39b549 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:06 +0200 Subject: [PATCH 03/12] refs: pass storage format to `ref_store_init()` explicitly We're about to introduce logic to migrate refs from one storage format to another one. This will require us to initialize a ref store with a different format than the one used by the passed-in repository. Prepare for this by accepting the desired ref storage format as parameter. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/refs.c b/refs.c index e6db85a165..423684b8b8 100644 --- a/refs.c +++ b/refs.c @@ -1891,16 +1891,17 @@ static struct ref_store *lookup_ref_store_map(struct strmap *map, /* * Create, record, and return a ref_store instance for the specified - * gitdir. 
+ * gitdir using the given ref storage format. */ static struct ref_store *ref_store_init(struct repository *repo, + enum ref_storage_format format, const char *gitdir, unsigned int flags) { const struct ref_storage_be *be; struct ref_store *refs; - be = find_ref_storage_backend(repo->ref_storage_format); + be = find_ref_storage_backend(format); if (!be) BUG("reference backend is unknown"); @@ -1922,7 +1923,8 @@ struct ref_store *get_main_ref_store(struct repository *r) if (!r->gitdir) BUG("attempting to get main_ref_store outside of repository"); - r->refs_private = ref_store_init(r, r->gitdir, REF_STORE_ALL_CAPS); + r->refs_private = ref_store_init(r, r->ref_storage_format, + r->gitdir, REF_STORE_ALL_CAPS); r->refs_private = maybe_debug_wrap_ref_store(r->gitdir, r->refs_private); return r->refs_private; } @@ -1982,7 +1984,8 @@ struct ref_store *repo_get_submodule_ref_store(struct repository *repo, free(subrepo); goto done; } - refs = ref_store_init(subrepo, submodule_sb.buf, + refs = ref_store_init(subrepo, the_repository->ref_storage_format, + submodule_sb.buf, REF_STORE_READ | REF_STORE_ODB); register_ref_store_map(&repo->submodule_ref_stores, "submodule", refs, submodule); @@ -2011,12 +2014,12 @@ struct ref_store *get_worktree_ref_store(const struct worktree *wt) struct strbuf common_path = STRBUF_INIT; strbuf_git_common_path(&common_path, wt->repo, "worktrees/%s", wt->id); - refs = ref_store_init(wt->repo, common_path.buf, - REF_STORE_ALL_CAPS); + refs = ref_store_init(wt->repo, wt->repo->ref_storage_format, + common_path.buf, REF_STORE_ALL_CAPS); strbuf_release(&common_path); } else { - refs = ref_store_init(wt->repo, wt->repo->commondir, - REF_STORE_ALL_CAPS); + refs = ref_store_init(wt->repo, the_repository->ref_storage_format, + wt->repo->commondir, REF_STORE_ALL_CAPS); } if (refs) From fbd1a693c7343d4b37ae6d99b19f15d1293b77c2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:11 +0200 Subject: [PATCH 04/12] refs: allow to skip 
creation of reflog entries The ref backends do not have any way to disable the creation of reflog entries. This will be required for upcoming ref format migration logic so that we do not create any entries that didn't exist in the original ref database. Provide a new `REF_SKIP_CREATE_REFLOG` flag that allows the caller to disable reflog entry creation. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 6 ++++++ refs.h | 8 +++++++- refs/files-backend.c | 4 ++++ refs/reftable-backend.c | 3 ++- t/helper/test-ref-store.c | 1 + 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/refs.c b/refs.c index 423684b8b8..fa3b0a82d4 100644 --- a/refs.c +++ b/refs.c @@ -1194,6 +1194,12 @@ int ref_transaction_update(struct ref_transaction *transaction, { assert(err); + if ((flags & REF_FORCE_CREATE_REFLOG) && + (flags & REF_SKIP_CREATE_REFLOG)) { + strbuf_addstr(err, _("refusing to force and skip creation of reflog")); + return -1; + } + if (!(flags & REF_SKIP_REFNAME_VERIFICATION) && ((new_oid && !is_null_oid(new_oid)) ? check_refname_format(refname, REFNAME_ALLOW_ONELEVEL) : diff --git a/refs.h b/refs.h index a7afa9bede..50a2b3ab09 100644 --- a/refs.h +++ b/refs.h @@ -659,13 +659,19 @@ struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs, */ #define REF_SKIP_REFNAME_VERIFICATION (1 << 11) +/* + * Skip creation of a reflog entry, even if it would have otherwise been + * created. + */ +#define REF_SKIP_CREATE_REFLOG (1 << 12) + /* * Bitmask of all of the flags that are allowed to be passed in to * ref_transaction_update() and friends: */ #define REF_TRANSACTION_UPDATE_ALLOWED_FLAGS \ (REF_NO_DEREF | REF_FORCE_CREATE_REFLOG | REF_SKIP_OID_VERIFICATION | \ - REF_SKIP_REFNAME_VERIFICATION) + REF_SKIP_REFNAME_VERIFICATION | REF_SKIP_CREATE_REFLOG) /* * Add a reference update to transaction. 
`new_oid` is the value that diff --git a/refs/files-backend.c b/refs/files-backend.c index 73380d7e99..bd0d63bcba 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1750,6 +1750,9 @@ static int files_log_ref_write(struct files_ref_store *refs, { int logfd, result; + if (flags & REF_SKIP_CREATE_REFLOG) + return 0; + if (log_all_ref_updates == LOG_REFS_UNSET) log_all_ref_updates = is_bare_repository() ? LOG_REFS_NONE : LOG_REFS_NORMAL; @@ -2251,6 +2254,7 @@ static int split_head_update(struct ref_update *update, struct ref_update *new_update; if ((update->flags & REF_LOG_ONLY) || + (update->flags & REF_SKIP_CREATE_REFLOG) || (update->flags & REF_IS_PRUNING) || (update->flags & REF_UPDATE_VIA_HEAD)) return 0; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index f6edfdf5b3..bffed9257f 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1103,7 +1103,8 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data if (ret) goto done; - } else if (u->flags & REF_HAVE_NEW && + } else if (!(u->flags & REF_SKIP_CREATE_REFLOG) && + (u->flags & REF_HAVE_NEW) && (u->flags & REF_FORCE_CREATE_REFLOG || should_write_log(&arg->refs->base, u->refname))) { struct reftable_log_record *log; diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index c9efd74c2b..ad24300170 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -126,6 +126,7 @@ static struct flag_definition transaction_flags[] = { FLAG_DEF(REF_FORCE_CREATE_REFLOG), FLAG_DEF(REF_SKIP_OID_VERIFICATION), FLAG_DEF(REF_SKIP_REFNAME_VERIFICATION), + FLAG_DEF(REF_SKIP_CREATE_REFLOG), { NULL, 0 } }; From 66275a63117261749b40c774042bcb6e20e5d13b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:16 +0200 Subject: [PATCH 05/12] refs/files: refactor `add_pseudoref_and_head_entries()` The `add_pseudoref_and_head_entries()` function accepts both the ref store as well as a directory name as input. 
This is unnecessary though as the ref store already uniquely identifies the root directory of the ref store anyway. Furthermore, the function is misnamed now that we have clarified the meaning of pseudorefs as it doesn't add pseudorefs, but root refs. Rename it accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/files-backend.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index bd0d63bcba..b4e5437ffe 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -324,16 +324,14 @@ static void loose_fill_ref_dir(struct ref_store *ref_store, } /* - * Add pseudorefs to the ref dir by parsing the directory for any files - * which follow the pseudoref syntax. + * Add root refs to the ref dir by parsing the directory for any files which + * follow the root ref syntax. */ -static void add_pseudoref_and_head_entries(struct ref_store *ref_store, - struct ref_dir *dir, - const char *dirname) +static void add_root_refs(struct files_ref_store *refs, + struct ref_dir *dir) { - struct files_ref_store *refs = - files_downcast(ref_store, REF_STORE_READ, "fill_ref_dir"); struct strbuf path = STRBUF_INIT, refname = STRBUF_INIT; + const char *dirname = refs->loose->root->name; struct dirent *de; size_t dirnamelen; DIR *d; @@ -388,8 +386,7 @@ static struct ref_cache *get_loose_ref_cache(struct files_ref_store *refs, dir = get_ref_dir(refs->loose->root); if (flags & DO_FOR_EACH_INCLUDE_ROOT_REFS) - add_pseudoref_and_head_entries(dir->cache->ref_store, dir, - refs->loose->root->name); + add_root_refs(refs, dir); /* * Add an incomplete entry for "refs/" (to be filled From 120b67172f7858a70b36941fa31af58a0534020e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:20 +0200 Subject: [PATCH 06/12] refs/files: extract function to iterate through root refs Extract a new function that can be used to iterate through all root refs known to the "files" 
backend. This will be used in the next commit, where we start to teach ref backends to remove themselves. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/files-backend.c | 51 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index b4e5437ffe..de8cc83174 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -323,17 +323,15 @@ static void loose_fill_ref_dir(struct ref_store *ref_store, add_per_worktree_entries_to_dir(dir, dirname); } -/* - * Add root refs to the ref dir by parsing the directory for any files which - * follow the root ref syntax. - */ -static void add_root_refs(struct files_ref_store *refs, - struct ref_dir *dir) +static int for_each_root_ref(struct files_ref_store *refs, + int (*cb)(const char *refname, void *cb_data), + void *cb_data) { struct strbuf path = STRBUF_INIT, refname = STRBUF_INIT; const char *dirname = refs->loose->root->name; struct dirent *de; size_t dirnamelen; + int ret; DIR *d; files_ref_path(refs, &path, dirname); @@ -341,7 +339,7 @@ static void add_root_refs(struct files_ref_store *refs, d = opendir(path.buf); if (!d) { strbuf_release(&path); - return; + return -1; } strbuf_addstr(&refname, dirname); @@ -357,14 +355,49 @@ static void add_root_refs(struct files_ref_store *refs, strbuf_addstr(&refname, de->d_name); dtype = get_dtype(de, &path, 1); - if (dtype == DT_REG && is_root_ref(de->d_name)) - loose_fill_ref_dir_regular_file(refs, refname.buf, dir); + if (dtype == DT_REG && is_root_ref(de->d_name)) { + ret = cb(refname.buf, cb_data); + if (ret) + goto done; + } strbuf_setlen(&refname, dirnamelen); } + + ret = 0; + +done: strbuf_release(&refname); strbuf_release(&path); closedir(d); + return ret; +} + +struct fill_root_ref_data { + struct files_ref_store *refs; + struct ref_dir *dir; +}; + +static int fill_root_ref(const char *refname, void *cb_data) +{ + struct fill_root_ref_data *data = 
cb_data; + loose_fill_ref_dir_regular_file(data->refs, refname, data->dir); + return 0; +} + +/* + * Add root refs to the ref dir by parsing the directory for any files which + * follow the root ref syntax. + */ +static void add_root_refs(struct files_ref_store *refs, + struct ref_dir *dir) +{ + struct fill_root_ref_data data = { + .refs = refs, + .dir = dir, + }; + + for_each_root_ref(refs, fill_root_ref, &data); } static struct ref_cache *get_loose_ref_cache(struct files_ref_store *refs, From b3e098d6e77db87946c50c3517fbdeffe9168ca9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:25 +0200 Subject: [PATCH 07/12] refs/files: fix NULL pointer deref when releasing ref store The `free_ref_cache()` function is not `NULL` safe and will thus segfault when being passed such a pointer. This can easily happen when trying to release a partially initialized "files" ref store. Fix this. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/ref-cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/refs/ref-cache.c b/refs/ref-cache.c index b6c53fc8ed..4ce519bbc8 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -71,6 +71,8 @@ static void free_ref_entry(struct ref_entry *entry) void free_ref_cache(struct ref_cache *cache) { + if (!cache) + return; free_ref_entry(cache->root); free(cache); } From b5d7db9e83e16cad6fb2a055029a8d1c72af04c9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:30 +0200 Subject: [PATCH 08/12] reftable: inline `merged_table_release()` The function `merged_table_release()` releases a merged table, whereas `reftable_merged_table_free()` releases a merged table and then also frees its pointer. But all callsites of `merged_table_release()` are in fact followed by `reftable_merged_table_free()`, which is redundant. Inline `merged_table_release()` into `reftable_merged_table_free()` to get rid of this redundancy.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 12 ++---------- reftable/merged.h | 2 -- reftable/stack.c | 8 ++------ 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index f85a24c678..804fdc0de0 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -207,19 +207,11 @@ int reftable_new_merged_table(struct reftable_merged_table **dest, return 0; } -/* clears the list of subtable, without affecting the readers themselves. */ -void merged_table_release(struct reftable_merged_table *mt) -{ - FREE_AND_NULL(mt->stack); - mt->stack_len = 0; -} - void reftable_merged_table_free(struct reftable_merged_table *mt) { - if (!mt) { + if (!mt) return; - } - merged_table_release(mt); + FREE_AND_NULL(mt->stack); reftable_free(mt); } diff --git a/reftable/merged.h b/reftable/merged.h index a2571dbc99..9db45c3196 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -24,6 +24,4 @@ struct reftable_merged_table { uint64_t max; }; -void merged_table_release(struct reftable_merged_table *mt); - #endif diff --git a/reftable/stack.c b/reftable/stack.c index a59ebe038d..984fd866d0 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -261,10 +261,8 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, new_tables = NULL; st->readers_len = new_readers_len; - if (st->merged) { - merged_table_release(st->merged); + if (st->merged) reftable_merged_table_free(st->merged); - } if (st->readers) { reftable_free(st->readers); } @@ -968,10 +966,8 @@ static int stack_write_compact(struct reftable_stack *st, done: reftable_iterator_destroy(&it); - if (mt) { - merged_table_release(mt); + if (mt) reftable_merged_table_free(mt); - } reftable_ref_record_release(&ref); reftable_log_record_release(&log); st->stats.entries_written += entries; From 1339cb3c47aafb08a51d2517fcbaed2954d7d127 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:34 +0200 Subject: 
[PATCH 09/12] worktree: don't store main worktree twice In `get_worktree_ref_store()` we either return the repository's main ref store, or we look up the ref store via the map of worktree ref stores. Which of these worktrees gets picked depends on the `is_current` bit of the worktree, which indicates whether the worktree is the one that corresponds to `the_repository`. The bit is getting set in `get_worktrees()`, but only after we have computed the list of all worktrees. This is too late though, because at that time we have already called `get_worktree_ref_store()` on each of the worktrees via `add_head_info()`. The consequence is that the current worktree will not have been marked accordingly, which means that we did not use the main ref store, but instead created a new ref store. We thus have two separate ref stores now that map to the same ref database. Fix this by setting `is_current` before we call `add_head_info()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- worktree.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/worktree.c b/worktree.c index 12eadacc61..70844d023a 100644 --- a/worktree.c +++ b/worktree.c @@ -53,6 +53,15 @@ static void add_head_info(struct worktree *wt) wt->is_detached = 1; } +static int is_current_worktree(struct worktree *wt) +{ + char *git_dir = absolute_pathdup(get_git_dir()); + const char *wt_git_dir = get_worktree_git_dir(wt); + int is_current = !fspathcmp(git_dir, absolute_path(wt_git_dir)); + free(git_dir); + return is_current; +} + /** * get the main worktree */ @@ -76,6 +85,7 @@ static struct worktree *get_main_worktree(int skip_reading_head) */ worktree->is_bare = (is_bare_repository_cfg == 1) || is_bare_repository(); + worktree->is_current = is_current_worktree(worktree); if (!skip_reading_head) add_head_info(worktree); return worktree; @@ -102,6 +112,7 @@ struct worktree *get_linked_worktree(const char *id, worktree->repo = the_repository; 
worktree->path = strbuf_detach(&worktree_path, NULL); worktree->id = xstrdup(id); + worktree->is_current = is_current_worktree(worktree); if (!skip_reading_head) add_head_info(worktree); @@ -111,23 +122,6 @@ struct worktree *get_linked_worktree(const char *id, return worktree; } -static void mark_current_worktree(struct worktree **worktrees) -{ - char *git_dir = absolute_pathdup(get_git_dir()); - int i; - - for (i = 0; worktrees[i]; i++) { - struct worktree *wt = worktrees[i]; - const char *wt_git_dir = get_worktree_git_dir(wt); - - if (!fspathcmp(git_dir, absolute_path(wt_git_dir))) { - wt->is_current = 1; - break; - } - } - free(git_dir); -} - /* * NEEDSWORK: This function exists so that we can look up metadata of a * worktree without trying to access any of its internals like the refdb. It @@ -164,7 +158,6 @@ static struct worktree **get_worktrees_internal(int skip_reading_head) ALLOC_GROW(list, counter + 1, alloc); list[counter] = NULL; - mark_current_worktree(list); return list; } From 64a6dd8ffc2f120fc13e438af3236aa00cebc241 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:39 +0200 Subject: [PATCH 10/12] refs: implement removal of ref storages We're about to introduce logic to migrate ref storages. One part of the migration will be to delete the files that are part of the old ref storage format. We don't yet have a way to delete such data generically across ref backends though. Implement a new `remove_on_disk` callback and expose it via a new `ref_store_remove_on_disk()` function.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 5 ++++ refs.h | 5 ++++ refs/files-backend.c | 62 +++++++++++++++++++++++++++++++++++++++++ refs/packed-backend.c | 15 ++++++++++ refs/refs-internal.h | 7 +++++ refs/reftable-backend.c | 52 ++++++++++++++++++++++++++++++++++ 6 files changed, 146 insertions(+) diff --git a/refs.c b/refs.c index fa3b0a82d4..31fd391214 100644 --- a/refs.c +++ b/refs.c @@ -1861,6 +1861,11 @@ int ref_store_create_on_disk(struct ref_store *refs, int flags, struct strbuf *e return refs->be->create_on_disk(refs, flags, err); } +int ref_store_remove_on_disk(struct ref_store *refs, struct strbuf *err) +{ + return refs->be->remove_on_disk(refs, err); +} + int repo_resolve_gitlink_ref(struct repository *r, const char *submodule, const char *refname, struct object_id *oid) diff --git a/refs.h b/refs.h index 50a2b3ab09..61ee7b7a15 100644 --- a/refs.h +++ b/refs.h @@ -129,6 +129,11 @@ int ref_store_create_on_disk(struct ref_store *refs, int flags, struct strbuf *e */ void ref_store_release(struct ref_store *ref_store); +/* + * Remove the ref store from disk. This deletes all associated data. + */ +int ref_store_remove_on_disk(struct ref_store *refs, struct strbuf *err); + /* * Return the peeled value of the oid currently being iterated via * for_each_ref(), etc. 
This is equivalent to calling: diff --git a/refs/files-backend.c b/refs/files-backend.c index de8cc83174..e663781199 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3342,11 +3342,73 @@ static int files_ref_store_create_on_disk(struct ref_store *ref_store, return 0; } +struct remove_one_root_ref_data { + const char *gitdir; + struct strbuf *err; +}; + +static int remove_one_root_ref(const char *refname, + void *cb_data) +{ + struct remove_one_root_ref_data *data = cb_data; + struct strbuf buf = STRBUF_INIT; + int ret = 0; + + strbuf_addf(&buf, "%s/%s", data->gitdir, refname); + + ret = unlink(buf.buf); + if (ret < 0) + strbuf_addf(data->err, "could not delete %s: %s\n", + refname, strerror(errno)); + + strbuf_release(&buf); + return ret; +} + +static int files_ref_store_remove_on_disk(struct ref_store *ref_store, + struct strbuf *err) +{ + struct files_ref_store *refs = + files_downcast(ref_store, REF_STORE_WRITE, "remove"); + struct remove_one_root_ref_data data = { + .gitdir = refs->base.gitdir, + .err = err, + }; + struct strbuf sb = STRBUF_INIT; + int ret = 0; + + strbuf_addf(&sb, "%s/refs", refs->base.gitdir); + if (remove_dir_recursively(&sb, 0) < 0) { + strbuf_addf(err, "could not delete refs: %s", + strerror(errno)); + ret = -1; + } + strbuf_reset(&sb); + + strbuf_addf(&sb, "%s/logs", refs->base.gitdir); + if (remove_dir_recursively(&sb, 0) < 0) { + strbuf_addf(err, "could not delete logs: %s", + strerror(errno)); + ret = -1; + } + strbuf_reset(&sb); + + if (for_each_root_ref(refs, remove_one_root_ref, &data) < 0) + ret = -1; + + if (ref_store_remove_on_disk(refs->packed_ref_store, err) < 0) + ret = -1; + + strbuf_release(&sb); + return ret; +} + struct ref_storage_be refs_be_files = { .name = "files", .init = files_ref_store_init, .release = files_ref_store_release, .create_on_disk = files_ref_store_create_on_disk, + .remove_on_disk = files_ref_store_remove_on_disk, .transaction_prepare = files_transaction_prepare, .transaction_finish = 
files_transaction_finish, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 2789fd92f5..c4c1e36aa2 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1,5 +1,6 @@ #include "../git-compat-util.h" #include "../config.h" +#include "../dir.h" #include "../gettext.h" #include "../hash.h" #include "../hex.h" @@ -1266,6 +1267,19 @@ static int packed_ref_store_create_on_disk(struct ref_store *ref_store UNUSED, return 0; } +static int packed_ref_store_remove_on_disk(struct ref_store *ref_store, + struct strbuf *err) +{ + struct packed_ref_store *refs = packed_downcast(ref_store, 0, "remove"); + + if (remove_path(refs->path) < 0) { + strbuf_addstr(err, "could not delete packed-refs"); + return -1; + } + + return 0; +} + /* * Write the packed refs from the current snapshot to the packed-refs * tempfile, incorporating any changes from `updates`. `updates` must @@ -1724,6 +1738,7 @@ struct ref_storage_be refs_be_packed = { .init = packed_ref_store_init, .release = packed_ref_store_release, .create_on_disk = packed_ref_store_create_on_disk, + .remove_on_disk = packed_ref_store_remove_on_disk, .transaction_prepare = packed_transaction_prepare, .transaction_finish = packed_transaction_finish, diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 33749fbd83..cbcb6f9c36 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -517,6 +517,12 @@ typedef int ref_store_create_on_disk_fn(struct ref_store *refs, int flags, struct strbuf *err); +/* + * Remove the reference store from disk. 
+ */ +typedef int ref_store_remove_on_disk_fn(struct ref_store *refs, + struct strbuf *err); + typedef int ref_transaction_prepare_fn(struct ref_store *refs, struct ref_transaction *transaction, struct strbuf *err); @@ -649,6 +655,7 @@ struct ref_storage_be { ref_store_init_fn *init; ref_store_release_fn *release; ref_store_create_on_disk_fn *create_on_disk; + ref_store_remove_on_disk_fn *remove_on_disk; ref_transaction_prepare_fn *transaction_prepare; ref_transaction_finish_fn *transaction_finish; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index bffed9257f..da6b3162f3 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1,6 +1,7 @@ #include "../git-compat-util.h" #include "../abspath.h" #include "../chdir-notify.h" +#include "../dir.h" #include "../environment.h" #include "../gettext.h" #include "../hash.h" @@ -343,6 +344,56 @@ static int reftable_be_create_on_disk(struct ref_store *ref_store, return 0; } +static int reftable_be_remove_on_disk(struct ref_store *ref_store, + struct strbuf *err) +{ + struct reftable_ref_store *refs = + reftable_be_downcast(ref_store, REF_STORE_WRITE, "remove"); + struct strbuf sb = STRBUF_INIT; + int ret = 0; + + /* + * Release the ref store such that all stacks are closed. This is + * required so that the "tables.list" file is not open anymore, which + * would otherwise make it impossible to remove the file on Windows. 
+ */ + reftable_be_release(ref_store); + + strbuf_addf(&sb, "%s/reftable", refs->base.gitdir); + if (remove_dir_recursively(&sb, 0) < 0) { + strbuf_addf(err, "could not delete reftables: %s", + strerror(errno)); + ret = -1; + } + strbuf_reset(&sb); + + strbuf_addf(&sb, "%s/HEAD", refs->base.gitdir); + if (unlink(sb.buf) < 0) { + strbuf_addf(err, "could not delete stub HEAD: %s", + strerror(errno)); + ret = -1; + } + strbuf_reset(&sb); + + strbuf_addf(&sb, "%s/refs/heads", refs->base.gitdir); + if (unlink(sb.buf) < 0) { + strbuf_addf(err, "could not delete stub heads: %s", + strerror(errno)); + ret = -1; + } + strbuf_reset(&sb); + + strbuf_addf(&sb, "%s/refs", refs->base.gitdir); + if (rmdir(sb.buf) < 0) { + strbuf_addf(err, "could not delete refs directory: %s", + strerror(errno)); + ret = -1; + } + + strbuf_release(&sb); + return ret; +} + struct reftable_ref_iterator { struct ref_iterator base; struct reftable_ref_store *refs; @@ -2196,6 +2247,7 @@ struct ref_storage_be refs_be_reftable = { .init = reftable_be_init, .release = reftable_be_release, .create_on_disk = reftable_be_create_on_disk, + .remove_on_disk = reftable_be_remove_on_disk, .transaction_prepare = reftable_be_transaction_prepare, .transaction_finish = reftable_be_transaction_finish, From 6d6a3a99c7c35093c4a39cbf4d20869e2db3a4d8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:45 +0200 Subject: [PATCH 11/12] refs: implement logic to migrate between ref storage formats With the introduction of the new "reftable" backend, users may want to migrate repositories between the backends without having to recreate the whole repository. Add the logic to do so. The implementation is generic and works with arbitrary ref storage formats so that a backend does not need to implement any migration logic. It does have a few limitations though: - We do not migrate repositories with worktrees, because worktrees have separate ref storages. 
It makes the overall affair more complex if we have to migrate multiple storages at once. - We do not migrate reflogs, because we have no interfaces to write many reflog entries. - We do not lock the repository for concurrent access, and thus concurrent writes may end up with weird in-between states. There is no way to fully lock the "files" backend for writes due to its format, and thus we punt on this topic altogether and leave it to the user to prevent such writes from happening. In other words, this version is a minimum viable product for migrating a repository's ref storage format. It works alright for bare repos, which often have neither worktrees nor reflogs. But it will not work for many other repositories without some preparations. These limitations are not set in stone though, and ideally we will eventually address them over time. The logic is not yet used by anything, and thus there are no tests for it. Those will be added in the next commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ refs.h | 18 ++++ 2 files changed, 326 insertions(+) diff --git a/refs.c b/refs.c index 31fd391214..1304d3dd87 100644 --- a/refs.c +++ b/refs.c @@ -2570,3 +2570,311 @@ int ref_update_check_old_target(const char *referent, struct ref_update *update, referent, update->old_target); return -1; } + +struct migration_data { + struct ref_store *old_refs; + struct ref_transaction *transaction; + struct strbuf *errbuf; +}; + +static int migrate_one_ref(const char *refname, const struct object_id *oid, + int flags, void *cb_data) +{ + struct migration_data *data = cb_data; + struct strbuf symref_target = STRBUF_INIT; + int ret; + + if (flags & REF_ISSYMREF) { + ret = refs_read_symbolic_ref(data->old_refs, refname, &symref_target); + if (ret < 0) + goto done; + + ret = ref_transaction_update(data->transaction, refname, NULL, null_oid(), + symref_target.buf, NULL, + REF_SKIP_CREATE_REFLOG | 
REF_NO_DEREF, NULL, data->errbuf); + if (ret < 0) + goto done; + } else { + ret = ref_transaction_create(data->transaction, refname, oid, + REF_SKIP_CREATE_REFLOG | REF_SKIP_OID_VERIFICATION, + NULL, data->errbuf); + if (ret < 0) + goto done; + } + +done: + strbuf_release(&symref_target); + return ret; +} + +static int move_files(const char *from_path, const char *to_path, struct strbuf *errbuf) +{ + struct strbuf from_buf = STRBUF_INIT, to_buf = STRBUF_INIT; + size_t from_len, to_len; + DIR *from_dir; + int ret; + + from_dir = opendir(from_path); + if (!from_dir) { + strbuf_addf(errbuf, "could not open source directory '%s': %s", + from_path, strerror(errno)); + ret = -1; + goto done; + } + + strbuf_addstr(&from_buf, from_path); + strbuf_complete(&from_buf, '/'); + from_len = from_buf.len; + + strbuf_addstr(&to_buf, to_path); + strbuf_complete(&to_buf, '/'); + to_len = to_buf.len; + + while (1) { + struct dirent *ent; + + errno = 0; + ent = readdir(from_dir); + if (!ent) + break; + + if (!strcmp(ent->d_name, ".") || + !strcmp(ent->d_name, "..")) + continue; + + strbuf_setlen(&from_buf, from_len); + strbuf_addstr(&from_buf, ent->d_name); + + strbuf_setlen(&to_buf, to_len); + strbuf_addstr(&to_buf, ent->d_name); + + ret = rename(from_buf.buf, to_buf.buf); + if (ret < 0) { + strbuf_addf(errbuf, "could not link file '%s' to '%s': %s", + from_buf.buf, to_buf.buf, strerror(errno)); + goto done; + } + } + + if (errno) { + strbuf_addf(errbuf, "could not read entry from directory '%s': %s", + from_path, strerror(errno)); + ret = -1; + goto done; + } + + ret = 0; + +done: + strbuf_release(&from_buf); + strbuf_release(&to_buf); + if (from_dir) + closedir(from_dir); + return ret; +} + +static int count_reflogs(const char *reflog UNUSED, void *payload) +{ + size_t *reflog_count = payload; + (*reflog_count)++; + return 0; +} + +static int has_worktrees(void) +{ + struct worktree **worktrees = get_worktrees(); + int ret = 0; + size_t i; + + for (i = 0; worktrees[i]; i++) { + if 
(is_main_worktree(worktrees[i])) + continue; + ret = 1; + } + + free_worktrees(worktrees); + return ret; +} + +int repo_migrate_ref_storage_format(struct repository *repo, + enum ref_storage_format format, + unsigned int flags, + struct strbuf *errbuf) +{ + struct ref_store *old_refs = NULL, *new_refs = NULL; + struct ref_transaction *transaction = NULL; + struct strbuf new_gitdir = STRBUF_INIT; + struct migration_data data; + size_t reflog_count = 0; + int did_migrate_refs = 0; + int ret; + + if (repo->ref_storage_format == format) { + strbuf_addstr(errbuf, "current and new ref storage format are equal"); + ret = -1; + goto done; + } + + old_refs = get_main_ref_store(repo); + + /* + * We do not have any interfaces that would allow us to write many + * reflog entries. Once we have them we can remove this restriction. + */ + if (refs_for_each_reflog(old_refs, count_reflogs, &reflog_count) < 0) { + strbuf_addstr(errbuf, "cannot count reflogs"); + ret = -1; + goto done; + } + if (reflog_count) { + strbuf_addstr(errbuf, "migrating reflogs is not supported yet"); + ret = -1; + goto done; + } + + /* + * Worktrees complicate the migration because every worktree has a + * separate ref storage. While it should be feasible to implement, this + * is pushed out to a future iteration. + * + * TODO: we should really be passing the caller-provided repository to + * `has_worktrees()`, but our worktree subsystem doesn't yet support + * that. + */ + if (has_worktrees()) { + strbuf_addstr(errbuf, "migrating repositories with worktrees is not supported yet"); + ret = -1; + goto done; + } + + /* + * The overall logic looks like this: + * + * 1. Set up a new temporary directory and initialize it with the new + * format. This is where all refs will be migrated into. + * + * 2. Enumerate all refs and write them into the new ref storage. + * This operation is safe as we do not yet modify the main + * repository. + * + * 3. 
If we're in dry-run mode then we are done and can hand over the + * directory to the caller for inspection. If not, we now start + * with the destructive part. + * + * 4. Delete the old ref storage from disk. As we have a copy of refs + * in the new ref storage it's okay(ish) if we now get interrupted + * as there is an equivalent copy of all refs available. + * + * 5. Move the new ref storage files into place. + * + * 6. Change the repository format to the new ref format. + */ + strbuf_addf(&new_gitdir, "%s/%s", old_refs->gitdir, "ref_migration.XXXXXX"); + if (!mkdtemp(new_gitdir.buf)) { + strbuf_addf(errbuf, "cannot create migration directory: %s", + strerror(errno)); + ret = -1; + goto done; + } + + new_refs = ref_store_init(repo, format, new_gitdir.buf, + REF_STORE_ALL_CAPS); + ret = ref_store_create_on_disk(new_refs, 0, errbuf); + if (ret < 0) + goto done; + + transaction = ref_store_transaction_begin(new_refs, errbuf); + if (!transaction) + goto done; + + data.old_refs = old_refs; + data.transaction = transaction; + data.errbuf = errbuf; + + /* + * We need to use the internal `do_for_each_ref()` here so that we can + * also include broken refs and symrefs. These would otherwise be + * skipped silently. + * + * Ideally, we would do this call while locking the old ref storage + * such that there cannot be any concurrent modifications. We do not + * have the infra for that though, and the "files" backend does not + * allow for a central lock due to its design. It's thus on the user to + * ensure that there are no concurrent writes. + */ + ret = do_for_each_ref(old_refs, "", NULL, migrate_one_ref, 0, + DO_FOR_EACH_INCLUDE_ROOT_REFS | DO_FOR_EACH_INCLUDE_BROKEN, + &data); + if (ret < 0) + goto done; + + /* + * TODO: we might want to migrate to `initial_ref_transaction_commit()` + * here, which is more efficient for the files backend because it would + * write new refs into the packed-refs file directly. 
At this point, + * the files backend doesn't handle pseudo-refs and symrefs correctly + * though, so this requires some more work. + */ + ret = ref_transaction_commit(transaction, errbuf); + if (ret < 0) + goto done; + did_migrate_refs = 1; + + if (flags & REPO_MIGRATE_REF_STORAGE_FORMAT_DRYRUN) { + printf(_("Finished dry-run migration of refs, " + "the result can be found at '%s'\n"), new_gitdir.buf); + ret = 0; + goto done; + } + + /* + * Until now we were in the non-destructive phase, where we only + * populated the new ref store. From here on though we are about + * to get our hands dirty by deleting the old ref store and then moving + * the new one into place. + * + * Assuming that there were no concurrent writes, the new ref + * store should have all information. So if we fail from here on + * we may be in an in-between state, but it should still be possible + * to recover by manually moving remaining files from the + * temporary migration directory into place. + */ + ret = ref_store_remove_on_disk(old_refs, errbuf); + if (ret < 0) + goto done; + + ret = move_files(new_gitdir.buf, old_refs->gitdir, errbuf); + if (ret < 0) + goto done; + + if (rmdir(new_gitdir.buf) < 0) + warning_errno(_("could not remove temporary migration directory '%s'"), + new_gitdir.buf); + + /* + * We have migrated the repository, so we now need to adjust the + * repository format so that clients will use the new ref store. + * We also need to swap out the repository's main ref store. 
+ */ + initialize_repository_version(hash_algo_by_ptr(repo->hash_algo), format, 1); + + free(new_refs->gitdir); + new_refs->gitdir = xstrdup(old_refs->gitdir); + repo->refs_private = new_refs; + ref_store_release(old_refs); + + ret = 0; + +done: + if (ret && did_migrate_refs) { + strbuf_complete(errbuf, '\n'); + strbuf_addf(errbuf, _("migrated refs can be found at '%s'"), + new_gitdir.buf); + } + + if (ret && new_refs) + ref_store_release(new_refs); + ref_transaction_free(transaction); + strbuf_release(&new_gitdir); + return ret; +} diff --git a/refs.h b/refs.h index 61ee7b7a15..76d25df4de 100644 --- a/refs.h +++ b/refs.h @@ -1070,6 +1070,24 @@ int is_root_ref(const char *refname); */ int is_pseudo_ref(const char *refname); +/* + * The following flags can be passed to `repo_migrate_ref_storage_format()`: + * + * - REPO_MIGRATE_REF_STORAGE_FORMAT_DRYRUN: perform a dry-run migration + * without touching the main repository. The result will be written into a + * temporary ref storage directory. + */ +#define REPO_MIGRATE_REF_STORAGE_FORMAT_DRYRUN (1 << 0) + +/* + * Migrate the ref storage format used by the repository to the + * specified one. + */ +int repo_migrate_ref_storage_format(struct repository *repo, + enum ref_storage_format format, + unsigned int flags, + struct strbuf *err); + /* * The following functions have been removed in Git v2.45 in favor of functions * that receive a `ref_store` as parameter. The intent of this section is From 25a0023f28600102f54e7529c20da5928c3e9c75 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Jun 2024 07:29:49 +0200 Subject: [PATCH 12/12] builtin/refs: new command to migrate ref storage formats Introduce a new command that allows the user to migrate a repository between ref storage formats. This new command is implemented as part of a new git-refs(1) executable. This is due to two reasons: - There is no good place to put the migration logic in existing commands. 
git-maintenance(1) felt unwieldy, and git-pack-refs(1) is not the correct place to put it, either. - I had it in my mind to create a new low-level command for accessing refs for quite a while already. git-refs(1) is that command and can over time grow more functionality relating to refs. This should help discoverability by consolidating low-level access to refs into a single executable. As mentioned in the preceding commit that introduces the ref storage format migration logic, the new `git refs migrate` command still has a bunch of restrictions. These restrictions are documented accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- .gitignore | 1 + Documentation/git-refs.txt | 61 ++++++++++ Makefile | 1 + builtin.h | 1 + builtin/refs.c | 75 ++++++++++++ command-list.txt | 1 + git.c | 1 + t/t1460-refs-migrate.sh | 243 +++++++++++++++++++++++++++++++++++++ 8 files changed, 384 insertions(+) create mode 100644 Documentation/git-refs.txt create mode 100644 builtin/refs.c create mode 100755 t/t1460-refs-migrate.sh diff --git a/.gitignore b/.gitignore index 612c0f6a0f..8caf3700c2 100644 --- a/.gitignore +++ b/.gitignore @@ -126,6 +126,7 @@ /git-rebase /git-receive-pack /git-reflog +/git-refs /git-remote /git-remote-http /git-remote-https diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt new file mode 100644 index 0000000000..5b99e04385 --- /dev/null +++ b/Documentation/git-refs.txt @@ -0,0 +1,61 @@ +git-refs(1) +=========== + +NAME +---- +git-refs - Low-level access to refs + + +SYNOPSIS +-------- +[verse] +'git refs migrate' --ref-format=<format> [--dry-run] + +DESCRIPTION +----------- + +This command provides low-level access to refs. + +COMMANDS +-------- + +migrate:: + Migrate ref store between different formats. + +OPTIONS +------- + +The following options are specific to 'git refs migrate': + +--ref-format=<format>:: + The ref format to migrate the ref store to. 
Can be one of: ++ +include::ref-storage-format.txt[] + +--dry-run:: + Perform the migration, but do not modify the repository. The migrated + refs will be written into a separate directory that can be inspected + separately. The name of the directory will be reported on stdout. This + can be used to double check that the migration works as expected before + performing the actual migration. + +KNOWN LIMITATIONS +----------------- + +The ref format migration has several known limitations in its current form: + +* It is not possible to migrate repositories that have reflogs. + +* It is not possible to migrate repositories that have worktrees. + +* There is no way to block concurrent writes to the repository during an + ongoing migration. Concurrent writes can lead to an inconsistent migrated + state. Users are expected to block writes on a higher level. If your + repository is registered for scheduled maintenance, it is recommended to + unregister it first with git-maintenance(1). + +These limitations may eventually be lifted. 
+ +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index cf504963c2..2d702b552c 100644 --- a/Makefile +++ b/Makefile @@ -1283,6 +1283,7 @@ BUILTIN_OBJS += builtin/read-tree.o BUILTIN_OBJS += builtin/rebase.o BUILTIN_OBJS += builtin/receive-pack.o BUILTIN_OBJS += builtin/reflog.o +BUILTIN_OBJS += builtin/refs.o BUILTIN_OBJS += builtin/remote-ext.o BUILTIN_OBJS += builtin/remote-fd.o BUILTIN_OBJS += builtin/remote.o diff --git a/builtin.h b/builtin.h index 28280636da..7eda9b2486 100644 --- a/builtin.h +++ b/builtin.h @@ -207,6 +207,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix); int cmd_rebase__interactive(int argc, const char **argv, const char *prefix); int cmd_receive_pack(int argc, const char **argv, const char *prefix); int cmd_reflog(int argc, const char **argv, const char *prefix); +int cmd_refs(int argc, const char **argv, const char *prefix); int cmd_remote(int argc, const char **argv, const char *prefix); int cmd_remote_ext(int argc, const char **argv, const char *prefix); int cmd_remote_fd(int argc, const char **argv, const char *prefix); diff --git a/builtin/refs.c b/builtin/refs.c new file mode 100644 index 0000000000..46dcd150d4 --- /dev/null +++ b/builtin/refs.c @@ -0,0 +1,75 @@ +#include "builtin.h" +#include "parse-options.h" +#include "refs.h" +#include "repository.h" +#include "strbuf.h" + +#define REFS_MIGRATE_USAGE \ + N_("git refs migrate --ref-format= [--dry-run]") + +static int cmd_refs_migrate(int argc, const char **argv, const char *prefix) +{ + const char * const migrate_usage[] = { + REFS_MIGRATE_USAGE, + NULL, + }; + const char *format_str = NULL; + enum ref_storage_format format; + unsigned int flags = 0; + struct option options[] = { + OPT_STRING_F(0, "ref-format", &format_str, N_("format"), + N_("specify the reference format to convert to"), + PARSE_OPT_NONEG), + OPT_BIT(0, "dry-run", &flags, + N_("perform a non-destructive dry-run"), + REPO_MIGRATE_REF_STORAGE_FORMAT_DRYRUN), + 
OPT_END(), + }; + struct strbuf errbuf = STRBUF_INIT; + int err; + + argc = parse_options(argc, argv, prefix, options, migrate_usage, 0); + if (argc) + usage(_("too many arguments")); + if (!format_str) + usage(_("missing --ref-format=")); + + format = ref_storage_format_by_name(format_str); + if (format == REF_STORAGE_FORMAT_UNKNOWN) { + err = error(_("unknown ref storage format '%s'"), format_str); + goto out; + } + + if (the_repository->ref_storage_format == format) { + err = error(_("repository already uses '%s' format"), + ref_storage_format_to_name(format)); + goto out; + } + + if (repo_migrate_ref_storage_format(the_repository, format, flags, &errbuf) < 0) { + err = error("%s", errbuf.buf); + goto out; + } + + err = 0; + +out: + strbuf_release(&errbuf); + return err; +} + +int cmd_refs(int argc, const char **argv, const char *prefix) +{ + const char * const refs_usage[] = { + REFS_MIGRATE_USAGE, + NULL, + }; + parse_opt_subcommand_fn *fn = NULL; + struct option opts[] = { + OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate), + OPT_END(), + }; + + argc = parse_options(argc, argv, prefix, opts, refs_usage, 0); + return fn(argc, argv, prefix); +} diff --git a/command-list.txt b/command-list.txt index c4cd0f352b..e0bb87b3b5 100644 --- a/command-list.txt +++ b/command-list.txt @@ -157,6 +157,7 @@ git-read-tree plumbingmanipulators git-rebase mainporcelain history git-receive-pack synchelpers git-reflog ancillarymanipulators complete +git-refs ancillarymanipulators complete git-remote ancillarymanipulators complete git-repack ancillarymanipulators complete git-replace ancillarymanipulators complete diff --git a/git.c b/git.c index 637c61ca9c..683bb69194 100644 --- a/git.c +++ b/git.c @@ -594,6 +594,7 @@ static struct cmd_struct commands[] = { { "rebase", cmd_rebase, RUN_SETUP | NEED_WORK_TREE }, { "receive-pack", cmd_receive_pack }, { "reflog", cmd_reflog, RUN_SETUP }, + { "refs", cmd_refs, RUN_SETUP }, { "remote", cmd_remote, RUN_SETUP }, { "remote-ext", 
cmd_remote_ext, NO_PARSEOPT }, { "remote-fd", cmd_remote_fd, NO_PARSEOPT }, diff --git a/t/t1460-refs-migrate.sh b/t/t1460-refs-migrate.sh new file mode 100755 index 0000000000..f7c0783d30 --- /dev/null +++ b/t/t1460-refs-migrate.sh @@ -0,0 +1,243 @@ +#!/bin/sh + +test_description='migration of ref storage backends' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +TEST_PASSES_SANITIZE_LEAK=true +. ./test-lib.sh + +test_migration () { + git -C "$1" for-each-ref --include-root-refs \ + --format='%(refname) %(objectname) %(symref)' >expect && + git -C "$1" refs migrate --ref-format="$2" && + git -C "$1" for-each-ref --include-root-refs \ + --format='%(refname) %(objectname) %(symref)' >actual && + test_cmp expect actual && + + git -C "$1" rev-parse --show-ref-format >actual && + echo "$2" >expect && + test_cmp expect actual +} + +test_expect_success 'setup' ' + rm -rf .git && + # The migration does not yet support reflogs. + git config --global core.logAllRefUpdates false +' + +test_expect_success "superfluous arguments" ' + test_when_finished "rm -rf repo" && + git init repo && + test_must_fail git -C repo refs migrate foo 2>err && + cat >expect <<-EOF && + usage: too many arguments + EOF + test_cmp expect err +' + +test_expect_success "missing ref storage format" ' + test_when_finished "rm -rf repo" && + git init repo && + test_must_fail git -C repo refs migrate 2>err && + cat >expect <<-EOF && + usage: missing --ref-format= + EOF + test_cmp expect err +' + +test_expect_success "unknown ref storage format" ' + test_when_finished "rm -rf repo" && + git init repo && + test_must_fail git -C repo refs migrate \ + --ref-format=unknown 2>err && + cat >expect <<-EOF && + error: unknown ref storage format ${SQ}unknown${SQ} + EOF + test_cmp expect err +' + +ref_formats="files reftable" +for from_format in $ref_formats +do + for to_format in $ref_formats + do + if test "$from_format" = "$to_format" + then + continue + fi + + 
test_expect_success "$from_format: migration to same format fails" ' + test_when_finished "rm -rf repo" && + git init --ref-format=$from_format repo && + test_must_fail git -C repo refs migrate \ + --ref-format=$from_format 2>err && + cat >expect <<-EOF && + error: repository already uses ${SQ}$from_format${SQ} format + EOF + test_cmp expect err + ' + + test_expect_success "$from_format -> $to_format: migration with reflog fails" ' + test_when_finished "rm -rf repo" && + git init --ref-format=$from_format repo && + test_config -C repo core.logAllRefUpdates true && + test_commit -C repo logged && + test_must_fail git -C repo refs migrate \ + --ref-format=$to_format 2>err && + cat >expect <<-EOF && + error: migrating reflogs is not supported yet + EOF + test_cmp expect err + ' + + test_expect_success "$from_format -> $to_format: migration with worktree fails" ' + test_when_finished "rm -rf repo" && + git init --ref-format=$from_format repo && + git -C repo worktree add wt && + test_must_fail git -C repo refs migrate \ + --ref-format=$to_format 2>err && + cat >expect <<-EOF && + error: migrating repositories with worktrees is not supported yet + EOF + test_cmp expect err + ' + + test_expect_success "$from_format -> $to_format: unborn HEAD" ' + test_when_finished "rm -rf repo" && + git init --ref-format=$from_format repo && + test_migration repo "$to_format" + ' + + test_expect_success "$from_format -> $to_format: single ref" ' + test_when_finished "rm -rf repo" && + git init --ref-format=$from_format repo && + test_commit -C repo initial && + test_migration repo "$to_format" + ' + + test_expect_success "$from_format -> $to_format: bare repository" ' + test_when_finished "rm -rf repo repo.git" && + git init --ref-format=$from_format repo && + test_commit -C repo initial && + git clone --ref-format=$from_format --mirror repo repo.git && + test_migration repo.git "$to_format" + ' + + test_expect_success "$from_format -> $to_format: dangling symref" ' + test_when_finished 
"rm -rf repo" && + git init --ref-format=$from_format repo && + test_commit -C repo initial && + git -C repo symbolic-ref BROKEN_HEAD refs/heads/nonexistent && + test_migration repo "$to_format" && + echo refs/heads/nonexistent >expect && + git -C repo symbolic-ref BROKEN_HEAD >actual && + test_cmp expect actual + ' + + test_expect_success "$from_format -> $to_format: broken ref" ' + test_when_finished "rm -rf repo" && + git init --ref-format=$from_format repo && + test_commit -C repo initial && + test-tool -C repo ref-store main update-ref "" refs/heads/broken \ + "$(test_oid 001)" "$ZERO_OID" REF_SKIP_CREATE_REFLOG,REF_SKIP_OID_VERIFICATION && + test_migration repo "$to_format" && + test_oid 001 >expect && + git -C repo rev-parse refs/heads/broken >actual && + test_cmp expect actual + ' + + test_expect_success "$from_format -> $to_format: pseudo-refs" ' + test_when_finished "rm -rf repo" && + git init --ref-format=$from_format repo && + test_commit -C repo initial && + git -C repo update-ref FOO_HEAD HEAD && + test_migration repo "$to_format" + ' + + test_expect_success "$from_format -> $to_format: special refs are left alone" ' + test_when_finished "rm -rf repo" && + git init --ref-format=$from_format repo && + test_commit -C repo initial && + git -C repo rev-parse HEAD >repo/.git/MERGE_HEAD && + git -C repo rev-parse MERGE_HEAD && + test_migration repo "$to_format" && + test_path_is_file repo/.git/MERGE_HEAD + ' + + test_expect_success "$from_format -> $to_format: a bunch of refs" ' + test_when_finished "rm -rf repo" && + git init --ref-format=$from_format repo && + + test_commit -C repo initial && + cat >input <<-EOF && + create FOO_HEAD HEAD + create refs/heads/branch-1 HEAD + create refs/heads/branch-2 HEAD + create refs/heads/branch-3 HEAD + create refs/heads/branch-4 HEAD + create refs/tags/tag-1 HEAD + create refs/tags/tag-2 HEAD + EOF + git -C repo update-ref --stdin <input && + test_migration repo "$to_format" + ' + + test_expect_success "$from_format -> $to_format: dry-run migration does not modify repository" ' + test_when_finished "rm -rf 
repo" && + git init --ref-format=$from_format repo && + test_commit -C repo initial && + git -C repo refs migrate --dry-run \ + --ref-format=$to_format >output && + grep "Finished dry-run migration of refs" output && + test_path_is_dir repo/.git/ref_migration.* && + echo $from_format >expect && + git -C repo rev-parse --show-ref-format >actual && + test_cmp expect actual + ' + done +done + +test_expect_success 'migrating from files format deletes backend files' ' + test_when_finished "rm -rf repo" && + git init --ref-format=files repo && + test_commit -C repo first && + git -C repo pack-refs --all && + test_commit -C repo second && + git -C repo update-ref ORIG_HEAD HEAD && + git -C repo rev-parse HEAD >repo/.git/FETCH_HEAD && + + test_path_is_file repo/.git/HEAD && + test_path_is_file repo/.git/ORIG_HEAD && + test_path_is_file repo/.git/refs/heads/main && + test_path_is_file repo/.git/packed-refs && + + test_migration repo reftable && + + echo "ref: refs/heads/.invalid" >expect && + test_cmp expect repo/.git/HEAD && + echo "this repository uses the reftable format" >expect && + test_cmp expect repo/.git/refs/heads && + test_path_is_file repo/.git/FETCH_HEAD && + test_path_is_missing repo/.git/ORIG_HEAD && + test_path_is_missing repo/.git/refs/heads/main && + test_path_is_missing repo/.git/logs && + test_path_is_missing repo/.git/packed-refs +' + +test_expect_success 'migrating from reftable format deletes backend files' ' + test_when_finished "rm -rf repo" && + git init --ref-format=reftable repo && + test_commit -C repo first && + + test_path_is_dir repo/.git/reftable && + test_migration repo files && + + test_path_is_missing repo/.git/reftable && + echo "ref: refs/heads/main" >expect && + test_cmp expect repo/.git/HEAD && + test_path_is_file repo/.git/refs/heads/main +' + +test_done