diff --git a/refs.c b/refs.c index 31fd391214..1304d3dd87 100644 --- a/refs.c +++ b/refs.c @@ -2570,3 +2570,311 @@ int ref_update_check_old_target(const char *referent, struct ref_update *update, referent, update->old_target); return -1; } + +struct migration_data { + struct ref_store *old_refs; + struct ref_transaction *transaction; + struct strbuf *errbuf; +}; + +static int migrate_one_ref(const char *refname, const struct object_id *oid, + int flags, void *cb_data) +{ + struct migration_data *data = cb_data; + struct strbuf symref_target = STRBUF_INIT; + int ret; + + if (flags & REF_ISSYMREF) { + ret = refs_read_symbolic_ref(data->old_refs, refname, &symref_target); + if (ret < 0) + goto done; + + ret = ref_transaction_update(data->transaction, refname, NULL, null_oid(), + symref_target.buf, NULL, + REF_SKIP_CREATE_REFLOG | REF_NO_DEREF, NULL, data->errbuf); + if (ret < 0) + goto done; + } else { + ret = ref_transaction_create(data->transaction, refname, oid, + REF_SKIP_CREATE_REFLOG | REF_SKIP_OID_VERIFICATION, + NULL, data->errbuf); + if (ret < 0) + goto done; + } + +done: + strbuf_release(&symref_target); + return ret; +} + +static int move_files(const char *from_path, const char *to_path, struct strbuf *errbuf) +{ + struct strbuf from_buf = STRBUF_INIT, to_buf = STRBUF_INIT; + size_t from_len, to_len; + DIR *from_dir; + int ret; + + from_dir = opendir(from_path); + if (!from_dir) { + strbuf_addf(errbuf, "could not open source directory '%s': %s", + from_path, strerror(errno)); + ret = -1; + goto done; + } + + strbuf_addstr(&from_buf, from_path); + strbuf_complete(&from_buf, '/'); + from_len = from_buf.len; + + strbuf_addstr(&to_buf, to_path); + strbuf_complete(&to_buf, '/'); + to_len = to_buf.len; + + while (1) { + struct dirent *ent; + + errno = 0; + ent = readdir(from_dir); + if (!ent) + break; + + if (!strcmp(ent->d_name, ".") || + !strcmp(ent->d_name, "..")) + continue; + + strbuf_setlen(&from_buf, from_len); + strbuf_addstr(&from_buf, ent->d_name); + + strbuf_setlen(&to_buf, to_len); + strbuf_addstr(&to_buf, ent->d_name); + + ret = rename(from_buf.buf, to_buf.buf); + if (ret < 0) { + strbuf_addf(errbuf, "could not link file '%s' to '%s': %s", + from_buf.buf, to_buf.buf, strerror(errno)); + goto done; + } + } + + if (errno) { + strbuf_addf(errbuf, "could not read entry from directory '%s': %s", + from_path, strerror(errno)); + ret = -1; + goto done; + } + + ret = 0; + +done: + strbuf_release(&from_buf); + strbuf_release(&to_buf); + if (from_dir) + closedir(from_dir); + return ret; +} + +static int count_reflogs(const char *reflog UNUSED, void *payload) +{ + size_t *reflog_count = payload; + (*reflog_count)++; + return 0; +} + +static int has_worktrees(void) +{ + struct worktree **worktrees = get_worktrees(); + int ret = 0; + size_t i; + + for (i = 0; worktrees[i]; i++) { + if (is_main_worktree(worktrees[i])) + continue; + ret = 1; + } + + free_worktrees(worktrees); + return ret; +} + +int repo_migrate_ref_storage_format(struct repository *repo, + enum ref_storage_format format, + unsigned int flags, + struct strbuf *errbuf) +{ + struct ref_store *old_refs = NULL, *new_refs = NULL; + struct ref_transaction *transaction = NULL; + struct strbuf new_gitdir = STRBUF_INIT; + struct migration_data data; + size_t reflog_count = 0; + int did_migrate_refs = 0; + int ret; + + if (repo->ref_storage_format == format) { + strbuf_addstr(errbuf, "current and new ref storage format are equal"); + ret = -1; + goto done; + } + + old_refs = get_main_ref_store(repo); + + /* + * We do not have any interfaces that would allow us to write many + * reflog entries. Once we have them we can remove this restriction. + */ + if (refs_for_each_reflog(old_refs, count_reflogs, &reflog_count) < 0) { + strbuf_addstr(errbuf, "cannot count reflogs"); + ret = -1; + goto done; + } + if (reflog_count) { + strbuf_addstr(errbuf, "migrating reflogs is not supported yet"); + ret = -1; + goto done; + } + + /* + * Worktrees complicate the migration because every worktree has a + * separate ref storage. While it should be feasible to implement, this + * is pushed out to a future iteration. + * + * TODO: we should really be passing the caller-provided repository to + * `has_worktrees()`, but our worktree subsystem doesn't yet support + * that. + */ + if (has_worktrees()) { + strbuf_addstr(errbuf, "migrating repositories with worktrees is not supported yet"); + ret = -1; + goto done; + } + + /* + * The overall logic looks like this: + * + * 1. Set up a new temporary directory and initialize it with the new + * format. This is where all refs will be migrated into. + * + * 2. Enumerate all refs and write them into the new ref storage. + * This operation is safe as we do not yet modify the main + * repository. + * + * 3. If we're in dry-run mode then we are done and can hand over the + * directory to the caller for inspection. If not, we now start + * with the destructive part. + * + * 4. Delete the old ref storage from disk. As we have a copy of refs + * in the new ref storage it's okay(ish) if we now get interrupted + * as there is an equivalent copy of all refs available. + * + * 5. Move the new ref storage files into place. + * + * 6. Change the repository format to the new ref format. + */ + strbuf_addf(&new_gitdir, "%s/%s", old_refs->gitdir, "ref_migration.XXXXXX"); + if (!mkdtemp(new_gitdir.buf)) { + strbuf_addf(errbuf, "cannot create migration directory: %s", + strerror(errno)); + ret = -1; + goto done; + } + + new_refs = ref_store_init(repo, format, new_gitdir.buf, + REF_STORE_ALL_CAPS); + ret = ref_store_create_on_disk(new_refs, 0, errbuf); + if (ret < 0) + goto done; + + transaction = ref_store_transaction_begin(new_refs, errbuf); + if (!transaction) + goto done; + + data.old_refs = old_refs; + data.transaction = transaction; + data.errbuf = errbuf; + + /* + * We need to use the internal `do_for_each_ref()` here so that we can + * also include broken refs and symrefs. These would otherwise be + * skipped silently. + * + * Ideally, we would do this call while locking the old ref storage + * such that there cannot be any concurrent modifications. We do not + * have the infra for that though, and the "files" backend does not + * allow for a central lock due to its design. It's thus on the user to + * ensure that there are no concurrent writes. + */ + ret = do_for_each_ref(old_refs, "", NULL, migrate_one_ref, 0, + DO_FOR_EACH_INCLUDE_ROOT_REFS | DO_FOR_EACH_INCLUDE_BROKEN, + &data); + if (ret < 0) + goto done; + + /* + * TODO: we might want to migrate to `initial_ref_transaction_commit()` + * here, which is more efficient for the files backend because it would + * write new refs into the packed-refs file directly. At this point, + * the files backend doesn't handle pseudo-refs and symrefs correctly + * though, so this requires some more work. + */ + ret = ref_transaction_commit(transaction, errbuf); + if (ret < 0) + goto done; + did_migrate_refs = 1; + + if (flags & REPO_MIGRATE_REF_STORAGE_FORMAT_DRYRUN) { + printf(_("Finished dry-run migration of refs, " + "the result can be found at '%s'\n"), new_gitdir.buf); + ret = 0; + goto done; + } + + /* + * Until now we were in the non-destructive phase, where we only + * populated the new ref store. From hereon though we are about + * to get hands by deleting the old ref store and then moving + * the new one into place. + * + * Assuming that there were no concurrent writes, the new ref + * store should have all information. So if we fail from hereon + * we may be in an in-between state, but it would still be able + * to recover by manually moving remaining files from the + * temporary migration directory into place. + */ + ret = ref_store_remove_on_disk(old_refs, errbuf); + if (ret < 0) + goto done; + + ret = move_files(new_gitdir.buf, old_refs->gitdir, errbuf); + if (ret < 0) + goto done; + + if (rmdir(new_gitdir.buf) < 0) + warning_errno(_("could not remove temporary migration directory '%s'"), + new_gitdir.buf); + + /* + * We have migrated the repository, so we now need to adjust the + * repository format so that clients will use the new ref store. + * We also need to swap out the repository's main ref store. + */ + initialize_repository_version(hash_algo_by_ptr(repo->hash_algo), format, 1); + + free(new_refs->gitdir); + new_refs->gitdir = xstrdup(old_refs->gitdir); + repo->refs_private = new_refs; + ref_store_release(old_refs); + + ret = 0; + +done: + if (ret && did_migrate_refs) { + strbuf_complete(errbuf, '\n'); + strbuf_addf(errbuf, _("migrated refs can be found at '%s'"), + new_gitdir.buf); + } + + if (ret && new_refs) + ref_store_release(new_refs); + ref_transaction_free(transaction); + strbuf_release(&new_gitdir); + return ret; +} diff --git a/refs.h b/refs.h index 61ee7b7a15..76d25df4de 100644 --- a/refs.h +++ b/refs.h @@ -1070,6 +1070,24 @@ int is_root_ref(const char *refname); */ int is_pseudo_ref(const char *refname); +/* + * The following flags can be passed to `repo_migrate_ref_storage_format()`: + * + * - REPO_MIGRATE_REF_STORAGE_FORMAT_DRYRUN: perform a dry-run migration + * without touching the main repository. The result will be written into a + * temporary ref storage directory. + */ +#define REPO_MIGRATE_REF_STORAGE_FORMAT_DRYRUN (1 << 0) + +/* + * Migrate the ref storage format used by the repository to the + * specified one. + */ +int repo_migrate_ref_storage_format(struct repository *repo, + enum ref_storage_format format, + unsigned int flags, + struct strbuf *err); + /* * The following functions have been removed in Git v2.45 in favor of functions * that receive a `ref_store` as parameter. The intent of this section is