git/diagnose.c

283 lines
6.8 KiB
C
Raw Permalink Normal View History

global: introduce `USE_THE_REPOSITORY_VARIABLE` macro Use of the `the_repository` variable is deprecated nowadays, and we slowly but steadily convert the codebase to not use it anymore. Instead, callers should be passing down the repository to work on via parameters. It is hard though to prove that a given code unit does not use this variable anymore. The most trivial case, merely demonstrating that there is no direct use of `the_repository`, is already a bit of a pain during code reviews as the reviewer needs to manually verify claims made by the patch author. The bigger problem though is that we have many interfaces that implicitly rely on `the_repository`. Introduce a new `USE_THE_REPOSITORY_VARIABLE` macro that allows code units to opt into usage of `the_repository`. The intent of this macro is to demonstrate that a certain code unit does not use this variable anymore, and to keep it from new dependencies on it in future changes, be it explicit or implicit For now, the macro only guards `the_repository` itself as well as `the_hash_algo`. There are many more known interfaces where we have an implicit dependency on `the_repository`, but those are not guarded at the current point in time. Over time though, we should start to add guards as required (or even better, just remove them). Define the macro as required in our code units. As expected, most of our code still relies on the global variable. Nearly all of our builtins rely on the variable as there is no way yet to pass `the_repository` to their entry point. For now, declare the macro in "biultin.h" to keep the required changes at least a little bit more contained. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-14 06:50:23 +00:00
#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "diagnose.h"
#include "compat/disk.h"
#include "archive.h"
#include "dir.h"
#include "help.h"
#include "gettext.h"
#include "hex.h"
#include "strvec.h"
#include "object-store-ll.h"
#include "packfile.h"
#include "parse-options.h"
#include "write-or-die.h"
diagnose.c: add option to configure archive contents Update 'create_diagnostics_archive()' to take an argument 'mode'. When archiving diagnostics for a repository, 'mode' is used to selectively include/exclude information based on its value. The initial options for 'mode' are: * DIAGNOSE_NONE: do not collect any diagnostics or create an archive (no-op). * DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path, filesystem available space) as well as sizing and count statistics for the repository's objects and packfiles. * DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics, and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and '.git/objects/info' directories. These modes are introduced to provide users the option to collect diagnostics without the sensitive information included in copies of '.git' dir contents. At the moment, only 'scalar diagnose' uses 'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to match existing functionality), but more callers will be introduced in subsequent patches. Finally, refactor from a hardcoded set of 'add_directory_to_archiver()' calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for easier future modification of the set of directories to archive and improves error reporting when 'add_directory_to_archiver()' fails. Helped-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Victoria Dye <vdye@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 20:10:14 +00:00
struct archive_dir {
const char *path;
int recursive;
};
struct diagnose_option {
enum diagnose_mode mode;
const char *option_name;
};
static struct diagnose_option diagnose_options[] = {
{ DIAGNOSE_STATS, "stats" },
{ DIAGNOSE_ALL, "all" },
};
int option_parse_diagnose(const struct option *opt, const char *arg, int unset)
{
int i;
enum diagnose_mode *diagnose = opt->value;
if (!arg) {
*diagnose = unset ? DIAGNOSE_NONE : DIAGNOSE_STATS;
return 0;
}
for (i = 0; i < ARRAY_SIZE(diagnose_options); i++) {
if (!strcmp(arg, diagnose_options[i].option_name)) {
*diagnose = diagnose_options[i].mode;
return 0;
}
}
return error(_("invalid --%s value '%s'"), opt->long_name, arg);
}
static void dir_file_stats_objects(const char *full_path,
size_t full_path_len UNUSED,
const char *file_name, void *data)
{
struct strbuf *buf = data;
struct stat st;
if (!stat(full_path, &st))
strbuf_addf(buf, "%-70s %16" PRIuMAX "\n", file_name,
(uintmax_t)st.st_size);
}
static int dir_file_stats(struct object_directory *object_dir, void *data)
{
struct strbuf *buf = data;
strbuf_addf(buf, "Contents of %s:\n", object_dir->path);
for_each_file_in_pack_dir(object_dir->path, dir_file_stats_objects,
data);
return 0;
}
static int count_files(struct strbuf *path)
{
DIR *dir = opendir(path->buf);
struct dirent *e;
int count = 0;
if (!dir)
return 0;
while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL)
if (get_dtype(e, path, 0) == DT_REG)
count++;
closedir(dir);
return count;
}
static void loose_objs_stats(struct strbuf *buf, const char *path)
{
DIR *dir = opendir(path);
struct dirent *e;
int count;
int total = 0;
unsigned char c;
struct strbuf count_path = STRBUF_INIT;
size_t base_path_len;
if (!dir)
return;
strbuf_addstr(buf, "Object directory stats for ");
strbuf_add_absolute_path(buf, path);
strbuf_addstr(buf, ":\n");
strbuf_add_absolute_path(&count_path, path);
strbuf_addch(&count_path, '/');
base_path_len = count_path.len;
while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL)
if (get_dtype(e, &count_path, 0) == DT_DIR &&
strlen(e->d_name) == 2 &&
!hex_to_bytes(&c, e->d_name, 1)) {
strbuf_setlen(&count_path, base_path_len);
strbuf_addf(&count_path, "%s/", e->d_name);
total += (count = count_files(&count_path));
strbuf_addf(buf, "%s : %7d files\n", e->d_name, count);
}
strbuf_addf(buf, "Total: %d loose objects", total);
strbuf_release(&count_path);
closedir(dir);
}
static int add_directory_to_archiver(struct strvec *archiver_args,
const char *path, int recurse)
{
int at_root = !*path;
DIR *dir;
struct dirent *e;
struct strbuf buf = STRBUF_INIT;
size_t len;
int res = 0;
dir = opendir(at_root ? "." : path);
if (!dir) {
if (errno == ENOENT) {
warning(_("could not archive missing directory '%s'"), path);
return 0;
}
return error_errno(_("could not open directory '%s'"), path);
}
if (!at_root)
strbuf_addf(&buf, "%s/", path);
len = buf.len;
strvec_pushf(archiver_args, "--prefix=%s", buf.buf);
while (!res && (e = readdir_skip_dot_and_dotdot(dir))) {
struct strbuf abspath = STRBUF_INIT;
unsigned char dtype;
strbuf_add_absolute_path(&abspath, at_root ? "." : path);
strbuf_addch(&abspath, '/');
dtype = get_dtype(e, &abspath, 0);
strbuf_setlen(&buf, len);
strbuf_addstr(&buf, e->d_name);
if (dtype == DT_REG)
strvec_pushf(archiver_args, "--add-file=%s", buf.buf);
else if (dtype != DT_DIR)
warning(_("skipping '%s', which is neither file nor "
"directory"), buf.buf);
else if (recurse &&
add_directory_to_archiver(archiver_args,
buf.buf, recurse) < 0)
res = -1;
strbuf_release(&abspath);
}
closedir(dir);
strbuf_release(&buf);
return res;
}
diagnose.c: add option to configure archive contents Update 'create_diagnostics_archive()' to take an argument 'mode'. When archiving diagnostics for a repository, 'mode' is used to selectively include/exclude information based on its value. The initial options for 'mode' are: * DIAGNOSE_NONE: do not collect any diagnostics or create an archive (no-op). * DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path, filesystem available space) as well as sizing and count statistics for the repository's objects and packfiles. * DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics, and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and '.git/objects/info' directories. These modes are introduced to provide users the option to collect diagnostics without the sensitive information included in copies of '.git' dir contents. At the moment, only 'scalar diagnose' uses 'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to match existing functionality), but more callers will be introduced in subsequent patches. Finally, refactor from a hardcoded set of 'add_directory_to_archiver()' calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for easier future modification of the set of directories to archive and improves error reporting when 'add_directory_to_archiver()' fails. Helped-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Victoria Dye <vdye@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 20:10:14 +00:00
int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode)
{
struct strvec archiver_args = STRVEC_INIT;
char **argv_copy = NULL;
int stdout_fd = -1, archiver_fd = -1;
struct strbuf buf = STRBUF_INIT;
diagnose.c: add option to configure archive contents Update 'create_diagnostics_archive()' to take an argument 'mode'. When archiving diagnostics for a repository, 'mode' is used to selectively include/exclude information based on its value. The initial options for 'mode' are: * DIAGNOSE_NONE: do not collect any diagnostics or create an archive (no-op). * DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path, filesystem available space) as well as sizing and count statistics for the repository's objects and packfiles. * DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics, and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and '.git/objects/info' directories. These modes are introduced to provide users the option to collect diagnostics without the sensitive information included in copies of '.git' dir contents. At the moment, only 'scalar diagnose' uses 'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to match existing functionality), but more callers will be introduced in subsequent patches. Finally, refactor from a hardcoded set of 'add_directory_to_archiver()' calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for easier future modification of the set of directories to archive and improves error reporting when 'add_directory_to_archiver()' fails. Helped-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Victoria Dye <vdye@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 20:10:14 +00:00
int res, i;
struct archive_dir archive_dirs[] = {
{ ".git", 0 },
{ ".git/hooks", 0 },
{ ".git/info", 0 },
{ ".git/logs", 1 },
{ ".git/objects/info", 0 }
};
if (mode == DIAGNOSE_NONE) {
res = 0;
goto diagnose_cleanup;
}
stdout_fd = dup(STDOUT_FILENO);
if (stdout_fd < 0) {
res = error_errno(_("could not duplicate stdout"));
goto diagnose_cleanup;
}
archiver_fd = xopen(zip_path->buf, O_CREAT | O_WRONLY | O_TRUNC, 0666);
if (dup2(archiver_fd, STDOUT_FILENO) < 0) {
res = error_errno(_("could not redirect output"));
goto diagnose_cleanup;
}
init_zip_archiver();
strvec_pushl(&archiver_args, "git-diagnose", "--format=zip", NULL);
strbuf_reset(&buf);
strbuf_addstr(&buf, "Collecting diagnostic info\n\n");
get_version_info(&buf, 1);
strbuf_addf(&buf, "Repository root: %s\n", the_repository->worktree);
get_disk_info(&buf);
write_or_die(stdout_fd, buf.buf, buf.len);
strvec_pushf(&archiver_args,
"--add-virtual-file=diagnostics.log:%.*s",
(int)buf.len, buf.buf);
strbuf_reset(&buf);
strbuf_addstr(&buf, "--add-virtual-file=packs-local.txt:");
dir_file_stats(the_repository->objects->odb, &buf);
foreach_alt_odb(dir_file_stats, &buf);
strvec_push(&archiver_args, buf.buf);
strbuf_reset(&buf);
strbuf_addstr(&buf, "--add-virtual-file=objects-local.txt:");
loose_objs_stats(&buf, ".git/objects");
strvec_push(&archiver_args, buf.buf);
diagnose.c: add option to configure archive contents Update 'create_diagnostics_archive()' to take an argument 'mode'. When archiving diagnostics for a repository, 'mode' is used to selectively include/exclude information based on its value. The initial options for 'mode' are: * DIAGNOSE_NONE: do not collect any diagnostics or create an archive (no-op). * DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path, filesystem available space) as well as sizing and count statistics for the repository's objects and packfiles. * DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics, and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and '.git/objects/info' directories. These modes are introduced to provide users the option to collect diagnostics without the sensitive information included in copies of '.git' dir contents. At the moment, only 'scalar diagnose' uses 'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to match existing functionality), but more callers will be introduced in subsequent patches. Finally, refactor from a hardcoded set of 'add_directory_to_archiver()' calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for easier future modification of the set of directories to archive and improves error reporting when 'add_directory_to_archiver()' fails. Helped-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Victoria Dye <vdye@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 20:10:14 +00:00
/* Only include this if explicitly requested */
if (mode == DIAGNOSE_ALL) {
for (i = 0; i < ARRAY_SIZE(archive_dirs); i++) {
if (add_directory_to_archiver(&archiver_args,
archive_dirs[i].path,
archive_dirs[i].recursive)) {
res = error_errno(_("could not add directory '%s' to archiver"),
archive_dirs[i].path);
goto diagnose_cleanup;
}
}
}
strvec_pushl(&archiver_args, "--prefix=",
oid_to_hex(the_hash_algo->empty_tree), "--", NULL);
/* `write_archive()` modifies the `argv` passed to it. Let it. */
argv_copy = xmemdupz(archiver_args.v,
sizeof(char *) * archiver_args.nr);
res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL,
the_repository, NULL, 0);
if (res) {
error(_("failed to write archive"));
goto diagnose_cleanup;
}
fprintf(stderr, "\n"
"Diagnostics complete.\n"
"All of the gathered info is captured in '%s'\n",
zip_path->buf);
diagnose_cleanup:
if (archiver_fd >= 0) {
dup2(stdout_fd, STDOUT_FILENO);
close(stdout_fd);
close(archiver_fd);
}
free(argv_copy);
strvec_clear(&archiver_args);
strbuf_release(&buf);
return res;
}