global: introduce `USE_THE_REPOSITORY_VARIABLE` macro
Use of the `the_repository` variable is deprecated nowadays, and we
slowly but steadily convert the codebase to not use it anymore. Instead,
callers should be passing down the repository to work on via parameters.
It is hard though to prove that a given code unit does not use this
variable anymore. The most trivial case, merely demonstrating that there
is no direct use of `the_repository`, is already a bit of a pain during
code reviews as the reviewer needs to manually verify claims made by the
patch author. The bigger problem though is that we have many interfaces
that implicitly rely on `the_repository`.
Introduce a new `USE_THE_REPOSITORY_VARIABLE` macro that allows code
units to opt into usage of `the_repository`. The intent of this macro is
to demonstrate that a certain code unit does not use this variable
anymore, and to keep it from gaining new dependencies on it in future
changes, be they explicit or implicit.
For now, the macro only guards `the_repository` itself as well as
`the_hash_algo`. There are many more known interfaces where we have an
implicit dependency on `the_repository`, but those are not guarded at
the current point in time. Over time though, we should start to add
guards as required (or even better, just remove them).
Define the macro as required in our code units. As expected, most of our
code still relies on the global variable. Nearly all of our builtins
rely on the variable as there is no way yet to pass `the_repository` to
their entry point. For now, declare the macro in "builtin.h" to keep the
required changes at least a little bit more contained.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-14 06:50:23 +00:00
|
|
|
#define USE_THE_REPOSITORY_VARIABLE
|
|
|
|
|
2023-04-22 20:17:23 +00:00
|
|
|
#include "git-compat-util.h"
|
2022-08-12 20:10:13 +00:00
|
|
|
#include "diagnose.h"
|
|
|
|
#include "compat/disk.h"
|
|
|
|
#include "archive.h"
|
|
|
|
#include "dir.h"
|
|
|
|
#include "help.h"
|
2023-03-21 06:25:54 +00:00
|
|
|
#include "gettext.h"
|
2023-02-24 00:09:27 +00:00
|
|
|
#include "hex.h"
|
2022-08-12 20:10:13 +00:00
|
|
|
#include "strvec.h"
|
2023-05-16 06:34:06 +00:00
|
|
|
#include "object-store-ll.h"
|
2022-08-12 20:10:13 +00:00
|
|
|
#include "packfile.h"
|
treewide: include parse-options.h in source files
The builtins 'ls-remote', 'pack-objects', 'receive-pack', 'reflog' and
'send-pack' use parse_options(), but their source files don't directly
include 'parse-options.h'. Furthermore, the source files
'diagnose.c', 'list-objects-filter-options.c', 'remote.c' and
'send-pack.c' define option parsing callback functions, while
'revision.c' defines an option parsing helper function, and thus need
access to various fields in 'struct option' and 'struct
parse_opt_ctx_t', but they don't directly include 'parse-options.h'
either. They all can still be built, of course, because they include
one of the header files that does include 'parse-options.h' (though
unnecessarily, see the next commit).
Add those missing includes to these files, as our general rule is that
"a C file must directly include the header files that declare the
functions and the types it uses".
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-19 16:27:11 +00:00
|
|
|
#include "parse-options.h"
|
2023-03-21 06:26:07 +00:00
|
|
|
#include "write-or-die.h"
|
2022-08-12 20:10:13 +00:00
|
|
|
|
diagnose.c: add option to configure archive contents
Update 'create_diagnostics_archive()' to take an argument 'mode'. When
archiving diagnostics for a repository, 'mode' is used to selectively
include/exclude information based on its value. The initial options for
'mode' are:
* DIAGNOSE_NONE: do not collect any diagnostics or create an archive
(no-op).
* DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path,
filesystem available space) as well as sizing and count statistics for the
repository's objects and packfiles.
* DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics,
and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and
'.git/objects/info' directories.
These modes are introduced to provide users the option to collect
diagnostics without the sensitive information included in copies of '.git'
dir contents. At the moment, only 'scalar diagnose' uses
'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to
match existing functionality), but more callers will be introduced in
subsequent patches.
Finally, refactor from a hardcoded set of 'add_directory_to_archiver()'
calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for
easier future modification of the set of directories to archive and improves
error reporting when 'add_directory_to_archiver()' fails.
Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 20:10:14 +00:00
|
|
|
/*
 * Describes one directory that create_diagnostics_archive() hands to
 * add_directory_to_archiver().
 */
struct archive_dir {
	/* Directory path, passed as the `path` argument. */
	const char *path;

	/* Passed as the `recurse` argument; non-zero descends into subdirectories. */
	int recursive;
};
|
|
|
|
|
2022-08-12 20:10:16 +00:00
|
|
|
struct diagnose_option {
|
|
|
|
enum diagnose_mode mode;
|
|
|
|
const char *option_name;
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct diagnose_option diagnose_options[] = {
|
|
|
|
{ DIAGNOSE_STATS, "stats" },
|
|
|
|
{ DIAGNOSE_ALL, "all" },
|
|
|
|
};
|
|
|
|
|
|
|
|
int option_parse_diagnose(const struct option *opt, const char *arg, int unset)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
enum diagnose_mode *diagnose = opt->value;
|
|
|
|
|
|
|
|
if (!arg) {
|
|
|
|
*diagnose = unset ? DIAGNOSE_NONE : DIAGNOSE_STATS;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(diagnose_options); i++) {
|
|
|
|
if (!strcmp(arg, diagnose_options[i].option_name)) {
|
|
|
|
*diagnose = diagnose_options[i].mode;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return error(_("invalid --%s value '%s'"), opt->long_name, arg);
|
|
|
|
}
|
|
|
|
|
2023-02-24 06:39:24 +00:00
|
|
|
static void dir_file_stats_objects(const char *full_path,
|
|
|
|
size_t full_path_len UNUSED,
|
2022-08-12 20:10:13 +00:00
|
|
|
const char *file_name, void *data)
|
|
|
|
{
|
|
|
|
struct strbuf *buf = data;
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (!stat(full_path, &st))
|
|
|
|
strbuf_addf(buf, "%-70s %16" PRIuMAX "\n", file_name,
|
|
|
|
(uintmax_t)st.st_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int dir_file_stats(struct object_directory *object_dir, void *data)
|
|
|
|
{
|
|
|
|
struct strbuf *buf = data;
|
|
|
|
|
|
|
|
strbuf_addf(buf, "Contents of %s:\n", object_dir->path);
|
|
|
|
|
|
|
|
for_each_file_in_pack_dir(object_dir->path, dir_file_stats_objects,
|
|
|
|
data);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 18:16:55 +00:00
|
|
|
static int count_files(struct strbuf *path)
|
|
|
|
{
|
|
|
|
DIR *dir = opendir(path->buf);
|
2022-08-12 20:10:13 +00:00
|
|
|
struct dirent *e;
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
if (!dir)
|
|
|
|
return 0;
|
|
|
|
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 18:16:55 +00:00
|
|
|
while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL)
|
2023-10-09 21:58:55 +00:00
|
|
|
if (get_dtype(e, path, 0) == DT_REG)
|
2022-08-12 20:10:13 +00:00
|
|
|
count++;
|
|
|
|
|
|
|
|
closedir(dir);
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void loose_objs_stats(struct strbuf *buf, const char *path)
|
|
|
|
{
|
|
|
|
DIR *dir = opendir(path);
|
|
|
|
struct dirent *e;
|
|
|
|
int count;
|
|
|
|
int total = 0;
|
|
|
|
unsigned char c;
|
|
|
|
struct strbuf count_path = STRBUF_INIT;
|
|
|
|
size_t base_path_len;
|
|
|
|
|
|
|
|
if (!dir)
|
|
|
|
return;
|
|
|
|
|
|
|
|
strbuf_addstr(buf, "Object directory stats for ");
|
|
|
|
strbuf_add_absolute_path(buf, path);
|
|
|
|
strbuf_addstr(buf, ":\n");
|
|
|
|
|
|
|
|
strbuf_add_absolute_path(&count_path, path);
|
|
|
|
strbuf_addch(&count_path, '/');
|
|
|
|
base_path_len = count_path.len;
|
|
|
|
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 18:16:55 +00:00
|
|
|
while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL)
|
2023-10-09 21:58:55 +00:00
|
|
|
if (get_dtype(e, &count_path, 0) == DT_DIR &&
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 18:16:55 +00:00
|
|
|
strlen(e->d_name) == 2 &&
|
2022-08-12 20:10:13 +00:00
|
|
|
!hex_to_bytes(&c, e->d_name, 1)) {
|
|
|
|
strbuf_setlen(&count_path, base_path_len);
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 18:16:55 +00:00
|
|
|
strbuf_addf(&count_path, "%s/", e->d_name);
|
|
|
|
total += (count = count_files(&count_path));
|
2022-08-12 20:10:13 +00:00
|
|
|
strbuf_addf(buf, "%s : %7d files\n", e->d_name, count);
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_addf(buf, "Total: %d loose objects", total);
|
|
|
|
|
|
|
|
strbuf_release(&count_path);
|
|
|
|
closedir(dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int add_directory_to_archiver(struct strvec *archiver_args,
|
|
|
|
const char *path, int recurse)
|
|
|
|
{
|
|
|
|
int at_root = !*path;
|
|
|
|
DIR *dir;
|
|
|
|
struct dirent *e;
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
size_t len;
|
|
|
|
int res = 0;
|
|
|
|
|
|
|
|
dir = opendir(at_root ? "." : path);
|
|
|
|
if (!dir) {
|
|
|
|
if (errno == ENOENT) {
|
|
|
|
warning(_("could not archive missing directory '%s'"), path);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return error_errno(_("could not open directory '%s'"), path);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!at_root)
|
|
|
|
strbuf_addf(&buf, "%s/", path);
|
|
|
|
len = buf.len;
|
|
|
|
strvec_pushf(archiver_args, "--prefix=%s", buf.buf);
|
|
|
|
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 18:16:55 +00:00
|
|
|
while (!res && (e = readdir_skip_dot_and_dotdot(dir))) {
|
|
|
|
struct strbuf abspath = STRBUF_INIT;
|
|
|
|
unsigned char dtype;
|
|
|
|
|
|
|
|
strbuf_add_absolute_path(&abspath, at_root ? "." : path);
|
|
|
|
strbuf_addch(&abspath, '/');
|
2023-10-09 21:58:55 +00:00
|
|
|
dtype = get_dtype(e, &abspath, 0);
|
2022-08-12 20:10:13 +00:00
|
|
|
|
|
|
|
strbuf_setlen(&buf, len);
|
|
|
|
strbuf_addstr(&buf, e->d_name);
|
|
|
|
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 18:16:55 +00:00
|
|
|
if (dtype == DT_REG)
|
2022-08-12 20:10:13 +00:00
|
|
|
strvec_pushf(archiver_args, "--add-file=%s", buf.buf);
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 18:16:55 +00:00
|
|
|
else if (dtype != DT_DIR)
|
2022-08-12 20:10:13 +00:00
|
|
|
warning(_("skipping '%s', which is neither file nor "
|
|
|
|
"directory"), buf.buf);
|
|
|
|
else if (recurse &&
|
|
|
|
add_directory_to_archiver(archiver_args,
|
|
|
|
buf.buf, recurse) < 0)
|
|
|
|
res = -1;
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 18:16:55 +00:00
|
|
|
|
|
|
|
strbuf_release(&abspath);
|
2022-08-12 20:10:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
closedir(dir);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
diagnose.c: add option to configure archive contents
Update 'create_diagnostics_archive()' to take an argument 'mode'. When
archiving diagnostics for a repository, 'mode' is used to selectively
include/exclude information based on its value. The initial options for
'mode' are:
* DIAGNOSE_NONE: do not collect any diagnostics or create an archive
(no-op).
* DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path,
filesystem available space) as well as sizing and count statistics for the
repository's objects and packfiles.
* DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics,
and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and
'.git/objects/info' directories.
These modes are introduced to provide users the option to collect
diagnostics without the sensitive information included in copies of '.git'
dir contents. At the moment, only 'scalar diagnose' uses
'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to
match existing functionality), but more callers will be introduced in
subsequent patches.
Finally, refactor from a hardcoded set of 'add_directory_to_archiver()'
calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for
easier future modification of the set of directories to archive and improves
error reporting when 'add_directory_to_archiver()' fails.
Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 20:10:14 +00:00
|
|
|
int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode)
|
2022-08-12 20:10:13 +00:00
|
|
|
{
|
|
|
|
struct strvec archiver_args = STRVEC_INIT;
|
|
|
|
char **argv_copy = NULL;
|
|
|
|
int stdout_fd = -1, archiver_fd = -1;
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
diagnose.c: add option to configure archive contents
Update 'create_diagnostics_archive()' to take an argument 'mode'. When
archiving diagnostics for a repository, 'mode' is used to selectively
include/exclude information based on its value. The initial options for
'mode' are:
* DIAGNOSE_NONE: do not collect any diagnostics or create an archive
(no-op).
* DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path,
filesystem available space) as well as sizing and count statistics for the
repository's objects and packfiles.
* DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics,
and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and
'.git/objects/info' directories.
These modes are introduced to provide users the option to collect
diagnostics without the sensitive information included in copies of '.git'
dir contents. At the moment, only 'scalar diagnose' uses
'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to
match existing functionality), but more callers will be introduced in
subsequent patches.
Finally, refactor from a hardcoded set of 'add_directory_to_archiver()'
calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for
easier future modification of the set of directories to archive and improves
error reporting when 'add_directory_to_archiver()' fails.
Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 20:10:14 +00:00
|
|
|
int res, i;
|
|
|
|
struct archive_dir archive_dirs[] = {
|
|
|
|
{ ".git", 0 },
|
|
|
|
{ ".git/hooks", 0 },
|
|
|
|
{ ".git/info", 0 },
|
|
|
|
{ ".git/logs", 1 },
|
|
|
|
{ ".git/objects/info", 0 }
|
|
|
|
};
|
|
|
|
|
|
|
|
if (mode == DIAGNOSE_NONE) {
|
|
|
|
res = 0;
|
|
|
|
goto diagnose_cleanup;
|
|
|
|
}
|
2022-08-12 20:10:13 +00:00
|
|
|
|
|
|
|
stdout_fd = dup(STDOUT_FILENO);
|
|
|
|
if (stdout_fd < 0) {
|
|
|
|
res = error_errno(_("could not duplicate stdout"));
|
|
|
|
goto diagnose_cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
archiver_fd = xopen(zip_path->buf, O_CREAT | O_WRONLY | O_TRUNC, 0666);
|
|
|
|
if (dup2(archiver_fd, STDOUT_FILENO) < 0) {
|
|
|
|
res = error_errno(_("could not redirect output"));
|
|
|
|
goto diagnose_cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
init_zip_archiver();
|
|
|
|
strvec_pushl(&archiver_args, "git-diagnose", "--format=zip", NULL);
|
|
|
|
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
strbuf_addstr(&buf, "Collecting diagnostic info\n\n");
|
|
|
|
get_version_info(&buf, 1);
|
|
|
|
|
|
|
|
strbuf_addf(&buf, "Repository root: %s\n", the_repository->worktree);
|
|
|
|
get_disk_info(&buf);
|
|
|
|
write_or_die(stdout_fd, buf.buf, buf.len);
|
|
|
|
strvec_pushf(&archiver_args,
|
|
|
|
"--add-virtual-file=diagnostics.log:%.*s",
|
|
|
|
(int)buf.len, buf.buf);
|
|
|
|
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
strbuf_addstr(&buf, "--add-virtual-file=packs-local.txt:");
|
|
|
|
dir_file_stats(the_repository->objects->odb, &buf);
|
|
|
|
foreach_alt_odb(dir_file_stats, &buf);
|
|
|
|
strvec_push(&archiver_args, buf.buf);
|
|
|
|
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
strbuf_addstr(&buf, "--add-virtual-file=objects-local.txt:");
|
|
|
|
loose_objs_stats(&buf, ".git/objects");
|
|
|
|
strvec_push(&archiver_args, buf.buf);
|
|
|
|
|
diagnose.c: add option to configure archive contents
Update 'create_diagnostics_archive()' to take an argument 'mode'. When
archiving diagnostics for a repository, 'mode' is used to selectively
include/exclude information based on its value. The initial options for
'mode' are:
* DIAGNOSE_NONE: do not collect any diagnostics or create an archive
(no-op).
* DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path,
filesystem available space) as well as sizing and count statistics for the
repository's objects and packfiles.
* DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics,
and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and
'.git/objects/info' directories.
These modes are introduced to provide users the option to collect
diagnostics without the sensitive information included in copies of '.git'
dir contents. At the moment, only 'scalar diagnose' uses
'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to
match existing functionality), but more callers will be introduced in
subsequent patches.
Finally, refactor from a hardcoded set of 'add_directory_to_archiver()'
calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for
easier future modification of the set of directories to archive and improves
error reporting when 'add_directory_to_archiver()' fails.
Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 20:10:14 +00:00
|
|
|
/* Only include this if explicitly requested */
|
|
|
|
if (mode == DIAGNOSE_ALL) {
|
|
|
|
for (i = 0; i < ARRAY_SIZE(archive_dirs); i++) {
|
|
|
|
if (add_directory_to_archiver(&archiver_args,
|
|
|
|
archive_dirs[i].path,
|
|
|
|
archive_dirs[i].recursive)) {
|
|
|
|
res = error_errno(_("could not add directory '%s' to archiver"),
|
|
|
|
archive_dirs[i].path);
|
|
|
|
goto diagnose_cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-08-12 20:10:13 +00:00
|
|
|
|
|
|
|
strvec_pushl(&archiver_args, "--prefix=",
|
|
|
|
oid_to_hex(the_hash_algo->empty_tree), "--", NULL);
|
|
|
|
|
|
|
|
/* `write_archive()` modifies the `argv` passed to it. Let it. */
|
|
|
|
argv_copy = xmemdupz(archiver_args.v,
|
|
|
|
sizeof(char *) * archiver_args.nr);
|
|
|
|
res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL,
|
|
|
|
the_repository, NULL, 0);
|
|
|
|
if (res) {
|
|
|
|
error(_("failed to write archive"));
|
|
|
|
goto diagnose_cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(stderr, "\n"
|
|
|
|
"Diagnostics complete.\n"
|
|
|
|
"All of the gathered info is captured in '%s'\n",
|
|
|
|
zip_path->buf);
|
|
|
|
|
|
|
|
diagnose_cleanup:
|
|
|
|
if (archiver_fd >= 0) {
|
|
|
|
dup2(stdout_fd, STDOUT_FILENO);
|
|
|
|
close(stdout_fd);
|
|
|
|
close(archiver_fd);
|
|
|
|
}
|
|
|
|
free(argv_copy);
|
|
|
|
strvec_clear(&archiver_args);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|