1
0
mirror of https://github.com/git/git synced 2024-07-02 15:48:44 +00:00
git/builtin/patch-id.c

257 lines
6.0 KiB
C
Raw Permalink Normal View History

Fix sparse warnings Fix warnings from 'make check'. - These files don't include 'builtin.h' causing sparse to complain that cmd_* isn't declared: builtin/clone.c:364, builtin/fetch-pack.c:797, builtin/fmt-merge-msg.c:34, builtin/hash-object.c:78, builtin/merge-index.c:69, builtin/merge-recursive.c:22 builtin/merge-tree.c:341, builtin/mktag.c:156, builtin/notes.c:426 builtin/notes.c:822, builtin/pack-redundant.c:596, builtin/pack-refs.c:10, builtin/patch-id.c:60, builtin/patch-id.c:149, builtin/remote.c:1512, builtin/remote-ext.c:240, builtin/remote-fd.c:53, builtin/reset.c:236, builtin/send-pack.c:384, builtin/unpack-file.c:25, builtin/var.c:75 - These files have symbols which should be marked static since they're only file scope: submodule.c:12, diff.c:631, replace_object.c:92, submodule.c:13, submodule.c:14, trace.c:78, transport.c:195, transport-helper.c:79, unpack-trees.c:19, url.c:3, url.c:18, url.c:104, url.c:117, url.c:123, url.c:129, url.c:136, thread-utils.c:21, thread-utils.c:48 - These files redeclare symbols to be different types: builtin/index-pack.c:210, parse-options.c:564, parse-options.c:571, usage.c:49, usage.c:58, usage.c:63, usage.c:72 - These files use a literal integer 0 when they really should use a NULL pointer: daemon.c:663, fast-import.c:2942, imap-send.c:1072, notes-merge.c:362 While we're in the area, clean up some unused #includes in builtin files (mostly exec_cmd.h). Signed-off-by: Stephen Boyd <bebarino@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-22 07:51:05 +00:00
#include "builtin.h"
#include "config.h"
#include "diff.h"
#include "gettext.h"
#include "hash.h"
#include "hex.h"
#include "parse-options.h"
builtin/patch-id: fix uninitialized hash function In c8aed5e8da (repository: stop setting SHA1 as the default object hash, 2024-05-07), we have adapted `initialize_repository()` to no longer set up a default hash function. As this function is also used to set up `the_repository`, the consequence is that `the_hash_algo` will now by default be a `NULL` pointer unless the hash algorithm was configured properly. This is done as a mechanism to detect cases where we may be using the wrong hash function by accident. This change now causes git-patch-id(1) to segfault when it's run outside of a repository. As this command can read diffs from stdin, it does not necessarily need a repository, but then relies on `the_hash_algo` to compute the patch ID itself. It is somewhat dubious that git-patch-id(1) relies on `the_hash_algo` in the first place. Quoting its manpage: A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a patch, with line numbers ignored. As such, it’s "reasonably stable", but at the same time also reasonably unique, i.e., two patches that have the same "patch ID" are almost guaranteed to be the same thing. We explicitly document patch IDs to be using SHA-1. Furthermore, patch IDs are supposed to be stable for most of the part. But even with the same input, the patch IDs will now be different depending on the repo's configured object hash. Work around the issue by setting up SHA-1 when there was no startup repository for now. This is arguably not the correct fix, but for now we rather want to focus on getting the segfault fixed. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-05-20 23:14:32 +00:00
#include "setup.h"
/*
 * Print one "<patch-id> <commit-id>" line to stdout.
 *
 * Nothing is printed when `patchlen` is zero, i.e. when no patch text
 * was accumulated for the current id.
 */
static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result)
{
	if (!patchlen)
		return;

	printf("%s %s\n", oid_to_hex(result), oid_to_hex(id));
}
/*
 * Squeeze every whitespace byte out of `line`, in place.
 *
 * Returns the number of bytes kept.  The compacted prefix is NOT
 * NUL-terminated; bytes past the returned length are left untouched,
 * so callers must use the returned length rather than strlen().
 */
static int remove_space(char *line)
{
	char *out = line;
	const char *in;

	for (in = line; *in != '\0'; in++) {
		/* go through unsigned char so isspace() never sees a negative value */
		unsigned char ch = *in;

		if (isspace(ch))
			continue;
		*out++ = ch;
	}
	return out - line;
}
/*
 * Extract the line counts from a hunk header of the form
 * "@@ -<start>[,<count>] +<start>[,<count>] ...".
 *
 * `p` must point at the leading "@@ -" (the first four bytes are
 * skipped without being checked).  A missing ",<count>" part means a
 * count of 1.
 *
 * Returns 1 on success and 0 when the header is malformed.  On failure
 * `*p_before` and/or `*p_after` may already have been written.
 */
static int scan_hunk_header(const char *p, int *p_before, int *p_after)
{
	static const char digits[] = "0123456789";
	const char *old_side = p + 4;
	const char *new_side;
	int ndigits;

	/* "-<start>" optionally followed by ",<count>" */
	ndigits = strspn(old_side, digits);
	if (old_side[ndigits] != ',') {
		*p_before = 1;
	} else {
		old_side += ndigits + 1;
		*p_before = atoi(old_side);
		ndigits = strspn(old_side, digits);
	}

	/* the old side must end in digits and be followed by " +" */
	if (!ndigits)
		return 0;
	if (old_side[ndigits] != ' ' || old_side[ndigits + 1] != '+')
		return 0;

	/* "+<start>" optionally followed by ",<count>" */
	new_side = old_side + ndigits + 2;
	ndigits = strspn(new_side, digits);
	if (new_side[ndigits] != ',') {
		*p_after = 1;
	} else {
		new_side += ndigits + 1;
		*p_after = atoi(new_side);
		ndigits = strspn(new_side, digits);
	}

	return ndigits != 0;
}
/*
 * Read one patch from stdin and compute its patch ID into `result`.
 *
 * Lines are consumed through `line_buf` until EOF, until a line that
 * carries an object id (optionally prefixed by "commit " or "From ")
 * announces the next patch, or until a line that cannot belong to the
 * current patch is seen.
 *
 *   next_oid  receives the object id that terminated this patch, or is
 *             cleared when no such id was found
 *   result    cleared on entry, then filled in via flush_one_hunk()
 *   stable    when non-zero, flush_one_hunk() is additionally called at
 *             each per-file boundary (new "diff " header, binary diff)
 *   verbatim  when non-zero, lines are hashed as-is instead of having
 *             their whitespace removed, and "\ ..." marker lines are
 *             hashed too
 *
 * Returns the number of bytes of patch text fed to the hash as ordinary
 * diff lines; 0 means no patch was found.
 */
static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
			   struct strbuf *line_buf, int stable, int verbatim)
{
	int patchlen = 0, found_next = 0;
	int before = -1, after = -1;
	int diff_is_binary = 0;
	/*
	 * Start out empty: a binary-diff marker can show up without a
	 * preceding "index " line, and strlen() on an uninitialized
	 * buffer would be undefined behavior.
	 */
	char pre_oid_str[GIT_MAX_HEXSZ + 1] = "", post_oid_str[GIT_MAX_HEXSZ + 1] = "";
	git_hash_ctx ctx;

	the_hash_algo->init_fn(&ctx);
	oidclr(result);

	while (strbuf_getwholeline(line_buf, stdin, '\n') != EOF) {
		char *line = line_buf->buf;
		const char *p = line;
		int len;

		/*
		 * Possibly skip over the prefix added by "log" or
		 * "format-patch" (p is advanced past it).  Lines that have
		 * neither prefix but start with "\ " and are longer than 12
		 * bytes are only hashed in verbatim mode.
		 */
		if (!skip_prefix(line, "commit ", &p) &&
		    !skip_prefix(line, "From ", &p) &&
		    starts_with(line, "\\ ") && 12 < strlen(line)) {
			if (verbatim)
				the_hash_algo->update_fn(&ctx, line, strlen(line));
			continue;
		}

		/* An object id here marks the start of the next patch. */
		if (!get_oid_hex(p, next_oid)) {
			found_next = 1;
			break;
		}

		/* Ignore commit comments */
		if (!patchlen && !starts_with(line, "diff "))
			continue;

		/* Parsing diff header? */
		if (before == -1) {
			if (starts_with(line, "GIT binary patch") ||
			    starts_with(line, "Binary files")) {
				/*
				 * Binary content is not hashed; the blob ids
				 * remembered from the "index " line stand in
				 * for it.
				 */
				diff_is_binary = 1;
				before = 0;
				the_hash_algo->update_fn(&ctx, pre_oid_str,
							 strlen(pre_oid_str));
				the_hash_algo->update_fn(&ctx, post_oid_str,
							 strlen(post_oid_str));
				if (stable)
					flush_one_hunk(result, &ctx);
				continue;
			} else if (skip_prefix(line, "index ", &p)) {
				/* "index <pre>..<post>[ <mode>]": remember both ids */
				char *oid1_end = strstr(line, "..");
				char *oid2_end = NULL;
				if (oid1_end)
					oid2_end = strstr(oid1_end, " ");
				if (!oid2_end)
					oid2_end = line + strlen(line) - 1;
				if (oid1_end != NULL && oid2_end != NULL) {
					*oid1_end = *oid2_end = '\0';
					strlcpy(pre_oid_str, p, GIT_MAX_HEXSZ + 1);
					strlcpy(post_oid_str, oid1_end + 2, GIT_MAX_HEXSZ + 1);
				}
				continue;
			} else if (starts_with(line, "--- "))
				before = after = 1;
			else if (!isalpha(line[0]))
				break;
		}

		/* Skip the body of a binary diff until the next "diff " header. */
		if (diff_is_binary) {
			if (starts_with(line, "diff ")) {
				diff_is_binary = 0;
				before = -1;
			}
			continue;
		}

		/* Looking for a valid hunk header? */
		if (before == 0 && after == 0) {
			if (starts_with(line, "@@ -")) {
				/* Parse next hunk, but ignore line numbers. */
				scan_hunk_header(line, &before, &after);
				continue;
			}

			/* Split at the end of the patch. */
			if (!starts_with(line, "diff "))
				break;

			/* Else we're parsing another header. */
			if (stable)
				flush_one_hunk(result, &ctx);
			before = after = -1;
		}

		/* If we get here, we're inside a hunk. */
		if (line[0] == '-' || line[0] == ' ')
			before--;
		if (line[0] == '+' || line[0] == ' ')
			after--;

		/* Add line to hash algo (possibly removing whitespace) */
		len = verbatim ? strlen(line) : remove_space(line);
		patchlen += len;
		the_hash_algo->update_fn(&ctx, line, len);
	}

	if (!found_next)
		oidclr(next_oid);

	flush_one_hunk(result, &ctx);

	return patchlen;
}
/*
 * Read patches from stdin until end of file and print one
 * "<patch-id> <commit-id>" line for each patch that had content.
 *
 * The commit id printed with a patch is the one that get_one_patchid()
 * reported as `next` while reading the previous patch, since that id
 * line precedes the patch it belongs to.
 */
static void generate_id_list(int stable, int verbatim)
{
	struct strbuf line_buf = STRBUF_INIT;
	struct object_id oid, n, result;

	oidclr(&oid);
	for (; !feof(stdin); oidcpy(&oid, &n)) {
		int patchlen = get_one_patchid(&n, &result, &line_buf,
					       stable, verbatim);

		flush_current_id(patchlen, &oid, &result);
	}
	strbuf_release(&line_buf);
}
/* NULL-terminated usage text handed to parse_options() by cmd_patch_id(). */
static const char *const patch_id_usage[] = {
N_("git patch-id [--stable | --unstable | --verbatim]"), NULL
};
/*
 * Defaults taken from the "patchid.*" configuration; filled in by
 * git_patch_id_config() and used by cmd_patch_id() when no mode option
 * is given on the command line.
 */
struct patch_id_opts {
int stable; /* from patchid.stable; cmd_patch_id() also sets it when verbatim is set */
int verbatim; /* from patchid.verbatim */
};
config: add ctx arg to config_fn_t Add a new "const struct config_context *ctx" arg to config_fn_t to hold additional information about the config iteration operation. config_context has a "struct key_value_info kvi" member that holds metadata about the config source being read (e.g. what kind of config source it is, the filename, etc). In this series, we're only interested in .kvi, so we could have just used "struct key_value_info" as an arg, but config_context makes it possible to add/adjust members in the future without changing the config_fn_t signature. We could also consider other ways of organizing the args (e.g. moving the config name and value into config_context or key_value_info), but in my experiments, the incremental benefit doesn't justify the added complexity (e.g. a config_fn_t will sometimes invoke another config_fn_t but with a different config value). In subsequent commits, the .kvi member will replace the global "struct config_reader" in config.c, making config iteration a global-free operation. It requires much more work for the machinery to provide meaningful values of .kvi, so for now, merely change the signature and call sites, pass NULL as a placeholder value, and don't rely on the arg in any meaningful way. Most of the changes are performed by contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every config_fn_t: - Modifies the signature to accept "const struct config_context *ctx" - Passes "ctx" to any inner config_fn_t, if needed - Adds UNUSED attributes to "ctx", if needed Most config_fn_t instances are easily identified by seeing if they are called by the various config functions. Most of the remaining ones are manually named in the .cocci patch. Manual cleanups are still needed, but the majority of it is trivial; it's either adjusting config_fn_t that the .cocci patch didn't catch, or adding forward declarations of "struct config_context ctx" to make the signatures make sense. 
The non-trivial changes are in cases where we are invoking a config_fn_t outside of config machinery, and we now need to decide what value of "ctx" to pass. These cases are: - trace2/tr2_cfg.c:tr2_cfg_set_fl() This is indirectly called by git_config_set() so that the trace2 machinery can notice the new config values and update its settings using the tr2 config parsing function, i.e. tr2_cfg_cb(). - builtin/checkout.c:checkout_main() This calls git_xmerge_config() as a shorthand for parsing a CLI arg. This might be worth refactoring away in the future, since git_xmerge_config() can call git_default_config(), which can do much more than just parsing. Handle them by creating a KVI_INIT macro that initializes "struct key_value_info" to a reasonable default, and use that to construct the "ctx" arg. Signed-off-by: Glen Choo <chooglen@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 19:26:22 +00:00
/*
 * Config callback for "git patch-id".
 *
 * `cb` must point to a struct patch_id_opts.  The boolean keys
 * "patchid.stable" and "patchid.verbatim" are stored into it and 0 is
 * returned; every other key is passed on, together with `ctx`, to
 * git_default_config(), whose return value is returned.
 */
static int git_patch_id_config(const char *var, const char *value,
			       const struct config_context *ctx, void *cb)
{
	struct patch_id_opts *opts = cb;

	if (!strcmp(var, "patchid.stable")) {
		opts->stable = git_config_bool(var, value);
		return 0;
	}
	if (!strcmp(var, "patchid.verbatim")) {
		opts->verbatim = git_config_bool(var, value);
		return 0;
	}

	return git_default_config(var, value, ctx, cb);
}
int cmd_patch_id(int argc, const char **argv, const char *prefix)
{
/* if nothing is set, default to unstable */
struct patch_id_opts config = {0, 0};
int opts = 0;
struct option builtin_patch_id_options[] = {
OPT_CMDMODE(0, "unstable", &opts,
N_("use the unstable patch-id algorithm"), 1),
OPT_CMDMODE(0, "stable", &opts,
N_("use the stable patch-id algorithm"), 2),
OPT_CMDMODE(0, "verbatim", &opts,
N_("don't strip whitespace from the patch"), 3),
OPT_END()
};
git_config(git_patch_id_config, &config);
/* verbatim implies stable */
if (config.verbatim)
config.stable = 1;
argc = parse_options(argc, argv, prefix, builtin_patch_id_options,
patch_id_usage, 0);
builtin/patch-id: fix uninitialized hash function In c8aed5e8da (repository: stop setting SHA1 as the default object hash, 2024-05-07), we have adapted `initialize_repository()` to no longer set up a default hash function. As this function is also used to set up `the_repository`, the consequence is that `the_hash_algo` will now by default be a `NULL` pointer unless the hash algorithm was configured properly. This is done as a mechanism to detect cases where we may be using the wrong hash function by accident. This change now causes git-patch-id(1) to segfault when it's run outside of a repository. As this command can read diffs from stdin, it does not necessarily need a repository, but then relies on `the_hash_algo` to compute the patch ID itself. It is somewhat dubious that git-patch-id(1) relies on `the_hash_algo` in the first place. Quoting its manpage: A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a patch, with line numbers ignored. As such, it’s "reasonably stable", but at the same time also reasonably unique, i.e., two patches that have the same "patch ID" are almost guaranteed to be the same thing. We explicitly document patch IDs to be using SHA-1. Furthermore, patch IDs are supposed to be stable for most of the part. But even with the same input, the patch IDs will now be different depending on the repo's configured object hash. Work around the issue by setting up SHA-1 when there was no startup repository for now. This is arguably not the correct fix, but for now we rather want to focus on getting the segfault fixed. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-05-20 23:14:32 +00:00
/*
* We rely on `the_hash_algo` to compute patch IDs. This is dubious as
* it means that the hash algorithm now depends on the object hash of
* the repository, even though git-patch-id(1) clearly defines that
* patch IDs always use SHA1.
*
* NEEDSWORK: This hack should be removed in favor of converting
* the code that computes patch IDs to always use SHA1.
*/
if (!the_hash_algo)
repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
generate_id_list(opts ? opts > 1 : config.stable,
opts ? opts == 3 : config.verbatim);
return 0;
}