Merge branch 'jc/patch-id'

The patch parser in "git patch-id" has been tightened to avoid
getting confused by lines that look like a patch header in the log
message.

* jc/patch-id:
  patch-id: tighten code to detect the patch header
  patch-id: rewrite code that detects the beginning of a patch
  patch-id: make get_one_patchid() more extensible
  patch-id: call flush_current_id() only when needed
  t4204: patch-id supports various input format
This commit is contained in:
Junio C Hamano 2024-08-14 14:54:53 -07:00
commit 44773b9f70
2 changed files with 112 additions and 21 deletions

View file

@ -7,10 +7,9 @@
#include "parse-options.h"
#include "setup.h"
static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result)
static void flush_current_id(struct object_id *id, struct object_id *result)
{
if (patchlen)
printf("%s %s\n", oid_to_hex(result), oid_to_hex(id));
printf("%s %s\n", oid_to_hex(result), oid_to_hex(id));
}
static int remove_space(char *line)
@ -60,9 +59,27 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after)
return 1;
}
/*
* flag bits to control get_one_patchid()'s behaviour.
*
* STABLE/VERBATIM are given from the command line option as
* --stable/--verbatim. FIND_HEADER conveys the internal state
* maintained by the caller to allow the function to avoid mistaking
* lines of log message before seeing the "diff" part as the beginning
* of the next patch.
*/
enum {
GOPID_STABLE = (1<<0), /* --stable */
GOPID_VERBATIM = (1<<1), /* --verbatim */
GOPID_FIND_HEADER = (1<<2), /* stop at the beginning of patch message */
};
static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
struct strbuf *line_buf, int stable, int verbatim)
struct strbuf *line_buf, unsigned flags)
{
int stable = flags & GOPID_STABLE;
int verbatim = flags & GOPID_VERBATIM;
int find_header = flags & GOPID_FIND_HEADER;
int patchlen = 0, found_next = 0;
int before = -1, after = -1;
int diff_is_binary = 0;
@ -77,24 +94,40 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
const char *p = line;
int len;
/* Possibly skip over the prefix added by "log" or "format-patch" */
if (!skip_prefix(line, "commit ", &p) &&
!skip_prefix(line, "From ", &p) &&
starts_with(line, "\\ ") && 12 < strlen(line)) {
if (verbatim)
the_hash_algo->update_fn(&ctx, line, strlen(line));
continue;
}
if (!get_oid_hex(p, next_oid)) {
found_next = 1;
break;
/*
* The caller hasn't seen us find a patch header and
* return to it, or we have started processing patch
* and may encounter the beginning of the next patch.
*/
if (find_header) {
/*
* If we see a line that begins with "<object name>",
* "commit <object name>" or "From <object name>", it is
* the beginning of a patch. Return to the caller, as
* we are done with the one we have been processing.
*/
if (skip_prefix(line, "commit ", &p))
;
else if (skip_prefix(line, "From ", &p))
;
if (!get_oid_hex(p, next_oid)) {
if (verbatim)
the_hash_algo->update_fn(&ctx, line, strlen(line));
found_next = 1;
break;
}
}
/* Ignore commit comments */
if (!patchlen && !starts_with(line, "diff "))
continue;
/*
* We are past the commit log message. Prepare to
* stop at the beginning of the next patch header.
*/
find_header = 1;
/* Parsing diff header? */
if (before == -1) {
if (starts_with(line, "GIT binary patch") ||
@ -127,6 +160,16 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
break;
}
/*
* A hunk about an incomplete line may have this
* marker at the end, which should just be ignored.
*/
if (starts_with(line, "\\ ") && 12 < strlen(line)) {
if (verbatim)
the_hash_algo->update_fn(&ctx, line, strlen(line));
continue;
}
if (diff_is_binary) {
if (starts_with(line, "diff ")) {
diff_is_binary = 0;
@ -173,17 +216,20 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
return patchlen;
}
static void generate_id_list(int stable, int verbatim)
static void generate_id_list(unsigned flags)
{
struct object_id oid, n, result;
int patchlen;
struct strbuf line_buf = STRBUF_INIT;
oidclr(&oid, the_repository->hash_algo);
flags |= GOPID_FIND_HEADER;
while (!feof(stdin)) {
patchlen = get_one_patchid(&n, &result, &line_buf, stable, verbatim);
flush_current_id(patchlen, &oid, &result);
patchlen = get_one_patchid(&n, &result, &line_buf, flags);
if (patchlen)
flush_current_id(&oid, &result);
oidcpy(&oid, &n);
flags &= ~GOPID_FIND_HEADER;
}
strbuf_release(&line_buf);
}
@ -219,6 +265,7 @@ int cmd_patch_id(int argc, const char **argv, const char *prefix)
/* if nothing is set, default to unstable */
struct patch_id_opts config = {0, 0};
int opts = 0;
unsigned flags = 0;
struct option builtin_patch_id_options[] = {
OPT_CMDMODE(0, "unstable", &opts,
N_("use the unstable patch-id algorithm"), 1),
@ -250,7 +297,11 @@ int cmd_patch_id(int argc, const char **argv, const char *prefix)
if (!the_hash_algo)
repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
generate_id_list(opts ? opts > 1 : config.stable,
opts ? opts == 3 : config.verbatim);
if (opts ? opts > 1 : config.stable)
flags |= GOPID_STABLE;
if (opts ? opts == 3 : config.verbatim)
flags |= GOPID_VERBATIM;
generate_id_list(flags);
return 0;
}

View file

@ -114,6 +114,46 @@ test_expect_success 'patch-id supports git-format-patch output' '
test "$2" = $(git rev-parse HEAD)
'
test_expect_success 'patch-id computes the same for various formats' '
# This test happens to consider "git log -p -1" output
# the canonical input format, so use it as the norm.
git log -1 -p same >log-p.output &&
git patch-id <log-p.output >expect &&
# format-patch begins with "From <commit object name>"
git format-patch -1 --stdout same >format-patch.output &&
git patch-id <format-patch.output >actual &&
test_cmp actual expect &&
# "diff-tree --stdin -p" begins with "<commit object name>"
same=$(git rev-parse same) &&
echo $same | git diff-tree --stdin -p >diff-tree.output &&
git patch-id <diff-tree.output >actual &&
test_cmp actual expect &&
# "diff-tree --stdin -v -p" begins with "commit <commit object name>"
echo $same | git diff-tree --stdin -p -v >diff-tree-v.output &&
git patch-id <diff-tree-v.output >actual &&
test_cmp actual expect
'
hash=$(git rev-parse same:)
for cruft in "$hash" "commit $hash is bad" "From $hash status"
do
test_expect_success "patch-id with <$cruft> in log message" '
git format-patch -1 --stdout same >patch-0 &&
git patch-id <patch-0 >expect &&
{
sed -e "/^$/q" patch-0 &&
printf "random message\n%s\n\n" "$cruft" &&
sed -e "1,/^$/d" patch-0
} >patch-cruft &&
git patch-id <patch-cruft >actual &&
test_cmp actual expect
'
done
test_expect_success 'whitespace is irrelevant in footer' '
get_patch_id main &&
git checkout same &&