git/versioncmp.c
Ævar Arnfjörð Bjarmason 9e2d884d0f config API: add "string" version of *_value_multi(), fix segfaults
Fix numerous and mostly long-standing segfaults in consumers of
the *_config_*value_multi() API. As discussed in the preceding commit
an empty key in the config syntax yields a "NULL" string, which these
users would give to strcmp() (or similar), resulting in segfaults.

As this change shows, most users of the *_config_*value_multi()
API didn't really want such an unsafe and low-level API, so let's give
them something with the safety of git_config_get_string() instead.

This fix is similar to what the *_string() functions and others
acquired in [1] and [2]. Namely introducing and using a safer
"*_get_string_multi()" variant of the low-level "_*value_multi()"
function.

This fixes segfaults in code introduced in:

  - d811c8e17c (versionsort: support reorder prerelease suffixes, 2015-02-26)
  - c026557a37 (versioncmp: generalize version sort suffix reordering, 2016-12-08)
  - a086f921a7 (submodule: decouple url and submodule interest, 2017-03-17)
  - a6be5e6764 (log: add log.excludeDecoration config option, 2020-04-16)
  - 92156291ca (log: add default decoration filter, 2022-08-05)
  - 50a044f1e4 (gc: replace config subprocesses with API calls, 2022-09-27)

There are now two users of the low-level API:

- One in "builtin/for-each-repo.c", which we'll convert in a
  subsequent commit.

- The "t/helper/test-config.c" code added in [3].

As seen in the preceding commit we need to give the
"t/helper/test-config.c" caller these "NULL" entries.

We could also alter the underlying git_configset_get_value_multi()
function to be "string safe", but doing so would leave no room for
other variants of "*_get_value_multi()" that coerce to other types.

Such coercion can't be built on the string version, since as we've
established "NULL" is a true value in the boolean context, but if we
coerced it to "" for use in a list of strings it'll be subsequently
coerced to "false" as a boolean.

The callback pattern being used here will make it easy to introduce
e.g. a "multi" variant which coerces its values to "bool", "int",
"path" etc.

1. 40ea4ed903 (Add config_error_nonbool() helper function,
   2008-02-11)
2. 6c47d0e8f3 (config.c: guard config parser from value=NULL,
   2008-02-11).
3. 4c715ebb96 (test-config: add tests for the config_set API,
   2014-07-28)

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-28 07:37:53 -07:00

199 lines
5.2 KiB
C

#include "cache.h"
#include "config.h"
#include "string-list.h"
/*
* versioncmp(): copied from string/strverscmp.c in glibc commit
* ee9247c38a8def24a59eb5cfb7196a98bef8cfdc, reformatted to Git coding
* style. The implementation is under LGPL-2.1 and Git relicenses it
* to GPLv2.
*/
/*
 * states: S_N: normal, S_I: comparing integral part, S_F: comparing
 * fractional parts, S_Z: idem but with leading Zeroes only
 *
 * Each value is the offset of that state's three-entry row in
 * versioncmp()'s next_state[] table. The class of the current character
 * (0: not a digit, 1: digit other than '0', 2: '0') is added to the row
 * offset to select the column.
 */
#define S_N 0x0
#define S_I 0x3
#define S_F 0x6
#define S_Z 0x9
/*
 * result_type: CMP: return diff; LEN: the string with the longer run of
 * digits compares greater, and diff decides when both runs have the
 * same length
 */
#define CMP 2
#define LEN 3
/*
 * Suffix list that versioncmp() reads from the configuration on first
 * use. It stays NULL when neither the "versionsort.suffix" nor the
 * "versionsort.prereleasesuffix" lookup returned zero.
 */
static const struct string_list *prereleases;
/* Set to 1 once versioncmp() has queried the configuration. */
static int initialized;
/*
 * Best suffix match found so far in one of the two strings compared by
 * swap_prereleases().
 */
struct suffix_match {
/* Index of the matched suffix in the prereleases list, or -1 if none. */
int conf_pos;
/*
 * Offset in the string where the match starts. Initialized to the
 * offset of the first differing character before any match is found.
 */
int start;
/* Length of the matched suffix, or -1 if none has matched yet. */
int len;
};
static void find_better_matching_suffix(const char *tagname, const char *suffix,
					int suffix_len, int start, int conf_pos,
					struct suffix_match *match)
{
	/*
	 * Only a match that beats the current one is interesting: a longer
	 * suffix may start as late as the current match does, while a
	 * suffix that is not longer has to start strictly earlier.
	 */
	int last = match->start;
	int pos;

	if (match->len >= suffix_len)
		last--;

	for (pos = start; pos <= last; pos++) {
		if (!starts_with(tagname + pos, suffix))
			continue;

		/* Earliest qualifying position found; record it and stop. */
		match->conf_pos = conf_pos;
		match->start = pos;
		match->len = suffix_len;
		return;
	}
}
/*
* off is the offset of the first different character in the two strings
* s1 and s2. If either s1 or s2 contains a prerelease suffix containing
* that offset or a suffix ends right before that offset, then that
* string will be forced to be on top.
*
* If both s1 and s2 contain a (different) suffix around that position,
* their order is determined by the order of those two suffixes in the
* configuration.
* If any of the strings contains more than one different suffixes around
* that position, then that string is sorted according to the contained
* suffix which starts at the earliest offset in that string.
* If more than one different contained suffixes start at that earliest
* offset, then that string is sorted according to the longest of those
* suffixes.
*
* Return non-zero if *diff contains the return value for versioncmp()
*/
static int swap_prereleases(const char *s1,
const char *s2,
int off,
int *diff)
{
int i;
struct suffix_match match1 = { -1, off, -1 };
struct suffix_match match2 = { -1, off, -1 };
for (i = 0; i < prereleases->nr; i++) {
const char *suffix = prereleases->items[i].string;
int start, suffix_len = strlen(suffix);
if (suffix_len < off)
start = off - suffix_len;
else
start = 0;
find_better_matching_suffix(s1, suffix, suffix_len, start,
i, &match1);
find_better_matching_suffix(s2, suffix, suffix_len, start,
i, &match2);
}
if (match1.conf_pos == -1 && match2.conf_pos == -1)
return 0;
if (match1.conf_pos == match2.conf_pos)
/* Found the same suffix in both, e.g. "-rc" in "v1.0-rcX"
* and "v1.0-rcY": the caller should decide based on "X"
* and "Y". */
return 0;
if (match1.conf_pos >= 0 && match2.conf_pos >= 0)
*diff = match1.conf_pos - match2.conf_pos;
else if (match1.conf_pos >= 0)
*diff = -1;
else /* if (match2.conf_pos >= 0) */
*diff = 1;
return 1;
}
/*
 * Classify a character for the versioncmp() state machine:
 * 0 for anything that is not a digit, 1 for the digits '1' to '9'
 * and 2 for '0'.
 */
static int versioncmp_symbol(unsigned char c)
{
	if (!isdigit(c))
		return 0;
	return c == '0' ? 2 : 1;
}

/*
 * Compare S1 and S2 as strings holding indices/version numbers,
 * returning less than, equal to or greater than zero if S1 is less
 * than, equal to or greater than S2 (for more info, see the texinfo
 * doc).
 *
 * On the first call that finds a difference, the suffix list is loaded
 * from "versionsort.suffix" (or, failing that, from
 * "versionsort.prereleasesuffix") and is consulted before the plain
 * version comparison.
 */
int versioncmp(const char *s1, const char *s2)
{
	/*
	 * Rows are states, columns are the class of the character just
	 * consumed: x (other), d ([1-9]), 0 ('0').
	 */
	static const uint8_t next_state[] = {
		/* state    x    d    0  */
		/* S_N */  S_N, S_I, S_Z,
		/* S_I */  S_N, S_I, S_I,
		/* S_F */  S_N, S_F, S_F,
		/* S_Z */  S_N, S_F, S_Z
	};
	/*
	 * Indexed by (state + class of c1) * 3 + class of c2, where c1 and
	 * c2 are the first differing characters.
	 */
	static const int8_t result_type[] = {
		/* state   x/x  x/d  x/0  d/x  d/d  d/0  0/x  0/d  0/0  */
		/* S_N */  CMP, CMP, CMP, CMP, LEN, CMP, CMP, CMP, CMP,
		/* S_I */  CMP, -1,  -1,  +1,  LEN, LEN, +1,  LEN, LEN,
		/* S_F */  CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
		/* S_Z */  CMP, +1,  +1,  -1,  CMP, CMP, -1,  CMP, CMP
	};
	const unsigned char *cur1 = (const unsigned char *) s1;
	const unsigned char *cur2 = (const unsigned char *) s2;
	unsigned char ch1, ch2;
	int state, diff;

	if (cur1 == cur2)
		return 0;

	ch1 = *cur1++;
	ch2 = *cur2++;
	/* Hint: '0' is a digit too. */
	state = S_N + versioncmp_symbol(ch1);

	/* Walk the common prefix, tracking the state it leaves us in. */
	for (;;) {
		diff = ch1 - ch2;
		if (diff)
			break;
		if (ch1 == '\0')
			return 0;

		state = next_state[state];
		ch1 = *cur1++;
		ch2 = *cur2++;
		state += versioncmp_symbol(ch1);
	}

	if (!initialized) {
		const char *const new_key = "versionsort.suffix";
		const char *const old_key = "versionsort.prereleasesuffix";
		const struct string_list *new_list;
		const struct string_list *old_list;
		int have_new = !git_config_get_string_multi(new_key, &new_list);
		int have_old = !git_config_get_string_multi(old_key, &old_list);

		if (have_new) {
			if (have_old)
				warning("ignoring %s because %s is set", old_key, new_key);
			prereleases = new_list;
		} else if (have_old) {
			prereleases = old_list;
		}
		initialized = 1;
	}

	/* cur1 is one past the first differing character, hence the "- 1". */
	if (prereleases &&
	    swap_prereleases(s1, s2, (const char *) cur1 - s1 - 1, &diff))
		return diff;

	state = result_type[state * 3 + versioncmp_symbol(ch2)];
	if (state == CMP)
		return diff;
	if (state != LEN)
		return state;	/* the table entry is the result itself */

	/* LEN: the longer run of digits wins; diff breaks a tie. */
	while (isdigit(*cur1++)) {
		if (!isdigit(*cur2++))
			return 1;
	}
	return isdigit(*cur2) ? -1 : diff;
}