mirror of
https://github.com/freebsd/freebsd-src
synced 2024-10-04 15:40:44 +00:00
libarchive: merge bugfixes from vendor branch
#2147 archive_string: clean up strncat_from_utf8_to_utf8 (36047967a)
#2153 archive_match: check archive_read_support_format_raw() return value (0ce1b4c38)
#2154 archive_match: turn counter into flag (287e05d53)
#2155 lha: Do not allow negative file sizes (93b11caed)
#2156 tests: setenv LANG to en_US.UTF-8 in bsdunzip test_I.c (83e8b0ea8)
(cherry picked from commit c0b58e65de)
This commit is contained in:
parent
3a77c21a25
commit
c923dda75a
|
@ -46,7 +46,7 @@
|
|||
|
||||
struct match {
|
||||
struct match *next;
|
||||
int matches;
|
||||
int matched;
|
||||
struct archive_mstring pattern;
|
||||
};
|
||||
|
||||
|
@ -605,7 +605,8 @@ add_pattern_from_file(struct archive_match *a, struct match_list *mlist,
|
|||
return (ARCHIVE_FATAL);
|
||||
}
|
||||
r = archive_read_support_format_raw(ar);
|
||||
r = archive_read_support_format_empty(ar);
|
||||
if (r == ARCHIVE_OK)
|
||||
r = archive_read_support_format_empty(ar);
|
||||
if (r != ARCHIVE_OK) {
|
||||
archive_copy_error(&(a->archive), ar);
|
||||
archive_read_free(ar);
|
||||
|
@ -724,12 +725,12 @@ path_excluded(struct archive_match *a, int mbs, const void *pathname)
|
|||
matched = NULL;
|
||||
for (match = a->inclusions.first; match != NULL;
|
||||
match = match->next){
|
||||
if (match->matches == 0 &&
|
||||
if (!match->matched &&
|
||||
(r = match_path_inclusion(a, match, mbs, pathname)) != 0) {
|
||||
if (r < 0)
|
||||
return (r);
|
||||
a->inclusions.unmatched_count--;
|
||||
match->matches++;
|
||||
match->matched = 1;
|
||||
matched = match;
|
||||
}
|
||||
}
|
||||
|
@ -752,11 +753,10 @@ path_excluded(struct archive_match *a, int mbs, const void *pathname)
|
|||
for (match = a->inclusions.first; match != NULL;
|
||||
match = match->next){
|
||||
/* We looked at previously-unmatched inclusions already. */
|
||||
if (match->matches > 0 &&
|
||||
if (match->matched &&
|
||||
(r = match_path_inclusion(a, match, mbs, pathname)) != 0) {
|
||||
if (r < 0)
|
||||
return (r);
|
||||
match->matches++;
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
@ -879,7 +879,7 @@ match_list_unmatched_inclusions_next(struct archive_match *a,
|
|||
for (m = list->unmatched_next; m != NULL; m = m->next) {
|
||||
int r;
|
||||
|
||||
if (m->matches)
|
||||
if (m->matched)
|
||||
continue;
|
||||
if (mbs) {
|
||||
const char *p;
|
||||
|
@ -1793,7 +1793,7 @@ match_owner_name_mbs(struct archive_match *a, struct match_list *list,
|
|||
< 0 && errno == ENOMEM)
|
||||
return (error_nomem(a));
|
||||
if (p != NULL && strcmp(p, name) == 0) {
|
||||
m->matches++;
|
||||
m->matched = 1;
|
||||
return (1);
|
||||
}
|
||||
}
|
||||
|
@ -1814,7 +1814,7 @@ match_owner_name_wcs(struct archive_match *a, struct match_list *list,
|
|||
< 0 && errno == ENOMEM)
|
||||
return (error_nomem(a));
|
||||
if (p != NULL && wcscmp(p, name) == 0) {
|
||||
m->matches++;
|
||||
m->matched = 1;
|
||||
return (1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1347,6 +1347,8 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
|
|||
lha->compsize = archive_le64dec(extdheader);
|
||||
extdheader += sizeof(uint64_t);
|
||||
lha->origsize = archive_le64dec(extdheader);
|
||||
if (lha->compsize < 0 || lha->origsize < 0)
|
||||
goto invalid;
|
||||
}
|
||||
break;
|
||||
case EXT_CODEPAGE:
|
||||
|
|
|
@ -2640,81 +2640,69 @@ unicode_to_utf16le(char *p, size_t remaining, uint32_t uc)
|
|||
}
|
||||
|
||||
/*
|
||||
* Copy UTF-8 string in checking surrogate pair.
|
||||
* If any surrogate pair are found, it would be canonicalized.
|
||||
* Append new UTF-8 string to existing UTF-8 string.
|
||||
* Existing string is assumed to already be in proper form;
|
||||
* the new string will have invalid sequences replaced and
|
||||
* surrogate pairs canonicalized.
|
||||
*/
|
||||
static int
|
||||
strncat_from_utf8_to_utf8(struct archive_string *as, const void *_p,
|
||||
strncat_from_utf8_to_utf8(struct archive_string *as, const void *_src,
|
||||
size_t len, struct archive_string_conv *sc)
|
||||
{
|
||||
const char *s;
|
||||
char *p, *endp;
|
||||
int n, ret = 0;
|
||||
|
||||
int ret = 0;
|
||||
const char *src = _src;
|
||||
(void)sc; /* UNUSED */
|
||||
|
||||
/* Pre-extend the destination */
|
||||
if (archive_string_ensure(as, as->length + len + 1) == NULL)
|
||||
return (-1);
|
||||
|
||||
s = (const char *)_p;
|
||||
p = as->s + as->length;
|
||||
endp = as->s + as->buffer_length -1;
|
||||
do {
|
||||
/* Invariant: src points to the first UTF8 byte that hasn't
|
||||
* been copied to the destination `as`. */
|
||||
for (;;) {
|
||||
int n;
|
||||
uint32_t uc;
|
||||
const char *ss = s;
|
||||
size_t w;
|
||||
const char *e = src;
|
||||
|
||||
/*
|
||||
* Forward byte sequence until a conversion of that is needed.
|
||||
*/
|
||||
while ((n = utf8_to_unicode(&uc, s, len)) > 0) {
|
||||
s += n;
|
||||
/* Skip UTF-8 sequences until we reach end-of-string or
|
||||
* a code point that needs conversion. */
|
||||
while ((n = utf8_to_unicode(&uc, e, len)) > 0) {
|
||||
e += n;
|
||||
len -= n;
|
||||
}
|
||||
if (ss < s) {
|
||||
if (p + (s - ss) > endp) {
|
||||
as->length = p - as->s;
|
||||
if (archive_string_ensure(as,
|
||||
as->buffer_length + len + 1) == NULL)
|
||||
return (-1);
|
||||
p = as->s + as->length;
|
||||
endp = as->s + as->buffer_length -1;
|
||||
}
|
||||
|
||||
memcpy(p, ss, s - ss);
|
||||
p += s - ss;
|
||||
/* Copy the part that doesn't need conversion */
|
||||
if (e > src) {
|
||||
if (archive_string_append(as, src, e - src) == NULL)
|
||||
return (-1);
|
||||
src = e;
|
||||
}
|
||||
|
||||
/*
|
||||
* If n is negative, current byte sequence needs a replacement.
|
||||
*/
|
||||
if (n < 0) {
|
||||
if (n == 0) {
|
||||
/* We reached end-of-string */
|
||||
return (ret);
|
||||
} else {
|
||||
/* Next code point needs conversion */
|
||||
char t[4];
|
||||
size_t w;
|
||||
|
||||
/* Try decoding a surrogate pair */
|
||||
if (n == -3 && IS_SURROGATE_PAIR_LA(uc)) {
|
||||
/* Current byte sequence may be CESU-8. */
|
||||
n = cesu8_to_unicode(&uc, s, len);
|
||||
n = cesu8_to_unicode(&uc, src, len);
|
||||
}
|
||||
/* Not a (valid) surrogate, so use a replacement char */
|
||||
if (n < 0) {
|
||||
ret = -1;
|
||||
n *= -1;/* Use a replaced unicode character. */
|
||||
ret = -1; /* Return -1 if we used any replacement */
|
||||
n *= -1;
|
||||
}
|
||||
|
||||
/* Rebuild UTF-8 byte sequence. */
|
||||
while ((w = unicode_to_utf8(p, endp - p, uc)) == 0) {
|
||||
as->length = p - as->s;
|
||||
if (archive_string_ensure(as,
|
||||
as->buffer_length + len + 1) == NULL)
|
||||
return (-1);
|
||||
p = as->s + as->length;
|
||||
endp = as->s + as->buffer_length -1;
|
||||
}
|
||||
p += w;
|
||||
s += n;
|
||||
/* Consume converted code point */
|
||||
src += n;
|
||||
len -= n;
|
||||
/* Convert and append new UTF-8 sequence. */
|
||||
w = unicode_to_utf8(t, sizeof(t), uc);
|
||||
if (archive_string_append(as, t, w) == NULL)
|
||||
return (-1);
|
||||
}
|
||||
} while (n > 0);
|
||||
as->length = p - as->s;
|
||||
as->s[as->length] = '\0';
|
||||
return (ret);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
DEFINE_TEST(test_I)
|
||||
{
|
||||
const char *reffile = "test_I.zip";
|
||||
const char *lang;
|
||||
int r;
|
||||
|
||||
#if HAVE_SETLOCALE
|
||||
|
@ -44,6 +45,8 @@ DEFINE_TEST(test_I)
|
|||
skipping("setlocale() not available on this system.");
|
||||
#endif
|
||||
|
||||
lang = getenv("LANG");
|
||||
setenv("LANG", "en_US.UTF-8", 1);
|
||||
extract_reference_file(reffile);
|
||||
r = systemf("%s -I UTF-8 %s >test.out 2>test.err", testprog, reffile);
|
||||
assertEqualInt(0, r);
|
||||
|
@ -51,4 +54,9 @@ DEFINE_TEST(test_I)
|
|||
assertEmptyFile("test.err");
|
||||
|
||||
assertTextFileContents("Hello, World!\n", "Γειά σου Κόσμε.txt");
|
||||
|
||||
if (lang == NULL)
|
||||
unsetenv("LANG");
|
||||
else
|
||||
setenv("LANG", lang, 1);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue