Merge pull request #26628 from msizanoen1/utf8-quote-valid

escape: Ensure that output is always valid UTF-8
This commit is contained in:
Luca Boccassi 2023-03-02 17:33:16 +00:00 committed by GitHub
commit a41ac8ac40
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 24 additions and 7 deletions

View file

@@ -473,15 +473,23 @@ char* octescape(const char *s, size_t len) {
/* Copies s to t, escaping as it goes so that only well-formed UTF-8 is copied through verbatim.
 *
 *   - Bytes for which char_is_cc() is true, and bytes that do not start a valid UTF-8 sequence
 *     (utf8_encoded_valid_unichar() returns a negative value), are written via cescape_char().
 *   - Single-byte characters that are '\\' or are listed in 'bad' get a backslash prefix.
 *   - Valid multi-byte sequences are copied unchanged, all bytes at once.
 *
 * No terminating NUL is written. The return value is the position in the output buffer right
 * after the last byte written, so the caller can continue appending there. There is no bounds
 * checking here: the caller must make sure the buffer at t is large enough for the escaped
 * form of s. */
static char* strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        assert(bad);
        assert(t);
        assert(s);

        while (*s) {
                /* NOTE(review): presumably the byte length of the UTF-8 sequence starting at s,
                 * or a negative value if there is no valid sequence there; confirm against
                 * the helper's definition. */
                int l = utf8_encoded_valid_unichar(s, SIZE_MAX);

                if (char_is_cc(*s) || l < 0)
                        /* Escape exactly one byte and re-validate from the next one, so a
                         * single bogus byte never swallows the valid data that follows it. */
                        t += cescape_char(*(s++), t);
                else if (l == 1) {
                        if (*s == '\\' || strchr(bad, *s))
                                *(t++) = '\\';

                        *(t++) = *(s++);
                } else {
                        /* Valid multi-byte sequence: keep it intact rather than testing its
                         * individual bytes against 'bad'. */
                        t = mempcpy(t, s, l);
                        s += l;
                }
        }

        return t;
}
@@ -510,11 +518,16 @@ char* shell_maybe_quote(const char *s, ShellEscapeFlags flags) {
if (FLAGS_SET(flags, SHELL_ESCAPE_EMPTY) && isempty(s))
return strdup("\"\""); /* We don't use $'' here in the POSIX mode. "" is fine too. */
for (p = s; *p; p++)
if (char_is_cc(*p) ||
for (p = s; *p; ) {
int l = utf8_encoded_valid_unichar(p, SIZE_MAX);
if (char_is_cc(*p) || l < 0 ||
strchr(WHITESPACE SHELL_NEED_QUOTES, *p))
break;
p += l;
}
if (!*p)
return strdup(s);

View file

@@ -196,6 +196,10 @@ TEST(shell_maybe_quote) {
test_shell_maybe_quote_one("głąb\002\003rząd", 0, "\"głąb\\002\\003rząd\"");
test_shell_maybe_quote_one("głąb\002\003rząd", SHELL_ESCAPE_POSIX, "$'głąb\\002\\003rząd'");
/* Bogus UTF-8 strings */
test_shell_maybe_quote_one("\250\350", 0, "\"\\250\\350\"");
test_shell_maybe_quote_one("\250\350", SHELL_ESCAPE_POSIX, "$'\\250\\350'");
}
static void test_quote_command_line_one(char **argv, const char *expected) {