escape: Ensure that output is always valid UTF-8

This ensures that shell string escape operations never copy invalid
UTF-8 from the input into the output: each byte of an invalid UTF-8
sequence is escaped as if it were a single-byte character.
This commit is contained in:
msizanoen1 2023-03-01 17:35:17 +07:00
parent 45db7b53e7
commit 00f57157f3

View file

@ -474,14 +474,20 @@ char* octescape(const char *s, size_t len) {
/* Copies the NUL-terminated string 's' to 't', escaping as it goes, and returns a pointer just
 * past the last byte written. No terminating NUL is written here.
 *
 * Per input position:
 *   - a control character, or a byte that does not start a valid UTF-8 sequence, is handed to
 *     cescape_char() one byte at a time;
 *   - a valid single-byte character is copied, prefixed with a backslash if it is a backslash
 *     itself or is listed in 'bad';
 *   - a valid multi-byte UTF-8 sequence is copied through unchanged.
 *
 * NOTE(review): the caller is presumably responsible for sizing 't' for the worst-case
 * expansion — confirm against the call sites. */
static char* strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        assert(bad);
        assert(t);
        assert(s);

        while (*s) {
                /* Encoded length of the character at 's'; negative if the bytes there are not
                 * valid UTF-8. */
                int l = utf8_encoded_valid_unichar(s, SIZE_MAX);

                if (char_is_cc(*s) || l < 0)
                        /* Consume exactly one byte, so that an invalid sequence is escaped byte
                         * by byte and never reaches the output verbatim. */
                        t += cescape_char(*(s++), t);
                else if (l == 1) {
                        if (*s == '\\' || strchr(bad, *s))
                                *(t++) = '\\';
                        *(t++) = *(s++);
                } else {
                        /* Valid multi-byte sequence: copy it whole. */
                        t = mempcpy(t, s, l);
                        s += l;
                }
        }

        return t;
}
@ -510,11 +516,16 @@ char* shell_maybe_quote(const char *s, ShellEscapeFlags flags) {
if (FLAGS_SET(flags, SHELL_ESCAPE_EMPTY) && isempty(s))
return strdup("\"\""); /* We don't use $'' here in the POSIX mode. "" is fine too. */
for (p = s; *p; p++)
if (char_is_cc(*p) ||
for (p = s; *p; ) {
int l = utf8_encoded_valid_unichar(p, SIZE_MAX);
if (char_is_cc(*p) || l < 0 ||
strchr(WHITESPACE SHELL_NEED_QUOTES, *p))
break;
p += l;
}
if (!*p)
return strdup(s);