mirror of
https://github.com/systemd/systemd
synced 2024-07-21 10:17:21 +00:00
Merge pull request #28146 from keszybz/ansi-seq-skip
Pass ANSI sequences through when ellipsizing output
This commit is contained in:
commit
4d8eca03e5
|
@ -295,6 +295,62 @@ static int write_ellipsis(char *buf, bool unicode) {
|
|||
return 3;
|
||||
}
|
||||
|
||||
/* Returns the number of bytes taken up by the ANSI escape sequence that starts at s[0], looking at no
 * more than len bytes. Returns 0 if s does not start with a complete, well-formed sequence.
 *
 * Two forms are recognized:
 *   - CSI sequences: ESC '[', any number of parameter bytes (0x30–0x3F), any number of intermediate
 *     bytes (0x20–0x2F), and exactly one final byte (0x40–0x7E);
 *   - other two-byte Fe sequences: ESC followed by one byte in the range 0x40–0x5F. */
static size_t ansi_sequence_length(const char *s, size_t len) {
        size_t pos;

        assert(s);

        /* Every sequence is ESC (ASCII 27, aka Ctrl-[) plus at least one more byte */
        if (len < 2 || s[0] != 0x1B)
                return 0;

        if (s[1] != 0x5B)
                /* Not '[', hence not CSI: either a plain two-byte Fe sequence or a bad escape */
                return (s[1] >= 0x40 && s[1] <= 0x5F) ? 2 : 0;

        /* CSI: skip over the parameter bytes… */
        for (pos = 2; pos < len && s[pos] >= 0x30 && s[pos] <= 0x3F; pos++)
                ;

        /* …then over the intermediate bytes */
        for (; pos < len && s[pos] >= 0x20 && s[pos] <= 0x2F; pos++)
                ;

        /* Ran out of input before seeing the final byte: the sequence is truncated */
        if (pos >= len)
                return 0;

        /* Anything other than a final byte at this point makes the sequence invalid */
        return (s[pos] >= 0x40 && s[pos] <= 0x7E) ? pos + 1 : 0;
}
|
||||
|
||||
/* Returns true if the len bytes starting at s contain at least one complete ANSI escape sequence, as
 * recognized by ansi_sequence_length(). */
static bool string_has_ansi_sequence(const char *s, size_t len) {
        const char *t = s;

        /* Look for the next ESC byte in the part of the buffer that has not been examined yet. The
         * search has to resume at t (not at s), and t has to move past an ESC that turned out not to
         * start a sequence, otherwise the same byte would be found over and over again. */
        while ((t = memchr(t, 0x1B, len - (t - s)))) {
                if (ansi_sequence_length(t, len - (t - s)) > 0)
                        return true;

                t++; /* Stray ESC, continue right after it */
        }

        return false;
}
|
||||
|
||||
/* Locates the last ANSI sequence (the one with the highest start offset) that lies completely within
 * the first 'length' bytes of s. On success its start is stored in *ret_where and its length in bytes
 * is returned. If there is none, *ret_where is set to NULL and 0 is returned. */
static size_t previous_ansi_sequence(const char *s, size_t length, const char **ret_where) {

        /* A sequence needs at least two bytes. Checking this up front also keeps the unsigned
         * arithmetic below from wrapping around for length 0 or 1. */
        if (length >= 2)
                /* Try every possible start offset, from length - 2 (the last place where two bytes
                 * still fit) down to 0. 'pos' is decremented as part of the loop test, so that the
                 * body can handle offset 0 without the counter going below zero first. */
                for (size_t pos = length - 1; pos-- > 0; ) {
                        size_t slen = ansi_sequence_length(s + pos, length - pos);
                        if (slen == 0)
                                continue;

                        *ret_where = s + pos;
                        return slen;
                }

        *ret_where = NULL;
        return 0;
}
|
||||
|
||||
static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
|
||||
size_t x, need_space, suffix_len;
|
||||
char *t;
|
||||
|
@ -354,7 +410,6 @@ static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_le
|
|||
char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
|
||||
size_t x, k, len, len2;
|
||||
const char *i, *j;
|
||||
char *e;
|
||||
int r;
|
||||
|
||||
/* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
|
||||
|
@ -378,73 +433,117 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
|
|||
if (new_length == 0)
|
||||
return strdup("");
|
||||
|
||||
/* If no multibyte characters use ascii_ellipsize_mem for speed */
|
||||
if (ascii_is_valid_n(s, old_length))
|
||||
bool has_ansi_seq = string_has_ansi_sequence(s, old_length);
|
||||
|
||||
/* If no multibyte characters or ANSI sequences, use ascii_ellipsize_mem for speed */
|
||||
if (!has_ansi_seq && ascii_is_valid_n(s, old_length))
|
||||
return ascii_ellipsize_mem(s, old_length, new_length, percent);
|
||||
|
||||
x = ((new_length - 1) * percent) / 100;
|
||||
x = (new_length - 1) * percent / 100;
|
||||
assert(x <= new_length - 1);
|
||||
|
||||
k = 0;
|
||||
for (i = s; i < s + old_length; i = utf8_next_char(i)) {
|
||||
char32_t c;
|
||||
int w;
|
||||
for (i = s; i < s + old_length; ) {
|
||||
size_t slen = has_ansi_seq ? ansi_sequence_length(i, old_length - (i - s)) : 0;
|
||||
if (slen > 0) {
|
||||
i += slen;
|
||||
continue; /* ANSI sequences don't take up any space in output */
|
||||
}
|
||||
|
||||
char32_t c;
|
||||
r = utf8_encoded_to_unichar(i, &c);
|
||||
if (r < 0)
|
||||
return NULL;
|
||||
|
||||
w = unichar_iswide(c) ? 2 : 1;
|
||||
if (k + w <= x)
|
||||
k += w;
|
||||
else
|
||||
int w = unichar_iswide(c) ? 2 : 1;
|
||||
if (k + w > x)
|
||||
break;
|
||||
|
||||
k += w;
|
||||
i += r;
|
||||
}
|
||||
|
||||
for (j = s + old_length; j > i; ) {
|
||||
const char *ansi_start = s + old_length;
|
||||
size_t ansi_len = 0;
|
||||
|
||||
for (const char *t = j = s + old_length; t > i && k < new_length; ) {
|
||||
char32_t c;
|
||||
int w;
|
||||
const char *jj;
|
||||
const char *tt;
|
||||
|
||||
jj = utf8_prev_char(j);
|
||||
r = utf8_encoded_to_unichar(jj, &c);
|
||||
if (has_ansi_seq && ansi_start >= t)
|
||||
/* Figure out the previous ANSI sequence, if any */
|
||||
ansi_len = previous_ansi_sequence(s, t - s, &ansi_start);
|
||||
|
||||
/* If the sequence extends all the way to the current position, skip it. */
|
||||
if (has_ansi_seq && ansi_len > 0 && ansi_start + ansi_len == t) {
|
||||
t = ansi_start;
|
||||
continue;
|
||||
}
|
||||
|
||||
tt = utf8_prev_char(t);
|
||||
r = utf8_encoded_to_unichar(tt, &c);
|
||||
if (r < 0)
|
||||
return NULL;
|
||||
|
||||
w = unichar_iswide(c) ? 2 : 1;
|
||||
if (k + w <= new_length) {
|
||||
k += w;
|
||||
j = jj;
|
||||
} else
|
||||
if (k + w > new_length)
|
||||
break;
|
||||
}
|
||||
assert(i <= j);
|
||||
|
||||
/* we don't actually need to ellipsize */
|
||||
if (i == j)
|
||||
k += w;
|
||||
j = t = tt; /* j should always point to the first "real" character */
|
||||
}
|
||||
|
||||
/* We don't actually need to ellipsize */
|
||||
if (i >= j)
|
||||
return memdup_suffix0(s, old_length);
|
||||
|
||||
/* make space for ellipsis, if possible */
|
||||
if (j < s + old_length)
|
||||
j = utf8_next_char(j);
|
||||
else if (i > s)
|
||||
i = utf8_prev_char(i);
|
||||
if (k >= new_length) {
|
||||
/* Make space for ellipsis, if required and possible. We know that the edge character is not
|
||||
* part of an ANSI sequence (because then we'd skip it). If the last character we looked at
|
||||
* was wide, we don't need to make space. */
|
||||
if (j < s + old_length)
|
||||
j = utf8_next_char(j);
|
||||
else if (i > s)
|
||||
i = utf8_prev_char(i);
|
||||
}
|
||||
|
||||
len = i - s;
|
||||
len2 = s + old_length - j;
|
||||
e = new(char, len + 3 + len2 + 1);
|
||||
|
||||
/* If we have ANSI, allow the same length as the source string + ellipsis. It'd be too involved to
|
||||
* figure out what exact space is needed. Strings with ANSI sequences are most likely to be fairly
|
||||
* short anyway. */
|
||||
size_t alloc_len = has_ansi_seq ? old_length + 3 + 1 : len + 3 + len2 + 1;
|
||||
|
||||
char *e = new(char, alloc_len);
|
||||
if (!e)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
|
||||
printf("old_length=%zu new_length=%zu x=%zu len=%zu len2=%zu k=%zu\n",
|
||||
old_length, new_length, x, len, len2, k);
|
||||
*/
|
||||
|
||||
memcpy(e, s, len);
|
||||
memcpy_safe(e, s, len);
|
||||
write_ellipsis(e + len, true);
|
||||
memcpy(e + len + 3, j, len2);
|
||||
*(e + len + 3 + len2) = '\0';
|
||||
|
||||
char *dst = e + len + 3;
|
||||
|
||||
if (has_ansi_seq)
|
||||
/* Copy over any ANSI sequences in full */
|
||||
for (const char *p = s + len; p < j; ) {
|
||||
size_t slen = ansi_sequence_length(p, j - p);
|
||||
if (slen > 0) {
|
||||
memcpy(dst, p, slen);
|
||||
dst += slen;
|
||||
p += slen;
|
||||
} else
|
||||
p = utf8_next_char(p);
|
||||
}
|
||||
|
||||
memcpy_safe(dst, j, len2);
|
||||
dst[len2] = '\0';
|
||||
|
||||
return e;
|
||||
}
|
||||
|
|
|
@ -90,7 +90,7 @@ int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar) {
|
|||
switch (len) {
|
||||
case 1:
|
||||
*ret_unichar = (char32_t)str[0];
|
||||
return 0;
|
||||
return 1;
|
||||
case 2:
|
||||
unichar = str[0] & 0x1f;
|
||||
break;
|
||||
|
@ -119,15 +119,14 @@ int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar) {
|
|||
}
|
||||
|
||||
*ret_unichar = unichar;
|
||||
|
||||
return 0;
|
||||
return len;
|
||||
}
|
||||
|
||||
bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newline) {
|
||||
assert(str);
|
||||
|
||||
for (const char *p = str; length > 0;) {
|
||||
int encoded_len, r;
|
||||
int encoded_len;
|
||||
char32_t val;
|
||||
|
||||
encoded_len = utf8_encoded_valid_unichar(p, length);
|
||||
|
@ -135,8 +134,7 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newlin
|
|||
return false;
|
||||
assert(encoded_len > 0 && (size_t) encoded_len <= length);
|
||||
|
||||
r = utf8_encoded_to_unichar(p, &val);
|
||||
if (r < 0 ||
|
||||
if (utf8_encoded_to_unichar(p, &val) < 0 ||
|
||||
unichar_is_control(val) ||
|
||||
(!allow_newline && val == '\n'))
|
||||
return false;
|
||||
|
|
|
@ -311,7 +311,7 @@ static mhd_result request_handler(
|
|||
|
||||
if (chunked)
|
||||
return mhd_respond(connection, MHD_HTTP_BAD_REQUEST,
|
||||
"Content-Length must not specified when Transfer-Encoding type is 'chunked'");
|
||||
"Content-Length not allowed when Transfer-Encoding type is 'chunked'");
|
||||
|
||||
r = safe_atozu(header, &len);
|
||||
if (r < 0)
|
||||
|
|
|
@ -93,6 +93,7 @@ simple_tests += files(
|
|||
'test-fstab-util.c',
|
||||
'test-glob-util.c',
|
||||
'test-gpt.c',
|
||||
'test-gunicode.c',
|
||||
'test-hash-funcs.c',
|
||||
'test-hexdecoct.c',
|
||||
'test-hmac.c',
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include "alloc-util.h"
|
||||
#include "constants.h"
|
||||
#include "escape.h"
|
||||
#include "string-util.h"
|
||||
#include "strv.h"
|
||||
#include "terminal-util.h"
|
||||
|
@ -115,4 +116,44 @@ TEST(ellipsize) {
|
|||
test_ellipsize_one("shórt");
|
||||
}
|
||||
|
||||
TEST(ellipsize_ansi) {
|
||||
const char *s = ANSI_HIGHLIGHT_YELLOW_UNDERLINE "yęllow"
|
||||
ANSI_HIGHLIGHT_GREY_UNDERLINE "grěy"
|
||||
ANSI_HIGHLIGHT_BLUE_UNDERLINE "blue"
|
||||
ANSI_NORMAL "nórmął";
|
||||
size_t len = strlen(s);
|
||||
|
||||
for (unsigned percent = 0; percent <= 100; percent += 15)
|
||||
for (ssize_t x = 21; x >= 0; x--) {
|
||||
_cleanup_free_ char *t = ellipsize_mem(s, len, x, percent);
|
||||
printf("%02zd: \"%s\"\n", x, t);
|
||||
assert_se(utf8_is_valid(t));
|
||||
|
||||
if (DEBUG_LOGGING) {
|
||||
_cleanup_free_ char *e = cescape(t);
|
||||
printf(" : \"%s\"\n", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(ellipsize_ansi_cats) {
|
||||
_cleanup_free_ char *e, *f, *g, *h;
|
||||
|
||||
/* Make sure we don't cut off in the middle of an ANSI escape sequence. */
|
||||
|
||||
e = ellipsize("01" ANSI_NORMAL "23", 4, 0);
|
||||
puts(e);
|
||||
assert_se(streq(e, "01" ANSI_NORMAL "23"));
|
||||
f = ellipsize("ab" ANSI_NORMAL "cd", 4, 90);
|
||||
puts(f);
|
||||
assert_se(streq(f, "ab" ANSI_NORMAL "cd"));
|
||||
|
||||
g = ellipsize("🐱🐱" ANSI_NORMAL "🐱🐱" ANSI_NORMAL, 5, 0);
|
||||
puts(g);
|
||||
assert_se(streq(g, "…" ANSI_NORMAL "🐱🐱" ANSI_NORMAL));
|
||||
h = ellipsize("🐱🐱" ANSI_NORMAL "🐱🐱" ANSI_NORMAL, 5, 90);
|
||||
puts(h);
|
||||
assert_se(streq(h, "🐱…" ANSI_NORMAL "🐱" ANSI_NORMAL));
|
||||
}
|
||||
|
||||
DEFINE_TEST_MAIN(LOG_INFO);
|
||||
|
|
27
src/test/test-gunicode.c
Normal file
27
src/test/test-gunicode.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||
|
||||
#include "gunicode.h"
|
||||
#include "tests.h"
|
||||
#include "utf8.h"
|
||||
|
||||
TEST(unichar_iswide) {
|
||||
char32_t c;
|
||||
int r;
|
||||
|
||||
/* FIXME: the cats are wide, but we get this wrong */
|
||||
for (const char *narrow = "abX_…ąęµ!" "😼😿🙀😸😻"; *narrow; narrow += r) {
|
||||
r = utf8_encoded_to_unichar(narrow, &c);
|
||||
bool w = unichar_iswide(c);
|
||||
assert_se(r > 0);
|
||||
assert_se(!w);
|
||||
}
|
||||
|
||||
for (const char *wide = "🐱/¥"; *wide; wide += r) {
|
||||
r = utf8_encoded_to_unichar(wide, &c);
|
||||
bool w = unichar_iswide(c);
|
||||
assert_se(r > 0);
|
||||
assert_se(w);
|
||||
}
|
||||
}
|
||||
|
||||
DEFINE_TEST_MAIN(LOG_INFO);
|
Loading…
Reference in a new issue