strv: add new helper strv_rebreak_lines() with a simple line breaking algorithm

This commit is contained in:
Lennart Poettering 2024-04-26 17:40:32 +02:00
parent 9632f8b465
commit aca093018c
3 changed files with 150 additions and 0 deletions

View file

@ -11,11 +11,13 @@
#include "escape.h"
#include "extract-word.h"
#include "fileio.h"
#include "gunicode.h"
#include "memory-util.h"
#include "nulstr-util.h"
#include "sort-util.h"
#include "string-util.h"
#include "strv.h"
#include "utf8.h"
char* strv_find(char * const *l, const char *name) {
assert(name);
@ -967,3 +969,91 @@ int _string_strv_ordered_hashmap_put(OrderedHashmap **h, const char *key, const
}
/* NOTE(review): DEFINE_HASH_OPS_FULL is defined outside this view. Presumably this instantiates
 * string_strv_hash_ops for string keys (hashed/compared via string_hash_func/string_compare_func,
 * released with free()) and strv values (released with strv_free()) — confirm against the macro. */
DEFINE_HASH_OPS_FULL(string_strv_hash_ops, char, string_hash_func, string_compare_func, free, char*, strv_free);
int strv_rebreak_lines(char **l, size_t width, char ***ret) {
        _cleanup_strv_free_ char **broken = NULL;
        int r;

        assert(ret);

        /* Implements a simple UTF-8 line breaking algorithm.
         *
         * Goes through all entries in l, and line-breaks each entry that is wider than the specified
         * console width. Breaks happen at the end of words/beginning of whitespace. Entries that do not
         * contain whitespace are not broken. Whitespace at the beginning of an entry is retained,
         * whitespace at the end of each generated line is removed.
         *
         * If width is SIZE_MAX no breaking takes place and a plain copy of l is returned.
         *
         * On success returns 0 and stores a newly allocated strv in *ret (ownership passes to the
         * caller). On failure returns -ENOMEM or the negative value propagated from
         * strv_consume()/strv_extend(); *ret is left untouched in that case. */

        if (width == SIZE_MAX) { /* NOP? */
                broken = strv_copy(l);
                if (!broken)
                        return -ENOMEM;

                *ret = TAKE_PTR(broken);
                return 0;
        }

        STRV_FOREACH(i, l) {
                /* start:            beginning of the output line currently being assembled
                 * whitespace_begin: first character of the most recent whitespace run (after the prefix)
                 * whitespace_end:   first non-whitespace character following that run, i.e. where the
                 *                   next output line begins if we break here */
                const char *start = *i, *whitespace_begin = NULL, *whitespace_end = NULL;
                bool in_prefix = true; /* still in the whitespace in the beginning of the line? */
                size_t w = 0,                    /* accumulated width of the current output line */
                       whitespace_end_width = 0; /* width of the character at whitespace_end */

                for (const char *p = start; *p != 0; p = utf8_next_char(p)) {
                        /* Determine the width first, so that it can be remembered together with
                         * whitespace_end below. */
                        int cw = utf8_char_console_width(p);
                        if (cw < 0) {
                                log_debug_errno(cw, "Comment to line break contains invalid UTF-8, ignoring.");
                                cw = 1;
                        }

                        if (strchr(NEWLINE, *p)) {
                                /* Embedded newline: width accounting and prefix detection start over */
                                in_prefix = true;
                                whitespace_begin = whitespace_end = NULL;
                                w = 0;
                        } else if (strchr(WHITESPACE, *p)) {
                                /* Start of a new whitespace run: either the first one after the prefix,
                                 * or one following a word that terminated the previous run. */
                                if (!in_prefix && (!whitespace_begin || whitespace_end)) {
                                        whitespace_begin = p;
                                        whitespace_end = NULL;
                                }
                        } else {
                                /* First character of a word terminates the pending whitespace run */
                                if (whitespace_begin && !whitespace_end) {
                                        whitespace_end = p;
                                        whitespace_end_width = cw;
                                }

                                in_prefix = false;
                        }

                        w += cw;

                        if (w > width && whitespace_begin && whitespace_end) {
                                _cleanup_free_ char *truncated = NULL;

                                /* Too wide, and we know a place to break: emit everything up to the
                                 * whitespace run, dropping the run itself. */
                                truncated = strndup(start, whitespace_begin - start);
                                if (!truncated)
                                        return -ENOMEM;

                                r = strv_consume(&broken, TAKE_PTR(truncated));
                                if (r < 0)
                                        return r;

                                /* Continue after the first character of the word that begins the new
                                 * line (the loop increment skips over it). That character is already
                                 * part of the new line, hence seed the width with *its* width, not with
                                 * the width of the character that triggered the overflow: the two
                                 * differ when narrow and wide characters are mixed in one word. */
                                p = start = whitespace_end;
                                whitespace_begin = whitespace_end = NULL;
                                w = whitespace_end_width;
                        }
                }

                /* Process rest of the line. start is derived from a non-NULL strv entry and only ever
                 * reassigned to a non-NULL whitespace_end, hence it cannot be NULL here. */
                assert(start);

                if (in_prefix)
                        /* Never seen anything non-whitespace? Generate empty line! (This branch is also
                         * taken if the last NEWLINE character is followed by whitespace only, since a
                         * newline sets in_prefix again.) */
                        r = strv_extend(&broken, "");
                else if (whitespace_begin && !whitespace_end) { /* Ends in whitespace? Chop it off! */
                        _cleanup_free_ char *truncated = strndup(start, whitespace_begin - start);
                        if (!truncated)
                                return -ENOMEM;

                        r = strv_consume(&broken, TAKE_PTR(truncated));
                } else /* Otherwise use line as is */
                        r = strv_extend(&broken, start);
                if (r < 0)
                        return r;
        }

        *ret = TAKE_PTR(broken);
        return 0;
}

View file

@ -257,3 +257,5 @@ int _string_strv_hashmap_put(Hashmap **h, const char *key, const char *value HA
int _string_strv_ordered_hashmap_put(OrderedHashmap **h, const char *key, const char *value HASHMAP_DEBUG_PARAMS);
#define string_strv_hashmap_put(h, k, v) _string_strv_hashmap_put(h, k, v HASHMAP_DEBUG_SRC_ARGS)
#define string_strv_ordered_hashmap_put(h, k, v) _string_strv_ordered_hashmap_put(h, k, v HASHMAP_DEBUG_SRC_ARGS)
/* Line-breaks each entry of l that is wider than width at whitespace boundaries and stores the
 * resulting, newly allocated strv in *ret. Leading whitespace of an entry is kept, trailing whitespace
 * of each generated line is dropped, entries without whitespace are not broken. With width == SIZE_MAX
 * a plain copy of l is returned. Returns 0 on success, a negative errno-style value (e.g. -ENOMEM)
 * on failure. */
int strv_rebreak_lines(char **l, size_t width, char ***ret);

View file

@ -1055,4 +1055,62 @@ TEST(strv_extend_many) {
assert_se(strv_equal(l, STRV_MAKE("foo", "bar", "waldo", "quux", "1", "2", "3", "4", "yes", "no")));
}
TEST(strv_rebreak_lines) {
_cleanup_strv_free_ char **l = NULL;
assert_se(strv_rebreak_lines(NULL, SIZE_MAX, &l) >= 0);
assert_se(strv_equal(l, NULL));
l = strv_free(l);
assert_se(strv_rebreak_lines(STRV_MAKE(""), SIZE_MAX, &l) >= 0);
assert_se(strv_equal(l, STRV_MAKE("")));
l = strv_free(l);
assert_se(strv_rebreak_lines(STRV_MAKE("", ""), SIZE_MAX, &l) >= 0);
assert_se(strv_equal(l, STRV_MAKE("", "")));
l = strv_free(l);
assert_se(strv_rebreak_lines(STRV_MAKE("foo"), SIZE_MAX, &l) >= 0);
assert_se(strv_equal(l, STRV_MAKE("foo")));
l = strv_free(l);
assert_se(strv_rebreak_lines(STRV_MAKE("foo", "bar"), SIZE_MAX, &l) >= 0);
assert_se(strv_equal(l, STRV_MAKE("foo", "bar")));
l = strv_free(l);
assert_se(strv_rebreak_lines(STRV_MAKE("Foo fOo foO FOo", "bar Bar bAr baR BAr"), 10, &l) >= 0);
assert_se(strv_equal(l, STRV_MAKE("Foo fOo", "foO FOo", "bar Bar", "bAr baR", "BAr")));
l = strv_free(l);
assert_se(strv_rebreak_lines(STRV_MAKE(" foo ",
" foo bar waldo quux "),
10, &l) >= 0);
assert_se(strv_equal(l, STRV_MAKE(" foo",
" foo",
"bar",
"waldo quux")));
l = strv_free(l);
assert_se(strv_rebreak_lines(STRV_MAKE(" ",
"\tfoo bar\t",
"FOO\tBAR"),
10, &l) >= 0);
assert_se(strv_equal(l, STRV_MAKE("",
"\tfoo",
"bar",
"FOO",
"BAR")));
l = strv_free(l);
/* Now make sure that breaking the lines a 2nd time does not modify the output anymore */
for (size_t i = 1; i < 100; i++) {
_cleanup_strv_free_ char **a = NULL, **b = NULL;
assert_se(strv_rebreak_lines(STRV_MAKE("foobar waldo waldo quux piep\tschnurz pimm"), i, &a) >= 0);
assert_se(strv_rebreak_lines(a, i, &b) >= 0);
assert_se(strv_equal(a, b));
}
}
/* NOTE(review): DEFINE_TEST_MAIN is defined outside this view; presumably it emits the main() that runs
 * the TEST() cases of this file with LOG_INFO as log level — confirm against the test framework header. */
DEFINE_TEST_MAIN(LOG_INFO);