Merge pull request #13042 from poettering/locale-utf8-fix

Locale utf8 fix
2024-10-14 20:17:52 +00:00 · 2019-07-14 13:31:40 +02:00 · 2019-07-14 13:31:40 +02:00 · b9adb191a1
parent a18d83c264 a7d9fccd0e
commit b9adb191a1
2 changed files with 61 additions and 2 deletions
--- a/docs/ENVIRONMENT.md
+++ b/docs/ENVIRONMENT.md
@ -222,3 +222,9 @@ systemd-remount-fs:
  directory is remounted writable. This is primarily used by
  systemd-gpt-auto-generator to ensure the root partition is mounted writable
  in accordance to the GPT partition flags.
+
+systemd-firstboot and localectl:
+
+* `SYSTEMD_LIST_NON_UTF8_LOCALES=1` – if set, non-UTF-8 locales are listed
+  among the installed ones. By default non-UTF-8 locales are suppressed from
+  the selection, since we are living in the 21st century.
--- a/src/basic/locale-util.c
+++ b/src/basic/locale-util.c
@ -27,6 +27,40 @@
 #include "strv.h"
 #include "utf8.h"

+static char *normalize_locale(const char *name) {
+        const char *e;
+
+        /* Returns a newly allocated copy of 'name' in which a lowercase ".utf8" charset suffix is rewritten
+         * to ".UTF-8". Two shapes are handled: ".utf8" at the very end of the name, and ".utf8@" followed by
+         * further text (which is preserved verbatim). Any other name is duplicated unchanged. Returns NULL
+         * if an allocation fails; the caller owns the returned string.
+         *
+         * Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
+         * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
+         * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
+         * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
+         * that for UTF-8 however, since it's kinda the only charset that matters. */
+
+        e = endswith(name, ".utf8");
+        if (e) {
+                _cleanup_free_ char *prefix = NULL;
+
+                prefix = strndup(name, e - name); /* everything in front of the ".utf8" suffix */
+                if (!prefix)
+                        return NULL;
+
+                return strjoin(prefix, ".UTF-8");
+        }
+
+        e = strstr(name, ".utf8@"); /* first occurrence of the charset followed by more text */
+        if (e) {
+                _cleanup_free_ char *prefix = NULL;
+
+                prefix = strndup(name, e - name); /* everything in front of ".utf8@" */
+                if (!prefix)
+                        return NULL;
+
+                return strjoin(prefix, ".UTF-8@", e + 6); /* e + 6 skips the six characters of ".utf8@" */
+        }
+
+        return strdup(name); /* no ".utf8" charset found: plain copy */
+}
+
 static int add_locales_from_archive(Set *locales) {
        /* Stolen from glibc... */

@ -107,7 +141,7 @@ static int add_locales_from_archive(Set *locales) {
                if (!utf8_is_valid((char*) p + e[i].name_offset))
                        continue;

-                z = strdup((char*) p + e[i].name_offset);
+                z = normalize_locale((char*) p + e[i].name_offset);
                if (!z) {
                        r = -ENOMEM;
                        goto finish;
@ -144,7 +178,7 @@ static int add_locales_from_libdir (Set *locales) {
                if (entry->d_type != DT_DIR)
                        continue;

-                z = strdup(entry->d_name);
+                z = normalize_locale(entry->d_name);
                if (!z)
                        return -ENOMEM;

@ -177,6 +211,25 @@ int get_locales(char ***ret) {
        if (!l)
                return -ENOMEM;

+        r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
+        if (r == -ENXIO || r == 0) {
+                char **a, **b;
+
+                /* Filter out non-UTF-8 locales, because it's 2019, by default */
+                for (a = b = l; *a; a++) {
+
+                        if (endswith(*a, "UTF-8") ||
+                            strstr(*a, ".UTF-8@"))
+                                *(b++) = *a;
+                        else
+                                free(*a);
+                }
+
+                *b = NULL;
+
+        } else if (r < 0)
+                log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
+
        strv_sort(l);

        *ret = TAKE_PTR(l);