From eac2b2382c36771cb62bb4beaf18dfc2e018b18f Mon Sep 17 00:00:00 2001
From: Nico Weber
Date: Tue, 24 Jan 2023 08:50:53 -0500
Subject: [PATCH] LibTextCodec: Add a MacRoman decoder

Allows displaying `<meta charset="macintosh">` html files.
(`:set fenc=macroman`, `:w` in vim to save in that encoding.)
---
 Userland/Libraries/LibTextCodec/Decoder.cpp | 28 +++++++++++++++++++++
 Userland/Libraries/LibTextCodec/Decoder.h   |  5 ++++
 2 files changed, 33 insertions(+)

diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp
index 4c6b2fbfb7..d991528593 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Decoder.cpp
@@ -25,6 +25,7 @@ HebrewDecoder s_hebrew_decoder;
 CyrillicDecoder s_cyrillic_decoder;
 Koi8RDecoder s_koi8r_decoder;
 Latin9Decoder s_latin9_decoder;
+MacRomanDecoder s_mac_roman_decoder;
 TurkishDecoder s_turkish_decoder;
 XUserDefinedDecoder s_x_user_defined_decoder;
 }
@@ -51,6 +52,8 @@ Decoder* decoder_for(DeprecatedString const& a_encoding)
             return &s_koi8r_decoder;
         if (encoding.value().equals_ignoring_case("iso-8859-15"sv))
             return &s_latin9_decoder;
+        if (encoding.value().equals_ignoring_case("macintosh"sv))
+            return &s_mac_roman_decoder;
         if (encoding.value().equals_ignoring_case("windows-1254"sv))
             return &s_turkish_decoder;
         if (encoding.value().equals_ignoring_case("x-user-defined"sv))
@@ -528,6 +531,31 @@ void Latin9Decoder::process(StringView input, Function<void(u32)> on_code_point)
     }
 }
 
+void MacRomanDecoder::process(StringView input, Function<void(u32)> on_code_point)
+{
+    // https://encoding.spec.whatwg.org/index-macintosh.txt
+    // clang-format off
+    static constexpr Array<u32, 128> translation_table = {
+        0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
+        0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
+        0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
+        0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
+        0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
+        0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
+        0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
+        0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
+    };
+    // clang-format on
+
+    for (u8 ch : input) {
+        if (ch < 0x80) { // Superset of ASCII
+            on_code_point(ch);
+        } else {
+            on_code_point(translation_table[ch - 0x80]);
+        }
+    }
+}
+
 void TurkishDecoder::process(StringView input, Function<void(u32)> on_code_point)
 {
     auto convert_turkish_to_utf8 = [](u8 ch) -> u32 {
diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h
index 6faf61fce5..02cdfba599 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.h
+++ b/Userland/Libraries/LibTextCodec/Decoder.h
@@ -69,6 +69,11 @@ public:
     virtual void process(StringView, Function<void(u32)> on_code_point) override;
 };
 
+class MacRomanDecoder final : public Decoder {
+public:
+    virtual void process(StringView, Function<void(u32)> on_code_point) override;
+};
+
 class TurkishDecoder final : public Decoder {
 public:
     virtual void process(StringView, Function<void(u32)> on_code_point) override;