From 328552a269d069cb303553474f0f54a6c66738cd Mon Sep 17 00:00:00 2001 From: Simon Wanner Date: Thu, 30 May 2024 21:35:50 +0200 Subject: [PATCH] LibTextCodec: Bring TextCodec::get_standardized_encoding closer to spec (cherry picked from commit 09f2d79cb10f84dfaedea61264d8a9d91bdfa17c) --- Userland/Libraries/LibTextCodec/Decoder.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp index f77eaac858..a5869e55fb 100644 --- a/Userland/Libraries/LibTextCodec/Decoder.cpp +++ b/Userland/Libraries/LibTextCodec/Decoder.cpp @@ -388,8 +388,11 @@ Optional decoder_for(StringView a_encoding) // https://encoding.spec.whatwg.org/#concept-encoding-get Optional<StringView> get_standardized_encoding(StringView encoding) { - encoding = encoding.trim_whitespace(); + // 1. Remove any leading and trailing ASCII whitespace from label. + // https://infra.spec.whatwg.org/#ascii-whitespace: ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020 SPACE. + encoding = encoding.trim("\t\n\f\r "sv); + // 2. If label is an ASCII case-insensitive match for any of the labels listed in the table below, then return the corresponding encoding; otherwise return failure. if (encoding.is_one_of_ignoring_ascii_case("unicode-1-1-utf-8"sv, "unicode11utf8"sv, "unicode20utf8"sv, "utf-8"sv, "utf8"sv, "x-unicode20utf8"sv)) return "UTF-8"sv; if (encoding.is_one_of_ignoring_ascii_case("866"sv, "cp866"sv, "csibm866"sv, "ibm866"sv))