From b1c99c189160972d7d52ebd482a4c1d7267b1c1f Mon Sep 17 00:00:00 2001 From: LepkoQQ Date: Fri, 19 Jun 2020 23:46:25 +0200 Subject: [PATCH] AK: Fix JsonParser double encoding multibyte utf-8 characters --- AK/JsonParser.cpp | 31 +++++++++++-------------------- AK/Tests/TestJSON.cpp | 10 ++++++++++ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/AK/JsonParser.cpp b/AK/JsonParser.cpp index 578b84741c..5697287ef6 100644 --- a/AK/JsonParser.cpp +++ b/AK/JsonParser.cpp @@ -72,7 +72,7 @@ String JsonParser::consume_quoted_string() { if (!consume_specific('"')) return {}; - Vector buffer; + StringBuilder final_sb; for (;;) { size_t peek_index = m_index; @@ -88,8 +88,7 @@ String JsonParser::consume_quoted_string() if (peek_index != m_index) { while (peek_index != m_index) { - u32 value = m_input.characters_without_null_termination()[m_index]; - buffer.append(value); + final_sb.append(m_input.characters_without_null_termination()[m_index]); m_index++; } } @@ -99,26 +98,26 @@ String JsonParser::consume_quoted_string() if (ch == '"') break; if (ch != '\\') { - buffer.append(consume()); + final_sb.append(consume()); continue; } consume(); char escaped_ch = consume(); switch (escaped_ch) { case 'n': - buffer.append('\n'); + final_sb.append('\n'); break; case 'r': - buffer.append('\r'); + final_sb.append('\r'); break; case 't': - buffer.append('\t'); + final_sb.append('\t'); break; case 'b': - buffer.append('\b'); + final_sb.append('\b'); break; case 'f': - buffer.append('\f'); + final_sb.append('\f'); break; case 'u': { StringBuilder sb; @@ -129,27 +128,19 @@ String JsonParser::consume_quoted_string() auto codepoint = AK::StringUtils::convert_to_uint_from_hex(sb.to_string()); if (codepoint.has_value()) { - buffer.append(codepoint.value()); + final_sb.append_codepoint(codepoint.value()); } else { - buffer.append('?'); + final_sb.append('?'); } } break; default: - buffer.append(escaped_ch); + final_sb.append(escaped_ch); break; } } if (!consume_specific('"')) 
return {}; - if (buffer.is_empty()) - return String::empty(); - - StringBuilder final_sb; - for (auto cp : buffer) { - final_sb.append_codepoint(cp); - } - return final_sb.to_string(); } diff --git a/AK/Tests/TestJSON.cpp b/AK/Tests/TestJSON.cpp index 06a0fd0f62..31dc2c508f 100644 --- a/AK/Tests/TestJSON.cpp +++ b/AK/Tests/TestJSON.cpp @@ -118,4 +118,14 @@ TEST_CASE(json_utf8_character) EXPECT_EQ(json.as_string() == "A", true); } +TEST_CASE(json_utf8_multibyte) +{ + auto json = JsonValue::from_string("\"š\"").value(); + EXPECT_EQ(json.type(), JsonValue::Type::String); + EXPECT_EQ(json.as_string().is_null(), false); + EXPECT_EQ(json.as_string().length(), size_t { 2 }); + EXPECT_EQ(json.as_string() == "š", true); + EXPECT_EQ(json.as_string() == "\xc5\xa1", true); +} + TEST_MAIN(JSON)