diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index 8bd6281bc3..d0180ef086 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -23,6 +23,7 @@ add_subdirectory(LibTimeZone) add_subdirectory(LibUnicode) add_subdirectory(LibWasm) add_subdirectory(LibWeb) +add_subdirectory(LibXML) if (${SERENITY_ARCH} STREQUAL "i686") add_subdirectory(UserspaceEmulator) endif() diff --git a/Tests/LibXML/CMakeLists.txt b/Tests/LibXML/CMakeLists.txt new file mode 100644 index 0000000000..107f39bc11 --- /dev/null +++ b/Tests/LibXML/CMakeLists.txt @@ -0,0 +1,7 @@ +set(TEST_SOURCES + TestParser.cpp +) + +foreach(source IN LISTS TEST_SOURCES) + serenity_test("${source}" LibXML LIBS LibXML) +endforeach() diff --git a/Tests/LibXML/TestParser.cpp b/Tests/LibXML/TestParser.cpp new file mode 100644 index 0000000000..54a9621a22 --- /dev/null +++ b/Tests/LibXML/TestParser.cpp @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2022, Luke Wilde + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +TEST_CASE(char_data_ending) +{ + EXPECT_NO_CRASH("parsing character data ending by itself should not crash", [] { + // After seeing the element's start tag, the parser will start parsing the content of the element. The content parser will then parse any character data it sees. + // The character parser would see the first two `]]` and consume them. Then, it would see the `>` and set the state machine to say we have seen this, + // but it did _not_ consume it and would instead tell GenericLexer that it should stop consuming characters. Therefore, we only consumed 2 characters. + // Then, it would see that we are in the state where we've seen the full `]]>` and try to take off three characters from the end of the consumed + // input when we only have 2 characters, causing an assertion failure as we are asking to take off more characters than there really are.
+ XML::Parser parser("]]>"); + (void)parser.parse(); + return Test::Crash::Failure::DidNotCrash; + }); +} diff --git a/Userland/Libraries/LibXML/Parser/Parser.cpp b/Userland/Libraries/LibXML/Parser/Parser.cpp index 0940d76fab..d32ca51c75 100644 --- a/Userland/Libraries/LibXML/Parser/Parser.cpp +++ b/Userland/Libraries/LibXML/Parser/Parser.cpp @@ -891,7 +891,7 @@ ErrorOr Parser::parse_char_data() // CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) auto cend_state = 0; // 1: ], 2: ], 3: > auto text = m_lexer.consume_while([&](auto ch) { - if (ch == '<' || ch == '&') + if (ch == '<' || ch == '&' || cend_state == 3) return false; switch (cend_state) { case 0: @@ -904,7 +904,7 @@ ErrorOr Parser::parse_char_data() case 2: if (ch == '>') { cend_state++; - return false; + return true; } cend_state = 0; return true;