From 2404ad689729902fd27fe37f8ecbab935a6394e5 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 15 Jul 2021 01:25:34 +0200 Subject: [PATCH] LibWeb: Fix assertion failure when tokenizing JS regex literals This fixes parsing the following regular expression: /read_all(); auto tokens = run_tokenizer(file_contents); u32 hash = hash_tokens(tokens); - EXPECT_EQ(hash, 2891738465u); + EXPECT_EQ(hash, 2203864459u); } diff --git a/Tests/LibWeb/tokenizer-test.html b/Tests/LibWeb/tokenizer-test.html index c1df6a687d..0dde2442d0 100644 --- a/Tests/LibWeb/tokenizer-test.html +++ b/Tests/LibWeb/tokenizer-test.html @@ -3,6 +3,13 @@ This is a test page :^) +

This is the first paragraph.

diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
index 2736897a72..bb7e3a4590 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
@@ -2561,6 +2561,8 @@ _StartOfFunction:
             {
                 m_queued_tokens.enqueue(HTMLToken::make_character('<'));
                 m_queued_tokens.enqueue(HTMLToken::make_character('/'));
+                // NOTE: The spec doesn't mention this, but it seems that m_current_token (an end tag) is just dropped in this case.
+                m_current_builder.clear();
                 for (auto code_point : m_temporary_buffer)
                     m_queued_tokens.enqueue(HTMLToken::make_character(code_point));
                 RECONSUME_IN(ScriptData);