
LibWeb: Fix assertion failure when tokenizing JS regex literals

This fixes parsing the following regular expression: /</g;

It also adds a simple script element containing that specific regex to the
HTMLTokenizer regression test.
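For context: in the script data state, "</" starts a candidate end tag, and the buffered characters are only re-emitted as plain character tokens once the name turns out not to be "script". That is exactly what happens at the "</g" inside /</g;. The sketch below shows one way to drive the tokenizer over such markup, in the style of the regression test further down; the include path and exact API details are assumptions rather than part of this commit.

// Illustrative sketch only; the constructor, next_token() and is_character()
// usage are assumed from how the regression test below drives the tokenizer.
#include <AK/Format.h>
#include <LibWeb/HTML/Parser/HTMLTokenizer.h>

static void tokenize_problematic_regex()
{
    // Per the HTML spec, inside script data "</g" is treated as a candidate
    // end tag; since "g" is not "script", the '<' and '/' are re-emitted as
    // character tokens and tokenization resumes in the script data state.
    auto input = "<script>let regex = /</g;</script>"sv;

    Web::HTML::HTMLTokenizer tokenizer(input, "UTF-8");
    size_t character_tokens = 0;
    for (;;) {
        auto token = tokenizer.next_token();
        if (!token.has_value())
            break;
        if (token->is_character())
            ++character_tokens;
    }
    // The fix below ensures the abandoned end tag's name buffer is cleared on
    // that re-emit path, so the next token starts from a clean builder.
    dbgln("emitted {} character tokens", character_tokens);
}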
Max Wipfli 2021-07-15 01:25:34 +02:00 committed by Andreas Kling
parent c82c652ee4
commit 2404ad6897
3 changed files with 10 additions and 1 deletion


@@ -193,5 +193,5 @@ TEST_CASE(regression)
     auto file_contents = file.value()->read_all();
     auto tokens = run_tokenizer(file_contents);
     u32 hash = hash_tokens(tokens);
-    EXPECT_EQ(hash, 2891738465u);
+    EXPECT_EQ(hash, 2203864459u);
 }


@@ -3,6 +3,13 @@
 <head>
     <meta charset="UTF-8">
     <title>This is a test page :^)</title>
+    <script>
+        let foo = 2;
+        var bar = 3;
+        if (foo < bar)
+            alert("Check happens with HTML special <characters>.");
+        let regex = /</g;
+    </script>
 </head>
 <body>
     <p>This is the first paragraph.</p>


@@ -2561,6 +2561,8 @@ _StartOfFunction:
             {
                 m_queued_tokens.enqueue(HTMLToken::make_character('<'));
                 m_queued_tokens.enqueue(HTMLToken::make_character('/'));
+                // NOTE: The spec doesn't mention this, but it seems that m_current_token (an end tag) is just dropped in this case.
+                m_current_builder.clear();
                 for (auto code_point : m_temporary_buffer)
                     m_queued_tokens.enqueue(HTMLToken::make_character(code_point));
                 RECONSUME_IN(ScriptData);
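The clear() matters because the same builder is reused for the next token: when the candidate end tag from "</g" is abandoned on this path, its half-built name would otherwise still be sitting in m_current_builder when the tokenizer starts accumulating the next token, which is a plausible source of the assertion failure named in the commit title. Below is a self-contained, hypothetical model of that invariant (plain C++, illustrative names only, not LibWeb code).

// Hypothetical model of the reused-builder invariant; names are illustrative
// and do not come from LibWeb.
#include <cassert>
#include <string>
#include <vector>

struct MiniTokenizer {
    std::string current_builder;    // stands in for m_current_builder
    std::vector<char> queued_chars; // stands in for m_queued_tokens

    // Starting a new token expects the shared builder to be empty.
    void begin_new_token() { assert(current_builder.empty()); }

    // "Anything else" in the script-data end tag name state: the buffered
    // "</" + name is not an appropriate end tag, so re-emit it as characters.
    void abandon_end_tag(std::string const& temporary_buffer)
    {
        queued_chars.push_back('<');
        queued_chars.push_back('/');
        current_builder.clear(); // the fix: drop the half-built tag name
        for (char c : temporary_buffer)
            queued_chars.push_back(c);
    }
};

int main()
{
    MiniTokenizer tokenizer;
    tokenizer.current_builder = "g"; // accumulated while scanning "</g"
    tokenizer.abandon_end_tag("g");  // "</g;" turns out not to be an end tag
    tokenizer.begin_new_token();     // would assert without the clear() above
    return 0;
}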