From 2404ad689729902fd27fe37f8ecbab935a6394e5 Mon Sep 17 00:00:00 2001
From: Max Wipfli <mail@maxwipfli.ch>
Date: Thu, 15 Jul 2021 01:25:34 +0200
Subject: [PATCH] LibWeb: Fix assertion failure when tokenizing JS regex
 literals

This fixes an assertion failure in the HTML tokenizer when a script
contains the following regular expression literal: /</g;

It also adds a simple script element containing that regex to the
HTMLTokenizer regression test and updates the expected token hash.
---
 Tests/LibWeb/TestHTMLTokenizer.cpp                      | 2 +-
 Tests/LibWeb/tokenizer-test.html                        | 7 +++++++
 Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp | 2 ++
 3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/Tests/LibWeb/TestHTMLTokenizer.cpp b/Tests/LibWeb/TestHTMLTokenizer.cpp
index de4c455e91..c001f00d2b 100644
--- a/Tests/LibWeb/TestHTMLTokenizer.cpp
+++ b/Tests/LibWeb/TestHTMLTokenizer.cpp
@@ -193,5 +193,5 @@ TEST_CASE(regression)
     auto file_contents = file.value()->read_all();
     auto tokens = run_tokenizer(file_contents);
     u32 hash = hash_tokens(tokens);
-    EXPECT_EQ(hash, 2891738465u);
+    EXPECT_EQ(hash, 2203864459u);
 }
diff --git a/Tests/LibWeb/tokenizer-test.html b/Tests/LibWeb/tokenizer-test.html
index c1df6a687d..0dde2442d0 100644
--- a/Tests/LibWeb/tokenizer-test.html
+++ b/Tests/LibWeb/tokenizer-test.html
@@ -3,6 +3,13 @@
 <head>
     <meta charset="UTF-8">
     <title>This is a test page :^)</title>
+    <script>
+        let foo = 2;
+        var bar = 3;
+        if (foo < bar)
+            alert("Check happens with HTML special <characters>.");
+        let regex = /</g;
+    </script>
 </head>
 <body>
     <p>This is the first paragraph.</p>
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
index 2736897a72..bb7e3a4590 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
@@ -2561,6 +2561,8 @@ _StartOfFunction:
                 {
                     m_queued_tokens.enqueue(HTMLToken::make_character('<'));
                     m_queued_tokens.enqueue(HTMLToken::make_character('/'));
+                    // NOTE: The spec doesn't mention this, but it seems that m_current_token (an end tag) is just dropped in this case.
+                    m_current_builder.clear();
                     for (auto code_point : m_temporary_buffer)
                         m_queued_tokens.enqueue(HTMLToken::make_character(code_point));
                     RECONSUME_IN(ScriptData);