LibWeb: Make HTML parser flush all pending tokens in "in table text"

There were multiple bugs in the parsing algorithm for handling text
occurring inside a `table` element:

- When there was pending non-whitespace text inside a table, we only
  flushed one token instead of all pending tokens.

- Also, the one token we did flush was not even one of the pending
  tokens, but the token that caused the flush to happen.

- Once we started flushing the right tokens, it turned out we had not
  yet implemented character insertion points expressed as "before X".

- Finally, we were not exiting the "in table text" mode after flushing
  pending tokens, effectively getting us stuck in that mode until EOF.
This commit is contained in:
Andreas Kling 2023-07-03 10:00:20 +02:00
parent 8c3e5137f7
commit 5cdb394400
3 changed files with 25 additions and 12 deletions

View file

@ -0,0 +1 @@
PASS

View file

@ -0,0 +1,10 @@
<script src="include.js"></script>
<body><table><tr>PASS</tr></table></body>
<script>
// NOTE(review): test() comes from include.js, which is not visible here;
// presumably it runs the callback and then dumps the page text. Confirm.
test(() => {
// The markup above places the text "PASS" directly inside a table row.
// Remove the table. "PASS" should still be visible,
// as the HTML parser inserts it *before* the table
// under these circumstances.
// If the text had ended up inside the table instead, removing the
// table would remove the text along with it.
document.querySelector("table").remove()
});
</script>

View file

@ -1001,7 +1001,11 @@ DOM::Text* HTMLParser::find_character_insertion_node()
{
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
if (adjusted_insertion_location.insert_before_sibling) {
TODO();
if (adjusted_insertion_location.insert_before_sibling->previous_sibling() && adjusted_insertion_location.insert_before_sibling->previous_sibling()->is_text())
return static_cast<DOM::Text*>(adjusted_insertion_location.insert_before_sibling->previous_sibling());
auto new_text_node = realm().heap().allocate<DOM::Text>(realm(), document(), "").release_allocated_value_but_fixme_should_propagate_errors();
adjusted_insertion_location.parent->insert_before(*new_text_node, *adjusted_insertion_location.insert_before_sibling);
return new_text_node;
}
if (adjusted_insertion_location.parent->is_document())
return nullptr;
@ -2661,20 +2665,18 @@ void HTMLParser::handle_in_table_text(HTMLToken& token)
// are character tokens that are not ASCII whitespace, then this is a parse error:
// reprocess the character tokens in the pending table character tokens list using
// the rules given in the "anything else" entry in the "in table" insertion mode.
for (auto& pending_token : m_pending_table_character_tokens) {
VERIFY(pending_token.is_character());
if (!pending_token.is_parser_whitespace()) {
log_parse_error();
if (any_of(m_pending_table_character_tokens, [](auto const& token) { return !token.is_parser_whitespace(); })) {
log_parse_error();
for (auto& pending_token : m_pending_table_character_tokens) {
m_foster_parenting = true;
process_using_the_rules_for(InsertionMode::InBody, token);
process_using_the_rules_for(InsertionMode::InBody, pending_token);
m_foster_parenting = false;
return;
}
}
// Otherwise, insert the characters given by the pending table character tokens list.
for (auto& pending_token : m_pending_table_character_tokens) {
insert_character(pending_token.code_point());
} else {
// Otherwise, insert the characters given by the pending table character tokens list.
for (auto& pending_token : m_pending_table_character_tokens) {
insert_character(pending_token.code_point());
}
}
// Switch the insertion mode to the original insertion mode and reprocess the token.