From cf5b1b7c102a4702526377fdf579e54aad8149ff Mon Sep 17 00:00:00 2001 From: Luke Warlow Date: Tue, 25 Jun 2024 20:55:58 +0100 Subject: [PATCH] LibWeb: Implement unsafe HTML parsing methods Both Element's and ShadowRoot's setHTMLUnsafe, and Document's static parseHTMLUnsafe methods are implemented. (cherry picked from commit ce8d3d17c4f2fcca8fac0ff4a832c8f50a011fc7) --- Userland/Libraries/LibWeb/DOM/Document.cpp | 36 +++++++++++++++++++ Userland/Libraries/LibWeb/DOM/Document.h | 3 ++ Userland/Libraries/LibWeb/DOM/Document.idl | 3 ++ Userland/Libraries/LibWeb/DOM/Element.cpp | 18 +++++++++- Userland/Libraries/LibWeb/DOM/Element.h | 2 ++ Userland/Libraries/LibWeb/DOM/Element.idl | 3 +- Userland/Libraries/LibWeb/DOM/Node.cpp | 20 +++++++++++ Userland/Libraries/LibWeb/DOM/Node.h | 2 ++ Userland/Libraries/LibWeb/DOM/ShadowRoot.cpp | 11 ++++++ Userland/Libraries/LibWeb/DOM/ShadowRoot.h | 2 ++ Userland/Libraries/LibWeb/DOM/ShadowRoot.idl | 3 +- Userland/Libraries/LibWeb/HTML/DOMParser.cpp | 20 ++++------- .../LibWeb/HTML/Parser/HTMLParser.cpp | 30 +++++++++------- .../Libraries/LibWeb/HTML/Parser/HTMLParser.h | 7 ++-- 14 files changed, 129 insertions(+), 31 deletions(-) diff --git a/Userland/Libraries/LibWeb/DOM/Document.cpp b/Userland/Libraries/LibWeb/DOM/Document.cpp index c140a423af..4c337817f3 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.cpp +++ b/Userland/Libraries/LibWeb/DOM/Document.cpp @@ -5131,4 +5131,40 @@ void Document::set_allow_declarative_shadow_roots(bool allow) m_allow_declarative_shadow_roots = allow; } +// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#parse-html-from-a-string +void Document::parse_html_from_a_string(StringView html) +{ + // 1. Set document's type to "html". + set_document_type(DOM::Document::Type::HTML); + + // 2. Create an HTML parser parser, associated with document. + // 3. Place html into the input stream for parser. The encoding confidence is irrelevant. 
+ // FIXME: We don't have the concept of encoding confidence yet. + auto parser = HTML::HTMLParser::create(*this, html, "UTF-8"sv); + + // 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream. + // FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL. + parser->run("about:blank"sv); +} + +// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsehtmlunsafe +JS::NonnullGCPtr Document::parse_html_unsafe(JS::VM& vm, StringView html) +{ + auto& realm = *vm.current_realm(); + // FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "Document parseHTMLUnsafe", and "script". + + // 2. Let document be a new Document, whose content type is "text/html". + JS::NonnullGCPtr document = Document::create(realm); + document->set_content_type("text/html"_string); + + // 3. Set document's allow declarative shadow roots to true. + document->set_allow_declarative_shadow_roots(true); + + // 4. Parse HTML from a string given document and compliantHTML. // FIXME: Use compliantHTML. + document->parse_html_from_a_string(html); + + // 5. Return document. 
+ return document; +} + } diff --git a/Userland/Libraries/LibWeb/DOM/Document.h b/Userland/Libraries/LibWeb/DOM/Document.h index 67b3b8f034..e6e2b0d2c1 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.h +++ b/Userland/Libraries/LibWeb/DOM/Document.h @@ -676,6 +676,9 @@ public: Vector> find_matching_text(String const&, CaseSensitivity); + void parse_html_from_a_string(StringView); + static JS::NonnullGCPtr parse_html_unsafe(JS::VM&, StringView); + protected: virtual void initialize(JS::Realm&) override; virtual void visit_edges(Cell::Visitor&) override; diff --git a/Userland/Libraries/LibWeb/DOM/Document.idl b/Userland/Libraries/LibWeb/DOM/Document.idl index 6f6bb6b782..34d0dd6626 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.idl +++ b/Userland/Libraries/LibWeb/DOM/Document.idl @@ -56,6 +56,9 @@ interface Document : Node { [CEReactions] undefined write(DOMString... text); [CEReactions] undefined writeln(DOMString... text); + // FIXME: static Document parseHTMLUnsafe((TrustedHTML or DOMString) html); + static Document parseHTMLUnsafe(DOMString html); + attribute DOMString cookie; // https://html.spec.whatwg.org/#Document-partial diff --git a/Userland/Libraries/LibWeb/DOM/Element.cpp b/Userland/Libraries/LibWeb/DOM/Element.cpp index 2fcb318eda..a0e58e2b98 100644 --- a/Userland/Libraries/LibWeb/DOM/Element.cpp +++ b/Userland/Libraries/LibWeb/DOM/Element.cpp @@ -1499,7 +1499,7 @@ WebIDL::ExceptionOr> Element::parse_frag } // 3. Let new children be the result of invoking algorithm given markup, with context set to context. - auto new_children = algorithm(*this, markup); + auto new_children = algorithm(*this, markup, HTML::HTMLParser::AllowDeclarativeShadowRoots::No); // 4. Let fragment be a new DocumentFragment whose node document is context's node document. 
auto fragment = realm().heap().allocate(realm(), document()); @@ -2656,4 +2656,20 @@ WebIDL::ExceptionOr Element::get_html(GetHTMLOptions const& options) con options.shadow_roots); } +// https://html.spec.whatwg.org/#dom-element-sethtmlunsafe +WebIDL::ExceptionOr Element::set_html_unsafe(StringView html) +{ + // FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "Element setHTMLUnsafe", and "script". + + // 2. Let target be this's template contents if this is a template element; otherwise this. + DOM::Node* target = this; + if (is(*this)) + target = verify_cast(*this).content().ptr(); + + // 3. Unsafe set HTML given target, this, and compliantHTML. FIXME: Use compliantHTML. + TRY(target->unsafely_set_html(*this, html)); + + return {}; +} + } diff --git a/Userland/Libraries/LibWeb/DOM/Element.h b/Userland/Libraries/LibWeb/DOM/Element.h index c6d093218d..0092d73184 100644 --- a/Userland/Libraries/LibWeb/DOM/Element.h +++ b/Userland/Libraries/LibWeb/DOM/Element.h @@ -189,6 +189,8 @@ public: WebIDL::ExceptionOr inner_html() const; WebIDL::ExceptionOr set_inner_html(StringView); + WebIDL::ExceptionOr set_html_unsafe(StringView); + WebIDL::ExceptionOr get_html(GetHTMLOptions const&) const; WebIDL::ExceptionOr insert_adjacent_html(String const& position, String const&); diff --git a/Userland/Libraries/LibWeb/DOM/Element.idl b/Userland/Libraries/LibWeb/DOM/Element.idl index 3c92805b98..e1e6394380 100644 --- a/Userland/Libraries/LibWeb/DOM/Element.idl +++ b/Userland/Libraries/LibWeb/DOM/Element.idl @@ -94,7 +94,8 @@ interface Element : Node { readonly attribute double currentCSSZoom; // https://html.spec.whatwg.org/#dom-parsing-and-serialization - [FIXME, CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html); + // FIXME: [CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html); + [CEReactions] undefined setHTMLUnsafe(DOMString html); 
+ DOMString getHTML(optional GetHTMLOptions options = {}); // FIXME: [CEReactions] attribute (TrustedHTML or [LegacyNullToEmptyString] DOMString) innerHTML; diff --git a/Userland/Libraries/LibWeb/DOM/Node.cpp b/Userland/Libraries/LibWeb/DOM/Node.cpp index 27168750c0..dc5e32b47f 100644 --- a/Userland/Libraries/LibWeb/DOM/Node.cpp +++ b/Userland/Libraries/LibWeb/DOM/Node.cpp @@ -1385,6 +1385,26 @@ WebIDL::ExceptionOr Node::serialize_fragment(DOMParsing::RequireWellForm return DOMParsing::serialize_node_to_xml_string(*this, require_well_formed); } +// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#unsafely-set-html +WebIDL::ExceptionOr Node::unsafely_set_html(Element& context_element, StringView html) +{ + // 1. Let newChildren be the result of the HTML fragment parsing algorithm given contextElement, html, and true. + auto new_children = HTML::HTMLParser::parse_html_fragment(context_element, html, HTML::HTMLParser::AllowDeclarativeShadowRoots::Yes); + + // 2. Let fragment be a new DocumentFragment whose node document is contextElement’s node document. + auto fragment = heap().allocate(realm(), context_element.document()); + + // 3. For each node in newChildren, append node to fragment. + for (auto& child : new_children) + // I don't know if this can throw here, but let's be safe. + (void)TRY(fragment->append_child(*child)); + + // 4. Replace all with fragment within target (this node, which may differ from contextElement). 
+ replace_all(fragment); + + return {}; +} + // https://dom.spec.whatwg.org/#dom-node-issamenode bool Node::is_same_node(Node const* other_node) const { diff --git a/Userland/Libraries/LibWeb/DOM/Node.h b/Userland/Libraries/LibWeb/DOM/Node.h index 0b09dd3d70..fafc8e760a 100644 --- a/Userland/Libraries/LibWeb/DOM/Node.h +++ b/Userland/Libraries/LibWeb/DOM/Node.h @@ -252,6 +252,8 @@ public: WebIDL::ExceptionOr serialize_fragment(DOMParsing::RequireWellFormed, FragmentSerializationMode = FragmentSerializationMode::Inner) const; + WebIDL::ExceptionOr unsafely_set_html(Element&, StringView); + void replace_all(JS::GCPtr); void string_replace_all(String const&); diff --git a/Userland/Libraries/LibWeb/DOM/ShadowRoot.cpp b/Userland/Libraries/LibWeb/DOM/ShadowRoot.cpp index 0a075c17d5..33d16cc0f7 100644 --- a/Userland/Libraries/LibWeb/DOM/ShadowRoot.cpp +++ b/Userland/Libraries/LibWeb/DOM/ShadowRoot.cpp @@ -107,6 +107,17 @@ WebIDL::ExceptionOr ShadowRoot::get_html(GetHTMLOptions const& options) options.shadow_roots); } +// https://html.spec.whatwg.org/#dom-shadowroot-sethtmlunsafe +WebIDL::ExceptionOr ShadowRoot::set_html_unsafe(StringView html) +{ + // FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "ShadowRoot setHTMLUnsafe", and "script". + + // 2. Unsafely set HTML given this, this's shadow host, and compliantHTML. FIXME: Use compliantHTML. 
+ TRY(unsafely_set_html(*this->host(), html)); + + return {}; +} + CSS::StyleSheetList& ShadowRoot::style_sheets() { if (!m_style_sheets) diff --git a/Userland/Libraries/LibWeb/DOM/ShadowRoot.h b/Userland/Libraries/LibWeb/DOM/ShadowRoot.h index ca01e56cdf..c76c3b2887 100644 --- a/Userland/Libraries/LibWeb/DOM/ShadowRoot.h +++ b/Userland/Libraries/LibWeb/DOM/ShadowRoot.h @@ -46,6 +46,8 @@ public: WebIDL::ExceptionOr inner_html() const; WebIDL::ExceptionOr set_inner_html(StringView); + WebIDL::ExceptionOr set_html_unsafe(StringView); + WebIDL::ExceptionOr get_html(GetHTMLOptions const&) const; CSS::StyleSheetList& style_sheets(); diff --git a/Userland/Libraries/LibWeb/DOM/ShadowRoot.idl b/Userland/Libraries/LibWeb/DOM/ShadowRoot.idl index 12fde61da1..186f410434 100644 --- a/Userland/Libraries/LibWeb/DOM/ShadowRoot.idl +++ b/Userland/Libraries/LibWeb/DOM/ShadowRoot.idl @@ -15,7 +15,8 @@ interface ShadowRoot : DocumentFragment { // https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization - [FIXME, CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html); + // FIXME: [CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html); + [CEReactions] undefined setHTMLUnsafe(DOMString html); DOMString getHTML(optional GetHTMLOptions options = {}); // FIXME: [CEReactions] attribute (TrustedHTML or [LegacyNullToEmptyString] DOMString) innerHTML; diff --git a/Userland/Libraries/LibWeb/HTML/DOMParser.cpp b/Userland/Libraries/LibWeb/HTML/DOMParser.cpp index d66b764153..9903da9e30 100644 --- a/Userland/Libraries/LibWeb/HTML/DOMParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/DOMParser.cpp @@ -39,25 +39,19 @@ void DOMParser::initialize(JS::Realm& realm) // https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-domparser-parsefromstring JS::NonnullGCPtr DOMParser::parse_from_string(StringView string, Bindings::DOMParserSupportedType type) { - // 1. 
Let document be a new Document, whose content type is type and url is this's relevant global object's associated Document's URL. + // FIXME: 1. Let compliantString be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, string, "DOMParser parseFromString", and "script". + + // 2. Let document be a new Document, whose content type is type and url is this's relevant global object's associated Document's URL. JS::GCPtr document; - // 2. Switch on type: + // 3. Switch on type: if (type == Bindings::DOMParserSupportedType::Text_Html) { // -> "text/html" - // 1. Set document's type to "html". document = HTML::HTMLDocument::create(realm(), verify_cast(relevant_global_object(*this)).associated_document().url()); document->set_content_type(Bindings::idl_enum_to_string(type)); - document->set_document_type(DOM::Document::Type::HTML); - // 2. Create an HTML parser parser, associated with document. - // 3. Place string into the input stream for parser. The encoding confidence is irrelevant. - // FIXME: We don't have the concept of encoding confidence yet. - auto parser = HTMLParser::create(*document, string, "UTF-8"sv); - - // 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream. - // FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL. - parser->run("about:blank"sv); + // 1. Parse HTML from a string given document and compliantString. FIXME: Use compliantString. + document->parse_html_from_a_string(string); } else { // -> Otherwise document = DOM::XMLDocument::create(realm(), verify_cast(relevant_global_object(*this)).associated_document().url()); @@ -67,7 +61,7 @@ JS::NonnullGCPtr DOMParser::parse_from_string(StringView string, // 1. Create an XML parser parse, associated with document, and with XML scripting support disabled. 
XML::Parser parser(string, { .resolve_external_resource = resolve_xml_resource }); XMLDocumentBuilder builder { *document, XMLScriptingSupport::Disabled }; - // 2. Parse string using parser. + // 2. Parse compliantString using parser. FIXME: Use compliantString. auto result = parser.parse_with_listener(builder); // 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then: if (result.is_error() || builder.has_error()) { diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp index 9b40d5d290..99c93ab1f7 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp @@ -4266,7 +4266,7 @@ DOM::Document& HTMLParser::document() } // https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments -Vector> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup) +Vector> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup, AllowDeclarativeShadowRoots allow_declarative_shadow_roots) { // 1. Create a new Document node, and mark it as being an HTML document. auto temp_document = DOM::Document::create(context_element.realm()); @@ -4279,12 +4279,16 @@ Vector> HTMLParser::parse_html_fragment(DOM::Element& cont // Otherwise, leave the Document in no-quirks mode. temp_document->set_quirks_mode(context_element.document().mode()); - // 3. Create a new HTML parser, and associate it with the just created Document node. + // 3. If allowDeclarativeShadowRoots is true, then set Document's allow declarative shadow roots to true. + if (allow_declarative_shadow_roots == AllowDeclarativeShadowRoots::Yes) + temp_document->set_allow_declarative_shadow_roots(true); + + // 4. Create a new HTML parser, and associate it with the just created Document node. 
auto parser = HTMLParser::create(*temp_document, markup, "utf-8"sv); parser->m_context_element = JS::make_handle(context_element); parser->m_parsing_fragment = true; - // 4. Set the state of the HTML parser's tokenization stage as follows, switching on the context element: + // 5. Set the state of the HTML parser's tokenization stage as follows, switching on the context element: // - title // - textarea if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) { @@ -4321,37 +4325,37 @@ Vector> HTMLParser::parse_html_fragment(DOM::Element& cont // Leave the tokenizer in the data state. } - // 5. Let root be a new html element with no attributes. + // 6. Let root be a new html element with no attributes. auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - // 6. Append the element root to the Document node created above. + // 7. Append the element root to the Document node created above. MUST(temp_document->append_child(root)); - // 7. Set up the parser's stack of open elements so that it contains just the single element root. + // 8. Set up the parser's stack of open elements so that it contains just the single element root. parser->m_stack_of_open_elements.push(root); - // 8. If the context element is a template element, + // 9. If the context element is a template element, if (context_element.local_name() == HTML::TagNames::template_) { // push "in template" onto the stack of template insertion modes so that it is the new current template insertion mode. parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate); } - // FIXME: 9. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context. + // FIXME: 10. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context. 
// Let this start tag token be the start tag token of the context node, e.g. for the purposes of determining if it is an HTML integration point. - // 10. Reset the parser's insertion mode appropriately. + // 11. Reset the parser's insertion mode appropriately. parser->reset_the_insertion_mode_appropriately(); - // 11. Set the parser's form element pointer to the nearest node to the context element that is a form element + // 12. Set the parser's form element pointer to the nearest node to the context element that is a form element // (going straight up the ancestor chain, and including the element itself, if it is a form element), if any. // (If there is no such form element, the form element pointer keeps its initial value, null.) parser->m_form_element = context_element.first_ancestor_of_type(); - // 12. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant. - // 13. Start the parser and let it run until it has consumed all the characters just inserted into the input stream. + // 13. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant. + // 14. Start the parser and let it run until it has consumed all the characters just inserted into the input stream. parser->run(context_element.document().url()); - // 14. Return the child nodes of root, in tree order. + // 15. Return the child nodes of root, in tree order. 
Vector> children; while (JS::GCPtr child = root->first_child()) { MUST(root->remove_child(*child)); diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h index 9a0b40b48f..494baf12be 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h @@ -59,8 +59,11 @@ public: static void the_end(JS::NonnullGCPtr, JS::GCPtr = nullptr); DOM::Document& document(); - - static Vector> parse_html_fragment(DOM::Element& context_element, StringView); + enum class AllowDeclarativeShadowRoots { + No, + Yes, + }; + static Vector> parse_html_fragment(DOM::Element& context_element, StringView, AllowDeclarativeShadowRoots = AllowDeclarativeShadowRoots::No); enum class SerializableShadowRoots { No, Yes,