1
0
mirror of https://github.com/SerenityOS/serenity synced 2024-07-01 11:19:21 +00:00

LibWeb: Implement unsafe HTML parsing methods

Both Element's and ShadowRoot's setHTMLUnsafe, and Document's static
parseHTMLUnsafe methods are implemented.

(cherry picked from commit ce8d3d17c4f2fcca8fac0ff4a832c8f50a011fc7)
This commit is contained in:
Luke Warlow 2024-06-25 20:55:58 +01:00 committed by Nico Weber
parent e2bf7d1a36
commit cf5b1b7c10
14 changed files with 129 additions and 31 deletions

View File

@ -5131,4 +5131,40 @@ void Document::set_allow_declarative_shadow_roots(bool allow)
m_allow_declarative_shadow_roots = allow;
}
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#parse-html-from-a-string
// Marks this document as an HTML document and runs a newly created HTML parser,
// associated with this document, over the given markup.
void Document::parse_html_from_a_string(StringView html)
{
    // 1. Set document's type to "html".
    set_document_type(DOM::Document::Type::HTML);

    // 2. Create an HTML parser parser, associated with document.
    // 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
    // FIXME: We don't have the concept of encoding confidence yet.
    auto html_parser = HTML::HTMLParser::create(*this, html, "UTF-8"sv);

    // 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream.
    // FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL.
    html_parser->run("about:blank"sv);
}
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsehtmlunsafe
// Creates a new "text/html" document in the VM's current realm, enables declarative
// shadow roots on it, parses the given markup into it and returns it.
JS::NonnullGCPtr<Document> Document::parse_html_unsafe(JS::VM& vm, StringView html)
{
    // FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "Document parseHTMLUnsafe", and "script".

    // 2. Let document be a new Document, whose content type is "text/html".
    auto new_document = Document::create(*vm.current_realm());
    new_document->set_content_type("text/html"_string);

    // 3. Set document's allow declarative shadow roots to true.
    new_document->set_allow_declarative_shadow_roots(true);

    // 4. Parse HTML from a string given document and compliantHTML.
    // FIXME: Use compliantHTML.
    new_document->parse_html_from_a_string(html);

    // 5. Return document.
    return new_document;
}
}

View File

@ -676,6 +676,9 @@ public:
Vector<JS::Handle<DOM::Range>> find_matching_text(String const&, CaseSensitivity);
void parse_html_from_a_string(StringView);
static JS::NonnullGCPtr<Document> parse_html_unsafe(JS::VM&, StringView);
protected:
virtual void initialize(JS::Realm&) override;
virtual void visit_edges(Cell::Visitor&) override;

View File

@ -56,6 +56,9 @@ interface Document : Node {
[CEReactions] undefined write(DOMString... text);
[CEReactions] undefined writeln(DOMString... text);
// FIXME: static Document parseHTMLUnsafe((TrustedHTML or DOMString) html);
static Document parseHTMLUnsafe(DOMString html);
attribute DOMString cookie;
// https://html.spec.whatwg.org/#Document-partial

View File

@ -1499,7 +1499,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<DOM::DocumentFragment>> Element::parse_frag
}
// 3. Let new children be the result of invoking algorithm given markup, with context set to context.
auto new_children = algorithm(*this, markup);
auto new_children = algorithm(*this, markup, HTML::HTMLParser::AllowDeclarativeShadowRoots::No);
// 4. Let fragment be a new DocumentFragment whose node document is context's node document.
auto fragment = realm().heap().allocate<DOM::DocumentFragment>(realm(), document());
@ -2656,4 +2656,20 @@ WebIDL::ExceptionOr<String> Element::get_html(GetHTMLOptions const& options) con
options.shadow_roots);
}
// https://html.spec.whatwg.org/#dom-element-sethtmlunsafe
// Unsafely sets HTML on this element, or on its template contents when this element is a
// template element, always using this element as the parsing context.
WebIDL::ExceptionOr<void> Element::set_html_unsafe(StringView html)
{
    // FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "Element setHTMLUnsafe", and "script".

    // 2. Let target be this's template contents if this is a template element; otherwise this.
    // 3. Unsafe set HTML given target, this, and compliantHTML. FIXME: Use compliantHTML.
    if (is<HTML::HTMLTemplateElement>(*this)) {
        // Template element: the target is the template contents.
        auto& template_element = verify_cast<HTML::HTMLTemplateElement>(*this);
        TRY(template_element.content()->unsafely_set_html(*this, html));
        return {};
    }

    // Any other element: the target is this element itself.
    TRY(unsafely_set_html(*this, html));
    return {};
}
}

View File

@ -189,6 +189,8 @@ public:
WebIDL::ExceptionOr<String> inner_html() const;
WebIDL::ExceptionOr<void> set_inner_html(StringView);
WebIDL::ExceptionOr<void> set_html_unsafe(StringView);
WebIDL::ExceptionOr<String> get_html(GetHTMLOptions const&) const;
WebIDL::ExceptionOr<void> insert_adjacent_html(String const& position, String const&);

View File

@ -94,7 +94,8 @@ interface Element : Node {
readonly attribute double currentCSSZoom;
// https://html.spec.whatwg.org/#dom-parsing-and-serialization
[FIXME, CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
// FIXME: [CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
[CEReactions] undefined setHTMLUnsafe(DOMString html);
DOMString getHTML(optional GetHTMLOptions options = {});
// FIXME: [CEReactions] attribute (TrustedHTML or [LegacyNullToEmptyString] DOMString) innerHTML;

View File

@ -1385,6 +1385,26 @@ WebIDL::ExceptionOr<String> Node::serialize_fragment(DOMParsing::RequireWellForm
return DOMParsing::serialize_node_to_xml_string(*this, require_well_formed);
}
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#unsafely-set-html
// Parses `html` as a fragment in the context of `context_element` (with declarative shadow
// roots allowed), gathers the resulting nodes into a DocumentFragment and replaces all of
// this node's children with it. Errors from appending to the fragment are propagated.
WebIDL::ExceptionOr<void> Node::unsafely_set_html(Element& context_element, StringView html)
{
    // 1. Let newChildren be the result of the HTML fragment parsing algorithm given contextElement, html, and true.
    auto parsed_nodes = HTML::HTMLParser::parse_html_fragment(context_element, html, HTML::HTMLParser::AllowDeclarativeShadowRoots::Yes);

    // 2. Let fragment be a new DocumentFragment whose node document is contextElement's node document.
    auto fragment = heap().allocate<DocumentFragment>(realm(), context_element.document());

    // 3. For each node in newChildren, append node to fragment.
    for (auto& parsed_node : parsed_nodes) {
        // It is unclear whether appending can fail here, so propagate any error to be safe.
        (void)TRY(fragment->append_child(*parsed_node));
    }

    // 4. Replace all with fragment within contextElement.
    // NOTE: replace_all() is called on this node, i.e. the target the caller invoked us on.
    replace_all(fragment);

    return {};
}
// https://dom.spec.whatwg.org/#dom-node-issamenode
bool Node::is_same_node(Node const* other_node) const
{

View File

@ -252,6 +252,8 @@ public:
WebIDL::ExceptionOr<String> serialize_fragment(DOMParsing::RequireWellFormed, FragmentSerializationMode = FragmentSerializationMode::Inner) const;
WebIDL::ExceptionOr<void> unsafely_set_html(Element&, StringView);
void replace_all(JS::GCPtr<Node>);
void string_replace_all(String const&);

View File

@ -107,6 +107,17 @@ WebIDL::ExceptionOr<String> ShadowRoot::get_html(GetHTMLOptions const& options)
options.shadow_roots);
}
// https://html.spec.whatwg.org/#dom-shadowroot-sethtmlunsafe
// Unsafely sets HTML on this shadow root, using its host element as the parsing context.
WebIDL::ExceptionOr<void> ShadowRoot::set_html_unsafe(StringView html)
{
    // FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "ShadowRoot setHTMLUnsafe", and "script".

    // 2. Unsafe set HTML given this, this's shadow host, and compliantHTML. FIXME: Use compliantHTML.
    auto& shadow_host = *host();
    return unsafely_set_html(shadow_host, html);
}
CSS::StyleSheetList& ShadowRoot::style_sheets()
{
if (!m_style_sheets)

View File

@ -46,6 +46,8 @@ public:
WebIDL::ExceptionOr<String> inner_html() const;
WebIDL::ExceptionOr<void> set_inner_html(StringView);
WebIDL::ExceptionOr<void> set_html_unsafe(StringView);
WebIDL::ExceptionOr<String> get_html(GetHTMLOptions const&) const;
CSS::StyleSheetList& style_sheets();

View File

@ -15,7 +15,8 @@ interface ShadowRoot : DocumentFragment {
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
[FIXME, CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
// FIXME: [CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
[CEReactions] undefined setHTMLUnsafe(DOMString html);
DOMString getHTML(optional GetHTMLOptions options = {});
// FIXME: [CEReactions] attribute (TrustedHTML or [LegacyNullToEmptyString] DOMString) innerHTML;

View File

@ -39,25 +39,19 @@ void DOMParser::initialize(JS::Realm& realm)
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-domparser-parsefromstring
JS::NonnullGCPtr<DOM::Document> DOMParser::parse_from_string(StringView string, Bindings::DOMParserSupportedType type)
{
// 1. Let document be a new Document, whose content type is type and url is this's relevant global object's associated Document's URL.
// FIXME: 1. Let compliantString be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, string, "DOMParser parseFromString", and "script".
// 2. Let document be a new Document, whose content type is type and url is this's relevant global object's associated Document's URL.
JS::GCPtr<DOM::Document> document;
// 2. Switch on type:
// 3. Switch on type:
if (type == Bindings::DOMParserSupportedType::Text_Html) {
// -> "text/html"
// 1. Set document's type to "html".
document = HTML::HTMLDocument::create(realm(), verify_cast<HTML::Window>(relevant_global_object(*this)).associated_document().url());
document->set_content_type(Bindings::idl_enum_to_string(type));
document->set_document_type(DOM::Document::Type::HTML);
// 2. Create an HTML parser parser, associated with document.
// 3. Place string into the input stream for parser. The encoding confidence is irrelevant.
// FIXME: We don't have the concept of encoding confidence yet.
auto parser = HTMLParser::create(*document, string, "UTF-8"sv);
// 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream.
// FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL.
parser->run("about:blank"sv);
// 1. Parse HTML from a string given document and compliantString. FIXME: Use compliantString.
document->parse_html_from_a_string(string);
} else {
// -> Otherwise
document = DOM::XMLDocument::create(realm(), verify_cast<HTML::Window>(relevant_global_object(*this)).associated_document().url());
@ -67,7 +61,7 @@ JS::NonnullGCPtr<DOM::Document> DOMParser::parse_from_string(StringView string,
// 1. Create an XML parser parse, associated with document, and with XML scripting support disabled.
XML::Parser parser(string, { .resolve_external_resource = resolve_xml_resource });
XMLDocumentBuilder builder { *document, XMLScriptingSupport::Disabled };
// 2. Parse string using parser.
// 2. Parse compliantString using parser. FIXME: Use compliantString.
auto result = parser.parse_with_listener(builder);
// 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then:
if (result.is_error() || builder.has_error()) {

View File

@ -4266,7 +4266,7 @@ DOM::Document& HTMLParser::document()
}
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup)
Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup, AllowDeclarativeShadowRoots allow_declarative_shadow_roots)
{
// 1. Create a new Document node, and mark it as being an HTML document.
auto temp_document = DOM::Document::create(context_element.realm());
@ -4279,12 +4279,16 @@ Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& cont
// Otherwise, leave the Document in no-quirks mode.
temp_document->set_quirks_mode(context_element.document().mode());
// 3. Create a new HTML parser, and associate it with the just created Document node.
// 3. If allowDeclarativeShadowRoots is true, then set Document's allow declarative shadow roots to true.
if (allow_declarative_shadow_roots == AllowDeclarativeShadowRoots::Yes)
temp_document->set_allow_declarative_shadow_roots(true);
// 4. Create a new HTML parser, and associate it with the just created Document node.
auto parser = HTMLParser::create(*temp_document, markup, "utf-8"sv);
parser->m_context_element = JS::make_handle(context_element);
parser->m_parsing_fragment = true;
// 4. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
// 5. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
// - title
// - textarea
if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
@ -4321,37 +4325,37 @@ Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& cont
// Leave the tokenizer in the data state.
}
// 5. Let root be a new html element with no attributes.
// 6. Let root be a new html element with no attributes.
auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
// 6. Append the element root to the Document node created above.
// 7. Append the element root to the Document node created above.
MUST(temp_document->append_child(root));
// 7. Set up the parser's stack of open elements so that it contains just the single element root.
// 8. Set up the parser's stack of open elements so that it contains just the single element root.
parser->m_stack_of_open_elements.push(root);
// 8. If the context element is a template element,
// 9. If the context element is a template element,
if (context_element.local_name() == HTML::TagNames::template_) {
// push "in template" onto the stack of template insertion modes so that it is the new current template insertion mode.
parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
}
// FIXME: 9. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
// FIXME: 10. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
// Let this start tag token be the start tag token of the context node, e.g. for the purposes of determining if it is an HTML integration point.
// 10. Reset the parser's insertion mode appropriately.
// 11. Reset the parser's insertion mode appropriately.
parser->reset_the_insertion_mode_appropriately();
// 11. Set the parser's form element pointer to the nearest node to the context element that is a form element
// 12. Set the parser's form element pointer to the nearest node to the context element that is a form element
// (going straight up the ancestor chain, and including the element itself, if it is a form element), if any.
// (If there is no such form element, the form element pointer keeps its initial value, null.)
parser->m_form_element = context_element.first_ancestor_of_type<HTMLFormElement>();
// 12. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
// 13. Start the parser and let it run until it has consumed all the characters just inserted into the input stream.
// 13. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
// 14. Start the parser and let it run until it has consumed all the characters just inserted into the input stream.
parser->run(context_element.document().url());
// 14. Return the child nodes of root, in tree order.
// 15. Return the child nodes of root, in tree order.
Vector<JS::Handle<DOM::Node>> children;
while (JS::GCPtr<DOM::Node> child = root->first_child()) {
MUST(root->remove_child(*child));

View File

@ -59,8 +59,11 @@ public:
static void the_end(JS::NonnullGCPtr<DOM::Document>, JS::GCPtr<HTMLParser> = nullptr);
DOM::Document& document();
static Vector<JS::Handle<DOM::Node>> parse_html_fragment(DOM::Element& context_element, StringView);
// Tells parse_html_fragment() whether to set "allow declarative shadow roots" to true on the
// temporary document the fragment is parsed into (Yes) or to leave that flag untouched (No).
enum class AllowDeclarativeShadowRoots {
No,
Yes,
};
static Vector<JS::Handle<DOM::Node>> parse_html_fragment(DOM::Element& context_element, StringView, AllowDeclarativeShadowRoots = AllowDeclarativeShadowRoots::No);
enum class SerializableShadowRoots {
No,
Yes,