JSSpecCompiler: Get rid of ParseError in Lexer

This commit is contained in:
Dan Klishch 2024-01-20 19:47:02 -05:00 committed by Andrew Kaster
parent ed04aff1de
commit 9a2337f7ad
5 changed files with 125 additions and 88 deletions

View file

@ -43,9 +43,8 @@ bool can_end_word_token(char c)
{
return is_ascii_space(c) || ".,"sv.contains(c);
}
}
ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
void tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
{
static constexpr struct {
StringView text_to_match;
@ -103,74 +102,131 @@ ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node c
if (word.length())
tokens.append({ TokenType::Word, word, node, move(token_location) });
}
return {};
}
ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps)
// Selects which kind of XML subtree tokenize_tree() is walking. A couple of child
// elements are only accepted for one of the two kinds; everything else is handled
// the same way for both.
enum class TreeType {
// Passed by tokenize_step(). Only in this mode a `tag_ol` child is recorded as the substeps list.
AlgorithmStep,
// Passed by tokenize_header(). Only in this mode a `tag_span` child is tokenized as a SectionNumber.
Header,
};
// Mutable state that tokenize_tree() fills in while walking the children of a node.
struct TokenizerState {
// Tokens appended so far, in the order they were produced.
Vector<Token> tokens;
// The child node recorded as the nested substeps list, or nullptr if none was seen.
XML::Node const* substeps = nullptr;
// Set when an error is reported for an element child; tokenize_tree() stops
// visiting further children once this is true.
bool has_errors = false;
};
void tokenize_tree(SpecificationParsingContext& ctx, TokenizerState& state, XML::Node const* node, TreeType tree_type)
{
TokenizeTreeResult result;
auto& tokens = result.tokens;
// FIXME: Use structured binding once macOS Lagom CI updates to Clang >= 16.
auto& tokens = state.tokens;
auto& substeps = state.substeps;
auto& has_errors = state.has_errors;
for (auto const& child : node->as_element().children) {
TRY(child->content.visit(
[&](XML::Node::Element const& element) -> ParseErrorOr<void> {
if (result.substeps != nullptr)
return ParseError::create("Substeps list must be the last non-empty child"sv, child);
if (has_errors)
break;
child->content.visit(
[&](XML::Node::Element const& element) -> void {
Location child_location = ctx.location_from_xml_offset(child->offset);
auto report_error = [&]<typename... Parameters>(AK::CheckedFormatString<Parameters...>&& fmt, Parameters const&... parameters) {
ctx.diag().error(child_location, move(fmt), parameters...);
has_errors = true;
};
if (element.name == tag_var) {
tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) });
return {};
if (substeps) {
report_error("substeps list must be the last child of algorithm step");
return;
}
if (element.name == tag_span) {
auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class));
if (element_class != class_secnum)
return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", element_class), child);
tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) });
return {};
if (element.name == tag_var) {
auto variable_name = get_text_contents(child);
if (!variable_name.has_value())
report_error("malformed <var> subtree, expected single text child node");
tokens.append({ TokenType::Identifier, variable_name.value_or(""sv), child, move(child_location) });
return;
}
if (element.name == tag_emu_val) {
auto contents = TRY(get_text_contents(child));
auto maybe_contents = get_text_contents(child);
if (!maybe_contents.has_value())
report_error("malformed <emu-val> subtree, expected single text child node");
auto contents = maybe_contents.value_or(""sv);
if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"'))
tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) });
else if (contents == "undefined")
tokens.append({ TokenType::Undefined, contents, child, move(child_location) });
else
tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
return {};
return;
}
if (element.name == tag_emu_xref) {
auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv))));
tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
return {};
auto identifier = get_single_child_with_tag(child, "a"sv).map([](XML::Node const* node) {
return get_text_contents(node).value_or(""sv);
});
if (!identifier.has_value() || identifier.value().is_empty())
report_error("malformed <emu-xref> subtree, expected <a> with nested single text node");
tokens.append({ TokenType::Identifier, identifier.value_or(""sv), child, move(child_location) });
return;
}
if (element.name == tag_ol) {
if (!allow_substeps)
return ParseError::create("Found nested list but substeps are not allowed"sv, child);
result.substeps = child;
return {};
if (tree_type == TreeType::Header && element.name == tag_span) {
auto element_class = get_attribute_by_name(child, attribute_class);
if (element_class != class_secnum)
report_error("expected <span> to have class='secnum' attribute");
auto section_number = get_text_contents(child);
if (!section_number.has_value())
report_error("malformed section number span subtree, expected single text child node");
tokens.append({ TokenType::SectionNumber, section_number.value_or(""sv), child, move(child_location) });
return;
}
return ParseError::create(String::formatted("Unexpected child element with tag {}", element.name), child);
if (tree_type == TreeType::AlgorithmStep && element.name == tag_ol) {
substeps = child;
return;
}
report_error("<{}> should not be a child of algorithm step", element.name);
},
[&](XML::Node::Text const& text) -> ParseErrorOr<void> {
[&](XML::Node::Text const& text) {
auto view = text.builder.string_view();
if (result.substeps && !contains_empty_text(child))
return ParseError::create("Substeps list must be the last non-empty child"sv, child);
return tokenize_string(ctx, child, view, tokens);
if (substeps != nullptr && !contains_empty_text(child)) {
ctx.diag().error(ctx.location_from_xml_offset(child->offset),
"substeps list must be the last child of algorithm step");
} else {
tokenize_string(ctx, child, view, tokens);
}
},
move(ignore_comments)));
[&](auto const&) {});
}
if (tokens.size() && tokens.last().type == TokenType::MemberAccess)
tokens.last().type = TokenType::Dot;
}
}
return result;
// Tokenizes the subtree rooted at `node` as an algorithm step.
// The returned `substeps` is whatever substeps node tokenize_tree() recorded (possibly nullptr);
// the returned `tokens` is empty when tokenize_tree() flagged an error.
StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node)
{
    TokenizerState step_state;
    tokenize_tree(ctx, step_state, node, TreeType::AlgorithmStep);

    StepTokenizationResult step_result;
    step_result.substeps = step_state.substeps;
    // `tokens` stays an empty Optional unless tokenization finished without errors.
    if (!step_state.has_errors)
        step_result.tokens = move(step_state.tokens);
    return step_result;
}
// Tokenizes the subtree rooted at `node` as a clause header.
// Returns the produced tokens, or an empty Optional when tokenize_tree() flagged an error.
Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node)
{
    TokenizerState state;
    tokenize_tree(ctx, state, node, TreeType::Header);
    if (state.has_errors)
        return {};
    // `state` is a local that dies on return, so hand the vector over instead of copying it
    // (this matches what tokenize_step() does with its tokens).
    return Optional<Vector<Token>> { move(state.tokens) };
}
}

View file

@ -31,13 +31,12 @@ inline constexpr StringView attribute_id = "id"sv;
inline constexpr StringView class_secnum = "secnum"sv;
ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens);
struct TokenizeTreeResult {
Vector<Token> tokens;
struct StepTokenizationResult {
Optional<Vector<Token>> tokens;
XML::Node const* substeps = nullptr;
};
ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps = false);
StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node);
Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node);
}

View file

@ -64,16 +64,9 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
{
VERIFY(element->as_element().name == tag_li);
auto tokenization_result = tokenize_tree(ctx, element, true);
if (tokenization_result.is_error()) {
ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()),
"{}", tokenization_result.error()->to_string());
return {};
}
auto [maybe_tokens, substeps] = tokenize_step(ctx, element);
auto [tokens, substeps] = tokenization_result.release_value();
AlgorithmStep result(ctx);
result.m_tokens = move(tokens);
result.m_node = element;
if (substeps) {
@ -86,6 +79,10 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree;
}
if (!maybe_tokens.has_value())
return {};
result.m_tokens = maybe_tokens.release_value();
if (!result.parse())
return {};
return result;
@ -260,14 +257,11 @@ Optional<FailedTextParseDiagnostic> SpecificationClause::parse_header(XML::Node
auto& ctx = *m_ctx_pointer;
VERIFY(element->as_element().name == tag_h1);
auto tokenization_result = tokenize_tree(ctx, element, false);
if (tokenization_result.is_error()) {
return FailedTextParseDiagnostic {
ctx.location_from_xml_offset(tokenization_result.error()->offset()),
tokenization_result.error()->to_string()
};
}
auto const& tokens = tokenization_result.release_value().tokens;
auto maybe_tokens = tokenize_header(ctx, element);
if (!maybe_tokens.has_value())
return {};
auto const& tokens = maybe_tokens.release_value();
TextParser parser(ctx, tokens, element);
auto parse_result = parser.parse_clause_header();
@ -289,6 +283,7 @@ void SpecificationClause::parse(XML::Node const* element)
auto& ctx = context();
u32 child_index = 0;
bool node_ignored_warning_issued = false;
Optional<FailedTextParseDiagnostic> header_parse_error;
for (auto const& child : element->as_element().children) {
@ -312,10 +307,12 @@ void SpecificationClause::parse(XML::Node const* element)
m_subclauses.append(create(ctx, child));
return;
}
if (header_parse_error.has_value()) {
if (!node_ignored_warning_issued && m_header.header.has<AK::Empty>()) {
node_ignored_warning_issued = true;
ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
"node content will be ignored since section header was not parsed successfully");
ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message);
if (header_parse_error.has_value())
ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message);
}
}
++child_index;

View file

@ -16,15 +16,6 @@ bool contains_empty_text(XML::Node const* node)
return node->as_text().builder.string_view().trim_whitespace().is_empty();
}
ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name)
{
auto const& attribute = node->as_element().attributes.get(attribute_name);
if (!attribute.has_value())
return ParseError::create(String::formatted("Attribute {} is not present", attribute_name), node);
return attribute.value();
}
Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name)
{
auto const& attribute = node->as_element().attributes.get(attribute_name);
@ -34,39 +25,34 @@ Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView att
return attribute.value();
}
ParseErrorOr<StringView> get_text_contents(XML::Node const* node)
Optional<StringView> get_text_contents(XML::Node const* node)
{
auto const& children = node->as_element().children;
if (children.size() != 1 || !children[0]->is_text())
return ParseError::create("Expected single text node in a child list of the node"sv, node);
return {};
return children[0]->as_text().builder.string_view();
}
ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name)
Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name)
{
XML::Node const* result = nullptr;
for (auto const& child : element->as_element().children) {
TRY(child->content.visit(
[&](XML::Node::Element const& element) -> ParseErrorOr<void> {
if (element.name != tag_name)
return ParseError::create(String::formatted("Expected child with the tag name {} but found {}", tag_name, element.name), child);
if (result != nullptr)
return ParseError::create("Element must have only one child"sv, child);
auto is_valid = child->content.visit(
[&](XML::Node::Element const& element) {
result = child;
return {};
return result != nullptr || element.name != tag_name;
},
[&](XML::Node::Text const&) -> ParseErrorOr<void> {
if (!contains_empty_text(child))
return ParseError::create("Element should not have non-empty child text nodes"sv, element);
return {};
[&](XML::Node::Text const&) {
return contains_empty_text(child);
},
move(ignore_comments)));
[&](auto const&) { return true; });
if (!is_valid)
return {};
}
if (result == nullptr)
return ParseError::create(String::formatted("Element must have only one child"), element);
return {};
return result;
}

View file

@ -20,11 +20,10 @@ inline constexpr IgnoreComments ignore_comments {};
bool contains_empty_text(XML::Node const* node);
ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name);
Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name);
ParseErrorOr<StringView> get_text_contents(XML::Node const* node);
Optional<StringView> get_text_contents(XML::Node const* node);
ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name);
Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name);
}