JSSpecCompiler: Make -xspec capable of parsing the whole specification

... in theory. In practice, we fail to parse all of the functions.
This commit is contained in:
Dan Klishch 2024-01-18 21:17:26 -05:00 committed by Andrew Kaster
parent 483e195e48
commit 14ee25b8ba
9 changed files with 255 additions and 58 deletions

View file

@ -63,6 +63,8 @@ class AlgorithmStep;
class AlgorithmStepList;
class Algorithm;
class SpecFunction;
class SpecificationClause;
class Specification;
// DiagnosticEngine.h
struct LogicalLocation;

View file

@ -13,6 +13,8 @@ namespace JSSpecCompiler {
inline constexpr StringView tag_emu_alg = "emu-alg"sv;
inline constexpr StringView tag_emu_clause = "emu-clause"sv;
inline constexpr StringView tag_emu_import = "emu-import"sv;
inline constexpr StringView tag_emu_intro = "emu-intro"sv;
inline constexpr StringView tag_emu_val = "emu-val"sv;
inline constexpr StringView tag_emu_xref = "emu-xref"sv;
inline constexpr StringView tag_h1 = "h1"sv;
@ -20,6 +22,7 @@ inline constexpr StringView tag_li = "li"sv;
inline constexpr StringView tag_ol = "ol"sv;
inline constexpr StringView tag_p = "p"sv;
inline constexpr StringView tag_span = "span"sv;
inline constexpr StringView tag_specification = "specification"sv;
inline constexpr StringView tag_var = "var"sv;
inline constexpr StringView attribute_aoid = "aoid"sv;

View file

@ -5,6 +5,7 @@
*/
#include "Parser/ParseError.h"
#include "DiagnosticEngine.h"
namespace JSSpecCompiler {
@ -27,7 +28,7 @@ NonnullRefPtr<ParseError> ParseError::create(ErrorOr<String> message, XML::Node
String ParseError::to_string() const
{
StringBuilder builder;
builder.appendff("error: {}\n", m_message);
builder.appendff("{}\n", m_message);
XML::Node const* current = m_node;
while (current != nullptr) {
@ -48,4 +49,9 @@ String ParseError::to_string() const
return MUST(builder.to_string());
}
XML::Offset ParseError::offset() const
{
return m_node->offset;
}
}

View file

@ -24,6 +24,7 @@ public:
static NonnullRefPtr<ParseError> create(ErrorOr<String> message, XML::Node const* node);
String to_string() const;
XML::Offset offset() const;
private:
String m_message;

View file

@ -138,16 +138,127 @@ ParseErrorOr<Algorithm> Algorithm::create(XML::Node const* node)
return algorithm;
}
ParseErrorOr<SpecFunction> SpecFunction::create(XML::Node const* element)
NonnullOwnPtr<SpecificationClause> SpecificationClause::create(SpecificationParsingContext& ctx, XML::Node const* element)
{
return ctx.with_new_logical_scope([&] {
VERIFY(element->as_element().name == tag_emu_clause);
SpecificationClause specification_clause;
specification_clause.parse(ctx, element);
OwnPtr<SpecificationClause> result;
specification_clause.m_header.header.visit(
[&](AK::Empty const&) {
result = make<SpecificationClause>(move(specification_clause));
},
[&](ClauseHeader::FunctionDefinition const&) {
result = make<SpecFunction>(move(specification_clause));
});
if (!result->post_initialize(ctx, element))
result = make<SpecificationClause>(move(*result));
return result.release_nonnull();
});
}
void SpecificationClause::collect_into(TranslationUnitRef translation_unit)
{
do_collect(translation_unit);
for (auto& subclause : m_subclauses)
subclause->collect_into(translation_unit);
}
ParseErrorOr<void> SpecificationClause::parse_header(XML::Node const* element)
{
VERIFY(element->as_element().name == tag_h1);
auto tokens = TRY(tokenize_tree(element));
TextParser parser(tokens.tokens, element);
m_header = TRY(parser.parse_clause_header());
return {};
}
void SpecificationClause::parse(SpecificationParsingContext& ctx, XML::Node const* element)
{
u32 child_index = 0;
Optional<NonnullRefPtr<ParseError>> header_parse_error;
for (auto const& child : element->as_element().children) {
child->content.visit(
[&](XML::Node::Element const& element) {
if (child_index == 0) {
if (element.name != tag_h1) {
ctx.diag().error(ctx.location_from_xml_offset(child->offset),
"<h1> must be the first child of <emu-clause>");
return;
}
if (auto error = parse_header(child); error.is_error())
header_parse_error = error.release_error();
else
ctx.current_logical_scope().section = MUST(String::from_utf8(m_header.section_number));
} else {
if (element.name == tag_emu_clause) {
m_subclauses.append(create(ctx, child));
return;
}
if (header_parse_error.has_value()) {
ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
"node content will be ignored since section header was not parsed successfully");
// TODO: Integrate backtracing parser errors better
ctx.diag().note(ctx.location_from_xml_offset(header_parse_error.value()->offset()),
"{}", header_parse_error.value()->to_string());
header_parse_error.clear();
}
}
++child_index;
},
[&](XML::Node::Text const&) {
if (!contains_empty_text(child)) {
ctx.diag().error(ctx.location_from_xml_offset(child->offset),
"non-empty text node should not be a child of <emu-clause>");
}
},
[&](auto) {});
}
}
bool SpecFunction::post_initialize(SpecificationParsingContext& ctx, XML::Node const* element)
{
auto initialization_result = do_post_initialize(ctx, element);
if (initialization_result.is_error()) {
// TODO: Integrate backtracing parser errors better
ctx.diag().error(ctx.location_from_xml_offset(initialization_result.error()->offset()),
"{}", initialization_result.error()->to_string());
return false;
}
return true;
}
void SpecFunction::do_collect(TranslationUnitRef translation_unit)
{
translation_unit->adopt_function(make_ref_counted<FunctionDefinition>(m_name, m_algorithm.m_tree, move(m_arguments)));
}
ParseErrorOr<void> SpecFunction::do_post_initialize(SpecificationParsingContext& ctx, XML::Node const* element)
{
VERIFY(element->as_element().name == tag_emu_clause);
SpecFunction result;
result.m_id = TRY(get_attribute_by_name(element, attribute_id));
result.m_name = TRY(get_attribute_by_name(element, attribute_aoid));
m_id = TRY(get_attribute_by_name(element, attribute_id));
m_name = TRY(get_attribute_by_name(element, attribute_aoid));
m_section_number = m_header.section_number;
auto const& [function_name, arguments] = m_header.header.get<ClauseHeader::FunctionDefinition>();
if (m_name != function_name) {
ctx.diag().warn(ctx.location_from_xml_offset(element->offset),
"function name in header and <emu-clause>[aoid] do not match");
}
m_arguments = arguments;
u32 children_count = 0;
bool has_definition = false;
XML::Node const* algorithm_node = nullptr;
XML::Node const* prose_node = nullptr;
@ -155,12 +266,9 @@ ParseErrorOr<SpecFunction> SpecFunction::create(XML::Node const* element)
for (auto const& child : element->as_element().children) {
TRY(child->content.visit(
[&](XML::Node::Element const& element) -> ParseErrorOr<void> {
++children_count;
if (element.name == tag_h1) {
if (children_count != 1)
return ParseError::create("<h1> should be the first child of a <emu-clause>"sv, child);
TRY(result.parse_definition(child));
has_definition = true;
if (children_count != 0)
return ParseError::create("<h1> can only be the first child of <emu-clause>"sv, child);
} else if (element.name == tag_p) {
if (prose_node == nullptr)
prose_node = child;
@ -169,6 +277,7 @@ ParseErrorOr<SpecFunction> SpecFunction::create(XML::Node const* element)
} else {
return ParseError::create("Unknown child of <emu-clause>"sv, child);
}
++children_count;
return {};
},
[&](XML::Node::Text const&) -> ParseErrorOr<void> {
@ -182,32 +291,58 @@ ParseErrorOr<SpecFunction> SpecFunction::create(XML::Node const* element)
if (algorithm_node == nullptr)
return ParseError::create("No <emu-alg>"sv, element);
if (prose_node == nullptr)
return ParseError::create("No prose element"sv, element);
if (!has_definition)
return ParseError::create("Definition was not found"sv, element);
result.m_algorithm = TRY(Algorithm::create(algorithm_node));
return result;
}
if (prose_node) {
ctx.diag().warn(ctx.location_from_xml_offset(element->offset),
"prose is ignored");
}
ParseErrorOr<void> SpecFunction::parse_definition(XML::Node const* element)
{
auto tokens = TRY(tokenize_tree(element));
TextParser parser(tokens.tokens, element);
auto [section_number, function_name, arguments] = TRY(parser.parse_definition());
if (function_name != m_name)
return ParseError::create("Function name in definition differs from <emu-clause>[aoid]"sv, element);
m_section_number = section_number;
for (auto const& argument : arguments)
m_arguments.append({ argument });
m_algorithm = TRY(Algorithm::create(algorithm_node));
return {};
}
Specification Specification::create(SpecificationParsingContext& ctx, XML::Node const* element)
{
VERIFY(element->as_element().name == tag_specification);
Specification specification;
specification.parse(ctx, element);
return specification;
}
void Specification::collect_into(TranslationUnitRef translation_unit)
{
for (auto& clause : m_clauses)
clause->collect_into(translation_unit);
}
void Specification::parse(SpecificationParsingContext& ctx, XML::Node const* element)
{
for (auto const& child : element->as_element().children) {
child->content.visit(
[&](XML::Node::Element const& element) {
if (element.name == tag_emu_intro) {
// Introductory comments are ignored.
} else if (element.name == tag_emu_clause) {
m_clauses.append(SpecificationClause::create(ctx, child));
} else if (element.name == tag_emu_import) {
parse(ctx, child);
} else {
ctx.diag().error(ctx.location_from_xml_offset(child->offset),
"<{}> should not be a child of <specification>", element.name);
}
},
[&](XML::Node::Text const&) {
if (!contains_empty_text(child)) {
ctx.diag().error(ctx.location_from_xml_offset(child->offset),
"non-empty text node should not be a child of <specification>");
}
},
[&](auto) {});
}
}
SpecParsingStep::SpecParsingStep()
: CompilationStep("parser"sv)
{
@ -247,14 +382,14 @@ void SpecParsingStep::run(TranslationUnitRef translation_unit)
}
m_document = make<XML::Document>(document_or_error.release_value());
auto spec_function = SpecFunction::create(&m_document->root()).release_value_but_fixme_should_propagate_errors();
auto const& root = m_document->root();
if (!root.is_element() || root.as_element().name != tag_specification) {
ctx.diag().fatal_error(ctx.location_from_xml_offset(root.offset),
"document root must be <specification> tag");
return;
}
Vector<FunctionArgument> arguments;
for (auto const& argument : spec_function.m_arguments)
arguments.append({ argument.name });
translation_unit->adopt_function(
make_ref_counted<FunctionDefinition>(spec_function.m_name, spec_function.m_algorithm.m_tree, move(arguments)));
auto specification = Specification::create(ctx, &root);
specification.collect_into(translation_unit);
}
}

View file

@ -12,6 +12,7 @@
#include "CompilationPipeline.h"
#include "Forward.h"
#include "Parser/ParseError.h"
#include "Parser/TextParser.h"
#include "Parser/Token.h"
namespace JSSpecCompiler {
@ -68,24 +69,64 @@ public:
Tree m_tree = error_tree;
};
class SpecFunction {
class SpecificationClause {
AK_MAKE_DEFAULT_MOVABLE(SpecificationClause);
public:
struct Argument {
StringView name;
};
static NonnullOwnPtr<SpecificationClause> create(SpecificationParsingContext& ctx, XML::Node const* element);
static ParseErrorOr<SpecFunction> create(XML::Node const* element);
virtual ~SpecificationClause() = default;
ParseErrorOr<void> parse_definition(XML::Node const* element);
void collect_into(TranslationUnitRef translation_unit);
protected:
virtual bool post_initialize(SpecificationParsingContext& /*ctx*/, XML::Node const* /*element*/) { return true; }
virtual void do_collect(TranslationUnitRef /*translation_unit*/) { }
ClauseHeader m_header;
private:
SpecificationClause() = default;
ParseErrorOr<void> parse_header(XML::Node const* element);
void parse(SpecificationParsingContext& ctx, XML::Node const* element);
Vector<NonnullOwnPtr<SpecificationClause>> m_subclauses;
};
class SpecFunction : public SpecificationClause {
public:
SpecFunction(SpecificationClause&& clause)
: SpecificationClause(move(clause))
{
}
protected:
bool post_initialize(SpecificationParsingContext& ctx, XML::Node const* element) override;
void do_collect(TranslationUnitRef translation_unit) override;
private:
ParseErrorOr<void> do_post_initialize(SpecificationParsingContext& ctx, XML::Node const* element);
StringView m_section_number;
StringView m_id;
StringView m_name;
Vector<Argument> m_arguments;
Vector<FunctionArgument> m_arguments;
Algorithm m_algorithm;
};
class Specification {
public:
static Specification create(SpecificationParsingContext& ctx, XML::Node const* element);
void collect_into(TranslationUnitRef translation_unit);
private:
void parse(SpecificationParsingContext& ctx, XML::Node const* element);
Vector<NonnullOwnPtr<SpecificationClause>> m_clauses;
};
class SpecParsingStep : public CompilationStep {
public:
SpecParsingStep();

View file

@ -479,24 +479,28 @@ ParseErrorOr<Tree> TextParser::parse_step_with_substeps(Tree substeps)
return ParseError::create("Unable to parse step with substeps"sv, m_node);
}
ParseErrorOr<TextParser::DefinitionParseResult> TextParser::parse_definition()
ParseErrorOr<ClauseHeader> TextParser::parse_clause_header()
{
DefinitionParseResult result;
ClauseHeader result;
auto section_number_token = TRY(consume_token_with_type(TokenType::SectionNumber));
result.section_number = section_number_token->data;
result.function_name = TRY(consume_token())->data;
ClauseHeader::FunctionDefinition function_definition;
function_definition.name = TRY(consume_token())->data;
TRY(consume_token_with_type(TokenType::ParenOpen));
while (true) {
result.arguments.append({ TRY(consume_token_with_type(TokenType::Identifier))->data });
function_definition.arguments.append({ TRY(consume_token_with_type(TokenType::Identifier))->data });
auto next_token = TRY(consume_token_with_one_of_types({ TokenType::ParenClose, TokenType::Comma }));
if (next_token->type == TokenType::ParenClose)
break;
}
TRY(expect_eof());
result.header = function_definition;
return result;
}

View file

@ -7,26 +7,31 @@
#pragma once
#include "AST/AST.h"
#include "Function.h"
#include "Parser/ParseError.h"
#include "Parser/Token.h"
namespace JSSpecCompiler {
class TextParser {
public:
struct DefinitionParseResult {
StringView section_number;
StringView function_name;
Vector<StringView> arguments;
struct ClauseHeader {
struct FunctionDefinition {
StringView name;
Vector<FunctionArgument> arguments;
};
StringView section_number;
Variant<AK::Empty, FunctionDefinition> header;
};
class TextParser {
public:
TextParser(Vector<Token>& tokens_, XML::Node const* node_)
: m_tokens(tokens_)
, m_node(node_)
{
}
ParseErrorOr<DefinitionParseResult> parse_definition();
ParseErrorOr<ClauseHeader> parse_clause_header();
ParseErrorOr<Tree> parse_step_without_substeps();
ParseErrorOr<Tree> parse_step_with_substeps(Tree substeps);

View file

@ -161,7 +161,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
outln(stderr, "{}", function->m_ast);
}
}
if (step.dump_cfg && translation_unit.functions_to_compile()[0]->m_cfg != nullptr) {
if (step.dump_cfg && translation_unit.functions_to_compile().size() && translation_unit.functions_to_compile()[0]->m_cfg != nullptr) {
outln(stderr, "===== CFG after {} =====", step.step->name());
for (auto const& function : translation_unit.functions_to_compile()) {
outln(stderr, "{}({}):", function->m_name, function->m_arguments);