qapi: Rewrite doc comment parser

QAPISchemaParser is a conventional recursive descent parser.  Except
QAPISchemaParser.get_doc() delegates most of the doc comment parsing
work to a state machine in QAPIDoc.  The state machine doesn't get
tokens like a recursive descent parser, it is fed tokens.

I find this state machine rather opaque and hard to maintain.

Replace it by a conventional parser, all in QAPISchemaParser.  Less
code, and (at least in my opinion) easier to understand.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-ID: <20240216145841.2099240-15-armbru@redhat.com>
Tested-by: Daniel P. Berrangé <berrange@redhat.com>
This commit is contained in:
Markus Armbruster 2024-02-16 15:58:38 +01:00
parent 0b82a7440c
commit 3d035cd2cc

View file

@ -134,8 +134,8 @@ def _parse(self) -> None:
info = self.info
if self.tok == '#':
self.reject_expr_doc(cur_doc)
for cur_doc in self.get_doc(info):
self.docs.append(cur_doc)
cur_doc = self.get_doc()
self.docs.append(cur_doc)
continue
expr = self.get_expr()
@ -414,39 +414,171 @@ def get_expr(self) -> _ExprValue:
self, "expected '{', '[', string, or boolean")
return expr
def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
def get_doc_line(self) -> Optional[str]:
if self.tok != '#':
raise QAPIParseError(
self, "documentation comment must end with '##'")
assert isinstance(self.val, str)
if self.val.startswith('##'):
# End of doc comment
if self.val != '##':
raise QAPIParseError(
self, "junk after '##' at end of documentation comment")
return None
if self.val == '#':
return ''
if self.val[1] != ' ':
raise QAPIParseError(self, "missing space after #")
return self.val[2:].rstrip()
@staticmethod
def _match_at_name_colon(string: str) -> Optional[Match[str]]:
return re.match(r'@([^:]*): *', string)
def get_doc_indented(self, doc: 'QAPIDoc') -> Optional[str]:
self.accept(False)
line = self.get_doc_line()
while line == '':
doc.append_line(line)
self.accept(False)
line = self.get_doc_line()
if line is None:
return line
indent = must_match(r'\s*', line).end()
if not indent:
return line
doc.append_line(line[indent:])
prev_line_blank = False
while True:
self.accept(False)
line = self.get_doc_line()
if line is None:
return line
if self._match_at_name_colon(line):
return line
cur_indent = must_match(r'\s*', line).end()
if line != '' and cur_indent < indent:
if prev_line_blank:
return line
raise QAPIParseError(
self,
"unexpected de-indent (expected at least %d spaces)" %
indent)
doc.append_line(line[indent:])
prev_line_blank = True
def get_doc_paragraph(self, doc: 'QAPIDoc') -> Optional[str]:
while True:
self.accept(False)
line = self.get_doc_line()
if line is None:
return line
if line == '':
return line
doc.append_line(line)
def get_doc(self) -> 'QAPIDoc':
if self.val != '##':
raise QAPIParseError(
self, "junk after '##' at start of documentation comment")
docs = []
cur_doc = QAPIDoc(self, info)
info = self.info
self.accept(False)
while self.tok == '#':
assert isinstance(self.val, str)
if self.val.startswith('##'):
# End of doc comment
if self.val != '##':
raise QAPIParseError(
self,
"junk after '##' at end of documentation comment")
cur_doc.end_comment()
docs.append(cur_doc)
self.accept()
return docs
if self.val.startswith('# ='):
if cur_doc.symbol:
line = self.get_doc_line()
if line is not None and line.startswith('@'):
# Definition documentation
if not line.endswith(':'):
raise QAPIParseError(self, "line should end with ':'")
# Invalid names are not checked here, but the name
# provided *must* match the following definition,
# which *is* validated in expr.py.
symbol = line[1:-1]
if not symbol:
raise QAPIParseError(self, "name required after '@'")
doc = QAPIDoc(self, info, symbol)
self.accept(False)
line = self.get_doc_line()
no_more_args = False
while line is not None:
# Blank lines
while line == '':
self.accept(False)
line = self.get_doc_line()
if line is None:
break
# Non-blank line, first of a section
if line == 'Features:' and not doc.features:
self.accept(False)
line = self.get_doc_line()
while line == '':
self.accept(False)
line = self.get_doc_line()
while (line is not None
and (match := self._match_at_name_colon(line))):
doc.new_feature(match.group(1))
text = line[match.end():]
if text:
doc.append_line(text)
line = self.get_doc_indented(doc)
no_more_args = True
elif match := self._match_at_name_colon(line):
# description
if no_more_args:
raise QAPIParseError(
self,
"description of '@%s:' follows a section"
% match.group(1))
while (line is not None
and (match := self._match_at_name_colon(line))):
doc.new_argument(match.group(1))
text = line[match.end():]
if text:
doc.append_line(text)
line = self.get_doc_indented(doc)
no_more_args = True
elif match := re.match(
r'(Returns|Since|Notes?|Examples?|TODO): *',
line):
# tagged section
doc.new_tagged_section(match.group(1))
text = line[match.end():]
if text:
doc.append_line(text)
line = self.get_doc_indented(doc)
no_more_args = True
elif line.startswith('='):
raise QAPIParseError(
self,
"unexpected '=' markup in definition documentation")
if cur_doc.body.text:
else:
# tag-less paragraph
doc.ensure_untagged_section()
doc.append_line(line)
line = self.get_doc_paragraph(doc)
else:
# Free-form documentation
doc = QAPIDoc(self, info)
doc.ensure_untagged_section()
first = True
while line is not None:
if match := self._match_at_name_colon(line):
raise QAPIParseError(
self,
"'=' heading must come first in a comment block")
cur_doc.append(self.val)
self.accept(False)
"'@%s:' not allowed in free-form documentation"
% match.group(1))
if line.startswith('='):
if not first:
raise QAPIParseError(
self,
"'=' heading must come first in a comment block")
doc.append_line(line)
self.accept(False)
line = self.get_doc_line()
first = False
raise QAPIParseError(self, "documentation comment must end with '##'")
self.accept(False)
doc.end()
return doc
class QAPIDoc:
@ -469,7 +601,6 @@ class QAPIDoc:
"""
class Section:
# pylint: disable=too-few-public-methods
def __init__(self, parser: QAPISchemaParser,
tag: Optional[str] = None):
# section source info, i.e. where it begins
@ -480,29 +611,8 @@ def __init__(self, parser: QAPISchemaParser,
self.tag = tag
# section text without tag
self.text = ''
# indentation to strip (None means indeterminate)
self._indent = None if self.tag else 0
def append(self, line: str) -> None:
line = line.rstrip()
if line:
indent = must_match(r'\s*', line).end()
if self._indent is None:
# indeterminate indentation
if self.text != '':
# non-blank, non-first line determines indentation
if indent == 0:
raise QAPIParseError(
self._parser, "line needs to be indented")
self._indent = indent
elif indent < self._indent:
raise QAPIParseError(
self._parser,
"unexpected de-indent (expected at least %d spaces)" %
self._indent)
line = line[self._indent:]
def append_line(self, line: str) -> None:
self.text += line + '\n'
class ArgSection(Section):
@ -514,243 +624,75 @@ def __init__(self, parser: QAPISchemaParser,
def connect(self, member: 'QAPISchemaMember') -> None:
self.member = member
class NullSection(Section):
"""
Immutable dummy section for use at the end of a doc block.
"""
# pylint: disable=too-few-public-methods
def append(self, line: str) -> None:
assert False, "Text appended after end_comment() called."
def __init__(self, parser: QAPISchemaParser, info: QAPISourceInfo):
def __init__(self, parser: QAPISchemaParser, info: QAPISourceInfo,
symbol: Optional[str] = None):
# self._parser is used to report errors with QAPIParseError. The
# resulting error position depends on the state of the parser.
# It happens to be the beginning of the comment. More or less
# servicable, but action at a distance.
self._parser = parser
# info points to the doc comment block's first line
self.info = info
self.symbol: Optional[str] = None
self.body = QAPIDoc.Section(parser)
# dicts mapping parameter/feature names to their ArgSection
self.args: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
self.features: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
# definition doc's symbol, None for free-form doc
self.symbol: Optional[str] = symbol
# the sections in textual order
self.all_sections: List[QAPIDoc.Section] = [QAPIDoc.Section(parser)]
# the body section
self.body: Optional[QAPIDoc.Section] = self.all_sections[0]
# dicts mapping parameter/feature names to their description
self.args: Dict[str, QAPIDoc.ArgSection] = {}
self.features: Dict[str, QAPIDoc.ArgSection] = {}
# sections other than .body, .args, .features
self.sections: List[QAPIDoc.Section] = []
# the current section
self._section = self.body
self._append_line = self._append_body_line
self._first_line_in_paragraph = False
def has_section(self, tag: str) -> bool:
"""Return True if we have a section with this tag."""
for i in self.sections:
if i.tag == tag:
return True
return False
def end(self) -> None:
for section in self.all_sections:
section.text = section.text.strip('\n')
if section.tag is not None and section.text == '':
raise QAPISemError(
section.info, "text required after '%s:'" % section.tag)
def append(self, line: str) -> None:
"""
Parse a comment line and add it to the documentation.
The way that the line is dealt with depends on which part of
the documentation we're parsing right now:
* The body section: ._append_line is ._append_body_line
* An argument section: ._append_line is ._append_args_line
* A features section: ._append_line is ._append_features_line
* An additional section: ._append_line is ._append_various_line
"""
line = line[1:]
if not line:
self._append_freeform(line)
self._first_line_in_paragraph = True
def ensure_untagged_section(self) -> None:
if self.all_sections and not self.all_sections[-1].tag:
# extend current section
self.all_sections[-1].text += '\n'
return
# start new section
section = self.Section(self._parser)
self.sections.append(section)
self.all_sections.append(section)
if line[0] != ' ':
raise QAPIParseError(self._parser, "missing space after #")
line = line[1:]
self._append_line(line)
self._first_line_in_paragraph = False
def new_tagged_section(self, tag: str) -> None:
if tag in ('Returns', 'Since'):
for section in self.all_sections:
if isinstance(section, self.ArgSection):
continue
if section.tag == tag:
raise QAPIParseError(
self._parser, "duplicated '%s' section" % tag)
section = self.Section(self._parser, tag)
self.sections.append(section)
self.all_sections.append(section)
def end_comment(self) -> None:
self._switch_section(QAPIDoc.NullSection(self._parser))
@staticmethod
def _match_at_name_colon(string: str) -> Optional[Match[str]]:
return re.match(r'@([^:]*): *', string)
def _match_section_tag(self, string: str) -> Optional[Match[str]]:
if not self._first_line_in_paragraph:
return None
return re.match(r'(Returns|Since|Notes?|Examples?|TODO): *',
string)
def _append_body_line(self, line: str) -> None:
"""
Process a line of documentation text in the body section.
If this a symbol line and it is the section's first line, this
is a definition documentation block for that symbol.
If it's a definition documentation block, another symbol line
begins the argument section for the argument named by it, and
a section tag begins an additional section. Start that
section and append the line to it.
Else, append the line to the current section.
"""
# FIXME not nice: things like '# @foo:' and '# @foo: ' aren't
# recognized, and get silently treated as ordinary text
if not self.symbol and not self.body.text and line.startswith('@'):
if not line.endswith(':'):
raise QAPIParseError(self._parser, "line should end with ':'")
self.symbol = line[1:-1]
# Invalid names are not checked here, but the name provided MUST
# match the following definition, which *is* validated in expr.py.
if not self.symbol:
raise QAPIParseError(
self._parser, "name required after '@'")
elif self.symbol:
# This is a definition documentation block
if self._match_at_name_colon(line):
self._append_line = self._append_args_line
self._append_args_line(line)
elif line == 'Features:':
self._append_line = self._append_features_line
elif self._match_section_tag(line):
self._append_line = self._append_various_line
self._append_various_line(line)
else:
self._append_freeform(line)
else:
# This is a free-form documentation block
self._append_freeform(line)
def _append_args_line(self, line: str) -> None:
"""
Process a line of documentation text in an argument section.
A symbol line begins the next argument section, a section tag
section or a non-indented line after a blank line begins an
additional section. Start that section and append the line to
it.
Else, append the line to the current section.
"""
match = self._match_at_name_colon(line)
if match:
line = line[match.end():]
self._start_args_section(match.group(1))
elif self._match_section_tag(line):
self._append_line = self._append_various_line
self._append_various_line(line)
return
elif (self._section.text.endswith('\n\n')
and line and not line[0].isspace()):
if line == 'Features:':
self._append_line = self._append_features_line
else:
self._start_section()
self._append_line = self._append_various_line
self._append_various_line(line)
return
self._append_freeform(line)
def _append_features_line(self, line: str) -> None:
match = self._match_at_name_colon(line)
if match:
line = line[match.end():]
self._start_features_section(match.group(1))
elif self._match_section_tag(line):
self._append_line = self._append_various_line
self._append_various_line(line)
return
elif (self._section.text.endswith('\n\n')
and line and not line[0].isspace()):
self._start_section()
self._append_line = self._append_various_line
self._append_various_line(line)
return
self._append_freeform(line)
def _append_various_line(self, line: str) -> None:
"""
Process a line of documentation text in an additional section.
A symbol line is an error.
A section tag begins an additional section. Start that
section and append the line to it.
Else, append the line to the current section.
"""
match = self._match_at_name_colon(line)
if match:
raise QAPIParseError(self._parser,
"description of '@%s:' follows a section"
% match.group(1))
match = self._match_section_tag(line)
if match:
line = line[match.end():]
self._start_section(match.group(1))
self._append_freeform(line)
def _start_symbol_section(
self,
symbols_dict: Dict[str, 'QAPIDoc.ArgSection'],
name: str) -> None:
# FIXME invalid names other than the empty string aren't flagged
def _new_description(self, name: str,
desc: Dict[str, ArgSection]) -> None:
if not name:
raise QAPIParseError(self._parser, "invalid parameter name")
if name in symbols_dict:
if name in desc:
raise QAPIParseError(self._parser,
"'%s' parameter name duplicated" % name)
assert not self.sections
new_section = QAPIDoc.ArgSection(self._parser, '@' + name)
self._switch_section(new_section)
symbols_dict[name] = new_section
section = self.ArgSection(self._parser, '@' + name)
self.all_sections.append(section)
desc[name] = section
def _start_args_section(self, name: str) -> None:
self._start_symbol_section(self.args, name)
def new_argument(self, name: str) -> None:
self._new_description(name, self.args)
def _start_features_section(self, name: str) -> None:
self._start_symbol_section(self.features, name)
def new_feature(self, name: str) -> None:
self._new_description(name, self.features)
def _start_section(self, tag: Optional[str] = None) -> None:
if not tag and not self._section.tag:
# extend current section
return
if tag in ('Returns', 'Since') and self.has_section(tag):
raise QAPIParseError(self._parser,
"duplicated '%s' section" % tag)
new_section = QAPIDoc.Section(self._parser, tag)
self._switch_section(new_section)
self.sections.append(new_section)
def _switch_section(self, new_section: 'QAPIDoc.Section') -> None:
text = self._section.text = self._section.text.strip('\n')
# Only the 'body' section is allowed to have an empty body.
# All other sections, including anonymous ones, must have text.
if self._section != self.body and not text:
# We do not create anonymous sections unless there is
# something to put in them; this is a parser bug.
assert self._section.tag
raise QAPISemError(
self._section.info,
"text required after '%s:'" % self._section.tag)
self._section = new_section
def _append_freeform(self, line: str) -> None:
match = re.match(r'(@\S+:)', line)
if match:
raise QAPIParseError(self._parser,
"'%s' not allowed in free-form documentation"
% match.group(1))
self._section.append(line)
def append_line(self, line: str) -> None:
self.all_sections[-1].append_line(line)
def connect_member(self, member: 'QAPISchemaMember') -> None:
if member.name not in self.args:
@ -758,8 +700,8 @@ def connect_member(self, member: 'QAPISchemaMember') -> None:
raise QAPISemError(member.info,
"%s '%s' lacks documentation"
% (member.role, member.name))
self.args[member.name] = QAPIDoc.ArgSection(self._parser,
'@' + member.name)
self.args[member.name] = QAPIDoc.ArgSection(
self._parser, '@' + member.name)
self.args[member.name].connect(member)
def connect_feature(self, feature: 'QAPISchemaFeature') -> None: