mirror of
https://gitlab.com/qemu-project/qemu
synced 2024-11-02 21:32:52 +00:00
json: Revamp lexer documentation
Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Message-Id: <20180823164025.12553-20-armbru@redhat.com>
This commit is contained in:
parent
340db1ed82
commit
eddc0a7f0a
1 changed files with 71 additions and 9 deletions
|
@ -18,21 +18,83 @@
|
|||
#define MAX_TOKEN_SIZE (64ULL << 20)
|
||||
|
||||
/*
|
||||
* Required by JSON (RFC 7159):
|
||||
* From RFC 8259 "The JavaScript Object Notation (JSON) Data
|
||||
* Interchange Format", with [comments in brackets]:
|
||||
*
|
||||
* \"([^\\\"]|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F]{4})*\"
|
||||
* -?(0|[1-9][0-9]*)(.[0-9]+)?([eE][-+]?[0-9]+)?
|
||||
* [{}\[\],:]
|
||||
* [a-z]+ # covers null, true, false
|
||||
* The set of tokens includes six structural characters, strings,
|
||||
* numbers, and three literal names.
|
||||
*
|
||||
* Extension of '' strings:
|
||||
* These are the six structural characters:
|
||||
*
|
||||
* '([^\\']|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F]{4})*'
|
||||
* begin-array = ws %x5B ws ; [ left square bracket
|
||||
* begin-object = ws %x7B ws ; { left curly bracket
|
||||
* end-array = ws %x5D ws ; ] right square bracket
|
||||
* end-object = ws %x7D ws ; } right curly bracket
|
||||
* name-separator = ws %x3A ws ; : colon
|
||||
* value-separator = ws %x2C ws ; , comma
|
||||
*
|
||||
* Extension for vararg handling in JSON construction:
|
||||
* Insignificant whitespace is allowed before or after any of the six
|
||||
* structural characters.
|
||||
* [This lexer accepts it before or after any token, which is actually
|
||||
* the same, as the grammar always has structural characters between
|
||||
* other tokens.]
|
||||
*
|
||||
* %((l|ll|I64)?d|[ipsf])
|
||||
* ws = *(
|
||||
* %x20 / ; Space
|
||||
* %x09 / ; Horizontal tab
|
||||
* %x0A / ; Line feed or New line
|
||||
* %x0D ) ; Carriage return
|
||||
*
|
||||
* [...] three literal names:
|
||||
* false null true
|
||||
* [This lexer accepts [a-z]+, and leaves rejecting unknown literal
|
||||
* names to the parser.]
|
||||
*
|
||||
* [Numbers:]
|
||||
*
|
||||
* number = [ minus ] int [ frac ] [ exp ]
|
||||
* decimal-point = %x2E ; .
|
||||
* digit1-9 = %x31-39 ; 1-9
|
||||
* e = %x65 / %x45 ; e E
|
||||
* exp = e [ minus / plus ] 1*DIGIT
|
||||
* frac = decimal-point 1*DIGIT
|
||||
* int = zero / ( digit1-9 *DIGIT )
|
||||
* minus = %x2D ; -
|
||||
* plus = %x2B ; +
|
||||
* zero = %x30 ; 0
|
||||
*
|
||||
* [Strings:]
|
||||
* string = quotation-mark *char quotation-mark
|
||||
*
|
||||
* char = unescaped /
|
||||
* escape (
|
||||
* %x22 / ; " quotation mark U+0022
|
||||
* %x5C / ; \ reverse solidus U+005C
|
||||
* %x2F / ; / solidus U+002F
|
||||
* %x62 / ; b backspace U+0008
|
||||
* %x66 / ; f form feed U+000C
|
||||
* %x6E / ; n line feed U+000A
|
||||
* %x72 / ; r carriage return U+000D
|
||||
* %x74 / ; t tab U+0009
|
||||
* %x75 4HEXDIG ) ; uXXXX U+XXXX
|
||||
* escape = %x5C ; \
|
||||
* quotation-mark = %x22 ; "
|
||||
* unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
|
||||
*
|
||||
*
|
||||
* Extensions over RFC 8259:
|
||||
* - Extra escape sequence in strings:
|
||||
* 0x27 (apostrophe) is recognized after escape, too
|
||||
* - Single-quoted strings:
|
||||
* Like double-quoted strings, except they're delimited by %x27
|
||||
* (apostrophe) instead of %x22 (quotation mark), and can't contain
|
||||
* unescaped apostrophe, but can contain unescaped quotation mark.
|
||||
* - Interpolation:
|
||||
* interpolation = %((l|ll|I64)[du]|[ipsf])
|
||||
*
|
||||
* Note:
|
||||
* - Input must be encoded in UTF-8.
|
||||
* - Decoding and validating is left to the parser.
|
||||
*/
|
||||
|
||||
enum json_lexer_state {
|
||||
|
|
Loading…
Reference in a new issue