json: Eliminate lexer state IN_WHITESPACE, pseudo-token JSON_SKIP

The lexer ignores whitespace like this:

         on whitespace      on non-ws   spontaneously
    IN_START --> IN_WHITESPACE --> JSON_SKIP --> IN_START
                    ^    |
                     \__/  on whitespace

This accumulates a whitespace token in state IN_WHITESPACE, only to
throw it away on the transition via JSON_SKIP to the start state,
which is wasteful.  Instead, go from IN_START directly back to
IN_START on whitespace, dropping the whitespace character.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180831075841.13363-7-armbru@redhat.com>
This commit is contained in:
Markus Armbruster 2018-08-31 09:58:41 +02:00
parent 2ce4ee64c4
commit 1e960b4602
2 changed files with 5 additions and 18 deletions

View file

@ -115,7 +115,6 @@ enum json_lexer_state {
IN_SIGN,
IN_KEYWORD,
IN_INTERP,
IN_WHITESPACE,
IN_START,
IN_START_INTERP, /* must be IN_START + 1 */
};
@ -228,15 +227,6 @@ static const uint8_t json_lexer[][256] = {
['a' ... 'z'] = IN_KEYWORD,
},
/* whitespace */
[IN_WHITESPACE] = {
TERMINAL(JSON_SKIP),
[' '] = IN_WHITESPACE,
['\t'] = IN_WHITESPACE,
['\r'] = IN_WHITESPACE,
['\n'] = IN_WHITESPACE,
},
/* interpolation */
[IN_INTERP] = {
TERMINAL(JSON_INTERP),
@ -263,10 +253,10 @@ static const uint8_t json_lexer[][256] = {
[','] = JSON_COMMA,
[':'] = JSON_COLON,
['a' ... 'z'] = IN_KEYWORD,
[' '] = IN_WHITESPACE,
['\t'] = IN_WHITESPACE,
['\r'] = IN_WHITESPACE,
['\n'] = IN_WHITESPACE,
[' '] = IN_START,
['\t'] = IN_START,
['\r'] = IN_START,
['\n'] = IN_START,
},
[IN_START_INTERP]['%'] = IN_INTERP,
};
@ -323,10 +313,8 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
json_message_process_token(lexer, lexer->token, new_state,
lexer->x, lexer->y);
/* fall through */
case JSON_SKIP:
g_string_truncate(lexer->token, 0);
/* fall through */
case IN_START:
g_string_truncate(lexer->token, 0);
new_state = lexer->start_state;
break;
case JSON_ERROR:

View file

@ -31,7 +31,6 @@ typedef enum json_token_type {
JSON_KEYWORD,
JSON_STRING,
JSON_INTERP,
JSON_SKIP,
JSON_END_OF_INPUT,
JSON_MAX = JSON_END_OF_INPUT
} JSONTokenType;