From 31e582301033690002fe9528012ce5fd41d7ab80 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Wed, 20 Mar 2024 19:42:02 +0200 Subject: [PATCH] spa: json: more parsing errors & add spa_json_get_error Disallow = and : as bare items in [] containers, as that is likely "[ { foo = bar } ]" mistyped as "[ foo = bar ]". Disallow nesting errors, e.g. "[ foo bar" or "[ foo bar }". Fix handling of ", \ and # in bare strings. Fix trailing comments not being ignored. Add a fixed-size stack (128 levels) to the tokenizer, so that it can check these at levels below its depth. When the tokenizer encounters an error, make it and its parents enter an error state where no further input will be processed. This allows the caller to check for parse errors later as convenient. The error state can be queried using spa_json_get_error, which also looks up the error line/column position. --- spa/include/spa/utils/json.h | 146 +++++++++++++++++---- test/test-properties.c | 2 +- test/test-spa-json.c | 245 ++++++++++++++++++++++++++++++++++- 3 files changed, 367 insertions(+), 26 deletions(-) diff --git a/spa/include/spa/utils/json.h b/spa/include/spa/utils/json.h index 30d14f5a2..1a0e4887a 100644 --- a/spa/include/spa/utils/json.h +++ b/spa/include/spa/utils/json.h @@ -44,7 +44,7 @@ static inline void spa_json_init(struct spa_json * iter, const char *data, size_ { *iter = SPA_JSON_INIT(data, size); } -#define SPA_JSON_ENTER(iter) ((struct spa_json) { (iter)->cur, (iter)->end, (iter), 0, 0 }) +#define SPA_JSON_ENTER(iter) ((struct spa_json) { (iter)->cur, (iter)->end, (iter), (iter)->state & 0xf0, 0 }) static inline void spa_json_enter(struct spa_json * iter, struct spa_json * sub) { @@ -54,68 +54,114 @@ static inline void spa_json_enter(struct spa_json * iter, struct spa_json * sub) #define SPA_JSON_SAVE(iter) ((struct spa_json) { (iter)->cur, (iter)->end, }) /** Get the next token. \a value points to the token and the return value - * is the length. 
Returns -1 on parse error, 0 on end of input. */ static inline int spa_json_next(struct spa_json * iter, const char **value) { int utf8_remain = 0; - enum { __NONE, __STRUCT, __BARE, __STRING, __UTF8, __ESC, __COMMENT }; + enum { + __NONE, __STRUCT, __BARE, __STRING, __UTF8, __ESC, __COMMENT, + __ARRAY_FLAG = 0x10, + __OBJECT_FLAG = 0x20, + __ERROR_FLAG = 0x40, + __FLAGS = 0xf0, + }; + uint8_t object_stack[16] = {0}; + uint8_t array_stack[SPA_N_ELEMENTS(object_stack)] = {0}; *value = iter->cur; + + if (iter->state & __ERROR_FLAG) + return -1; + for (; iter->cur < iter->end; iter->cur++) { unsigned char cur = (unsigned char)*iter->cur; + uint32_t flag; + again: - switch (iter->state) { + flag = iter->state & __FLAGS; + switch (iter->state & ~__FLAGS) { case __NONE: - iter->state = __STRUCT; + iter->state = __STRUCT | flag; iter->depth = 0; goto again; case __STRUCT: switch (cur) { - case '\0': case '\t': case ' ': case '\r': case '\n': case ':': case '=': case ',': + case '\0': case '\t': case ' ': case '\r': case '\n': case ',': + continue; + case ':': case '=': + if (flag & __ARRAY_FLAG) + goto error; continue; case '#': - iter->state = __COMMENT; + iter->state = __COMMENT | flag; continue; case '"': *value = iter->cur; - iter->state = __STRING; + iter->state = __STRING | flag; continue; case '[': case '{': + iter->state = __STRUCT | (cur == '[' ? 
__ARRAY_FLAG : __OBJECT_FLAG); + if ((iter->depth >> 3) < SPA_N_ELEMENTS(object_stack)) { + uint8_t mask = 1 << (iter->depth & 0x7); + SPA_FLAG_UPDATE(object_stack[iter->depth >> 3], mask, flag & __OBJECT_FLAG); + SPA_FLAG_UPDATE(array_stack[iter->depth >> 3], mask, flag & __ARRAY_FLAG); + } *value = iter->cur; if (++iter->depth > 1) continue; iter->cur++; return 1; case '}': case ']': + if ((flag & __ARRAY_FLAG) && cur != ']') + goto error; + if ((flag & __OBJECT_FLAG) && cur != '}') + goto error; + iter->state = __STRUCT; if (iter->depth == 0) { if (iter->parent) iter->parent->cur = iter->cur; + else + goto error; return 0; } --iter->depth; + if ((iter->depth >> 3) < SPA_N_ELEMENTS(object_stack)) { + uint8_t mask = 1 << (iter->depth & 0x7); + if (SPA_FLAG_IS_SET(object_stack[iter->depth >> 3], mask)) + iter->state |= __OBJECT_FLAG; + if (SPA_FLAG_IS_SET(array_stack[iter->depth >> 3], mask)) + iter->state |= __ARRAY_FLAG; + } continue; + case '\\': + /* disallow bare escape */ + goto error; default: *value = iter->cur; - iter->state = __BARE; + iter->state = __BARE | flag; } continue; case __BARE: switch (cur) { case '\t': case ' ': case '\r': case '\n': + case '"': case '#': case ':': case ',': case '=': case ']': case '}': - iter->state = __STRUCT; + iter->state = __STRUCT | flag; if (iter->depth > 0) goto again; return iter->cur - *value; + case '\\': + /* disallow bare escape */ + goto error; } continue; case __STRING: switch (cur) { case '\\': - iter->state = __ESC; + iter->state = __ESC | flag; continue; case '"': - iter->state = __STRUCT; + iter->state = __STRUCT | flag; if (iter->depth > 0) continue; return ++iter->cur - *value; @@ -127,44 +173,98 @@ static inline int spa_json_next(struct spa_json * iter, const char **value) SPA_FALLTHROUGH; case 192 ... 223: utf8_remain++; - iter->state = __UTF8; + iter->state = __UTF8 | flag; continue; default: if (cur >= 32 && cur <= 126) continue; } - return -1; + goto error; case __UTF8: switch (cur) { case 128 ... 
191: if (--utf8_remain == 0) - iter->state = __STRING; + iter->state = __STRING | flag; continue; } - return -1; + goto error; case __ESC: switch (cur) { case '"': case '\\': case '/': case 'b': case 'f': case 'n': case 'r': case 't': case 'u': - iter->state = __STRING; + iter->state = __STRING | flag; continue; } - return -1; + goto error; case __COMMENT: switch (cur) { case '\n': case '\r': - iter->state = __STRUCT; + iter->state = __STRUCT | flag; } + break; + default: + goto error; } } - if (iter->depth != 0) - return -1; - if (iter->state != __STRUCT) { - iter->state = __STRUCT; + if (iter->depth != 0 || iter->parent) + goto error; + + switch (iter->state & ~__FLAGS) { + case __STRING: case __UTF8: case __ESC: + /* string/escape not closed */ + goto error; + case __COMMENT: + /* trailing comment */ + return 0; + } + + if ((iter->state & ~__FLAGS) != __STRUCT) { + iter->state = __STRUCT | (iter->state & __FLAGS); return iter->cur - *value; } return 0; + +error: + iter->state |= __ERROR_FLAG; + while (iter->parent) { + if (iter->parent->state & __ERROR_FLAG) + break; + iter->parent->state |= __ERROR_FLAG; + iter->parent->cur = iter->cur; + iter = iter->parent; + } + return -1; +} + +/** + * Return whether parse error occurred, and its possible location. 
+ * + * \since 1.1.0 + */ +static inline bool spa_json_get_error(struct spa_json *iter, const char *start, int *line, int *col) +{ + int linepos = 1, colpos = 1; + const char *p; + + if (!(iter->state & 0x40)) + return false; + + for (p = start; p && p != iter->cur; ++p) { + if (*p == '\n') { + linepos++; + colpos = 1; + } else { + colpos++; + } + } + + if (line) + *line = linepos; + if (col) + *col = colpos; + + return true; } static inline int spa_json_enter_container(struct spa_json *iter, struct spa_json *sub, char type) diff --git a/test/test-properties.c b/test/test-properties.c index 8067dd90a..c53a4d5e2 100644 --- a/test/test-properties.c +++ b/test/test-properties.c @@ -207,7 +207,7 @@ PWTEST(properties_new_string) pw_properties_free(props); - props = pw_properties_new_string("foo=bar bar=\"baz"); + props = pw_properties_new_string("foo=bar bar=\"baz\""); pwtest_ptr_notnull(props); pwtest_int_eq(props->flags, 0U); pwtest_int_eq(props->dict.n_items, 2U); diff --git a/test/test-spa-json.c b/test/test-spa-json.c index a97ddb22d..7e1624973 100644 --- a/test/test-spa-json.c +++ b/test/test-spa-json.c @@ -81,6 +81,26 @@ static void expect_end(struct spa_json *it) pwtest_int_eq(memcmp(&it2, it, sizeof(*it)), 0); } +static void expect_parse_error(struct spa_json *it, const char *str, int line, int col) +{ + const char *value; + struct spa_json it2; + int linepos, colpos; + + pwtest_int_eq(spa_json_next(it, &value), -1); + pwtest_bool_true(spa_json_get_error(it, str, &linepos, &colpos)); + pwtest_int_eq(linepos, line); + pwtest_int_eq(colpos, col); + + /* parse error is idempotent also for parents */ + while (it) { + memcpy(&it2, it, sizeof(*it)); + pwtest_int_eq(spa_json_next(it, &value), -1); + pwtest_int_eq(memcmp(&it2, it, sizeof(*it)), 0); + it = it->parent; + } +} + static void expect_array(struct spa_json *it, struct spa_json *sub) { pwtest_int_eq(spa_json_enter_array(it, sub), 1); @@ -99,7 +119,7 @@ static void expect_string(struct spa_json *it, const char 
*str) pwtest_int_gt((len = spa_json_next(it, &value)), 0); check_type(TYPE_STRING, value, len); s = alloca(len+1); - spa_json_parse_stringn(value, len, s, len+1); + pwtest_int_eq(spa_json_parse_stringn(value, len, s, len+1), 1); pwtest_str_eq(s, str); } @@ -158,9 +178,11 @@ static void expect_null(struct spa_json *it) PWTEST(json_parse) { + char buf[1024]; + int i; struct spa_json it[5]; const char *json = " { " - "\"foo\": \"bar\"," + "\"foo\": \"bar\", # comment\n" "\"foo\\\" \": true, " "\"foo \\n\\r\\t\": false," " \" arr\": [ true, false, null, 5, 5.7, \"str]\"]," @@ -198,6 +220,8 @@ PWTEST(json_parse) expect_float(&it[1], -1.8f); expect_string(&it[1], "foo 6"); expect_float(&it[1], +2.8f); + expect_end(&it[1]); + expect_end(&it[0]); /* in the array */ expect_type(&it[2], TYPE_TRUE); expect_type(&it[2], TYPE_FALSE); @@ -218,6 +242,79 @@ PWTEST(json_parse) expect_string(&it[3], "1.9"); expect_float(&it[3], 1.9f); + expect_end(&it[3]); + expect_end(&it[2]); + + pwtest_bool_false(spa_json_get_error(&it[0], NULL, NULL, NULL)); + pwtest_bool_false(spa_json_get_error(&it[1], NULL, NULL, NULL)); + pwtest_bool_false(spa_json_get_error(&it[2], NULL, NULL, NULL)); + pwtest_bool_false(spa_json_get_error(&it[3], NULL, NULL, NULL)); + + json = "section={\"key\":value}, section2=[item1,item2]"; + + spa_json_init(&it[0], json, strlen(json)); + expect_string_or_bare(&it[0], "section"); + expect_object(&it[0], &it[1]); + expect_string_or_bare(&it[0], "section2"); + expect_array(&it[0], &it[1]); + expect_end(&it[0]); + + spa_json_init(&it[0], json, strlen(json)); + expect_string_or_bare(&it[0], "section"); + expect_object(&it[0], &it[1]); + expect_string(&it[1], "key"); + expect_string_or_bare(&it[1], "value"); + expect_string_or_bare(&it[0], "section2"); + expect_array(&it[0], &it[1]); + expect_string_or_bare(&it[1], "item1"); + expect_string_or_bare(&it[1], "item2"); + expect_end(&it[0]); + + /* 2-byte utf8 */ + json = "\"\xc3\xa4\", \"\xc3\xa4\""; + spa_json_init(&it[0], 
json, strlen(json)); + expect_string(&it[0], "\xc3\xa4"); + expect_string(&it[0], "\xc3\xa4"); + expect_end(&it[0]); + + /* 3-byte utf8 */ + json = "\"\xe6\xad\xa3\", \"\xe6\xad\xa3\""; + spa_json_init(&it[0], json, strlen(json)); + expect_string(&it[0], "\xe6\xad\xa3"); + expect_string(&it[0], "\xe6\xad\xa3"); + expect_end(&it[0]); + + /* 4-byte utf8 */ + json = "\"\xf0\x92\x80\x80\", \"\xf0\x92\x80\x80\""; + spa_json_init(&it[0], json, strlen(json)); + expect_string(&it[0], "\xf0\x92\x80\x80"); + expect_string(&it[0], "\xf0\x92\x80\x80"); + expect_end(&it[0]); + + /* run-in comment in bare */ + json = "foo#comment"; + spa_json_init(&it[0], json, strlen(json)); + expect_string_or_bare(&it[0], "foo"); + expect_end(&it[0]); + + /* end of parsing idempotent */ + json = "{}"; + spa_json_init(&it[0], json, strlen(json)); + expect_object(&it[0], &it[1]); + expect_end(&it[0]); + expect_end(&it[0]); + + /* overflowing parser nesting stack is not an error */ + for (i = 0; i < 256; ++i) + buf[i] = '['; + for (; i < 512; ++i) + buf[i] = ']'; + buf[i++] = '\0'; + + spa_json_init(&it[0], buf, strlen(buf)); + pwtest_int_eq(spa_json_next(&it[0], &value), 1); + expect_end(&it[0]); + /* non-null terminated strings OK */ json = "1.234"; spa_json_init(&it[0], json, 4); @@ -267,6 +364,149 @@ PWTEST(json_parse) return PWTEST_PASS; } +PWTEST(json_parse_fail) +{ + char buf[1024]; + struct spa_json it[5]; + const char *json, *value; + int i; + + /* = in array */ + json = "[ foo = bar ]"; + spa_json_init(&it[0], json, strlen(json)); + expect_array(&it[0], &it[1]); + expect_string_or_bare(&it[1], "foo"); + expect_parse_error(&it[1], json, 1, 7); + expect_parse_error(&it[1], json, 1, 7); /* parse error is idempotent */ + expect_parse_error(&it[0], json, 1, 7); /* parse error visible in parent */ + + /* : in array */ + json = "[ foo, bar\n : quux ]"; + spa_json_init(&it[0], json, strlen(json)); + expect_array(&it[0], &it[1]); + expect_string_or_bare(&it[1], "foo"); + 
expect_string_or_bare(&it[1], "bar"); + expect_parse_error(&it[1], json, 2, 2); + + /* missing ] */ + json = "[ foo, bar"; + spa_json_init(&it[0], json, strlen(json)); + pwtest_int_eq(spa_json_next(&it[0], &value), 1); + expect_parse_error(&it[0], json, 1, 11); + + /* spurious ] */ + json = "foo, bar ]"; + spa_json_init(&it[0], json, strlen(json)); + pwtest_int_eq(spa_json_next(&it[0], &value), 3); + pwtest_int_eq(spa_json_next(&it[0], &value), 3); + expect_parse_error(&it[0], json, 1, 10); + + /* spurious } */ + json = "{ foo, bar } }"; + spa_json_init(&it[0], json, strlen(json)); + expect_object(&it[0], &it[1]); + expect_parse_error(&it[0], json, 1, 14); + + /* bad nesting */ + json = "{ {[{[{[{[{[{[{[{[{[{[{[{[ ]}]}]}]}]}]}]}]}]}]}]}]} ]"; + spa_json_init(&it[0], json, strlen(json)); + pwtest_int_eq(spa_json_next(&it[0], &value), 1); + expect_parse_error(&it[0], json, 1, strlen(json)); + + /* bad nesting */ + json = "[ {[{[{[{[{[{[{[{[{[{[{[{[ ]}]}]}]}]}]}]}]}]}]}]}]} }"; + spa_json_init(&it[0], json, strlen(json)); + pwtest_int_eq(spa_json_next(&it[0], &value), 1); + expect_parse_error(&it[0], json, 1, strlen(json)); + + /* unclosed string */ + json = "\"foo"; + spa_json_init(&it[0], json, strlen(json)); + expect_parse_error(&it[0], json, 1, 5); + + /* unclosed string */ + json = "foo\""; + spa_json_init(&it[0], json, strlen(json)); + expect_string_or_bare(&it[0], "foo"); + expect_parse_error(&it[0], json, 1, 5); + + /* unclosed string */ + json = "foo\"bar"; + spa_json_init(&it[0], json, strlen(json)); + expect_string_or_bare(&it[0], "foo"); + expect_parse_error(&it[0], json, 1, 8); + + /* unclosed escape */ + json = "\"\\"; + spa_json_init(&it[0], json, strlen(json)); + expect_parse_error(&it[0], json, 1, 3); + + /* bare escape */ + json = "foo\\n"; + spa_json_init(&it[0], json, strlen(json)); + expect_parse_error(&it[0], json, 1, 4); + + /* bare escape */ + json = "\\nfoo"; + spa_json_init(&it[0], json, strlen(json)); + expect_parse_error(&it[0], json, 1, 
1); + + /* bad nesting in subparser */ + json = "{[]"; + spa_json_init(&it[0], json, strlen(json)); + expect_object(&it[0], &it[1]); + expect_array(&it[1], &it[2]); + expect_parse_error(&it[1], json, 1, 4); + + /* entered parser assumes nesting */ + json = "[]"; + spa_json_init(&it[0], json, strlen(json)); + spa_json_enter(&it[0], &it[1]); + expect_array(&it[1], &it[2]); + expect_parse_error(&it[1], json, 1, 3); + + /* overflowing parser nesting stack */ + for (i = 0; i < 256; ++i) + buf[i] = '['; + for (; i < 511; ++i) + buf[i] = ']'; + buf[i++] = '}'; + buf[i++] = '\0'; + + spa_json_init(&it[0], buf, strlen(buf)); + pwtest_int_eq(spa_json_next(&it[0], &value), 1); + expect_parse_error(&it[0], buf, 1, strlen(buf)); + + /* bad utf8 */ + json = "\"\xc0\""; + spa_json_init(&it[0], json, strlen(json)); + expect_parse_error(&it[0], json, 1, 3); + + json = "\"\xe6\xad\""; + spa_json_init(&it[0], json, strlen(json)); + expect_parse_error(&it[0], json, 1, 4); + + json = "\"\xf0\x92\x80\""; + spa_json_init(&it[0], json, strlen(json)); + expect_parse_error(&it[0], json, 1, 5); + + /* bad string */ + json = "\"\x01\""; + spa_json_init(&it[0], json, strlen(json)); + expect_parse_error(&it[0], json, 1, 2); + + json = "\"\x0f\""; + spa_json_init(&it[0], json, strlen(json)); + expect_parse_error(&it[0], json, 1, 2); + + /* bad escape */ + json = "\"\\z\""; + spa_json_init(&it[0], json, strlen(json)); + expect_parse_error(&it[0], json, 1, 3); + + return PWTEST_PASS; +} + PWTEST(json_encode) { char dst[128]; @@ -431,6 +671,7 @@ PWTEST_SUITE(spa_json) { pwtest_add(json_abi, PWTEST_NOARG); pwtest_add(json_parse, PWTEST_NOARG); + pwtest_add(json_parse_fail, PWTEST_NOARG); pwtest_add(json_encode, PWTEST_NOARG); pwtest_add(json_array, PWTEST_NOARG); pwtest_add(json_overflow, PWTEST_NOARG);