more experiments: add json pattern matcher

This commit is contained in:
Wim Taymans 2017-09-20 20:33:15 +02:00
parent 86c8955c3a
commit b93ecb70fc

View file

@ -88,14 +88,6 @@
#include <stddef.h>
/* Cursor used to scan a span of JSON text one value at a time. */
struct spa_json_iter {
/* next byte to be examined by the scanner */
const char *cur;
/* one past the last byte of the input (data + size) */
const char *end;
/* iterator this one was entered from, or NULL for a top-level iterator;
 * its cur is moved to the closing bracket when this iterator reaches it */
struct spa_json_iter *parent;
/* scanner state: NONE, STRUCT, BARE, STRING, UTF8 or ESC */
int state;
/* count of containers opened and not yet closed while scanning; values
 * seen while this is > 0 are skipped instead of being returned */
int depth;
};
#define NONE 0
#define STRUCT 1
#define BARE 2
@ -103,20 +95,29 @@ struct spa_json_iter {
#define UTF8 4
#define ESC 5
/**
 * Point \a iter at the \a size bytes of JSON text starting at \a data.
 *
 * The iterator gets no parent and starts in the NONE state. The depth is
 * cleared here as well: the scanner only resets it when it sees the first
 * byte, so for empty input its final depth check would otherwise read an
 * indeterminate value.
 */
static void
spa_json_iter_init (struct spa_json_iter *iter, const char *data, size_t size)
{
	iter->cur = data;
	iter->end = data + size;
	iter->parent = NULL;
	iter->state = NONE;
	iter->depth = 0;
}
/* Cursor used to scan a span of JSON text one value at a time. */
struct spa_json_iter {
/* position of the bracket that opened this level; set by
 * spa_json_iter_recurse() to the parent's cur - 1 */
const char *start;
/* next byte to be examined by the scanner */
const char *cur;
/* one past the last byte of the input, or NULL when the length is not
 * known and the scan is not bounded by a pointer comparison */
const char *end;
/* iterator this one was entered from, or NULL for a top-level iterator;
 * its cur is moved to the closing bracket when this iterator reaches it */
struct spa_json_iter *parent;
/* scanner state: NONE, STRUCT, BARE, STRING, UTF8 or ESC */
int state;
/* count of containers opened and not yet closed while scanning; values
 * seen while this is > 0 are skipped instead of being returned */
int depth;
};
/* A slice of the scanned JSON text; it points into the input and is not
 * NUL-terminated, so it is printed with "%.*s". */
struct spa_json_chunk {
/* first byte of the value (for strings this is the opening quote) */
const char *value;
/* number of bytes in the slice; 1 for an opening '[' or '{' */
int len;
};
/**
 * Point \a iter at the JSON text starting at \a data.
 *
 * \param iter  iterator to initialise; every field is given a defined value
 * \param data  first byte of the text to scan
 * \param size  number of bytes in \a data, or (size_t)-1 when the length is
 *              not known. In that case end is set to NULL and the scanner
 *              does not bound the scan by a pointer comparison.
 *
 * start and depth are set here too. The scanner only resets depth once it
 * has seen a first byte, so for empty input its final depth check would read
 * an indeterminate value, and spa_json_iter_extract() inspects start on
 * iterators that were never produced by spa_json_iter_recurse().
 */
static void
spa_json_iter_init (struct spa_json_iter *iter, const char *data, size_t size)
{
	iter->start = data;
	iter->cur = data;
	/* spell the sentinel as size_t so the comparison is not signed/unsigned */
	iter->end = (size == (size_t) -1) ? NULL : data + size;
	iter->parent = NULL;
	iter->state = NONE;
	iter->depth = 0;
}
enum spa_json_type {
SPA_JSON_TYPE_INVALID,
SPA_JSON_TYPE_ARRAY,
@ -173,139 +174,169 @@ static inline bool spa_json_chunk_is_null(struct spa_json_chunk *chunk) {
}
static int
spa_json_iter_value(struct spa_json_iter *iter, struct spa_json_chunk *value)
spa_json_iter_next_chunk(struct spa_json_iter *iter, struct spa_json_chunk *chunk)
{
int utf8_remain = 0;
int utf8_remain = 0;
for (;iter->cur < iter->end; iter->cur++) {
unsigned char cur = (unsigned char) *iter->cur;
for (;iter->end == NULL || iter->cur < iter->end; iter->cur++) {
unsigned char cur = (unsigned char) *iter->cur;
again:
switch (iter->state) {
case NONE:
iter->state = STRUCT;
iter->depth = 0;
goto again;
case STRUCT:
switch (cur) {
case '\t': case ' ': case '\r': case '\n': case ':': case ',':
continue;
case '"':
value->value = iter->cur;
iter->state = STRING;
continue;
case '[': case '{':
value->value = iter->cur;
if (++iter->depth > 1)
continue;
iter->cur++;
return value->len = 1;
case '}': case ']':
if (iter->depth == 0) {
if (iter->parent)
iter->parent->cur = iter->cur;
return -1;
}
--iter->depth;
continue;
case '-': case 'a' ... 'z': case 'A' ... 'Z': case '0' ... '9':
value->value = iter->cur;
iter->state = BARE;
continue;
}
return -2;
case BARE:
switch (cur) {
case '\t': case ' ': case '\r': case '\n': case ':': case ',':
case ']': case '}':
iter->state = STRUCT;
if (iter->depth > 0)
goto again;
return value->len = iter->cur - value->value;
default:
if (cur >= 32 && cur <= 126)
continue;
}
return -2;
case STRING:
switch (cur) {
case '\\':
iter->state = ESC;
continue;
case '"':
iter->state = STRUCT;
if (iter->depth > 0)
continue;
iter->cur++;
return value->len = iter->cur - value->value;
case 240 ... 247:
utf8_remain++;
case 224 ... 239:
utf8_remain++;
case 192 ... 223:
utf8_remain++;
iter->state = UTF8;
continue;
default:
if (cur >= 32 && cur <= 126)
continue;
}
return -2;
case UTF8:
switch (cur) {
case 128 ... 191:
if (--utf8_remain == 0)
iter->state = STRING;
continue;
}
return -2;
case ESC:
switch (cur) {
case '"': case '\\': case '/': case 'b': case 'f': case 'n': case 'r':
case 't': case 'u':
iter->state = STRING;
continue;
}
return -2;
}
}
return iter->depth == 0 ? -1 : -2;
switch (iter->state) {
case NONE:
iter->state = STRUCT;
iter->depth = 0;
/** fallthrough */
case STRUCT:
switch (cur) {
case '\t': case ' ': case '\r': case '\n': case ':': case ',':
continue;
case '"':
chunk->value = iter->cur;
iter->state = STRING;
continue;
case '[': case '{':
chunk->value = iter->cur;
if (++iter->depth > 1)
continue;
iter->cur++;
return chunk->len = 1;
case '}': case ']':
if (iter->depth == 0) {
if (iter->parent)
iter->parent->cur = iter->cur;
return 0;
}
--iter->depth;
continue;
case '-': case 'a' ... 'z': case 'A' ... 'Z': case '0' ... '9': case '#':
chunk->value = iter->cur;
iter->state = BARE;
continue;
case '\0':
return 0;
}
return -1;
case BARE:
switch (cur) {
case '\t': case ' ': case '\r': case '\n': case ':': case ',':
case ']': case '}':
iter->state = STRUCT;
if (iter->depth > 0)
goto again;
return chunk->len = iter->cur - chunk->value;
default:
if (cur >= 32 && cur <= 126)
continue;
}
return -1;
case STRING:
switch (cur) {
case '\\':
iter->state = ESC;
continue;
case '"':
iter->state = STRUCT;
if (iter->depth > 0)
continue;
iter->cur++;
return chunk->len = iter->cur - chunk->value;
case 240 ... 247:
utf8_remain++;
case 224 ... 239:
utf8_remain++;
case 192 ... 223:
utf8_remain++;
iter->state = UTF8;
continue;
default:
if (cur >= 32 && cur <= 126)
continue;
}
return -1;
case UTF8:
switch (cur) {
case 128 ... 191:
if (--utf8_remain == 0)
iter->state = STRING;
continue;
}
return -1;
case ESC:
switch (cur) {
case '"': case '\\': case '/': case 'b': case 'f': case 'n': case 'r':
case 't': case 'u':
iter->state = STRING;
continue;
}
return -1;
}
}
return iter->depth == 0 ? 0 : -1;
}
static int
spa_json_iter_enter(struct spa_json_iter *iter, struct spa_json_iter *sub)
spa_json_iter_recurse(struct spa_json_iter *iter, struct spa_json_iter *sub)
{
sub->end = iter->end;
sub->parent = iter;
sub->cur = iter->cur;
sub->state = NONE;
return 0;
sub->end = iter->end;
sub->parent = iter;
sub->cur = iter->cur;
sub->state = NONE;
sub->start = iter->cur-1;
return 0;
}
static int
spa_json_iter_enter_array(struct spa_json_iter *iter,
struct spa_json_iter *array)
spa_json_iter_chunk(struct spa_json_iter *iter, struct spa_json_chunk *chunk)
{
if (spa_json_chunk_is_object(chunk) || spa_json_chunk_is_array(chunk))
spa_json_iter_init (iter, chunk->value + 1, -1);
else
spa_json_iter_init (iter, chunk->value, chunk->len);
return 0;
}
static void
spa_json_iter_print (struct spa_json_iter *it, int prefix)
{
struct spa_json_chunk chunk;
if (spa_json_iter_value(iter, &chunk) < 0) return -1;
if (*chunk.value != '[') return -1;
return spa_json_iter_enter(iter, array);
while (spa_json_iter_next_chunk(it, &chunk) > 0) {
printf ("%-*s %.*s\n", prefix, "", chunk.len, chunk.value);
if (spa_json_chunk_is_object(&chunk) ||
spa_json_chunk_is_array(&chunk)) {
struct spa_json_iter sub;
spa_json_iter_recurse(it, &sub);
spa_json_iter_print(&sub, prefix + 2);
printf ("%-*s %c\n", prefix, "", sub.cur[0]);
}
}
}
static int
spa_json_iter_enter_object(struct spa_json_iter *iter,
struct spa_json_iter *object)
spa_json_iter_array(struct spa_json_iter *iter,
struct spa_json_iter *array)
{
struct spa_json_chunk chunk;
if (spa_json_iter_value(iter, &chunk) < 0) return -1;
if (*chunk.value != '{') return -1;
return spa_json_iter_enter(iter, object);
if (spa_json_iter_next_chunk(iter, &chunk) <= 0 ||
!spa_json_chunk_is_array(&chunk)) return -1;
return spa_json_iter_recurse(iter, array);
}
static int
spa_json_iter_object(struct spa_json_iter *iter,
struct spa_json_iter *object)
{
struct spa_json_chunk chunk;
if (spa_json_iter_next_chunk(iter, &chunk) <= 0 ||
!spa_json_chunk_is_object(&chunk)) return -1;
return spa_json_iter_recurse(iter, object);
}
static int
spa_json_iter_string(struct spa_json_iter *iter,
struct spa_json_chunk *str)
{
if (spa_json_iter_value(iter, str) < 0) return -1;
if (spa_json_iter_next_chunk(iter, str) <= 0) return -1;
return (*str->value == '"') ? 0 : -1;
}
@ -318,15 +349,15 @@ spa_format_parse(struct spa_json_iter *iter,
struct spa_json_iter it[2];
struct spa_json_chunk type;
if (spa_json_iter_enter_array(iter, &it[0]) < 0) return -1;
if (spa_json_iter_array(iter, &it[0]) < 0) return -1;
if (spa_json_iter_string(&it[0], &type) < 0) return -1;
if (strncmp(type.value, "\"Format\"", type.len) != 0) return -1;
if (spa_json_iter_enter_array(&it[0], &it[1]) < 0) return -1;
if (spa_json_iter_array(&it[0], &it[1]) < 0) return -1;
if (spa_json_iter_string(&it[1], media_type) < 0) return -1;
if (spa_json_iter_string(&it[1], media_subtype) < 0) return -1;
return spa_json_iter_enter_object(&it[0], props);
return spa_json_iter_object(&it[0], props);
}
static int test_parsing(const char *format)
@ -346,21 +377,208 @@ static int test_parsing(const char *format)
while (spa_json_iter_string(&props, &value) >= 0) {
printf("Key: %.*s\n", value.len, value.value);
if (spa_json_iter_enter_array(&props, &iter[1]) < 0) return -1;
if (spa_json_iter_array(&props, &iter[1]) < 0) return -1;
if (spa_json_iter_string(&iter[1], &value) < 0) return -1;
printf("flags: %.*s\n", value.len, value.value);
if (spa_json_iter_value(&iter[1], &value) < 0) return -1;
if (spa_json_iter_next_chunk(&iter[1], &value) <= 0) return -1;
printf("default: %.*s\n", value.len, value.value);
if (spa_json_iter_enter_array(&iter[1], &iter[2]) < 0) return -1;
while (spa_json_iter_value(&iter[2], &value) >= 0) {
if (spa_json_iter_array(&iter[1], &iter[2]) < 0) return -1;
while (spa_json_iter_next_chunk(&iter[2], &value) > 0) {
printf("value: %.*s\n", value.len, value.value);
}
}
return 0;
}
static int spa_json_iter_find_key(struct spa_json_iter *iter,
const char *key,
struct spa_json_chunk *chunk)
{
struct spa_json_chunk ch;
struct spa_json_iter it = *iter;
int res;
while (true) {
if ((res = spa_json_iter_next_chunk(&it, &ch)) <= 0)
return res;
if (spa_json_iter_next_chunk(&it, chunk) <= 0)
return -1;
if (spa_json_chunk_is_string(&ch) &&
strncmp(key, ch.value, ch.len) == 0)
return 1;
}
return 0;
}
/**
* #c -> any chunk
* #s -> string chunk
* #n -> number chunk
* #b -> bool chunk
* #i -> iter
*/
static int spa_json_iter_extract(struct spa_json_iter *iter,
const char *template, ...)
{
struct spa_json_iter templ[16], it[16];
struct spa_json_chunk tchunk, chunk, *ch;
int collected = 0, res, level = 0;
va_list args;
va_start(args, template);
it[0] = *iter;
spa_json_iter_init (&templ[0], template, strlen(template));
while (true) {
res = spa_json_iter_next_chunk(&templ[level], &tchunk);
if (res == 0) {
if (--level == 0)
break;
continue;
} else if (res < 0) {
return res;
}
switch (tchunk.value[0]) {
case '[': case '{':
if (spa_json_iter_next_chunk(&it[level], &chunk) <= 0 ||
chunk.value[0] != tchunk.value[0])
return -1;
if (++level == 16)
return -2;
spa_json_iter_recurse(&it[level-1], &it[level]);
spa_json_iter_recurse(&templ[level-1], &templ[level]);
break;
case '"':
case '-': case '0' ... '9':
case 't': case 'f':
case 'n':
if (templ[level].start[0] == '{') {
it[level].cur = it[level].start + 1;
it[level].depth = 0;
it[level].state = 1;
if (spa_json_iter_find_key(&it[level], tchunk.value, &chunk) <= 0)
continue;
it[level].cur = chunk.value;
} else if (spa_json_iter_next_chunk(&it[level], &chunk) <= 0 ||
chunk.len != tchunk.len ||
strncmp(chunk.value, tchunk.value, chunk.len) != 0)
return -1;
break;
case '#':
ch = va_arg(args, struct spa_json_chunk *);
if (spa_json_iter_next_chunk(&it[level], ch) <= 0)
return -1;
switch (tchunk.value[1]) {
case 's':
if (spa_json_chunk_is_string(ch))
collected++;
break;
case 'c':
collected++;
break;
}
break;
default:
printf("invalid %c\n", tchunk.value[0]);
return -2;
}
}
va_end(args);
return collected;
}
/**
 * Exercise spa_json_iter_extract() with a template that picks the media
 * type, the media subtype and the "rate", "format" and "channels"
 * properties out of \a fmt, then print what was found.
 *
 * Returns 0 on success and -1 when the extraction failed. In that case the
 * chunks were not all filled in, so nothing beyond the result is printed.
 */
static int test_extract(const char *fmt)
{
	struct spa_json_iter iter;
	struct spa_json_chunk media_type;
	struct spa_json_chunk media_subtype;
	struct spa_json_chunk format;
	struct spa_json_chunk rate;
	struct spa_json_chunk channels;
	int res;

	spa_json_iter_init (&iter, fmt, strlen(fmt));

	res = spa_json_iter_extract(&iter,
			"[ \"Format\", "
			" [ #s, #s], "
			" { "
			" \"rate\": #c, "
			" \"format\": #c, "
			" \"channels\": #c "
			" } "
			"]",
			&media_type,
			&media_subtype,
			&rate,
			&format,
			&channels);

	printf("collected %d\n", res);
	/* a negative result means some chunks are still uninitialised */
	if (res < 0)
		return -1;

	printf("media type %.*s\n", media_type.len, media_type.value);
	printf("media subtype %.*s\n", media_subtype.len, media_subtype.value);

	/* iter is reused: each call re-initialises it on the given chunk */
	printf("rate:\n");
	spa_json_iter_chunk (&iter, &rate);
	spa_json_iter_print(&iter, 4);

	printf("format:\n");
	spa_json_iter_chunk (&iter, &format);
	spa_json_iter_print(&iter, 4);

	printf("channels:\n");
	spa_json_iter_chunk (&iter, &channels);
	spa_json_iter_print(&iter, 4);

	return 0;
}
/**
 * Exercise spa_json_iter_extract() with a template that captures the whole
 * properties value of \a fmt as one chunk, then look up "rate" and "format"
 * inside it with spa_json_iter_find_key() and print them.
 *
 * Returns 0 on success and -1 when the extraction failed. In that case the
 * chunks were not all filled in, so nothing beyond the result is printed.
 */
static int test_extract2(const char *fmt)
{
	struct spa_json_iter iter, iter2;
	struct spa_json_chunk media_type;
	struct spa_json_chunk media_subtype;
	struct spa_json_chunk props;
	struct spa_json_chunk format;
	struct spa_json_chunk rate;
	int res;

	spa_json_iter_init (&iter, fmt, strlen(fmt));

	res = spa_json_iter_extract(&iter,
			"[ \"Format\", "
			" [ #s, #s], "
			" #c "
			"]",
			&media_type,
			&media_subtype,
			&props);

	printf("collected %d\n", res);
	/* a negative result means some chunks are still uninitialised */
	if (res < 0)
		return -1;

	printf("media type %.*s\n", media_type.len, media_type.value);
	printf("media subtype %.*s\n", media_subtype.len, media_subtype.value);

	printf("props:\n");
	/* from here on iter walks the contents of the captured props chunk */
	spa_json_iter_chunk(&iter, &props);

	printf("rate:\n");
	if (spa_json_iter_find_key(&iter, "\"rate\"", &rate) > 0) {
		spa_json_iter_chunk(&iter2, &rate);
		spa_json_iter_print(&iter2, 4);
	}

	printf("format:\n");
	if (spa_json_iter_find_key(&iter, "\"format\"", &format) > 0) {
		spa_json_iter_chunk(&iter2, &format);
		spa_json_iter_print(&iter2, 4);
	}

	return 0;
}
int main(int argc, char *argv[])
{
const char *format =
@ -375,6 +593,8 @@ int main(int argc, char *argv[])
"]";
test_parsing(format);
test_extract(format);
test_extract2(format);
return 0;
}