diff --git a/check-qjson.c b/check-qjson.c
index 86e780ccd..0b60e45fa 100644
--- a/check-qjson.c
+++ b/check-qjson.c
@@ -28,6 +28,13 @@ START_TEST(escaped_string)
         const char *decoded;
         int skip;
     } test_cases[] = {
+        { "\"\\b\"", "\b" },
+        { "\"\\f\"", "\f" },
+        { "\"\\n\"", "\n" },
+        { "\"\\r\"", "\r" },
+        { "\"\\t\"", "\t" },
+        { "\"\\/\"", "\\/" },
+        { "\"\\\\\"", "\\" },
         { "\"\\\"\"", "\"" },
         { "\"hello world \\\"embedded string\\\"\"",
           "hello world \"embedded string\"" },
@@ -48,11 +55,14 @@ START_TEST(escaped_string)
         fail_unless(qobject_type(obj) == QTYPE_QSTRING);
 
         str = qobject_to_qstring(obj);
-        fail_unless(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0);
+        fail_unless(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0,
+                    "%s != %s\n", qstring_get_str(str), test_cases[i].decoded);
 
         if (test_cases[i].skip == 0) {
             str = qobject_to_json(obj);
-            fail_unless(strcmp(qstring_get_str(str), test_cases[i].encoded) == 0);
+            fail_unless(strcmp(qstring_get_str(str),test_cases[i].encoded) == 0,
+                        "%s != %s\n", qstring_get_str(str),
+                        test_cases[i].encoded);
 
             qobject_decref(obj);
         }
@@ -627,11 +637,90 @@ START_TEST(simple_varargs)
 }
 END_TEST
 
+START_TEST(empty_input)
+{
+    QObject *obj = qobject_from_json("");
+    fail_unless(obj == NULL);
+}
+END_TEST
+
+START_TEST(unterminated_string)
+{
+    QObject *obj = qobject_from_json("\"abc");
+    fail_unless(obj == NULL);
+}
+END_TEST
+
+START_TEST(unterminated_sq_string)
+{
+    QObject *obj = qobject_from_json("'abc");
+    fail_unless(obj == NULL);
+}
+END_TEST
+
+START_TEST(unterminated_escape)
+{
+    QObject *obj = qobject_from_json("\"abc\\\"");
+    fail_unless(obj == NULL);
+}
+END_TEST
+
+START_TEST(unterminated_array)
+{
+    QObject *obj = qobject_from_json("[32");
+    fail_unless(obj == NULL);
+}
+END_TEST
+
+START_TEST(unterminated_array_comma)
+{
+    QObject *obj = qobject_from_json("[32,");
+    fail_unless(obj == NULL);
+}
+END_TEST
+
+START_TEST(invalid_array_comma)
+{
+    QObject *obj = qobject_from_json("[32,}");
+    fail_unless(obj == NULL);
+}
+END_TEST
+
+START_TEST(unterminated_dict)
+{
+    QObject *obj = qobject_from_json("{'abc':32");
+    fail_unless(obj == NULL);
+}
+END_TEST
+
+START_TEST(unterminated_dict_comma)
+{
+    QObject *obj = qobject_from_json("{'abc':32,");
+    fail_unless(obj == NULL);
+}
+END_TEST
+
+#if 0
+START_TEST(invalid_dict_comma)
+{
+    QObject *obj = qobject_from_json("{'abc':32,}");
+    fail_unless(obj == NULL);
+}
+END_TEST
+
+START_TEST(unterminated_literal)
+{
+    QObject *obj = qobject_from_json("nul");
+    fail_unless(obj == NULL);
+}
+END_TEST
+#endif
+
 static Suite *qjson_suite(void)
 {
     Suite *suite;
     TCase *string_literals, *number_literals, *keyword_literals;
-    TCase *dicts, *lists, *whitespace, *varargs;
+    TCase *dicts, *lists, *whitespace, *varargs, *errors;
 
     string_literals = tcase_create("String Literals");
     tcase_add_test(string_literals, simple_string);
@@ -657,6 +746,22 @@ static Suite *qjson_suite(void)
     varargs = tcase_create("Varargs");
     tcase_add_test(varargs, simple_varargs);
 
+    errors = tcase_create("Invalid JSON");
+    tcase_add_test(errors, empty_input);
+    tcase_add_test(errors, unterminated_string);
+    tcase_add_test(errors, unterminated_escape);
+    tcase_add_test(errors, unterminated_sq_string);
+    tcase_add_test(errors, unterminated_array);
+    tcase_add_test(errors, unterminated_array_comma);
+    tcase_add_test(errors, invalid_array_comma);
+    tcase_add_test(errors, unterminated_dict);
+    tcase_add_test(errors, unterminated_dict_comma);
+#if 0
+    /* FIXME: these print parse error messages on stderr. */
+    tcase_add_test(errors, invalid_dict_comma);
+    tcase_add_test(errors, unterminated_literal);
+#endif
+
     suite = suite_create("QJSON test-suite");
     suite_add_tcase(suite, string_literals);
     suite_add_tcase(suite, number_literals);
@@ -665,6 +770,7 @@ static Suite *qjson_suite(void)
     suite_add_tcase(suite, lists);
     suite_add_tcase(suite, whitespace);
     suite_add_tcase(suite, varargs);
+    suite_add_tcase(suite, errors);
 
     return suite;
 }
diff --git a/hxtool b/hxtool
index d499dc08a..7ca83ed1f 100644
--- a/hxtool
+++ b/hxtool
@@ -59,6 +59,7 @@ hxtoqmp()
 {
     IFS=
     flag=0
+    line=1
     while read -r str; do
         case "$str" in
             HXCOMM*)
@@ -87,6 +88,7 @@ hxtoqmp()
             test $flag -eq 1 && echo "$str"
             ;;
         esac
+        line=$((line+1))
     done
 }
 
diff --git a/json-lexer.c b/json-lexer.c
index 9d649205a..c736f4290 100644
--- a/json-lexer.c
+++ b/json-lexer.c
@@ -29,7 +29,6 @@
 
 enum json_lexer_state {
     ERROR = 0,
-    IN_DONE_STRING,
     IN_DQ_UCODE3,
     IN_DQ_UCODE2,
     IN_DQ_UCODE1,
@@ -57,19 +56,19 @@ enum json_lexer_state {
     IN_ESCAPE_I,
     IN_ESCAPE_I6,
     IN_ESCAPE_I64,
-    IN_ESCAPE_DONE,
     IN_WHITESPACE,
-    IN_OPERATOR_DONE,
     IN_START,
 };
 
 #define TERMINAL(state) [0 ... 0x7F] = (state)
 
-static const uint8_t json_lexer[][256] = {
-    [IN_DONE_STRING] = {
-        TERMINAL(JSON_STRING),
-    },
+/* Return whether TERMINAL is a terminal state and the transition to it
+   from OLD_STATE required lookahead. This happens whenever the table
+   below uses the TERMINAL macro. */
+#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
+    (json_lexer[(old_state)][0] == (terminal))
 
+static const uint8_t json_lexer[][256] = {
     /* double quote string */
     [IN_DQ_UCODE3] = {
         ['0' ... '9'] = IN_DQ_STRING,
@@ -97,6 +96,8 @@ static const uint8_t json_lexer[][256] = {
         ['n'] = IN_DQ_STRING,
         ['r'] = IN_DQ_STRING,
         ['t'] = IN_DQ_STRING,
+        ['/'] = IN_DQ_STRING,
+        ['\\'] = IN_DQ_STRING,
         ['\''] = IN_DQ_STRING,
         ['\"'] = IN_DQ_STRING,
         ['u'] = IN_DQ_UCODE0,
@@ -104,7 +105,7 @@ static const uint8_t json_lexer[][256] = {
     [IN_DQ_STRING] = {
         [1 ... 0xFF] = IN_DQ_STRING,
         ['\\'] = IN_DQ_STRING_ESCAPE,
-        ['"'] = IN_DONE_STRING,
+        ['"'] = JSON_STRING,
     },
 
     /* single quote string */
@@ -134,6 +135,8 @@ static const uint8_t json_lexer[][256] = {
         ['n'] = IN_SQ_STRING,
         ['r'] = IN_SQ_STRING,
         ['t'] = IN_SQ_STRING,
+        ['/'] = IN_SQ_STRING,
+        ['\\'] = IN_SQ_STRING,
         ['\''] = IN_SQ_STRING,
         ['\"'] = IN_SQ_STRING,
         ['u'] = IN_SQ_UCODE0,
@@ -141,7 +144,7 @@ static const uint8_t json_lexer[][256] = {
     [IN_SQ_STRING] = {
         [1 ... 0xFF] = IN_SQ_STRING,
         ['\\'] = IN_SQ_STRING_ESCAPE,
-        ['\''] = IN_DONE_STRING,
+        ['\''] = JSON_STRING,
     },
 
     /* Zero */
@@ -207,27 +210,18 @@ static const uint8_t json_lexer[][256] = {
         ['\n'] = IN_WHITESPACE,
     },
 
-    /* operator */
-    [IN_OPERATOR_DONE] = {
-        TERMINAL(JSON_OPERATOR),
-    },
-
     /* escape */
-    [IN_ESCAPE_DONE] = {
-        TERMINAL(JSON_ESCAPE),
-    },
-
     [IN_ESCAPE_LL] = {
-        ['d'] = IN_ESCAPE_DONE,
+        ['d'] = JSON_ESCAPE,
     },
 
     [IN_ESCAPE_L] = {
-        ['d'] = IN_ESCAPE_DONE,
+        ['d'] = JSON_ESCAPE,
         ['l'] = IN_ESCAPE_LL,
     },
 
     [IN_ESCAPE_I64] = {
-        ['d'] = IN_ESCAPE_DONE,
+        ['d'] = JSON_ESCAPE,
     },
 
     [IN_ESCAPE_I6] = {
@@ -239,11 +233,11 @@ static const uint8_t json_lexer[][256] = {
     },
 
     [IN_ESCAPE] = {
-        ['d'] = IN_ESCAPE_DONE,
-        ['i'] = IN_ESCAPE_DONE,
-        ['p'] = IN_ESCAPE_DONE,
-        ['s'] = IN_ESCAPE_DONE,
-        ['f'] = IN_ESCAPE_DONE,
+        ['d'] = JSON_ESCAPE,
+        ['i'] = JSON_ESCAPE,
+        ['p'] = JSON_ESCAPE,
+        ['s'] = JSON_ESCAPE,
+        ['f'] = JSON_ESCAPE,
         ['l'] = IN_ESCAPE_L,
         ['I'] = IN_ESCAPE_I,
     },
@@ -255,12 +249,12 @@ static const uint8_t json_lexer[][256] = {
         ['0'] = IN_ZERO,
         ['1' ... '9'] = IN_NONZERO_NUMBER,
         ['-'] = IN_NEG_NONZERO_NUMBER,
-        ['{'] = IN_OPERATOR_DONE,
-        ['}'] = IN_OPERATOR_DONE,
-        ['['] = IN_OPERATOR_DONE,
-        [']'] = IN_OPERATOR_DONE,
-        [','] = IN_OPERATOR_DONE,
-        [':'] = IN_OPERATOR_DONE,
+        ['{'] = JSON_OPERATOR,
+        ['}'] = JSON_OPERATOR,
+        ['['] = JSON_OPERATOR,
+        [']'] = JSON_OPERATOR,
+        [','] = JSON_OPERATOR,
+        [':'] = JSON_OPERATOR,
         ['a' ... 'z'] = IN_KEYWORD,
         ['%'] = IN_ESCAPE,
         [' '] = IN_WHITESPACE,
@@ -275,11 +269,12 @@ void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
     lexer->emit = func;
     lexer->state = IN_START;
     lexer->token = qstring_new();
+    lexer->x = lexer->y = 0;
 }
 
 static int json_lexer_feed_char(JSONLexer *lexer, char ch)
 {
-    char buf[2];
+    int char_consumed, new_state;
 
     lexer->x++;
     if (ch == '\n') {
@@ -287,32 +282,33 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch)
         lexer->y++;
     }
 
-    lexer->state = json_lexer[lexer->state][(uint8_t)ch];
-
-    switch (lexer->state) {
-    case JSON_OPERATOR:
-    case JSON_ESCAPE:
-    case JSON_INTEGER:
-    case JSON_FLOAT:
-    case JSON_KEYWORD:
-    case JSON_STRING:
-        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
-    case JSON_SKIP:
-        lexer->state = json_lexer[IN_START][(uint8_t)ch];
-        QDECREF(lexer->token);
-        lexer->token = qstring_new();
-        break;
-    case ERROR:
-        return -EINVAL;
-    default:
-        break;
-    }
-
-    buf[0] = ch;
-    buf[1] = 0;
-
-    qstring_append(lexer->token, buf);
+    do {
+        new_state = json_lexer[lexer->state][(uint8_t)ch];
+        char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
+        if (char_consumed) {
+            qstring_append_chr(lexer->token, ch);
+        }
 
+        switch (new_state) {
+        case JSON_OPERATOR:
+        case JSON_ESCAPE:
+        case JSON_INTEGER:
+        case JSON_FLOAT:
+        case JSON_KEYWORD:
+        case JSON_STRING:
+            lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
+        case JSON_SKIP:
+            QDECREF(lexer->token);
+            lexer->token = qstring_new();
+            new_state = IN_START;
+            break;
+        case ERROR:
+            return -EINVAL;
+        default:
+            break;
+        }
+        lexer->state = new_state;
+    } while (!char_consumed);
     return 0;
 }
 
@@ -334,7 +330,7 @@ int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
 
 int json_lexer_flush(JSONLexer *lexer)
 {
-    return json_lexer_feed_char(lexer, 0);
+    return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0);
 }
 
 void json_lexer_destroy(JSONLexer *lexer)
diff --git a/json-parser.c b/json-parser.c
index 1c88ed898..70b9b6f96 100644
--- a/json-parser.c
+++ b/json-parser.c
@@ -205,6 +205,10 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token
                 qstring_append(str, "\b");
                 ptr++;
                 break;
+            case 'f':
+                qstring_append(str, "\f");
+                ptr++;
+                break;
             case 'n':
                 qstring_append(str, "\n");
                 ptr++;
diff --git a/json-streamer.c b/json-streamer.c
index 610ffea6d..f7e7a68d4 100644
--- a/json-streamer.c
+++ b/json-streamer.c
@@ -43,11 +43,11 @@ static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTok
     }
 
     dict = qdict_new();
-    qdict_put_obj(dict, "type", QOBJECT(qint_from_int(type)));
+    qdict_put(dict, "type", qint_from_int(type));
     QINCREF(token);
-    qdict_put_obj(dict, "token", QOBJECT(token));
-    qdict_put_obj(dict, "x", QOBJECT(qint_from_int(x)));
-    qdict_put_obj(dict, "y", QOBJECT(qint_from_int(y)));
+    qdict_put(dict, "token", token);
+    qdict_put(dict, "x", qint_from_int(x));
+    qdict_put(dict, "y", qint_from_int(y));
 
     qlist_append(parser->tokens, dict);
 
diff --git a/qjson.c b/qjson.c
index 483c6675d..e4ee43376 100644
--- a/qjson.c
+++ b/qjson.c
@@ -158,6 +158,9 @@ static void to_json(const QObject *obj, QString *str)
             case '\b':
                 qstring_append(str, "\\b");
                 break;
+            case '\f':
+                qstring_append(str, "\\f");
+                break;
             case '\n':
                 qstring_append(str, "\\n");
                 break;