wireshark/epan/dissectors/packet-json.c
Dario Lombardo 11c10244ef wsutil: split libjsmn.
The libjsmn was imported into the tree and enhanced with a new
function. This change splits it into the "original" libjsmn and
an additional module wsjsmn that contains the new function.
This will make it easier to port future versions of the library
into the tree.

Change-Id: I3f1caa91bee462e0767e5e18d0b6a10f0b1cad32
Reviewed-on: https://code.wireshark.org/review/17963
Petri-Dish: Peter Wu <peter@lekensteyn.nl>
Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org>
Reviewed-by: Peter Wu <peter@lekensteyn.nl>
2016-09-30 20:12:00 +00:00

682 lines
20 KiB
C

/* packet-json.c
* Routines for JSON dissection
* References:
* RFC 4627: http://tools.ietf.org/html/rfc4627
* Website: http://json.org/
*
* Copyright 2010, Jakub Zawadzki <darkjames-ws@darkjames.pl>
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#define NEW_PROTO_TREE_API
#include "config.h"
#include <epan/packet.h>
#include <epan/tvbparse.h>
#include <wsutil/wsjsmn.h>
#include <wsutil/str_util.h>
#include <wsutil/unicode-utils.h>
#include <wiretap/wtap.h>
#include "packet-http.h"
void proto_register_json(void);
void proto_reg_handoff_json(void);
/* Forward declaration: defined after the tree callbacks that call it. */
static char *json_string_unescape(tvbparse_elem_t *tok);
/* Handle for dissect_json(), registered under the name "json" in
 * proto_register_json() and attached to the media types in
 * proto_reg_handoff_json(). */
static dissector_handle_t json_handle;
/* Protocol id returned by proto_register_protocol(). */
static int proto_json = -1;
/* Subtree indices, registered through the ett[] array in proto_register_json(). */
static gint ett_json = -1;
static gint ett_json_array = -1;
static gint ett_json_object = -1;
static gint ett_json_member = -1;
/* Field info of the protocol itself; fetched with proto_registrar_get_nth()
 * during registration and used for the top-level item in dissect_json(). */
static header_field_info *hfi_json = NULL;
#define JSON_HFI_INIT HFI_INIT(proto_json)
/* Structural items: one per array, object and object member. */
static header_field_info hfi_json_array JSON_HFI_INIT =
{ "Array", "json.array", FT_NONE, BASE_NONE, NULL, 0x00, "JSON array", HFILL };
static header_field_info hfi_json_object JSON_HFI_INIT =
{ "Object", "json.object", FT_NONE, BASE_NONE, NULL, 0x00, "JSON object", HFILL };
static header_field_info hfi_json_member JSON_HFI_INIT =
{ "Member", "json.member", FT_NONE, BASE_NONE, NULL, 0x00, "JSON object member", HFILL };
/* Unescaped member name, added by after_member(). */
static header_field_info hfi_json_key JSON_HFI_INIT =
{ "Key", "json.key", FT_STRING, STR_UNICODE, NULL, 0x00, NULL, HFILL };
/* Scalar values, added by after_value(). */
static header_field_info hfi_json_value_string JSON_HFI_INIT = /* FT_STRINGZ? */
{ "String value", "json.value.string", FT_STRING, STR_UNICODE, NULL, 0x00, "JSON string value", HFILL };
static header_field_info hfi_json_value_number JSON_HFI_INIT = /* FT_DOUBLE/ FT_INT64? */
{ "Number value", "json.value.number", FT_STRING, BASE_NONE, NULL, 0x00, "JSON number value", HFILL };
static header_field_info hfi_json_value_false JSON_HFI_INIT =
{ "False value", "json.value.false", FT_NONE, BASE_NONE, NULL, 0x00, "JSON false value", HFILL };
static header_field_info hfi_json_value_null JSON_HFI_INIT =
{ "Null value", "json.value.null", FT_NONE, BASE_NONE, NULL, 0x00, "JSON null value", HFILL };
static header_field_info hfi_json_value_true JSON_HFI_INIT =
{ "True value", "json.value.true", FT_NONE, BASE_NONE, NULL, 0x00, "JSON true value", HFILL };
/* Top-level grammar (object or array), built once by init_json_parser() and
 * requested repeatedly from tvbparse_get() in dissect_json(). */
static tvbparse_wanted_t* want;
/* Pattern matching runs of " \t\r\n"; handed to tvbparse_init() in
 * dissect_json(). */
static tvbparse_wanted_t* want_ignore;
/* "data-text-lines" dissector, looked up in proto_reg_handoff_json(); receives
 * whatever is left in the buffer after the JSON parser stops. */
static dissector_handle_t text_lines_handle;
/*
 * Identifiers attached to grammar elements in init_json_parser() and read back
 * from tvbparse_elem_t::id in the callbacks (after_member(), after_value()).
 */
typedef enum {
/* used by after_value() when the matched value has no sub-element */
JSON_TOKEN_INVALID = -1,
JSON_TOKEN_NUMBER = 0,
JSON_TOKEN_STRING,
JSON_TOKEN_FALSE,
JSON_TOKEN_NULL,
JSON_TOKEN_TRUE,
/* not really tokens ... */
/* composite values; their tree items are created by before_object() /
 * before_array(), so after_value() adds nothing for them */
JSON_OBJECT,
JSON_ARRAY
} json_token_type_t;
/*
 * Per-dissection state passed to every tvbparse callback.
 */
typedef struct {
/* Stack of proto_tree pointers: dissect_json() pushes the root tree, the
 * before_*() callbacks push a new subtree and the after_*() callbacks pop it,
 * so the top is always the tree new items are added to. */
wmem_stack_t *stack;
} json_parser_data_t;
/*
 * Main JSON dissector.
 *
 * tvb/pinfo/tree are the usual dissector arguments. "data" is either NULL or
 * is read as an http_message_info_t whose media_str supplies a media type
 * name when pinfo->match_string does not provide one.
 *
 * Runs the tvbparse grammar over the buffer; the grammar callbacks build the
 * protocol tree. Anything the grammar could not consume is handed to the
 * data-text-lines dissector. Always returns the captured length of tvb.
 */
static int
dissect_json(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void* data)
{
	proto_tree *json_tree = NULL;
	proto_item *ti = NULL;
	json_parser_data_t parser_data;
	tvbparse_t *parser;
	const char *media_name;
	int consumed;
	wmem_list_frame_t *prev_layer;

	/* JSON can be the content of a native JSON file or the payload of some
	 * other protocol. Only claim the columns when the layer right before us
	 * is the frame itself, i.e. nobody else had a chance to set them.
	 */
	prev_layer = wmem_list_frame_prev(wmem_list_tail(pinfo->layers));
	if (prev_layer != NULL) {
		const char *prev_name =
			proto_get_protocol_filter_name(GPOINTER_TO_INT(wmem_list_frame_data(prev_layer)));

		if (strcmp(prev_name, "frame") == 0) {
			col_set_str(pinfo->cinfo, COL_PROTOCOL, "JSON");
			col_set_str(pinfo->cinfo, COL_INFO, "JavaScript Object Notation");
		}
	}

	/* Prefer the string we were matched on; fall back to the media string
	 * carried in the dissector data; treat an empty string like no string.
	 */
	media_name = pinfo->match_string;
	if (media_name == NULL || media_name[0] == '\0') {
		const http_message_info_t *message_info = (const http_message_info_t *)data;

		media_name = (message_info != NULL) ? message_info->media_str : NULL;
		if (media_name != NULL && media_name[0] == '\0')
			media_name = NULL;
	}

	if (tree) {
		ti = proto_tree_add_item(tree, hfi_json, tvb, 0, -1, ENC_NA);
		json_tree = proto_item_add_subtree(ti, ett_json);

		if (media_name)
			proto_item_append_text(ti, ": %s", media_name);
	}

	/* The callbacks hang their items off the tree on top of this stack. */
	parser_data.stack = wmem_stack_new(wmem_packet_scope());
	wmem_stack_push(parser_data.stack, json_tree);

	parser = tvbparse_init(tvb, 0, -1, &parser_data, want_ignore);

	/* XXX, only one json in packet? */
	while (tvbparse_get(parser, want) != NULL) {
		/* all the work happens in the grammar callbacks */
	}

	consumed = tvbparse_curr_offset(parser);
	proto_item_set_len(ti, consumed);

	/* if we have some unparsed data, pass to data-text-lines dissector (?) */
	if (tvb_reported_length_remaining(tvb, consumed) > 0) {
		call_dissector_with_data(text_lines_handle,
			tvb_new_subset_remaining(tvb, consumed), pinfo, tree, data);
	} else if (media_name) {
		col_append_sep_fstr(pinfo->cinfo, COL_INFO, " ", "(%s)", media_name);
	}

	return tvb_captured_length(tvb);
}
/*
 * For dissecting JSON in a file; we don't get passed a media type.
 *
 * Registered for WTAP_ENCAP_JSON in proto_reg_handoff_json(). Whatever arrives
 * in "data" is ignored and NULL is forwarded instead, because dissect_json()
 * would otherwise read it as an http_message_info_t.
 * Returns what dissect_json() returns.
 */
static int
dissect_json_file(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void* data _U_)
{
return dissect_json(tvb, pinfo, tree, NULL);
}
/*
 * tvbparse "before" callback for an object: adds an "Object" item covering the
 * token to the tree currently on top of the stack and pushes the item's
 * subtree so that the members end up inside it.
 */
static void before_object(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) {
	json_parser_data_t *parser = (json_parser_data_t *) tvbparse_data;
	proto_tree *parent = (proto_tree *) wmem_stack_peek(parser->stack);
	proto_item *item = proto_tree_add_item(parent, &hfi_json_object,
		tok->tvb, tok->offset, tok->len, ENC_NA);

	wmem_stack_push(parser->stack, proto_item_add_subtree(item, ett_json_object));
}
/*
 * tvbparse "after" callback for an object: drops the subtree pushed by
 * before_object() from the stack.
 */
static void after_object(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *elem _U_) {
	wmem_stack_pop(((json_parser_data_t *) tvbparse_data)->stack);
}
/*
 * tvbparse "before" callback for an object member: adds a "Member" item
 * covering the token to the tree currently on top of the stack and pushes the
 * item's subtree; after_member() pops it again.
 */
static void before_member(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) {
	json_parser_data_t *parser = (json_parser_data_t *) tvbparse_data;
	proto_tree *parent = (proto_tree *) wmem_stack_peek(parser->stack);
	proto_item *item = proto_tree_add_item(parent, &hfi_json_member,
		tok->tvb, tok->offset, tok->len, ENC_NA);

	wmem_stack_push(parser->stack, proto_item_add_subtree(item, ett_json_member));
}
/*
 * tvbparse "after" callback for an object member: pops the member subtree
 * and, when there is one and the member's first sub-element is a string
 * token, adds the unescaped key to it and appends the key to the member's
 * label.
 */
static void after_member(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) {
	json_parser_data_t *parser = (json_parser_data_t *) tvbparse_data;
	/* The pop has to happen even when nothing gets added below. */
	proto_tree *member_tree = (proto_tree *) wmem_stack_pop(parser->stack);
	tvbparse_elem_t *name_tok;
	char *name;

	if (member_tree == NULL)
		return;

	name_tok = tok->sub;
	if (name_tok == NULL || name_tok->id != JSON_TOKEN_STRING)
		return;

	name = json_string_unescape(name_tok);
	proto_tree_add_string(member_tree, &hfi_json_key,
		name_tok->tvb, name_tok->offset, name_tok->len, name);
	proto_item_append_text(member_tree, " Key: %s", name);
}
/*
 * tvbparse "before" callback for an array: adds an "Array" item covering the
 * token to the tree currently on top of the stack and pushes the item's
 * subtree so that the elements end up inside it.
 */
static void before_array(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) {
	json_parser_data_t *parser = (json_parser_data_t *) tvbparse_data;
	proto_tree *parent = (proto_tree *) wmem_stack_peek(parser->stack);
	proto_item *item = proto_tree_add_item(parent, &hfi_json_array,
		tok->tvb, tok->offset, tok->len, ENC_NA);

	wmem_stack_push(parser->stack, proto_item_add_subtree(item, ett_json_array));
}
/*
 * tvbparse "after" callback for an array: drops the subtree pushed by
 * before_array() from the stack.
 */
static void after_array(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *elem _U_) {
	wmem_stack_pop(((json_parser_data_t *) tvbparse_data)->stack);
}
/*
 * Complete the UTF-8 character whose first byte the caller already stored in
 * buf[0].
 *
 * buf        - destination; buf[0] holds the lead byte, the remaining bytes of
 *              the sequence are copied to buf[1..]
 * tvb        - buffer the remaining bytes are read from
 * offset     - offset in tvb of the lead byte (it is used as-is for the
 *              tvb_memcpy() call)
 * offset_max - limit, in the same units as offset; the sequence is rejected
 *              unless offset + length stays strictly below it
 *
 * Returns the number of bytes now valid in buf. When the lead byte is not a
 * valid start, the sequence would reach the limit, or the copied bytes do not
 * validate as UTF-8, buf[0] is replaced by '?' and 1 is returned.
 */
static int
json_tvb_memcpy_utf8(char *buf, tvbuff_t *tvb, int offset, int offset_max)
{
	const int seq_len = ws_utf8_char_len((guint8) buf[0]);
	gboolean usable;

	/* XXX, before moving to core API check if it's off-by-one safe.
	 * For JSON analyzer it's not a problem
	 * (string always terminated by ", which is not valid UTF-8 continuation character) */
	usable = (seq_len != -1) && (((guint) (offset + seq_len)) < (guint) offset_max);

	if (usable) {
		/* assume it's valid UTF-8; fetch the bytes first, validate afterwards */
		tvb_memcpy(tvb, &buf[1], offset + 1, seq_len - 1);
		usable = g_utf8_validate(buf, seq_len, NULL);
	}

	if (!usable) {
		buf[0] = '?';
		return 1;
	}

	return seq_len;
}
/*
 * Decode a JSON string token into a NUL-terminated UTF-8 string.
 *
 * tok - the string token, including the opening and closing quote; tok->len
 *       must therefore be at least 2.
 *
 * Returns a buffer allocated in packet scope (the caller does not free it).
 * Simple escapes are replaced by the character they denote, \uXXXX escapes
 * (including surrogate pairs) by the UTF-8 encoding of the code point. A
 * code point that is invalid, unprintable or an unpaired surrogate, as well
 * as any byte that does not start a valid UTF-8 sequence, is replaced by '?'.
 */
static char *json_string_unescape(tvbparse_elem_t *tok)
{
	/* Nothing decodes to more bytes than it occupies in the input, so the
	 * text between the quotes (tok->len - 2 bytes) plus the NUL always fits. */
	char *str = (char *)wmem_alloc(wmem_packet_scope(), tok->len - 1);
	int i, j;

	j = 0;
	/* i is relative to the token start; 0 and tok->len - 1 are the quotes */
	for (i = 1; i < tok->len - 1; i++) {
		guint8 ch = tvb_get_guint8(tok->tvb, tok->offset + i);
		int bin;

		if (ch == '\\') {
			i++;
			ch = tvb_get_guint8(tok->tvb, tok->offset + i);

			switch (ch) {
			case '\"':
			case '\\':
			case '/':
				str[j++] = ch;
				break;

			case 'b':
				str[j++] = '\b';
				break;
			case 'f':
				str[j++] = '\f';
				break;
			case 'n':
				str[j++] = '\n';
				break;
			case 'r':
				str[j++] = '\r';
				break;
			case 't':
				str[j++] = '\t';
				break;

			case 'u':
			{
				guint32 unicode_hex = 0;
				gboolean valid = TRUE;
				int k;

				for (k = 0; k < 4; k++) {
					i++;
					unicode_hex <<= 4;

					ch = tvb_get_guint8(tok->tvb, tok->offset + i);
					bin = ws_xton(ch);
					if (bin == -1) {
						valid = FALSE;
						break;
					}

					unicode_hex |= bin;
				}

				/* i now sits on the last hex digit of the escape */
				if ((IS_LEAD_SURROGATE(unicode_hex))) {
					gboolean paired = FALSE;

					/* Only look ahead here; input is consumed solely
					 * when a complete \uXXXX trail surrogate follows,
					 * so whatever comes after an unpaired lead
					 * surrogate is still decoded normally. */
					if (tvb_get_guint8(tok->tvb, tok->offset + i + 1) == '\\' &&
					    tvb_get_guint8(tok->tvb, tok->offset + i + 2) == 'u') {
						guint16 lead_surrogate = unicode_hex;
						guint16 trail_surrogate = 0;
						gboolean trail_ok = TRUE;

						for (k = 0; k < 4; k++) {
							ch = tvb_get_guint8(tok->tvb, tok->offset + i + 3 + k);
							bin = ws_xton(ch);
							if (bin == -1) {
								trail_ok = FALSE;
								break;
							}

							trail_surrogate = (guint16) ((trail_surrogate << 4) | bin);
						}

						if (trail_ok && (IS_TRAIL_SURROGATE(trail_surrogate))) {
							unicode_hex = SURROGATE_VALUE(lead_surrogate,trail_surrogate);
							/* skip the "\uXXXX" of the trail surrogate */
							i += 6;
							paired = TRUE;
						}
					}

					if (!paired)
						valid = FALSE;
				} else if ((IS_TRAIL_SURROGATE(unicode_hex))) {
					/* trail surrogate without a lead; nothing else to skip */
					valid = FALSE;
				}

				if (valid && g_unichar_validate(unicode_hex) && g_unichar_isprint(unicode_hex)) {
					/* \uXXXX => 6 bytes */
					int charlen = g_unichar_to_utf8(unicode_hex, &str[j]);
					j += charlen;
				} else
					str[j++] = '?';
				break;
			}

			default:
				/* not valid by JSON grammar (also tvbparse rules should not allow it) */
				DISSECTOR_ASSERT_NOT_REACHED();
				break;
			}

		} else {
			int utf_len;

			str[j] = ch;
			/* XXX if it's not valid UTF-8 character, add some expert info? (it violates JSON grammar) */
			/* The helper reads the continuation bytes straight from the
			 * tvb, so it needs tvb offsets, not token-relative ones. */
			utf_len = json_tvb_memcpy_utf8(&str[j], tok->tvb, tok->offset + i, tok->offset + tok->len);
			j += utf_len;
			i += (utf_len - 1);
		}

	}
	str[j] = '\0';

	return str;
}
/*
 * tvbparse "after" callback for a value: adds the item matching the kind of
 * value to the tree currently on top of the stack. Objects and arrays are
 * skipped because their items were created by the "before" callbacks; a value
 * of unknown kind is added as formatted text.
 */
static void after_value(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) {
	json_parser_data_t *parser = (json_parser_data_t *) tvbparse_data;
	proto_tree *parent = (proto_tree *) wmem_stack_peek(parser->stack);
	json_token_type_t kind = tok->sub ? (json_token_type_t) tok->sub->id : JSON_TOKEN_INVALID;
	header_field_info *literal_hfi = NULL;

	switch (kind) {
	case JSON_OBJECT:
	case JSON_ARRAY:
		/* already added */
		return;

	case JSON_TOKEN_STRING:
		if (tok->len < 2) {
			proto_tree_add_item(parent, &hfi_json_value_string,
				tok->tvb, tok->offset, tok->len, ENC_ASCII|ENC_NA);
		} else {
			proto_tree_add_string(parent, &hfi_json_value_string,
				tok->tvb, tok->offset, tok->len, json_string_unescape(tok));
		}
		return;

	case JSON_TOKEN_NUMBER:
		/* XXX, convert to number */
		proto_tree_add_item(parent, &hfi_json_value_number,
			tok->tvb, tok->offset, tok->len, ENC_ASCII|ENC_NA);
		return;

	/* the three literals only differ in the field they are added as */
	case JSON_TOKEN_FALSE:
		literal_hfi = &hfi_json_value_false;
		break;
	case JSON_TOKEN_NULL:
		literal_hfi = &hfi_json_value_null;
		break;
	case JSON_TOKEN_TRUE:
		literal_hfi = &hfi_json_value_true;
		break;

	default:
		proto_tree_add_format_text(parent, tok->tvb, tok->offset, tok->len);
		return;
	}

	proto_tree_add_item(parent, literal_hfi, tok->tvb, tok->offset, tok->len, ENC_NA);
}
/*
 * Build the tvbparse grammar for JSON and store it in the file-scope "want"
 * (top-level rule) and "want_ignore" (pattern given to tvbparse_init()).
 * Called once from proto_register_json().
 *
 * The grammar is recursive: a value may be an object or an array, which in
 * turn contain values. To break the cycle, the value rule refers to the static
 * placeholders _want_object/_want_array, which are filled in by struct copy
 * once the real object and array rules exist.
 */
static void init_json_parser(void) {
/* placeholders referenced by want_value before the real rules are built */
static tvbparse_wanted_t _want_object;
static tvbparse_wanted_t _want_array;
tvbparse_wanted_t *want_object, *want_array;
tvbparse_wanted_t *want_member;
tvbparse_wanted_t *want_string;
tvbparse_wanted_t *want_number, *want_int;
tvbparse_wanted_t *want_value;
tvbparse_wanted_t *want_value_separator;
/* "zero or one occurrence of wanted", expressed through tvbparse_some() */
#define tvbparse_optional(id, private_data, before_cb, after_cb, wanted) \
tvbparse_some(id, 0, 1, private_data, before_cb, after_cb, wanted)
tvbparse_wanted_t *want_quot = tvbparse_char(-1,"\"",NULL,NULL,NULL);
/* string = quote, any number of (run of plain characters / escape), quote;
 * an escape is a backslash followed by one of "\/bfnrt or by u and exactly
 * four hex digits */
want_string = tvbparse_set_seq(JSON_TOKEN_STRING, NULL, NULL, NULL,
want_quot,
tvbparse_some(-1, 0, G_MAXINT, NULL, NULL, NULL,
tvbparse_set_oneof(-1, NULL, NULL, NULL,
tvbparse_not_chars(-1, 0, 0, "\"" "\\", NULL, NULL, NULL), /* XXX, without invalid unicode characters */
tvbparse_set_seq(-1, NULL, NULL, NULL,
tvbparse_char(-1, "\\", NULL, NULL, NULL),
tvbparse_set_oneof(-1, NULL, NULL, NULL,
tvbparse_chars(-1, 0, 1, "\"" "\\" "/bfnrt", NULL, NULL, NULL),
tvbparse_set_seq(-1, NULL, NULL, NULL,
tvbparse_char(-1, "u", NULL, NULL, NULL),
tvbparse_chars(-1, 4, 4, "0123456789abcdefABCDEF", NULL, NULL, NULL),
NULL),
NULL),
NULL),
NULL)
),
want_quot,
NULL);
want_value_separator = tvbparse_char(-1, ",", NULL, NULL, NULL);
/* int = zero / ( digit1-9 *DIGIT ) */
want_int = tvbparse_set_oneof(-1, NULL, NULL, NULL,
tvbparse_char(-1, "0", NULL, NULL, NULL),
tvbparse_set_seq(-1, NULL, NULL, NULL,
tvbparse_chars(-1, 1, 1, "123456789", NULL, NULL, NULL),
tvbparse_optional(-1, NULL, NULL, NULL, /* tvbparse_chars() doesn't respect 0 as min_len ;/ */
tvbparse_chars(-1, 0, 0, "0123456789", NULL, NULL, NULL)),
NULL),
NULL);
/* number = [ minus ] int [ frac ] [ exp ] */
want_number = tvbparse_set_seq(JSON_TOKEN_NUMBER, NULL, NULL, NULL,
tvbparse_optional(-1, NULL, NULL, NULL, /* tvbparse_chars() doesn't respect 0 as min_len ;/ */
tvbparse_chars(-1, 0, 1, "-", NULL, NULL, NULL)),
want_int,
/* frac = decimal-point 1*DIGIT */
tvbparse_optional(-1, NULL, NULL, NULL,
tvbparse_set_seq(-1, NULL, NULL, NULL,
tvbparse_char(-1, ".", NULL, NULL, NULL),
tvbparse_chars(-1, 1, 0, "0123456789", NULL, NULL, NULL),
NULL)),
/* exp = e [ minus / plus ] 1*DIGIT */
tvbparse_optional(-1, NULL, NULL, NULL,
tvbparse_set_seq(-1, NULL, NULL, NULL,
tvbparse_char(-1, "eE", NULL, NULL, NULL),
tvbparse_optional(-1, NULL, NULL, NULL, /* tvbparse_chars() doesn't respect 0 as min_len ;/ */
tvbparse_chars(-1, 0, 1, "-+", NULL, NULL, NULL)),
tvbparse_chars(-1, 1, 0, "0123456789", NULL, NULL, NULL),
NULL)),
NULL);
/* value = false / null / true / object / array / number / string */
/* after_value() adds the tree item for whichever alternative matched */
want_value = tvbparse_set_oneof(-1, NULL, NULL, after_value,
tvbparse_string(JSON_TOKEN_FALSE, "false", NULL, NULL, NULL),
tvbparse_string(JSON_TOKEN_NULL, "null", NULL, NULL, NULL),
tvbparse_string(JSON_TOKEN_TRUE, "true", NULL, NULL, NULL),
&_want_object,
&_want_array,
want_number,
want_string,
NULL);
/* array = begin-array [ value *( value-separator value ) ] end-array */
want_array = tvbparse_set_seq(JSON_ARRAY, NULL, before_array, after_array,
tvbparse_char(-1, "[", NULL, NULL, NULL),
tvbparse_optional(-1, NULL, NULL, NULL,
tvbparse_set_seq(-1, NULL, NULL, NULL,
want_value,
tvbparse_some(-1, 0, G_MAXINT, NULL, NULL, NULL,
tvbparse_set_seq(-1, NULL, NULL, NULL,
want_value_separator,
want_value,
NULL)),
NULL)
),
tvbparse_char(-1, "]", NULL, NULL, NULL),
NULL);
/* fill the placeholder that want_value already points at */
_want_array = *want_array;
/* member = string name-separator value */
want_member = tvbparse_set_seq(-1, NULL, before_member, after_member,
want_string,
tvbparse_char(-1, ":", NULL, NULL, NULL),
want_value,
NULL);
/* object = begin-object [ member *( value-separator member ) ] end-object */
want_object = tvbparse_set_seq(JSON_OBJECT, NULL, before_object, after_object,
tvbparse_char(-1, "{", NULL, NULL, NULL),
tvbparse_optional(-1, NULL, NULL, NULL,
tvbparse_set_seq(-1, NULL, NULL, NULL,
want_member,
tvbparse_some(-1, 0, G_MAXINT, NULL, NULL, NULL,
tvbparse_set_seq(-1, NULL, NULL, NULL,
want_value_separator,
want_member,
NULL)),
NULL)
),
tvbparse_char(-1, "}", NULL, NULL, NULL),
NULL);
/* fill the placeholder that want_value already points at */
_want_object = *want_object;
/* runs of whitespace; dissect_json() passes this to tvbparse_init() */
want_ignore = tvbparse_chars(-1, 1, 0, " \t\r\n", NULL, NULL, NULL);
/* JSON-text = object / array */
want = tvbparse_set_oneof(-1, NULL, NULL, NULL,
want_object,
want_array,
/* tvbparse_not_chars(-1, 1, 0, " \t\r\n", NULL, NULL, NULL), */
NULL);
/* XXX, heur? */
}
/*
 * Heuristic entry point: uses libjsmn to decide whether the payload is JSON.
 *
 * The captured bytes are fetched as an ASCII string and handed to
 * jsmn_is_json(); if that check fails the packet is rejected (FALSE).
 * Otherwise the regular dissector runs and the result is TRUE when it
 * returned a non-zero value.
 */
static gboolean
dissect_json_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *data)
{
	const guint captured = tvb_captured_length(tvb);
	const guint8 *text = tvb_get_string_enc(wmem_packet_scope(), tvb, 0, captured, ENC_ASCII);

	if (!jsmn_is_json(text, captured)) {
		return FALSE;
	}

	return (dissect_json(tvb, pinfo, tree, data) != 0);
}
/*
 * Register the JSON protocol: its name, header fields, subtree indices and the
 * "json" dissector, then build the parser grammar.
 */
void
proto_register_json(void)
{
/* subtree indices filled in by proto_register_subtree_array() */
static gint *ett[] = {
&ett_json,
&ett_json_array,
&ett_json_object,
&ett_json_member
};
/* The explicit field list is only compiled when HAVE_HFI_SECTION_INIT is not
 * defined. NOTE(review): proto_register_fields() below still names "hfi", so
 * it is presumably a macro that ignores the argument in the other case -
 * confirm against proto.h. */
#ifndef HAVE_HFI_SECTION_INIT
static header_field_info *hfi[] = {
&hfi_json_array,
&hfi_json_object,
&hfi_json_member,
&hfi_json_key,
&hfi_json_value_string,
&hfi_json_value_number,
&hfi_json_value_false,
&hfi_json_value_null,
&hfi_json_value_true,
};
#endif
proto_json = proto_register_protocol("JavaScript Object Notation", "JSON", "json");
/* field info of the protocol itself, used for the top-level tree item */
hfi_json = proto_registrar_get_nth(proto_json);
proto_register_fields(proto_json, hfi, array_length(hfi));
proto_register_subtree_array(ett, array_length(ett));
json_handle = register_dissector("json", dissect_json, proto_json);
/* the grammar is built once here and reused by every dissect_json() call */
init_json_parser();
}
void
proto_reg_handoff_json(void)
{
dissector_handle_t json_file_handle = create_dissector_handle(dissect_json_file, proto_json);
heur_dissector_add("hpfeeds", dissect_json_heur, "JSON over HPFEEDS", "json_hpfeeds", proto_json, HEURISTIC_ENABLE);
heur_dissector_add("db-lsp", dissect_json_heur, "JSON over DB-LSP", "json_db_lsp", proto_json, HEURISTIC_ENABLE);
dissector_add_uint("wtap_encap", WTAP_ENCAP_JSON, json_file_handle);
dissector_add_string("media_type", "application/json", json_handle); /* RFC 4627 */
dissector_add_string("media_type", "application/json-rpc", json_handle); /* JSON-RPC over HTTP */
dissector_add_string("media_type", "application/jsonrequest", json_handle); /* JSON-RPC over HTTP */
dissector_add_string("media_type", "application/dds-web+json", json_handle); /* DDS Web Integration Service over HTTP */
text_lines_handle = find_dissector_add_dependency("data-text-lines", proto_json);
}
/*
* Editor modelines - http://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 8
* tab-width: 8
* indent-tabs-mode: t
* End:
*
* vi: set shiftwidth=8 tabstop=8 noexpandtab:
* :indentSize=8:tabSize=8:noTabs=false:
*/