Add --no-duplicate-keys tshark option.

Adds the --no-duplicate-keys option to tshark. When used together with
-T json, this option merges the duplicate keys that -T json would
otherwise produce into a single key whose value is a json array of all
the separate values.

Specifying --no-duplicate-keys changes the function which groups node
children that is passed to write_json_proto_tree. Instead of a function
that puts each node in a separate group (proto_node_group_children_by_unique)
a function is passed that groups children that have the same json key
together (proto_node_group_children_by_json_key). This will lead to
some groups having multiple values. Groups with multiple values are
written to the output as a json array. This includes normal json keys
but also keys with the "_raw" and "_tree" suffix.

If --no-duplicate-keys is specified with an output type other than
"-T json" or "-T jsonraw", or without -T, an error is shown and tshark exits.

"Export Packet Dissections -> As JSON" in the GUI is hardcoded to use
the duplicated keys format.

Fixes one regression in the output where a filtered json key (-j) with
both a value and children would not have the "_tree" suffix added to the
json key containing the children.

Includes a little code cleanup (removes one instance of code
duplication and simplifies a while loop).

Fixes a memory leak (I thought this fix was already included in the
previous refactor patch but something must have gone wrong when updating
the patch so I'm including it again in this patch).

Bug: 12958
Change-Id: I401f8fc877b5c590686567c3c44cdb832e9e7dfe
Reviewed-on: https://code.wireshark.org/review/22166
Petri-Dish: Jaap Keuter <jaap.keuter@xs4all.nl>
Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org>
Reviewed-by: Anders Broman <a.broman58@gmail.com>
This commit is contained in:
Daan De Meyer 2017-06-16 15:19:18 +02:00 committed by Anders Broman
parent 2954a69d7d
commit 07f576ffeb
5 changed files with 108 additions and 45 deletions

View File

@ -55,6 +55,7 @@ S<[ B<-M> E<lt>auto session resetE<gt> ]>
S<[ B<-z> E<lt>statisticsE<gt> ]>
S<[ B<--capture-comment> E<lt>commentE<gt> ]>
S<[ B<--color> ]>
S<[ B<--no-duplicate-keys> ]>
S<[ B<--export-objects> E<lt>protocolE<gt>,E<lt>destdirE<gt> ]>
S<[ B<--enable-protocol> E<lt>proto_nameE<gt> ]>
S<[ B<--disable-protocol> E<lt>proto_nameE<gt> ]>
@ -1626,6 +1627,12 @@ are not supported by all terminal emulators. See
L<https://wiki.wireshark.org/ColoringRules> for more information on configuring
color filters.
=item --no-duplicate-keys
If a key appears multiple times in an object, write it only once, with a json
array containing all the separate values as its value. (Only works with
-T json and -T jsonraw)
=item --export-objects E<lt>protocolE<gt>,E<lt>destdirE<gt>
Export all objects within a protocol into directory B<destdir>. The available

View File

@ -72,6 +72,7 @@ typedef struct {
pf_flags filter_flags;
gboolean print_hex;
gboolean print_text;
proto_node_children_grouper_func node_children_grouper;
} write_json_data;
typedef struct {
@ -122,11 +123,7 @@ static void write_json_proto_node_filtered(proto_node *node, write_json_data *da
static void write_json_proto_node_hex_dump(proto_node *node, write_json_data *data);
static void write_json_proto_node_children(proto_node *node, write_json_data *data);
static void write_json_proto_node_value(proto_node *node, write_json_data *data);
typedef GSList* (*proto_node_children_grouper_func)(proto_node *node);
static void write_json_proto_node_no_value(proto_node *node, write_json_data *data);
static GSList *proto_node_group_children_by_unique(proto_node *node);
static const char *proto_node_to_json_key(proto_node *node);
static void print_pdml_geninfo(epan_dissect_t *edt, FILE *fh);
@ -135,10 +132,6 @@ static void proto_tree_get_node_field_values(proto_node *node, gpointer data);
static gboolean json_is_first;
// Function used to group a node's children. Children in the same group are represented in the json output by a single
// json key. If multiple nodes are in a group they are wrapped in a json array in the json output.
static proto_node_children_grouper_func json_proto_node_children_grouper = proto_node_group_children_by_unique;
/* Cache the protocols and field handles that the print functionality needs
This helps break explicit dependency on the dissectors. */
static int proto_data = -1;
@ -695,6 +688,7 @@ write_json_proto_tree(output_fields_t* fields,
print_dissections_e print_dissections,
gboolean print_hex, gchar **protocolfilter,
pf_flags protocolfilter_flags, epan_dissect_t *edt,
proto_node_children_grouper_func node_children_grouper,
FILE *fh)
{
char ts[30];
@ -734,16 +728,9 @@ write_json_proto_tree(output_fields_t* fields,
if (print_dissections == print_dissections_none) {
data.print_text = FALSE;
}
data.node_children_grouper = node_children_grouper;
/*
* Group nodes together by the key they will have in the json output. This is necessary to know which json keys
* have multiple values which need to be put in a json array in the output. A map is not required since we can
* easily retrieve the json key from the first value in the linked list.
*/
GSList *same_key_nodes_list = json_proto_node_children_grouper(edt->tree);
write_json_proto_node_list(same_key_nodes_list, &data);
g_slist_free(same_key_nodes_list);
write_json_proto_node_children(edt->tree, &data);
} else {
write_specified_fields(FORMAT_JSON, fields, edt, NULL, fh);
}
@ -822,10 +809,14 @@ write_json_proto_node_list(GSList *proto_node_list_head, write_json_data *data)
if (has_children) {
if (delimiter_needed) fputs(",\n", data->fh);
if (is_filtered) {
write_json_proto_node(node_values_list, "", write_json_proto_node_filtered, data);
} else {
// If a node has both a value and a set of children we print the value and the children in separate
// key:value pairs. These can't have the same key so whenever a value is already printed with the node
// json key we print the children with the same key with a "_tree" suffix added.
char *suffix = has_value ? "_tree": "";
if (is_filtered) {
write_json_proto_node(node_values_list, suffix, write_json_proto_node_filtered, data);
} else {
// Remove protocol filter for children, if children should be included. This functionality is enabled
// with the "-J" command line option. We save the filter so it can be reenabled when we are done with
// the current key:value pair.
@ -835,14 +826,7 @@ write_json_proto_node_list(GSList *proto_node_list_head, write_json_data *data)
data->filter = NULL;
}
// If a node has both a value and a set of children we print the value and the children in separate
// key:value pairs. These can't have the same key so whenever a value is already printed with the node
// json key we print the children with the same key with a "_tree" suffix added.
if (has_value) {
write_json_proto_node(node_values_list, "_tree", write_json_proto_node_children, data);
} else {
write_json_proto_node(node_values_list, "", write_json_proto_node_children, data);
}
write_json_proto_node(node_values_list, suffix, write_json_proto_node_children, data);
// Put protocol filter back
if ((data->filter_flags&PF_INCLUDE_CHILDREN) == PF_INCLUDE_CHILDREN) {
@ -913,14 +897,9 @@ write_json_proto_node_value_list(GSList *node_values_head, proto_node_value_writ
fputs("[\n", data->fh);
data->level++;
// Print first value outside the while loop so we write the delimiter at the start of each loop without having
// to check if we are at the last element.
print_indent(data->level, data->fh);
value_writer((proto_node *) current_value->data, data);
current_value = current_value->next;
while (current_value != NULL) {
fputs(",\n", data->fh);
// Do not print delimiter before first value
if (current_value != node_values_head) fputs(",\n", data->fh);
print_indent(data->level, data->fh);
value_writer((proto_node *) current_value->data, data);
@ -1017,9 +996,9 @@ write_json_proto_node_hex_dump(proto_node *node, write_json_data *data)
static void
write_json_proto_node_children(proto_node *node, write_json_data *data)
{
GSList *same_key_nodes_list = json_proto_node_children_grouper(node);
write_json_proto_node_list(same_key_nodes_list, data);
g_slist_free(same_key_nodes_list);
GSList *grouped_children_list = data->node_children_grouper(node);
write_json_proto_node_list(grouped_children_list, data);
g_slist_free_full(grouped_children_list, (GDestroyNotify) g_slist_free);
}
/**
@ -1064,10 +1043,10 @@ write_json_proto_node_no_value(proto_node *node, write_json_data *data)
}
/**
* Groups each node separately as if it had a unique json key even if it doesn't. Using this function leads to duplicate
* keys in the json output.
* Groups each child of the node separately.
* @return Linked list where each element is another linked list containing a single node.
*/
static GSList *
GSList *
proto_node_group_children_by_unique(proto_node *node) {
GSList *unique_nodes_list = NULL;
proto_node *current_child = node->first_child;
@ -1081,6 +1060,54 @@ proto_node_group_children_by_unique(proto_node *node) {
return g_slist_reverse(unique_nodes_list);
}
/**
 * Partitions the children of a node into groups that share the same json key.
 * @return Linked list in which every element is itself a linked list holding the child nodes that map to one json key.
 *         Groups are ordered by the position at which their key was first encountered among the children.
 */
GSList *
proto_node_group_children_by_json_key(proto_node *node)
{
    /*
     * Every group is reachable in two ways while we iterate: through a hash table keyed by json key, which gives a
     * fast "is there already a group for this key" lookup, and through a linked list, which remembers the order in
     * which keys were first seen (a hash table alone gives no ordering guarantee).
     */
    GHashTable *groups_by_key = g_hash_table_new(g_str_hash, g_str_equal);
    GSList *ordered_groups = NULL;
    proto_node *child;

    for (child = node->first_child; child != NULL; child = child->next) {
        char *key = (char *) proto_node_to_json_key(child);
        GSList *group = (GSList *) g_hash_table_lookup(groups_by_key, key);
        gboolean first_for_key = (group == NULL);

        // Appending is fine here since most groups are expected to hold only a single node. For an existing group
        // the head already stored in ordered_groups stays valid: appending to a non-empty GSList keeps its head.
        group = g_slist_append(group, child);

        if (first_for_key) {
            // Prepending to a singly linked list is O(1) while appending is O(n), so new groups are prepended and
            // the whole list is reversed once at the end.
            ordered_groups = g_slist_prepend(ordered_groups, group);
        }

        // The (possibly unchanged) group head is written back to the hash table for new and existing keys alike.
        g_hash_table_insert(groups_by_key, key, group);
    }

    // Only the ordered list is returned; the lookup table was just a helper and owns neither keys nor groups.
    g_hash_table_destroy(groups_by_key);

    return g_slist_reverse(ordered_groups);
}
/**
* Returns the json key of a node. Tries to use the node's abbreviated name. If the abbreviated name is not available
* the representation is used instead.

View File

@ -71,6 +71,8 @@ typedef enum {
struct _output_fields;
typedef struct _output_fields output_fields_t;
typedef GSList* (*proto_node_children_grouper_func)(proto_node *node);
WS_DLL_PUBLIC output_fields_t* output_fields_new(void);
WS_DLL_PUBLIC void output_fields_free(output_fields_t* info);
WS_DLL_PUBLIC void output_fields_add(output_fields_t* info, const gchar* field);
@ -95,13 +97,21 @@ WS_DLL_PUBLIC void write_pdml_preamble(FILE *fh, const gchar* filename);
WS_DLL_PUBLIC void write_pdml_proto_tree(output_fields_t* fields, gchar **protocolfilter, pf_flags protocolfilter_flags, epan_dissect_t *edt, FILE *fh, gboolean use_color);
WS_DLL_PUBLIC void write_pdml_finale(FILE *fh);
// Implementations of proto_node_children_grouper_func
// Groups each child separately
WS_DLL_PUBLIC GSList *proto_node_group_children_by_unique(proto_node *node);
// Groups children by json key (children with the same json key get put in the same group)
WS_DLL_PUBLIC GSList *proto_node_group_children_by_json_key(proto_node *node);
WS_DLL_PUBLIC void write_json_preamble(FILE *fh);
WS_DLL_PUBLIC void write_json_proto_tree(output_fields_t* fields,
print_dissections_e print_dissections,
gboolean print_hex_data,
gchar **protocolfilter,
pf_flags protocolfilter_flags,
epan_dissect_t *edt, FILE *fh);
epan_dissect_t *edt,
proto_node_children_grouper_func node_children_grouper,
FILE *fh);
WS_DLL_PUBLIC void write_json_finale(FILE *fh);
WS_DLL_PUBLIC void write_ek_proto_tree(output_fields_t* fields,

2
file.c
View File

@ -2862,7 +2862,7 @@ write_json_packet(capture_file *cf, frame_data *fdata,
/* Write out the information in that tree. */
write_json_proto_tree(NULL, args->print_args->print_dissections,
args->print_args->print_hex, NULL, PF_NONE,
&args->edt, args->fh);
&args->edt, proto_node_group_children_by_unique, args->fh);
epan_dissect_reset(&args->edt);

View File

@ -156,6 +156,7 @@
* ui/commandline.c, so start tshark-specific options 1000 after this
*/
#define LONGOPT_COLOR (65536+1000)
#define LONGOPT_NO_DUPLICATE_KEYS (65536+1001)
#if 0
#define tshark_debug(...) g_warning(__VA_ARGS__)
@ -206,6 +207,9 @@ static output_fields_t* output_fields = NULL;
static gchar **protocolfilter = NULL;
static pf_flags protocolfilter_flags = PF_NONE;
static gboolean no_duplicate_keys = FALSE;
static proto_node_children_grouper_func node_children_grouper = proto_node_group_children_by_unique;
/* The line separator used between packets, changeable via the -S option */
static const char *separator = "";
@ -446,6 +450,9 @@ print_usage(FILE *output)
fprintf(output, " requires a terminal with 24-bit color support\n");
fprintf(output, " Also supplies color attributes to pdml and psml formats\n");
fprintf(output, " (Note that attributes are nonstandard)\n");
fprintf(output, " --no-duplicate-keys If -T json is specified, merge duplicate keys in an object\n");
fprintf(output, " into a single key with as value a json array containing all\n");
fprintf(output, " values");
fprintf(output, "\n");
fprintf(output, "Miscellaneous:\n");
@ -664,6 +671,7 @@ main(int argc, char *argv[])
LONGOPT_DISSECT_COMMON
{"export-objects", required_argument, NULL, LONGOPT_EXPORT_OBJECTS},
{"color", no_argument, NULL, LONGOPT_COLOR},
{"no-duplicate-keys", no_argument, NULL, LONGOPT_NO_DUPLICATE_KEYS},
{0, 0, 0, 0 }
};
gboolean arg_error = FALSE;
@ -1436,6 +1444,10 @@ main(int argc, char *argv[])
case LONGOPT_COLOR: /* print in color where appropriate */
dissect_color = TRUE;
break;
case LONGOPT_NO_DUPLICATE_KEYS:
no_duplicate_keys = TRUE;
node_children_grouper = proto_node_group_children_by_json_key;
break;
default:
case '?': /* Bad flag - print usage message */
switch(optopt) {
@ -1451,6 +1463,12 @@ main(int argc, char *argv[])
}
}
if (no_duplicate_keys && output_action != WRITE_JSON && output_action != WRITE_JSON_RAW) {
cmdarg_err("--no-duplicate-keys can only be used with \"-T json\" and \"-T jsonraw\"");
exit_status = INVALID_OPTION;
goto clean_exit;
}
/* If we specified output fields, but not the output field type... */
if ((WRITE_FIELDS != output_action && WRITE_XML != output_action && WRITE_JSON != output_action && WRITE_EK != output_action) && 0 != output_fields_num_fields(output_fields)) {
cmdarg_err("Output fields were specified with \"-e\", "
@ -3901,11 +3919,12 @@ print_packet(capture_file *cf, epan_dissect_t *edt)
case WRITE_JSON:
write_json_proto_tree(output_fields, print_dissections_expanded,
print_hex, protocolfilter, protocolfilter_flags,
edt, stdout);
edt, node_children_grouper, stdout);
return !ferror(stdout);
case WRITE_JSON_RAW:
write_json_proto_tree(output_fields, print_dissections_none, TRUE,
protocolfilter, protocolfilter_flags, edt, stdout);
protocolfilter, protocolfilter_flags,
edt, node_children_grouper, stdout);
return !ferror(stdout);
case WRITE_EK:
write_ek_proto_tree(output_fields, print_hex, protocolfilter,