Tshark: Optional packet summary for Elasticsearch

Currently, the Elasticsearch output exports the packet details and,
if -x is specified, the raw hex data.

This change adds the option of exporting the packet summary as well.

The default stays the same (packet details only), but now the existing
-P switch turns on printing of the packet summary. It also turns off
printing packet details, which can be turned back on with -V to print
both, and combined with -x to print all three: summary, details and
raw hex.

The packet summary is especially useful when exploring and visualizing
the data in Kibana, e.g. by displaying the summary "Info" field/column
in a table, as in the Wireshark GUI.

Change-Id: I2030490cfdd905572397bc3d5457ba49d805a5c4
Reviewed-on: https://code.wireshark.org/review/22716
Petri-Dish: Michael Mann <mmann78@netscape.net>
Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org>
Reviewed-by: Michael Mann <mmann78@netscape.net>
This commit is contained in:
Christoph Wurm 2017-07-19 14:24:47 +00:00 committed by Michael Mann
parent ddf6526256
commit af09db8bd3
4 changed files with 88 additions and 52 deletions

View File

@ -771,11 +771,14 @@ options are one of:
B<ek> Newline delimited JSON format for bulk import into Elasticsearch.
It can be used with B<-j> or B<-J> including the JSON filter or with
B<-x> flag to include raw hex-encoded packet data.
B<-x> to include raw hex-encoded packet data.
If B<-P> is specified, it will print the packet summary only; with both
B<-P> and B<-V>, it will print the packet summary and packet details.
If neither B<-P> nor B<-V> is used, it will print the packet details only.
Example of usage to import data into Elasticsearch:
tshark -T ek -j "http tcp ip" -x -r file.pcap > file.json
curl -XPUT http://elasticsearch:9200/_bulk --data-binary @file.json
tshark -T ek -j "http tcp ip" -P -V -x -r file.pcap > file.json
curl -H "Content-Type: application/x-ndjson" -XPOST http://elasticsearch:9200/_bulk --data-binary "@file.json"
B<fields> The values of fields specified with the B<-e> option, in a
form specified by the B<-E> option. For example,

View File

@ -128,6 +128,7 @@ static void write_json_proto_node_no_value(proto_node *node, write_json_data *da
static const char *proto_node_to_json_key(proto_node *node);
static void print_pdml_geninfo(epan_dissect_t *edt, FILE *fh);
static void write_ek_summary(column_info *cinfo, FILE *fh);
static void proto_tree_get_node_field_values(proto_node *node, gpointer data);
@ -352,7 +353,8 @@ write_pdml_proto_tree(output_fields_t* fields, gchar **protocolfilter, pf_flags
void
write_ek_proto_tree(output_fields_t* fields,
gboolean print_hex, gchar **protocolfilter,
gboolean print_summary, gboolean print_hex,
gchar **protocolfilter,
pf_flags protocolfilter_flags, epan_dissect_t *edt,
FILE *fh)
{
@ -373,7 +375,12 @@ write_ek_proto_tree(output_fields_t* fields,
fprintf(fh, "{\"index\" : {\"_index\": \"packets-%s\", \"_type\": \"pcap_file\", \"_score\": null}}\n", ts);
/* Timestamp added for time indexing in Elasticsearch */
fprintf(fh, "{\"timestamp\" : \"%" G_GUINT64_FORMAT "%03d\", \"layers\" : {", (guint64)edt->pi.abs_ts.secs, edt->pi.abs_ts.nsecs/1000000);
fprintf(fh, "{\"timestamp\" : \"%" G_GUINT64_FORMAT "%03d\"", (guint64)edt->pi.abs_ts.secs, edt->pi.abs_ts.nsecs/1000000);
if (print_summary)
write_ek_summary(edt->pi.cinfo, fh);
fprintf(fh, ", \"layers\" : {");
if (fields == NULL || fields->fields == NULL) {
/* Write out all fields */
@ -1157,6 +1164,21 @@ ek_check_protocolfilter(gchar **protocolfilter, const char *str)
* Finds a node's descendants to be printed as EK/JSON attributes.
*/
/*
 * Write the packet summary columns as members of an EK/JSON object.
 *
 * For each of the cinfo->num_cols columns this emits
 *     , "<lower-cased column title>": "<column data>"
 * to fh. The title is written through print_escaped_ek() and the data
 * through print_escaped_json().
 *
 * Every member is prefixed with a comma, so the caller must already have
 * written at least one member of the enclosing object before calling this.
 *
 * @param cinfo  Column information holding the titles and filled-in data.
 * @param fh     Output stream to write to.
 */
static void
write_ek_summary(column_info *cinfo, FILE *fh)
{
    gint i;

    for (i = 0; i < cinfo->num_cols; i++) {
        /*
         * g_ascii_strdown() returns a newly allocated copy; keep a handle
         * on it so it can be released once written, instead of leaking
         * one string per column for every packet.
         */
        gchar *key = g_ascii_strdown(cinfo->columns[i].col_title, -1);

        fputs(", \"", fh);
        print_escaped_ek(fh, key);
        g_free(key);

        fputs("\": \"", fh);
        print_escaped_json(fh, cinfo->columns[i].col_data);
        fputs("\"", fh);
    }
}
/* Write out a tree's data, and any child nodes, as JSON for EK */
static void
ek_fill_attr(proto_node *node, GSList **attr_list, GHashTable *attr_table, write_json_data *pdata)
{
field_info *fi = NULL;

View File

@ -115,6 +115,7 @@ WS_DLL_PUBLIC void write_json_proto_tree(output_fields_t* fields,
WS_DLL_PUBLIC void write_json_finale(FILE *fh);
WS_DLL_PUBLIC void write_ek_proto_tree(output_fields_t* fields,
gboolean print_summary,
gboolean print_hex_data,
gchar **protocolfilter,
pf_flags protocolfilter_flags,

104
tshark.c
View File

@ -193,7 +193,7 @@ typedef enum {
static output_action_e output_action;
static gboolean do_dissection; /* TRUE if we have to dissect each packet */
static gboolean print_packet_info; /* TRUE if we're to print packet information */
static gint print_summary = -1; /* TRUE if we're to print packet summary information */
static gboolean print_summary; /* TRUE if we're to print packet summary information */
static gboolean print_details; /* TRUE if we're to print packet details information */
static gboolean print_hex; /* TRUE if we're to print hex/ascii information */
static gboolean line_buffered;
@ -872,15 +872,6 @@ main(int argc, char *argv[])
}
}
/*
* Print packet summary information is the default, unless either -V or -x
* were specified and -P was not. Note that this is new behavior, which
* allows for the possibility of printing only hex/ascii output without
* necessarily requiring that either the summary or details be printed too.
*/
if (print_summary == -1)
print_summary = (print_details || print_hex) ? FALSE : TRUE;
/** Send All g_log messages to our own handler **/
log_flags =
@ -1323,8 +1314,8 @@ main(int argc, char *argv[])
print_summary = FALSE; /* Don't allow summary */
} else if (strcmp(optarg, "ek") == 0) {
output_action = WRITE_EK;
print_details = TRUE; /* Need details */
print_summary = FALSE; /* Don't allow summary */
if (!print_summary)
print_details = TRUE;
} else if (strcmp(optarg, "jsonraw") == 0) {
output_action = WRITE_JSON_RAW;
print_details = TRUE; /* Need details */
@ -1480,6 +1471,15 @@ main(int argc, char *argv[])
}
}
/*
* Printing packet summary information is the default if neither -V nor -x
* was specified. Note that this is new behavior, which allows for the
* possibility of printing only hex/ascii output without necessarily
* requiring that either the summary or details be printed too.
*/
if (!print_summary && !print_details && !print_hex)
print_summary = TRUE;
if (no_duplicate_keys && output_action != WRITE_JSON && output_action != WRITE_JSON_RAW) {
cmdarg_err("--no-duplicate-keys can only be used with \"-T json\" and \"-T jsonraw\"");
exit_status = INVALID_OPTION;
@ -3899,36 +3899,17 @@ print_columns(capture_file *cf, const epan_dissect_t *edt)
static gboolean
print_packet(capture_file *cf, epan_dissect_t *edt)
{
if (print_summary || output_fields_has_cols(output_fields)) {
if (print_summary || output_fields_has_cols(output_fields))
/* Just fill in the columns. */
epan_dissect_fill_in_columns(edt, FALSE, TRUE);
if (print_summary) {
/* Now print them. */
switch (output_action) {
/* Print summary columns and/or protocol tree */
switch (output_action) {
case WRITE_TEXT:
if (!print_columns(cf, edt))
return FALSE;
break;
case WRITE_XML:
write_psml_columns(edt, stdout, dissect_color);
return !ferror(stdout);
case WRITE_FIELDS: /*No non-verbose "fields" format */
case WRITE_JSON:
case WRITE_EK:
case WRITE_JSON_RAW:
g_assert_not_reached();
break;
}
}
}
if (print_details) {
/* Print the information in the protocol tree. */
switch (output_action) {
case WRITE_TEXT:
case WRITE_TEXT:
if (print_summary && !print_columns(cf, edt))
return FALSE;
if (print_details) {
if (!proto_tree_print(print_details ? print_dissections_expanded : print_dissections_none,
print_hex, edt, output_only_tables, print_stream))
return FALSE;
@ -3936,32 +3917,61 @@ print_packet(capture_file *cf, epan_dissect_t *edt)
if (!print_line(print_stream, 0, separator))
return FALSE;
}
break;
}
break;
case WRITE_XML:
case WRITE_XML:
if (print_summary) {
write_psml_columns(edt, stdout, dissect_color);
return !ferror(stdout);
}
if (print_details) {
write_pdml_proto_tree(output_fields, protocolfilter, protocolfilter_flags, edt, stdout, dissect_color);
printf("\n");
return !ferror(stdout);
case WRITE_FIELDS:
}
break;
case WRITE_FIELDS:
if (print_summary) {
/*No non-verbose "fields" format */
g_assert_not_reached();
}
if (print_details) {
write_fields_proto_tree(output_fields, edt, &cf->cinfo, stdout);
printf("\n");
return !ferror(stdout);
case WRITE_JSON:
}
break;
case WRITE_JSON:
if (print_summary)
g_assert_not_reached();
if (print_details) {
write_json_proto_tree(output_fields, print_dissections_expanded,
print_hex, protocolfilter, protocolfilter_flags,
edt, node_children_grouper, stdout);
return !ferror(stdout);
case WRITE_JSON_RAW:
}
break;
case WRITE_JSON_RAW:
if (print_summary)
g_assert_not_reached();
if (print_details) {
write_json_proto_tree(output_fields, print_dissections_none, TRUE,
protocolfilter, protocolfilter_flags,
edt, node_children_grouper, stdout);
return !ferror(stdout);
case WRITE_EK:
write_ek_proto_tree(output_fields, print_hex, protocolfilter,
protocolfilter_flags, edt, stdout);
return !ferror(stdout);
}
break;
case WRITE_EK:
write_ek_proto_tree(output_fields, print_summary, print_hex, protocolfilter,
protocolfilter_flags, edt, stdout);
return !ferror(stdout);
}
if (print_hex) {
if (print_summary || print_details) {
if (!print_line(print_stream, 0, ""))