tshark: add -G elastic-mapping report.

This option generates an ElasticSearch mapping file as described here:
https://www.elastic.co/blog/analyzing-network-packets-with-wireshark-elasticsearch-and-kibana

It leverages the JSON-GLib library.

Change-Id: Iff25f991e87d3da07bf06654e353fb785799dde9
Reviewed-on: https://code.wireshark.org/review/26848
Petri-Dish: Dario Lombardo <lomato@gmail.com>
Tested-by: Petri Dish Buildbot
Reviewed-by: Peter Wu <peter@lekensteyn.nl>
Reviewed-by: Dario Lombardo <lomato@gmail.com>
This commit is contained in:
Dario Lombardo 2018-04-09 12:31:38 +02:00
parent 88435354c0
commit 511c2e166a
11 changed files with 311 additions and 3 deletions

View File

@ -894,6 +894,8 @@ endif()
set(PACKAGELIST ${PACKAGELIST} LIBSSH)
set(LIBSSH_OPTIONS "0.6")
# Add JSON-GLib to the list of optional packages. The detection result is
# consumed further down through JSONGLIB_FOUND.
# NOTE(review): presumably every PACKAGELIST entry is later handed to
# find_package(); that loop is outside this view - confirm.
set(PACKAGELIST ${PACKAGELIST} JSONGLIB)
if(ENABLE_PCAP)
set(PACKAGELIST ${PACKAGELIST} PCAP)
endif()
@ -1129,6 +1131,9 @@ endif()
if(LIBSSH_FOUND)
set(HAVE_LIBSSH 1)
endif()
# Set HAVE_JSONGLIB when JSON-GLib was found. The configuration header
# template picks it up through "#cmakedefine HAVE_JSONGLIB 1", which is what
# the C sources test with "#ifdef HAVE_JSONGLIB".
if(JSONGLIB_FOUND)
set(HAVE_JSONGLIB 1)
endif()
if(NGHTTP2_FOUND)
set(HAVE_NGHTTP2 1)
endif()
@ -1679,6 +1684,11 @@ set(LIBEPAN_LIBS
${WINSPARKLE_LIBRARIES}
)
# JSON-GLib is optional: add its libraries to LIBEPAN_LIBS only when the
# package was found.
if(JSONGLIB_FOUND)
list(APPEND LIBEPAN_LIBS ${JSONGLIB_LIBRARIES})
endif()
if(WIN32)
set(_dll_output_dir "${DATAFILE_DIR}")
add_custom_target(copy_cli_dlls)
@ -1717,6 +1727,9 @@ if(WIN32)
if (LIBSSH_FOUND)
list (APPEND OPTIONAL_DLLS "${LIBSSH_DLL_DIR}/${LIBSSH_DLL}")
endif(LIBSSH_FOUND)
# Add the JSON-GLib DLL to OPTIONAL_DLLS. JSONGLIB_DLL_DIR and JSONGLIB_DLL
# are cache variables set by the JSON-GLib find module on WIN32.
if (JSONGLIB_FOUND)
list (APPEND OPTIONAL_DLLS "${JSONGLIB_DLL_DIR}/${JSONGLIB_DLL}")
endif(JSONGLIB_FOUND)
foreach( _dll ${GCRYPT_DLLS} )
list (APPEND OPTIONAL_DLLS "${GCRYPT_DLL_DIR}/${_dll}")
endforeach(_dll)
@ -2346,6 +2359,7 @@ if(BUILD_tshark)
${SHARK_COMMON_SRC}
${CMAKE_BINARY_DIR}/image/tshark.rc
)
add_executable(tshark ${tshark_FILES})
add_dependencies(tshark version)
set_extra_executable_properties(tshark "Executables")

View File

@ -0,0 +1,70 @@
# - Try to find JsonGlib-1.0
# Once done, this will define
#
#  JSONGLIB_FOUND - system has JsonGlib
#  JSONGLIB_INCLUDE_DIRS - the JsonGlib include directories
#  JSONGLIB_LIBRARIES - link these to use JsonGlib
#
# On Windows it additionally defines the cache variables
#
#  JSONGLIB_DLL_DIR - directory expected to contain the JsonGlib DLL
#  JSONGLIB_DLL - DLL file name, relative to JSONGLIB_DLL_DIR

include(FindWSWinLibs)
FindWSWinLibs("libjson-glib-.*" "JSONGLIB_HINTS")

find_path(JSONGLIB_INCLUDE_DIR
  NAMES
    json-glib/json-glib.h
  HINTS
    ${JSONGLIB_HINTS}
  PATHS
    /usr/include
    /usr/local/include
    /opt/local/include
    /sw/include
    ${CMAKE_INCLUDE_PATH}
    ${CMAKE_INSTALL_PREFIX}/include
  PATH_SUFFIXES
    json-glib-1.0
)

find_library(JSONGLIB_LIBRARY
  NAMES
    json-glib-1.0
    json-glib-1.0-0
  HINTS
    ${JSONGLIB_HINTS}
  PATHS
    /usr/lib
    /usr/local/lib
    /opt/local/lib
    /sw/lib
    ${CMAKE_LIBRARY_PATH}
    ${CMAKE_INSTALL_PREFIX}/lib
)

if(WIN32)
  set(JSONGLIB_DLL_DIR "${JSONGLIB_HINTS}/bin"
    CACHE PATH "Path to libjson-glib DLL"
  )
  # The glob result must land in the same variable that is read below;
  # otherwise JSONGLIB_DLL is always empty.
  file(GLOB _jsonglib_dll RELATIVE "${JSONGLIB_DLL_DIR}"
    "${JSONGLIB_DLL_DIR}/json-glib-1.0.dll"
  )
  set(JSONGLIB_DLL ${_jsonglib_dll}
    # We're storing filenames only. Should we use STRING instead?
    CACHE FILEPATH "libjson-glib DLL file name"
  )
  mark_as_advanced(JSONGLIB_DLL_DIR JSONGLIB_DLL)
endif()

# Publish the plural result variables only when both pieces were located.
if(JSONGLIB_INCLUDE_DIR AND JSONGLIB_LIBRARY)
  set(JSONGLIB_INCLUDE_DIRS
    ${JSONGLIB_INCLUDE_DIR}
  )
  set(JSONGLIB_LIBRARIES
    ${JSONGLIB_LIBRARY}
  )
endif()

# handle the QUIETLY and REQUIRED arguments and set JSONGLIB_FOUND to TRUE if
# all listed variables are TRUE. No version check is performed.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(JSONGLIB
  REQUIRED_VARS JSONGLIB_LIBRARY JSONGLIB_INCLUDE_DIR)

View File

@ -71,6 +71,9 @@
/* Define if LIBSSH support is enabled */
#cmakedefine HAVE_LIBSSH 1
/* Define if JSONGLIB support is enabled */
#cmakedefine HAVE_JSONGLIB 1
/* Define if LIBSSH has ssh_userauth_agent() function */
#cmakedefine HAVE_SSH_USERAUTH_AGENT 1

View File

@ -448,6 +448,8 @@ is one record per line. The fields are tab-delimited.
* Field 5 = protocol name
* Field 6 = "decode as" support
B<elastic-mapping> Dumps the ElasticSearch mapping file to stdout.
B<fieldcount> Dumps the number of header fields to stdout.
B<fields> Dumps the contents of the registration database to
@ -835,6 +837,10 @@ Example of usage to import data into Elasticsearch:
tshark -T ek -j "http tcp ip" -P -V -x -r file.pcap > file.json
curl -H "Content-Type: application/x-ndjson" -XPOST http://elasticsearch:9200/_bulk --data-binary "@file.json"
Elasticsearch requires a mapping file to be loaded as a template for the
packets-* index in order to convert Wireshark types to Elasticsearch types.
This file can be auto-generated with the command "tshark -G elastic-mapping".
B<fields> The values of fields specified with the B<-e> option, in a
form specified by the B<-E> option. For example,

View File

@ -42,6 +42,7 @@ since version 2.6.0:
* The membership operator now supports ranges, allowing display filters such as
`tcp.port in {4430..4434}` to be expressed. See the User's Guide, chapter
_Building display filter expressions_ for details.
* tshark now has a "-G elastic-mapping" option to generate an ElasticSearch mapping file.
//=== Removed Dissectors

View File

@ -46,6 +46,10 @@
#include <wsutil/ws_printf.h> /* ws_debug_printf/ws_g_warning */
#include <wsutil/crash_info.h>
#ifdef HAVE_JSONGLIB
#include <json-glib/json-glib.h>
#endif
/* Ptvcursor limits */
#define SUBTREE_ONCE_ALLOCATION_NUMBER 8
#define SUBTREE_MAX_LEVELS 256
@ -9957,6 +9961,206 @@ proto_registrar_dump_fieldcount(void)
return (gpa_hfinfo.allocated_len > PROTO_PRE_ALLOC_HF_FIELDS_MEM);
}
#ifdef HAVE_JSONGLIB
/*
 * Adds the fixed head of the mapping to the builder: the "template" index
 * pattern and a "settings" object carrying the total-fields limit.
 *
 * Member names are set directly on the builder, so the caller is expected to
 * have an object open already. Returns the builder that was passed in.
 */
static JsonBuilder*
elastic_add_base_mapping(JsonBuilder* builder)
{
    const gint64 total_fields_limit = 1000000;

    /* "template": "packets-*" */
    json_builder_set_member_name(builder, "template");
    json_builder_add_string_value(builder, "packets-*");

    /* "settings": { "index.mapping.total_fields.limit": <limit> } */
    json_builder_set_member_name(builder, "settings");
    json_builder_begin_object(builder);
    json_builder_set_member_name(builder, "index.mapping.total_fields.limit");
    json_builder_add_int_value(builder, total_fields_limit);
    json_builder_end_object(builder);

    return builder;
}
/*
 * Maps a Wireshark field type (FT_*) to the name of the Elasticsearch field
 * datatype written into the generated mapping.
 *
 * Integer types are mapped by value range: Elasticsearch "short", "integer"
 * and "long" are signed 16, 32 and 64 bit, so an unsigned N-bit Wireshark type
 * needs the next wider Elasticsearch type.
 *
 * @param type one of the FT_* field type values
 * @return a static string naming the Elasticsearch type; the caller must not
 *         modify or free it. Unknown types trigger
 *         DISSECTOR_ASSERT_NOT_REACHED().
 */
gchar* ws_type_to_elastic(guint type)
{
    switch(type) {
        case FT_INT8:
        case FT_UINT8:
            return "short";
        case FT_INT16:
        case FT_UINT16:
        case FT_INT24:
        case FT_UINT24:
        case FT_INT32:
            return "integer";
        /* Unsigned 32 bit and everything wider does not fit a signed 32 bit
         * "integer". */
        case FT_UINT32:
        case FT_FRAMENUM:
        case FT_INT40:
        case FT_UINT40:
        case FT_INT48:
        case FT_UINT48:
        case FT_INT56:
        case FT_UINT56:
        case FT_INT64:
        /* NOTE(review): FT_UINT64 values above the signed 64 bit maximum are
         * not representable in "long" either; no wider type is used here. */
        case FT_UINT64:
            return "long";
        case FT_FLOAT:
            return "float";
        case FT_DOUBLE:
            return "double";
        case FT_IPv6:
        case FT_IPv4:
            return "ip";
        case FT_ABSOLUTE_TIME:
        case FT_RELATIVE_TIME:
            return "date";
        case FT_BYTES:
        case FT_UINT_BYTES:
            return "byte";
        case FT_BOOLEAN:
            return "boolean";
        case FT_NONE:
        case FT_STRING:
        case FT_ETHER:
        case FT_GUID:
        case FT_OID:
        case FT_STRINGZ:
        case FT_UINT_STRING:
        case FT_CHAR:
        case FT_AX25:
        case FT_REL_OID:
        case FT_IEEE_11073_SFLOAT:
        case FT_IEEE_11073_FLOAT:
        case FT_STRINGZPAD:
        case FT_PROTOCOL:
        case FT_EUI64:
        case FT_IPXNET:
        case FT_SYSTEM_ID:
        case FT_FCWWN:
        case FT_VINES:
            return "string";
        default:
            DISSECTOR_ASSERT_NOT_REACHED();
    }
}
/*
 * Replaces every '.' in str with '_', modifying the string in place.
 *
 * @param str NUL-terminated, writable string; may be NULL
 * @return str itself (NULL if str was NULL)
 */
static gchar*
dot_to_underscore(gchar* str)
{
    gchar *p;

    if (str == NULL)
        return NULL;

    /* Single pass over the string; avoids calling strlen() on every
     * iteration. */
    for (p = str; *p != '\0'; p++) {
        if (*p == '.')
            *p = '_';
    }
    return str;
}
/* Dumps a mapping file for ElasticSearch
*/
void
proto_registrar_dump_elastic(void)
{
header_field_info *hfinfo;
header_field_info *parent_hfinfo;
JsonGenerator* generator;
JsonBuilder* builder;
JsonNode* root;
gsize length;
guint i;
gboolean open_object = TRUE;
const char* prev_proto = NULL;
gchar* data;
gchar* str;
/*
To help traking down the json tree, objects have been appended with a comment:
n.label -> where n is the indentation level and label the name of the object
*/
builder = json_builder_new();
json_builder_begin_object(builder); // 1.root
builder = elastic_add_base_mapping(builder);
json_builder_set_member_name(builder, "mappings");
json_builder_begin_object(builder); // 2.mappings
json_builder_set_member_name(builder, "pcap_file");
json_builder_begin_object(builder); // 3.pcap_file
json_builder_set_member_name(builder, "dynamic");
json_builder_add_boolean_value(builder, FALSE);
json_builder_set_member_name(builder, "properties");
json_builder_begin_object(builder); // 4.properties
json_builder_set_member_name(builder, "timestamp");
json_builder_begin_object(builder); // 5.timestamp
json_builder_set_member_name(builder, "type");
json_builder_add_string_value(builder, "date");
json_builder_end_object(builder); // 5.timestamp
json_builder_set_member_name(builder, "layers");
json_builder_begin_object(builder); // 5.layers
json_builder_set_member_name(builder, "properties");
json_builder_begin_object(builder); // 6.properties
for (i = 0; i < gpa_hfinfo.len; i++) {
if (gpa_hfinfo.hfi[i] == NULL)
continue; /* This is a deregistered protocol or header field */
PROTO_REGISTRAR_GET_NTH(i, hfinfo);
/*
* Skip the pseudo-field for "proto_tree_add_text()" since
* we don't want it in the list of filterable fields.
*/
if (hfinfo->id == hf_text_only)
continue;
if (!proto_registrar_is_protocol(i)) {
PROTO_REGISTRAR_GET_NTH(hfinfo->parent, parent_hfinfo);
if (prev_proto && g_strcmp0(parent_hfinfo->abbrev, prev_proto)) {
json_builder_end_object(builder); // 8.properties
json_builder_end_object(builder); // 7.parent_hfinfo->abbrev
open_object = TRUE;
}
prev_proto = parent_hfinfo->abbrev;
if (open_object) {
json_builder_set_member_name(builder, parent_hfinfo->abbrev);
json_builder_begin_object(builder); // 7.parent_hfinfo->abbrev
json_builder_set_member_name(builder, "properties");
json_builder_begin_object(builder); // 8.properties
open_object = FALSE;
}
str = g_strdup(hfinfo->abbrev);
json_builder_set_member_name(builder, dot_to_underscore(str));
g_free(str);
json_builder_begin_object(builder); // 9.hfinfo->abbrev
json_builder_set_member_name(builder, "type");
json_builder_add_string_value(builder, ws_type_to_elastic(hfinfo->type));
json_builder_end_object(builder); // 9.hfinfo->abbrev
}
}
if (prev_proto) {
json_builder_end_object(builder); // 8.properties
json_builder_end_object(builder); // 7.parent_hfinfo->abbrev
}
json_builder_end_object(builder); // 6.properties
json_builder_end_object(builder); // 5.layers
json_builder_end_object(builder); // 4.properties
json_builder_end_object(builder); // 3.pcap_file
json_builder_end_object(builder); // 2.mappings
DISSECTOR_ASSERT(json_builder_end_object(builder)); // 1.root
generator = json_generator_new();
json_generator_set_pretty(generator, TRUE);
root = json_builder_get_root(builder);
json_generator_set_root(generator, root);
json_node_unref(root);
g_object_unref(builder);
data = json_generator_to_data(generator, &length);
g_object_unref(generator);
ws_debug_printf("%s\n", data);
g_free(data);
}
#endif
/* Dumps the contents of the registration database to stdout. An independent
* program can take this output and format it into nice tables or HTML or

View File

@ -2453,6 +2453,11 @@ WS_DLL_PUBLIC void proto_registrar_dump_protocols(void);
/** Dumps a glossary of the field value strings or true/false strings to STDOUT */
WS_DLL_PUBLIC void proto_registrar_dump_values(void);
#ifdef HAVE_JSONGLIB
/** Dumps a mapping file for loading tshark output into ElasticSearch.
    Only declared when HAVE_JSONGLIB is defined. */
WS_DLL_PUBLIC void proto_registrar_dump_elastic(void);
#endif
/** Dumps the number of protocol and field registrations to STDOUT.
@return FALSE if we pre-allocated enough fields, TRUE otherwise. */
WS_DLL_PUBLIC gboolean proto_registrar_dump_fieldcount(void);

View File

@ -47,7 +47,7 @@ ADDITIONAL_LIST="libnl-3-dev qttools5-dev qttools5-dev-tools libgtk-3-dev \
qtmultimedia5-dev liblua5.2-dev libnl-cli-3-dev \
libparse-yapp-perl qt5-default cmake libcap-dev \
liblz4-dev libsnappy-dev libspandsp-dev libxml2-dev \
git"
git libjson-glib-dev"
# Adds package $2 to list variable $1 if the package is found
add_package() {

View File

@ -13,7 +13,7 @@
brew update
#install some libs needed by Wireshark
brew install c-ares glib libgcrypt gnutls lua cmake nghttp2 snappy lz4 libxml2
brew install c-ares glib libgcrypt gnutls lua cmake nghttp2 snappy lz4 libxml2 json-glib
#install Qt5
brew install qt5

View File

@ -27,7 +27,7 @@ libpcap-devel zlib-devel"
ADDITIONAL_LIST="libnl3-devel libnghttp2-devel libcap libcap-devel \
libgcrypt-devel libssh-devel krb5-devel perl-Parse-Yapp sbc-devel libsmi-devel \
snappy-devel lz4"
snappy-devel lz4 json-glib-devel"
# Guess which package manager we will use
PM=`which zypper 2> /dev/null ||

View File

@ -476,6 +476,7 @@ glossary_option_help(void)
fprintf(output, " -G column-formats dump column format codes and exit\n");
fprintf(output, " -G decodes dump \"layer type\"/\"decode as\" associations and exit\n");
fprintf(output, " -G dissector-tables dump dissector table names, types, and properties\n");
fprintf(output, " -G elastic-mapping dump ElasticSearch mapping file\n");
fprintf(output, " -G fieldcount dump count of header fields and exit\n");
fprintf(output, " -G fields dump fields glossary and exit\n");
fprintf(output, " -G ftypes dump field type basic and descriptive names\n");
@ -964,6 +965,10 @@ main(int argc, char *argv[])
write_prefs(NULL);
else if (strcmp(argv[2], "dissector-tables") == 0)
dissector_dump_dissector_tables();
#ifdef HAVE_JSONGLIB
else if (strcmp(argv[2], "elastic-mapping") == 0)
proto_registrar_dump_elastic();
#endif
else if (strcmp(argv[2], "fieldcount") == 0) {
/* return value for the test suite */
exit_status = proto_registrar_dump_fieldcount();