From 511c2e166a6d3eeb37930a3dd7f40056498456ca Mon Sep 17 00:00:00 2001
From: Dario Lombardo
Date: Mon, 9 Apr 2018 12:31:38 +0200
Subject: [PATCH] tshark: add -G elastic-mapping report.

This option generates an ElasticSearch mapping file as described here:
https://www.elastic.co/blog/analyzing-network-packets-with-wireshark-elasticsearch-and-kibana

It leverages the Glib-json library.

Change-Id: Iff25f991e87d3da07bf06654e353fb785799dde9
Reviewed-on: https://code.wireshark.org/review/26848
Petri-Dish: Dario Lombardo
Tested-by: Petri Dish Buildbot
Reviewed-by: Peter Wu
Reviewed-by: Dario Lombardo
---
 CMakeLists.txt                   |  14 +++
 cmake/modules/FindJSONGLIB.cmake |  70 +++++++++++
 cmakeconfig.h.in                 |   3 +
 doc/tshark.pod                   |   6 +
 docbook/release-notes.asciidoc   |   1 +
 epan/proto.c                     | 206 +++++++++++++++++++++++++++++++
 epan/proto.h                     |   5 +
 tools/debian-setup.sh            |   2 +-
 tools/macos-setup-brew.sh        |   2 +-
 tools/rpm_setup.sh               |   2 +-
 tshark.c                         |   7 ++
 11 files changed, 315 insertions(+), 3 deletions(-)
 create mode 100644 cmake/modules/FindJSONGLIB.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 492cd9fff7..4337b8a2fc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -894,6 +894,8 @@ endif()
 set(PACKAGELIST ${PACKAGELIST} LIBSSH)
 set(LIBSSH_OPTIONS "0.6")
 
+set(PACKAGELIST ${PACKAGELIST} JSONGLIB)
+
 if(ENABLE_PCAP)
 	set(PACKAGELIST ${PACKAGELIST} PCAP)
 endif()
@@ -1129,6 +1131,9 @@ endif()
 if(LIBSSH_FOUND)
 	set(HAVE_LIBSSH 1)
 endif()
+if(JSONGLIB_FOUND)
+	set(HAVE_JSONGLIB 1)
+endif()
 if(NGHTTP2_FOUND)
 	set(HAVE_NGHTTP2 1)
 endif()
@@ -1679,6 +1684,11 @@ set(LIBEPAN_LIBS
 	${WINSPARKLE_LIBRARIES}
 )
 
+if(JSONGLIB_FOUND)
+	list(APPEND LIBEPAN_LIBS ${JSONGLIB_LIBRARIES})
+endif()
+
+
 if(WIN32)
 	set(_dll_output_dir "${DATAFILE_DIR}")
 	add_custom_target(copy_cli_dlls)
@@ -1717,6 +1727,9 @@ if(WIN32)
 	if (LIBSSH_FOUND)
 		list (APPEND OPTIONAL_DLLS "${LIBSSH_DLL_DIR}/${LIBSSH_DLL}")
 	endif(LIBSSH_FOUND)
+	if (JSONGLIB_FOUND)
+		list (APPEND OPTIONAL_DLLS "${JSONGLIB_DLL_DIR}/${JSONGLIB_DLL}")
+	endif(JSONGLIB_FOUND)
 	foreach( _dll ${GCRYPT_DLLS} )
 		list (APPEND OPTIONAL_DLLS "${GCRYPT_DLL_DIR}/${_dll}")
 	endforeach(_dll)
@@ -2346,6 +2359,7 @@ if(BUILD_tshark)
 		${SHARK_COMMON_SRC}
 		${CMAKE_BINARY_DIR}/image/tshark.rc
 	)
+
 	add_executable(tshark ${tshark_FILES})
 	add_dependencies(tshark version)
 	set_extra_executable_properties(tshark "Executables")
diff --git a/cmake/modules/FindJSONGLIB.cmake b/cmake/modules/FindJSONGLIB.cmake
new file mode 100644
index 0000000000..514bbdd6af
--- /dev/null
+++ b/cmake/modules/FindJSONGLIB.cmake
@@ -0,0 +1,70 @@
+# - Try to find JsonGlib-1.0
+# Once done, this will define
+#
+#  JSONGLIB_FOUND - system has JsonGlib
+#  JSONGLIB_INCLUDE_DIRS - the JsonGlib include directories
+#  JSONGLIB_LIBRARIES - link these to use JsonGlib
+
+include(FindWSWinLibs)
+FindWSWinLibs("libjson-glib-.*" "JSONGLIB_HINTS")
+
+find_path(JSONGLIB_INCLUDE_DIR
+  NAMES
+    json-glib/json-glib.h
+  HINTS
+    ${JSONGLIB_HINTS}
+  PATHS
+    /usr/include
+    /usr/local/include
+    /opt/local/include
+    /sw/include
+    ${CMAKE_INCLUDE_PATH}
+    ${CMAKE_INSTALL_PREFIX}/include
+  PATH_SUFFIXES
+    json-glib-1.0
+)
+
+find_library(JSONGLIB_LIBRARY
+  NAMES
+    json-glib-1.0
+    json-glib-1.0-0
+  HINTS
+    ${JSONGLIB_HINTS}
+  PATHS
+    /usr/lib
+    /usr/local/lib
+    /opt/local/lib
+    /sw/lib
+    ${CMAKE_LIBRARY_PATH}
+    ${CMAKE_INSTALL_PREFIX}/lib
+)
+
+if(WIN32)
+  set(JSONGLIB_DLL_DIR "${JSONGLIB_HINTS}/bin"
+    CACHE PATH "Path to libjson-glib DLL"
+  )
+  file(GLOB _jsonglib_dll RELATIVE "${JSONGLIB_DLL_DIR}"
+    "${JSONGLIB_DLL_DIR}/json-glib-1.0.dll"
+  )
+  set(JSONGLIB_DLL ${_jsonglib_dll}
+    # We're storing filenames only. Should we use STRING instead?
+    CACHE FILEPATH "libjson-glib DLL file name"
+  )
+  mark_as_advanced(JSONGLIB_DLL_DIR JSONGLIB_DLL)
+endif()
+
+if(JSONGLIB_INCLUDE_DIR AND JSONGLIB_LIBRARY)
+  set(JSONGLIB_INCLUDE_DIRS
+    ${JSONGLIB_INCLUDE_DIR}
+  )
+  set(JSONGLIB_LIBRARIES
+    ${JSONGLIB_LIBRARY}
+  )
+endif()
+
+# handle the QUIETLY and REQUIRED arguments and set JSONGLIB_FOUND to TRUE if
+# all listed variables are TRUE and the requested version matches.
+include(FindPackageHandleStandardArgs)
+
+find_package_handle_standard_args(JSONGLIB
+  REQUIRED_VARS JSONGLIB_LIBRARY JSONGLIB_INCLUDE_DIR)
diff --git a/cmakeconfig.h.in b/cmakeconfig.h.in
index 6a1c993074..2c7ba5c75e 100644
--- a/cmakeconfig.h.in
+++ b/cmakeconfig.h.in
@@ -71,6 +71,9 @@
 /* Define if LIBSSH support is enabled */
 #cmakedefine HAVE_LIBSSH 1
 
+/* Define if JSONGLIB support is enabled */
+#cmakedefine HAVE_JSONGLIB 1
+
 /* Define if LIBSSH has ssh_userauth_agent() function */
 #cmakedefine HAVE_SSH_USERAUTH_AGENT 1
 
diff --git a/doc/tshark.pod b/doc/tshark.pod
index 08fc22e920..9ccfdbca7b 100644
--- a/doc/tshark.pod
+++ b/doc/tshark.pod
@@ -448,6 +448,8 @@ is one record per line. The fields are tab-delimited.
   * Field 5 = protocol name
   * Field 6 = "decode as" support
 
+B<elastic-mapping> Dumps the ElasticSearch mapping file to stdout.
+
 B<fieldcount> Dumps the number of header fields to stdout.
 
 B<fields> Dumps the contents of the registration database to
@@ -835,6 +837,10 @@ Example of usage to import data into Elasticsearch:
   tshark -T ek -j "http tcp ip" -P -V -x -r file.pcap > file.json
   curl -H "Content-Type: application/x-ndjson" -XPOST http://elasticsearch:9200/_bulk --data-binary "@file.json"
 
+Elastic requires a mapping file to be loaded as template for packets-*
+index in order to convert wireshark types to elastic types. This file
+can be auto-generated with the command "tshark -G elastic-mapping".
+
 B<fields> The values of fields specified with the B<-e> option, in a form
 specified by the B<-E> option.  For example,
 
diff --git a/docbook/release-notes.asciidoc b/docbook/release-notes.asciidoc
index f8bc5934af..9c1cb552f0 100644
--- a/docbook/release-notes.asciidoc
+++ b/docbook/release-notes.asciidoc
@@ -42,6 +42,7 @@ since version 2.6.0:
 * The membership operator now supports ranges, allowing display filters such as
   `tcp.port in {4430..4434}` to be expressed. See the User's Guide, chapter
   _Building display filter expressions_ for details.
+* tshark now has a "-G elastic-mapping" option to generate an ElasticSearch mapping file.
 
 //=== Removed Dissectors
 
diff --git a/epan/proto.c b/epan/proto.c
index 71fa0901ba..feb92ae45b 100644
--- a/epan/proto.c
+++ b/epan/proto.c
@@ -46,6 +46,10 @@
 #include /* ws_debug_printf/ws_g_warning */
 #include
 
+#ifdef HAVE_JSONGLIB
+#include <json-glib/json-glib.h>
+#endif
+
 /* Ptvcursor limits */
 #define SUBTREE_ONCE_ALLOCATION_NUMBER 8
 #define SUBTREE_MAX_LEVELS 256
@@ -9957,6 +9961,208 @@ proto_registrar_dump_fieldcount(void)
 	return (gpa_hfinfo.allocated_len > PROTO_PRE_ALLOC_HF_FIELDS_MEM);
 }
 
+#ifdef HAVE_JSONGLIB
+
+/* Adds the "template" pattern and the index "settings" object to builder. */
+static JsonBuilder*
+elastic_add_base_mapping(JsonBuilder* builder)
+{
+	json_builder_set_member_name(builder, "template");
+	json_builder_add_string_value(builder, "packets-*");
+
+	json_builder_set_member_name(builder, "settings");
+	json_builder_begin_object(builder);
+	json_builder_set_member_name(builder, "index.mapping.total_fields.limit");
+	json_builder_add_int_value(builder, 1000000);
+	json_builder_end_object(builder);
+
+	return builder;
+}
+
+/* Maps a Wireshark field type (FT_*) to an ElasticSearch field type name. */
+static const gchar*
+ws_type_to_elastic(guint type)
+{
+	switch(type) {
+		case FT_UINT16:
+		case FT_INT16:
+		case FT_INT32:
+		case FT_UINT24:
+		case FT_INT24:
+			return "integer";
+		case FT_INT8:
+		case FT_UINT8:
+			return "short";
+		/* These can exceed the signed 32-bit range of "integer". */
+		case FT_UINT32:
+		case FT_FRAMENUM:
+		case FT_UINT40:
+		case FT_UINT48:
+		case FT_UINT56:
+		case FT_UINT64:
+		case FT_INT40:
+		case FT_INT48:
+		case FT_INT56:
+		case FT_INT64:
+			return "long";
+		case FT_FLOAT:
+		case FT_DOUBLE:
+			return "float";
+		case FT_IPv6:
+		case FT_IPv4:
+			return "ip";
+		case FT_ABSOLUTE_TIME:
+		case FT_RELATIVE_TIME:
+			return "date";
+		case FT_BYTES:
+		case FT_UINT_BYTES:
+			return "byte";
+		case FT_BOOLEAN:
+			return "boolean";
+		case FT_NONE:
+		case FT_STRING:
+		case FT_ETHER:
+		case FT_GUID:
+		case FT_OID:
+		case FT_STRINGZ:
+		case FT_UINT_STRING:
+		case FT_CHAR:
+		case FT_AX25:
+		case FT_REL_OID:
+		case FT_IEEE_11073_SFLOAT:
+		case FT_IEEE_11073_FLOAT:
+		case FT_STRINGZPAD:
+		case FT_PROTOCOL:
+		case FT_EUI64:
+		case FT_IPXNET:
+		case FT_SYSTEM_ID:
+		case FT_FCWWN:
+		case FT_VINES:
+			return "string";
+		default:
+			DISSECTOR_ASSERT_NOT_REACHED();
+	}
+}
+
+/* Replaces every '.' in str with '_' in place and returns str. */
+static gchar*
+dot_to_underscore(gchar* str)
+{
+	return g_strdelimit(str, ".", '_');
+}
+
+/* Dumps a mapping file for ElasticSearch
+ */
+void
+proto_registrar_dump_elastic(void)
+{
+	header_field_info *hfinfo;
+	header_field_info *parent_hfinfo;
+	JsonGenerator* generator;
+	JsonBuilder* builder;
+	JsonNode* root;
+	gsize length;
+	guint i;
+	gboolean open_object = TRUE;
+	const char* prev_proto = NULL;
+	gchar* data;
+	gchar* str;
+
+	/*
+	To help tracking down the json tree, objects have been appended with a comment:
+	n.label -> where n is the indentation level and label the name of the object
+	*/
+
+	builder = json_builder_new();
+	json_builder_begin_object(builder); // 1.root
+	builder = elastic_add_base_mapping(builder);
+
+	json_builder_set_member_name(builder, "mappings");
+	json_builder_begin_object(builder); // 2.mappings
+	json_builder_set_member_name(builder, "pcap_file");
+
+	json_builder_begin_object(builder); // 3.pcap_file
+	json_builder_set_member_name(builder, "dynamic");
+	json_builder_add_boolean_value(builder, FALSE);
+
+	json_builder_set_member_name(builder, "properties");
+	json_builder_begin_object(builder); // 4.properties
+	json_builder_set_member_name(builder, "timestamp");
+	json_builder_begin_object(builder); // 5.timestamp
+	json_builder_set_member_name(builder, "type");
+	json_builder_add_string_value(builder, "date");
+	json_builder_end_object(builder); // 5.timestamp
+
+	json_builder_set_member_name(builder, "layers");
+	json_builder_begin_object(builder); // 5.layers
+	json_builder_set_member_name(builder, "properties");
+	json_builder_begin_object(builder); // 6.properties
+
+	for (i = 0; i < gpa_hfinfo.len; i++) {
+		if (gpa_hfinfo.hfi[i] == NULL)
+			continue; /* This is a deregistered protocol or header field */
+
+		PROTO_REGISTRAR_GET_NTH(i, hfinfo);
+
+		/*
+		 * Skip the pseudo-field for "proto_tree_add_text()" since
+		 * we don't want it in the list of filterable fields.
+		 */
+		if (hfinfo->id == hf_text_only)
+			continue;
+
+		if (!proto_registrar_is_protocol(i)) {
+			PROTO_REGISTRAR_GET_NTH(hfinfo->parent, parent_hfinfo);
+
+			if (prev_proto && g_strcmp0(parent_hfinfo->abbrev, prev_proto)) {
+				json_builder_end_object(builder); // 8.properties
+				json_builder_end_object(builder); // 7.parent_hfinfo->abbrev
+				open_object = TRUE;
+			}
+
+			prev_proto = parent_hfinfo->abbrev;
+
+			if (open_object) {
+				json_builder_set_member_name(builder, parent_hfinfo->abbrev);
+				json_builder_begin_object(builder); // 7.parent_hfinfo->abbrev
+				json_builder_set_member_name(builder, "properties");
+				json_builder_begin_object(builder); // 8.properties
+				open_object = FALSE;
+			}
+			str = g_strdup(hfinfo->abbrev);
+			json_builder_set_member_name(builder, dot_to_underscore(str));
+			g_free(str);
+			json_builder_begin_object(builder); // 9.hfinfo->abbrev
+			json_builder_set_member_name(builder, "type");
+			json_builder_add_string_value(builder, ws_type_to_elastic(hfinfo->type));
+			json_builder_end_object(builder); // 9.hfinfo->abbrev
+		}
+	}
+
+	if (prev_proto) {
+		json_builder_end_object(builder); // 8.properties
+		json_builder_end_object(builder); // 7.parent_hfinfo->abbrev
+	}
+
+	json_builder_end_object(builder); // 6.properties
+	json_builder_end_object(builder); // 5.layers
+	json_builder_end_object(builder); // 4.properties
+	json_builder_end_object(builder); // 3.pcap_file
+	json_builder_end_object(builder); // 2.mappings
+	DISSECTOR_ASSERT(json_builder_end_object(builder)); // 1.root
+
+	generator = json_generator_new();
+	json_generator_set_pretty(generator, TRUE);
+	root = json_builder_get_root(builder);
+	json_generator_set_root(generator, root);
+	json_node_unref(root);
+	g_object_unref(builder);
+	data = json_generator_to_data(generator, &length);
+	g_object_unref(generator);
+	ws_debug_printf("%s\n", data);
+	g_free(data);
+}
+#endif
 
 /* Dumps the contents of the registration database to stdout. An independent
  * program can take this output and format it into nice tables or HTML or
diff --git a/epan/proto.h b/epan/proto.h
index 96e88c439e..d618a49e36 100644
--- a/epan/proto.h
+++ b/epan/proto.h
@@ -2453,6 +2453,11 @@ WS_DLL_PUBLIC void proto_registrar_dump_protocols(void);
 /** Dumps a glossary of the field value strings or true/false strings to STDOUT */
 WS_DLL_PUBLIC void proto_registrar_dump_values(void);
 
+#ifdef HAVE_JSONGLIB
+/** Dumps a mapping file for loading tshark output into ElasticSearch */
+WS_DLL_PUBLIC void proto_registrar_dump_elastic(void);
+#endif
+
 /** Dumps the number of protocol and field registrations to STDOUT.
  @return FALSE if we pre-allocated enough fields, TRUE otherwise. */
 WS_DLL_PUBLIC gboolean proto_registrar_dump_fieldcount(void);
diff --git a/tools/debian-setup.sh b/tools/debian-setup.sh
index 6ca11a0606..c4f19ed316 100755
--- a/tools/debian-setup.sh
+++ b/tools/debian-setup.sh
@@ -47,7 +47,7 @@ ADDITIONAL_LIST="libnl-3-dev qttools5-dev qttools5-dev-tools libgtk-3-dev \
 		qtmultimedia5-dev liblua5.2-dev libnl-cli-3-dev \
 		libparse-yapp-perl qt5-default cmake libcap-dev \
 		liblz4-dev libsnappy-dev libspandsp-dev libxml2-dev \
-		git"
+		git libjson-glib-dev"
 
 # Adds package $2 to list variable $1 if the package is found
 add_package() {
diff --git a/tools/macos-setup-brew.sh b/tools/macos-setup-brew.sh
index 5b8e01fc2a..41bf694c39 100755
--- a/tools/macos-setup-brew.sh
+++ b/tools/macos-setup-brew.sh
@@ -13,7 +13,7 @@
 brew update
 
 #install some libs needed by Wireshark
-brew install c-ares glib libgcrypt gnutls lua cmake nghttp2 snappy lz4 libxml2
+brew install c-ares glib libgcrypt gnutls lua cmake nghttp2 snappy lz4 libxml2 json-glib
 
 #install Qt5
 brew install qt5
diff --git a/tools/rpm_setup.sh b/tools/rpm_setup.sh
index 663daa1147..c7e408a099 100755
--- a/tools/rpm_setup.sh
+++ b/tools/rpm_setup.sh
@@ -27,7 +27,7 @@ libpcap-devel zlib-devel"
 
 ADDITIONAL_LIST="libnl3-devel libnghttp2-devel libcap libcap-devel \
 libgcrypt-devel libssh-devel krb5-devel perl-Parse-Yapp sbc-devel libsmi-devel \
-snappy-devel lz4"
+snappy-devel lz4 json-glib-devel"
 
 # Guess which package manager we will use
 PM=`which zypper 2> /dev/null ||
diff --git a/tshark.c b/tshark.c
index 9c838287c7..c617642def 100644
--- a/tshark.c
+++ b/tshark.c
@@ -476,6 +476,9 @@ glossary_option_help(void)
   fprintf(output, "  -G column-formats        dump column format codes and exit\n");
   fprintf(output, "  -G decodes               dump \"layer type\"/\"decode as\" associations and exit\n");
   fprintf(output, "  -G dissector-tables      dump dissector table names, types, and properties\n");
+#ifdef HAVE_JSONGLIB
+  fprintf(output, "  -G elastic-mapping       dump ElasticSearch mapping file\n");
+#endif
   fprintf(output, "  -G fieldcount            dump count of header fields and exit\n");
   fprintf(output, "  -G fields                dump fields glossary and exit\n");
   fprintf(output, "  -G ftypes                dump field type basic and descriptive names\n");
@@ -964,6 +967,10 @@ main(int argc, char *argv[])
         write_prefs(NULL);
       else if (strcmp(argv[2], "dissector-tables") == 0)
         dissector_dump_dissector_tables();
+#ifdef HAVE_JSONGLIB
+      else if (strcmp(argv[2], "elastic-mapping") == 0)
+        proto_registrar_dump_elastic();
+#endif
       else if (strcmp(argv[2], "fieldcount") == 0) {
         /* return value for the test suite */
         exit_status = proto_registrar_dump_fieldcount();