wireshark/epan/dissectors/packet-communityid.c

637 lines
22 KiB
C

/* packet-communityid.c
*
* A post-dissector for Community ID flow hashes in Wireshark/tshark.
*
* Community ID flow hashing provides a standardized way for mapping
* flow tuples to string identifiers, used in SIEM searches, network
* data post-processing/correlation, etc. For details, see:
*
* https://github.com/corelight/community-id-spec
*
* Copyright 2020, Corelight Inc
* Contact: Christian Kreibich <christian@corelight.com>
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* SPDX-License-Identifier: BSD-3-Clause
*/
/*
*
* This module provides a stand-alone implementation of the spec.
*/
#include <config.h>
#include <epan/value_string.h>
#include <epan/ipproto.h>
#include <epan/packet.h>
#include <epan/proto_data.h>
#include <epan/prefs.h>
#include <wsutil/wsgcrypt.h>
#include "packet-icmp.h"
/* ---- Generic Community ID codebase, based on GLib & GCrypt ------------------
*
* The code between here and the corresponding end comment below
* provides a reusable implementation of the Community ID. To avoid
* dealing imperfectly with low-level implementation details, it
* assumes GLib and GCrypt are available. Adaptation to other data
* types should be straightforward.
*
* Version 1.0
*
* For updates or feedback please visit:
* https://github.com/corelight/c-community-id
*
* Copyright (c) 2017-2020 by Corelight, Inc
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* (1) Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* (2) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* (3) Neither the name of Corelight, Inc, nor the names of contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* 8-bit IP protocol numbers, likely redundant with similar
* definitions in the surrounding project, but having these here keeps
* the Community ID code self-contained.
*/
#define CID_PROTO_ICMP 1
#define CID_PROTO_IP 4
#define CID_PROTO_TCP 6
#define CID_PROTO_UDP 17
#define CID_PROTO_IPV6 41
#define CID_PROTO_ICMPV6 58
#define CID_PROTO_SCTP 132
/* Similarly, ICMP type numbers, to implement flow-like treatment of
* the ICMPs via type & code values.
*
* communityid_calc() maps each type listed here to its counterpart
* (e.g. CID_ICMP_ECHO <-> CID_ICMP_ECHO_REPLY) and uses the pair as
* port equivalents. Any ICMP/ICMPv6 type not listed here is hashed as
* a one-way flow, i.e. without reordering the endpoints.
*/
#define CID_ICMP_ECHO_REPLY 0
#define CID_ICMP_ECHO 8
#define CID_ICMP_RTR_ADVERT 9
#define CID_ICMP_RTR_SOLICIT 10
#define CID_ICMP_TSTAMP 13
#define CID_ICMP_TSTAMP_REPLY 14
#define CID_ICMP_INFO 15
#define CID_ICMP_INFO_REPLY 16
#define CID_ICMP_MASK 17
#define CID_ICMP_MASK_REPLY 18
#define CID_ICMPV6_ECHO_REQUEST 128
#define CID_ICMPV6_ECHO_REPLY 129
#define CID_ICMPV6_MLD_LISTENER_QUERY 130
#define CID_ICMPV6_MLD_LISTENER_REPORT 131
#define CID_ICMPV6_ND_ROUTER_SOLICIT 133
#define CID_ICMPV6_ND_ROUTER_ADVERT 134
#define CID_ICMPV6_ND_NEIGHBOR_SOLICIT 135
#define CID_ICMPV6_ND_NEIGHBOR_ADVERT 136
#define CID_ICMPV6_WRU_REQUEST 139
#define CID_ICMPV6_WRU_REPLY 140
#define CID_ICMPV6_HAAD_REQUEST 144
#define CID_ICMPV6_HAAD_REPLY 145
/* There's currently only a v1, so we hardwire its prefix string.
* communityid_calc() prepends it to the encoded hash.
*/
#define CID_VERSION_PREFIX "1:"
/* Largest IP address size currently supported, to simplify buffer
* allocations in C90-compliant codebases.
*/
#define CID_ADDR_LEN_MAX 16
/* Set to 1 for debugging output to stderr. When non-zero,
* COMMUNITYID_SHA1_DBG() expands to a call to communityid_sha1_dbg();
* otherwise it expands to nothing.
*/
#define CID_DEBUG 0
/* Configuration consumed by communityid_calc(). */
typedef struct _communityid_cfg_t {
gboolean cfg_do_base64; /* TRUE: base64-encode the SHA-1 digest; FALSE: render it as lowercase hex */
guint16 cfg_seed; /* Seed in host byte order; converted to big-endian and hashed before the flow tuple */
} communityid_cfg_t;
#if CID_DEBUG
/* Debugging aid: print the `len` bytes starting at `data` to stderr as
 * lowercase hex, labelled with `msg`. Only compiled when CID_DEBUG is
 * non-zero.
 */
static void communityid_sha1_dbg(const gchar *msg, const void *data, gsize len)
{
    const guchar *bytes = (const guchar *) data;
    gchar *buf = (gchar *) g_malloc(len * 2 + 1);
    gchar *ptr = buf;
    gsize i;

    /* Terminate up front: with len == 0 the loop below never runs and
     * the buffer would otherwise be printed uninitialized.
     */
    buf[0] = '\0';

    /* Each snprintf() writes two hex digits plus a terminating NUL that
     * the next iteration overwrites.
     */
    for (i = 0; i < len; i++, ptr += 2) {
        snprintf(ptr, 3, "%02x", bytes[i]);
    }

    fprintf(stderr, "Community ID dbg [%s]: %s\n", msg, buf);
    g_free(buf);
}
#define COMMUNITYID_SHA1_DBG(...) communityid_sha1_dbg(__VA_ARGS__)
#else
/* Debugging disabled: invocations compile away to nothing. */
#define COMMUNITYID_SHA1_DBG(...)
#endif
/* Helper function to determine whether a flow tuple is ordered
 * correctly or needs flipping for abstracting flow directionality.
 *
 * - addr_len: number of octets to compare in saddr/daddr.
 * - saddr/daddr: source/destination addresses, compared bytewise.
 * - sport/dport: 16-bit ports in network byte order, or NULL when the
 *   flow has no ports.
 *
 * Returns TRUE when the source endpoint sorts before the destination
 * endpoint: the source address is smaller, or the addresses are equal
 * and either the source port is numerically smaller or no ports are
 * available.
 */
static gboolean communityid_tuple_lt(guint8 addr_len,
                                     const guchar *saddr, const guchar *daddr,
                                     const guint16 *sport, const guint16 *dport)
{
    int addrcmp = memcmp(saddr, daddr, addr_len);
    gboolean ports_lt = TRUE;

    if (sport != NULL && dport != NULL) {
        /* The ports are stored in network byte order, so comparing the
         * guint16 values directly would give the wrong answer on
         * little-endian hosts. A bytewise comparison of big-endian
         * values is equivalent to comparing the numbers themselves and
         * works on any host.
         */
        ports_lt = memcmp(sport, dport, sizeof(guint16)) < 0;
    }

    return addrcmp < 0 || (addrcmp == 0 && ports_lt);
}
/* Main Community ID computation routine. Arguments:
 *
 * - cfg: a pointer to a communityid_cfg_t instance with configuration
 *   information.
 *
 * - proto: an 8-bit unsigned value representing the IP protocol
 *   number of the transport layer (or equivalent) protocol.
 *
 * - addr_len: the length in octets of the network-layer addresses we
 *   use. Must be either 4 (for IPv4) or 16 (for IPv6).
 *
 * - saddr/daddr: pointers to the network-layer source/destination
 *   address, in NBO.
 *
 * - sport/dport: pointers to the transport-layer 16-bit port numbers,
 *   in NBO. These may be NULL pointers to signal that port numbers
 *   aren't available for the flow. For ICMP and ICMPv6, sport carries
 *   the message type and dport the message code.
 *
 * - result: the address of a result pointer that will point at a
 *   newly allocated string containing the computed ID value upon
 *   return from the function. Callers take ownership of the allocated
 *   string and need to free it (with g_free()) when finished.
 *
 * Return value: a Boolean, TRUE if the computation was successful and
 * FALSE otherwise. The function modifies the result pointer only when
 * the return value is TRUE.
 */
static gboolean communityid_calc(communityid_cfg_t *cfg, guint8 proto,
                                 guint8 addr_len, const guchar *saddr, const guchar *daddr,
                                 const guint16 *sport, const guint16 *dport,
                                 gchar **result)
{
    gboolean is_one_way = FALSE;
    guint8 padding = 0;
    guint16 seed_final = 0;
    gcry_md_hd_t sha1;
    const guchar *digest = NULL;
    gsize digest_len = gcry_md_get_algo_dlen(GCRY_MD_SHA1);
    gsize prefix_len = strlen(CID_VERSION_PREFIX);
    guint16 sport_final, dport_final;

    g_return_val_if_fail(cfg != NULL, FALSE);
    g_return_val_if_fail(result != NULL, FALSE);
    g_return_val_if_fail(addr_len == 4 || addr_len == 16, FALSE);
    g_return_val_if_fail(saddr != NULL && daddr != NULL, FALSE);

    if (sport != NULL && dport != NULL) {
        sport_final = *sport;
        dport_final = *dport;

        /* Sort out directionality of this packet in the flow. For
         * regular bidirectional traffic we resort this by ordering
         * the flow tuple. ICMP is our corner-case; we use its type
         * and code values as port equivalents, and expand them when
         * feasible to provide directionality. This is based on Zeek's
         * internal model of ICMP traffic.
         */
        switch (proto) {
        case CID_PROTO_ICMP:
            {
                /* Get ports from network byte order: */
                sport_final = GUINT16_FROM_BE(sport_final);
                dport_final = GUINT16_FROM_BE(dport_final);

                /* Replace the code with the counterpart message type,
                 * so that request and reply hash to the same tuple.
                 */
                switch (sport_final) {
                case CID_ICMP_ECHO:
                    dport_final = CID_ICMP_ECHO_REPLY;
                    break;
                case CID_ICMP_ECHO_REPLY:
                    dport_final = CID_ICMP_ECHO;
                    break;
                case CID_ICMP_TSTAMP:
                    dport_final = CID_ICMP_TSTAMP_REPLY;
                    break;
                case CID_ICMP_TSTAMP_REPLY:
                    dport_final = CID_ICMP_TSTAMP;
                    break;
                case CID_ICMP_INFO:
                    dport_final = CID_ICMP_INFO_REPLY;
                    break;
                case CID_ICMP_INFO_REPLY:
                    dport_final = CID_ICMP_INFO;
                    break;
                case CID_ICMP_RTR_SOLICIT:
                    dport_final = CID_ICMP_RTR_ADVERT;
                    break;
                case CID_ICMP_RTR_ADVERT:
                    dport_final = CID_ICMP_RTR_SOLICIT;
                    break;
                case CID_ICMP_MASK:
                    dport_final = CID_ICMP_MASK_REPLY;
                    break;
                case CID_ICMP_MASK_REPLY:
                    dport_final = CID_ICMP_MASK;
                    break;
                default:
                    /* No known counterpart: keep type/code as they are
                     * and do not reorder the endpoints.
                     */
                    is_one_way = TRUE;
                    break;
                }

                /* And back to NBO: */
                sport_final = GUINT16_TO_BE(sport_final);
                dport_final = GUINT16_TO_BE(dport_final);
            }
            break;
        case CID_PROTO_ICMPV6:
            {
                /* Same treatment as for ICMP, with the ICMPv6 types. */
                sport_final = GUINT16_FROM_BE(sport_final);
                dport_final = GUINT16_FROM_BE(dport_final);

                switch (sport_final) {
                case CID_ICMPV6_ECHO_REQUEST:
                    dport_final = CID_ICMPV6_ECHO_REPLY;
                    break;
                case CID_ICMPV6_ECHO_REPLY:
                    dport_final = CID_ICMPV6_ECHO_REQUEST;
                    break;
                case CID_ICMPV6_MLD_LISTENER_QUERY:
                    dport_final = CID_ICMPV6_MLD_LISTENER_REPORT;
                    break;
                case CID_ICMPV6_MLD_LISTENER_REPORT:
                    dport_final = CID_ICMPV6_MLD_LISTENER_QUERY;
                    break;
                case CID_ICMPV6_ND_ROUTER_SOLICIT:
                    dport_final = CID_ICMPV6_ND_ROUTER_ADVERT;
                    break;
                case CID_ICMPV6_ND_ROUTER_ADVERT:
                    dport_final = CID_ICMPV6_ND_ROUTER_SOLICIT;
                    break;
                case CID_ICMPV6_ND_NEIGHBOR_SOLICIT:
                    dport_final = CID_ICMPV6_ND_NEIGHBOR_ADVERT;
                    break;
                case CID_ICMPV6_ND_NEIGHBOR_ADVERT:
                    dport_final = CID_ICMPV6_ND_NEIGHBOR_SOLICIT;
                    break;
                case CID_ICMPV6_WRU_REQUEST:
                    dport_final = CID_ICMPV6_WRU_REPLY;
                    break;
                case CID_ICMPV6_WRU_REPLY:
                    dport_final = CID_ICMPV6_WRU_REQUEST;
                    break;
                case CID_ICMPV6_HAAD_REQUEST:
                    dport_final = CID_ICMPV6_HAAD_REPLY;
                    break;
                case CID_ICMPV6_HAAD_REPLY:
                    dport_final = CID_ICMPV6_HAAD_REQUEST;
                    break;
                default:
                    is_one_way = TRUE;
                    break;
                }

                sport_final = GUINT16_TO_BE(sport_final);
                dport_final = GUINT16_TO_BE(dport_final);
            }
            /* Explicit break: this case previously fell through into
             * the (empty) default label.
             */
            break;
        default:
            /* Other protocols use their ports unmodified. */
            break;
        }

        /* From here on, hash the local (possibly rewritten) copies. */
        sport = &sport_final;
        dport = &dport_final;
    }

    if (is_one_way || communityid_tuple_lt(addr_len, saddr, daddr,
                                           sport, dport)) {
        /* Ordered correctly, no need to flip. */
    } else {
        /* Need to flip endpoints for consistent hashing. */
        const guchar *tmp_addr = saddr;
        saddr = daddr;
        daddr = tmp_addr;

        if (sport != NULL && dport != NULL) {
            const guint16 *tmp_port = sport;
            sport = dport;
            dport = tmp_port;
        }
    }

    seed_final = GUINT16_TO_BE(cfg->cfg_seed);

    /* SHA-1 computation over: seed, saddr, daddr, proto, one padding
     * byte, and (when available) sport and dport.
     */
    if (gcry_md_open(&sha1, GCRY_MD_SHA1, 0))
        return FALSE;

    COMMUNITYID_SHA1_DBG("seed", &seed_final, 2);
    gcry_md_write(sha1, &seed_final, 2);

    COMMUNITYID_SHA1_DBG("saddr", saddr, addr_len);
    gcry_md_write(sha1, saddr, addr_len);

    COMMUNITYID_SHA1_DBG("daddr", daddr, addr_len);
    gcry_md_write(sha1, daddr, addr_len);

    COMMUNITYID_SHA1_DBG("proto", &proto, 1);
    gcry_md_write(sha1, &proto, 1);

    COMMUNITYID_SHA1_DBG("padding", &padding, 1);
    gcry_md_write(sha1, &padding, 1);

    if (sport != NULL && dport != NULL) {
        COMMUNITYID_SHA1_DBG("sport", sport, 2);
        gcry_md_write(sha1, sport, 2);

        COMMUNITYID_SHA1_DBG("dport", dport, 2);
        gcry_md_write(sha1, dport, 2);
    }

    /* The digest buffer belongs to the hash context, so it is encoded
     * before the context is closed; no intermediate copy is needed.
     */
    digest = gcry_md_read(sha1, 0);
    if (digest == NULL) {
        gcry_md_close(sha1);
        return FALSE;
    }

    if (cfg->cfg_do_base64) {
        gchar *str = g_base64_encode(digest, digest_len);
        gsize len = prefix_len + strlen(str) + 1;

        *result = (gchar*) g_malloc(len);
        snprintf(*result, len, "%s%s", CID_VERSION_PREFIX, str);
        g_free(str);
    } else {
        /* Convert binary SHA-1 to ASCII representation.
         * 2 hex digits for every byte + 1 for trailing \0:
         */
        gchar *ptr;
        gsize i;

        *result = (gchar*) g_malloc(prefix_len + digest_len*2 + 1);
        memcpy(*result, CID_VERSION_PREFIX, prefix_len);
        ptr = *result + prefix_len;

        /* Each snprintf() also writes a NUL, which the next iteration
         * overwrites; the last one terminates the string.
         */
        for (i = 0; i < digest_len; i++, ptr += 2) {
            snprintf(ptr, 3, "%02x", digest[i]);
        }
    }

    gcry_md_close(sha1);

    return TRUE;
}
/* ---- End of generic Community ID codebase ----------------------------------- */
/* Registration routine, defined at the end of this file. */
void proto_register_communityid(void);
/* Protocol ID of this post-dissector, assigned in proto_register_communityid(). */
static int proto_communityid = -1;
/* Protocol IDs of the dissectors whose per-packet data we consult.
* communityid_dissector() looks them up by filter name on first use.
*/
static int proto_ip = -1;
static int proto_ipv6 = -1;
static int proto_icmp = -1;
static int proto_icmpv6 = -1;
/* Header field for the generated Community ID string ("communityid"). */
static int hf_communityid_hash = -1;
/* Handle under which communityid_dissector() is registered as a post-dissector. */
static dissector_handle_t communityid_handle;
/* Config settings as handled by Wireshark's preference framework ... */
static gboolean pref_cid_do_base64 = TRUE;
static guint pref_cid_seed = 0;
/* ... and as interpreted by the Community ID code. Refreshed from the
* preference values above on each communityid_dissector() call that
* has a tree.
*/
static communityid_cfg_t cid_cfg;
/* Wrapper mapping Wireshark's data types to the generic ones supported
 * by communityid_calc().
 *
 * Only 4-octet (IPv4) and 16-octet (IPv6) addresses are handled, and
 * both endpoints must have the same length; anything else yields FALSE
 * without touching *result. All other arguments are passed through
 * unchanged.
 */
static gboolean communityid_calc_wrapper(communityid_cfg_t *cfg, guint8 proto,
                                         address *saddr, address *daddr,
                                         const guint16 *sport, const guint16 *dport,
                                         gchar **result)
{
    /* Mixed address lengths cannot form a supported flow tuple. */
    if (saddr->len != daddr->len)
        return FALSE;

    switch (saddr->len) {
    case 4:     /* IPv4 */
    case 16:    /* IPv6 */
        return communityid_calc(cfg, proto, (guint8) saddr->len,
                                (const guchar*)saddr->data,
                                (const guchar*)daddr->data,
                                sport, dport, result);
    default:
        /* Need another network protocol here? Please file a ticket at
         * https://github.com/corelight/community-id-spec!
         */
        return FALSE;
    }
}
/* Post-dissector entry point: computes the Community ID for the
 * current packet and adds it to the tree as a generated string field.
 *
 * Supported protocol constellations, tried in this order:
 *
 * - ICMP over IPv4 / ICMPv6 over IPv6
 * - TCP/UDP/SCTP over IPv4/v6
 * - Other IPv4/v6 (hashed without ports)
 *
 * Returns 0 when there is no tree, the packet is not IPv4/IPv6, or a
 * hash computation fails; otherwise returns the reported length of tvb.
 */
static int communityid_dissector(tvbuff_t *tvb, packet_info *pinfo,
                                 proto_tree *tree, void *data _U_)
{
    gchar *hash_str = NULL;
    int net_proto_id = -1;
    icmp_info_t *icmp = NULL;
    guint8 ip_proto = 0;

    /* The only output is a tree item, so without a tree there is
     * nothing to do.
     */
    if (tree == NULL)
        return 0;

    /* Map Wireshark-level config to Community ID configs. */
    cid_cfg.cfg_do_base64 = pref_cid_do_base64;
    cid_cfg.cfg_seed = (guint16) pref_cid_seed;

    /* If not yet done, establish global handles for required protocols. */
    if (proto_ip < 0) {
        proto_ip = proto_get_id_by_filter_name("ip");
        proto_ipv6 = proto_get_id_by_filter_name("ipv6");
        proto_icmp = proto_get_id_by_filter_name("icmp");
        proto_icmpv6 = proto_get_id_by_filter_name("icmpv6");
    }

    /* Both endpoints need to be of the same IP version. */
    if (pinfo->net_src.type == AT_IPv4 && pinfo->net_dst.type == AT_IPv4) {
        net_proto_id = proto_ip;
    } else if (pinfo->net_src.type == AT_IPv6 && pinfo->net_dst.type == AT_IPv6) {
        net_proto_id = proto_ipv6;
    }

    /* If this frame isn't IP at all, we're done. */
    if (net_proto_id < 0)
        return 0;

    /* Step 1: ICMP state stored by the ICMP dissector (IPv4 only). The
     * type and code stand in for the source and destination ports.
     */
    if (net_proto_id == proto_ip) {
        icmp = (icmp_info_t*) p_get_proto_data(wmem_file_scope(),
                                               pinfo, proto_icmp, 0);
        if (icmp != NULL) {
            guint16 type_be = GUINT16_TO_BE(icmp->type);
            guint16 code_be = GUINT16_TO_BE(icmp->code);

            if (! communityid_calc_wrapper(&cid_cfg, CID_PROTO_ICMP,
                                           &pinfo->net_src, &pinfo->net_dst,
                                           &type_be, &code_be, &hash_str))
                return 0;
        }
    }

    /* Step 2: ICMPv6. Try this before generic transport layers, since
     * the dissection can find transport layers in the ICMP-contained
     * snippets.
     */
    if (hash_str == NULL && net_proto_id == proto_ipv6) {
        icmp = (icmp_info_t*) p_get_proto_data(wmem_file_scope(),
                                               pinfo, proto_icmpv6, 0);
        if (icmp != NULL) {
            guint16 type_be = GUINT16_TO_BE(icmp->type);
            guint16 code_be = GUINT16_TO_BE(icmp->code);

            if (! communityid_calc_wrapper(&cid_cfg, CID_PROTO_ICMPV6,
                                           &pinfo->net_src, &pinfo->net_dst,
                                           &type_be, &code_be, &hash_str))
                return 0;
        }
    }

    /* Step 3: generic transport layers, using the packet's ports. */
    if (hash_str == NULL) {
        guint16 sport_be = GUINT16_TO_BE(pinfo->srcport);
        guint16 dport_be = GUINT16_TO_BE(pinfo->destport);

        if (pinfo->ptype == PT_SCTP) {
            ip_proto = CID_PROTO_SCTP;
        } else if (pinfo->ptype == PT_TCP) {
            ip_proto = CID_PROTO_TCP;
        } else if (pinfo->ptype == PT_UDP) {
            ip_proto = CID_PROTO_UDP;
        }
        /* Any other port type leaves ip_proto at 0, and we continue
         * with the IP-only scenario below.
         */

        if (ip_proto != 0 && ! communityid_calc_wrapper(&cid_cfg, ip_proto,
                                                        &pinfo->net_src, &pinfo->net_dst,
                                                        &sport_be, &dport_be, &hash_str)) {
            return 0;
        }
    }

    /* Step 4, the final straw: IP-only, hashed without ports. */
    if (hash_str == NULL) {
        /* We'd like the protocol field value of the outermost IP
         * header. It is read from the IP dissector's per-packet data,
         * which is keyed by layer number, so walk the layer list to the
         * first layer matching our IP version. Inspired by
         * proto_get_frame_protocols().
         */
        wmem_list_frame_t *layer;
        guint layer_num = 1;

        for (layer = wmem_list_head(pinfo->layers);
             layer != NULL;
             layer = wmem_list_frame_next(layer), layer_num++) {
            if (GPOINTER_TO_INT(wmem_list_frame_data(layer)) != net_proto_id)
                continue;

            /* We take any protocol number present, so this can
             * include values other than the defined CID_PROTO_*
             * constants.
             */
            ip_proto = GPOINTER_TO_UINT(p_get_proto_data(pinfo->pool, pinfo,
                                                         net_proto_id, layer_num));
            break;
        }

        if (ip_proto != 0 && ! communityid_calc_wrapper(&cid_cfg, ip_proto,
                                                        &pinfo->net_src, &pinfo->net_dst,
                                                        NULL, NULL, &hash_str)) {
            return 0;
        }
    }

    if (hash_str != NULL) {
        /* Zero-length item: the value is derived from the packet, not
         * taken from any of its bytes.
         */
        proto_item *item = proto_tree_add_string(tree, hf_communityid_hash,
                                                 tvb, 0, 0, hash_str);
        proto_item_set_generated(item);
        /* communityid_calc() hands ownership of the string to us. */
        g_free(hash_str);
    }

    return tvb_reported_length(tvb);
}
void proto_register_communityid(void)
{
module_t *communityid_module;
static hf_register_info hf[] = {
{ &hf_communityid_hash,
{ "Community ID", "communityid", FT_STRING, BASE_NONE, NULL, 0x00,
"Community ID hash value for this packet's flow", HFILL }}
};
proto_communityid = proto_register_protocol("Community ID Flow Hashing",
"CommunityID", "communityid");
proto_register_field_array(proto_communityid, hf, array_length(hf));
proto_disable_by_default(proto_communityid);
communityid_handle = create_dissector_handle(communityid_dissector,
proto_communityid);
register_postdissector(communityid_handle);
/* Preference handling */
communityid_module = prefs_register_protocol(proto_communityid, NULL);
prefs_register_bool_preference(communityid_module, "do_base64",
"Use base64 encoding",
"Whether to base64-encode the Community ID hash value",
&pref_cid_do_base64);
prefs_register_uint_preference(communityid_module, "seed",
"Hash seed value",
"A 16-bit seed value to add to the hashed data",
10, &pref_cid_seed);
}
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 4
* tab-width: 8
* indent-tabs-mode: nil
* End:
*
* vi: set shiftwidth=4 tabstop=8 expandtab:
* :indentSize=4:tabSize=8:noTabs=true:
*/