From 6fab4cbeff3d95d59547b70eecee7df03f9a0b7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Valverde?= Date: Wed, 24 Nov 2021 16:35:45 +0000 Subject: [PATCH] epan: Add introspection API to export some constants C is notoriously difficult to bind from other languages without additional metadata. The C ABI does not include enums and macros that are an essential component of the API. To make Wireshark introspectable and more binding-friendly, include an introspection API to export enums and integer macros. To avoid the tedious need to manually keep the code up to date, it uses the excellent pyclibrary Python package to automatically parse C headers and extract this data. This is not a process that should be done automatically during the build. This could be used, for example, to replace most of the wslua make-init-lua.pl Perl script, which tries to do the same thing using regular expressions. Besides the downside of using Perl, using regular expressions is inferior to pyclibrary in 2 ways: 1) pyclibrary understands most of C99 grammar so it is much more powerful; 2) pyclibrary has a specific API to extract "values" (enums and constants) automagically. We just need to take care to use only integer values, for our purposes.
--- debian/libwireshark0.symbols | 1 + epan/CMakeLists.txt | 18 +++ epan/introspection-enums.c | 291 +++++++++++++++++++++++++++++++++++ epan/introspection.h | 31 ++++ tools/make-enums.py | 100 ++++++++++++ 5 files changed, 441 insertions(+) create mode 100644 epan/introspection-enums.c create mode 100644 epan/introspection.h create mode 100755 tools/make-enums.py diff --git a/debian/libwireshark0.symbols b/debian/libwireshark0.symbols index 65e8bdf03f..1badeb596e 100644 --- a/debian/libwireshark0.symbols +++ b/debian/libwireshark0.symbols @@ -586,6 +586,7 @@ libwireshark.so.0 libwireshark0 #MINVER# epan_get_version@Base 1.9.1 epan_get_version_number@Base 2.5.0 epan_init@Base 2.9.0 + epan_inspect_enums@Base 3.7.0 epan_load_settings@Base 2.3.0 epan_memmem@Base 1.9.1 epan_new@Base 1.12.0~rc1 diff --git a/epan/CMakeLists.txt b/epan/CMakeLists.txt index 2a3ca68736..8130318bfb 100644 --- a/epan/CMakeLists.txt +++ b/epan/CMakeLists.txt @@ -99,6 +99,7 @@ set(LIBWIRESHARK_PUBLIC_HEADERS iana_charsets.h iax2_codec_type.h in_cksum.h + introspection.h ip_opts.h ipproto.h ipv4.h @@ -210,6 +211,7 @@ set(LIBWIRESHARK_NONGENERATED_FILES guid-utils.c iana_charsets.c in_cksum.c + introspection-enums.c ipproto.c maxmind_db.c media_params.c @@ -428,6 +430,22 @@ set_target_properties(wscbor_test PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD True ) +set(ENUM_FILES + epan/address.h + epan/proto.h + epan/ftypes/ftypes.h +) + +# This tries to parse C headers using Python to extract enums for +# introspection. It is slow and has some particular dependencies. +# It's also not foolproof. It should not be part of the ALL target. 
+add_custom_target(gen-enums + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/make-enums.py + --outfile ${CMAKE_CURRENT_SOURCE_DIR}/introspection-enums.c + ${ENUM_FILES} + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} +) + CHECKAPI( NAME epan diff --git a/epan/introspection-enums.c b/epan/introspection-enums.c new file mode 100644 index 0000000000..15f20911b0 --- /dev/null +++ b/epan/introspection-enums.c @@ -0,0 +1,291 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs + * Copyright 1998 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Generated automatically from make-enums.py. + * + * It can be re-created using "make gen-enums". + * + * It is fine to edit this file by hand. Particularly if a symbol + * disappears from the API it can just be removed here. There is no + * requirement to re-run the generator script. + * + */ +#include "config.h" +#include "introspection.h" +#include +#include +#include + +#define ENUM(arg) { #arg, arg } + +static ws_enum_t all_enums[] = { + ENUM(AT_AX25), + ENUM(AT_END_OF_LIST), + ENUM(AT_ETHER), + ENUM(AT_EUI64), + ENUM(AT_FC), + ENUM(AT_FCWWN), + ENUM(AT_IB), + ENUM(AT_IPX), + ENUM(AT_IPv4), + ENUM(AT_IPv6), + ENUM(AT_NONE), + ENUM(AT_STRINGZ), + ENUM(AT_VINES), + ENUM(BASE_ALLOW_ZERO), + ENUM(BASE_CUSTOM), + ENUM(BASE_DEC), + ENUM(BASE_DEC_HEX), + ENUM(BASE_EXT_STRING), + ENUM(BASE_FLOAT), + ENUM(BASE_HEX), + ENUM(BASE_HEX_DEC), + ENUM(BASE_NETMASK), + ENUM(BASE_NONE), + ENUM(BASE_NO_DISPLAY_VALUE), + ENUM(BASE_OCT), + ENUM(BASE_OUI), + ENUM(BASE_PROTOCOL_INFO), + ENUM(BASE_PT_DCCP), + ENUM(BASE_PT_SCTP), + ENUM(BASE_PT_TCP), + ENUM(BASE_PT_UDP), + ENUM(BASE_RANGE_STRING), + ENUM(BASE_SHOW_ASCII_PRINTABLE), + ENUM(BASE_SPECIAL_VALS), + ENUM(BASE_UNIT_STRING), + ENUM(BASE_VAL64_STRING), + ENUM(BMT_NO_APPEND), + ENUM(BMT_NO_FALSE), + ENUM(BMT_NO_FLAGS), + ENUM(BMT_NO_INT), + ENUM(BMT_NO_TFS), + ENUM(ENC_3GPP_TS_23_038_7BITS), + ENUM(ENC_3GPP_TS_23_038_7BITS_PACKED), + 
ENUM(ENC_3GPP_TS_23_038_7BITS_UNPACKED), + ENUM(ENC_ANTI_HOST_ENDIAN), + ENUM(ENC_APN_STR), + ENUM(ENC_ASCII), + ENUM(ENC_ASCII_7BITS), + ENUM(ENC_BCD_DIGITS_0_9), + ENUM(ENC_BCD_ODD_NUM_DIG), + ENUM(ENC_BCD_SKIP_FIRST), + ENUM(ENC_BIG_ENDIAN), + ENUM(ENC_CHARENCODING_MASK), + ENUM(ENC_CP437), + ENUM(ENC_CP855), + ENUM(ENC_CP866), + ENUM(ENC_EBCDIC), + ENUM(ENC_EBCDIC_CP037), + ENUM(ENC_ETSI_TS_102_221_ANNEX_A), + ENUM(ENC_EUC_KR), + ENUM(ENC_GB18030), + ENUM(ENC_HOST_ENDIAN), + ENUM(ENC_ISO_646_BASIC), + ENUM(ENC_ISO_646_IRV), + ENUM(ENC_ISO_8601_DATE), + ENUM(ENC_ISO_8601_DATE_TIME), + ENUM(ENC_ISO_8601_TIME), + ENUM(ENC_ISO_8859_1), + ENUM(ENC_ISO_8859_10), + ENUM(ENC_ISO_8859_11), + ENUM(ENC_ISO_8859_13), + ENUM(ENC_ISO_8859_14), + ENUM(ENC_ISO_8859_15), + ENUM(ENC_ISO_8859_16), + ENUM(ENC_ISO_8859_2), + ENUM(ENC_ISO_8859_3), + ENUM(ENC_ISO_8859_4), + ENUM(ENC_ISO_8859_5), + ENUM(ENC_ISO_8859_6), + ENUM(ENC_ISO_8859_7), + ENUM(ENC_ISO_8859_8), + ENUM(ENC_ISO_8859_9), + ENUM(ENC_KEYPAD_ABC_TBCD), + ENUM(ENC_KEYPAD_BC_TBCD), + ENUM(ENC_LITTLE_ENDIAN), + ENUM(ENC_MAC_ROMAN), + ENUM(ENC_NA), + ENUM(ENC_NUM_PREF), + ENUM(ENC_RFC_1123), + ENUM(ENC_RFC_822), + ENUM(ENC_SEP_COLON), + ENUM(ENC_SEP_DASH), + ENUM(ENC_SEP_DOT), + ENUM(ENC_SEP_MASK), + ENUM(ENC_SEP_NONE), + ENUM(ENC_SEP_SPACE), + ENUM(ENC_STRING), + ENUM(ENC_STR_HEX), + ENUM(ENC_STR_MASK), + ENUM(ENC_STR_NUM), + ENUM(ENC_STR_TIME_MASK), + ENUM(ENC_T61), + ENUM(ENC_TIME_CLASSIC_MAC_OS_SECS), + ENUM(ENC_TIME_MIP6), + ENUM(ENC_TIME_MSECS), + ENUM(ENC_TIME_MSEC_NTP), + ENUM(ENC_TIME_NSECS), + ENUM(ENC_TIME_NTP), + ENUM(ENC_TIME_NTP_BASE_ZERO), + ENUM(ENC_TIME_RFC_3971), + ENUM(ENC_TIME_RTPS), + ENUM(ENC_TIME_SECS), + ENUM(ENC_TIME_SECS_NSECS), + ENUM(ENC_TIME_SECS_NTP), + ENUM(ENC_TIME_SECS_USECS), + ENUM(ENC_TIME_TIMESPEC), + ENUM(ENC_TIME_TIMEVAL), + ENUM(ENC_TIME_TOD), + ENUM(ENC_UCS_2), + ENUM(ENC_UCS_4), + ENUM(ENC_UTF_16), + ENUM(ENC_UTF_8), + ENUM(ENC_VARIANT_MASK), + ENUM(ENC_VARINT_PROTOBUF), + 
ENUM(ENC_VARINT_QUIC), + ENUM(ENC_VARINT_ZIGZAG), + ENUM(ENC_WINDOWS_1250), + ENUM(ENC_WINDOWS_1251), + ENUM(ENC_WINDOWS_1252), + ENUM(ENC_ZIGBEE), + ENUM(FIELD_DISPLAY_E_MASK), + ENUM(FI_BIG_ENDIAN), + ENUM(FI_GENERATED), + ENUM(FI_HIDDEN), + ENUM(FI_LITTLE_ENDIAN), + ENUM(FI_URL), + ENUM(FI_VARINT), + ENUM(FTREPR_DFILTER), + ENUM(FTREPR_DISPLAY), + ENUM(FT_ABSOLUTE_TIME), + ENUM(FT_AX25), + ENUM(FT_AX25_ADDR_LEN), + ENUM(FT_BOOLEAN), + ENUM(FT_BYTES), + ENUM(FT_CHAR), + ENUM(FT_DOUBLE), + ENUM(FT_ETHER), + ENUM(FT_ETHER_LEN), + ENUM(FT_EUI64), + ENUM(FT_EUI64_LEN), + ENUM(FT_FCWWN), + ENUM(FT_FCWWN_LEN), + ENUM(FT_FLOAT), + ENUM(FT_FRAMENUM), + ENUM(FT_FRAMENUM_ACK), + ENUM(FT_FRAMENUM_DUP_ACK), + ENUM(FT_FRAMENUM_NONE), + ENUM(FT_FRAMENUM_NUM_TYPES), + ENUM(FT_FRAMENUM_REQUEST), + ENUM(FT_FRAMENUM_RESPONSE), + ENUM(FT_FRAMENUM_RETRANS_NEXT), + ENUM(FT_FRAMENUM_RETRANS_PREV), + ENUM(FT_GUID), + ENUM(FT_GUID_LEN), + ENUM(FT_IEEE_11073_FLOAT), + ENUM(FT_IEEE_11073_SFLOAT), + ENUM(FT_INT16), + ENUM(FT_INT24), + ENUM(FT_INT32), + ENUM(FT_INT40), + ENUM(FT_INT48), + ENUM(FT_INT56), + ENUM(FT_INT64), + ENUM(FT_INT8), + ENUM(FT_IPXNET), + ENUM(FT_IPXNET_LEN), + ENUM(FT_IPv4), + ENUM(FT_IPv4_LEN), + ENUM(FT_IPv6), + ENUM(FT_IPv6_LEN), + ENUM(FT_NONE), + ENUM(FT_NUM_TYPES), + ENUM(FT_OID), + ENUM(FT_PROTOCOL), + ENUM(FT_RELATIVE_TIME), + ENUM(FT_REL_OID), + ENUM(FT_STRING), + ENUM(FT_STRINGZ), + ENUM(FT_STRINGZPAD), + ENUM(FT_STRINGZTRUNC), + ENUM(FT_SYSTEM_ID), + ENUM(FT_UINT16), + ENUM(FT_UINT24), + ENUM(FT_UINT32), + ENUM(FT_UINT40), + ENUM(FT_UINT48), + ENUM(FT_UINT56), + ENUM(FT_UINT64), + ENUM(FT_UINT8), + ENUM(FT_UINT_BYTES), + ENUM(FT_UINT_STRING), + ENUM(FT_VARINT_MAX_LEN), + ENUM(FT_VINES), + ENUM(FT_VINES_ADDR_LEN), + ENUM(HF_REF_TYPE_DIRECT), + ENUM(HF_REF_TYPE_INDIRECT), + ENUM(HF_REF_TYPE_NONE), + ENUM(ITEM_LABEL_LENGTH), + ENUM(PI_ASSUMPTION), + ENUM(PI_CHAT), + ENUM(PI_CHECKSUM), + ENUM(PI_COMMENT), + ENUM(PI_COMMENTS_GROUP), + ENUM(PI_DEBUG), + 
ENUM(PI_DECRYPTION), + ENUM(PI_DEPRECATED), + ENUM(PI_ERROR), + ENUM(PI_GROUP_MASK), + ENUM(PI_MALFORMED), + ENUM(PI_NOTE), + ENUM(PI_PROTOCOL), + ENUM(PI_REASSEMBLE), + ENUM(PI_REQUEST_CODE), + ENUM(PI_RESPONSE_CODE), + ENUM(PI_SECURITY), + ENUM(PI_SEQUENCE), + ENUM(PI_SEVERITY_MASK), + ENUM(PI_UNDECODED), + ENUM(PI_WARN), + ENUM(PROTO_CHECKSUM_E_BAD), + ENUM(PROTO_CHECKSUM_E_GOOD), + ENUM(PROTO_CHECKSUM_E_ILLEGAL), + ENUM(PROTO_CHECKSUM_E_NOT_PRESENT), + ENUM(PROTO_CHECKSUM_E_UNVERIFIED), + ENUM(PROTO_CHECKSUM_GENERATED), + ENUM(PROTO_CHECKSUM_IN_CKSUM), + ENUM(PROTO_CHECKSUM_NOT_PRESENT), + ENUM(PROTO_CHECKSUM_NO_FLAGS), + ENUM(PROTO_CHECKSUM_VERIFY), + ENUM(PROTO_CHECKSUM_ZERO), + ENUM(PT_BLUETOOTH), + ENUM(PT_DCCP), + ENUM(PT_DDP), + ENUM(PT_I2C), + ENUM(PT_IBQP), + ENUM(PT_IDP), + ENUM(PT_IPX), + ENUM(PT_IWARP_MPA), + ENUM(PT_NONE), + ENUM(PT_SCTP), + ENUM(PT_TCP), + ENUM(PT_UDP), + ENUM(PT_USB), + ENUM(SEP_COLON), + ENUM(SEP_DASH), + ENUM(SEP_DOT), + ENUM(SEP_SPACE), + ENUM(STR_ASCII), + ENUM(STR_UNICODE), + { NULL, 0 }, +}; + +const ws_enum_t *epan_inspect_enums(void) +{ + return all_enums; +} diff --git a/epan/introspection.h b/epan/introspection.h new file mode 100644 index 0000000000..9a7eb61ab8 --- /dev/null +++ b/epan/introspection.h @@ -0,0 +1,31 @@ +/* + * Copyright 2021, João Valverde + * + * Wireshark - Network traffic analyzer + * By Gerald Combs + * Copyright 1998 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _INTROSPECTION_H_ +#define _INTROSPECTION_H_ + +#include + +typedef struct { + const char *symbol; + int value; +} ws_enum_t; + +/** Returns an array of ws_enum_t elements. The array is sorted and + * ends with {NULL, 0}. + * + * It can be used by language bindings to the Wireshark API to obtain + * the value of some magic constants. The array can be binary searched, + * imported to a hash table, serialized, etc. 
+ */ +WS_DLL_PUBLIC +const ws_enum_t *epan_inspect_enums(void); + +#endif diff --git a/tools/make-enums.py b/tools/make-enums.py new file mode 100755 index 0000000000..a201c5fa8c --- /dev/null +++ b/tools/make-enums.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +# +# Copyright 2021, João Valverde +# +# Wireshark - Network traffic analyzer +# By Gerald Combs +# Copyright 1998 Gerald Combs +# +# SPDX-License-Identifier: GPL-2.0-or-later +# +# +# Uses pyclibrary to parse C headers for enums and integer macro +# definitions. Exports that data to a C file for the introspection API. +# +# Requires: https://github.com/MatthieuDartiailh/pyclibrary +# + +import os +import sys +import argparse +from pyclibrary import CParser + +argp = argparse.ArgumentParser() +argp.add_argument("-o", "--outfile") +argp.add_argument("infiles", nargs="*") +args = argp.parse_args() + +parser = CParser(args.infiles) + +source = """\ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs + * Copyright 1998 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Generated automatically from %s. + * + * It can be re-created using "make gen-enums". + * + * It is fine to edit this file by hand. Particularly if a symbol + * disappears from the API it can just be removed here. There is no + * requirement to re-run the generator script. 
+ * + */ +#include "config.h" +#include "introspection.h" +""" % (os.path.basename(sys.argv[0])) + +for f in args.infiles: + source += '#include <{}>\n'.format(f) + +source += """ +#define ENUM(arg) { #arg, arg } + +static ws_enum_t all_enums[] = { +""" + +definitions = parser.defs['values'] +symbols = list(definitions.keys()) +symbols.sort() + +for s in symbols: + if isinstance(definitions[s], int): + source += ' ENUM({}),\n'.format(s) + +source += """\ + { NULL, 0 }, +}; + +const ws_enum_t *epan_inspect_enums(void) +{ + return all_enums; +} +""" + +try: + if args.outfile: + fh = open(args.outfile, 'w') + else: + fh = sys.stdout +except OSError: + sys.exit('Unable to write ' + args.outfile + '.\n') + +fh.write(source) +fh.close() + + +# +# Editor modelines - https://www.wireshark.org/tools/modelines.html +# +# Local variables: +# c-basic-offset: 4 +# indent-tabs-mode: nil +# End: +# +# vi: set shiftwidth=4 expandtab: +# :indentSize=4:noTabs=true: +#