epan: Add introspection API to export some constants

C is notoriously difficult to bind from other languages
without additional metadata. The C ABI does not include
enums and macros that are an essential component of the
API.

To make Wireshark introspectable and more binding friendly,
include an introspection API to export enums and integer macros.

To avoid the tedious need to manually keep the code up to date
it uses the excellent pyclibrary python package to automatically
parse C headers and extract this data.

This is not a process that should be done automatically during
the build.

This could be used for example to replace most of the wslua
make-init-lua.pl perl script, which tries to do the same thing
using regular expressions.

Besides the downside of using Perl, using regular expressions
is inferior to pyclibrary in 2 ways: 1) pyclibrary understands
most of C99 grammar so it is much more powerful; 2) pyclibrary
has a specific API to extract "values" (enums and constants)
automagically. We just need to take care to use only integer
values, for our purposes.
This commit is contained in:
João Valverde 2021-11-24 16:35:45 +00:00
parent 80ebcc90bc
commit 6fab4cbeff
5 changed files with 441 additions and 0 deletions

View File

@ -586,6 +586,7 @@ libwireshark.so.0 libwireshark0 #MINVER#
epan_get_version@Base 1.9.1
epan_get_version_number@Base 2.5.0
epan_init@Base 2.9.0
epan_inspect_enums@Base 3.7.0
epan_load_settings@Base 2.3.0
epan_memmem@Base 1.9.1
epan_new@Base 1.12.0~rc1

View File

@ -99,6 +99,7 @@ set(LIBWIRESHARK_PUBLIC_HEADERS
iana_charsets.h
iax2_codec_type.h
in_cksum.h
introspection.h
ip_opts.h
ipproto.h
ipv4.h
@ -210,6 +211,7 @@ set(LIBWIRESHARK_NONGENERATED_FILES
guid-utils.c
iana_charsets.c
in_cksum.c
introspection-enums.c
ipproto.c
maxmind_db.c
media_params.c
@ -428,6 +430,22 @@ set_target_properties(wscbor_test PROPERTIES
EXCLUDE_FROM_DEFAULT_BUILD True
)
# Headers scanned by tools/make-enums.py. The paths are relative to the
# top-level source directory: the gen-enums target below runs the script
# from there (WORKING_DIRECTORY), and the script also copies each path
# verbatim into an "#include <...>" line of the generated C file.
set(ENUM_FILES
epan/address.h
epan/proto.h
epan/ftypes/ftypes.h
)
# This tries to parse C headers using Python to extract enums for
# introspection. It is slow and has some particular dependencies
# (the script imports the pyclibrary Python package).
# It's also not foolproof. It should not be part of the ALL target.
#
# Run it explicitly by building the "gen-enums" target. The output is
# written to introspection-enums.c under CMAKE_CURRENT_SOURCE_DIR, i.e.
# into the source tree rather than the build tree.
add_custom_target(gen-enums
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/make-enums.py
--outfile ${CMAKE_CURRENT_SOURCE_DIR}/introspection-enums.c
${ENUM_FILES}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
)
CHECKAPI(
NAME
epan

291
epan/introspection-enums.c Normal file
View File

@ -0,0 +1,291 @@
/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*
* Generated automatically from make-enums.py.
*
* It can be re-created using "make gen-enums".
*
* It is fine to edit this file by hand. Particularly if a symbol
* disappears from the API it can just be removed here. There is no
* requirement to re-run the generator script.
*
*/
#include "config.h"
#include "introspection.h"
#include <epan/address.h>
#include <epan/proto.h>
#include <epan/ftypes/ftypes.h>
/* Expands to one table row: the constant's spelling and its integer value. */
#define ENUM(arg) { #arg, arg }

/*
 * Every exported constant, sorted by symbol name and closed by a
 * { NULL, 0 } sentinel row.
 *
 * The table is never written to after initialisation and is only handed
 * out through a pointer-to-const, so it is declared const. That lets the
 * compiler reject accidental writes.
 */
static const ws_enum_t all_enums[] = {
    ENUM(AT_AX25),
    ENUM(AT_END_OF_LIST),
    ENUM(AT_ETHER),
    ENUM(AT_EUI64),
    ENUM(AT_FC),
    ENUM(AT_FCWWN),
    ENUM(AT_IB),
    ENUM(AT_IPX),
    ENUM(AT_IPv4),
    ENUM(AT_IPv6),
    ENUM(AT_NONE),
    ENUM(AT_STRINGZ),
    ENUM(AT_VINES),
    ENUM(BASE_ALLOW_ZERO),
    ENUM(BASE_CUSTOM),
    ENUM(BASE_DEC),
    ENUM(BASE_DEC_HEX),
    ENUM(BASE_EXT_STRING),
    ENUM(BASE_FLOAT),
    ENUM(BASE_HEX),
    ENUM(BASE_HEX_DEC),
    ENUM(BASE_NETMASK),
    ENUM(BASE_NONE),
    ENUM(BASE_NO_DISPLAY_VALUE),
    ENUM(BASE_OCT),
    ENUM(BASE_OUI),
    ENUM(BASE_PROTOCOL_INFO),
    ENUM(BASE_PT_DCCP),
    ENUM(BASE_PT_SCTP),
    ENUM(BASE_PT_TCP),
    ENUM(BASE_PT_UDP),
    ENUM(BASE_RANGE_STRING),
    ENUM(BASE_SHOW_ASCII_PRINTABLE),
    ENUM(BASE_SPECIAL_VALS),
    ENUM(BASE_UNIT_STRING),
    ENUM(BASE_VAL64_STRING),
    ENUM(BMT_NO_APPEND),
    ENUM(BMT_NO_FALSE),
    ENUM(BMT_NO_FLAGS),
    ENUM(BMT_NO_INT),
    ENUM(BMT_NO_TFS),
    ENUM(ENC_3GPP_TS_23_038_7BITS),
    ENUM(ENC_3GPP_TS_23_038_7BITS_PACKED),
    ENUM(ENC_3GPP_TS_23_038_7BITS_UNPACKED),
    ENUM(ENC_ANTI_HOST_ENDIAN),
    ENUM(ENC_APN_STR),
    ENUM(ENC_ASCII),
    ENUM(ENC_ASCII_7BITS),
    ENUM(ENC_BCD_DIGITS_0_9),
    ENUM(ENC_BCD_ODD_NUM_DIG),
    ENUM(ENC_BCD_SKIP_FIRST),
    ENUM(ENC_BIG_ENDIAN),
    ENUM(ENC_CHARENCODING_MASK),
    ENUM(ENC_CP437),
    ENUM(ENC_CP855),
    ENUM(ENC_CP866),
    ENUM(ENC_EBCDIC),
    ENUM(ENC_EBCDIC_CP037),
    ENUM(ENC_ETSI_TS_102_221_ANNEX_A),
    ENUM(ENC_EUC_KR),
    ENUM(ENC_GB18030),
    ENUM(ENC_HOST_ENDIAN),
    ENUM(ENC_ISO_646_BASIC),
    ENUM(ENC_ISO_646_IRV),
    ENUM(ENC_ISO_8601_DATE),
    ENUM(ENC_ISO_8601_DATE_TIME),
    ENUM(ENC_ISO_8601_TIME),
    ENUM(ENC_ISO_8859_1),
    ENUM(ENC_ISO_8859_10),
    ENUM(ENC_ISO_8859_11),
    ENUM(ENC_ISO_8859_13),
    ENUM(ENC_ISO_8859_14),
    ENUM(ENC_ISO_8859_15),
    ENUM(ENC_ISO_8859_16),
    ENUM(ENC_ISO_8859_2),
    ENUM(ENC_ISO_8859_3),
    ENUM(ENC_ISO_8859_4),
    ENUM(ENC_ISO_8859_5),
    ENUM(ENC_ISO_8859_6),
    ENUM(ENC_ISO_8859_7),
    ENUM(ENC_ISO_8859_8),
    ENUM(ENC_ISO_8859_9),
    ENUM(ENC_KEYPAD_ABC_TBCD),
    ENUM(ENC_KEYPAD_BC_TBCD),
    ENUM(ENC_LITTLE_ENDIAN),
    ENUM(ENC_MAC_ROMAN),
    ENUM(ENC_NA),
    ENUM(ENC_NUM_PREF),
    ENUM(ENC_RFC_1123),
    ENUM(ENC_RFC_822),
    ENUM(ENC_SEP_COLON),
    ENUM(ENC_SEP_DASH),
    ENUM(ENC_SEP_DOT),
    ENUM(ENC_SEP_MASK),
    ENUM(ENC_SEP_NONE),
    ENUM(ENC_SEP_SPACE),
    ENUM(ENC_STRING),
    ENUM(ENC_STR_HEX),
    ENUM(ENC_STR_MASK),
    ENUM(ENC_STR_NUM),
    ENUM(ENC_STR_TIME_MASK),
    ENUM(ENC_T61),
    ENUM(ENC_TIME_CLASSIC_MAC_OS_SECS),
    ENUM(ENC_TIME_MIP6),
    ENUM(ENC_TIME_MSECS),
    ENUM(ENC_TIME_MSEC_NTP),
    ENUM(ENC_TIME_NSECS),
    ENUM(ENC_TIME_NTP),
    ENUM(ENC_TIME_NTP_BASE_ZERO),
    ENUM(ENC_TIME_RFC_3971),
    ENUM(ENC_TIME_RTPS),
    ENUM(ENC_TIME_SECS),
    ENUM(ENC_TIME_SECS_NSECS),
    ENUM(ENC_TIME_SECS_NTP),
    ENUM(ENC_TIME_SECS_USECS),
    ENUM(ENC_TIME_TIMESPEC),
    ENUM(ENC_TIME_TIMEVAL),
    ENUM(ENC_TIME_TOD),
    ENUM(ENC_UCS_2),
    ENUM(ENC_UCS_4),
    ENUM(ENC_UTF_16),
    ENUM(ENC_UTF_8),
    ENUM(ENC_VARIANT_MASK),
    ENUM(ENC_VARINT_PROTOBUF),
    ENUM(ENC_VARINT_QUIC),
    ENUM(ENC_VARINT_ZIGZAG),
    ENUM(ENC_WINDOWS_1250),
    ENUM(ENC_WINDOWS_1251),
    ENUM(ENC_WINDOWS_1252),
    ENUM(ENC_ZIGBEE),
    ENUM(FIELD_DISPLAY_E_MASK),
    ENUM(FI_BIG_ENDIAN),
    ENUM(FI_GENERATED),
    ENUM(FI_HIDDEN),
    ENUM(FI_LITTLE_ENDIAN),
    ENUM(FI_URL),
    ENUM(FI_VARINT),
    ENUM(FTREPR_DFILTER),
    ENUM(FTREPR_DISPLAY),
    ENUM(FT_ABSOLUTE_TIME),
    ENUM(FT_AX25),
    ENUM(FT_AX25_ADDR_LEN),
    ENUM(FT_BOOLEAN),
    ENUM(FT_BYTES),
    ENUM(FT_CHAR),
    ENUM(FT_DOUBLE),
    ENUM(FT_ETHER),
    ENUM(FT_ETHER_LEN),
    ENUM(FT_EUI64),
    ENUM(FT_EUI64_LEN),
    ENUM(FT_FCWWN),
    ENUM(FT_FCWWN_LEN),
    ENUM(FT_FLOAT),
    ENUM(FT_FRAMENUM),
    ENUM(FT_FRAMENUM_ACK),
    ENUM(FT_FRAMENUM_DUP_ACK),
    ENUM(FT_FRAMENUM_NONE),
    ENUM(FT_FRAMENUM_NUM_TYPES),
    ENUM(FT_FRAMENUM_REQUEST),
    ENUM(FT_FRAMENUM_RESPONSE),
    ENUM(FT_FRAMENUM_RETRANS_NEXT),
    ENUM(FT_FRAMENUM_RETRANS_PREV),
    ENUM(FT_GUID),
    ENUM(FT_GUID_LEN),
    ENUM(FT_IEEE_11073_FLOAT),
    ENUM(FT_IEEE_11073_SFLOAT),
    ENUM(FT_INT16),
    ENUM(FT_INT24),
    ENUM(FT_INT32),
    ENUM(FT_INT40),
    ENUM(FT_INT48),
    ENUM(FT_INT56),
    ENUM(FT_INT64),
    ENUM(FT_INT8),
    ENUM(FT_IPXNET),
    ENUM(FT_IPXNET_LEN),
    ENUM(FT_IPv4),
    ENUM(FT_IPv4_LEN),
    ENUM(FT_IPv6),
    ENUM(FT_IPv6_LEN),
    ENUM(FT_NONE),
    ENUM(FT_NUM_TYPES),
    ENUM(FT_OID),
    ENUM(FT_PROTOCOL),
    ENUM(FT_RELATIVE_TIME),
    ENUM(FT_REL_OID),
    ENUM(FT_STRING),
    ENUM(FT_STRINGZ),
    ENUM(FT_STRINGZPAD),
    ENUM(FT_STRINGZTRUNC),
    ENUM(FT_SYSTEM_ID),
    ENUM(FT_UINT16),
    ENUM(FT_UINT24),
    ENUM(FT_UINT32),
    ENUM(FT_UINT40),
    ENUM(FT_UINT48),
    ENUM(FT_UINT56),
    ENUM(FT_UINT64),
    ENUM(FT_UINT8),
    ENUM(FT_UINT_BYTES),
    ENUM(FT_UINT_STRING),
    ENUM(FT_VARINT_MAX_LEN),
    ENUM(FT_VINES),
    ENUM(FT_VINES_ADDR_LEN),
    ENUM(HF_REF_TYPE_DIRECT),
    ENUM(HF_REF_TYPE_INDIRECT),
    ENUM(HF_REF_TYPE_NONE),
    ENUM(ITEM_LABEL_LENGTH),
    ENUM(PI_ASSUMPTION),
    ENUM(PI_CHAT),
    ENUM(PI_CHECKSUM),
    ENUM(PI_COMMENT),
    ENUM(PI_COMMENTS_GROUP),
    ENUM(PI_DEBUG),
    ENUM(PI_DECRYPTION),
    ENUM(PI_DEPRECATED),
    ENUM(PI_ERROR),
    ENUM(PI_GROUP_MASK),
    ENUM(PI_MALFORMED),
    ENUM(PI_NOTE),
    ENUM(PI_PROTOCOL),
    ENUM(PI_REASSEMBLE),
    ENUM(PI_REQUEST_CODE),
    ENUM(PI_RESPONSE_CODE),
    ENUM(PI_SECURITY),
    ENUM(PI_SEQUENCE),
    ENUM(PI_SEVERITY_MASK),
    ENUM(PI_UNDECODED),
    ENUM(PI_WARN),
    ENUM(PROTO_CHECKSUM_E_BAD),
    ENUM(PROTO_CHECKSUM_E_GOOD),
    ENUM(PROTO_CHECKSUM_E_ILLEGAL),
    ENUM(PROTO_CHECKSUM_E_NOT_PRESENT),
    ENUM(PROTO_CHECKSUM_E_UNVERIFIED),
    ENUM(PROTO_CHECKSUM_GENERATED),
    ENUM(PROTO_CHECKSUM_IN_CKSUM),
    ENUM(PROTO_CHECKSUM_NOT_PRESENT),
    ENUM(PROTO_CHECKSUM_NO_FLAGS),
    ENUM(PROTO_CHECKSUM_VERIFY),
    ENUM(PROTO_CHECKSUM_ZERO),
    ENUM(PT_BLUETOOTH),
    ENUM(PT_DCCP),
    ENUM(PT_DDP),
    ENUM(PT_I2C),
    ENUM(PT_IBQP),
    ENUM(PT_IDP),
    ENUM(PT_IPX),
    ENUM(PT_IWARP_MPA),
    ENUM(PT_NONE),
    ENUM(PT_SCTP),
    ENUM(PT_TCP),
    ENUM(PT_UDP),
    ENUM(PT_USB),
    ENUM(SEP_COLON),
    ENUM(SEP_DASH),
    ENUM(SEP_DOT),
    ENUM(SEP_SPACE),
    ENUM(STR_ASCII),
    ENUM(STR_UNICODE),
    { NULL, 0 },
};
/*
 * Accessor for the constant table defined in this file. The caller gets a
 * read-only pointer to the first row; the last row has a NULL symbol.
 */
const ws_enum_t *
epan_inspect_enums(void)
{
    const ws_enum_t *first_row = all_enums;

    return first_row;
}

31
epan/introspection.h Normal file
View File

@ -0,0 +1,31 @@
/*
* Copyright 2021, João Valverde <j@v6e.pt>
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef _INTROSPECTION_H_
#define _INTROSPECTION_H_
#include <ws_symbol_export.h>
/** One exported integer constant: its name paired with its value. */
typedef struct {
/* Name of the constant exactly as spelled in the C headers; NULL only in
 * the row that terminates the array. */
const char *symbol;
/* Value of the constant (0 in the terminating row). */
int value;
} ws_enum_t;
/** Returns an array of ws_enum_t elements. The array is sorted by symbol
 * name in ascending byte order (uppercase before lowercase, e.g. "AT_IPX"
 * comes before "AT_IPv4") and ends with {NULL, 0}.
 *
 * It can be used by language bindings to the Wireshark API to obtain
 * the value of some magic constants. The array can be binary searched,
 * imported to a hash table, serialized, etc.
 *
 * @return Pointer to the first element of a statically allocated, read-only
 *         array. The caller must not modify or free it.
 */
WS_DLL_PUBLIC
const ws_enum_t *epan_inspect_enums(void);
#endif

100
tools/make-enums.py Executable file
View File

@ -0,0 +1,100 @@
#!/usr/bin/env python3
#
# Copyright 2021, João Valverde <j@v6e.pt>
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
#
#
# Uses pyclibrary to parse C headers for enums and integer macro
# definitions. Exports that data to a C file for the introspection API.
#
# Requires: https://github.com/MatthieuDartiailh/pyclibrary
#
import os
import sys
import argparse
from pyclibrary import CParser
# Boilerplate that opens the generated C file; %s receives the generator's
# file name.
HEADER_TEMPLATE = """\
/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*
* Generated automatically from %s.
*
* It can be re-created using "make gen-enums".
*
* It is fine to edit this file by hand. Particularly if a symbol
* disappears from the API it can just be removed here. There is no
* requirement to re-run the generator script.
*
*/
#include "config.h"
#include "introspection.h"
"""

# Opens the table. It is emitted as const because the generated accessor
# only ever hands it out through a pointer-to-const.
TABLE_OPEN = """
#define ENUM(arg) { #arg, arg }
static const ws_enum_t all_enums[] = {
"""

# Sentinel row, end of the table and the public accessor.
TABLE_CLOSE = """\
{ NULL, 0 },
};
const ws_enum_t *epan_inspect_enums(void)
{
return all_enums;
}
"""


def render_source(generator, infiles, definitions):
    """Build the text of the generated C file.

    generator   -- name written into the "Generated automatically from" line.
    infiles     -- header paths; each becomes an '#include <path>' line, in
                   the order given.
    definitions -- mapping of symbol name to parsed value. Only symbols whose
                   value is an int are exported; anything else (strings,
                   floats, ...) is left out.

    Returns the complete file contents as a string, with the symbols in
    sorted order.
    """
    pieces = [HEADER_TEMPLATE % generator]
    pieces.extend('#include <{}>\n'.format(path) for path in infiles)
    pieces.append(TABLE_OPEN)
    for symbol in sorted(definitions):
        if isinstance(definitions[symbol], int):
            pieces.append(' ENUM({}),\n'.format(symbol))
    pieces.append(TABLE_CLOSE)
    return ''.join(pieces)


def write_output(text, outfile):
    """Write text to outfile, or to standard output when outfile is unset.

    Exits with an error message if the file cannot be opened or written.
    Standard output is left open; it is not ours to close.
    """
    if not outfile:
        sys.stdout.write(text)
        return
    try:
        # The context manager closes the file even when write() fails.
        with open(outfile, 'w') as fh:
            fh.write(text)
    except OSError:
        sys.exit('Unable to write ' + outfile + '.\n')


def main():
    """Parse the command line, scan the headers and emit the C file."""
    argp = argparse.ArgumentParser()
    argp.add_argument("-o", "--outfile")
    argp.add_argument("infiles", nargs="*")
    args = argp.parse_args()

    parser = CParser(args.infiles)
    # The 'values' table is where pyclibrary collects the enum constants and
    # macro values it found in the headers.
    definitions = parser.defs['values']

    generator = os.path.basename(sys.argv[0])
    write_output(render_source(generator, args.infiles, definitions),
                 args.outfile)


if __name__ == '__main__':
    main()
#
# Editor modelines - https://www.wireshark.org/tools/modelines.html
#
# Local variables:
# c-basic-offset: 4
# indent-tabs-mode: nil
# End:
#
# vi: set shiftwidth=4 expandtab:
# :indentSize=4:noTabs=true:
#