diff --git a/debian/libwireshark0.symbols b/debian/libwireshark0.symbols index 933dc1bd8b..99a0d2f431 100644 --- a/debian/libwireshark0.symbols +++ b/debian/libwireshark0.symbols @@ -812,6 +812,7 @@ libwireshark.so.0 libwireshark0 #MINVER# get_ipv4_hash_table@Base 1.12.0~rc1 get_ipv6_hash_table@Base 1.12.0~rc1 get_ipxnet_hash_table@Base 1.12.0~rc1 + get_iso_646_string@Base 3.1.0 get_key_string@Base 1.9.1 get_mac_lte_proto_data@Base 1.9.1 get_mac_nr_proto_data@Base 2.5.2 diff --git a/epan/charsets.c b/epan/charsets.c index f953a4727e..aa6c237703 100644 --- a/epan/charsets.c +++ b/epan/charsets.c @@ -80,6 +80,59 @@ get_ascii_string(wmem_allocator_t *scope, const guint8 *ptr, gint length) return (guint8 *) wmem_strbuf_finalize(str); } +/* + * ISO 646 "Basic code table"; 0x23-0x24, 0x40, 0x5B-0x5E, 0x60, 0x7B-0x7E map to UNREPL. + */ +const gunichar2 charset_table_iso_646_basic[0x80] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, /* 0x00 - */ + 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* - 0x0F */ + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, /* 0x10 - */ + 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* - 0x1F */ + 0x0020, 0x0021, 0x0022, UNREPL, UNREPL, 0x0025, 0x0026, 0x0027, /* 0x20 - */ + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* - 0x2F */ + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, /* 0x30 - */ + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* - 0x3F */ + UNREPL, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, /* 0x40 - */ + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* - 0x4F */ + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, /* 0x50 - */ + 0x0058, 0x0059, 0x005a, UNREPL, UNREPL, UNREPL, UNREPL, 0x005f, /* - 0x5F */ + UNREPL, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x60 - */ + 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* - 0x6F */ + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 
0x0076, 0x0077, /* 0x70 - */ + 0x0078, 0x0079, 0x007a, UNREPL, UNREPL, UNREPL, UNREPL, 0x007f, /* - 0x7F */ +}; + +/* + * Given a wmem scope, a pointer, a length, and a translation table with + * 128 entries, treat the string of bytes referred to by the pointer and + * length as a string encoded using one octet per character, with octets + * with the high-order bit clear being mapped by the translation table to + * 2-byte Unicode Basic Multilingual Plane characters (including + * REPLACEMENT CHARACTER) and octets with the high-order bit set being + * mapped to REPLACEMENT CHARACTER, and return a pointer to a UTF-8 + * string, allocated using the wmem scope. + */ +guint8 * +get_iso_646_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[0x80]) +{ + wmem_strbuf_t *str; + + str = wmem_strbuf_sized_new(scope, length+1, 0); /* initial size: one byte per input octet plus one more */ + + while (length > 0) { + guint8 ch = *ptr; + + if (ch < 0x80) /* high-order bit clear: translate via the table */ + wmem_strbuf_append_unichar(str, table[ch]); + else /* high-order bit set: emit REPLACEMENT CHARACTER */ + wmem_strbuf_append_unichar(str, UNREPL); + ptr++; + length--; + } + + return (guint8 *) wmem_strbuf_finalize(str); +} + /* * Given a wmem scope, a pointer, and a length, treat the string of bytes * referred to by the pointer and length as an ISO 8859/1 string, and @@ -577,11 +630,11 @@ const gunichar2 charset_table_cp866[0x80] = { }; /* - * Given a wmem scope, a pointer, a length, and a translation table, - * treat the string of bytes referred to by the pointer and length as a - * string encoded using one octet per character, with octets with the - * high-order bit clear being ASCII and octets with the high-order bit - * set being mapped by the translation table to 2-byte Unicode Basic + * Given a wmem scope, a pointer, a length, and a translation table with + * 128 entries, treat the string of bytes referred to by the pointer and + * length as a string encoded using one octet per character, with octets + * with the high-order bit clear being ASCII and octets with the high-order + * bit set being mapped by the 
translation table to 2-byte Unicode Basic * Multilingual Plane characters (including REPLACEMENT CHARACTER), and * return a pointer to a UTF-8 string, allocated using the wmem scope. */ @@ -1196,10 +1249,10 @@ const gunichar2 charset_table_ebcdic_cp037[256] = { }; /* - * Given a wmem scope, a pointer, a length, and a translation table, - * treat the string of bytes referred to by the pointer and length as a - * string encoded using one octet per character, with octets being - * mapped by the translation table to 2-byte Unicode Basic Multilingual + * Given a wmem scope, a pointer, a length, and a translation table with + * 256 entries, treat the string of bytes referred to by the pointer and + * length as a string encoded using one octet per character, with octets + * being mapped by the translation table to 2-byte Unicode Basic Multilingual * Plane characters (including REPLACEMENT CHARACTER), and return a * pointer to a UTF-8 string, allocated using the wmem scope. */ diff --git a/epan/charsets.h b/epan/charsets.h index 2a5306a757..9e5dd239fa 100644 --- a/epan/charsets.h +++ b/epan/charsets.h @@ -53,6 +53,12 @@ extern const gunichar2 charset_table_cp437[0x80]; extern const gunichar2 charset_table_cp855[0x80]; extern const gunichar2 charset_table_cp866[0x80]; +/* + * Translation tables that map the lower 128 code points in single-byte + * ISO 646-based character encodings to Unicode code points in the + * Basic Multilingual Plane. 
+ */ +extern const gunichar2 charset_table_iso_646_basic[0x80]; /* Tables for EBCDIC code pages */ extern const gunichar2 charset_table_ebcdic[256]; @@ -70,18 +76,92 @@ extern const gunichar2 charset_table_ebcdic_cp037[256]; WS_DLL_PUBLIC guint8 * get_ascii_string(wmem_allocator_t *scope, const guint8 *ptr, gint length); +/* + * Given a wmem scope, a pointer, a length, and a translation table with + * 128 entries, treat the string of bytes referred to by the pointer and + * length as a string encoded using one octet per character, with octets + * with the high-order bit clear being mapped by the translation table to + * 2-byte Unicode Basic Multilingual Plane characters (including + * REPLACEMENT CHARACTER) and octets with the high-order bit set being + * mapped to REPLACEMENT CHARACTER, and return a pointer to a UTF-8 + * string, allocated using the wmem scope. + */ +WS_DLL_PUBLIC guint8 * +get_iso_646_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[0x80]); + +/* + * Given a wmem scope, a pointer, and a length, treat the string of bytes + * referred to by the pointer and length as an ISO 8859/1 string, and + * return a pointer to a UTF-8 string, allocated using the wmem scope. + */ WS_DLL_PUBLIC guint8 * get_8859_1_string(wmem_allocator_t *scope, const guint8 *ptr, gint length); +/* + * Given a wmem scope, a pointer, a length, and a translation table with + * 128 entries, treat the string of bytes referred to by the pointer and + * length as a string encoded using one octet per character, with octets + * with the high-order bit clear being ASCII and octets with the high-order + * bit set being mapped by the translation table to 2-byte Unicode Basic + * Multilingual Plane characters (including REPLACEMENT CHARACTER), and + * return a pointer to a UTF-8 string, allocated using the wmem scope. 
+ */ WS_DLL_PUBLIC guint8 * get_unichar2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[0x80]); +/* + * Given a wmem scope, a pointer, and a length, treat the string of bytes + * referred to by the pointer and length as a UCS-2 encoded string + * containing characters from the Basic Multilingual Plane (plane 0) of + * Unicode, and return a pointer to a UTF-8 string, allocated with the + * wmem scope. + * + * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN. + * + * Specify length in bytes. + * + * XXX - should map lead and trail surrogate values to REPLACEMENT + * CHARACTERs (0xFFFD)? + * XXX - if there are an odd number of bytes, should put a + * REPLACEMENT CHARACTER at the end. + */ WS_DLL_PUBLIC guint8 * get_ucs_2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const guint encoding); +/* + * Given a wmem scope, a pointer, and a length, treat the string of bytes + * referred to by the pointer and length as a UTF-16 encoded string, and + * return a pointer to a UTF-8 string, allocated with the wmem scope. + * + * See RFC 2781 section 2.2. + * + * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN. + * + * Specify length in bytes. + * + * XXX - should map surrogate errors to REPLACEMENT CHARACTERs (0xFFFD). + * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs. + * XXX - if there are an odd number of bytes, should put a + * REPLACEMENT CHARACTER at the end. + */ WS_DLL_PUBLIC guint8 * get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const guint encoding); +/* + * Given a wmem scope, a pointer, and a length, treat the string of bytes + * referred to by the pointer and length as a UCS-4 encoded string, and + * return a pointer to a UTF-8 string, allocated with the wmem scope. 
+ * + * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN + * + * Specify length in bytes + * + * XXX - should map lead and trail surrogate values to a "substitute" + * UTF-8 character? + * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs. + * XXX - if the number of bytes isn't a multiple of 4, should put a + * REPLACEMENT CHARACTER at the end. + */ WS_DLL_PUBLIC guint8 * get_ucs_4_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const guint encoding); @@ -93,6 +173,14 @@ WS_DLL_PUBLIC guint8 * get_ascii_7bits_string(wmem_allocator_t *scope, const guint8 *ptr, const gint bit_offset, gint no_of_chars); +/* + * Given a wmem scope, a pointer, a length, and a translation table with + * 256 entries, treat the string of bytes referred to by the pointer and + * length as a string encoded using one octet per character, with octets + * being mapped by the translation table to 2-byte Unicode Basic Multilingual + * Plane characters (including REPLACEMENT CHARACTER), and return a + * pointer to a UTF-8 string, allocated using the wmem scope. + */ WS_DLL_PUBLIC guint8 * get_nonascii_unichar2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[256]); diff --git a/epan/proto.h b/epan/proto.h index 76defa811e..787348b5fb 100644 --- a/epan/proto.h +++ b/epan/proto.h @@ -455,6 +455,7 @@ void proto_report_dissector_bug(const char *format, ...) */ #define ENC_CHARENCODING_MASK 0x3FFFFFFE /* mask out byte-order bits and Zigbee bits */ #define ENC_ASCII 0x00000000 +#define ENC_ISO_646_IRV ENC_ASCII /* ISO 646 International Reference Version = ASCII */ #define ENC_UTF_8 0x00000002 #define ENC_UTF_16 0x00000004 #define ENC_UCS_2 0x00000006 @@ -487,6 +488,7 @@ void proto_report_dissector_bug(const char *format, ...) 
#define ENC_WINDOWS_1251 0x0000003C #define ENC_CP855 0x0000003E #define ENC_CP866 0x00000040 +#define ENC_ISO_646_BASIC 0x00000042 /* ISO 646 "basic code table" */ /* * TODO: * diff --git a/epan/tvbuff.c b/epan/tvbuff.c index ea9e1ffacb..07c0b23472 100644 --- a/epan/tvbuff.c +++ b/epan/tvbuff.c @@ -2502,6 +2502,28 @@ tvb_get_ascii_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint l return get_ascii_string(scope, ptr, length); } +/* + * Given a wmem scope, a tvbuff, an offset, a length, and a translation table + * with 128 entries, treat the string of bytes referred to by the tvbuff, + * offset, and length as a string encoded using one octet per character, with + * octets with the high-order bit clear being mapped by the translation table + * to 2-byte Unicode Basic Multilingual Plane characters (including + * REPLACEMENT CHARACTER) and octets with the high-order bit set being mapped + * to REPLACEMENT CHARACTER, and return a pointer to a UTF-8 string, + * allocated using the wmem scope. + * + * The bytes are obtained with ensure_contiguous() and converted by + * get_iso_646_string(). + */ +static guint8 * +tvb_get_iso_646_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length, const gunichar2 table[0x80]) +{ + const guint8 *ptr; + + ptr = ensure_contiguous(tvb, offset, length); + return get_iso_646_string(scope, ptr, length, table); +} + /* * Given a wmem scope, a tvbuff, an offset, and a length, treat the string * of bytes referred to by the tvbuff, the offset. 
and the length as a UTF-8 @@ -2870,6 +2892,10 @@ tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp866); break; + case ENC_ISO_646_BASIC: + strptr = tvb_get_iso_646_string(scope, tvb, offset, length, charset_table_iso_646_basic); + break; + case ENC_3GPP_TS_23_038_7BITS: { gint bit_offset = offset << 3; @@ -2950,6 +2976,20 @@ tvb_get_ascii_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint return get_ascii_string(scope, ptr, size); } +static guint8 * +tvb_get_iso_646_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp, const gunichar2 table[0x80]) +{ + guint size; + const guint8 *ptr; + + size = tvb_strsize(tvb, offset); /* size is taken from the tvbuff, not supplied by the caller */ + ptr = ensure_contiguous(tvb, offset, size); + /* XXX - guint size is converted to gint both when stored in *lengthp and when passed as the length below */ + if (lengthp) /* lengthp is optional; the size is reported only when it is non-NULL */ + *lengthp = size; + return get_iso_646_string(scope, ptr, size, table); +} + static guint8 * tvb_get_utf_8_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp) { @@ -3236,6 +3276,10 @@ tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, g strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp866); break; + case ENC_ISO_646_BASIC: + strptr = tvb_get_iso_646_stringz(scope, tvb, offset, lengthp, charset_table_iso_646_basic); + break; + case ENC_3GPP_TS_23_038_7BITS: REPORT_DISSECTOR_BUG("TS 23.038 7bits has no null character and doesn't support null-terminated strings"); break;