diff --git a/doc/README.dissector b/doc/README.dissector index a693284381..1ca8107cc0 100644 --- a/doc/README.dissector +++ b/doc/README.dissector @@ -1389,7 +1389,6 @@ currently supported are: ENC_UCS_2 - UCS-2-encoded subset of Unicode, with no surrogate pairs and thus no code points above 0xFFFF ENC_UCS_4 - UCS-4-encoded Unicode - ENC_EBCDIC - EBCDIC ENC_WINDOWS_1250 - Windows-1250 code page ENC_ISO_8859_1 - ISO 8859-1 ENC_ISO_8859_2 - ISO 8859-2 @@ -1406,8 +1405,12 @@ currently supported are: ENC_ISO_8859_14 - ISO 8859-14 ENC_ISO_8859_15 - ISO 8859-15 ENC_ISO_8859_16 - ISO 8859-16 ENC_3GPP_TS_23_038_7BITS - GSM 7 bits alphabet as described in 3GPP TS 23.038 + ENC_EBCDIC - EBCDIC + ENC_MAC_ROMAN - MAC ROMAN + ENC_CP437 - DOS CP437 + ENC_ASCII_7BITS - 7 bits ASCII Other encodings will be added in the future. diff --git a/epan/dissectors/packet-ansi_637.c b/epan/dissectors/packet-ansi_637.c index 470550e97e..e5c2b944ec 100644 --- a/epan/dissectors/packet-ansi_637.c +++ b/epan/dissectors/packet-ansi_637.c @@ -334,101 +334,9 @@ static dissector_handle_t ansi_637_trans_handle; static guint32 ansi_637_trans_tele_id; static char ansi_637_bigbuf[1024]; -static char ia5_637_bigbuf[1024]; static dissector_table_t tele_dissector_table; static proto_tree *g_tree; -/* FUNCTIONS */ - -/* - * last_bit must be from 1 to 8 - * '1' means there is one bit remaining in 'last_oct' (i.e. 0x01) - * '3' means there are 3 bits remaining in 'last_oct' (i.e. 
0x07) - */ -static int -decode_7_bits(tvbuff_t *tvb, guint32 *offset, guint8 num_fields, guint8 *last_oct, guint8 *last_bit, gchar *buf) -{ - guint8 oct, oct2, bit; - guint32 i; - - if (num_fields == 0) - { - return 0; - } - - oct = oct2 = *last_oct; - bit = *last_bit; - - if (bit == 1) - { - oct2 = tvb_get_guint8(tvb, *offset); - (*offset) += 1; - } - - for (i=0; i < num_fields; i++) - { - if (bit != 1) - { - oct = oct2; - - /* - * cannot grab an octet if we are getting - * the last field and bit is 7 or 8 - * because there may not be another octet - */ - if (((i + 1) != num_fields) || - ((bit != 7) && (bit != 8))) - { - oct2 = tvb_get_guint8(tvb, *offset); - (*offset) += 1; - } - } - - switch (bit) - { - case 1: - buf[i] = ((oct & 0x01) << 6) | ((oct2 & 0xfc) >> 2); - break; - - case 2: - buf[i] = ((oct & 0x03) << 5) | ((oct2 & 0xf8) >> 3); - break; - - case 3: - buf[i] = ((oct & 0x07) << 4) | ((oct2 & 0xf0) >> 4); - break; - - case 4: - buf[i] = ((oct & 0x0f) << 3) | ((oct2 & 0xe0) >> 5); - break; - - case 5: - buf[i] = ((oct & 0x1f) << 2) | ((oct2 & 0xc0) >> 6); - break; - - case 6: - buf[i] = ((oct & 0x3f) << 1) | ((oct2 & 0x80) >> 7); - break; - - case 7: - buf[i] = oct & 0x7f; - break; - - case 8: - buf[i] = (oct & 0xfe) >> 1; - break; - } - - bit = (bit % 8) + 1; - } - - buf[i] = '\0'; - *last_bit = bit; - *last_oct = (bit == 1) ? 
oct : oct2; - - return i; -} - /* PARAM FUNCTIONS */ #define EXTRANEOUS_DATA_CHECK(edc_len, edc_max_len) \ @@ -462,11 +370,8 @@ decode_7_bits(tvbuff_t *tvb, guint32 *offset, guint8 num_fields, guint8 *last_oc static void text_decoder(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset, guint8 encoding, guint8 num_fields, guint16 num_bits, guint8 unused_bits, guint8 fill_bits) { - guint8 oct; guint8 bit; guint32 required_octs; - guint32 out_len; - const gchar *str = NULL; tvbuff_t *tvb_out = NULL; GIConv cd; @@ -494,30 +399,21 @@ text_decoder(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset case 0x02: /* 7-bit ASCII */ offset = 0; - oct = tvb_get_guint8(tvb_out, offset); - offset += 1; - bit = 8; + bit = 0; - (void) decode_7_bits(tvb_out, &offset, num_fields, &oct, &bit, ansi_637_bigbuf); - - proto_tree_add_string(tree, hf_ansi_637_tele_user_data_text, tvb_out, 0, - offset, ansi_637_bigbuf); + proto_tree_add_ascii_7bits_item(tree, hf_ansi_637_tele_user_data_text, tvb_out, (offset << 3) + bit, num_fields); break; case 0x03: /* IA5 */ offset = 0; - oct = tvb_get_guint8(tvb_out, offset); - offset += 1; - bit = 8; + bit = 0; - out_len = - decode_7_bits(tvb_out, &offset, num_fields, &oct, &bit, ansi_637_bigbuf); - - IA5_7BIT_decode(ia5_637_bigbuf, ansi_637_bigbuf, out_len); + ustr = tvb_get_ascii_7bits_string(wmem_packet_scope(), tvb, (offset << 3) + bit, num_fields); + IA5_7BIT_decode(ansi_637_bigbuf, ustr, num_fields); proto_tree_add_string(tree, hf_ansi_637_tele_user_data_text, tvb_out, 0, - offset, ia5_637_bigbuf); + offset, ansi_637_bigbuf); break; case 0x04: /* UNICODE */ @@ -564,8 +460,7 @@ text_decoder(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset else { proto_tree_add_expert_format(tree, pinfo, &ei_ansi_637_failed_conversion, tvb_out, offset, required_octs, - "Failed iconv conversion on %s - (report to wireshark.org)", - str); + "Failed iconv conversion on EUC-KR - (report to wireshark.org)"); } if (ustr) 
{
diff --git a/epan/proto.c b/epan/proto.c
index fb7ae9fa4f..1db242d03b 100644
--- a/epan/proto.c
+++ b/epan/proto.c
@@ -7836,6 +7836,42 @@ proto_tree_add_ts_23_038_7bits_item(proto_tree *tree, const int hfindex, tvbuff_
 	return pi;
 }
 
+/*
+ * Add an FT_STRING item holding no_of_chars packed 7-bit ASCII characters
+ * that start bit_offset bits into tvb.
+ */
+proto_item *
+proto_tree_add_ascii_7bits_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb,
+	const guint bit_offset, const gint no_of_chars)
+{
+	/* Octet of tvb that contains the first bit of the string. */
+	gint first_octet = bit_offset >> 3;
+	/* Octets covered by the item: the 7-bit characters plus the bits
+	 * skipped in the first octet, rounded up to whole octets. */
+	gint octet_count = (((no_of_chars + 1) * 7) + (bit_offset & 0x07)) >> 3;
+	header_field_info *hfinfo;
+	proto_item *item;
+	gchar *text;
+
+	TRY_TO_FAKE_THIS_ITEM(tree, hfindex, hfinfo);
+
+	DISSECTOR_ASSERT(hfinfo->type == FT_STRING);
+
+	text = tvb_get_ascii_7bits_string(wmem_packet_scope(), tvb, bit_offset, no_of_chars);
+
+	/* Fields displayed as Unicode must carry valid UTF-8. */
+	if (hfinfo->display == STR_UNICODE) {
+		DISSECTOR_ASSERT(g_utf8_validate(text, -1, NULL));
+	}
+
+	/* octet_count is passed by pointer; it is re-checked afterwards. */
+	item = proto_tree_add_pi(tree, hfinfo, tvb, first_octet, &octet_count);
+	DISSECTOR_ASSERT(octet_count >= 0);
+	proto_tree_set_string(PNODE_FINFO(item), text);
+
+	return item;
+}
+
 guchar
 proto_check_field_name(const gchar *field_name)
 {
diff --git a/epan/proto.h b/epan/proto.h
index fdb4aaee00..1e05072099 100644
--- a/epan/proto.h
+++ b/epan/proto.h
@@ -309,6 +309,7 @@ WS_DLL_PUBLIC WS_MSVC_NORETURN void proto_report_dissector_bug(const char *messa
 #define ENC_EBCDIC 0x0000002E
 #define ENC_MAC_ROMAN 0x00000030
 #define ENC_CP437 0x00000032
+#define ENC_ASCII_7BITS 0x00000034
 
 /*
  * TODO:
@@ -317,7 +318,6 @@ WS_DLL_PUBLIC WS_MSVC_NORETURN void proto_report_dissector_bug(const char *messa
  *
  * "IBM MS DBCS"
  * JIS C 6226
- * 7-bit encodings such as 7 bits ASCII used in packet-ansi_637.c
  *
  * As those are added, change code such as the code in packet-bacapp.c
  * to use them.
@@ -2157,6 +2157,17 @@ WS_DLL_PUBLIC proto_item * proto_tree_add_ts_23_038_7bits_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb, const guint bit_offset, const gint no_of_chars); +/** Add a FT_STRING with ENC_ASCII_7BITS encoding to a proto_tree. + @param tree the tree to append this item to + @param hfindex field index + @param tvb the tv buffer of the current data + @param bit_offset start of data in tvb expressed in bits + @param no_of_chars number of 7bits characters to display + @return the newly created item */ +WS_DLL_PUBLIC proto_item * +proto_tree_add_ascii_7bits_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb, + const guint bit_offset, const gint no_of_chars); + /** Check if given string is a valid field name @param field_name the field name to check @return 0 if valid, else first illegal character */ diff --git a/epan/tvbuff.c b/epan/tvbuff.c index fcdb0e22c9..c9eff6d192 100644 --- a/epan/tvbuff.c +++ b/epan/tvbuff.c @@ -2311,10 +2311,10 @@ tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb, length = ((no_of_chars + 1) * 7 + (bit_offset & 0x07)) >> 3; ptr = ensure_contiguous(tvb, in_offset, length); - strbuf = wmem_strbuf_new(scope, NULL); + strbuf = wmem_strbuf_sized_new(scope, no_of_chars+1, 0); for(char_count = 0; char_count < no_of_chars;) { /* Get the next byte from the string. 
*/
-		in_byte = *ptr;;
+		in_byte = *ptr;
 		ptr++;
 
 		/*
@@ -2366,6 +2366,56 @@ tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
 	return (gchar*)wmem_strbuf_finalize(strbuf);
 }
 
+/*
+ * Unpack no_of_chars 7-bit characters, packed most significant bit
+ * first and starting bit_offset bits into tvb, into a string
+ * allocated in scope.
+ *
+ * The first character may start on any bit of its octet.
+ */
+gchar *
+tvb_get_ascii_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
+	const gint bit_offset, gint no_of_chars)
+{
+	wmem_strbuf_t *strbuf;
+	gint char_count; /* characters decoded so far */
+	gint in_offset = bit_offset >> 3; /* octet holding the first bit */
+	guint skip_bits = bit_offset & 0x07; /* bits to skip in that octet */
+	const guint8 *ptr;
+	gint length;
+
+	DISSECTOR_ASSERT(tvb && tvb->initialized);
+
+	/* Octets spanned by skip_bits + 7 * no_of_chars bits, rounded up. */
+	length = ((no_of_chars + 1) * 7 + (bit_offset & 0x07)) >> 3;
+	ptr = ensure_contiguous(tvb, in_offset, length);
+
+	strbuf = wmem_strbuf_sized_new(scope, no_of_chars+1, 0);
+	for (char_count = 0; char_count < no_of_chars; char_count++) {
+		/* Position of this character's first bit, counted from ptr. */
+		guint first_bit = skip_bits + 7 * (guint)char_count;
+		const guint8 *in = ptr + (first_bit >> 3);
+		guint lead = first_bit & 0x07; /* bits of in[0] before the character */
+		guint8 out_byte;
+
+		if (lead <= 1) {
+			/* All seven bits lie inside in[0]. */
+			out_byte = (in[0] >> (1 - lead)) & 0x7f;
+		} else {
+			/*
+			 * The low (8 - lead) bits of in[0] are the top of
+			 * the character and the high (lead - 1) bits of
+			 * in[1] complete it; the character ends in in[1],
+			 * so that octet is covered by length.
+			 */
+			out_byte = ((in[0] << (lead - 1)) | (in[1] >> (9 - lead))) & 0x7f;
+		}
+		wmem_strbuf_append_c(strbuf, out_byte);
+	}
+
+	return (gchar*)wmem_strbuf_finalize(strbuf);
+}
+
 /*
  * Given a tvbuff, an offset, a length, and an encoding, allocate a
  * buffer big enough to hold a non-null-terminated string of that length
@@ -2513,6 +2563,14 @@ tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
 		}
 		break;
 
+	case ENC_ASCII_7BITS:
+		{
+		gint bit_offset = offset << 3;
+		gint no_of_chars = (length << 3) / 7;
+		strbuf = tvb_get_ascii_7bits_string(scope, tvb, bit_offset, no_of_chars);
+		}
+		break;
+
 	case ENC_EBCDIC:
 		/*
 		 * XXX - do the copy and conversion in one pass.
@@ -2834,6 +2892,10 @@ tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, g
 		REPORT_DISSECTOR_BUG("TS 23.038 7bits has no null character and doesn't support null-terminated strings");
 		break;
 
+	case ENC_ASCII_7BITS:
+		REPORT_DISSECTOR_BUG("tvb_get_stringz_enc function with ENC_ASCII_7BITS not implemented yet");
+		break;
+
 	case ENC_EBCDIC:
 		/*
 		 * XXX - do the copy and conversion in one pass.
diff --git a/epan/tvbuff.h b/epan/tvbuff.h index 896a13587f..a365276ee1 100644 --- a/epan/tvbuff.h +++ b/epan/tvbuff.h @@ -556,6 +556,29 @@ WS_DLL_PUBLIC guint8 *tvb_get_string_enc(wmem_allocator_t *scope, WS_DLL_PUBLIC gchar *tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint bit_offset, gint no_of_chars); +/** + * Given an allocator scope, a tvbuff, a bit offset, and a length in + * 7-bit characters (not octets!), with the specified offset and + * length referring to a string in the ASCII 7bits encoding: + * + * allocate a buffer using the specified scope; + * + * convert the string from the specified encoding to UTF-8, possibly + * mapping some characters or invalid octet sequences to the Unicode + * REPLACEMENT CHARACTER, and put the resulting UTF-8 string, plus a + * trailing '\0', into that buffer; + * + * and return a pointer to the buffer. + * + * Throws an exception if the tvbuff ends before the string does. + * + * If scope is set to NULL it is the user's responsibility to wmem_free() + * the memory allocated. Otherwise memory is automatically freed when the + * scope lifetime is reached. + */ +WS_DLL_PUBLIC gchar *tvb_get_ascii_7bits_string(wmem_allocator_t *scope, + tvbuff_t *tvb, const gint bit_offset, gint no_of_chars); + /** * Given an allocator scope, a tvbuff, a byte offset, a byte length, and * a string encoding, with the specified offset and length referring to