Add ENC_ASCII_7BITS encoding

Change-Id: I01ec87ff4181afb5b2de487fd5f5200f8d62f17d
Reviewed-on: https://code.wireshark.org/review/1088
Reviewed-by: Pascal Quantin <pascal.quantin@gmail.com>
This commit is contained in:
Pascal Quantin 2014-04-13 18:53:29 +02:00
parent 63ed4745a2
commit cfe11b1097
6 changed files with 156 additions and 116 deletions

View File

@ -1389,7 +1389,6 @@ currently supported are:
ENC_UCS_2 - UCS-2-encoded subset of Unicode, with no surrogate pairs
and thus no code points above 0xFFFF
ENC_UCS_4 - UCS-4-encoded Unicode
ENC_EBCDIC - EBCDIC
ENC_WINDOWS_1250 - Windows-1250 code page
ENC_ISO_8859_1 - ISO 8859-1
ENC_ISO_8859_2 - ISO 8859-2
@ -1406,8 +1405,13 @@ currently supported are:
ENC_ISO_8859_14 - ISO 8859-14
ENC_ISO_8859_15 - ISO 8859-15
ENC_ISO_8859_16 - ISO 8859-16
ENC_WINDOWS_1250 - Windows-1250
ENC_3GPP_TS_23_038_7BITS - GSM 7 bits alphabet as described
in 3GPP TS 23.038
ENC_EBCDIC - EBCDIC
ENC_MAC_ROMAN - MAC ROMAN
ENC_CP437 - DOS CP437
ENC_ASCII_7BITS - 7 bits ASCII
Other encodings will be added in the future.

View File

@ -334,101 +334,9 @@ static dissector_handle_t ansi_637_trans_handle;
static guint32 ansi_637_trans_tele_id;
static char ansi_637_bigbuf[1024];
static char ia5_637_bigbuf[1024];
static dissector_table_t tele_dissector_table;
static proto_tree *g_tree;
/* FUNCTIONS */
/*
* Unpack 'num_fields' 7-bit fields from 'tvb' into 'buf', one field per
* byte of 'buf', and terminate 'buf' with '\0'.  'buf' therefore needs
* room for num_fields + 1 bytes.
*
* 'offset' points at the index of the next octet to fetch from 'tvb';
* it is advanced by one for every octet fetched here.
*
* 'last_oct' / 'last_bit' carry the partially consumed octet in and out:
* last_bit must be from 1 to 8
* '1' means there is one bit remaining in 'last_oct' (i.e. 0x01)
* '3' means there are 3 bits remaining in 'last_oct' (i.e. 0x07)
* '7' means the field is the low 7 bits of the octet (0x7f)
* '8' means the field is the high 7 bits of the octet (0xfe)
* Both are updated on return, presumably so that a later call can
* continue where this one stopped.
*
* Returns the number of fields written to 'buf' (0 when num_fields is 0,
* in which case nothing else is touched).
*
* NOTE(review): the switch below has no default case, so a 'last_bit'
* outside 1..8 leaves buf[i] unwritten for that field.
*/
static int
decode_7_bits(tvbuff_t *tvb, guint32 *offset, guint8 num_fields, guint8 *last_oct, guint8 *last_bit, gchar *buf)
{
/* oct supplies the leading bits of a field, oct2 the trailing bits */
guint8 oct, oct2, bit;
guint32 i;
if (num_fields == 0)
{
return 0;
}
oct = oct2 = *last_oct;
bit = *last_bit;
/*
* With a single bit left over, the first field needs six more bits
* from a fresh octet; fetch it before entering the loop because the
* loop does not fetch when bit == 1.
*/
if (bit == 1)
{
oct2 = tvb_get_guint8(tvb, *offset);
(*offset) += 1;
}
for (i=0; i < num_fields; i++)
{
if (bit != 1)
{
/* the previous trailing octet becomes the leading octet */
oct = oct2;
/*
* cannot grab an octet if we are getting
* the last field and bit is 7 or 8
* because there may not be another octet
*/
if (((i + 1) != num_fields) ||
((bit != 7) && (bit != 8)))
{
oct2 = tvb_get_guint8(tvb, *offset);
(*offset) += 1;
}
}
/*
* Assemble the field: the low 'bit' bits of oct form its high-order
* part and the top bits of oct2 complete it (cases 1-6); for 7 and
* 8 the whole field lies inside oct.
*/
switch (bit)
{
case 1:
buf[i] = ((oct & 0x01) << 6) | ((oct2 & 0xfc) >> 2);
break;
case 2:
buf[i] = ((oct & 0x03) << 5) | ((oct2 & 0xf8) >> 3);
break;
case 3:
buf[i] = ((oct & 0x07) << 4) | ((oct2 & 0xf0) >> 4);
break;
case 4:
buf[i] = ((oct & 0x0f) << 3) | ((oct2 & 0xe0) >> 5);
break;
case 5:
buf[i] = ((oct & 0x1f) << 2) | ((oct2 & 0xc0) >> 6);
break;
case 6:
buf[i] = ((oct & 0x3f) << 1) | ((oct2 & 0x80) >> 7);
break;
case 7:
buf[i] = oct & 0x7f;
break;
case 8:
buf[i] = (oct & 0xfe) >> 1;
break;
}
/* cycle 1 -> 2 -> ... -> 8 -> 1 */
bit = (bit % 8) + 1;
}
buf[i] = '\0';
/* hand the leftover state back to the caller */
*last_bit = bit;
*last_oct = (bit == 1) ? oct : oct2;
return i;
}
/* PARAM FUNCTIONS */
#define EXTRANEOUS_DATA_CHECK(edc_len, edc_max_len) \
@ -462,11 +370,8 @@ decode_7_bits(tvbuff_t *tvb, guint32 *offset, guint8 num_fields, guint8 *last_oc
static void
text_decoder(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset, guint8 encoding, guint8 num_fields, guint16 num_bits, guint8 unused_bits, guint8 fill_bits)
{
guint8 oct;
guint8 bit;
guint32 required_octs;
guint32 out_len;
const gchar *str = NULL;
tvbuff_t *tvb_out = NULL;
GIConv cd;
@ -494,30 +399,21 @@ text_decoder(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset
case 0x02: /* 7-bit ASCII */
offset = 0;
oct = tvb_get_guint8(tvb_out, offset);
offset += 1;
bit = 8;
bit = 0;
(void) decode_7_bits(tvb_out, &offset, num_fields, &oct, &bit, ansi_637_bigbuf);
proto_tree_add_string(tree, hf_ansi_637_tele_user_data_text, tvb_out, 0,
offset, ansi_637_bigbuf);
proto_tree_add_ascii_7bits_item(tree, hf_ansi_637_tele_user_data_text, tvb_out, (offset << 3) + bit, num_fields);
break;
case 0x03: /* IA5 */
offset = 0;
oct = tvb_get_guint8(tvb_out, offset);
offset += 1;
bit = 8;
bit = 0;
out_len =
decode_7_bits(tvb_out, &offset, num_fields, &oct, &bit, ansi_637_bigbuf);
IA5_7BIT_decode(ia5_637_bigbuf, ansi_637_bigbuf, out_len);
ustr = tvb_get_ascii_7bits_string(wmem_packet_scope(), tvb, (offset << 3) + bit, num_fields);
IA5_7BIT_decode(ansi_637_bigbuf, ustr, num_fields);
proto_tree_add_string(tree, hf_ansi_637_tele_user_data_text, tvb_out, 0,
offset, ia5_637_bigbuf);
offset, ansi_637_bigbuf);
break;
case 0x04: /* UNICODE */
@ -564,8 +460,7 @@ text_decoder(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset
else
{
proto_tree_add_expert_format(tree, pinfo, &ei_ansi_637_failed_conversion, tvb_out, offset, required_octs,
"Failed iconv conversion on %s - (report to wireshark.org)",
str);
"Failed iconv conversion on EUC-KR - (report to wireshark.org)");
}
if (ustr)
{

View File

@ -7836,6 +7836,36 @@ proto_tree_add_ts_23_038_7bits_item(proto_tree *tree, const int hfindex, tvbuff_
return pi;
}
/*
 * Add an FT_STRING item to 'tree' whose value is 'no_of_chars' packed
 * 7-bit ASCII characters taken from 'tvb', starting 'bit_offset' bits
 * into the buffer.  The item is attached to the octets the packed
 * characters are taken to occupy.  Returns the new item (the
 * TRY_TO_FAKE_THIS_ITEM macro may return early before anything is
 * decoded).
 */
proto_item *
proto_tree_add_ascii_7bits_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb,
	const guint bit_offset, const gint no_of_chars)
{
	header_field_info *hfinfo;
	proto_item        *item;
	gchar             *string;
	/* First octet touched by the string. */
	const gint         start_byte = bit_offset >> 3;
	/* Octet count used for the item; proto_tree_add_pi() may adjust it. */
	gint               byte_length = (((no_of_chars + 1) * 7) + (bit_offset & 0x07)) >> 3;

	TRY_TO_FAKE_THIS_ITEM(tree, hfindex, hfinfo);

	DISSECTOR_ASSERT(hfinfo->type == FT_STRING);

	/* Decode first so the value is ready before the item is created. */
	string = tvb_get_ascii_7bits_string(wmem_packet_scope(), tvb, bit_offset, no_of_chars);
	if (hfinfo->display == STR_UNICODE) {
		DISSECTOR_ASSERT(g_utf8_validate(string, -1, NULL));
	}

	item = proto_tree_add_pi(tree, hfinfo, tvb, start_byte, &byte_length);
	DISSECTOR_ASSERT(byte_length >= 0);
	proto_tree_set_string(PNODE_FINFO(item), string);

	return item;
}
guchar
proto_check_field_name(const gchar *field_name)
{

View File

@ -309,6 +309,7 @@ WS_DLL_PUBLIC WS_MSVC_NORETURN void proto_report_dissector_bug(const char *messa
#define ENC_EBCDIC 0x0000002E
#define ENC_MAC_ROMAN 0x00000030
#define ENC_CP437 0x00000032
#define ENC_ASCII_7BITS 0x00000034
/*
* TODO:
@ -317,7 +318,6 @@ WS_DLL_PUBLIC WS_MSVC_NORETURN void proto_report_dissector_bug(const char *messa
*
* "IBM MS DBCS"
* JIS C 6226
* 7-bit encodings such as 7 bits ASCII used in packet-ansi_637.c
*
* As those are added, change code such as the code in packet-bacapp.c
* to use them.
@ -2157,6 +2157,17 @@ WS_DLL_PUBLIC proto_item *
proto_tree_add_ts_23_038_7bits_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb,
const guint bit_offset, const gint no_of_chars);
/** Add a FT_STRING with ENC_ASCII_7BITS encoding to a proto_tree.
@param tree the tree to append this item to
@param hfindex field index
@param tvb the tv buffer of the current data
@param bit_offset start of data in tvb expressed in bits
@param no_of_chars number of 7-bit characters to display
@return the newly created item */
WS_DLL_PUBLIC proto_item *
proto_tree_add_ascii_7bits_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb,
const guint bit_offset, const gint no_of_chars);
/** Check if given string is a valid field name
@param field_name the field name to check
@return 0 if valid, else first illegal character */

View File

@ -2311,10 +2311,10 @@ tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
length = ((no_of_chars + 1) * 7 + (bit_offset & 0x07)) >> 3;
ptr = ensure_contiguous(tvb, in_offset, length);
strbuf = wmem_strbuf_new(scope, NULL);
strbuf = wmem_strbuf_sized_new(scope, no_of_chars+1, 0);
for(char_count = 0; char_count < no_of_chars;) {
/* Get the next byte from the string. */
in_byte = *ptr;;
in_byte = *ptr;
ptr++;
/*
@ -2366,6 +2366,71 @@ tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
return (gchar*)wmem_strbuf_finalize(strbuf);
}
/*
 * Extract 'no_of_chars' packed 7-bit characters from 'tvb', starting
 * 'bit_offset' bits into the buffer, and return them as a
 * '\0'-terminated string allocated in 'scope'.
 *
 * Characters are packed most-significant-bit first with no padding
 * between them: character n occupies the 7 bits starting at bit
 * bit_offset + 7*n, where bit 0 is the top bit of the first octet.
 *
 * The octets covering the string are validated up front through
 * ensure_contiguous(); the loop below never reads outside that range.
 *
 * Each character is located from its absolute bit position instead of
 * carrying leftover bits between octets.  The previous carry-based loop
 * dropped leading characters when the string started 2..6 bits into an
 * octet (and then read more octets than had been validated), and it
 * treated a start 7 bits into an octet as if it were byte aligned.
 * Byte-aligned input decodes exactly as before.
 */
gchar *
tvb_get_ascii_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
	const gint bit_offset, gint no_of_chars)
{
	wmem_strbuf_t *strbuf;
	gint           char_count;                /* characters produced so far */
	gint           in_offset = bit_offset >> 3; /* first octet of the string */
	guint          bit_pos = bit_offset & 0x07; /* bit position relative to that octet */
	guint          skip;                      /* bits preceding the character in its first octet */
	guint8         out_byte;
	const guint8  *ptr;
	const guint8  *cur;
	gint           length;

	DISSECTOR_ASSERT(tvb && tvb->initialized);

	/*
	 * Number of octets spanned by the string:
	 * ceil((7 * no_of_chars + (bit_offset & 7)) / 8).
	 */
	length = ((no_of_chars + 1) * 7 + (bit_offset & 0x07)) >> 3;
	ptr = ensure_contiguous(tvb, in_offset, length);

	strbuf = wmem_strbuf_sized_new(scope, no_of_chars+1, 0);
	for (char_count = 0; char_count < no_of_chars; char_count++, bit_pos += 7) {
		cur  = ptr + (bit_pos >> 3);
		skip = bit_pos & 0x07;

		if (skip <= 1) {
			/*
			 * The character lies entirely inside this octet;
			 * do not touch the next one, it may not exist.
			 */
			out_byte = (guint8)((cur[0] >> (1 - skip)) & 0x7f);
		} else {
			/*
			 * The character straddles two octets: take the low
			 * (8 - skip) bits of the first and the top
			 * (skip - 1) bits of the second.
			 */
			out_byte = (guint8)((((cur[0] << 8) | cur[1]) >> (9 - skip)) & 0x7f);
		}

		wmem_strbuf_append_c(strbuf, out_byte);
	}

	return (gchar*)wmem_strbuf_finalize(strbuf);
}
/*
* Given a tvbuff, an offset, a length, and an encoding, allocate a
* buffer big enough to hold a non-null-terminated string of that length
@ -2513,6 +2578,14 @@ tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
}
break;
case ENC_ASCII_7BITS:
{
gint bit_offset = offset << 3;
gint no_of_chars = (length << 3) / 7;
strbuf = tvb_get_ascii_7bits_string(scope, tvb, bit_offset, no_of_chars);
}
break;
case ENC_EBCDIC:
/*
* XXX - do the copy and conversion in one pass.
@ -2834,6 +2907,10 @@ tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, g
REPORT_DISSECTOR_BUG("TS 23.038 7bits has no null character and doesn't support null-terminated strings");
break;
case ENC_ASCII_7BITS:
REPORT_DISSECTOR_BUG("tvb_get_stringz_enc function with ENC_ASCII_7BITS not implemented yet");
break;
case ENC_EBCDIC:
/*
* XXX - do the copy and conversion in one pass.

View File

@ -556,6 +556,29 @@ WS_DLL_PUBLIC guint8 *tvb_get_string_enc(wmem_allocator_t *scope,
WS_DLL_PUBLIC gchar *tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope,
tvbuff_t *tvb, const gint bit_offset, gint no_of_chars);
/**
* Given an allocator scope, a tvbuff, a bit offset, and a length in
* 7-bit characters (not octets!), with the specified offset and
* length referring to a string in the ASCII 7bits encoding:
*
* allocate a buffer using the specified scope;
*
* convert the string from the specified encoding to UTF-8, possibly
* mapping some characters or invalid octet sequences to the Unicode
* REPLACEMENT CHARACTER, and put the resulting UTF-8 string, plus a
* trailing '\0', into that buffer;
*
* and return a pointer to the buffer.
*
* Throws an exception if the tvbuff ends before the string does.
*
* If scope is set to NULL it is the user's responsibility to wmem_free()
* the memory allocated. Otherwise memory is automatically freed when the
* scope lifetime is reached.
*/
WS_DLL_PUBLIC gchar *tvb_get_ascii_7bits_string(wmem_allocator_t *scope,
tvbuff_t *tvb, const gint bit_offset, gint no_of_chars);
/**
* Given an allocator scope, a tvbuff, a byte offset, a byte length, and
* a string encoding, with the specified offset and length referring to