Add proto_tree_add_ts_23_038_7bits_item() / tvb_get_ts_23_038_7bits_string() functions and update dissectors to use it.

Remove gsm_sms_char_7bit_unpack() / gsm_sms_chars_to_utf8() functions. Update documentation a bit. svn path=/trunk/; revision=54534
2014-01-01 14:33:19 +00:00 · 2014-01-01 14:33:19 +00:00 · 6ebc058f47
parent e504cf945b
commit 6ebc058f47
15 changed files with 166 additions and 293 deletions
--- a/asn1/gsm_map/gsm_map.cnf
+++ b/asn1/gsm_map/gsm_map.cnf
@ -502,7 +502,7 @@ actx->pinfo->p2p_dir = P2P_DIR_RECV;
  switch(sms_encoding){
    case SMS_ENCODING_7BIT:
    case SMS_ENCODING_7BIT_LANG:
-	proto_tree_add_text(tree, parameter_tvb , 0, length, "USSD String: %%s", tvb_get_string_enc(wmem_packet_scope(), parameter_tvb, 0, length, ENC_3GPP_TS_23_038|ENC_NA));
+	proto_tree_add_text(tree, parameter_tvb , 0, length, "USSD String: %%s", tvb_get_string_enc(wmem_packet_scope(), parameter_tvb, 0, length, ENC_3GPP_TS_23_038_7BITS|ENC_NA));
 	break;
    case SMS_ENCODING_8BIT:
        /* XXX - ASCII, or some extended ASCII? */
--- a/doc/README.dissector
+++ b/doc/README.dissector
@ -296,6 +296,14 @@ You do not need to free() this buffer, it will happen automatically once the
 next packet is dissected.  This function is slightly more efficient than the
 others because it does not allocate memory and copy the string.

+gchar *tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope,
+    tvbuff_t *tvb, const gint bit_offset, gint no_of_chars);
+
+tvb_get_ts_23_038_7bits_string() returns a string of a given number of characters
+encoded according to the 3GPP TS 23.038 7 bits alphabet. The buffer is allocated by
+g_malloc() if scope is set to NULL (in that case memory must be explicitly freed),
+or with the allocator lifetime if scope is not NULL.
+
 Byte Array Accessors:

 gchar *tvb_bytes_to_str(tvbuff_t *tvb, gint offset, gint len);
@ -1309,6 +1317,10 @@ protocol or field labels to the proto_tree:
    proto_tree_add_float_bits_format_value(tree, id, tvb, bit_offset,
        no_of_bits, value, format, ...);

+    proto_item *
+    proto_tree_add_ts_23_038_7bits_item(tree, hf_index, tvb,
+        bit_offset, no_of_chars);
+
 The 'tree' argument is the tree to which the item is to be added.  The
 'tvb' argument is the tvbuff from which the item's value is being
 extracted; the 'start' argument is the offset from the beginning of that
@ -1365,12 +1377,26 @@ currently supported are:
    ENC_UTF_16 - UTF-16-encoded Unicode, with surrogate pairs
    ENC_UCS_2 - UCS-2-encoded subset of Unicode, with no surrogate pairs
        and thus no code points above 0xFFFF
+    ENC_UCS_4 - UCS-4-encoded Unicode
    ENC_EBCDIC - EBCDIC
    ENC_WINDOWS_1250 - Windows-1250 code page
    ENC_ISO_8859_1 - ISO 8859-1
    ENC_ISO_8859_2 - ISO 8859-2
+    ENC_ISO_8859_3 - ISO 8859-3
+    ENC_ISO_8859_4 - ISO 8859-4
    ENC_ISO_8859_5 - ISO 8859-5
+    ENC_ISO_8859_6 - ISO 8859-6
+    ENC_ISO_8859_7 - ISO 8859-7
+    ENC_ISO_8859_8 - ISO 8859-8
    ENC_ISO_8859_9 - ISO 8859-9
+    ENC_ISO_8859_10 - ISO 8859-10
+    ENC_ISO_8859_11 - ISO 8859-11
+    ENC_ISO_8859_13 - ISO 8859-13
+    ENC_ISO_8859_14 - ISO 8859-14
+    ENC_ISO_8859_15 - ISO 8859-15
+    ENC_ISO_8859_16 - ISO 8859-16
+    ENC_3GPP_TS_23_038_7BITS - GSM 7 bits alphabet as described
+        in 3GPP TS 23.038

 Other encodings will be added in the future.

@ -1686,6 +1712,11 @@ proto_tree_add_split_bits_crumb()
 ---------------------------------
 Helper function for the above, to add text for each crumb as it is encountered.

+proto_tree_add_ts_23_038_7bits_item()
+-------------------------------------
+Adds a string of a given number of characters, encoded according to the 3GPP TS 23.038 7 bits
+alphabet.
+
 proto_tree_add_bitmask() et al.
 -------------------------------
 These functions provide easy to use and convenient dissection of many types of common
--- a/epan/dissectors/packet-ansi_637.c
+++ b/epan/dissectors/packet-ansi_637.c
@ -206,7 +206,6 @@ static dissector_handle_t ansi_637_trans_handle;

 static guint32 ansi_637_trans_tele_id;
 static char ansi_637_bigbuf[1024];
-static char gsm_637_bigbuf[1024];
 static char ia5_637_bigbuf[1024];
 static dissector_table_t tele_dissector_table;
 static proto_tree *g_tree;
@ -764,12 +763,7 @@ tele_param_user_data(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint
            dis_field_udh(tvb_out, tree, &offset, &required_octs, &num_fields, GSM_7BITS, &bit, &udh_fields);
        }

-        out_len = gsm_sms_char_7bit_unpack(bit, required_octs, num_fields,
-                                           tvb_get_ptr(tvb_out, offset, required_octs), gsm_637_bigbuf);
-        gsm_637_bigbuf[out_len] = '\0';
-
-        proto_tree_add_string(tree, hf_ansi_637_tele_user_data_text, tvb_out, offset,
-                              required_octs, gsm_sms_chars_to_utf8(gsm_637_bigbuf, num_fields));
+        proto_tree_add_ts_23_038_7bits_item(tree, hf_ansi_637_tele_user_data_text, tvb_out, (offset<<3)+bit, num_fields);
    }
    else if (encoding == 0x10)/* KSC5601 (Korean) */
    {
--- a/epan/dissectors/packet-cell_broadcast.c
+++ b/epan/dissectors/packet-cell_broadcast.c
@ -253,24 +253,18 @@ guint16 dissect_cbs_message_identifier(tvbuff_t *tvb, proto_tree *tree, guint16
 tvbuff_t * dissect_cbs_data(guint8 sms_encoding, tvbuff_t *tvb, proto_tree *tree, packet_info *pinfo, guint16 offset )
 {
   tvbuff_t * tvb_out = NULL;
-   guint8		out_len;
   int			length = tvb_length(tvb) - offset;
   gchar *utf8_text = NULL, *utf8_out;
-   static unsigned char msgbuf[1024];
-   guint8 * input_string = tvb_get_string(wmem_packet_scope(), tvb, offset, length);
+   guint8 * input_string;
   GIConv cd;
   GError *l_conv_error = NULL;

   switch(sms_encoding){
     case SMS_ENCODING_7BIT:
     case SMS_ENCODING_7BIT_LANG:
-     out_len = gsm_sms_char_7bit_unpack(0, length, sizeof(msgbuf),
-                                        input_string,
-                                        msgbuf);
-     msgbuf[out_len] = '\0';
-     utf8_text = gsm_sms_chars_to_utf8(msgbuf, out_len);
+     utf8_text = tvb_get_ts_23_038_7bits_string(wmem_packet_scope(), tvb, offset<<3, (length*8)/7);
     utf8_out = g_strdup(utf8_text);
-     tvb_out = tvb_new_child_real_data(tvb, utf8_out, out_len, out_len);
+     tvb_out = tvb_new_child_real_data(tvb, utf8_out, strlen(utf8_out), strlen(utf8_out));
     tvb_set_free_cb(tvb_out, g_free);
     add_new_data_source(pinfo, tvb_out, "unpacked 7 bit data");
     break;
@ -281,6 +275,7 @@ tvbuff_t * dissect_cbs_data(guint8 sms_encoding, tvbuff_t *tvb, proto_tree *tree

     case SMS_ENCODING_UCS2:
     case SMS_ENCODING_UCS2_LANG:
+     input_string = tvb_get_string(wmem_packet_scope(), tvb, offset, length);
     if ((cd = g_iconv_open("UTF-8","UCS-2BE")) != (GIConv) -1)
     {
         utf8_text = g_convert_with_iconv(input_string, length, cd, NULL, NULL, &l_conv_error);
--- a/epan/dissectors/packet-etsi_card_app_toolkit.c
+++ b/epan/dissectors/packet-etsi_card_app_toolkit.c
@ -1065,7 +1065,7 @@ dissect_cat(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *data)
 			}
 			switch (g8) {
 			case 0x00: /* 7bit */
-				proto_tree_add_item(elem_tree, hf_ctlv_text_string, tvb, pos+1, len-1, ENC_3GPP_TS_23_038|ENC_NA);
+				proto_tree_add_item(elem_tree, hf_ctlv_text_string, tvb, pos+1, len-1, ENC_3GPP_TS_23_038_7BITS|ENC_NA);
 				break;
 			case 0x04: /* 8bit */
 				/* XXX - ASCII, or some extended ASCII? */
--- a/epan/dissectors/packet-gmr1_rr.c
+++ b/epan/dissectors/packet-gmr1_rr.c
@ -649,25 +649,29 @@ static const value_string rr_pos_display_flag_vals[] = {
 GMR1_IE_FUNC(gmr1_ie_rr_pos_display)
 {
 	const unsigned char *txt_raw;
-	unsigned char txt_packed[11], txt_unpacked[12];
-	int out_len, i;
+	gchar *txt_packed, *txt_unpacked;
+	tvbuff_t *txt_packed_tvb;
+	int i;

 	/* Flag */
 	proto_tree_add_item(tree, hf_rr_pos_display_flag,
 	                    tvb, offset, 1, ENC_BIG_ENDIAN);

-	/* Unpack text */
+	/* Get text in an aligned tvbuff */
 	txt_raw = tvb_get_ptr(tvb, offset, 11);
-
+	txt_packed = (gchar*)wmem_alloc(wmem_packet_scope(), 11);
 	for (i=0; i<10; i++)
 		txt_packed[i] = (txt_raw[i] << 4) | (txt_raw[i+1] >> 4);
 	txt_packed[10] = txt_raw[10];
+	txt_packed_tvb = tvb_new_real_data(txt_packed, 11, 11);

-	out_len = gsm_sms_char_7bit_unpack(0, 11, 12, txt_packed, txt_unpacked);
+	/* Unpack text */
+	txt_unpacked = tvb_get_ts_23_038_7bits_string(wmem_packet_scope(), txt_packed_tvb, 0, 12);
+	tvb_free(txt_packed_tvb);

 	/* Display it */
 	proto_tree_add_string(tree, hf_rr_pos_display_text, tvb, offset, 11,
-	                      gsm_sms_chars_to_utf8(txt_unpacked, out_len));
+	                      txt_unpacked);

 	return 11;
 }
--- a/epan/dissectors/packet-gsm_a_dtap.c
+++ b/epan/dissectors/packet-gsm_a_dtap.c
@ -788,7 +788,7 @@ de_network_name(tvbuff_t *tvb, proto_tree *tree, packet_info *pinfo, guint32 off
 	guint32      curr_offset;
 	const gchar *str;
 	guint8       coding_scheme, num_spare_bits;
-	guint32      num_chars, num_text_bits;
+	guint32      num_text_bits;
 	gchar       *net_name     = NULL;
 	proto_item  *item;

@ -819,24 +819,13 @@ de_network_name(tvbuff_t *tvb, proto_tree *tree, packet_info *pinfo, guint32 off
 	switch(coding_scheme)
 	{
 	case 0:
-		num_chars = gsm_sms_char_7bit_unpack(0, len - 1, sizeof(a_bigbuf),
-			tvb_get_ptr(tvb, curr_offset, len - 1), a_bigbuf);
-
 		/* Check if there was a reasonable value for number of spare bits in last octet */
 		num_text_bits = ((len - 1) << 3) - num_spare_bits;
 		if (num_spare_bits && (num_text_bits % 7))
 		{
 			expert_add_info(pinfo, item, &ei_gsm_a_dtap_text_string_not_multiple_of_7);
 		}
-		/*
-		 * If the number of spare bits is 7, then we have unpacked one extra
-		 * character. Disregard this character.
-		 */
-		if (num_spare_bits == 7)
-			num_chars--;
-		a_bigbuf[num_chars] = '\0';
-		net_name = gsm_sms_chars_to_utf8(a_bigbuf, num_chars);
-		proto_tree_add_string(tree, hf_gsm_a_dtap_text_string, tvb, curr_offset, len - 1, net_name);
+		proto_tree_add_ts_23_038_7bits_item(tree, hf_gsm_a_dtap_text_string, tvb, curr_offset<<3, num_text_bits/7);
 		break;
 	case 1:
 		net_name = tvb_get_string_enc(wmem_packet_scope(), tvb, curr_offset, (len - 1), ENC_UCS_2|ENC_BIG_ENDIAN);
--- a/epan/dissectors/packet-gsm_map.c
+++ b/epan/dissectors/packet-gsm_map.c
@ -4891,7 +4891,7 @@ dissect_gsm_map_ss_USSD_String(gboolean implicit_tag _U_, tvbuff_t *tvb _U_, int
  switch(sms_encoding){
    case SMS_ENCODING_7BIT:
    case SMS_ENCODING_7BIT_LANG:
-	proto_tree_add_text(tree, parameter_tvb , 0, length, "USSD String: %s", tvb_get_string_enc(wmem_packet_scope(), parameter_tvb, 0, length, ENC_3GPP_TS_23_038|ENC_NA));
+	proto_tree_add_text(tree, parameter_tvb , 0, length, "USSD String: %s", tvb_get_string_enc(wmem_packet_scope(), parameter_tvb, 0, length, ENC_3GPP_TS_23_038_7BITS|ENC_NA));
 	break;
    case SMS_ENCODING_8BIT:
        /* XXX - ASCII, or some extended ASCII? */
--- a/epan/dissectors/packet-gsm_sms.c
+++ b/epan/dissectors/packet-gsm_sms.c
@ -338,7 +338,6 @@ dis_field_addr(tvbuff_t *tvb, proto_tree *tree, guint32 *offset_p, const gchar *
    guint32      numdigocts;
    guint32      length, addrlength;
    guint32      i, j;
-    char         addrbuf[MAX_ADDR_SIZE+1];
    gchar       *addrstr;

    offset = *offset_p;
@ -431,10 +430,8 @@ dis_field_addr(tvbuff_t *tvb, proto_tree *tree, guint32 *offset_p, const gchar *
    {
    case 0x05: /* "Alphanumeric (coded according to 3GPP TS 23.038 GSM 7-bit default alphabet)" */
        addrlength = (addrlength << 2) / 7;
-        i = gsm_sms_char_7bit_unpack(0, numdigocts, ((addrlength > MAX_ADDR_SIZE) ? MAX_ADDR_SIZE : addrlength),
-                                     tvb_get_ptr(tvb, offset, numdigocts), addrbuf);
-        addrbuf[i] = '\0';
-        addrstr = gsm_sms_chars_to_utf8(addrbuf, i);
+        addrstr = tvb_get_ts_23_038_7bits_string(wmem_packet_scope(), tvb, offset << 3,
+                                                 (addrlength > MAX_ADDR_SIZE) ? MAX_ADDR_SIZE : addrlength);
        break;
    default:
        addrstr = (gchar *)wmem_alloc(wmem_packet_scope(), numdigocts*2 + 1);
@ -1606,168 +1603,6 @@ dis_field_fcs(tvbuff_t *tvb, proto_tree *tree, guint32 offset, guint8 oct _U_)
        "The TP-UD field contains only the"); \
 }

-/*
- * FROM GNOKII
- * gsm-encoding.c
- * gsm-sms.c
- */
-#define GN_BYTE_MASK ((1 << bits) - 1)
-
-int
-gsm_sms_char_7bit_unpack(unsigned int offset, unsigned int in_length, unsigned int out_length,
-                         const guint8 *input, unsigned char *output)
-{
-    unsigned char *out_num = output; /* Current pointer to the output buffer */
-    const guint8  *in_num  = input; /* Current pointer to the input buffer */
-    unsigned char  rest    = 0x00;
-    int            bits;
-
-    bits = offset ? offset : 7;
-
-    while ((unsigned int)(in_num - input) < in_length)
-    {
-        *out_num = ((*in_num & GN_BYTE_MASK) << (7 - bits)) | rest;
-        rest = *in_num >> bits;
-
-        /* If we don't start from 0th bit, we shouldn't go to the
-           next char. Under *out_num we have now 0 and under Rest -
-           _first_ part of the char. */
-        if ((in_num != input) || (bits == 7)) out_num++;
-        in_num++;
-
-        if ((unsigned int)(out_num - output) >= out_length) break;
-
-        /* After reading 7 octets we have read 7 full characters but
-           we have 7 bits as well. This is the next character */
-        if (bits == 1)
-        {
-            *out_num = rest;
-            out_num++;
-            bits = 7;
-            rest = 0x00;
-        }
-        else
-        {
-            bits--;
-        }
-    }
-
-    return (int)(out_num - output);
-}
-
-#define GN_CHAR_ALPHABET_SIZE 128
-
-#define GN_CHAR_ESCAPE 0x1b
-
-static const gunichar gsm_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
-
-    /* ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet */
-    /* Fixed to use unicode */
-    /* Characters in hex position 10, [12 to 1a] and 24 are not present on
-       latin1 charset, so we cannot reproduce on the screen, however they are
-       greek symbol not present even on my Nokia */
-
-    '@',   0xa3,  '$',   0xa5,  0xe8,  0xe9,  0xf9,  0xec,
-    0xf2,  0xc7,  '\n',  0xd8,  0xf8,  '\r',  0xc5,  0xe5,
-    0x394, '_',   0x3a6, 0x393, 0x39b, 0x3a9, 0x3a0, 0x3a8,
-    0x3a3, 0x398, 0x39e, 0xa0,  0xc6,  0xe6,  0xdf,  0xc9,
-    ' ',   '!',   '\"',  '#',   0xa4,  '%',   '&',   '\'',
-    '(',   ')',   '*',   '+',   ',',   '-',   '.',   '/',
-    '0',   '1',   '2',   '3',   '4',   '5',   '6',   '7',
-    '8',   '9',   ':',   ';',   '<',   '=',   '>',   '?',
-    0xa1,  'A',   'B',   'C',   'D',   'E',   'F',   'G',
-    'H',   'I',   'J',   'K',   'L',   'M',   'N',   'O',
-    'P',   'Q',   'R',   'S',   'T',   'U',   'V',   'W',
-    'X',   'Y',   'Z',   0xc4,  0xd6,  0xd1,  0xdc,  0xa7,
-    0xbf,  'a',   'b',   'c',   'd',   'e',   'f',   'g',
-    'h',   'i',   'j',   'k',   'l',   'm',   'n',   'o',
-    'p',   'q',   'r',   's',   't',   'u',   'v',   'w',
-    'x',   'y',   'z',   0xe4,  0xf6,  0xf1,  0xfc,  0xe0
-};
-
-static gboolean
-char_is_escape(unsigned char value)
-{
-    return (value == GN_CHAR_ESCAPE);
-}
-
-static gunichar
-char_def_alphabet_ext_decode(unsigned char value)
-{
-    switch (value)
-    {
-    case 0x0a: return 0x0c; /* form feed */
-    case 0x14: return '^';
-    case 0x28: return '{';
-    case 0x29: return '}';
-    case 0x2f: return '\\';
-    case 0x3c: return '[';
-    case 0x3d: return '~';
-    case 0x3e: return ']';
-    case 0x40: return '|';
-    case 0x65: return 0x20ac; /* euro */
-    default: return '?'; /* invalid character */
-    }
-}
-
-static gunichar
-char_def_alphabet_decode(unsigned char value)
-{
-    if (value < GN_CHAR_ALPHABET_SIZE)
-    {
-        return gsm_default_alphabet[value];
-    }
-    else
-    {
-        return '?';
-    }
-}
-
-gchar *
-gsm_sms_chars_to_utf8(const unsigned char* src, int len)
-{
-    gint      outlen, i, j;
-    gunichar  c;
-    gchar    *outbuf;
-
-    /* Scan the input string to see how long the output string will be */
-    for (outlen = 0, j = 0; j < len;  j++)
-    {
-        if (char_is_escape(src[j])) {
-            j++;
-            if (j == len)
-                c = '?';        /* escape with nothing following it - error */
-            else
-                c = char_def_alphabet_ext_decode(src[j]);
-        }
-        else
-            c = char_def_alphabet_decode(src[j]);
-        outlen += g_unichar_to_utf8(c,NULL);
-    }
-
-    /* Now allocate a buffer for the output string and fill it in */
-    outbuf = (gchar *)wmem_alloc(wmem_packet_scope(), outlen + 1);
-    for (i = 0, j = 0; j < len;  j++)
-    {
-        if (char_is_escape(src[j])) {
-            j++;
-            if (j == len)
-                c = '?';        /* escape with nothing following it - error */
-            else
-                c = char_def_alphabet_ext_decode(src[j]);
-        }
-        else
-            c = char_def_alphabet_decode(src[j]);
-        i += g_unichar_to_utf8(c,&(outbuf[i]));
-    }
-    outbuf[i] = '\0';
-    return outbuf;
-}
-
-/*
- * END FROM GNOKII
- */
-
 /* 9.2.3.24.1 */
 static void
 dis_iei_csm8(tvbuff_t *tvb, proto_tree *tree, guint32 offset, guint8 length, gsm_sms_udh_fields_t *p_udh_fields)
@ -2645,7 +2480,6 @@ dis_field_udh(tvbuff_t *tvb, proto_tree *tree, guint32 *offset, guint32 *length,

 /* 9.2.3.24 */
 #define SMS_MAX_MESSAGE_SIZE 160
-static char    messagebuf[SMS_MAX_MESSAGE_SIZE+1];
 static void
 dis_field_ud(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset,
             guint32 length, gboolean udhi, guint8 udl, gboolean seven_bit,
@ -2656,7 +2490,7 @@ dis_field_ud(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset
    tvbuff_t          *sm_tvb = NULL;
    fragment_head     *fd_sm = NULL;
    guint8             fill_bits;
-    guint32            out_len, total_sms_len, len_sms, length_ucs2, i;
+    guint32            total_sms_len, len_sms, length_ucs2, i;
    proto_item        *ucs2_item;
    gchar             *utf8_text = NULL;
    gchar              save_byte = 0, save_byte2 = 0;
@ -2782,13 +2616,8 @@ dis_field_ud(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset
            if(!(reassembled && pinfo->fd->num == reassembled_in))
            {
                /* Show unassembled SMS */
-                out_len =
-                    gsm_sms_char_7bit_unpack(fill_bits, length ,
-                                             (udl > SMS_MAX_MESSAGE_SIZE ? SMS_MAX_MESSAGE_SIZE : udl),
-                                             tvb_get_ptr(tvb , offset , length) , messagebuf);
-                messagebuf[out_len] = '\0';
-                proto_tree_add_string(subtree, hf_gsm_sms_text, tvb, offset, length,
-                                      gsm_sms_chars_to_utf8(messagebuf, out_len));
+                proto_tree_add_ts_23_038_7bits_item(subtree, hf_gsm_sms_text, tvb, (offset<<3)+fill_bits,
+                                                    (udl > SMS_MAX_MESSAGE_SIZE ? SMS_MAX_MESSAGE_SIZE : udl));
            }
            else
            {
@ -2803,15 +2632,9 @@ dis_field_ud(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset
                                                            GUINT_TO_POINTER((guint)((udh_fields.sm_id<<16)|i)));

                    if (p_frag_params) {
-                        out_len =
-                            gsm_sms_char_7bit_unpack(p_frag_params->fill_bits, p_frag_params->length,
-                                (p_frag_params->udl > SMS_MAX_MESSAGE_SIZE ? SMS_MAX_MESSAGE_SIZE : p_frag_params->udl),
-                                tvb_get_ptr(sm_tvb, total_sms_len, p_frag_params->length), messagebuf);
-
-                        messagebuf[out_len] = '\0';
-                        proto_tree_add_string(subtree, hf_gsm_sms_text, sm_tvb,
-                                              total_sms_len, p_frag_params->length,
-                                              gsm_sms_chars_to_utf8(messagebuf, out_len));
+                        proto_tree_add_ts_23_038_7bits_item(subtree, hf_gsm_sms_text, sm_tvb,
+                            (total_sms_len<<3)+p_frag_params->fill_bits,
+                            (p_frag_params->udl > SMS_MAX_MESSAGE_SIZE ? SMS_MAX_MESSAGE_SIZE : p_frag_params->udl));

                        total_sms_len += p_frag_params->length;
                    }
--- a/epan/dissectors/packet-gsm_sms.h
+++ b/epan/dissectors/packet-gsm_sms.h
@ -24,31 +24,6 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

-/* Convert a 7-bit GSM SMS packed string into an unpacked string.
- *
- * @param offset Bit offset of the start of the string.
- * @param in_length Length of the packed string in bytes.
- * @param out_length Length of the output string in bytes.
- * @param input The string to unpack
- * @param output The buffer for the output string. This buffer must
- *               be pre-allocated and be at least out_length characters
- *               long, or out_length + 1 if you're planning on adding a
- *               terminating '\0'.
- * @return The number of unpacked characters.
- */
-
-extern int gsm_sms_char_7bit_unpack(unsigned int offset, unsigned int in_length, unsigned int out_length,
-                     const guint8 *input, unsigned char *output);
-
-/* Convert an unpacked SMS string to UTF-8.
- *
- * @param src The string to convert.
- * @param len Length of the string to convert, in bytes.
- * @return An ep_allocated UTF-8 string.
- */
-
-extern gchar *gsm_sms_chars_to_utf8(const unsigned char* src, int len);
-
 enum character_set {
    OTHER,
    GSM_7BITS,
--- a/epan/dissectors/packet-mbim.c
+++ b/epan/dissectors/packet-mbim.c
@ -3094,7 +3094,7 @@ mbim_dissect_set_ussd(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, gint
            case SMS_ENCODING_7BIT:
            case SMS_ENCODING_7BIT_LANG:
                proto_tree_add_item(subtree, hf_mbim_set_ussd_ussd_payload_text,
-                                    ussd_tvb, 0, ussd_payload_length, ENC_3GPP_TS_23_038|ENC_NA);
+                                    ussd_tvb, 0, ussd_payload_length, ENC_3GPP_TS_23_038_7BITS|ENC_NA);
                break;
            case SMS_ENCODING_8BIT:
                /* XXX - ASCII, or some extended ASCII? */
@ -3146,7 +3146,7 @@ mbim_dissect_ussd_info(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, gint
            case SMS_ENCODING_7BIT:
            case SMS_ENCODING_7BIT_LANG:
                proto_tree_add_item(subtree, hf_mbim_ussd_info_ussd_payload_text,
-                                    ussd_tvb, 0, ussd_payload_length, ENC_3GPP_TS_23_038|ENC_NA);
+                                    ussd_tvb, 0, ussd_payload_length, ENC_3GPP_TS_23_038_7BITS|ENC_NA);
                break;
            case SMS_ENCODING_8BIT:
                /* XXX - ASCII, or some extended ASCII? */
--- a/epan/proto.c
+++ b/epan/proto.c
@ -7821,6 +7821,36 @@ proto_tree_add_boolean_bits_format_value(proto_tree *tree, const int hfindex,
 	return proto_tree_add_bits_format_value(tree, hfindex, tvb, bit_offset, no_of_bits, &value, dst);
 }

+proto_item *
+proto_tree_add_ts_23_038_7bits_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb,
+	const guint bit_offset, const gint no_of_chars)
+{	/* Decode no_of_chars TS 23.038 7 bits characters starting at bit_offset and add them as a string item. */
+	proto_item	  *pi;
+	header_field_info *hfinfo;
+	gint		   byte_length; /* number of octets attached to the item */
+	gint		   byte_offset; /* octet that contains the first bit of the string */
+	gchar		  *string;
+
+	TRY_TO_FAKE_THIS_ITEM(tree, hfindex, hfinfo);
+
+	DISSECTOR_ASSERT(hfinfo->type == FT_STRING); /* this helper only accepts FT_STRING fields */
+
+	byte_length = (((no_of_chars + 1) * 7) + (bit_offset & 0x07)) >> 3; /* same span that tvb_get_ts_23_038_7bits_string() passes to tvb_ensure_bytes_exist() */
+	byte_offset = bit_offset >> 3; /* convert the bit offset to an octet offset */
+
+	string = tvb_get_ts_23_038_7bits_string(wmem_packet_scope(), tvb, bit_offset, no_of_chars); /* allocated with the packet-scope allocator */
+
+	if (hfinfo->display == STR_UNICODE) {
+		DISSECTOR_ASSERT(g_utf8_validate(string, -1, NULL)); /* fields displayed as Unicode must hold valid UTF-8 */
+	}
+
+	pi = proto_tree_add_pi(tree, hfinfo, tvb, byte_offset, &byte_length); /* NOTE(review): length is passed by pointer, presumably so it can be adjusted - confirm */
+	DISSECTOR_ASSERT(byte_length >= 0);
+	proto_tree_set_string(PNODE_FINFO(pi), string);
+
+	return pi;
+}
+
 guchar
 proto_check_field_name(const gchar *field_name)
 {
--- a/epan/proto.h
+++ b/epan/proto.h
@ -274,31 +274,31 @@ WS_DLL_PUBLIC WS_MSVC_NORETURN void proto_report_dissector_bug(const char *messa
 * Unicode glyphs showing the name of the control character in small
 * caps, diagonally.  (Unfortunately, those only exist for C0, not C1.)
 */
-#define ENC_CHARENCODING_MASK	0x7FFFFFFE	/* mask out byte-order bits */
-#define ENC_ASCII		0x00000000
-#define ENC_UTF_8		0x00000002
-#define ENC_UTF_16		0x00000004
-#define ENC_UCS_2		0x00000006
-#define ENC_UCS_4		0x00000008
-#define ENC_ISO_8859_1		0x0000000A
-#define ENC_ISO_8859_2		0x0000000C
-#define ENC_ISO_8859_3		0x0000000E
-#define ENC_ISO_8859_4		0x00000010
-#define ENC_ISO_8859_5		0x00000012
-#define ENC_ISO_8859_6		0x00000014
-#define ENC_ISO_8859_7		0x00000016
-#define ENC_ISO_8859_8		0x00000018
-#define ENC_ISO_8859_9		0x0000001A
-#define ENC_ISO_8859_10		0x0000001C
-#define ENC_ISO_8859_11		0x0000001E
-/* #define ENC_ISO_8859_12		0x00000020 ISO 8859-12 was abandoned */
-#define ENC_ISO_8859_13		0x00000022
-#define ENC_ISO_8859_14		0x00000024
-#define ENC_ISO_8859_15		0x00000026
-#define ENC_ISO_8859_16		0x00000028
-#define ENC_WINDOWS_1250	0x0000002A
-#define ENC_3GPP_TS_23_038	0x0000002C
-#define ENC_EBCDIC		0x0000002E
+#define ENC_CHARENCODING_MASK		0x7FFFFFFE	/* mask out byte-order bits */
+#define ENC_ASCII			0x00000000
+#define ENC_UTF_8			0x00000002
+#define ENC_UTF_16			0x00000004
+#define ENC_UCS_2			0x00000006
+#define ENC_UCS_4			0x00000008
+#define ENC_ISO_8859_1			0x0000000A
+#define ENC_ISO_8859_2			0x0000000C
+#define ENC_ISO_8859_3			0x0000000E
+#define ENC_ISO_8859_4			0x00000010
+#define ENC_ISO_8859_5			0x00000012
+#define ENC_ISO_8859_6			0x00000014
+#define ENC_ISO_8859_7			0x00000016
+#define ENC_ISO_8859_8			0x00000018
+#define ENC_ISO_8859_9			0x0000001A
+#define ENC_ISO_8859_10			0x0000001C
+#define ENC_ISO_8859_11			0x0000001E
+/* #define ENC_ISO_8859_12			0x00000020 ISO 8859-12 was abandoned */
+#define ENC_ISO_8859_13			0x00000022
+#define ENC_ISO_8859_14			0x00000024
+#define ENC_ISO_8859_15			0x00000026
+#define ENC_ISO_8859_16			0x00000028
+#define ENC_WINDOWS_1250		0x0000002A
+#define ENC_3GPP_TS_23_038_7BITS	0x0000002C
+#define ENC_EBCDIC			0x0000002E

 /*
 * TODO:
@ -307,11 +307,7 @@ WS_DLL_PUBLIC WS_MSVC_NORETURN void proto_report_dissector_bug(const char *messa
 *
 *	"IBM MS DBCS"
 *	JIS C 6226
- *	7-bit encodings such as ETSI 03.38 (GSM SMS character set -
- *	    used in some files, but packet-ansi_637.c,
- *	    packet-cell_broadcast.c, packet-gmr1_rr.c,
- *	    packet-gsm_a_dtap.c, and packet-gsm_sms.c need some
- *	    work to use it)
+ *	7-bit encodings such as 7 bits ASCII used in packet-ansi_637.c
 *
 * As those are added, change code such as the code in packet-bacapp.c
 * to use them.
@ -2129,6 +2125,18 @@ proto_item *
 proto_tree_add_float_bits_format_value(proto_tree *tree, const int hf_index, tvbuff_t *tvb, const guint bit_offset, const gint no_of_bits,
 	float value, const char *format, ...) G_GNUC_PRINTF(7,8);

+
+/** Add a FT_STRING with ENC_3GPP_TS_23_038_7BITS encoding to a proto_tree.
+ @param tree the tree to append this item to
+ @param hfindex field index
+ @param tvb the tv buffer of the current data
+ @param bit_offset start of data in tvb expressed in bits
+ @param no_of_chars number of 7bits characters to display
+ @return the newly created item */
+WS_DLL_PUBLIC proto_item *
+proto_tree_add_ts_23_038_7bits_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb,
+	const guint bit_offset, const gint no_of_chars);
+
 /** Check if given string is a valid field name
 @param field_name the field name to check
 @return 0 if valid, else first illegal character */
--- a/epan/tvbuff.c
+++ b/epan/tvbuff.c
@ -2251,21 +2251,25 @@ handle_ts_23_038_char(wmem_strbuf_t *strbuf, guint8 code_point,
 	return saw_escape;
 }

-static gchar *
-tvb_get_ts_23_038_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint length)
+gchar *
+tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
+	const gint bit_offset, gint no_of_chars)
 {
 	wmem_strbuf_t *strbuf;
-	gint           i;       /* Byte counter for tvbuff */
-	gint           in_offset = offset; /* Current pointer to the input buffer */
+	gint           char_count;                  /* character counter for tvbuff */
+	gint           in_offset = bit_offset >> 3; /* Current pointer to the input buffer */
 	guint8         in_byte, out_byte, rest = 0x00;
 	gboolean       saw_escape = FALSE;
 	int            bits;

-	bits = 7;
+	bits = bit_offset & 0x07;
+	if (!bits) {
+		bits = 7;
+	}

-	tvb_ensure_bytes_exist(tvb, offset, length);
+	tvb_ensure_bytes_exist(tvb, in_offset, ((no_of_chars + 1) * 7 + (bit_offset & 0x07)) >> 3);
 	strbuf = wmem_strbuf_new(scope, NULL);
-	for(i = 0; i < length; i++) {
+	for(char_count = 0; char_count < no_of_chars;) {
 		/* Get the next byte from the string. */
 		in_byte = tvb_get_guint8(tvb, in_offset);
 		in_offset++;
@ -2286,17 +2290,20 @@ tvb_get_ts_23_038_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offs
 		 * next char. Under *out_num we have now 0 and under Rest -
 		 * _first_ part of the char.
 		 */
-		if ((in_offset != offset) || (bits == 7))
+		if (char_count || (bits == 7)) {
 			saw_escape = handle_ts_23_038_char(strbuf, out_byte,
 			    saw_escape);
+			char_count++;
+		}

 		/*
 		 * After reading 7 octets we have read 7 full characters
 		 * but we have 7 bits as well. This is the next character.
 		 */
-		if (bits == 1) {
+		if ((bits == 1) && (char_count < no_of_chars)) {
 			saw_escape = handle_ts_23_038_char(strbuf, rest,
 			    saw_escape);
+			char_count++;
 			bits = 7;
 			rest = 0x00;
 		} else
@ -2452,8 +2459,12 @@ tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
 		strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp1250);
 		break;

-	case ENC_3GPP_TS_23_038:
-		strbuf = tvb_get_ts_23_038_string(scope, tvb, offset, length);
+	case ENC_3GPP_TS_23_038_7BITS:
+		{
+			gint bit_offset = offset << 3;
+			gint no_of_chars = (length << 3) / 7;
+			strbuf = tvb_get_ts_23_038_7bits_string(scope, tvb, bit_offset, no_of_chars);
+		}
 		break;

 	case ENC_EBCDIC:
@ -2748,8 +2759,8 @@ tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, g
 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp1250);
 		break;

-	case ENC_3GPP_TS_23_038:
-		REPORT_DISSECTOR_BUG("TS 23.038 has no null character and doesn't support null-terminated strings");
+	case ENC_3GPP_TS_23_038_7BITS:
+		REPORT_DISSECTOR_BUG("TS 23.038 7bits has no null character and doesn't support null-terminated strings");
 		break;

 	case ENC_EBCDIC:
--- a/epan/tvbuff.h
+++ b/epan/tvbuff.h
@ -499,6 +499,19 @@ WS_DLL_PUBLIC guint8 *tvb_get_string(wmem_allocator_t *scope, tvbuff_t *tvb,
 WS_DLL_PUBLIC guint8 *tvb_get_string_enc(wmem_allocator_t *scope,
    tvbuff_t *tvb, const gint offset, const gint length, const guint encoding);

+/**
+ * Given a tvbuff, a bit offset, and a number of characters, allocate
+ * a buffer big enough to hold a non-null-terminated string of no_of_chars
+ * encoded according to 3GPP TS 23.038 7bits encoding at that offset,
+ * plus a trailing zero, copy the string into it, and return a pointer
+ * to the string.
+ *
+ * If scope is set to NULL it is the user's responsibility to g_free()
+ * the returned string. Otherwise the memory is automatically freed
+ * when the scope lifetime is reached.
+ */
+WS_DLL_PUBLIC gchar *tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope,
+    tvbuff_t *tvb, const gint bit_offset, gint no_of_chars);

 /**
 * Given a tvbuff and an offset, with the offset assumed to refer to