Move GSM guint8 to unicode conversion functions to charsets.c

charsets.c is already the place with a huge number of conversion tables. Also make gsm_default_alphabet gunichar2, since all values fit in 2 bytes. Change-Id: Ia5ab6c176b4fec21ec76b06513c1d00794ba10ef Reviewed-on: https://code.wireshark.org/review/1328 Reviewed-by: Anders Broman <a.broman58@gmail.com>
2014-04-24 21:56:43 +02:00 · 2014-04-24 21:56:43 +02:00 · 4bd8336017
parent 75cb2675fd
commit 4bd8336017
3 changed files with 68 additions and 61 deletions
--- a/epan/charsets.c
+++ b/epan/charsets.c
@ -26,6 +26,9 @@

 #include "charsets.h"

+/* REPLACEMENT CHARACTER */
+#define UNREPL 0xFFFD
+
 /*
 * Wikipedia's "Character encoding" template, giving a pile of character encodings and
 * Wikipedia pages for them:
@ -166,15 +169,68 @@ EBCDIC_to_ASCII1(guint8 c)
    return EBCDIC_translate_ASCII[c];
 }

+/*
+ * FROM GNOKII
+ * gsm-encoding.c
+ * gsm-sms.c
+ */
+
+/* ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet: Unicode code point for each code 0x00-0x7f */
+static const gunichar2 gsm_default_alphabet[0x80] = {
+    '@',   0xa3,  '$',   0xa5,  0xe8,  0xe9,  0xf9,  0xec,
+    0xf2,  0xc7,  '\n',  0xd8,  0xf8,  '\r',  0xc5,  0xe5,
+    0x394, '_',   0x3a6, 0x393, 0x39b, 0x3a9, 0x3a0, 0x3a8,
+    0x3a3, 0x398, 0x39e, 0xa0,  0xc6,  0xe6,  0xdf,  0xc9, /* index 0x1b (GN_CHAR_ESCAPE in tvbuff.c) holds 0xa0 */
+    ' ',   '!',   '\"',  '#',   0xa4,  '%',   '&',   '\'',
+    '(',   ')',   '*',   '+',   ',',   '-',   '.',   '/',
+    '0',   '1',   '2',   '3',   '4',   '5',   '6',   '7',
+    '8',   '9',   ':',   ';',   '<',   '=',   '>',   '?',
+    0xa1,  'A',   'B',   'C',   'D',   'E',   'F',   'G',
+    'H',   'I',   'J',   'K',   'L',   'M',   'N',   'O',
+    'P',   'Q',   'R',   'S',   'T',   'U',   'V',   'W',
+    'X',   'Y',   'Z',   0xc4,  0xd6,  0xd1,  0xdc,  0xa7,
+    0xbf,  'a',   'b',   'c',   'd',   'e',   'f',   'g',
+    'h',   'i',   'j',   'k',   'l',   'm',   'n',   'o',
+    'p',   'q',   'r',   's',   't',   'u',   'v',   'w',
+    'x',   'y',   'z',   0xe4,  0xf6,  0xf1,  0xfc,  0xe0
+};
+
+gunichar
+GSMext_to_UNICHAR(guint8 c) /* decode a code that followed the GSM escape (extension table) */
+{
+    switch (c)
+    {
+        case 0x0a: return 0x0c; /* form feed */
+        case 0x14: return '^';
+        case 0x28: return '{';
+        case 0x29: return '}';
+        case 0x2f: return '\\';
+        case 0x3c: return '[';
+        case 0x3d: return '~';
+        case 0x3e: return ']';
+        case 0x40: return '|';
+        case 0x65: return 0x20ac; /* euro sign, U+20AC */
+    }
+
+    return UNREPL; /* no extension character for this code: U+FFFD REPLACEMENT CHARACTER */
+}
+
+gunichar
+GSM_to_UNICHAR(guint8 c) /* look up a code in the GSM default alphabet table */
+{
+    if (c < G_N_ELEMENTS(gsm_default_alphabet)) /* only 0x00-0x7f have table entries */
+        return gsm_default_alphabet[c];
+
+    return UNREPL; /* code above 0x7f: U+FFFD REPLACEMENT CHARACTER */
+}
+
+
 /*
 * Translation tables that map the upper 128 code points in single-byte
 * "extended ASCII" character encodings to Unicode code points in the
 * Basic Multilingual Plane.
 */

-/* REPLACEMENT CHARACTER */
-#define UNREPL 0xFFFD
-
 /* ISO-8859-2 (http://en.wikipedia.org/wiki/ISO/IEC_8859-2#Code_page_layout) */
 const gunichar2 charset_table_iso_8859_2[0x80] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,        /* 0x80 -      */
@ -516,6 +572,7 @@ const gunichar2 charset_table_cp437[0x80] = {
    0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0,        /*      - 0xFF */
 };

+
 /*
 * Editor modelines  -  http://www.wireshark.org/tools/modelines.html
 *
--- a/epan/charsets.h
+++ b/epan/charsets.h
@ -37,6 +37,12 @@ void EBCDIC_to_ASCII(guint8 *buf, guint bytes);
 WS_DLL_PUBLIC
 guint8 EBCDIC_to_ASCII1(guint8 c);

+WS_DLL_PUBLIC gunichar
+GSM_to_UNICHAR(guint8 c);
+
+WS_DLL_PUBLIC gunichar
+GSMext_to_UNICHAR(guint8 c);
+
 /*
 * Translation tables that map the upper 128 code points in single-byte
 * "extended ASCII" character encodings to Unicode code points in the
--- a/epan/tvbuff.c
+++ b/epan/tvbuff.c
@ -2500,70 +2500,14 @@ tvb_get_ucs_4_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
 */
 #define GN_BYTE_MASK ((1 << bits) - 1)

-#define GN_CHAR_ALPHABET_SIZE 128
-
 #define GN_CHAR_ESCAPE 0x1b

-static const gunichar gsm_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
-
-	/* ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet */
-
-	'@',   0xa3,  '$',   0xa5,  0xe8,  0xe9,  0xf9,  0xec,
-	0xf2,  0xc7,  '\n',  0xd8,  0xf8,  '\r',  0xc5,  0xe5,
-	0x394, '_',   0x3a6, 0x393, 0x39b, 0x3a9, 0x3a0, 0x3a8,
-	0x3a3, 0x398, 0x39e, 0xa0,  0xc6,  0xe6,  0xdf,  0xc9,
-	' ',   '!',   '\"',  '#',   0xa4,  '%',   '&',   '\'',
-	'(',   ')',   '*',   '+',   ',',   '-',   '.',   '/',
-	'0',   '1',   '2',   '3',   '4',   '5',   '6',   '7',
-	'8',   '9',   ':',   ';',   '<',   '=',   '>',   '?',
-	0xa1,  'A',   'B',   'C',   'D',   'E',   'F',   'G',
-	'H',   'I',   'J',   'K',   'L',   'M',   'N',   'O',
-	'P',   'Q',   'R',   'S',   'T',   'U',   'V',   'W',
-	'X',   'Y',   'Z',   0xc4,  0xd6,  0xd1,  0xdc,  0xa7,
-	0xbf,  'a',   'b',   'c',   'd',   'e',   'f',   'g',
-	'h',   'i',   'j',   'k',   'l',   'm',   'n',   'o',
-	'p',   'q',   'r',   's',   't',   'u',   'v',   'w',
-	'x',   'y',   'z',   0xe4,  0xf6,  0xf1,  0xfc,  0xe0
-};
-
 static gboolean
 char_is_escape(unsigned char value)
 {
 	return (value == GN_CHAR_ESCAPE);
 }

-static gunichar
-char_def_alphabet_ext_decode(unsigned char value)
-{
-	switch (value)
-	{
-	case 0x0a: return 0x0c; /* form feed */
-	case 0x14: return '^';
-	case 0x28: return '{';
-	case 0x29: return '}';
-	case 0x2f: return '\\';
-	case 0x3c: return '[';
-	case 0x3d: return '~';
-	case 0x3e: return ']';
-	case 0x40: return '|';
-	case 0x65: return 0x20ac; /* euro */
-	default: return UNREPL; /* invalid character */
-	}
-}
-
-static gunichar
-char_def_alphabet_decode(unsigned char value)
-{
-	if (value < GN_CHAR_ALPHABET_SIZE)
-	{
-		return gsm_default_alphabet[value];
-	}
-	else
-	{
-		return UNREPL;
-	}
-}
-
 static gboolean
 handle_ts_23_038_char(wmem_strbuf_t *strbuf, guint8 code_point,
 		      gboolean saw_escape)
@ -2584,9 +2528,9 @@ handle_ts_23_038_char(wmem_strbuf_t *strbuf, guint8 code_point,
 		 */
 		if (saw_escape) {
 			saw_escape = FALSE;
-			uchar = char_def_alphabet_ext_decode(code_point);
+			uchar = GSMext_to_UNICHAR(code_point);
 		} else {
-			uchar = char_def_alphabet_decode(code_point);
+			uchar = GSM_to_UNICHAR(code_point);
 		}
 		wmem_strbuf_append_unichar(strbuf, uchar);
 	}