From Jakub

support DVB-SI character tables (EN 300 468) in a generic way

From me
move things to charsets.c/.h
distinguish between single and multi byte encoding for some tables
(so that the highlighted bytes match the displayed value)
no character table byte -> length 0, use default table

svn path=/trunk/; revision=53886
This commit is contained in:
Martin Kaiser 2013-12-09 20:46:27 +00:00
parent 3ae2e5ece2
commit cb1cb946d3
2 changed files with 255 additions and 0 deletions

View File

@ -26,6 +26,10 @@
#include <glib.h>
#include <epan/proto.h>
#include <epan/tvbuff.h>
#include <epan/value_string.h>
#include "charsets.h"
/*
@ -216,3 +220,202 @@ const gunichar2 charset_table_cp1250[0x80] = {
0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, /* 0xF0 - */
0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, /* - 0xFF */
};
/*
 * Map a single-byte character table selector (the first byte of a DVB-SI
 * string) to the corresponding encoding.
 *
 * Selectors 0x01 ... 0x0B are looked up in a table; every other value,
 * including 0x00, yields DVB_ENCODING_UNKNOWN.
 */
static dvb_encoding_e
dvb_analyze_string_charset0(guint8 byte0)
{
    /* indexed by the selector byte, 0x00 ... 0x0B */
    static const dvb_encoding_e single_byte_tables[] = {
        DVB_ENCODING_UNKNOWN,     /* 0x00: no table assigned */
        DVB_ENCODING_ISO_8859_5,  /* 0x01 */
        DVB_ENCODING_ISO_8859_6,  /* 0x02 */
        DVB_ENCODING_ISO_8859_7,  /* 0x03 */
        DVB_ENCODING_ISO_8859_8,  /* 0x04 */
        DVB_ENCODING_ISO_8859_9,  /* 0x05 */
        DVB_ENCODING_ISO_8859_10, /* 0x06 */
        DVB_ENCODING_ISO_8859_11, /* 0x07 */
        DVB_ENCODING_RESERVED,    /* 0x08: was reserved for ISO-8859-12 */
        DVB_ENCODING_ISO_8859_13, /* 0x09 */
        DVB_ENCODING_ISO_8859_14, /* 0x0A */
        DVB_ENCODING_ISO_8859_15  /* 0x0B */
    };

    if (byte0 < sizeof(single_byte_tables) / sizeof(single_byte_tables[0]))
        return single_byte_tables[byte0];

    /* XXX 0x11 ... 0x15 are not handled yet */
    return DVB_ENCODING_UNKNOWN;
}
/*
 * Map the 16-bit table number that follows a 0x10 selector byte to the
 * corresponding encoding.
 *
 * Numbers 0x0000 ... 0x000F are looked up in a table (0x0000 and 0x000C are
 * reserved); anything larger yields DVB_ENCODING_UNKNOWN.
 */
static dvb_encoding_e
dvb_analyze_string_charset0_10(guint16 byte12)
{
    /* indexed by the table number, 0x0000 ... 0x000F */
    static const dvb_encoding_e three_byte_tables[] = {
        DVB_ENCODING_RESERVED,        /* 0x0000 */
        DVB_ENCODING_EXT_ISO_8859_1,  /* 0x0001 */
        DVB_ENCODING_EXT_ISO_8859_2,  /* 0x0002 */
        DVB_ENCODING_EXT_ISO_8859_3,  /* 0x0003 */
        DVB_ENCODING_EXT_ISO_8859_4,  /* 0x0004 */
        DVB_ENCODING_EXT_ISO_8859_5,  /* 0x0005 */
        DVB_ENCODING_EXT_ISO_8859_6,  /* 0x0006 */
        DVB_ENCODING_EXT_ISO_8859_7,  /* 0x0007 */
        DVB_ENCODING_EXT_ISO_8859_8,  /* 0x0008 */
        DVB_ENCODING_EXT_ISO_8859_9,  /* 0x0009 */
        DVB_ENCODING_EXT_ISO_8859_10, /* 0x000A */
        DVB_ENCODING_EXT_ISO_8859_11, /* 0x000B */
        DVB_ENCODING_RESERVED,        /* 0x000C */
        DVB_ENCODING_EXT_ISO_8859_13, /* 0x000D */
        DVB_ENCODING_EXT_ISO_8859_14, /* 0x000E */
        DVB_ENCODING_EXT_ISO_8859_15  /* 0x000F */
    };

    if (byte12 < sizeof(three_byte_tables) / sizeof(three_byte_tables[0]))
        return three_byte_tables[byte12];

    /* 0x10 XX XX with any other table number */
    return DVB_ENCODING_UNKNOWN;
}
/*
 * Map the encoding_type_id byte that follows a 0x1F selector byte to an
 * encoding. No id is decoded at the moment, so every value is reported as
 * DVB_ENCODING_RESERVED; the ranges are kept apart only to record who the
 * ids are noted as belonging to.
 *
 * http://www.dvbservices.com/identifiers/encoding_type_id
 */
static dvb_encoding_e
dvb_analyze_string_charset0_1F(guint8 byte1)
{
    if (byte1 == 0x00) {
        /* 0x1F 0x00 */
        return DVB_ENCODING_RESERVED;
    }

    if (byte1 <= 0x04) {
        /* XXX: 0x01 ... 0x04, BBC */
        return DVB_ENCODING_RESERVED;
    }

    if (byte1 <= 0x06) {
        /* XXX: 0x05 ... 0x06, Malaysian Technical Standards Forum Bhd */
        return DVB_ENCODING_RESERVED;
    }

    /* 0x1F XX, everything else */
    return DVB_ENCODING_RESERVED;
}
/*
 * Inspect the beginning of a DVB-SI string and work out which character
 * table it selects.
 *
 * tvb, offset - where the string starts
 * length      - number of bytes available for the string
 * encoding    - receives the detected encoding
 *
 * Returns the number of bytes taken up by the character table selector:
 * 0 when the string is empty or starts with an ordinary character (the
 * default Latin table applies), 1, 2 or 3 otherwise. If a multi-byte
 * selector is cut short, *encoding is set to DVB_ENCODING_INVALID and 1 is
 * returned.
 */
guint
dvb_analyze_string_charset(tvbuff_t *tvb, int offset, int length, dvb_encoding_e *encoding)
{
    guint8 selector;

    if (length < 1) {
        /* no character table byte at all: default table */
        *encoding = DVB_ENCODING_LATIN;
        return 0;
    }

    selector = tvb_get_guint8(tvb, offset + 0);

    if (selector >= 0x20) {
        /* the first byte is a normal character, not the number of a character table */
        *encoding = DVB_ENCODING_LATIN;
        return 0;
    }

    switch (selector) {
    case 0x1F:
        /* one more byte holds the encoding_type_id */
        if (length < 2) {
            *encoding = DVB_ENCODING_INVALID;
            return 1;
        }
        *encoding = dvb_analyze_string_charset0_1F(tvb_get_guint8(tvb, offset + 1));
        return 2;

    case 0x10:
        /* two more bytes hold the table number */
        if (length < 3) {
            *encoding = DVB_ENCODING_INVALID;
            return 1;
        }
        *encoding = dvb_analyze_string_charset0_10(tvb_get_ntohs(tvb, offset + 1));
        return 3;

    default:
        break;
    }

    /* 0x16 ... 0x1E and 0x0C ... 0x0F */
    if (selector >= 0x16 || (selector >= 0x0C && selector <= 0x0F)) {
        *encoding = DVB_ENCODING_RESERVED;
        return 1;
    }

    /* what is left: 0x00 ... 0x0B and 0x11 ... 0x15 */
    *encoding = dvb_analyze_string_charset0(selector);
    return 1;
}
/*
 * Translate a DVB string encoding into the ENC_ flags used when adding the
 * string to the protocol tree.
 *
 * Only DVB_ENCODING_EXT_ISO_8859_2 has a dedicated mapping so far; every
 * other encoding is treated as ASCII.
 *
 * XXX: take ISO control codes into account,
 *      e.g. 0x86 - turn emphasis on ; 0x87 - turn emphasis off
 */
guint
dvb_enc_to_item_enc(dvb_encoding_e encoding)
{
    if (encoding == DVB_ENCODING_EXT_ISO_8859_2)
        return ENC_ISO_8859_2 | ENC_NA;

    /* not supported */
    return ENC_ASCII | ENC_NA;
}
/*
 * Human-readable descriptions for the dvb_encoding_e values.
 *
 * The first three entries describe the error/placeholder values (invalid
 * length, reserved, unknown). Each DVB_ENCODING_EXT_ISO_8859_x entry uses the
 * same text as its DVB_ENCODING_ISO_8859_x counterpart where one exists.
 * The list ends with a { 0, NULL } entry.
 */
const value_string dvb_string_encoding_vals[] = {
{ DVB_ENCODING_INVALID, "Incorrect length for encoding" },
{ DVB_ENCODING_RESERVED, "Reserved for future use" },
{ DVB_ENCODING_UNKNOWN, "Value not specified by spec" },
{ DVB_ENCODING_LATIN, "Latin (default table)" },
{ DVB_ENCODING_ISO_8859_5, "ISO/IEC 8859-5 (Latin/Cyrillic)" },
{ DVB_ENCODING_ISO_8859_6, "ISO/IEC 8859-6 (Latin/Arabic)" },
{ DVB_ENCODING_ISO_8859_7, "ISO/IEC 8859-7 (Latin/Greek)" },
{ DVB_ENCODING_ISO_8859_8, "ISO/IEC 8859-8 (Latin/Hebrew)" },
{ DVB_ENCODING_ISO_8859_9, "ISO/IEC 8859-9 (West European & Turkish)" },
{ DVB_ENCODING_ISO_8859_10, "ISO/IEC 8859-10 (North European)" },
{ DVB_ENCODING_ISO_8859_11, "ISO/IEC 8859-11 (Thai)" },
{ DVB_ENCODING_ISO_8859_13, "ISO/IEC 8859-13 (Baltic)" },
{ DVB_ENCODING_ISO_8859_14, "ISO/IEC 8859-14 (Celtic)" },
{ DVB_ENCODING_ISO_8859_15, "ISO/IEC 8859-15 (West European)" },
/* tables selected through the three-byte form (0x10 + table number) */
{ DVB_ENCODING_EXT_ISO_8859_1, "ISO/IEC 8859-1 (West European)" },
{ DVB_ENCODING_EXT_ISO_8859_2, "ISO/IEC 8859-2 (East European)" },
{ DVB_ENCODING_EXT_ISO_8859_3, "ISO/IEC 8859-3 (South European)" },
{ DVB_ENCODING_EXT_ISO_8859_4, "ISO/IEC 8859-4 (North and North-East European)" },
{ DVB_ENCODING_EXT_ISO_8859_5, "ISO/IEC 8859-5 (Latin/Cyrillic)" },
{ DVB_ENCODING_EXT_ISO_8859_6, "ISO/IEC 8859-6 (Latin/Arabic)" },
{ DVB_ENCODING_EXT_ISO_8859_7, "ISO/IEC 8859-7 (Latin/Greek)" },
{ DVB_ENCODING_EXT_ISO_8859_8, "ISO/IEC 8859-8 (Latin/Hebrew)" },
{ DVB_ENCODING_EXT_ISO_8859_9, "ISO/IEC 8859-9 (West European & Turkish)" },
{ DVB_ENCODING_EXT_ISO_8859_10, "ISO/IEC 8859-10 (North European)" },
{ DVB_ENCODING_EXT_ISO_8859_11, "ISO/IEC 8859-11 (Thai)" },
{ DVB_ENCODING_EXT_ISO_8859_13, "ISO/IEC 8859-13 (Baltic)" },
{ DVB_ENCODING_EXT_ISO_8859_14, "ISO/IEC 8859-14 (Celtic)" },
{ DVB_ENCODING_EXT_ISO_8859_15, "ISO/IEC 8859-15 (West European)" },
{ 0, NULL }
};

View File

@ -26,10 +26,62 @@
#include "ws_symbol_export.h"
#include <epan/tvbuff.h>
#include <epan/value_string.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/*
 * Character encodings that a DVB-SI string can select.
 *
 * Negative values are error/placeholder results. The values 1 ... 11 are
 * the single-byte table selectors themselves. The DVB_ENCODING_EXT_* values
 * are DVB_ENCODING_EXT_BASE ORed with the table number used in the
 * three-byte selector form.
 */
typedef enum {
    DVB_ENCODING_INVALID   = -3, /* length invalid */
    DVB_ENCODING_RESERVED  = -2, /* reserved by spec */
    DVB_ENCODING_UNKNOWN   = -1, /* not defined by spec */

    DVB_ENCODING_LATIN = 0,      /* default table, no selector byte */

    DVB_ENCODING_ISO_8859_5,
    DVB_ENCODING_ISO_8859_6,
    DVB_ENCODING_ISO_8859_7,
    DVB_ENCODING_ISO_8859_8,
    DVB_ENCODING_ISO_8859_9,
    DVB_ENCODING_ISO_8859_10,
    DVB_ENCODING_ISO_8859_11,
    /* 0x08 is reserved */
    DVB_ENCODING_ISO_8859_13 = 9,
    DVB_ENCODING_ISO_8859_14,
    DVB_ENCODING_ISO_8859_15,
    /* TODO: 0x11...0x15 */

    DVB_ENCODING_EXT_BASE = 0x100000,
    DVB_ENCODING_EXT_ISO_8859_1  = DVB_ENCODING_EXT_BASE | 1,
    DVB_ENCODING_EXT_ISO_8859_2  = DVB_ENCODING_EXT_BASE | 2,
    DVB_ENCODING_EXT_ISO_8859_3  = DVB_ENCODING_EXT_BASE | 3,
    DVB_ENCODING_EXT_ISO_8859_4  = DVB_ENCODING_EXT_BASE | 4,
    DVB_ENCODING_EXT_ISO_8859_5  = DVB_ENCODING_EXT_BASE | 5,
    DVB_ENCODING_EXT_ISO_8859_6  = DVB_ENCODING_EXT_BASE | 6,
    DVB_ENCODING_EXT_ISO_8859_7  = DVB_ENCODING_EXT_BASE | 7,
    DVB_ENCODING_EXT_ISO_8859_8  = DVB_ENCODING_EXT_BASE | 8,
    DVB_ENCODING_EXT_ISO_8859_9  = DVB_ENCODING_EXT_BASE | 9,
    DVB_ENCODING_EXT_ISO_8859_10 = DVB_ENCODING_EXT_BASE | 10,
    DVB_ENCODING_EXT_ISO_8859_11 = DVB_ENCODING_EXT_BASE | 11,
    /* DVB_ENCODING_EXT_ISO_8859_12 = DVB_ENCODING_EXT_BASE | 12 (unused, reserved) */
    DVB_ENCODING_EXT_ISO_8859_13 = DVB_ENCODING_EXT_BASE | 13,
    DVB_ENCODING_EXT_ISO_8859_14 = DVB_ENCODING_EXT_BASE | 14,
    /* no comma after the last enumerator: C89 and C++98 compilers reject it */
    DVB_ENCODING_EXT_ISO_8859_15 = DVB_ENCODING_EXT_BASE | 15
} dvb_encoding_e;
/* descriptive strings for the dvb_encoding_e values */
extern const value_string dvb_string_encoding_vals[];
/*
 * Determine the character table selected at the start of a DVB-SI string.
 *
 * tvb, offset - where the string starts
 * length      - number of bytes available for the string
 * encoding    - receives the detected encoding (or one of the negative
 *               DVB_ENCODING_ error values)
 *
 * Returns the number of bytes (0 ... 3) occupied by the character table
 * selector; 0 means the string starts directly with text in the default
 * Latin table.
 */
WS_DLL_PUBLIC
guint dvb_analyze_string_charset(tvbuff_t *tvb, int offset, int length,
dvb_encoding_e *encoding);
/*
 * Convert a dvb_encoding_e value into ENC_ flags for adding the string to
 * the protocol tree. Encodings without a dedicated mapping are returned as
 * ENC_ASCII | ENC_NA.
 */
WS_DLL_PUBLIC
guint dvb_enc_to_item_enc(dvb_encoding_e encoding);
#if 0
void ASCII_to_EBCDIC(guint8 *buf, guint bytes);
guint8 ASCII_to_EBCDIC1(guint8 c);