epan: Implement EBCDIC CP 500, for DRDA

EBCDIC Code Page 500 has exactly the same repertoire as CP 037,
covering all of ISO-8859-1, but has 7 bytes permuted. It is
the default code page for DRDA; use it there.
This commit is contained in:
John Thacker 2023-04-02 19:45:11 -04:00
parent d6f2ecc8e0
commit 9ea2b3db5e
8 changed files with 85 additions and 11 deletions

View File

@ -1543,6 +1543,7 @@ currently supported are:
in ETSI TS 102 221 Annex A
ENC_EBCDIC - EBCDIC
ENC_EBCDIC_CP037 - EBCDIC code page 037
ENC_EBCDIC_CP500 - EBCDIC code page 500
ENC_MAC_ROMAN - MAC ROMAN
ENC_CP437 - DOS code page 437
ENC_CP855 - DOS code page 855

View File

@ -57,6 +57,14 @@
* The IBM pages are no longer available; the versions archived on the
* Wayback Machine are, but the links to the PDF and text versions of
* the code pages don't all work (do *any* work?).
*
* Mappings to Unicode at the Unicode Consortium:
*
* https://www.unicode.org/Public/MAPPINGS/
*
* Of note, the VENDORS/MICSFT directory not only has various Windows
* and DOS code pages, but also several of the common MAC and EBCDIC
* code page mappings to Unicode.
*/
/*
@ -1233,6 +1241,11 @@ get_ascii_7bits_string(wmem_allocator_t *scope, const guint8 *ptr,
of Operation, but with some code points that don't correspond to
the same characters in code pages 037 and 1158 mapped to REPLACEMENT
CHARACTER - there may be more code points of that sort */
/* There are a few EBCDIC control codes that, strictly speaking, do not
* map to any control code in ASCII, or in Unicode for that matter. The
* customary treatment is to map them, in a particular conventional way,
* to the C1 control codes (U+0080 through U+009F), which in turn have no
* exact equivalent in EBCDIC, as below. */
const gunichar2 charset_table_ebcdic[256] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x009c, 0x0009, 0x0086, 0x007f,
0x0097, 0x008d, 0x008e, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
@ -1304,6 +1317,48 @@ const gunichar2 charset_table_ebcdic_cp037[256] = {
0x0038, 0x0039, 0x00b3, 0x00db, 0x00dc, 0x00d9, 0x00da, 0x009f,
};
/* EBCDIC code page 500
* https://www.ibm.com/support/pages/conversion-character-differences-between-ccsid-037-and-ccsid-500
* CCSID 500 ("International Latin-1") has exactly the same repertoire as 37,
* covering all of ISO-8859-1, but with seven code points permuted.
* It is notable because it is the default code page for DRDA:
* https://www.ibm.com/support/pages/drda-user-id-and-password-not-being-transmitted-correctly-when-containing-characters-%C2%AC-%C2%A2?lnk=hm
*
* The table is indexed by the EBCDIC byte value (0x00 - 0xff); each entry
* is the Unicode code point that byte is translated to. Each row below
* holds eight consecutive byte values; the trailing comment gives the
* range of byte values covered by the row.
*
* The seven positions that the IBM page above lists as differing from
* CCSID 037 are 0x4a, 0x4f, 0x5a, 0x5f, 0xb0, 0xba and 0xbb; the rows
* containing them are annotated with the character this table assigns.
*/
const gunichar2 charset_table_ebcdic_cp500[256] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x009c, 0x0009, 0x0086, 0x007f, /* 0x00 - 0x07: control codes */
0x0097, 0x008d, 0x008e, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x08 - 0x0f: control codes */
0x0010, 0x0011, 0x0012, 0x0013, 0x009d, 0x0085, 0x0008, 0x0087, /* 0x10 - 0x17: control codes */
0x0018, 0x0019, 0x0092, 0x008f, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x18 - 0x1f: control codes */
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000a, 0x0017, 0x001b, /* 0x20 - 0x27: control codes */
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x0005, 0x0006, 0x0007, /* 0x28 - 0x2f: control codes */
0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004, /* 0x30 - 0x37: control codes */
0x0098, 0x0099, 0x009a, 0x009b, 0x0014, 0x0015, 0x009e, 0x001a, /* 0x38 - 0x3f: control codes */
0x0020, 0x00a0, 0x00e2, 0x00e4, 0x00e0, 0x00e1, 0x00e3, 0x00e5, /* 0x40 - 0x47 */
0x00e7, 0x00f1, 0x005b, 0x002e, 0x003c, 0x0028, 0x002b, 0x0021, /* 0x48 - 0x4f: 0x4a is '[', 0x4f is '!' */
0x0026, 0x00e9, 0x00ea, 0x00eb, 0x00e8, 0x00ed, 0x00ee, 0x00ef, /* 0x50 - 0x57 */
0x00ec, 0x00df, 0x005d, 0x0024, 0x002a, 0x0029, 0x003b, 0x005e, /* 0x58 - 0x5f: 0x5a is ']', 0x5f is '^' */
0x002d, 0x002f, 0x00c2, 0x00c4, 0x00c0, 0x00c1, 0x00c3, 0x00c5, /* 0x60 - 0x67 */
0x00c7, 0x00d1, 0x00a6, 0x002c, 0x0025, 0x005f, 0x003e, 0x003f, /* 0x68 - 0x6f */
0x00f8, 0x00c9, 0x00ca, 0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, /* 0x70 - 0x77 */
0x00cc, 0x0060, 0x003a, 0x0023, 0x0040, 0x0027, 0x003d, 0x0022, /* 0x78 - 0x7f */
0x00d8, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x80 - 0x87: 0x81 onward is 'a' - 'g' */
0x0068, 0x0069, 0x00ab, 0x00bb, 0x00f0, 0x00fd, 0x00fe, 0x00b1, /* 0x88 - 0x8f: starts with 'h', 'i' */
0x00b0, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, /* 0x90 - 0x97: 0x91 onward is 'j' - 'p' */
0x0071, 0x0072, 0x00aa, 0x00ba, 0x00e6, 0x00b8, 0x00c6, 0x00a4, /* 0x98 - 0x9f: starts with 'q', 'r' */
0x00b5, 0x007e, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, /* 0xa0 - 0xa7: 0xa2 onward is 's' - 'x' */
0x0079, 0x007a, 0x00a1, 0x00bf, 0x00d0, 0x00dd, 0x00de, 0x00ae, /* 0xa8 - 0xaf: starts with 'y', 'z' */
0x00a2, 0x00a3, 0x00a5, 0x00b7, 0x00a9, 0x00a7, 0x00b6, 0x00bc, /* 0xb0 - 0xb7: 0xb0 is CENT SIGN */
0x00bd, 0x00be, 0x00ac, 0x007c, 0x00af, 0x00a8, 0x00b4, 0x00d7, /* 0xb8 - 0xbf: 0xba is NOT SIGN, 0xbb is '|' */
0x007b, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, /* 0xc0 - 0xc7: 0xc1 onward is 'A' - 'G' */
0x0048, 0x0049, 0x00ad, 0x00f4, 0x00f6, 0x00f2, 0x00f3, 0x00f5, /* 0xc8 - 0xcf: starts with 'H', 'I' */
0x007d, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, /* 0xd0 - 0xd7: 0xd1 onward is 'J' - 'P' */
0x0051, 0x0052, 0x00b9, 0x00fb, 0x00fc, 0x00f9, 0x00fa, 0x00ff, /* 0xd8 - 0xdf: starts with 'Q', 'R' */
0x005c, 0x00f7, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, /* 0xe0 - 0xe7: 0xe2 onward is 'S' - 'X' */
0x0059, 0x005a, 0x00b2, 0x00d4, 0x00d6, 0x00d2, 0x00d3, 0x00d5, /* 0xe8 - 0xef: starts with 'Y', 'Z' */
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, /* 0xf0 - 0xf7: '0' - '7' */
0x0038, 0x0039, 0x00b3, 0x00db, 0x00dc, 0x00d9, 0x00da, 0x009f, /* 0xf8 - 0xff: starts with '8', '9' */
};
/*
* Given a wmem scope, a pointer, a length, and a translation table with
* 256 entries, treat the string of bytes referred to by the pointer and

View File

@ -63,6 +63,7 @@ extern const gunichar2 charset_table_iso_646_basic[0x80];
/* Tables for EBCDIC code pages */
extern const gunichar2 charset_table_ebcdic[256];
extern const gunichar2 charset_table_ebcdic_cp037[256];
extern const gunichar2 charset_table_ebcdic_cp500[256];
/*
* Given a wmem scope, a pointer, and a length, treat the string of bytes

View File

@ -1116,7 +1116,7 @@ dissect_drda_typdefnam(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void
break;
}
}
proto_tree_add_item_ret_string(tree, hf_drda_typdefnam, tvb, 0, tvb_reported_length(tvb), ENC_EBCDIC, pinfo->pool, &typdefnam);
proto_tree_add_item_ret_string(tree, hf_drda_typdefnam, tvb, 0, tvb_reported_length(tvb), ENC_EBCDIC_CP500, pinfo->pool, &typdefnam);
for (int i = 0; typdefnam_vals[i].name != NULL; i++) {
if (strcmp(typdefnam_vals[i].name, typdefnam) == 0) {
pdu_info->typdefnam = typdefnam_vals[i].value;
@ -1448,7 +1448,7 @@ ccsid_to_encoding(guint32 ccsid)
case 0:
case 500:
case 65535:
return ENC_EBCDIC; /* XXX: CP 500 not yet supported */
return ENC_EBCDIC_CP500;
case 37:
return ENC_EBCDIC_CP037;
case 367:
@ -1555,13 +1555,13 @@ dissect_drda_pkgnam(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *d
if (tvb_reported_length(tvb) == 54) {
/* 58 - 4 bytes for the code point and length already removed. */
proto_tree_add_item(tree, hf_drda_rdbnam, tvb, offset, 18, ENC_UTF_8);
proto_tree_add_item(tree, hf_drda_rdbnam_ebcdic, tvb, offset, 18, ENC_EBCDIC);
proto_tree_add_item(tree, hf_drda_rdbnam_ebcdic, tvb, offset, 18, ENC_EBCDIC_CP500);
offset += 18;
proto_tree_add_item(tree, hf_drda_rdbcolid, tvb, offset, 18, ENC_UTF_8);
proto_tree_add_item(tree, hf_drda_rdbcolid_ebcdic, tvb, offset, 18, ENC_EBCDIC);
proto_tree_add_item(tree, hf_drda_rdbcolid_ebcdic, tvb, offset, 18, ENC_EBCDIC_CP500);
offset += 18;
proto_tree_add_item(tree, hf_drda_pkgid, tvb, offset, 18, ENC_UTF_8);
proto_tree_add_item(tree, hf_drda_pkgid_ebcdic, tvb, offset, 18, ENC_EBCDIC);
proto_tree_add_item(tree, hf_drda_pkgid_ebcdic, tvb, offset, 18, ENC_EBCDIC_CP500);
offset += 18;
} else if (tvb_reported_length(tvb) > 64) {
ti_length = proto_tree_add_item_ret_uint(tree, hf_drda_param_length, tvb, offset, 2, ENC_BIG_ENDIAN, &length);
@ -1570,7 +1570,7 @@ dissect_drda_pkgnam(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *d
}
offset += 2;
proto_tree_add_item(tree, hf_drda_rdbnam, tvb, offset, length, ENC_UTF_8);
proto_tree_add_item(tree, hf_drda_rdbnam_ebcdic, tvb, offset, length, ENC_EBCDIC);
proto_tree_add_item(tree, hf_drda_rdbnam_ebcdic, tvb, offset, length, ENC_EBCDIC_CP500);
offset += length;
ti_length = proto_tree_add_item_ret_uint(tree, hf_drda_param_length, tvb, offset, 2, ENC_BIG_ENDIAN, &length);
if (length < 18 || length > 255) {
@ -1578,7 +1578,7 @@ dissect_drda_pkgnam(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *d
}
offset += 2;
proto_tree_add_item(tree, hf_drda_rdbcolid, tvb, offset, length, ENC_UTF_8);
proto_tree_add_item(tree, hf_drda_rdbcolid_ebcdic, tvb, offset, length, ENC_EBCDIC);
proto_tree_add_item(tree, hf_drda_rdbcolid_ebcdic, tvb, offset, length, ENC_EBCDIC_CP500);
offset += length;
ti_length = proto_tree_add_item_ret_uint(tree, hf_drda_param_length, tvb, offset, 2, ENC_BIG_ENDIAN, &length);
if (length < 18 || length > 255) {
@ -1586,7 +1586,7 @@ dissect_drda_pkgnam(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *d
}
offset += 2;
proto_tree_add_item(tree, hf_drda_pkgid, tvb, offset, length, ENC_UTF_8);
proto_tree_add_item(tree, hf_drda_pkgid_ebcdic, tvb, offset, length, ENC_EBCDIC);
proto_tree_add_item(tree, hf_drda_pkgid_ebcdic, tvb, offset, length, ENC_EBCDIC_CP500);
offset += length;
} else {
proto_tree_add_expert_format(tree, pinfo, &ei_drda_opcode_invalid_length, tvb, 0, tvb_reported_length(tvb), "Invalid length; RDBNAM, RDBCOLID, and PKGID should all be length 18 or larger.");
@ -1939,7 +1939,7 @@ dissect_drda_collection(tvbuff_t *tvb, packet_info *pinfo _U_, proto_tree *tree,
proto_tree_add_item(drda_tree_sub, hf_drda_param_codepoint, tvb, offset + 2, 2, ENC_BIG_ENDIAN);
if (!dissector_try_uint_new(drda_opcode_table, iParameterCP, tvb_new_subset_length(tvb, offset + 4, iLengthParam - 4), pinfo, drda_tree_sub, FALSE, data)) {
proto_tree_add_item(drda_tree_sub, hf_drda_param_data, tvb, offset + 4, iLengthParam - 4, ENC_UTF_8);
proto_tree_add_item(drda_tree_sub, hf_drda_param_data_ebcdic, tvb, offset + 4, iLengthParam - 4, ENC_EBCDIC);
proto_tree_add_item(drda_tree_sub, hf_drda_param_data_ebcdic, tvb, offset + 4, iLengthParam - 4, ENC_EBCDIC_CP500);
}
}
offset += iLengthParam;
@ -2035,7 +2035,7 @@ dissect_drda_pdu(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void* data
proto_tree_add_item(drda_tree_sub, hf_drda_param_codepoint, tvb, offset + 2, 2, ENC_BIG_ENDIAN);
if (!dissector_try_uint_new(drda_opcode_table, iParameterCP, tvb_new_subset_length(tvb, offset + 4, iLengthParam - 4), pinfo, drda_tree_sub, FALSE, pdu_info)) {
proto_tree_add_item(drda_tree_sub, hf_drda_param_data, tvb, offset + 4, iLengthParam - 4, ENC_UTF_8);
proto_tree_add_item(drda_tree_sub, hf_drda_param_data_ebcdic, tvb, offset + 4, iLengthParam - 4, ENC_EBCDIC);
proto_tree_add_item(drda_tree_sub, hf_drda_param_data_ebcdic, tvb, offset + 4, iLengthParam - 4, ENC_EBCDIC_CP500);
}
}
offset += iLengthParam;

View File

@ -237,7 +237,7 @@ WS_DLL_PUBLIC value_string_ext mibenum_vals_character_sets_ext;
ZZZ(0, YYY(XXX, IANA_CS_IBM420, 2041, "IBM420", ENC_NA|_DEFAULT_WS_ENC)) \
ZZZ(0, YYY(XXX, IANA_CS_IBM423, 2042, "IBM423", ENC_NA|_DEFAULT_WS_ENC)) \
ZZZ(0, YYY(XXX, IANA_CS_IBM424, 2043, "IBM424", ENC_NA|_DEFAULT_WS_ENC)) \
ZZZ(0, YYY(XXX, IANA_CS_IBM500, 2044, "IBM500", ENC_NA|_DEFAULT_WS_ENC)) \
ZZZ(1, YYY(XXX, IANA_CS_IBM500, 2044, "IBM500", ENC_NA|ENC_EBCDIC_CP500)) \
ZZZ(0, YYY(XXX, IANA_CS_IBM851, 2045, "IBM851", ENC_NA|_DEFAULT_WS_ENC)) \
ZZZ(1, YYY(XXX, IANA_CS_IBM855, 2046, "IBM855", ENC_NA|ENC_CP855)) \
ZZZ(0, YYY(XXX, IANA_CS_IBM857, 2047, "IBM857", ENC_NA|_DEFAULT_WS_ENC)) \

View File

@ -90,6 +90,7 @@ static ws_enum_t all_enums[] = {
ENUM(ENC_DECT_STANDARD_8BITS),
ENUM(ENC_EBCDIC),
ENUM(ENC_EBCDIC_CP037),
ENUM(ENC_EBCDIC_CP500),
ENUM(ENC_ETSI_TS_102_221_ANNEX_A),
ENUM(ENC_EUC_KR),
ENUM(ENC_GB18030),

View File

@ -426,6 +426,7 @@ void proto_report_dissector_bug(const char *format, ...)
#define ENC_APN_STR 0x00000054 /* The encoding the APN/DNN field follows 3GPP TS 23.003 [2] clause 9.1.*/
#define ENC_DECT_STANDARD_8BITS 0x00000056 /* DECT standard character set as defined in ETSI EN 300 175-5 Annex D */
#define ENC_DECT_STANDARD_4BITS_TBCD 0x00000058 /* DECT standard 4bits character set as defined in ETSI EN 300 175-5 Annex D (BCD with 0xb = SPACE)*/
#define ENC_EBCDIC_CP500 0x00000060 /* EBCDIC code page 500 */
/*
* TODO:
*

View File

@ -1717,6 +1717,7 @@ validate_single_byte_ascii_encoding(const guint encoding)
case ENC_ASCII_7BITS:
case ENC_EBCDIC:
case ENC_EBCDIC_CP037:
case ENC_EBCDIC_CP500:
case ENC_BCD_DIGITS_0_9:
case ENC_KEYPAD_ABC_TBCD:
case ENC_KEYPAD_BC_TBCD:
@ -3278,6 +3279,13 @@ tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
strptr = tvb_get_nonascii_unichar2_string(scope, tvb, offset, length, charset_table_ebcdic_cp037);
break;
case ENC_EBCDIC_CP500:
/*
* EBCDIC code page 500.
*/
strptr = tvb_get_nonascii_unichar2_string(scope, tvb, offset, length, charset_table_ebcdic_cp500);
break;
case ENC_T61:
strptr = tvb_get_t61_string(scope, tvb, offset, length);
break;
@ -3761,6 +3769,13 @@ tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, g
strptr = tvb_get_nonascii_unichar2_stringz(scope, tvb, offset, lengthp, charset_table_ebcdic_cp037);
break;
case ENC_EBCDIC_CP500:
/*
* EBCDIC code page 500.
*/
strptr = tvb_get_nonascii_unichar2_stringz(scope, tvb, offset, lengthp, charset_table_ebcdic_cp500);
break;
case ENC_T61:
strptr = tvb_get_t61_stringz(scope, tvb, offset, lengthp);
break;