diff --git a/wsutil/unicode-utils.c b/wsutil/unicode-utils.c index 2ade075ad9..764c0b3c03 100644 --- a/wsutil/unicode-utils.c +++ b/wsutil/unicode-utils.c @@ -12,18 +12,24 @@ #include "unicode-utils.h" -int -ws_utf8_char_len(guint8 ch) -{ - if (ch >= 0xfe) return -1; - if (ch >= 0xfc) return 6; - if (ch >= 0xf8) return 5; - if (ch >= 0xf0) return 4; - if (ch >= 0xe0) return 3; - if (ch >= 0xc0) return 2; - else return 1; -} - +int ws_utf8_seqlen[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00...0x0f */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x10...0x1f */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x20...0x2f */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x30...0x3f */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40...0x4f */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x50...0x5f */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60...0x6f */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x70...0x7f */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80...0x8f */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90...0x9f */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0...0xaf */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0...0xbf */ + 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xc0...0xcf */ + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xd0...0xdf */ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0xe0...0xef */ + 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, /* 0xf0...0xff */ +}; #ifdef _WIN32 diff --git a/wsutil/unicode-utils.h b/wsutil/unicode-utils.h index 2ffb90ad32..538d2b03de 100644 --- a/wsutil/unicode-utils.h +++ b/wsutil/unicode-utils.h @@ -59,8 +59,16 @@ extern "C" { } while (0) -WS_DLL_PUBLIC -int ws_utf8_char_len(guint8 ch); +WSUTIL_EXPORT +int ws_utf8_seqlen[256]; + +/** Given the first byte of a UTF-8 encoded code point, + * return the length of the multibyte sequence, or *ZERO* + * if the byte is invalid as the first byte of a sequence. + * The argument is converted to guint8 before the table lookup. + */ +#define ws_utf8_char_len(ch) (ws_utf8_seqlen[(guint8)(ch)]) + #ifdef _WIN32