diff --git a/wsutil/str_util.c b/wsutil/str_util.c index f41e639bde..0d6e1694b8 100644 --- a/wsutil/str_util.c +++ b/wsutil/str_util.c @@ -1031,6 +1031,21 @@ format_text_chr(wmem_allocator_t *allocator, const char *string, size_t len, cha return wmem_strbuf_finalize(buf); } +char* +ws_utf8_truncate(char *string, size_t len) +{ + char* last_char; + + /* Write the terminator at offset len so the string is at most len bytes long. */ + string[len] = '\0'; + last_char = g_utf8_find_prev_char(string, string + len); + if (last_char != NULL && g_utf8_get_char_validated(last_char, -1) == (gunichar)-2) { + /* (gunichar)-2 marks a partial sequence: the final character was cut short, so end the string where that character begins. */ + *last_char = '\0'; + } + return string; +} + /* ASCII/EBCDIC conversion tables from * https://web.archive.org/web/20060813174742/http://www.room42.com/store/computer_center/code_tables.shtml */ diff --git a/wsutil/str_util.h b/wsutil/str_util.h index 9b2b1e295e..03bd76498c 100644 --- a/wsutil/str_util.h +++ b/wsutil/str_util.h @@ -288,6 +288,29 @@ WS_DLL_PUBLIC char *format_text_chr(wmem_allocator_t *allocator, const char *string, size_t len, char chr); +/** + * Truncate a UTF-8 string in place so that it is no longer than len bytes + * (not counting the null terminator), ensuring that the string is null + * terminated and ends with a complete character instead of a partial + * sequence (e.g., dropping up to 3 more bytes of a 4-byte final character). + * + * The buffer holding the string must be large enough (at least len + 1 + * including the null terminator), and the first len bytes of the buffer + * must be a valid UTF-8 string, except for possibly ending in a partial + * sequence or not being null terminated. This is a convenience function + * that for speed does not check either of those conditions. + * + * A common use case is when a valid UTF-8 string has been copied into a + * buffer of length len+1 via snprintf, strlcpy, or strlcat and truncated, + * to ensure that the final UTF-8 character is not a partial sequence. 
+ * + * @param string A pointer to the input string; it is modified in place + * @param len The maximum length in bytes, excluding the null terminator + * @return The string pointer that was passed in + */ +WS_DLL_PUBLIC +char* ws_utf8_truncate(char *string, size_t len); + WS_DLL_PUBLIC void EBCDIC_to_ASCII(guint8 *buf, guint bytes);