charsets: Optimize ENC_ASCII by appending valid bytes at once

For ASCII encoding, most bytes are copied directly. Count consecutive
valid bytes in an accumulator and append them all at once when we
get an invalid character with the high bit set, or at the end.
This reduces the number of reallocations and allows larger, more
optimized memcpys.
This commit is contained in:
John Thacker 2022-12-27 10:41:16 -05:00
parent afe5ed0aa5
commit ba7917309a
1 changed files with 15 additions and 5 deletions

View File

@ -72,19 +72,29 @@ guint8 *
get_ascii_string(wmem_allocator_t *scope, const guint8 *ptr, gint length)
{
    /*
     * Build a string from `length` bytes starting at `ptr`.
     *
     * Bytes below 0x80 are copied through unchanged.  Each byte with the
     * high bit set is not copied; wmem_strbuf_append_unichar_repl() is
     * called once in its place (presumably appending the Unicode
     * REPLACEMENT CHARACTER -- confirm against the wmem_strbuf API).
     *
     * Rather than appending one byte at a time, consecutive valid bytes
     * are counted and appended as a single run, either when an invalid
     * byte interrupts the run or when the input is exhausted.
     *
     * The buffer is created with `scope` and the finalized buffer is
     * returned.  A `length` of zero or less appends nothing.
     */
    wmem_strbuf_t *str;
    const guint8 *prev = ptr;   /* first byte of the pending valid run */
    size_t valid_bytes = 0;     /* number of bytes in the pending run */

    str = wmem_strbuf_new_sized(scope, length+1);

    while (length > 0) {
        guint8 ch = *ptr++;

        if (ch < 0x80) {
            valid_bytes++;
        } else {
            if (valid_bytes) {
                /* Flush the run that ended just before this byte. */
                wmem_strbuf_append_len(str, prev, valid_bytes);
                valid_bytes = 0;
            }
            /*
             * The next run can only start after this invalid byte.  This
             * must happen even when no run was pending (leading or
             * consecutive invalid bytes); otherwise `prev` would still
             * point at an invalid byte and a later flush would copy the
             * wrong bytes.
             */
            prev = ptr;
            wmem_strbuf_append_unichar_repl(str);
        }
        length--;
    }
    if (valid_bytes) {
        /* Flush the trailing run of valid bytes. */
        wmem_strbuf_append_len(str, prev, valid_bytes);
    }

    return (guint8 *) wmem_strbuf_finalize(str);
}