wmem: make wmem_strsplit behave more like g_strsplit

In the past "g_strsplit" users were converted to use "wmem_strsplit" to
avoid memory leaks when dissection fails. The semantics were slightly
different though. When the DNS dissector tried to split the string "."
using delimiter ".", it would previously (unexpectedly) receive an empty
vector (and crash). Now it will receive a vector with one element.

Additionally, suggest that users of wmem_strsplit with a NULL allocator
use g_strsplit instead, since otherwise the returned elements will leak.

Bug: 14980
Change-Id: I408dfdb0ffa9e24ccdba69c8ee095abea72f6feb
Reviewed-on: https://code.wireshark.org/review/28724
Petri-Dish: Peter Wu <peter@lekensteyn.nl>
Tested-by: Petri Dish Buildbot
Reviewed-by: Anders Broman <a.broman58@gmail.com>
This commit is contained in:
Peter Wu 2018-07-16 13:17:27 +02:00 committed by Anders Broman
parent 59dcf1f790
commit 3d1e3023d2
3 changed files with 59 additions and 59 deletions

View File

@ -253,71 +253,52 @@ gchar **
wmem_strsplit(wmem_allocator_t *allocator, const gchar *src,
const gchar *delimiter, int max_tokens)
{
gchar* splitted;
gchar* s;
gchar *splitted;
gchar *s;
guint tokens;
guint str_len;
guint sep_len;
guint i;
gchar** vec;
enum { AT_START, IN_PAD, IN_TOKEN } state;
guint curr_tok = 0;
gchar **vec;
if ( ! src
|| ! delimiter
|| ! delimiter[0])
if (!src || !delimiter || !delimiter[0])
return NULL;
s = splitted = wmem_strdup(allocator, src);
str_len = (guint) strlen(splitted);
sep_len = (guint) strlen(delimiter);
/* An empty string results in an empty vector (only the NULL terminator). */
if (!src[0]) {
vec = wmem_new0(allocator, gchar *);
return vec;
}
if (max_tokens < 1) max_tokens = INT_MAX;
splitted = wmem_strdup(allocator, src);
sep_len = (guint)strlen(delimiter);
if (max_tokens < 1)
max_tokens = INT_MAX;
/* Calculate the number of fields. */
s = splitted;
tokens = 1;
while (tokens < (guint)max_tokens && (s = strstr(s, delimiter))) {
s += sep_len;
tokens++;
}
vec = wmem_alloc_array(allocator, gchar *, tokens + 1);
while (tokens <= (guint)max_tokens && ( s = strstr(s,delimiter) )) {
/* Populate the array of string tokens. */
s = splitted;
vec[0] = s;
tokens = 1;
while (tokens < (guint)max_tokens && (s = strstr(s, delimiter))) {
for (i = 0; i < sep_len; i++)
s[i] = '\0';
s += sep_len;
vec[tokens] = s;
tokens++;
for(i=0; i < sep_len; i++ )
s[i] = '\0';
s += sep_len;
}
vec = wmem_alloc_array(allocator, gchar*,tokens+1);
state = AT_START;
for (i=0; i< str_len; i++) {
switch(state) {
case AT_START:
if (splitted[i] == '\0') {
state = IN_PAD;
}
else {
vec[curr_tok] = &(splitted[i]);
curr_tok++;
state = IN_TOKEN;
}
break;
case IN_TOKEN:
if (splitted[i] == '\0') {
state = IN_PAD;
}
break;
case IN_PAD:
if (splitted[i] != '\0') {
vec[curr_tok] = &(splitted[i]);
curr_tok++;
state = IN_TOKEN;
}
break;
}
}
vec[curr_tok] = NULL;
vec[tokens] = NULL;
return vec;
}

View File

@ -69,7 +69,10 @@ G_GNUC_MALLOC;
/**
* Splits a string into a maximum of max_tokens pieces, using the given
* delimiter. If max_tokens is reached, the remainder of string is appended
* to the last token. Consecutive delimiters are treated as a single delimiter.
* to the last token. Successive delimiters are not folded and will instead
* result in an empty string as element.
*
* Do not use with a NULL allocator; use g_strsplit instead, because the
* returned elements would otherwise be leaked.
*/
WS_DLL_PUBLIC
gchar **

View File

@ -441,15 +441,31 @@ wmem_test_strutls(void)
split_str = wmem_strsplit(allocator, "A-C", "-", 2);
g_assert_cmpstr(split_str[0], ==, "A");
g_assert_cmpstr(split_str[1], ==, "C");
g_assert(split_str[2] == NULL);
split_str = wmem_strsplit(allocator, "A-C", "-", 0);
g_assert_cmpstr(split_str[0], ==, "A");
g_assert_cmpstr(split_str[1], ==, "C");
g_assert(split_str[2] == NULL);
split_str = wmem_strsplit(allocator, "--aslkf-asio--asfj-as--", "-", 10);
g_assert_cmpstr(split_str[0], ==, "aslkf");
g_assert_cmpstr(split_str[1], ==, "asio");
g_assert_cmpstr(split_str[2], ==, "asfj");
g_assert_cmpstr(split_str[3], ==, "as");
split_str = wmem_strsplit(allocator, "--aslkf-asio--asfj-as--", "-", 4);
g_assert_cmpstr(split_str[0], ==, "aslkf");
g_assert_cmpstr(split_str[1], ==, "asio");
g_assert_cmpstr(split_str[2], ==, "-asfj-as--");
g_assert_cmpstr(split_str[0], ==, "");
g_assert_cmpstr(split_str[1], ==, "");
g_assert_cmpstr(split_str[2], ==, "aslkf");
g_assert_cmpstr(split_str[3], ==, "asio");
g_assert_cmpstr(split_str[4], ==, "");
g_assert_cmpstr(split_str[5], ==, "asfj");
g_assert_cmpstr(split_str[6], ==, "as");
g_assert_cmpstr(split_str[7], ==, "");
g_assert_cmpstr(split_str[8], ==, "");
g_assert(split_str[9] == NULL);
split_str = wmem_strsplit(allocator, "--aslkf-asio--asfj-as--", "-", 5);
g_assert_cmpstr(split_str[0], ==, "");
g_assert_cmpstr(split_str[1], ==, "");
g_assert_cmpstr(split_str[2], ==, "aslkf");
g_assert_cmpstr(split_str[3], ==, "asio");
g_assert_cmpstr(split_str[4], ==, "-asfj-as--");
g_assert(split_str[5] == NULL);
split_str = wmem_strsplit(allocator, "", "-", -1);
g_assert(split_str[0] == NULL);
wmem_strict_check_canaries(allocator);
orig_str = "TeStAsCiIsTrDoWn";