Combine SSE and pre-compiled patterns for faster pbrk

This combines the use of SSE4.2 instructions with the use of
pre-compiled search patterns, for a faster pbrk search method.

In tests against large HTTP and SIP capture files, using
pre-"compiled" patterns for guint8_pbrk() gives about a 5%
performance improvement over passing it the search string and
having it build the match array every time.
Similar to regular expressions, "compiling" the pattern match array
in advance only once and using the "compiled" patterns for
the searches is faster than compiling it every time.

Change-Id: Ifcbc14a6c93f32d15663a10d974bacdca5119a8e
Ping-Bug: 10798
Reviewed-on: https://code.wireshark.org/review/6990
Petri-Dish: Hadriel Kaplan <hadrielk@yahoo.com>
Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org>
Reviewed-by: Anders Broman <a.broman58@gmail.com>
This commit is contained in:
Hadriel Kaplan 2015-02-06 13:52:37 -05:00 committed by Anders Broman
parent a618f1c0d6
commit a837570d02
15 changed files with 225 additions and 197 deletions

View File

@ -1374,7 +1374,8 @@ libwireshark.so.0 libwireshark0 #MINVER#
tvb_new_subset_remaining@Base 1.9.1
tvb_offset_exists@Base 1.9.1
tvb_offset_from_real_beginning@Base 1.9.1
tvb_pbrk_guint8@Base 1.9.1
tvb_pbrk_compile@Base 1.99.2
tvb_pbrk_pattern_guint8@Base 1.99.2
tvb_raw_offset@Base 1.9.1
tvb_reported_length@Base 1.9.1
tvb_reported_length_remaining@Base 1.9.1

View File

@ -141,6 +141,8 @@ libwsutil.so.0 libwsutil0 #MINVER#
started_with_special_privs@Base 1.10.0
test_for_directory@Base 1.12.0~rc1
test_for_fifo@Base 1.12.0~rc1
tvb_pbrk_compile@Base 1.99.3
tvb_pbrk_exec@Base 1.99.3
type_util_gdouble_to_guint64@Base 1.10.0
type_util_guint64_to_gdouble@Base 1.10.0
u3_active@Base 1.12.0~rc1
@ -162,6 +164,5 @@ libwsutil.so.0 libwsutil0 #MINVER#
ws_buffer_free@Base 1.99.0
ws_buffer_init@Base 1.99.0
ws_buffer_remove_start@Base 1.99.0
ws_mempbrk@Base 1.99.0
ws_utf8_char_len@Base 1.12.0~rc1
ws_xton@Base 1.12.0~rc1

View File

@ -103,6 +103,9 @@ static int hf_cups_make_model = -1;
static gint ett_cups = -1;
static gint ett_cups_ptype = -1;
/* patterns used for tvb_pbrk_pattern_guint8 */
static tvb_pbrk_pattern pbrk_whitespace = INIT_PBRK_PATTERN;
/* This protocol is heavily related to IPP, but it is CUPS-specific
and non-standard. */
#define UDP_PORT_CUPS 631
@ -286,7 +289,7 @@ get_unquoted_string(tvbuff_t *tvb, gint offset, gint *next_offset, guint *len)
guint l = 0;
gint o;
o = tvb_pbrk_guint8(tvb, offset, -1, " \t\r\n", NULL);
o = tvb_pbrk_pattern_guint8(tvb, offset, -1, &pbrk_whitespace, NULL);
if (o != -1) {
l = o - offset;
s = tvb_get_string_enc(wmem_packet_scope(), tvb, offset, l, ENC_ASCII);
@ -389,6 +392,9 @@ proto_register_cups(void)
"CUPS", "cups");
proto_register_field_array(proto_cups, hf, array_length(hf));
proto_register_subtree_array(ett, array_length(ett));
/* compile patterns */
tvb_pbrk_compile(&pbrk_whitespace, " \t\r\n");
}
void

View File

@ -75,6 +75,8 @@ static expert_field ei_irc_tag_data_invalid = EI_INIT;
/* This must be a null-terminated string */
static const guint8 TAG_DELIMITER[] = {0x01, 0x00};
/* patterns used for tvb_pbrk_pattern_guint8 */
static tvb_pbrk_pattern pbrk_tag_delimiter = INIT_PBRK_PATTERN;
#define TCP_PORT_IRC 6667
@ -88,14 +90,14 @@ dissect_irc_tag_data(proto_tree *tree, proto_item *item, tvbuff_t *tvb, int offs
found_end_needle = 0;
gint tag_start_offset, tag_end_offset;
tag_start_offset = tvb_pbrk_guint8(tvb, offset, datalen, TAG_DELIMITER, &found_start_needle);
tag_start_offset = tvb_pbrk_pattern_guint8(tvb, offset, datalen, &pbrk_tag_delimiter, &found_start_needle);
if (tag_start_offset == -1)
{
/* no tag data */
return;
}
tag_end_offset = tvb_pbrk_guint8(tvb, offset, datalen-offset, TAG_DELIMITER, &found_end_needle);
tag_end_offset = tvb_pbrk_pattern_guint8(tvb, offset, datalen-offset, &pbrk_tag_delimiter, &found_end_needle);
if (tag_end_offset == -1)
{
expert_add_info(pinfo, item, &ei_irc_missing_end_delimiter);
@ -124,8 +126,7 @@ dissect_irc_request(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int off
eocp_offset,
tag_start_offset, tag_end_offset;
guint8* str_command;
guchar found_needle = 0,
found_tag_needle = 0;
guchar found_tag_needle = 0;
gboolean first_command_param = TRUE;
request_item = proto_tree_add_item(tree, hf_irc_request, tvb, offset, linelen, ENC_ASCII|ENC_NA);
@ -138,7 +139,7 @@ dissect_irc_request(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int off
if (tvb_get_guint8(tvb, offset) == ':')
{
/* find the end of the prefix */
eop_offset = tvb_pbrk_guint8(tvb, offset+1, linelen-1, " ", &found_needle);
eop_offset = tvb_find_guint8(tvb, offset+1, linelen-1, ' ');
if (eop_offset == -1)
{
expert_add_info(pinfo, request_item, &ei_irc_prefix_missing_ending_space);
@ -146,7 +147,6 @@ dissect_irc_request(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int off
}
proto_tree_add_item(request_tree, hf_irc_request_prefix, tvb, offset+1, eop_offset-offset-1, ENC_ASCII|ENC_NA);
found_needle = 0;
offset = eop_offset+1;
}
@ -161,7 +161,7 @@ dissect_irc_request(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int off
return;
}
eoc_offset = tvb_pbrk_guint8(tvb, offset, end_offset-offset, " ", &found_needle);
eoc_offset = tvb_find_guint8(tvb, offset, end_offset-offset, ' ');
if (eoc_offset == -1)
{
proto_tree_add_item(request_tree, hf_irc_request_command, tvb, offset, end_offset-offset, ENC_ASCII|ENC_NA);
@ -192,7 +192,6 @@ dissect_irc_request(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int off
expert_add_info(pinfo, request_item, &ei_irc_numeric_request_command);
}
found_needle = 0;
offset = eoc_offset+1;
/* clear out any whitespace before command parameter */
@ -216,8 +215,8 @@ dissect_irc_request(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int off
while(offset < end_offset)
{
eocp_offset = tvb_pbrk_guint8(tvb, offset, end_offset-offset, " ", &found_needle);
tag_start_offset = tvb_pbrk_guint8(tvb, offset, end_offset-offset, TAG_DELIMITER, &found_tag_needle);
eocp_offset = tvb_find_guint8(tvb, offset, end_offset-offset, ' ');
tag_start_offset = tvb_pbrk_pattern_guint8(tvb, offset, end_offset-offset, &pbrk_tag_delimiter, &found_tag_needle);
/* Create subtree when the first parameter is found */
if (first_command_param)
@ -233,7 +232,6 @@ dissect_irc_request(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int off
{
/* regular message should be dissected */
found_needle = 0;
if (eocp_offset == -1)
{
proto_tree_add_item(command_tree, hf_irc_request_command_param, tvb, offset, end_offset-offset, ENC_ASCII|ENC_NA);
@ -267,7 +265,7 @@ dissect_irc_request(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int off
/* tag data dissected */
found_tag_needle = 0;
tag_end_offset = tvb_pbrk_guint8(tvb, tag_start_offset+1, end_offset-tag_start_offset-1, TAG_DELIMITER, &found_tag_needle);
tag_end_offset = tvb_pbrk_pattern_guint8(tvb, tag_start_offset+1, end_offset-tag_start_offset-1, &pbrk_tag_delimiter, &found_tag_needle);
if (tag_end_offset == -1)
{
expert_add_info(pinfo, request_item, &ei_irc_missing_end_delimiter);
@ -293,8 +291,7 @@ dissect_irc_response(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int of
tag_start_offset, tag_end_offset;
guint8* str_command;
guint16 num_command;
guchar found_needle = 0,
found_tag_needle = 0;
guchar found_tag_needle = 0;
gboolean first_command_param = TRUE;
response_item = proto_tree_add_item(tree, hf_irc_response, tvb, offset, linelen, ENC_ASCII|ENC_NA);
@ -307,7 +304,7 @@ dissect_irc_response(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int of
if (tvb_get_guint8(tvb, offset) == ':')
{
/* find the end of the prefix */
eop_offset = tvb_pbrk_guint8(tvb, offset+1, linelen-1, " ", &found_needle);
eop_offset = tvb_find_guint8(tvb, offset+1, linelen-1, ' ');
if (eop_offset == -1)
{
expert_add_info(pinfo, response_item, &ei_irc_prefix_missing_ending_space);
@ -315,7 +312,6 @@ dissect_irc_response(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int of
}
proto_tree_add_item(response_tree, hf_irc_response_prefix, tvb, offset+1, eop_offset-offset-1, ENC_ASCII|ENC_NA);
found_needle = 0;
offset = eop_offset+1;
}
@ -330,7 +326,7 @@ dissect_irc_response(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int of
return;
}
eoc_offset = tvb_pbrk_guint8(tvb, offset, end_offset-offset, " ", &found_needle);
eoc_offset = tvb_find_guint8(tvb, offset, end_offset-offset, ' ');
if (eoc_offset == -1)
{
proto_tree_add_item(response_tree, hf_irc_response_command, tvb, offset, end_offset-offset, ENC_ASCII|ENC_NA);
@ -365,7 +361,6 @@ dissect_irc_response(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int of
PROTO_ITEM_SET_HIDDEN(hidden_item);
}
found_needle = 0;
offset = eoc_offset+1;
/* clear out any whitespace before command parameter */
@ -389,8 +384,8 @@ dissect_irc_response(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int of
while(offset < end_offset)
{
eocp_offset = tvb_pbrk_guint8(tvb, offset, end_offset-offset, " ", &found_needle);
tag_start_offset = tvb_pbrk_guint8(tvb, offset, end_offset-offset, TAG_DELIMITER, &found_tag_needle);
eocp_offset = tvb_find_guint8(tvb, offset, end_offset-offset, ' ');
tag_start_offset = tvb_pbrk_pattern_guint8(tvb, offset, end_offset-offset, &pbrk_tag_delimiter, &found_tag_needle);
/* Create subtree when the first parameter is found */
if (first_command_param)
@ -404,7 +399,6 @@ dissect_irc_response(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int of
{
/* regular message should be dissected */
found_needle = 0;
if (eocp_offset == -1)
{
proto_tree_add_item(command_tree, hf_irc_response_command_param, tvb, offset, end_offset-offset, ENC_ASCII|ENC_NA);
@ -437,7 +431,7 @@ dissect_irc_response(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, int of
/* tag data dissected */
found_tag_needle = 0;
tag_end_offset = tvb_pbrk_guint8(tvb, tag_start_offset+1, end_offset-tag_start_offset-1, TAG_DELIMITER, &found_tag_needle);
tag_end_offset = tvb_pbrk_pattern_guint8(tvb, tag_start_offset+1, end_offset-tag_start_offset-1, &pbrk_tag_delimiter, &found_tag_needle);
if (tag_end_offset == -1)
{
expert_add_info(pinfo, response_item, &ei_irc_missing_end_delimiter);
@ -567,6 +561,9 @@ proto_register_irc(void)
proto_register_subtree_array(ett, array_length(ett));
expert_irc = expert_register_protocol(proto_irc);
expert_register_field_array(expert_irc, ei, array_length(ei));
/* compile patterns */
tvb_pbrk_compile(&pbrk_tag_delimiter, TAG_DELIMITER);
}
void

View File

@ -139,6 +139,11 @@ static dissector_handle_t megaco_text_handle;
static int megaco_tap = -1;
/* patterns used for tvb_pbrk_pattern_guint8 */
static tvb_pbrk_pattern pbrk_whitespace = INIT_PBRK_PATTERN;
static tvb_pbrk_pattern pbrk_braces = INIT_PBRK_PATTERN;
/*
* Here are the global variables associated with
* the various user definable characteristics of the dissection
@ -451,7 +456,7 @@ dissect_megaco_text(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
* pathNAME = ["*"] NAME *("/" / "*"/ ALPHA / DIGIT /"_" / "$" )["@" pathDomainName ]
*/
tvb_current_offset = tvb_pbrk_guint8(tvb, tvb_current_offset, -1, " \t\r\n", &needle);
tvb_current_offset = tvb_pbrk_pattern_guint8(tvb, tvb_current_offset, -1, &pbrk_whitespace, &needle);
if (tvb_current_offset == -1) {
expert_add_info_format(pinfo, ti, &ei_megaco_parse_error,
"[ Parse error: no body in MEGACO message (missing SEP after mId) ]");
@ -3284,7 +3289,7 @@ static gint megaco_tvb_find_token(tvbuff_t* tvb, gint offset, gint maxlength){
guchar needle;
do {
pos = tvb_pbrk_guint8(tvb, pos + 1, maxlength, "{}", &needle);
pos = tvb_pbrk_pattern_guint8(tvb, pos + 1, maxlength, &pbrk_braces, &needle);
if(pos == -1)
return -1;
switch(needle){
@ -3564,6 +3569,10 @@ proto_register_megaco(void)
megaco_tap = register_tap("megaco");
/* compile patterns */
tvb_pbrk_compile(&pbrk_whitespace, " \t\r\n");
tvb_pbrk_compile(&pbrk_braces, "{}");
}
/* Register all the bits needed with the filtering engine */

View File

@ -197,6 +197,10 @@ static expert_field ei_sdp_invalid_line_fields = EI_INIT;
static expert_field ei_sdp_invalid_line_space = EI_INIT;
static expert_field ei_sdp_invalid_conversion = EI_INIT;
/* patterns used for tvb_pbrk_pattern_guint8 */
static tvb_pbrk_pattern pbrk_digits = INIT_PBRK_PATTERN;
static tvb_pbrk_pattern pbrk_alpha = INIT_PBRK_PATTERN;
#define SDP_RTP_PROTO 0x00000001
#define SDP_SRTP_PROTO 0x00000002
#define SDP_T38_PROTO 0x00000004
@ -821,7 +825,7 @@ static void dissect_sdp_session_attribute(tvbuff_t *tvb, packet_info * pinfo, pr
offset = next_offset + 1;
if (strcmp((char*)field_name, "ipbcp") == 0) {
offset = tvb_pbrk_guint8(tvb, offset, -1,"0123456789", NULL);
offset = tvb_pbrk_pattern_guint8(tvb, offset, -1,&pbrk_digits, NULL);
if (offset == -1)
return;
@ -832,7 +836,7 @@ static void dissect_sdp_session_attribute(tvbuff_t *tvb, packet_info * pinfo, pr
proto_tree_add_item(sdp_session_attribute_tree, hf_ipbcp_version, tvb, offset, tokenlen, ENC_UTF_8|ENC_NA);
offset = tvb_pbrk_guint8(tvb, offset, -1,"ABCDEFGHIJKLMNOPQRSTUVWXYZ", NULL);
offset = tvb_pbrk_pattern_guint8(tvb, offset, -1,&pbrk_alpha, NULL);
if (offset == -1)
return;
@ -3086,6 +3090,10 @@ proto_register_sdp(void)
/* Register for tapping */
sdp_tap = register_tap("sdp");
/* compile patterns */
tvb_pbrk_compile(&pbrk_digits, "0123456789");
tvb_pbrk_compile(&pbrk_alpha, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
}
void

View File

@ -237,6 +237,18 @@ static expert_field ei_sip_header_not_terminated = EI_INIT;
static expert_field ei_sip_odd_register_response = EI_INIT;
static expert_field ei_sip_sipsec_malformed = EI_INIT;
/* patterns used for tvb_pbrk_pattern_guint8 */
static tvb_pbrk_pattern pbrk_comma_semi = INIT_PBRK_PATTERN;
static tvb_pbrk_pattern pbrk_whitespace = INIT_PBRK_PATTERN;
static tvb_pbrk_pattern pbrk_param_end = INIT_PBRK_PATTERN;
static tvb_pbrk_pattern pbrk_param_end_colon_brackets = INIT_PBRK_PATTERN;
static tvb_pbrk_pattern pbrk_header_end_dquote = INIT_PBRK_PATTERN;
static tvb_pbrk_pattern pbrk_quotes = INIT_PBRK_PATTERN;
static tvb_pbrk_pattern pbrk_tab_sp_fslash = INIT_PBRK_PATTERN;
static tvb_pbrk_pattern pbrk_addr_end = INIT_PBRK_PATTERN;
static tvb_pbrk_pattern pbrk_via_param_end = INIT_PBRK_PATTERN;
/* PUBLISH method added as per http://www.ietf.org/internet-drafts/draft-ietf-sip-publish-01.txt */
static const char *sip_methods[] = {
#define SIP_METHOD_INVALID 0
@ -1110,7 +1122,7 @@ dissect_sip_uri(tvbuff_t *tvb, packet_info *pinfo _U_, gint start_offset,
*/
int end_offset;
end_offset = tvb_pbrk_guint8(tvb, current_offset, line_end_offset - current_offset, ",;", NULL);
end_offset = tvb_pbrk_pattern_guint8(tvb, current_offset, line_end_offset - current_offset, &pbrk_comma_semi, NULL);
if (end_offset != -1)
{
@ -1150,14 +1162,14 @@ dissect_sip_uri(tvbuff_t *tvb, packet_info *pinfo _U_, gint start_offset,
while (parameter_end_offset < line_end_offset)
{
parameter_end_offset++;
parameter_end_offset = tvb_pbrk_guint8(tvb, parameter_end_offset, line_end_offset - parameter_end_offset, ">,;? \r:[]", &c);
parameter_end_offset = tvb_pbrk_pattern_guint8(tvb, parameter_end_offset, line_end_offset - parameter_end_offset, &pbrk_param_end_colon_brackets, &c);
if (parameter_end_offset == -1)
{
parameter_end_offset = line_end_offset;
break;
}
/* after adding character to this switch() , update also string in tvb_pbrk_guint8() call above */
/* after adding character to this switch() , update also pbrk_param_end_colon_brackets */
switch (c) {
case '>':
case ',':
@ -1196,14 +1208,14 @@ uri_host_end_found:
while (parameter_end_offset < line_end_offset)
{
parameter_end_offset++;
parameter_end_offset = tvb_pbrk_guint8(tvb, parameter_end_offset, line_end_offset - parameter_end_offset, ">,;? \r", &c);
parameter_end_offset = tvb_pbrk_pattern_guint8(tvb, parameter_end_offset, line_end_offset - parameter_end_offset, &pbrk_param_end, &c);
if (parameter_end_offset == -1)
{
parameter_end_offset = line_end_offset;
break;
}
/* after adding character to this switch(), update also string in tvb_pbrk_guint8() call above */
/* after adding character to this switch(), update also pbrk_param_end */
switch (c) {
case '>':
case ',':
@ -1568,7 +1580,7 @@ display_sip_uri (tvbuff_t *tvb, proto_tree *sip_element_tree, packet_info *pinfo
/* Put the contact parameters in the tree */
while (current_offset < uri_offsets->name_addr_end) {
queried_offset = tvb_pbrk_guint8(tvb, current_offset, uri_offsets->name_addr_end - current_offset, ",;", &c);
queried_offset = tvb_pbrk_pattern_guint8(tvb, current_offset, uri_offsets->name_addr_end - current_offset, &pbrk_comma_semi, &c);
if (queried_offset == -1) {
/* Reached line end */
@ -1669,7 +1681,7 @@ dissect_sip_contact_item(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, gi
while(current_offset< line_end_offset){
c = '\0';
queried_offset++;
queried_offset = (queried_offset < line_end_offset) ? tvb_pbrk_guint8(tvb, queried_offset, line_end_offset - queried_offset, "\r\n,;\"", &c) : -1;
queried_offset = (queried_offset < line_end_offset) ? tvb_pbrk_pattern_guint8(tvb, queried_offset, line_end_offset - queried_offset, &pbrk_header_end_dquote, &c) : -1;
if (queried_offset != -1)
{
switch (c) {
@ -1698,7 +1710,7 @@ dissect_sip_contact_item(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, gi
/* We have an opening quote but no closing quote. */
current_offset = line_end_offset;
} else {
current_offset = tvb_pbrk_guint8(tvb, queried_offset+1, line_end_offset - queried_offset, ",;", &c);
current_offset = tvb_pbrk_pattern_guint8(tvb, queried_offset+1, line_end_offset - queried_offset, &pbrk_comma_semi, &c);
if(current_offset==-1){
/* Last parameter, line end */
current_offset = line_end_offset;
@ -1797,7 +1809,7 @@ dissect_sip_authorization_item(tvbuff_t *tvb, proto_tree *tree, gint start_offse
name = tvb_get_string_enc(wmem_packet_scope(), tvb, start_offset, par_name_end_offset-start_offset, ENC_UTF_8|ENC_NA);
/* Find end of parameter, it can be a quoted string so check for quotes too */
queried_offset = tvb_pbrk_guint8(tvb, par_name_end_offset, line_end_offset - par_name_end_offset, "'\"", &c);
queried_offset = tvb_pbrk_pattern_guint8(tvb, par_name_end_offset, line_end_offset - par_name_end_offset, &pbrk_quotes, &c);
if (queried_offset == -1) {
/* Last parameter, line end */
current_offset = line_end_offset;
@ -2166,7 +2178,7 @@ static void dissect_sip_via_header(tvbuff_t *tvb, proto_tree *tree, gint start_o
{
int transport_start_offset = current_offset;
current_offset = tvb_pbrk_guint8(tvb, current_offset, line_end_offset - current_offset, "\t /", &c);
current_offset = tvb_pbrk_pattern_guint8(tvb, current_offset, line_end_offset - current_offset, &pbrk_tab_sp_fslash, &c);
if (current_offset != -1){
proto_tree_add_item(tree, hf_sip_via_transport, tvb, transport_start_offset,
current_offset - transport_start_offset, ENC_UTF_8|ENC_NA);
@ -2195,7 +2207,7 @@ static void dissect_sip_via_header(tvbuff_t *tvb, proto_tree *tree, gint start_o
address_start_offset = current_offset;
while (current_offset < line_end_offset)
{
current_offset = tvb_pbrk_guint8(tvb, current_offset, line_end_offset - current_offset, "[] \t:;", &c);
current_offset = tvb_pbrk_pattern_guint8(tvb, current_offset, line_end_offset - current_offset, &pbrk_addr_end, &c);
if (current_offset == -1)
{
current_offset = line_end_offset;
@ -2330,7 +2342,7 @@ static void dissect_sip_via_header(tvbuff_t *tvb, proto_tree *tree, gint start_o
parameter_name_end = current_offset;
/* Read until end of parameter value */
current_offset = tvb_pbrk_guint8(tvb, current_offset, line_end_offset - current_offset, "\t;, ", NULL);
current_offset = tvb_pbrk_pattern_guint8(tvb, current_offset, line_end_offset - current_offset, &pbrk_via_param_end, NULL);
if (current_offset == -1)
current_offset = line_end_offset;
@ -3407,7 +3419,7 @@ dissect_sip_common(tvbuff_t *tvb, int offset, int remaining_length, packet_info
if (hf_index != POS_AUTHENTICATION_INFO)
{
/* The first time comma_offset is "start of parameters" */
comma_offset = tvb_pbrk_guint8(tvb, value_offset, line_end_offset - value_offset, " \t\r\n", NULL);
comma_offset = tvb_pbrk_pattern_guint8(tvb, value_offset, line_end_offset - value_offset, &pbrk_whitespace, NULL);
proto_tree_add_item(sip_element_tree, hf_sip_auth_scheme,
tvb, value_offset, comma_offset - value_offset,
ENC_UTF_8|ENC_NA);
@ -5946,6 +5958,17 @@ void proto_register_sip(void)
ext_hdr_subdissector_table = register_dissector_table("sip.hdr", "SIP Extension header", FT_STRING, BASE_NONE);
/* compile patterns */
tvb_pbrk_compile(&pbrk_comma_semi, ",;");
tvb_pbrk_compile(&pbrk_whitespace, " \t\r\n");
tvb_pbrk_compile(&pbrk_param_end, ">,;? \r");
tvb_pbrk_compile(&pbrk_param_end_colon_brackets, ">,;? \r:[]");
tvb_pbrk_compile(&pbrk_header_end_dquote, "\r\n,;\"");
tvb_pbrk_compile(&pbrk_quotes, "'\"");
tvb_pbrk_compile(&pbrk_tab_sp_fslash, "\t /");
tvb_pbrk_compile(&pbrk_addr_end, "[] \t:;");
tvb_pbrk_compile(&pbrk_via_param_end, "\t;, ");
}
void

View File

@ -34,7 +34,7 @@ struct tvb_ops {
void *(*tvb_memcpy)(struct tvbuff *tvb, void *target, guint offset, guint length);
gint (*tvb_find_guint8)(tvbuff_t *tvb, guint abs_offset, guint limit, guint8 needle);
gint (*tvb_pbrk_guint8)(tvbuff_t *tvb, guint abs_offset, guint limit, const guint8 *needles, guchar *found_needle);
gint (*tvb_pbrk_pattern_guint8)(tvbuff_t *tvb, guint abs_offset, guint limit, const tvb_pbrk_pattern* pattern, guchar *found_needle);
tvbuff_t *(*tvb_clone)(tvbuff_t *tvb, guint abs_offset, guint abs_length);
};

View File

@ -44,7 +44,6 @@
#include "wsutil/unicode-utils.h"
#include "wsutil/nstime.h"
#include "wsutil/time_util.h"
#include "wsutil/ws_mempbrk.h"
#include "tvbuff.h"
#include "tvbuff-int.h"
#include "strutil.h"
@ -755,17 +754,6 @@ fast_ensure_contiguous(tvbuff_t *tvb, const gint offset, const guint length)
return NULL;
}
static inline const guint8*
guint8_pbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles, guchar *found_needle)
{
const guint8 *result = ws_mempbrk(haystack, haystacklen, needles);
if (result && found_needle)
*found_needle = *result;
return result;
}
/************** ACCESSORS **************/
@ -1906,21 +1894,22 @@ tvb_find_guint8(tvbuff_t *tvb, const gint offset, const gint maxlength, const gu
}
static inline gint
tvb_pbrk_guint8_generic(tvbuff_t *tvb, guint abs_offset, guint limit, const guint8 *needles, guchar *found_needle)
tvb_pbrk_guint8_generic(tvbuff_t *tvb, guint abs_offset, guint limit, const tvb_pbrk_pattern* pattern, guchar *found_needle)
{
const guint8 *ptr;
const guint8 *result;
ptr = ensure_contiguous(tvb, abs_offset, limit); /* tvb_get_ptr */
result = guint8_pbrk(ptr, limit, needles, found_needle);
result = tvb_pbrk_exec(ptr, limit, pattern, found_needle);
if (!result)
return -1;
return (gint) ((result - ptr) + abs_offset);
}
/* Find first occurrence of any of the needles in tvbuff, starting at offset.
/* Find first occurrence of any of the pattern chars in tvbuff, starting at offset.
* Searches at most maxlength number of bytes; if maxlength is -1, searches
* to end of tvbuff.
* Returns the offset of the found needle, or -1 if not found.
@ -1928,7 +1917,8 @@ tvb_pbrk_guint8_generic(tvbuff_t *tvb, guint abs_offset, guint limit, const guin
* in that case, -1 will be returned if the boundary is reached before
* finding needle. */
gint
tvb_pbrk_guint8(tvbuff_t *tvb, const gint offset, const gint maxlength, const guint8 *needles, guchar *found_needle)
tvb_pbrk_pattern_guint8(tvbuff_t *tvb, const gint offset, const gint maxlength,
const tvb_pbrk_pattern* pattern, guchar *found_needle)
{
const guint8 *result;
guint abs_offset;
@ -1950,7 +1940,7 @@ tvb_pbrk_guint8(tvbuff_t *tvb, const gint offset, const gint maxlength, const gu
/* If we have real data, perform our search now. */
if (tvb->real_data) {
result = guint8_pbrk(tvb->real_data + abs_offset, limit, needles, found_needle);
result = tvb_pbrk_exec(tvb->real_data + abs_offset, limit, pattern, found_needle);
if (result == NULL) {
return -1;
}
@ -1959,10 +1949,10 @@ tvb_pbrk_guint8(tvbuff_t *tvb, const gint offset, const gint maxlength, const gu
}
}
if (tvb->ops->tvb_pbrk_guint8)
return tvb->ops->tvb_pbrk_guint8(tvb, abs_offset, limit, needles, found_needle);
if (tvb->ops->tvb_pbrk_pattern_guint8)
return tvb->ops->tvb_pbrk_pattern_guint8(tvb, abs_offset, limit, pattern, found_needle);
return tvb_pbrk_guint8_generic(tvb, abs_offset, limit, needles, found_needle);
return tvb_pbrk_guint8_generic(tvb, abs_offset, limit, pattern, found_needle);
}
/* Find size of stringz (NUL-terminated string) by looking for terminating
@ -3048,6 +3038,8 @@ tvb_get_nstringz0(tvbuff_t *tvb, const gint offset, const guint bufsize, guint8*
}
}
static tvb_pbrk_pattern pbrk_crlf = INIT_PBRK_PATTERN;
/*
* Given a tvbuff, an offset into the tvbuff, and a length that starts
* at that offset (which may be -1 for "all the way to the end of the
@ -3070,16 +3062,11 @@ tvb_get_nstringz0(tvbuff_t *tvb, const gint offset, const guint bufsize, guint8*
gint
tvb_find_line_end(tvbuff_t *tvb, const gint offset, int len, gint *next_offset, const gboolean desegment)
{
#ifdef WIN32
static const char __declspec(align(16)) crlf[] = "\r\n" ;
#else
static const char crlf[] __attribute__((aligned(16))) = "\r\n" ;
#endif
gint eob_offset;
gint eol_offset;
int linelen;
guchar found_needle = 0;
static gboolean compiled = FALSE;
DISSECTOR_ASSERT(tvb && tvb->initialized);
@ -3091,10 +3078,15 @@ tvb_find_line_end(tvbuff_t *tvb, const gint offset, int len, gint *next_offset,
*/
eob_offset = offset + len;
if (!compiled) {
tvb_pbrk_compile(&pbrk_crlf, "\r\n");
compiled = TRUE;
}
/*
* Look either for a CR or an LF.
*/
eol_offset = tvb_pbrk_guint8(tvb, offset, len, crlf, &found_needle);
eol_offset = tvb_pbrk_pattern_guint8(tvb, offset, len, &pbrk_crlf, &found_needle);
if (eol_offset == -1) {
/*
* No CR or LF - line is presumably continued in next packet.
@ -3172,6 +3164,7 @@ tvb_find_line_end(tvbuff_t *tvb, const gint offset, int len, gint *next_offset,
return linelen;
}
static tvb_pbrk_pattern pbrk_crlf_dquote = INIT_PBRK_PATTERN;
/*
* Given a tvbuff, an offset into the tvbuff, and a length that starts
* at that offset (which may be -1 for "all the way to the end of the
@ -3198,11 +3191,18 @@ tvb_find_line_end_unquoted(tvbuff_t *tvb, const gint offset, int len, gint *next
guchar c = 0;
gint eob_offset;
int linelen;
static gboolean compiled = FALSE;
DISSECTOR_ASSERT(tvb && tvb->initialized);
if (len == -1)
len = _tvb_captured_length_remaining(tvb, offset);
if (!compiled) {
tvb_pbrk_compile(&pbrk_crlf_dquote, "\r\n\"");
compiled = TRUE;
}
/*
* XXX - what if "len" is still -1, meaning "offset is past the
* end of the tvbuff"?
@ -3225,7 +3225,7 @@ tvb_find_line_end_unquoted(tvbuff_t *tvb, const gint offset, int len, gint *next
/*
* Look either for a CR, an LF, or a '"'.
*/
char_offset = tvb_pbrk_guint8(tvb, cur_offset, len, "\r\n\"", &c);
char_offset = tvb_pbrk_pattern_guint8(tvb, cur_offset, len, &pbrk_crlf_dquote, &c);
}
if (char_offset == -1) {
/*

View File

@ -36,6 +36,7 @@
#include <glib.h>
#include <epan/guid-utils.h>
#include <epan/wmem/wmem.h>
#include "wsutil/ws_mempbrk.h"
#ifdef __cplusplus
extern "C" {
@ -505,14 +506,18 @@ WS_DLL_PUBLIC const guint8 *tvb_get_ptr(tvbuff_t *tvb, const gint offset,
WS_DLL_PUBLIC gint tvb_find_guint8(tvbuff_t *tvb, const gint offset,
const gint maxlength, const guint8 needle);
/** Find first occurrence of any of the needles in tvbuff, starting at offset.
/** Find first occurrence of any of the needles of the pre-compiled pattern in
* tvbuff, starting at offset. The passed in pattern must have been "compiled"
* beforehand, using tvb_pbrk_compile().
* Searches at most maxlength number of bytes. Returns the offset of the
* found needle, or -1 if not found; if found_needle is not NULL, the needle
* that was found is stored there.
* Will not throw an exception, even if
* maxlength exceeds boundary of tvbuff; in that case, -1 will be returned if
* the boundary is reached before finding needle. */
WS_DLL_PUBLIC gint tvb_pbrk_guint8(tvbuff_t *tvb, const gint offset,
const gint maxlength, const guint8 *needles, guchar *found_needle);
WS_DLL_PUBLIC gint tvb_pbrk_pattern_guint8(tvbuff_t *tvb, const gint offset,
const gint maxlength, const tvb_pbrk_pattern* pattern, guchar *found_needle);
/** Find size of stringz (NUL-terminated string) by looking for terminating
* NUL. The size of the string includes the terminating NUL.

View File

@ -79,11 +79,11 @@ subset_find_guint8(tvbuff_t *tvb, guint abs_offset, guint limit, guint8 needle)
}
static gint
subset_pbrk_guint8(tvbuff_t *tvb, guint abs_offset, guint limit, const guint8 *needles, guchar *found_needle)
subset_pbrk_guint8(tvbuff_t *tvb, guint abs_offset, guint limit, const tvb_pbrk_pattern* pattern, guchar *found_needle)
{
struct tvb_subset *subset_tvb = (struct tvb_subset *) tvb;
return tvb_pbrk_guint8(subset_tvb->subset.tvb, subset_tvb->subset.offset + abs_offset, limit, needles, found_needle);
return tvb_pbrk_pattern_guint8(subset_tvb->subset.tvb, subset_tvb->subset.offset + abs_offset, limit, pattern, found_needle);
}
static tvbuff_t *

View File

@ -139,13 +139,13 @@ frame_find_guint8(tvbuff_t *tvb, guint abs_offset, guint limit, guint8 needle)
}
static gint
frame_pbrk_guint8(tvbuff_t *tvb, guint abs_offset, guint limit, const guint8 *needles, guchar *found_needle)
frame_pbrk_guint8(tvbuff_t *tvb, guint abs_offset, guint limit, const tvb_pbrk_pattern* pattern, guchar *found_needle)
{
struct tvb_frame *frame_tvb = (struct tvb_frame *) tvb;
frame_cache(frame_tvb);
return tvb_pbrk_guint8(tvb, abs_offset, limit, needles, found_needle);
return tvb_pbrk_pattern_guint8(tvb, abs_offset, limit, pattern, found_needle);
}
static guint

View File

@ -36,50 +36,54 @@
#include <glib.h>
#include "ws_symbol_export.h"
#ifdef HAVE_SSE4_2
#include "ws_cpuid.h"
#endif
#include "ws_mempbrk.h"
const guint8 *
_ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles)
void
tvb_pbrk_compile(tvb_pbrk_pattern* pattern, const gchar *needles)
{
gchar tmp[256] = { 0 };
const guint8 *haystack_end;
const gchar *n = needles;
while (*n) {
pattern->patt[(int)*n] = 1;
n++;
}
while (*needles)
tmp[*needles++] = 1;
haystack_end = haystack + haystacklen;
while (haystack < haystack_end) {
if (tmp[*haystack])
return haystack;
haystack++;
}
return NULL;
#ifdef HAVE_SSE4_2
ws_mempbrk_sse42_compile(pattern, needles);
#endif
}
/*
 * Table-driven (non-SSE) pbrk search.
 *
 * Walks the haystacklen bytes starting at haystack and stops at the first
 * byte whose entry in pattern->patt is non-zero.  Returns a pointer to that
 * byte, or NULL when no byte of the haystack is marked in the table.  When a
 * byte is found and found_needle is not NULL, the byte is also copied to
 * *found_needle.
 */
const guint8 *
ws_mempbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle)
{
    const guint8 *cur;
    const guint8 *const end = haystack + haystacklen;

    for (cur = haystack; cur < end; cur++) {
        /* Not one of the needles: keep scanning. */
        if (!pattern->patt[*cur])
            continue;

        if (found_needle != NULL)
            *found_needle = *cur;
        return cur;
    }

    return NULL;
}
WS_DLL_PUBLIC const guint8 *
ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles)
tvb_pbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle)
{
#ifdef HAVE_SSE4_2
static int have_sse42 = -1;
#endif
if (*needles == 0)
return NULL;
#ifdef HAVE_SSE4_2
if G_UNLIKELY(have_sse42 < 0)
have_sse42 = ws_cpuid_sse42();
if (haystacklen >= 16 && have_sse42)
return _ws_mempbrk_sse42(haystack, haystacklen, needles);
if (haystacklen >= 16 && pattern->use_sse42)
return ws_mempbrk_sse42_exec(haystack, haystacklen, pattern, found_needle);
#endif
return _ws_mempbrk(haystack, haystacklen, needles);
return ws_mempbrk_exec(haystack, haystacklen, pattern, found_needle);
}
/*
* Editor modelines - http://www.wireshark.org/tools/modelines.html
*

View File

@ -24,13 +24,30 @@
#include "ws_symbol_export.h"
WS_DLL_PUBLIC const guint8 *ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles);
/** The pattern object used for tvb_pbrk_pattern_guint8().
* Fill it in with tvb_pbrk_compile() before passing it to a search.
*/
typedef struct {
/* Lookup table indexed by byte value; tvb_pbrk_compile() sets the entry of each needle byte to 1. */
gchar patt[256];
/* Set by ws_mempbrk_sse42_compile(); when TRUE, tvb_pbrk_exec() takes the SSE4.2 path for haystacks of at least 16 bytes. */
gboolean use_sse42;
/* NULL in INIT_PBRK_PATTERN; when use_sse42 is TRUE, ws_mempbrk_sse42_compile() points it at an allocated __m128i holding the needle bytes followed by zero padding. */
void *mask;
} tvb_pbrk_pattern;
#ifdef HAVE_SSE4_2
const char *_ws_mempbrk_sse42(const char* haystack, size_t haystacklen, const char *needles);
#endif
/** The value to use when initializing a tvb_pbrk_pattern variable.
* For example:
* static tvb_pbrk_pattern pbrk_mypattern = INIT_PBRK_PATTERN;
*/
#define INIT_PBRK_PATTERN { { 0 }, FALSE, NULL }
const guint8 *_ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles);
/** Compile the pattern for the needles to find using tvb_pbrk_pattern_guint8().
*/
WS_DLL_PUBLIC void tvb_pbrk_compile(tvb_pbrk_pattern* pattern, const gchar *needles);
WS_DLL_PUBLIC const guint8 *tvb_pbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle);
void ws_mempbrk_sse42_compile(tvb_pbrk_pattern* pattern, const gchar *needles);
const char *ws_mempbrk_sse42_exec(const char* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle);
const guint8 *ws_mempbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle);
#endif /* __WS_MEMPBRK_H__ */

View File

@ -23,7 +23,7 @@
#ifdef HAVE_SSE4_2
#include <glib.h>
#include "ws_cpuid.h"
#ifdef WIN32
#include <tmmintrin.h>
@ -59,6 +59,23 @@ __m128i_shift_right (__m128i value, unsigned long int offset)
_mm_loadu_si128 (cast_128aligned__m128i(___m128i_shift_right + offset)));
}
/*
 * Prepare the SSE4.2 half of a compiled pbrk pattern.
 *
 * pattern - pattern object to fill in. use_sse42 is always written; mask is
 *           only assigned when the SSE4.2 path is usable.
 * needles - NUL-terminated set of bytes to search for.
 *
 * The SSE4.2 path is enabled only when the CPU reports SSE4.2 support and
 * the needle set fits in one 128-bit register (at most 16 bytes). In that
 * case a 16-byte mask is allocated, zero filled, and the needles are copied
 * to its start. The allocation is not released here; it stays attached to
 * the pattern.
 */
void
ws_mempbrk_sse42_compile(tvb_pbrk_pattern* pattern, const gchar *needles)
{
    size_t length = strlen(needles);

    pattern->use_sse42 = ws_cpuid_sse42() && (length <= 16);

    if (pattern->use_sse42) {
        /*
         * Build the mask with plain byte operations. Storing an __m128i
         * through the pointer would be an aligned 128-bit store, and the
         * allocator is not guaranteed to return 16-byte aligned memory on
         * every platform. An all-zero byte pattern is exactly what
         * _mm_setzero_si128() would have produced.
         */
        pattern->mask = g_malloc0(sizeof(__m128i));
        memcpy(pattern->mask, needles, length);
    }
}
/* We use 0x2:
_SIDD_SBYTE_OPS
| _SIDD_CMP_EQUAL_ANY
@ -92,81 +109,12 @@ __m128i_shift_right (__m128i value, unsigned long int offset)
X for case 1. */
const char *
_ws_mempbrk_sse42(const char *s, size_t slen, const char *a)
ws_mempbrk_sse42_exec(const char *s, size_t slen, const tvb_pbrk_pattern* pattern, guchar *found_needle)
{
const char *aligned;
__m128i mask;
__m128i *pmask = (__m128i *) pattern->mask;
int offset;
#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
{
/* As 'a' is not guarantueed to have a size of at least 16 bytes, and is not
* aligned, _mm_load_si128() cannot be used when ASAN is enabled. That
* triggers a buffer overflow which is harmless as 'a' is guaranteed to be
* '\0' terminated, and the PCMISTRI instruction always ignored everything
* starting from EOS ('\0'). A false positive indeed. */
size_t length;
length = strlen(a);
/* Don't use SSE4.2 if the length of A > 16. */
if (length > 16)
return _ws_mempbrk(s, slen, a);
mask = _mm_setzero_si128();
memcpy(&mask, a, length);
}
#else /* else if ASAN is disabled */
offset = (int) ((size_t) a & 15);
aligned = (const char *) ((size_t) a & -16L);
if (offset != 0)
{
int length;
/* Load masks. */
/* cast safe - _mm_load_si128() it's 16B aligned */
mask = __m128i_shift_right(_mm_load_si128 (cast_128aligned__m128i(aligned)), offset);
/* Find where the NULL terminator is. */
length = _mm_cmpistri (mask, mask, 0x3a);
if (length == 16 - offset)
{
/* There is no NULL terminator. */
__m128i mask1 = _mm_load_si128 (cast_128aligned__m128i(aligned + 16));
int idx = _mm_cmpistri (mask1, mask1, 0x3a);
length += idx;
/* Don't use SSE4.2 if the length of A > 16. */
if (length > 16)
return _ws_mempbrk(s, slen, a);
if (idx != 0)
{
/* Combine mask0 and mask1. We could play games with
palignr, but frankly this data should be in L1 now
so do the merge via an unaligned load. */
mask = _mm_loadu_si128 (cast_128aligned__m128i(a));
}
}
}
else
{
int length;
/* A is aligned. (cast safe) */
mask = _mm_load_si128 (cast_128aligned__m128i(a));
/* Find where the NULL terminator is. */
length = _mm_cmpistri (mask, mask, 0x3a);
if (length == 16)
{
/* There is no NULL terminator. Don't use SSE4.2 if the length
of A > 16. */
if (a[16] != 0)
return _ws_mempbrk(s, slen, a);
}
}
#endif /* ASAN disabled */
offset = (int) ((size_t) s & 15);
aligned = (const char *) ((size_t) s & -16L);
if (offset != 0)
@ -174,18 +122,23 @@ _ws_mempbrk_sse42(const char *s, size_t slen, const char *a)
/* Check partial string. cast safe it's 16B aligned */
__m128i value = __m128i_shift_right (_mm_load_si128 (cast_128aligned__m128i(aligned)), offset);
int length = _mm_cmpistri (mask, value, 0x2);
int length = _mm_cmpistri (*pmask, value, 0x2);
/* No need to check ZFlag since ZFlag is always 1. */
int cflag = _mm_cmpistrc (mask, value, 0x2);
int cflag = _mm_cmpistrc (*pmask, value, 0x2);
/* XXX: why does this compare value with value? */
int idx = _mm_cmpistri (value, value, 0x3a);
if (cflag)
if (cflag) {
if (found_needle)
*found_needle = *(s + length);
return s + length;
}
/* Find where the NULL terminator is. */
if (idx < 16 - offset)
{
/* fond NUL @ 'idx', need to switch to slower mempbrk */
return _ws_mempbrk(s + idx + 1, slen - idx - 1, a); /* slen is bigger than 16 & idx < 16 so no undeflow here */
/* found NUL @ 'idx', need to switch to slower mempbrk */
return ws_mempbrk_exec(s + idx + 1, slen - idx - 1, pattern, found_needle); /* slen is bigger than 16 & idx < 16 so no undeflow here */
}
aligned += 16;
slen -= (16 - offset);
@ -196,23 +149,27 @@ _ws_mempbrk_sse42(const char *s, size_t slen, const char *a)
while (slen >= 16)
{
__m128i value = _mm_load_si128 (cast_128aligned__m128i(aligned));
int idx = _mm_cmpistri (mask, value, 0x2);
int cflag = _mm_cmpistrc (mask, value, 0x2);
int zflag = _mm_cmpistrz (mask, value, 0x2);
int idx = _mm_cmpistri (*pmask, value, 0x2);
int cflag = _mm_cmpistrc (*pmask, value, 0x2);
int zflag = _mm_cmpistrz (*pmask, value, 0x2);
if (cflag)
if (cflag) {
if (found_needle)
*found_needle = *(aligned + idx);
return aligned + idx;
}
if (zflag)
{
/* found NUL, need to switch to slower mempbrk */
return _ws_mempbrk(aligned, slen, a);
return ws_mempbrk_exec(aligned, slen, pattern, found_needle);
}
aligned += 16;
slen -= 16;
}
/* XXX, use mempbrk_slow here? */
return _ws_mempbrk(aligned, slen, a);
return ws_mempbrk_exec(aligned, slen, pattern, found_needle);
}
#endif /* HAVE_SSE4_2 */