From 0b3a96222b39ea4df1fdc899a950c50ec56a1077 Mon Sep 17 00:00:00 2001 From: Guy Harris Date: Mon, 22 Aug 2016 17:53:10 -0700 Subject: [PATCH] Handle quoted-pairs in quoted-strings correctly. Backslash, in a quoted-string, escapes quotes (and any other characters, although the only ones that *need* escaping are a double-quote and a backslash). This means that the value of a parameter isn't just the raw characters from the parameters string; for a quoted string, it needs to be un-escaped, and for a *non*-quoted string, it has to stop at the first non-token character (you can put comments in). So ws_find_media_type_parameter() must return an allocated string with the actual value. Get rid of index_of_char(); it doesn't do anything that strchr() does. Change-Id: I36328ea71c28fe6ac4918a8e73c281a25f6be844 Reviewed-on: https://code.wireshark.org/review/17251 Reviewed-by: Guy Harris --- epan/dissectors/packet-isup.c | 17 +-- epan/dissectors/packet-multipart.c | 92 +++++------- epan/media_params.c | 224 +++++++++++++++++++++-------- epan/media_params.h | 4 +- 4 files changed, 216 insertions(+), 121 deletions(-) diff --git a/epan/dissectors/packet-isup.c b/epan/dissectors/packet-isup.c index 9bd8a0c80b..42b234212e 100644 --- a/epan/dissectors/packet-isup.c +++ b/epan/dissectors/packet-isup.c @@ -10422,17 +10422,16 @@ dissect_application_isup(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, vo proto_tree *isup_tree = NULL; tvbuff_t *message_tvb; guint8 message_type; - const char *version, *base; - int len_version, len_base; + char *version, *base; guint8 itu_isup_variant = ISUP_ITU_STANDARD_VARIANT; /* Default */ if (data) { http_message_info_t *message_info = (http_message_info_t *)data; if (message_info->media_str) { - version = ws_find_media_type_parameter(message_info->media_str, "version=", &len_version); - base = ws_find_media_type_parameter(message_info->media_str, "base=", &len_base); - if ((version && len_version >= 4 && g_ascii_strncasecmp(version, "ansi", 4) == 0) || - (base && len_base >= 4 && g_ascii_strncasecmp(base, "ansi", 4) == 0)) { + version = ws_find_media_type_parameter(message_info->media_str, "version"); + base = ws_find_media_type_parameter(message_info->media_str, "base"); + if ((version && g_ascii_strncasecmp(version, "ansi", 4) == 0) || + (base && g_ascii_strncasecmp(base, "ansi", 4) == 0)) { /* * "version" or "base" parameter begins with "ansi", so it's ANSI. */ @@ -10451,8 +10450,8 @@ dissect_application_isup(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, vo message_tvb = tvb_new_subset_remaining(tvb, 0); dissect_ansi_isup_message(message_tvb, pinfo, isup_tree, ISUP_ITU_STANDARD_VARIANT, 0); return tvb_reported_length(tvb); - } else if ((version && g_ascii_strncasecmp(version, "spirou", len_version) == 0) || - (base && g_ascii_strncasecmp(base, "spirou", len_base) == 0)) { + } else if ((version && g_ascii_strcasecmp(version, "spirou") == 0) || + (base && g_ascii_strcasecmp(base, "spirou") == 0)) { /* * "version" or "base" version is "spirou", so it's SPIROU. */ @@ -10461,6 +10460,8 @@ dissect_application_isup(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, vo } else { isup_standard = ITU_STANDARD; } + g_free(version); + g_free(base); } else { /* default to ITU */ isup_standard = ITU_STANDARD; diff --git a/epan/dissectors/packet-multipart.c b/epan/dissectors/packet-multipart.c index 7b728b9455..05b96f5fd0 100644 --- a/epan/dissectors/packet-multipart.c +++ b/epan/dissectors/packet-multipart.c @@ -279,9 +279,15 @@ unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset) *(q++) = c; while (c) { c = *(q++) = *(++p); - if (c == '"') { - p++; /* Skip closing quote */ - break; + if (c == '\\') { + /* First part of a quoted-pair; copy the other part, + without checking if it's a quote */ + c = *(q++) = *(++p); + } else { + if (c == '"') { + p++; /* Skip closing quote */ + break; + } } } /* if already zero terminated now, rewind one char to avoid an "off by one" */ @@ -311,25 +317,6 @@ unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset) return (ret); } -/* Return the index of a given char in the given string, - * or -1 if not found. - */ -static gint -index_of_char(const char *str, const char c) -{ - gint len = 0; - const char *p = str; - - while (*p && *p != c) { - p++; - len++; - } - - if (*p) - return len; - return -1; -} - /* Retrieve the media information from pinfo->private_data, * and compute the boundary string and its length. * Return a pointer to a filled-in multipart_info_t, or NULL on failure. @@ -341,8 +328,7 @@ index_of_char(const char *str, const char c) static multipart_info_t * get_multipart_info(packet_info *pinfo, http_message_info_t *message_info) { - const char *start_boundary, *start_protocol = NULL; - int len_boundary = 0, len_protocol = 0; + char *start_boundary, *start_protocol = NULL; multipart_info_t *m_info = NULL; const char *type = pinfo->match_string; char *parameters; @@ -365,14 +351,15 @@ get_multipart_info(packet_info *pinfo, http_message_info_t *message_info) /* Clean up the parameters */ parameters = unfold_and_compact_mime_header(message_info->media_str, &dummy); - start_boundary = ws_find_media_type_parameter(parameters, "boundary=", &len_boundary); + start_boundary = ws_find_media_type_parameter(parameters, "boundary"); if(!start_boundary) { return NULL; } if(strncmp(type, "multipart/encrypted", sizeof("multipart/encrypted")-1) == 0) { - start_protocol = ws_find_media_type_parameter(parameters, "protocol=", &len_protocol); + start_protocol = ws_find_media_type_parameter(parameters, "protocol"); if(!start_protocol) { + g_free(start_boundary); return NULL; } } @@ -382,11 +369,13 @@ get_multipart_info(packet_info *pinfo, http_message_info_t *message_info) */ m_info = wmem_new(wmem_packet_scope(), multipart_info_t); m_info->type = type; - m_info->boundary = wmem_strndup(wmem_packet_scope(), start_boundary, len_boundary); - m_info->boundary_length = len_boundary; + m_info->boundary = wmem_strdup(wmem_packet_scope(), start_boundary); + m_info->boundary_length = (guint)strlen(start_boundary); + g_free(start_boundary); if(start_protocol) { - m_info->protocol = wmem_strndup(wmem_packet_scope(), start_protocol, len_protocol); - m_info->protocol_length = len_protocol; + m_info->protocol = wmem_strdup(wmem_packet_scope(), start_protocol); + m_info->protocol_length = (guint)strlen(start_protocol); + g_free(start_protocol); } else { m_info->protocol = NULL; m_info->protocol_length = -1; @@ -577,7 +566,6 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb, gchar *content_encoding_str = NULL; char *filename = NULL; char *mimetypename = NULL; - int len = 0; gboolean last_field = FALSE; gboolean is_raw_data = FALSE; @@ -652,7 +640,7 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb, break; } } else { - char *value_str = header_str + colon_offset + 1; + char *value_str = g_strdup(header_str + colon_offset + 1); proto_tree_add_string_format(subtree, hf_header_array[hf_index], tvb, @@ -663,20 +651,20 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb, switch (hf_index) { case POS_ORIGINALCONTENT: { - gint semicolon_offset; + char *semicolonp; /* The Content-Type starts at colon_offset + 1 or after the type parameter */ - char* type_str = ws_find_media_type_parameter(value_str, "type=", NULL); + char* type_str = ws_find_media_type_parameter(value_str, "type"); if(type_str != NULL) { + g_free(value_str); value_str = type_str; } - semicolon_offset = index_of_char( - value_str, ';'); + semicolonp = strchr(value_str, ';'); - if (semicolon_offset > 0) { - value_str[semicolon_offset] = '\0'; + if (semicolonp != NULL) { + *semicolonp = '\0'; m_info->orig_parameters = wmem_strdup(wmem_packet_scope(), - value_str + semicolon_offset + 1); + semicolonp + 1); } m_info->orig_content_type = wmem_ascii_strdown(wmem_packet_scope(), value_str, -1); @@ -685,12 +673,11 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb, case POS_CONTENT_TYPE: { /* The Content-Type starts at colon_offset + 1 */ - gint semicolon_offset = index_of_char( - value_str, ';'); + char *semicolonp = strchr(value_str, ';'); - if (semicolon_offset > 0) { - value_str[semicolon_offset] = '\0'; - message_info.media_str = wmem_strdup(wmem_packet_scope(), value_str + semicolon_offset + 1); + if (semicolonp != NULL) { + *semicolonp = '\0'; + message_info.media_str = wmem_strdup(wmem_packet_scope(), semicolonp + 1); } else { message_info.media_str = NULL; } @@ -701,9 +688,7 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb, proto_item_append_text(ti, " (%s)", content_type_str); /* find the "name" parameter in case we don't find a content disposition "filename" */ - if((mimetypename = ws_find_media_type_parameter(message_info.media_str, "name=", &len)) != NULL) { - mimetypename = g_strndup(mimetypename, len); - } + mimetypename = ws_find_media_type_parameter(message_info.media_str, "name"); if(strncmp(content_type_str, "application/octet-stream", sizeof("application/octet-stream")-1) == 0) { @@ -722,26 +707,25 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb, case POS_CONTENT_TRANSFER_ENCODING: { /* The Content-Transferring starts at colon_offset + 1 */ - gint cr_offset = index_of_char(value_str, '\r'); + char *crp = strchr(value_str, '\r'); - if (cr_offset > 0) { - value_str[cr_offset] = '\0'; + if (crp != NULL) { + *crp = '\0'; } content_encoding_str = wmem_ascii_strdown(wmem_packet_scope(), value_str, -1); } break; case POS_CONTENT_DISPOSITION: - { + { /* find the "filename" parameter */ - if((filename = ws_find_media_type_parameter(value_str, "filename=", &len)) != NULL) { - filename = g_strndup(filename, len); - } + filename = ws_find_media_type_parameter(value_str, "filename"); } break; default: break; } + g_free(value_str); } } offset = next_offset; diff --git a/epan/media_params.c b/epan/media_params.c index dbfd7ba35d..c892f6124f 100644 --- a/epan/media_params.c +++ b/epan/media_params.c @@ -1,5 +1,5 @@ /* media_params.c - * Routines for parsing media type parameters + * Routines for parsing media type parameters as per RFC 822 and RFC 2045 * Copyright 2004, Anders Broman. * Copyright 2004, Olivier Biot. * @@ -33,12 +33,117 @@ #include -char * -ws_find_media_type_parameter(const char *parameters, const char *key, int *retlen) +static const char * +ws_get_next_media_type_parameter(const char *pos, int *retnamelen, + const char **retvalue, int *retvaluelen, + const char **nextp) { - const char *start, *p; - int keylen = 0; - int len = 0; + const char *p, *namep, *valuep; + char c; + + p = pos; + while ((c = *p) != '\0' && g_ascii_isspace(c)) + p++; /* Skip white space */ + + if (c == '\0') { + /* No more parameters left */ + return NULL; + } + + namep = p; + + /* Look for a '\0' (end of string), '=' (end of parameter name, + beginning of parameter value), or ';' (end of parameter). */ + while ((c = *p) != '\0' && c != '=' && c != ';') + p++; + *retnamelen = (int) (p - namep); + if (c == '\0') { + /* End of string, so end of parameter, no parameter value */ + *retvalue = NULL; + *nextp = p; + return namep; + } + if (c == ';') { + /* End of parameter, no parameter value */ + *retvalue = NULL; + *nextp = p + 1; + return namep; + } + /* The parameter has a value. Skip the '=' */ + p++; + valuep = p; + if (retvalue != NULL) + *retvalue = valuep; + /* Is the value a quoted string? */ + if (*p == '"') { + /* Yes. Skip the opening quote, and scan forward looking for + a non-escaped closing quote. */ + p++; + for (;;) { + c = *p; + if (c == '\0') { + /* End-of-string. We're done. + (XXX - this is an error.) */ + if (retvaluelen != NULL) { + *retvaluelen = (int) (p - valuep); + } + *nextp = p; + return namep; + } + if (c == '"') { + /* Closing quote. Skip it; we're done with + the quoted-string. */ + p++; + break; + } + if (c == '\\') { + /* Backslash; this escapes the next character + (quoted-pair). Skip the backslash, and make + sure there *is* a next character. */ + p++; + if (*p == '\0') { + /* Nothing left; we're done. + (XXX - this is an error.) */ + break; + } + } + /* Skip the character we just processed. */ + p++; + } + /* Now scan forward looking for a '\0' (end of string) + or ';' (end of parameter), in case there's any + extra cruft after the quoted-string. */ + while ((c = *p) != '\0' && c != ';') + p++; + } else { + /* No. Just scan forward looking for a '\0' (end + of string) or ';' (end of parameter). */ + while ((c = *p) != '\0' && c != ';') + p++; + } + if (c == '\0') { + /* End of string, so end of parameter */ + if (retvaluelen != NULL) { + *retvaluelen = (int) (p - valuep); + } + *nextp = p; + return namep; + } + /* End of parameter; point past the terminating ';' */ + if (retvaluelen != NULL) { + *retvaluelen = (int) (p - valuep); + } + *nextp = p + 1; + return namep; +} + +char * +ws_find_media_type_parameter(const char *parameters, const char *key) +{ + const char *p, *name, *value; + char c; + int keylen, namelen, valuelen; + char *valuestr, *vp; if(!parameters || !*parameters || !key || strlen(key) == 0) /* we won't be able to find anything */ @@ -48,65 +153,70 @@ ws_find_media_type_parameter(const char *parameters, const char *key, int *retle p = parameters; while (*p) { - - while ((*p) && g_ascii_isspace(*p)) - p++; /* Skip white space */ - - if (g_ascii_strncasecmp(p, key, keylen) == 0) - break; - /* Skip to next parameter */ - p = strchr(p, ';'); - if (p == NULL) - { + /* Get the next parameter. */ + name = ws_get_next_media_type_parameter(p, &namelen, &value, + &valuelen, &p); + if (name == NULL) { + /* No more parameters - not found. */ return NULL; } - p++; /* Skip semicolon */ - } - if (*p == 0x0) - return NULL; /* key wasn't found */ - - start = p + keylen; - if (start[0] == 0) { - return NULL; + /* Is it the parameter we're looking for? */ + if (namelen == keylen && g_ascii_strncasecmp(name, key, keylen) == 0) { + /* Yes. */ + break; + } } - /* - * Process the parameter value - */ - if (start[0] == '"') { - /* - * Parameter value is a quoted-string - */ - start++; /* Skip the quote */ - if (NULL == strchr(start, '"')) { - /* - * No closing quote - */ - return NULL; + /* We found the parameter with that name; now extract the value. */ + valuestr = g_malloc(valuelen + 1); + vp = valuestr; + p = value; + /* Is the value a quoted string? */ + if (*p == '"') { + /* Yes. Skip the opening quote, and scan forward looking for + a non-escaped closing quote, copying characters. */ + p++; + for (;;) { + c = *p; + if (c == '\0') { + /* End-of-string. We're done. + (XXX - this is an error.) */ + *vp = '\0'; + return valuestr; + } + if (c == '"') { + /* Closing quote. Skip it; we're done with + the quoted-string. */ + p++; + break; + } + if (c == '\\') { + /* Backslash; this escapes the next character + (quoted-pair). Skip the backslash, and make + sure there *is* a next character. */ + p++; + if (*p == '\0') { + /* Nothing left; we're done. + (XXX - this is an error.) */ + break; + } + } + /* Copy the character. */ + *vp++ = *p++; } } else { - /* - * Look for end of boundary - */ - p = start; - while (*p) { - if (*p == ';' || g_ascii_isspace(*p)) - break; + /* No. Just scan forward until we see a '\0' (end of + string or a non-token character, copying characters. */ + while ((c = *p) != '\0' && g_ascii_isgraph(c) && c != '(' && + c != ')' && c != '<' && c != '>' && c != '@' && + c != ',' && c != ';' && c != ':' && c != '\\' && + c != '"' && c != '/' && c != '[' && c != ']' && + c != '?' && c != '=' && c != '{' && c != '}') { + *vp++ = c; p++; - len++; } } - - if(retlen) - (*retlen) = len; - - /* - * This is one of those ugly routines like strchr() where you can - * pass in a constant or non-constant string, and the result - * points into that string and inherits the constness of the - * input argument, but C doesn't support that, so the input - * parameter is const char * and the result is char *. - */ - return (char *)start; + *vp = '\0'; + return valuestr; } diff --git a/epan/media_params.h b/epan/media_params.h index 0be153cc66..83e84a1d68 100644 --- a/epan/media_params.h +++ b/epan/media_params.h @@ -1,5 +1,5 @@ /* media_params.h - * Routines for parsing media type parameters + * Routines for parsing media type parameters as per RFC 822 and RFC 2045 * Copyright 2004, Anders Broman. * Copyright 2004, Olivier Biot. * @@ -37,7 +37,7 @@ extern "C" { #endif /* __cplusplus */ WS_DLL_PUBLIC char * -ws_find_media_type_parameter(const char *parameters, const char *key, int *retlen); +ws_find_media_type_parameter(const char *parameters, const char *key); #ifdef __cplusplus }