Handle quoted-pairs in quoted-strings correctly.

Backslash, in a quoted-string, escapes quotes (and any other characters,
although the only ones that *need* escaping are a double-quote and a
backslash).

This means that the value of a parameter isn't just the raw characters
from the parameters string; for a quoted string, it needs to be
un-escaped, and for a *non*-quoted string, it has to stop at the first
non-token character (you can put comments in).  So
ws_find_media_type_parameter() must return an allocated string with the
actual value.

Get rid of index_of_char(); it doesn't do anything that strchr() does.

Change-Id: I36328ea71c28fe6ac4918a8e73c281a25f6be844
Reviewed-on: https://code.wireshark.org/review/17251
Reviewed-by: Guy Harris <guy@alum.mit.edu>
This commit is contained in:
Guy Harris 2016-08-22 17:53:10 -07:00
parent 943a9ede85
commit 0b3a96222b
4 changed files with 216 additions and 121 deletions

View File

@ -10422,17 +10422,16 @@ dissect_application_isup(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, vo
proto_tree *isup_tree = NULL;
tvbuff_t *message_tvb;
guint8 message_type;
const char *version, *base;
int len_version, len_base;
char *version, *base;
guint8 itu_isup_variant = ISUP_ITU_STANDARD_VARIANT; /* Default */
if (data) {
http_message_info_t *message_info = (http_message_info_t *)data;
if (message_info->media_str) {
version = ws_find_media_type_parameter(message_info->media_str, "version=", &len_version);
base = ws_find_media_type_parameter(message_info->media_str, "base=", &len_base);
if ((version && len_version >= 4 && g_ascii_strncasecmp(version, "ansi", 4) == 0) ||
(base && len_base >= 4 && g_ascii_strncasecmp(base, "ansi", 4) == 0)) {
version = ws_find_media_type_parameter(message_info->media_str, "version");
base = ws_find_media_type_parameter(message_info->media_str, "base");
if ((version && g_ascii_strncasecmp(version, "ansi", 4) == 0) ||
(base && g_ascii_strncasecmp(base, "ansi", 4) == 0)) {
/*
* "version" or "base" parameter begins with "ansi", so it's ANSI.
*/
@ -10451,8 +10450,8 @@ dissect_application_isup(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, vo
message_tvb = tvb_new_subset_remaining(tvb, 0);
dissect_ansi_isup_message(message_tvb, pinfo, isup_tree, ISUP_ITU_STANDARD_VARIANT, 0);
return tvb_reported_length(tvb);
} else if ((version && g_ascii_strncasecmp(version, "spirou", len_version) == 0) ||
(base && g_ascii_strncasecmp(base, "spirou", len_base) == 0)) {
} else if ((version && g_ascii_strcasecmp(version, "spirou") == 0) ||
(base && g_ascii_strcasecmp(base, "spirou") == 0)) {
/*
* "version" or "base" version is "spirou", so it's SPIROU.
*/
@ -10461,6 +10460,8 @@ dissect_application_isup(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, vo
} else {
isup_standard = ITU_STANDARD;
}
g_free(version);
g_free(base);
} else {
/* default to ITU */
isup_standard = ITU_STANDARD;

View File

@ -279,9 +279,15 @@ unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset)
*(q++) = c;
while (c) {
c = *(q++) = *(++p);
if (c == '"') {
p++; /* Skip closing quote */
break;
if (c == '\\') {
/* First part of a quoted-pair; copy the other part,
without checking if it's a quote */
c = *(q++) = *(++p);
} else {
if (c == '"') {
p++; /* Skip closing quote */
break;
}
}
}
/* if already zero terminated now, rewind one char to avoid an "off by one" */
@ -311,25 +317,6 @@ unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset)
return (ret);
}
/* Return the index of a given char in the given string,
* or -1 if not found.
*/
static gint
index_of_char(const char *str, const char c)
{
gint len = 0;
const char *p = str;
while (*p && *p != c) {
p++;
len++;
}
if (*p)
return len;
return -1;
}
/* Retrieve the media information from pinfo->private_data,
* and compute the boundary string and its length.
* Return a pointer to a filled-in multipart_info_t, or NULL on failure.
@ -341,8 +328,7 @@ index_of_char(const char *str, const char c)
static multipart_info_t *
get_multipart_info(packet_info *pinfo, http_message_info_t *message_info)
{
const char *start_boundary, *start_protocol = NULL;
int len_boundary = 0, len_protocol = 0;
char *start_boundary, *start_protocol = NULL;
multipart_info_t *m_info = NULL;
const char *type = pinfo->match_string;
char *parameters;
@ -365,14 +351,15 @@ get_multipart_info(packet_info *pinfo, http_message_info_t *message_info)
/* Clean up the parameters */
parameters = unfold_and_compact_mime_header(message_info->media_str, &dummy);
start_boundary = ws_find_media_type_parameter(parameters, "boundary=", &len_boundary);
start_boundary = ws_find_media_type_parameter(parameters, "boundary");
if(!start_boundary) {
return NULL;
}
if(strncmp(type, "multipart/encrypted", sizeof("multipart/encrypted")-1) == 0) {
start_protocol = ws_find_media_type_parameter(parameters, "protocol=", &len_protocol);
start_protocol = ws_find_media_type_parameter(parameters, "protocol");
if(!start_protocol) {
g_free(start_boundary);
return NULL;
}
}
@ -382,11 +369,13 @@ get_multipart_info(packet_info *pinfo, http_message_info_t *message_info)
*/
m_info = wmem_new(wmem_packet_scope(), multipart_info_t);
m_info->type = type;
m_info->boundary = wmem_strndup(wmem_packet_scope(), start_boundary, len_boundary);
m_info->boundary_length = len_boundary;
m_info->boundary = wmem_strdup(wmem_packet_scope(), start_boundary);
m_info->boundary_length = (guint)strlen(start_boundary);
g_free(start_boundary);
if(start_protocol) {
m_info->protocol = wmem_strndup(wmem_packet_scope(), start_protocol, len_protocol);
m_info->protocol_length = len_protocol;
m_info->protocol = wmem_strdup(wmem_packet_scope(), start_protocol);
m_info->protocol_length = (guint)strlen(start_protocol);
g_free(start_protocol);
} else {
m_info->protocol = NULL;
m_info->protocol_length = -1;
@ -577,7 +566,6 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb,
gchar *content_encoding_str = NULL;
char *filename = NULL;
char *mimetypename = NULL;
int len = 0;
gboolean last_field = FALSE;
gboolean is_raw_data = FALSE;
@ -652,7 +640,7 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb,
break;
}
} else {
char *value_str = header_str + colon_offset + 1;
char *value_str = g_strdup(header_str + colon_offset + 1);
proto_tree_add_string_format(subtree,
hf_header_array[hf_index], tvb,
@ -663,20 +651,20 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb,
switch (hf_index) {
case POS_ORIGINALCONTENT:
{
gint semicolon_offset;
char *semicolonp;
/* The Content-Type starts at colon_offset + 1 or after the type parameter */
char* type_str = ws_find_media_type_parameter(value_str, "type=", NULL);
char* type_str = ws_find_media_type_parameter(value_str, "type");
if(type_str != NULL) {
g_free(value_str);
value_str = type_str;
}
semicolon_offset = index_of_char(
value_str, ';');
semicolonp = strchr(value_str, ';');
if (semicolon_offset > 0) {
value_str[semicolon_offset] = '\0';
if (semicolonp != NULL) {
*semicolonp = '\0';
m_info->orig_parameters = wmem_strdup(wmem_packet_scope(),
value_str + semicolon_offset + 1);
semicolonp + 1);
}
m_info->orig_content_type = wmem_ascii_strdown(wmem_packet_scope(), value_str, -1);
@ -685,12 +673,11 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb,
case POS_CONTENT_TYPE:
{
/* The Content-Type starts at colon_offset + 1 */
gint semicolon_offset = index_of_char(
value_str, ';');
char *semicolonp = strchr(value_str, ';');
if (semicolon_offset > 0) {
value_str[semicolon_offset] = '\0';
message_info.media_str = wmem_strdup(wmem_packet_scope(), value_str + semicolon_offset + 1);
if (semicolonp != NULL) {
*semicolonp = '\0';
message_info.media_str = wmem_strdup(wmem_packet_scope(), semicolonp + 1);
} else {
message_info.media_str = NULL;
}
@ -701,9 +688,7 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb,
proto_item_append_text(ti, " (%s)", content_type_str);
/* find the "name" parameter in case we don't find a content disposition "filename" */
if((mimetypename = ws_find_media_type_parameter(message_info.media_str, "name=", &len)) != NULL) {
mimetypename = g_strndup(mimetypename, len);
}
mimetypename = ws_find_media_type_parameter(message_info.media_str, "name");
if(strncmp(content_type_str, "application/octet-stream",
sizeof("application/octet-stream")-1) == 0) {
@ -722,26 +707,25 @@ process_body_part(proto_tree *tree, tvbuff_t *tvb,
case POS_CONTENT_TRANSFER_ENCODING:
{
/* The Content-Transferring starts at colon_offset + 1 */
gint cr_offset = index_of_char(value_str, '\r');
char *crp = strchr(value_str, '\r');
if (cr_offset > 0) {
value_str[cr_offset] = '\0';
if (crp != NULL) {
*crp = '\0';
}
content_encoding_str = wmem_ascii_strdown(wmem_packet_scope(), value_str, -1);
}
break;
case POS_CONTENT_DISPOSITION:
{
{
/* find the "filename" parameter */
if((filename = ws_find_media_type_parameter(value_str, "filename=", &len)) != NULL) {
filename = g_strndup(filename, len);
}
filename = ws_find_media_type_parameter(value_str, "filename");
}
break;
default:
break;
}
g_free(value_str);
}
}
offset = next_offset;

View File

@ -1,5 +1,5 @@
/* media_params.c
* Routines for parsing media type parameters
* Routines for parsing media type parameters as per RFC 822 and RFC 2045
* Copyright 2004, Anders Broman.
* Copyright 2004, Olivier Biot.
*
@ -33,12 +33,117 @@
#include <epan/media_params.h>
char *
ws_find_media_type_parameter(const char *parameters, const char *key, int *retlen)
static const char *
ws_get_next_media_type_parameter(const char *pos, int *retnamelen,
const char **retvalue, int *retvaluelen,
const char **nextp)
{
const char *start, *p;
int keylen = 0;
int len = 0;
const char *p, *namep, *valuep;
char c;
p = pos;
while ((c = *p) != '\0' && g_ascii_isspace(c))
p++; /* Skip white space */
if (c == '\0') {
/* No more parameters left */
return NULL;
}
namep = p;
/* Look for a '\0' (end of string), '=' (end of parameter name,
beginning of parameter value), or ';' (end of parameter). */
while ((c = *p) != '\0' && c != '=' && c != ';')
p++;
*retnamelen = (int) (p - namep);
if (c == '\0') {
/* End of string, so end of parameter, no parameter value */
*retvalue = NULL;
*nextp = p;
return namep;
}
if (c == ';') {
/* End of parameter, no parameter value */
*retvalue = NULL;
*nextp = p + 1;
return namep;
}
/* The parameter has a value. Skip the '=' */
p++;
valuep = p;
if (retvalue != NULL)
*retvalue = valuep;
/* Is the value a quoted string? */
if (*p == '"') {
/* Yes. Skip the opening quote, and scan forward looking for
a non-escaped closing quote. */
p++;
for (;;) {
c = *p;
if (c == '\0') {
/* End-of-string. We're done.
(XXX - this is an error.) */
if (retvaluelen != NULL) {
*retvaluelen = (int) (p - valuep);
}
*nextp = p;
return namep;
}
if (c == '"') {
/* Closing quote. Skip it; we're done with
the quoted-string. */
p++;
break;
}
if (c == '\\') {
/* Backslash; this escapes the next character
(quoted-pair). Skip the backslash, and make
sure there *is* a next character. */
p++;
if (*p == '\0') {
/* Nothing left; we're done.
(XXX - this is an error.) */
break;
}
}
/* Skip the character we just processed. */
p++;
}
/* Now scan forward looking for a '\0' (end of string)
or ';' (end of parameter), in case there's any
extra cruft after the quoted-string. */
while ((c = *p) != '\0' && c != ';')
p++;
} else {
/* No. Just scan forward looking for a '\0' (end
of string) or ';' (end of parameter). */
while ((c = *p) != '\0' && c != ';')
p++;
}
if (c == '\0') {
/* End of string, so end of parameter */
if (retvaluelen != NULL) {
*retvaluelen = (int) (p - valuep);
}
*nextp = p;
return namep;
}
/* End of parameter; point past the terminating ';' */
if (retvaluelen != NULL) {
*retvaluelen = (int) (p - valuep);
}
*nextp = p + 1;
return namep;
}
char *
ws_find_media_type_parameter(const char *parameters, const char *key)
{
const char *p, *name, *value;
char c;
int keylen, namelen, valuelen;
char *valuestr, *vp;
if(!parameters || !*parameters || !key || strlen(key) == 0)
/* we won't be able to find anything */
@ -48,65 +153,70 @@ ws_find_media_type_parameter(const char *parameters, const char *key, int *retle
p = parameters;
while (*p) {
while ((*p) && g_ascii_isspace(*p))
p++; /* Skip white space */
if (g_ascii_strncasecmp(p, key, keylen) == 0)
break;
/* Skip to next parameter */
p = strchr(p, ';');
if (p == NULL)
{
/* Get the next parameter. */
name = ws_get_next_media_type_parameter(p, &namelen, &value,
&valuelen, &p);
if (name == NULL) {
/* No more parameters - not found. */
return NULL;
}
p++; /* Skip semicolon */
}
if (*p == 0x0)
return NULL; /* key wasn't found */
start = p + keylen;
if (start[0] == 0) {
return NULL;
/* Is it the parameter we're looking for? */
if (namelen == keylen && g_ascii_strncasecmp(name, key, keylen) == 0) {
/* Yes. */
break;
}
}
/*
* Process the parameter value
*/
if (start[0] == '"') {
/*
* Parameter value is a quoted-string
*/
start++; /* Skip the quote */
if (NULL == strchr(start, '"')) {
/*
* No closing quote
*/
return NULL;
/* We found the parameter with that name; now extract the value. */
valuestr = g_malloc(valuelen + 1);
vp = valuestr;
p = value;
/* Is the value a quoted string? */
if (*p == '"') {
/* Yes. Skip the opening quote, and scan forward looking for
a non-escaped closing quote, copying characters. */
p++;
for (;;) {
c = *p;
if (c == '\0') {
/* End-of-string. We're done.
(XXX - this is an error.) */
*vp = '\0';
return valuestr;
}
if (c == '"') {
/* Closing quote. Skip it; we're done with
the quoted-string. */
p++;
break;
}
if (c == '\\') {
/* Backslash; this escapes the next character
(quoted-pair). Skip the backslash, and make
sure there *is* a next character. */
p++;
if (*p == '\0') {
/* Nothing left; we're done.
(XXX - this is an error.) */
break;
}
}
/* Copy the character. */
*vp++ = *p++;
}
} else {
/*
* Look for end of boundary
*/
p = start;
while (*p) {
if (*p == ';' || g_ascii_isspace(*p))
break;
/* No. Just scan forward until we see a '\0' (end of
string or a non-token character, copying characters. */
while ((c = *p) != '\0' && g_ascii_isgraph(c) && c != '(' &&
c != ')' && c != '<' && c != '>' && c != '@' &&
c != ',' && c != ';' && c != ':' && c != '\\' &&
c != '"' && c != '/' && c != '[' && c != ']' &&
c != '?' && c != '=' && c != '{' && c != '}') {
*vp++ = c;
p++;
len++;
}
}
if(retlen)
(*retlen) = len;
/*
* This is one of those ugly routines like strchr() where you can
* pass in a constant or non-constant string, and the result
* points into that string and inherits the constness of the
* input argument, but C doesn't support that, so the input
* parameter is const char * and the result is char *.
*/
return (char *)start;
*vp = '\0';
return valuestr;
}

View File

@ -1,5 +1,5 @@
/* media_params.h
* Routines for parsing media type parameters
* Routines for parsing media type parameters as per RFC 822 and RFC 2045
* Copyright 2004, Anders Broman.
* Copyright 2004, Olivier Biot.
*
@ -37,7 +37,7 @@ extern "C" {
#endif /* __cplusplus */
WS_DLL_PUBLIC char *
ws_find_media_type_parameter(const char *parameters, const char *key, int *retlen);
ws_find_media_type_parameter(const char *parameters, const char *key);
#ifdef __cplusplus
}