2007-07-30 20:22:37 +00:00
|
|
|
/*
|
|
|
|
* We don't use unput, so don't generate code for it.
|
|
|
|
*/
|
2005-09-10 17:29:15 +00:00
|
|
|
%option nounput
|
2007-07-30 20:22:37 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't read from the terminal.
|
|
|
|
*/
|
2005-09-10 17:29:15 +00:00
|
|
|
%option never-interactive
|
|
|
|
|
2007-07-30 20:22:37 +00:00
|
|
|
/*
|
|
|
|
* Prefix scanner routines with "Dtd_Parse_" rather than "yy", so this scanner
|
|
|
|
* can coexist with other scanners.
|
|
|
|
*/
|
|
|
|
%option prefix="Dtd_Parse_"
|
|
|
|
|
|
|
|
%option outfile="dtd_parse.c"
|
|
|
|
|
2005-09-10 17:29:15 +00:00
|
|
|
%{
|
|
|
|
|
2005-09-12 19:32:35 +00:00
|
|
|
/* dtd_parse.l
|
2006-05-29 20:44:06 +00:00
|
|
|
* an XML dissector for Wireshark
|
2005-09-10 17:29:15 +00:00
|
|
|
* lexical analyzer for DTDs
|
|
|
|
*
|
2008-08-05 21:03:46 +00:00
|
|
|
* Copyright 2004, Luis E. Garcia Ontanon <luis@ontanon.org>
|
2005-09-10 17:29:15 +00:00
|
|
|
*
|
|
|
|
* $Id$
|
|
|
|
*
|
2006-05-21 05:12:17 +00:00
|
|
|
* Wireshark - Network traffic analyzer
|
|
|
|
* By Gerald Combs <gerald@wireshark.org>
|
2005-09-10 17:29:15 +00:00
|
|
|
* Copyright 1998 Gerald Combs
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version 2
|
|
|
|
* of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
2007-07-30 20:22:37 +00:00
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
2005-09-10 17:29:15 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <glib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "dtd.h"
|
|
|
|
#include "dtd_grammar.h"
|
2005-09-12 19:32:35 +00:00
|
|
|
#include "dtd_parse.h"
|
2007-08-04 02:13:52 +00:00
|
|
|
#include "dtd_parse_lex.h"
|
2005-09-10 17:29:15 +00:00
|
|
|
|
|
|
|
struct _proto_xmlpi_attr {
|
|
|
|
gchar* name;
|
|
|
|
void (*act)(gchar*);
|
|
|
|
};
|
|
|
|
|
2005-09-22 18:39:25 +00:00
|
|
|
static void* pParser;
|
|
|
|
static GString* input_string;
|
|
|
|
static guint offset;
|
|
|
|
static guint len;
|
|
|
|
static gchar* location;
|
|
|
|
static gchar* attr_name;
|
2005-09-10 17:29:15 +00:00
|
|
|
|
|
|
|
static int my_yyinput(char* buff,guint size);
|
|
|
|
|
|
|
|
static dtd_token_data_t* new_token(gchar*);
|
|
|
|
|
|
|
|
static dtd_build_data_t* build_data;
|
|
|
|
|
|
|
|
/* Replace build_data->proto_name with a copy of val. */
static void set_proto_name(gchar *val)
{
	g_free(build_data->proto_name);	/* g_free() ignores NULL */
	build_data->proto_name = g_strdup(val);
}
|
|
|
|
/* Replace build_data->media_type with a copy of val. */
static void set_media_type(gchar *val)
{
	g_free(build_data->media_type);	/* g_free() ignores NULL */
	build_data->media_type = g_strdup(val);
}
|
|
|
|
/* Replace build_data->proto_root with a copy of val. */
static void set_proto_root(gchar *val)
{
	g_free(build_data->proto_root);	/* g_free() ignores NULL */
	build_data->proto_root = g_strdup(val);
}
|
|
|
|
/* Replace build_data->description with a copy of val. */
static void set_description(gchar *val)
{
	g_free(build_data->description);	/* g_free() ignores NULL */
	build_data->description = g_strdup(val);
}
|
strcasecmp(), strncasecmp(), g_strcasecmp(), and g_strncasecmp() delenda
est. Use g_ascii_strcasecmp() and g_ascii_strncasecmp(), and supply our
own versions if they're missing from GLib (as is the case with GLib
1.x).
In the code to build the list of named fields for Diameter, don't use
g_strdown(); do our own g_ascii_-style upper-case to lower-case mapping
in the hash function and use g_ascii_strcasecmp() in the compare
function.
We do this because there is no guarantee that toupper(), tolower(), and
functions that use them will, for example, map between "I" and "i" in
all locales; in Turkish locales, for example, there are, in both
upper case and lower case, versions of "i" with and without a dot, and
the upper-case version of "i" is "I"-with-a-dot and the lower-case
version of "I" is "i"-without-a-dot. This causes strings that should
match not to match.
This finishes fixing bug 2010 - an earlier checkin prevented the crash
(as there are other ways to produce the same crash, e.g. a bogus
dictionary.xml file), but didn't fix the case-insensitive string matching.
svn path=/trunk/; revision=23623
2007-11-27 18:52:51 +00:00
|
|
|
/*
 * Set build_data->recursion to TRUE when val is "yes" (compared without
 * regard to ASCII case) and to FALSE for any other value.
 */
static void set_recursive(gchar *val)
{
	gboolean is_yes = (g_ascii_strcasecmp(val, "yes") == 0);

	build_data->recursion = is_yes;
}
|
2005-09-10 17:29:15 +00:00
|
|
|
|
2005-09-22 18:39:25 +00:00
|
|
|
static struct _proto_xmlpi_attr proto_attrs[] =
|
2005-09-10 17:29:15 +00:00
|
|
|
{
|
2005-09-17 17:05:46 +00:00
|
|
|
{ "proto_name", set_proto_name },
|
2005-09-10 17:29:15 +00:00
|
|
|
{ "media", set_media_type },
|
|
|
|
{ "root", set_proto_root },
|
|
|
|
{ "description", set_description },
|
2005-09-22 18:39:25 +00:00
|
|
|
{ "hierarchy", set_recursive },
|
2005-09-10 17:29:15 +00:00
|
|
|
{NULL,NULL}
|
|
|
|
};
|
|
|
|
|
2005-09-20 00:31:53 +00:00
|
|
|
/*
 * DEBUG_DTD_TOKEN(token_type): when DEBUG_DTD_PARSER is defined, print the
 * current location, the token type and the matched text to stderr;
 * otherwise do nothing.
 *
 * The token type is an explicit argument because a parameter of DTD_PARSE()
 * is not substituted inside the expansion of another macro; an object-like
 * DEBUG_DTD_TOKEN naming token_type would refer to an undeclared identifier.
 * location is NULL until a wireshark:location value has been seen, so it is
 * not passed to %s unchecked.
 */
#ifdef DEBUG_DTD_PARSER
#define DEBUG_DTD_TOKEN(token_type) \
	fprintf(stderr, "->%s (%i)%s\n", \
		location ? location : "(null)", (token_type), yytext)
#else
#define DEBUG_DTD_TOKEN(token_type) ((void)0)
#endif

/*
 * DTD_PARSE(token_type): pass the text just matched to the grammar as a
 * token of the given type, then stop scanning if an error message has been
 * recorded in build_data->error.
 *
 * Wrapped in do { } while (0) so that "DTD_PARSE(x);" is a single statement
 * wherever it is used.
 */
#define DTD_PARSE(token_type) \
	do { \
		DEBUG_DTD_TOKEN(token_type); \
		DtdParse(pParser, (token_type), new_token(yytext), build_data); \
		if (build_data->error->len > 0) yyterminate(); \
	} while (0)
|
|
|
|
|
2005-09-20 00:31:53 +00:00
|
|
|
|
2005-09-10 17:29:15 +00:00
|
|
|
/*
 * Scanner input hook: fill buff with up to max_size bytes by calling
 * my_yyinput() and store the number of bytes obtained in result.
 */
#define YY_INPUT(buff, result, max_size) \
	((result) = my_yyinput((buff), (max_size)))
|
|
|
|
|
2008-04-25 17:40:29 +00:00
|
|
|
/*
 * Flex (v 2.5.35) uses the YY_NO_UNISTD_H symbol to "exclude" unistd.h;
 * it is defined here for Windows builds only.
 */
#if defined(_WIN32)
#define YY_NO_UNISTD_H
#endif
|
|
|
|
|
2005-09-10 17:29:15 +00:00
|
|
|
%}
|
|
|
|
|
2005-09-17 17:05:46 +00:00
|
|
|
/* Opening and closing delimiters of an XML comment. */
comment_start "<!--"
comment_stop "-->"

/* Opening delimiter of a processing instruction. */
start_xmlpi "<?"

/* Targets of the two processing instructions this scanner acts on. */
location_xmlpi "wireshark:location"
protocol_xmlpi "wireshark:protocol"

/*
 * An equals sign, optional blanks, and the opening double quote of an
 * attribute value.
 *
 * A POSIX class such as [:blank:] is only recognised inside a bracket
 * expression.  Written bare, [:blank:] is an ordinary character set made
 * of a colon and the letters b, l, a, n and k, so blanks between the
 * equals sign and the quote would not be accepted.
 */
get_attr_quote =[[:blank:]]*["]
/*
 * Not referenced by any rule; presumably present only so that editors
 * which pair up double quotes stay in step after the line above.
 */
avoid_editor_bug ["]

/* A wireshark:location value: a run of non-blank characters. */
get_location_xmlpi [^[:blank:]]+

/* Closing delimiter of a processing instruction. */
stop_xmlpi "?>"

/*
 * Start of a NOTATION declaration; blanks are allowed before the keyword
 * (the POSIX class must be inside a bracket expression, as above).
 */
notation_tag "<!"[[:blank:]]*NOTATION
|
|
|
|
|
2005-09-10 17:29:15 +00:00
|
|
|
/* Delimiters of a markup declaration. */
special_start	"<!"
special_stop	">"

/* White space and line ends. */
whitespace	[[:blank:]\r\n]+
newline		\n

/* Declaration keywords. */
attlist_kw	ATTLIST
doctype_kw	DOCTYPE
element_kw	ELEMENT

/* Element content keywords. */
pcdata		#PCDATA
any		ANY
cdata		#CDATA

/* Attribute type keywords. */
iD		ID
idref		IDREF
idrefs		IDREFS
nmtoken		NMTOKEN
nmtokens	NMTOKENS
entity		ENTITY
entities	ENTITIES
notation	NOTATION
cdata_t		CDATA

empty		EMPTY

/* Attribute default keywords. */
defaulT		#DEFAULT
fixed		#FIXED
required	#REQUIRED
implied		#IMPLIED

/* Punctuation. */
star		"*"
question	"?"
plus		"+"
open_parens	"("
close_parens	")"
open_bracket	"["
close_bracket	"]"
comma		","
pipe		"|"
dquote		["]

/* Names, and strings in double or single quotes. */
name		[A-Za-z0-9][-a-zA-Z0-9_]*
dquoted		["][^\"]*["]
squoted		['][^\']*[']

/* Start conditions used by the rules below. */
%START DTD XMLPI LOCATION DONE PROTOCOL GET_ATTR_QUOTE GET_ATTR_VAL GET_ATTR_CLOSE_QUOTE IN_COMMENT IN_NOTATION
|
2005-09-10 17:29:15 +00:00
|
|
|
%%
|
|
|
|
|
|
|
|
{whitespace}			{ /* white space is skipped */ }

	/* XML comments: discard everything up to the comment_stop delimiter. */
<DTD>{comment_start}		{ BEGIN IN_COMMENT; }
<IN_COMMENT>[^-]?		|
<IN_COMMENT>[-]			{ /* discard */ }
<IN_COMMENT>{comment_stop}	{ BEGIN DTD; }

	/* NOTATION declarations: discard everything up to the closing bracket. */
<DTD>{notation_tag}		{ BEGIN IN_NOTATION; }
<IN_NOTATION>[^>]		{ /* discard */ }
<IN_NOTATION>{special_stop}	{ BEGIN DTD; }
|
|
|
|
|
2005-09-10 17:29:15 +00:00
|
|
|
<DTD>{start_xmlpi} {
|
|
|
|
BEGIN XMLPI;
|
|
|
|
}
|
|
|
|
|
|
|
|
<XMLPI>{location_xmlpi} {
|
|
|
|
BEGIN LOCATION;
|
|
|
|
}
|
|
|
|
|
|
|
|
<XMLPI>{protocol_xmlpi} {
|
|
|
|
BEGIN PROTOCOL;
|
|
|
|
}
|
|
|
|
|
|
|
|
<XMLPI><.> ;
|
|
|
|
<XMLPI>{stop_xmlpi} BEGIN DTD;
|
|
|
|
|
|
|
|
<LOCATION>{get_location_xmlpi}	{
	/* Remember the new location in place of the previous one;
	 * g_free() accepts NULL. */
	g_free(location);
	location = g_strdup(yytext);
	BEGIN DONE;
}

<DONE>{stop_xmlpi}		{ BEGIN DTD; }
|
|
|
|
|
|
|
|
<PROTOCOL>{name} {
	/*
	 * Keep a lower-cased copy of the attribute name until its value
	 * has been read.  g_ascii_strdown() returns a newly allocated
	 * string and converts ASCII letters only, so the result does not
	 * depend on the current locale (g_strdown() is deprecated for
	 * that reason).
	 */
	attr_name = g_ascii_strdown(yytext, -1);
	BEGIN GET_ATTR_QUOTE;
}
|
|
|
|
|
|
|
|
<GET_ATTR_QUOTE>{get_attr_quote}	{ BEGIN GET_ATTR_VAL; }

<GET_ATTR_QUOTE>. {
	/* The attribute name was not followed by the start of a value:
	 * record the error and stop scanning. */
	g_string_append_printf(build_data->error,
		"error in wireshark:protocol xmpli at %s : could not find attribute value!",
		location);
	/* The name copied by the <PROTOCOL>{name} rule is no longer
	 * needed; release it here so it does not leak. */
	g_free(attr_name);
	attr_name = NULL;
	yyterminate();
}
|
|
|
|
|
|
|
|
<GET_ATTR_VAL>[^"]+ {
	/*"*/
	struct _proto_xmlpi_attr *pa;
	gboolean got_it = FALSE;

	/*
	 * Look the attribute name up in proto_attrs and give the value to
	 * the matching setter.  The comparison is ASCII-only and ignores
	 * case, so it does not depend on the locale (in a Turkish locale,
	 * for instance, locale-aware case mapping does not pair "I" with
	 * "i", and names that should match would not).
	 */
	for (pa = proto_attrs; pa->name; pa++) {
		if (g_ascii_strcasecmp(attr_name, pa->name) == 0) {
			pa->act(yytext);
			got_it = TRUE;
			break;
		}
	}

	if (!got_it) {
		g_string_append_printf(build_data->error,
			"error in wireshark:protocol xmpli at %s : no such parameter %s!",
			location, attr_name);
	}

	/* The name is finished with on both paths: free it once and clear
	 * the pointer so that no stale value is left behind. */
	g_free(attr_name);
	attr_name = NULL;

	if (!got_it) {
		yyterminate();
	}

	BEGIN GET_ATTR_CLOSE_QUOTE;
}
|
|
|
|
|
|
|
|
<GET_ATTR_CLOSE_QUOTE>{dquote} { BEGIN PROTOCOL;}
|
|
|
|
|
|
|
|
<PROTOCOL>{stop_xmlpi} BEGIN DTD;
|
|
|
|
|
|
|
|
<DTD>{special_start} { DTD_PARSE(TOKEN_TAG_START); }
|
|
|
|
<DTD>{special_stop} { DTD_PARSE(TOKEN_TAG_STOP); }
|
|
|
|
|
|
|
|
<DTD>{attlist_kw} { DTD_PARSE(TOKEN_ATTLIST_KW); }
|
|
|
|
<DTD>{element_kw} { DTD_PARSE(TOKEN_ELEMENT_KW); }
|
|
|
|
<DTD>{doctype_kw} { DTD_PARSE(TOKEN_DOCTYPE_KW); }
|
|
|
|
|
|
|
|
<DTD>{pcdata} { DTD_PARSE(TOKEN_ELEM_DATA); }
|
|
|
|
<DTD>{any} { DTD_PARSE(TOKEN_ELEM_DATA); }
|
|
|
|
<DTD>{cdata} { DTD_PARSE(TOKEN_ELEM_DATA); }
|
|
|
|
<DTD>{empty} { DTD_PARSE(TOKEN_EMPTY_KW); }
|
|
|
|
|
|
|
|
<DTD>{iD} { DTD_PARSE(TOKEN_ATT_TYPE); }
|
|
|
|
<DTD>{idref} { DTD_PARSE(TOKEN_ATT_TYPE); }
|
|
|
|
<DTD>{idrefs} { DTD_PARSE(TOKEN_ATT_TYPE); }
|
|
|
|
<DTD>{nmtoken} { DTD_PARSE(TOKEN_ATT_TYPE); }
|
|
|
|
<DTD>{nmtokens} { DTD_PARSE(TOKEN_ATT_TYPE); }
|
|
|
|
<DTD>{entity} { DTD_PARSE(TOKEN_ATT_TYPE); }
|
|
|
|
<DTD>{entities} { DTD_PARSE(TOKEN_ATT_TYPE); }
|
|
|
|
<DTD>{notation} { DTD_PARSE(TOKEN_ATT_TYPE); }
|
|
|
|
<DTD>{cdata_t} { DTD_PARSE(TOKEN_ATT_TYPE); }
|
|
|
|
<DTD>{defaulT} { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
|
|
|
|
<DTD>{fixed} { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
|
|
|
|
<DTD>{required} { DTD_PARSE(TOKEN_ATT_DEF); }
|
|
|
|
<DTD>{implied} { DTD_PARSE(TOKEN_ATT_DEF); }
|
|
|
|
|
|
|
|
<DTD>{star} { DTD_PARSE(TOKEN_STAR); }
|
|
|
|
<DTD>{question} { DTD_PARSE(TOKEN_QUESTION); }
|
|
|
|
<DTD>{plus} { DTD_PARSE(TOKEN_PLUS); }
|
|
|
|
<DTD>{comma} { DTD_PARSE(TOKEN_COMMA); }
|
|
|
|
<DTD>{open_parens} { DTD_PARSE(TOKEN_OPEN_PARENS); }
|
|
|
|
<DTD>{close_parens} { DTD_PARSE(TOKEN_CLOSE_PARENS); }
|
|
|
|
<DTD>{open_bracket} { DTD_PARSE(TOKEN_OPEN_BRACKET); }
|
|
|
|
<DTD>{close_bracket} { DTD_PARSE(TOKEN_CLOSE_BRACKET); }
|
|
|
|
<DTD>{pipe} { DTD_PARSE(TOKEN_PIPE); }
|
|
|
|
|
|
|
|
<DTD>{dquoted} |
|
|
|
|
<DTD>{squoted} { DTD_PARSE(TOKEN_QUOTED); }
|
|
|
|
<DTD>{name} { DTD_PARSE(TOKEN_NAME); }
|
|
|
|
|
|
|
|
%%
|
|
|
|
|
|
|
|
/*
 * Allocate a token for the grammar.  The token gets its own copies of the
 * given text and of the current location; the location copy is NULL while
 * no location has been set, because g_strdup(NULL) returns NULL.
 */
static dtd_token_data_t *new_token(gchar *text)
{
	dtd_token_data_t *tok = g_new(dtd_token_data_t, 1);

	tok->text = g_strdup(text);
	tok->location = g_strdup(location);

	return tok;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Input routine behind YY_INPUT: copy the next chunk of input_string into
 * buff and advance the read offset.
 *
 * buff  destination buffer supplied by the scanner
 * size  maximum number of bytes to copy
 *
 * Returns the number of bytes copied, or YY_NULL once all of the input has
 * been consumed.
 */
static int my_yyinput(char *buff, guint size)
{
	guint remaining;

	if (offset >= len) {
		return YY_NULL;
	}

	/*
	 * Clamp the request to what is left.  Comparing against the
	 * remaining count, rather than testing offset + size, cannot wrap
	 * around in unsigned arithmetic.
	 */
	remaining = len - offset;
	if (size > remaining) {
		size = remaining;
	}

	memcpy(buff, input_string->str + offset, size);
	offset += size;

	return (int) size;
}
|
|
|
|
|
|
|
|
/*
 * Scan the DTD text in s, feed the resulting tokens to the grammar and
 * return the build data that was filled in.
 *
 * s  the text to parse
 *
 * Returns a newly allocated dtd_build_data_t.  Its error string is created
 * empty; the scanner rules append a message to it, and stop scanning, when
 * they find a problem.
 */
extern dtd_build_data_t *dtd_parse(GString *s)
{
	/* Point my_yyinput() at the start of the text. */
	input_string = s;
	offset = 0;
	len = (guint) input_string->len;

	pParser = DtdParseAlloc(g_malloc);

#ifdef DEBUG_DTD_PARSER
	DtdParseTrace(stderr, ">>");
#endif

	/* Start from an empty result. */
	build_data = g_new(dtd_build_data_t, 1);

	build_data->proto_name = NULL;
	build_data->media_type = NULL;
	build_data->description = NULL;
	build_data->proto_root = NULL;
	build_data->recursion = FALSE;

	build_data->elements = g_ptr_array_new();
	build_data->attributes = g_ptr_array_new();

	build_data->error = g_string_new("");

	location = NULL;

	/* Run the scanner over the whole input, starting in the DTD state. */
	BEGIN DTD;
	yylex();

	/* Token type 0 with no token data: presumably the grammar's
	 * end-of-input marker -- confirm against the grammar. */
	DtdParse(pParser, 0, NULL, build_data);

	yyrestart(NULL);

	/* Drop the last location seen; g_free() accepts NULL. */
	g_free(location);
	location = NULL;

	DtdParseFree(pParser, g_free);

	return build_data;
}
|
2008-04-25 17:40:29 +00:00
|
|
|
|
|
|
|
/*
 * Called by the scanner at the end of the input; returning 1 makes it
 * stop there.
 * (%option noyywrap is not used because some flex versions, e.g. 2.5.35,
 * then generate code that causes warnings from the Windows VC compiler.)
 */
int yywrap(void)
{
	const int stop_at_end_of_input = 1;

	return stop_at_end_of_input;
}
|