- changed the lexer to return entire tags, refactored packet-xml.c for those changes

- added markup declarations (new xml.markup_decl field), and indentation of the items found inside a DOCTYPE's [ ] section
- removed debugging code
- renamed meta_tag into xmlpi


svn path=/trunk/; revision=13686
This commit is contained in:
Luis Ontanon 2005-03-10 06:32:06 +00:00
parent f34e80a2cf
commit 5a9267805c
3 changed files with 114 additions and 210 deletions

View File

@ -37,74 +37,40 @@
#include <epan/packet.h>
#include <epan/strutil.h>
#include <epan/prefs.h>
#include <epan/report_err.h>
#include "packet-xml.h"
#ifdef DEBUG_XML
static const value_string xml_token_types[] =
{
{XML_WHITESPACE, "white space"},
{XML_PROPERTY, "property"},
{XML_COMMENT_START, "comment start"},
{XML_COMMENT_END, "comment end"},
{XML_METATAG_START, "metatag start"},
{XML_METATAG_END, "metatag end"},
{XML_TAG_START, "tag start"},
{XML_TAG_END, "tag end"},
{XML_CLOSE_TAG_END, "close tag end"},
{XML_NAME, "name"},
{XML_TEXT, "text"},
{XML_GARBLED, "garbled"},
{0, NULL}
};
static const value_string xml_ctx_types[] =
{
{XML_CTX_OUT, "no_ctx"},
{XML_CTX_COMMENT, "comment"},
{XML_CTX_TAG, "tag"},
{XML_CTX_METATAG, "meta-tag"},
{XML_CTX_CLOSETAG, "close-tag"},
{0, NULL}
};
static int hf_xml_token = -1;
static int hf_xml_token_type = -1;
static int hf_xml_ctx_type = -1;
static int ett_xml_tok = -1;
#endif /* DEBUG_XML */
static int proto_xml = -1;
static int ett_xml = -1;
static int hf_xml_metatag = -1;
static int hf_xml_pi = -1;
static int hf_xml_markup_decl = -1;
static int hf_xml_tag = -1;
static int hf_xml_text = -1;
gboolean is_soap;
/*
 * Add one lexer token to the protocol tree.
 *
 * tree - tree the new item is appended to
 * tvb  - buffer the token was scanned from
 * xi   - token; its type selects the header field, and its offset/len
 *        give the span of tvb that is displayed
 *
 * Tags, markup declarations, processing instructions and text get their
 * own filterable string field; every other token type is added as plain
 * text.  Returns the newly created item.
 */
static proto_item* proto_tree_add_xml_item(proto_tree* tree, tvbuff_t* tvb, xml_token_t* xi) {
	proto_item* pi;
	gchar* txt;
	int hfid;

	switch (xi->type) {
		case XML_TAG:        hfid = hf_xml_tag;         break;
		case XML_MARKUPDECL: hfid = hf_xml_markup_decl; break;
		case XML_XMLPI:      hfid = hf_xml_pi;          break;
		case XML_TEXT:       hfid = hf_xml_text;        break;
		default:             hfid = 0;                  break;
	}

	txt = tvb_get_string(tvb,xi->offset,xi->len);

	if ( hfid ) {
		/*
		 * The token text comes straight from the packet, so it must be
		 * passed as an argument to "%s" and never as the format string
		 * itself: a '%' in the captured XML would otherwise be
		 * interpreted as a conversion specification.
		 */
		pi = proto_tree_add_string_format(tree,hfid,tvb,xi->offset,xi->len,txt,"%s",format_text(txt, xi->len));
	} else {
		pi = proto_tree_add_text(tree,tvb,xi->offset,xi->len,"%s",format_text(txt, xi->len));
	}

	g_free(txt);

	return pi;
}
@ -114,89 +80,42 @@ static void dissect_xml(tvbuff_t* tvb, packet_info* pinfo _U_, proto_tree* tree)
xml_token_t* xi;
xml_token_t* next_xi;
proto_item* pi = NULL;
int curr_offset = 0;
int curr_len = 0;
GPtrArray* stack;
#ifdef DEBUG_XML
proto_tree* tree2 = NULL;
proto_tree* pt = NULL;
#endif
is_soap = FALSE;
#define push() { g_ptr_array_add(stack,tree); tree = proto_item_add_subtree(pi, ett_xml); }
#define pop() { tree = g_ptr_array_remove_index(stack,stack->len - 1); }
GPtrArray* stack = g_ptr_array_new();
if (tree) {
pi = proto_tree_add_item(tree, proto_xml, tvb, 0, tvb->length, FALSE);
#ifdef DEBUG_XML
tree = proto_item_add_subtree(pi, ett_xml);
pi = proto_tree_add_item(tree, proto_xml, tvb, 0, tvb->length, FALSE);
tree2 = proto_item_add_subtree(pi, ett_xml);
#else
tree = proto_item_add_subtree(pi, ett_xml);
#endif /* DEBUG_XML */
xml_items = scan_tvb_for_xml_items(tvb, 0, tvb->length);
stack = g_ptr_array_new();
for (xi = xml_items; xi; xi = xi->next) {
#ifdef DEBUG_XML
pi = proto_tree_add_item(tree2,hf_xml_token,tvb,xi->offset,xi->len,FALSE);
pt = proto_item_add_subtree(pi, ett_xml);
proto_tree_add_uint(pt,hf_xml_token_type,tvb,0,0,xi->type);
proto_tree_add_uint(pt,hf_xml_ctx_type,tvb,0,0,xi->ctx);
proto_tree_add_text(pt,tvb,0,0,"[%i,%i] (%i,%i): '%s'",curr_offset,curr_len,xi->offset,xi->len,xi->text);
#endif /* DEBUG_XML */
switch (xi->type) {
case XML_COMMENT_START:
case XML_METATAG_START:
case XML_CLOSE_TAG_START:
case XML_TAG_START:
curr_offset = xi->offset;
case XML_PROPERTY:
case XML_NAME:
curr_len += xi->len;
break;
case XML_WHITESPACE:
if (xi->ctx == XML_CTX_OUT && curr_len == 0) {
curr_offset += xi->len;
} else {
curr_len += xi->len;
}
break;
case XML_COMMENT_END:
case XML_METATAG_END:
case XML_CLOSE_TAG_END:
case XML_TEXT:
curr_len += xi->len;
proto_tree_add_xml_item(tree,tvb,curr_offset,curr_len,xi);
curr_offset = curr_offset + curr_len;
curr_len = 0;
break;
case XML_TAG_END:
curr_len += xi->len;
if (xi->ctx == XML_CTX_CLOSETAG) pop();
pi = proto_tree_add_xml_item(tree,tvb,curr_offset,curr_len,xi);
if (xi->ctx == XML_CTX_TAG) push();
curr_offset = curr_offset + curr_len;
curr_len = 0;
break;
case XML_GARBLED:
break;
}
}
for (xi = xml_items; xi; xi = next_xi) {
next_xi = xi->next;
switch (xi->type) {
case XML_WHITESPACE:
break;
case XML_CLOSEDTAG:
case XML_TEXT:
case XML_MARKUPDECL:
case XML_XMLPI:
case XML_COMMENT:
proto_tree_add_xml_item(tree,tvb,xi);
break;
case XML_DOCTYPE_START:
case XML_TAG:
pi = proto_tree_add_xml_item(tree,tvb,xi);
g_ptr_array_add(stack,tree);
tree = proto_item_add_subtree(pi, ett_xml);
break;
case XML_CLOSE_TAG:
case XML_DOCTYPE_STOP:
pi = proto_tree_add_xml_item(tree,tvb,xi);
if ( stack->len )
tree = g_ptr_array_remove_index(stack,stack->len - 1);
break;
}
if (xi->text) g_free(xi->text);
g_free(xi);
}
@ -209,24 +128,14 @@ void
proto_register_xml(void)
{
static hf_register_info hf[] = {
#ifdef DEBUG_XML
{ &hf_xml_token,
{ "XML Token",
"xml.token", FT_STRING, BASE_NONE,NULL,0x0,
"An XML token", HFILL }},
{ &hf_xml_token_type,
{ "XML Token Type",
"xml.token.type", FT_UINT32, BASE_DEC,xml_token_types,0x0,
"the type of an XML token", HFILL }},
{ &hf_xml_ctx_type,
{ "XML Context Type",
"xml.ctx.type", FT_UINT32, BASE_DEC,xml_ctx_types,0x0,
"the context of an XML token", HFILL }},
#endif /* DEBUG_XML */
{ &hf_xml_metatag,
{ "XML Meta Tag",
"xml.meta_tag", FT_STRING, BASE_NONE, NULL, 0x0,
"XML Meta Tag", HFILL }},
{ &hf_xml_pi,
{ "XML Processing Instruction",
"xml.pi", FT_STRING, BASE_NONE, NULL, 0x0,
"XML Processing Instruction", HFILL }},
/* Markup declaration field; the filter abbreviation is spelled "markup". */
{ &hf_xml_markup_decl,
{ "XML Markup Declaration",
"xml.markup_decl", FT_STRING, BASE_NONE, NULL, 0x0,
"XML Markup Declaration", HFILL }},
{ &hf_xml_tag,
{ "XML Tag",
"xml.tag", FT_STRING, BASE_NONE, NULL, 0x0,
@ -238,11 +147,10 @@ proto_register_xml(void)
};
static gint *ett[] = {
#ifdef DEBUG_XML
&ett_xml_tok,
#endif /* DEBUG_XML */
&ett_xml
};
proto_xml = proto_register_protocol("eXtensible Markup Language", "XML", "xml");
proto_register_field_array(proto_xml, hf, array_length(hf));

View File

@ -32,32 +32,19 @@ typedef struct _xml_token_t xml_token_t;
typedef enum _xml_token_type_t {
XML_WHITESPACE,
XML_PROPERTY,
XML_COMMENT_START,
XML_COMMENT_END,
XML_TAG_START,
XML_TAG_END,
XML_METATAG_START,
XML_METATAG_END,
XML_CLOSE_TAG_START,
XML_CLOSE_TAG_END,
XML_NAME,
XML_TEXT,
XML_GARBLED
XML_COMMENT,
XML_TAG,
XML_CLOSEDTAG,
XML_MARKUPDECL,
XML_XMLPI,
XML_CLOSE_TAG,
XML_DOCTYPE_START,
XML_DOCTYPE_STOP
} xml_token_type_t;
typedef enum _xml_context_t {
XML_CTX_OUT,
XML_CTX_COMMENT,
XML_CTX_TAG,
XML_CTX_METATAG,
XML_CTX_CLOSETAG
} xml_context_t;
struct _xml_token_t {
xml_token_type_t type;
xml_context_t ctx;
char* text;
int offset;
int len;
xml_token_t* next;

View File

@ -1,5 +1,7 @@
%option noyywrap
%option nounput
%option caseless
%{
/* xml_lexer.l
@ -35,72 +37,74 @@
static guint8* extracted = NULL;
static gint offset;
static gint last_offset;
static gint text_offset;
static gint len;
xml_token_t* head;
xml_token_t* tail;
#define YY_INPUT(buff,result,max_size) ( (result) = tvb_yyinput((buff),(max_size)) )
#define ECHO {add_xml_item(XML_GARBLED,XML_CTX_OUT, yyleng, yytext); return 0;}
static void add_xml_item(xml_token_type_t type, xml_context_t ctx, gint len, gchar* text);
#define YY_INPUT(buff,result,max_size) ( (result) = tvb_yyinput((buff),(max_size)) )
#define ECHO ;
static void add_xml_item(xml_token_type_t type, gint len);
static int tvb_yyinput(char* buff, guint max_len);
%}
property_dq [A-Za-z][_A-Za-z0-9:]*=["][^\"]*["]
property_sq [A-Za-z][_A-Za-z0-9:]*='[^\']*'
property_nq [A-Za-z][_A-Za-z0-9:]*=[_A-Za-z0-9]+
metatag_start "<?"
metatag_end "?>"
comment_start "<!--"
comment_end "-->"
xmlpi_start "<?"
xmlpi_end "?>"
comment_start <!--
comment_end -->
markupdecl_start "<!"
closetag_start "</"
tag_start "<"
tag_end ">"
tag_end >
closedtag_end "/>"
name [A-Za-z][-_A-Za-z0-9:]*
whitespace [ \t\r\n]+
text [^<]*
%START COMMENT TAG CLOSE_TAG META_TAG OUT
%%
{comment_start} { add_xml_item(XML_COMMENT_START, XML_CTX_COMMENT, yyleng, yytext); BEGIN COMMENT;}
<COMMENT>{comment_end} { add_xml_item(XML_COMMENT_END, XML_CTX_COMMENT, yyleng, yytext); BEGIN OUT; }
<COMMENT>{text} add_xml_item(XML_TEXT, XML_CTX_COMMENT, yyleng, yytext);
<COMMENT>{tag_start} add_xml_item(XML_TEXT, XML_CTX_COMMENT, yyleng, yytext);
open_brace "["
{closetag_start} { add_xml_item(XML_CLOSE_TAG_START, XML_CTX_CLOSETAG, yyleng, yytext); BEGIN CLOSE_TAG; }
<CLOSE_TAG>{name} add_xml_item(XML_NAME, XML_CTX_CLOSETAG, yyleng, yytext);
<CLOSE_TAG>{whitespace} add_xml_item(XML_WHITESPACE, XML_CTX_CLOSETAG, yyleng,yytext);
<CLOSE_TAG>{tag_end} { add_xml_item(XML_TAG_END, XML_CTX_CLOSETAG, yyleng, yytext); BEGIN OUT; }
/* "]" followed by optional blanks and ">"; [:blank:] is only a class inside [ ] */
dtd_doctype_stop "]"[[:blank:]]*>
{metatag_start} { add_xml_item(XML_METATAG_START, XML_CTX_METATAG, yyleng, yytext); BEGIN META_TAG; }
<META_TAG>{property_dq} add_xml_item(XML_PROPERTY, XML_CTX_METATAG, yyleng, yytext);
<META_TAG>{property_sq} add_xml_item(XML_PROPERTY, XML_CTX_METATAG, yyleng, yytext);
<META_TAG>{property_nq} add_xml_item(XML_PROPERTY, XML_CTX_METATAG, yyleng, yytext);
<META_TAG>{whitespace} add_xml_item(XML_WHITESPACE, XML_CTX_METATAG, yyleng, yytext);
<META_TAG>{name} add_xml_item(XML_NAME, XML_CTX_METATAG, yyleng, yytext);
<META_TAG>{metatag_end} { add_xml_item(XML_METATAG_END, XML_CTX_METATAG, yyleng,yytext); BEGIN OUT; }
whitespace [[:blank:]\r\n]+
text_start ([^<[:blank:]\r\n]|[^<])
text_stop <
{tag_start} { add_xml_item(XML_TAG_START, XML_CTX_TAG, yyleng, yytext); BEGIN TAG; }
<TAG>{property_dq} add_xml_item(XML_PROPERTY, XML_CTX_TAG, yyleng, yytext);
<TAG>{property_sq} add_xml_item(XML_PROPERTY, XML_CTX_TAG, yyleng, yytext);
<TAG>{property_nq} add_xml_item(XML_PROPERTY, XML_CTX_TAG, yyleng, yytext);
<TAG>{name} add_xml_item(XML_NAME, XML_CTX_TAG, yyleng, yytext);
<TAG>{whitespace} add_xml_item(XML_WHITESPACE, XML_CTX_TAG, yyleng, yytext);
<TAG>{closedtag_end} {add_xml_item(XML_CLOSE_TAG_END, XML_CTX_TAG, yyleng, yytext); BEGIN OUT; }
<TAG>{tag_end} {add_xml_item(XML_TAG_END, XML_CTX_TAG, yyleng, yytext); BEGIN OUT; }
<OUT>{whitespace} add_xml_item(XML_WHITESPACE, XML_CTX_OUT, yyleng, yytext);
<OUT>{text} add_xml_item(XML_TEXT, XML_CTX_OUT, yyleng, yytext);
%START OUT COMMENT TAG CLOSE_TAG XMLPI DOCTYPE MARKUPDECL TEXT
%%
static void add_xml_item(xml_token_type_t type, xml_context_t ctx, gint the_len, gchar* text) {
<OUT>{whitespace} { add_xml_item(XML_WHITESPACE, yyleng); }
<OUT>{dtd_doctype_stop} { add_xml_item(XML_DOCTYPE_STOP, yyleng); }
<OUT>{text_start} { text_offset = offset - yyleng -1; BEGIN TEXT; }
<TEXT>{text_stop} { add_xml_item(XML_TEXT, --offset - text_offset -1 ); BEGIN OUT; }
<OUT>{comment_start} { text_offset = offset - yyleng -1; BEGIN COMMENT; }
<COMMENT>{comment_end} { add_xml_item(XML_COMMENT, offset - text_offset -1); BEGIN OUT; }
<OUT>{closetag_start} { text_offset = offset - yyleng -1; BEGIN CLOSE_TAG; }
<CLOSE_TAG>{tag_end} { add_xml_item(XML_CLOSE_TAG, offset - text_offset -1); BEGIN OUT; }
<OUT>{xmlpi_start} { text_offset = offset - yyleng -1; BEGIN XMLPI; }
<XMLPI>{xmlpi_end} { add_xml_item(XML_XMLPI, offset - text_offset -1); BEGIN OUT; }
<OUT>{markupdecl_start} { text_offset = offset - yyleng -1; BEGIN MARKUPDECL; }
<MARKUPDECL>{open_brace} { add_xml_item(XML_DOCTYPE_START, offset - text_offset); BEGIN OUT; }
<MARKUPDECL>{tag_end} { add_xml_item(XML_MARKUPDECL, offset - text_offset); BEGIN OUT; }
<OUT>{tag_start} { text_offset = offset - yyleng -1; BEGIN TAG; }
<TAG>{closedtag_end} { add_xml_item(XML_CLOSEDTAG, offset - text_offset); BEGIN OUT; }
<TAG>{tag_end} { add_xml_item(XML_TAG, offset - text_offset); BEGIN OUT; }
%%
static void add_xml_item(xml_token_type_t type, gint the_len) {
xml_token_t* xi = g_malloc(sizeof(xml_token_t));
xi->type = type;
xi->ctx = ctx;
xi->offset = last_offset;
xi->len = the_len;
xi->text = text != NULL ? g_strdup(text) : NULL;
xi->next = NULL;
xi->prev = tail;
@ -134,15 +138,20 @@ extern xml_token_t* scan_tvb_for_xml_items(tvbuff_t* tvb, gint the_offset, gint
offset = the_offset;
last_offset = the_offset;
len = the_len;
text_offset = offset;
extracted = tvb_memdup(tvb,offset,len);
head = NULL;
tail = NULL;
BEGIN OUT;
yylex();
yyrestart(NULL);
g_free(extracted);
return head;
}