wireshark/epan/dtd_preparse.l
Guy Harris 6f37317539 Include config.h at the very beginning of all Flex scanners.
That way, if we #define anything for large file support, that's done
before we include any system header files that either depend on that
definition or that define it themselves if it's not already defined.

Change-Id: I9b07344151103be337899dead44d6960715d6813
Reviewed-on: https://code.wireshark.org/review/19035
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org>
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2016-12-02 21:02:10 +00:00

296 lines
7.7 KiB
Text

%top {
/* Include this before everything else, for various large-file definitions */
#include "config.h"
}
/*
* We want a reentrant scanner.
*/
%option reentrant
/*
* We don't use input, so don't generate code for it.
*/
%option noinput
/*
* We don't use unput, so don't generate code for it.
*/
%option nounput
/*
* We don't read interactively from the terminal.
*/
%option never-interactive
/*
* We want to stop processing when we get to the end of the input.
*/
%option noyywrap
/*
* The type for the state we keep for a scanner.
*/
%option extra-type="Dtd_PreParse_scanner_state_t *"
/*
* The language we're scanning is case-insensitive.
*/
%option caseless
/*
* Prefix scanner routines with "Dtd_PreParse_" rather than "yy", so this
* scanner can coexist with other scanners.
*/
%option prefix="Dtd_PreParse_"
/*
* Write the generated scanner to "dtd_preparse.c".
*/
%option outfile="dtd_preparse.c"
/*
* We have to override the memory allocators so that we don't get
* "unused argument" warnings from the yyscanner argument (which
* we don't use, as we have a global memory allocator).
*
* We provide, as macros, our own versions of the routines generated by Flex,
* which just call malloc()/realloc()/free() (as the Flex versions do),
* discarding the extra argument.
*/
%option noyyalloc
%option noyyrealloc
%option noyyfree
%{
/*
* dtd_preparse.l
*
* an XML dissector for wireshark
*
* DTD Preparser - import a dtd file into a GString
* including files, removing comments
* and resolving %entities;
*
* Copyright 2004, Luis E. Garcia Ontanon <luis@ontanon.org>
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <glib.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include "dtd.h"
#include <wsutil/file_util.h>
/*
* Override Flex's ECHO, the action run for input that no rule matches, so
* that such text is appended to the GString currently being built instead
* of being written to yyout.
*/
#define ECHO g_string_append(yyextra->current,yytext);
/*
* State kept for one run of the scanner; the actions reach it through
* yyextra.
*/
typedef struct {
const gchar* dtd_dirname; /* directory name handed to dtd_preparse() */
const gchar* filename; /* file name handed to dtd_preparse(); used in messages and location markers */
guint linenum; /* current line number; starts at 1 and is bumped on every newline */
GString* error; /* error messages are appended here */
GHashTable* entities; /* "%name;" key -> GString holding that entity's replacement text */
GString* current; /* where text is currently appended: output, an entity's value, or NULL inside a comment */
GString* output; /* the preprocessed text that dtd_preparse() returns */
gchar* entity_name; /* "%name;" key built for the entity declaration being scanned */
} Dtd_PreParse_scanner_state_t;
/* Helpers called from the actions; both are defined after the rules. */
static const gchar* replace_entity(Dtd_PreParse_scanner_state_t* state, gchar* s);
static const gchar* location(Dtd_PreParse_scanner_state_t* state);
/* Put the scanner in the OUTSIDE start condition when it initializes. */
#define YY_USER_INIT { \
BEGIN OUTSIDE; \
}
#ifdef _WIN32
/*
* Flex (v 2.5.35) uses this symbol to "exclude" unistd.h
*/
#define YY_NO_UNISTD_H
/*
* Turn off the Windows VC compiler warning "signed/unsigned mismatch"
* that the YY_INPUT code generated by flex versions such as 2.5.35
* triggers.
*/
#pragma warning (disable:4018)
#endif
/*
* Sleazy hack to suppress compiler warnings in yy_fatal_error().
*
* The comma expression mentions yyscanner, so that name counts as used
* wherever this macro is expanded, and the expression still evaluates to
* the exit status 2.
*/
#define YY_EXIT_FAILURE ((void)yyscanner, 2)
/*
* Macros for the allocators, to discard the extra argument.
*
* Each one forwards to the matching C library routine and ignores
* yyscanner. Every argument and every expansion is parenthesized so the
* macros behave like function calls whatever expression is passed in
* (e.g. "p - 1" is cast as a whole, not as "(char *)p - 1").
*/
#define Dtd_PreParse_alloc(size, yyscanner) ((void *)malloc((size)))
#define Dtd_PreParse_realloc(ptr, size, yyscanner) ((void *)realloc((char *)(ptr), (size)))
#define Dtd_PreParse_free(ptr, yyscanner) (free((char *)(ptr)))
%}
/*
* Named patterns used by the rules.
*/
xmlpi_start "<?"
xmlpi_stop "?>"
xmlpi_chars .
comment_start "<!--"
comment_stop "-->"
special_start "<!"
special_stop ">"
entity_start "<!"[[:blank:]\n]*entity[[:blank:]\n]*"%"
system SYSTEM
filename [^"]+
name [A-Za-z][-:A-Za-z0-9_\.]*
quote "\""
percent [%]
escaped_quote "\\\""
non_quote [^"%]+
avoid_editor_bug ["]
entity [%&][A-Za-z][-A-Za-z0-9_]*;
/*
* A character class expression is written [:name:] inside the brackets.
* Without the leading colon the pattern would match one of the characters
* of "[blank:" followed by a run of closing brackets, not blanks.
*/
whitespace [[:blank:]]+
newline \n
/*
* Start conditions; the scanner begins in OUTSIDE.
*/
%START OUTSIDE IN_COMMENT IN_ENTITY NAMED_ENTITY IN_QUOTE ENTITY_DONE XMLPI
%%
    /*
     * The start conditions are declared with %START, so the rules written
     * without a <condition> prefix ({entity}, {whitespace}, {newline}) are
     * candidates in every state.
     *
     * OUTSIDE      - ordinary DTD text
     * XMLPI        - between "<?" and "?>"; matched text is appended to
     *                yyextra->current
     * IN_COMMENT   - inside a comment; yyextra->current is NULL, so the
     *                guarded rules append nothing, and the comment's text
     *                is discarded
     * IN_ENTITY    - after the start of a parameter entity declaration,
     *                waiting for the entity's name
     * NAMED_ENTITY - name seen and saved as "%name;"; a quote starts the
     *                value, SYSTEM is reported as an error and ends the scan
     * IN_QUOTE     - the value is collected in a fresh GString, which the
     *                closing quote stores in yyextra->entities
     * ENTITY_DONE  - waiting for the ">" that ends the declaration; it
     *                makes yyextra->output the current string again
     *
     * Input that no rule matches goes through ECHO, which appends it to
     * yyextra->current.
     *
     * NOTE(review): the SYSTEM rule appends to yyextra->error without
     * checking it for NULL, although dtd_preparse() tolerates a NULL err.
     */
{entity} if (yyextra->current) g_string_append_printf(yyextra->current,"%s\n%s\n",replace_entity(yyextra, yytext),location(yyextra));
{whitespace} if (yyextra->current) g_string_append(yyextra->current," ");
<OUTSIDE>{xmlpi_start} { g_string_append(yyextra->current,yytext); BEGIN XMLPI; }
<XMLPI>{xmlpi_chars} { g_string_append(yyextra->current,yytext); }
<XMLPI>{newline} { g_string_append(yyextra->current,yytext); }
<XMLPI>{xmlpi_stop} { g_string_append(yyextra->current,yytext); BEGIN OUTSIDE; }
<OUTSIDE>{comment_start} { yyextra->current = NULL; BEGIN IN_COMMENT; }
<IN_COMMENT>[^-]? |
<IN_COMMENT>[-] ;
<IN_COMMENT>{comment_stop} { yyextra->current = yyextra->output; BEGIN OUTSIDE; }
{newline} {
yyextra->linenum++;
if (yyextra->current) g_string_append_printf(yyextra->current,"%s\n",location(yyextra));
}
<OUTSIDE>{entity_start} { BEGIN IN_ENTITY; }
<IN_ENTITY>{name} { yyextra->entity_name = g_strdup_printf("%%%s;",yytext); BEGIN NAMED_ENTITY; }
<NAMED_ENTITY>{quote} { yyextra->current = g_string_new(location(yyextra)); BEGIN IN_QUOTE; }
<IN_QUOTE>{quote} { g_hash_table_insert(yyextra->entities,yyextra->entity_name,yyextra->current); BEGIN ENTITY_DONE; }
<IN_QUOTE>{percent} |
<IN_QUOTE>{non_quote} |
<IN_QUOTE>{escaped_quote} g_string_append(yyextra->current,yytext);
<NAMED_ENTITY>{system} {
g_string_append_printf(yyextra->error,"at %s:%u: file inclusion is not supported!", yyextra->filename, yyextra->linenum);
yyterminate();
}
<ENTITY_DONE>{special_stop} { yyextra->current = yyextra->output; g_string_append(yyextra->current,"\n"); BEGIN OUTSIDE; }
%%
/*
 * Look up the replacement text for an entity reference.
 *
 * entity is the matched reference text, "%name;" or "&name;". Its first
 * character is overwritten with '%' because the entities table is keyed
 * by "%name;".
 *
 * Returns the stored replacement text, which stays owned by the table, or
 * "" when the entity is unknown. In the latter case a message is appended
 * to state->error, if there is one.
 */
static const gchar* replace_entity(Dtd_PreParse_scanner_state_t* state, gchar* entity) {
    GString* replacement;

    *entity = '%';
    replacement = (GString*)g_hash_table_lookup(state->entities,entity);
    if (replacement) {
        return replacement->str;
    }

    /* dtd_preparse() accepts a NULL err, so there may be nowhere to report to. */
    if (state->error) {
        g_string_append_printf(state->error,"dtd_preparse: in file '%s': entity %s does not exists\n", state->filename, entity);
    }
    return "";
}
/*
 * Build the "<? wireshark:location file:line ?>" marker for the scanner's
 * current file name and line number.
 *
 * The text lives in one static buffer that is released and replaced on
 * every call, so a result has to be consumed before the next call.
 */
static const gchar* location(Dtd_PreParse_scanner_state_t* state) {
    static gchar* marker = NULL;

    /* g_free() ignores NULL, so the first call needs no special case. */
    g_free(marker);
    marker = g_strdup_printf("<? wireshark:location %s:%u ?>", state->filename, state->linenum);
    return marker;
}
/*
 * Callback for g_hash_table_foreach_remove(): releases one entry of the
 * entities table.
 *
 * k is the key string and v the GString value; both are freed, the
 * GString together with its character data. p is unused. Returning TRUE
 * asks for the entry to be removed from the table.
 */
static gboolean free_gstring_hash_items(gpointer k,gpointer v,gpointer p _U_) {
    GString* value = (GString*)v;

    g_string_free(value,TRUE);
    g_free(k);
    return TRUE;
}
/*
 * Pre-parse a DTD file into a GString.
 *
 * dname - directory containing the file
 * fname - name of the file inside dname
 * err   - if not NULL, failure messages for opening the file or setting up
 *         the scanner are appended here; it is also handed to the scanner
 *         as its error string
 *
 * Returns the scanner's output string, or NULL if the file could not be
 * opened or the scanner could not be initialized. The returned GString is
 * not freed here; presumably the caller owns it.
 */
extern GString* dtd_preparse(const gchar* dname,const gchar* fname, GString* err) {
    gchar* fullname = g_strdup_printf("%s%c%s",dname,G_DIR_SEPARATOR,fname);
    FILE *in;
    yyscan_t scanner;
    Dtd_PreParse_scanner_state_t state;

    in = ws_fopen(fullname,"r");
    if (!in) {
        if (err)
            g_string_append_printf(err, "Could not open file: '%s', error: %s",fullname,g_strerror(errno));
        /* The path is only needed for the message above. */
        g_free(fullname);
        return NULL;
    }

    if (Dtd_PreParse_lex_init(&scanner) != 0) {
        /* Report before fclose(), which could change errno. */
        if (err)
            g_string_append_printf(err, "Can't initialize scanner: %s",
                g_strerror(errno));
        fclose(in);
        g_free(fullname);
        return NULL;
    }

    Dtd_PreParse_set_in(in, scanner);

    state.dtd_dirname = dname;
    state.filename = fname;
    state.linenum = 1;
    state.error = err;
    state.entities = g_hash_table_new(g_str_hash,g_str_equal);
    /* The output starts with a location marker for line 1. */
    state.current = state.output = g_string_new(location(&state));
    state.entity_name = NULL;

    /* Associate the state with the scanner */
    Dtd_PreParse_set_extra(&state, scanner);

    Dtd_PreParse_lex(scanner);

    Dtd_PreParse_lex_destroy(scanner);
    fclose(in);

    /*
     * The table was created without destroy functions, so release the keys
     * and values by hand before destroying it.
     */
    g_hash_table_foreach_remove(state.entities,free_gstring_hash_items,NULL);
    g_hash_table_destroy(state.entities);

    g_free(fullname);

    return state.output;
}