/* wireshark/epan/dfilter/scanner.l */

/*
 * We don't use input, so don't generate code for it.
 */
%option noinput

/*
 * We don't use unput, so don't generate code for it.
 */
%option nounput

/*
 * We don't read from the terminal.
 */
%option never-interactive

/*
 * Prefix scanner routines with "df_" rather than "yy", so this scanner
 * can coexist with other scanners.
 */
%option prefix="df_"

%{
/*
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 2001 Gerald Combs
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#include "config.h"

#include <stdlib.h>
#include <errno.h>

#include "dfilter-int.h"
#include "syntax-tree.h"
#include "grammar.h"
#include "dfunctions.h"
#include "scanner_lex.h"

#ifdef _WIN32
/* disable Windows VC compiler warning "signed/unsigned mismatch" associated  */
/* with YY_INPUT code generated by flex versions such as 2.5.35.              */
#pragma warning (disable:4018)
#endif

#define LVAL		df_lval
#define LVAL_TYPE	stnode_t*
#define LVAL_INIT_VAL	NULL
#define MODNAME		df
#define FLEX_YY_PREFIX	df_

#include <lemonflex-head.inc>

/*#undef YY_NO_UNPUT*/

/* Fill df_lval with a node of the type matching 'token', carrying 'data'. */
static int set_lval(int token, gpointer data);
/* Convert the text 's' to a gint32 and store it in df_lval as an integer
 * node; returns SCAN_FAILED if the conversion fails. */
static int set_lval_int(dfwork_t *dfw, int token, char *s);
/* Return a token that carries no value, after checking it is a known one. */
static int simple(int token);
/* Parse 's' with strtol() and range-check it against gint32; on failure an
 * error is reported through dfilter_fail() and FALSE is returned. */
static gboolean str_to_gint32(dfwork_t *dfw, char *s, gint32* pint);
/* Accumulates the contents of the double-quoted string currently being
 * scanned (see the DQUOTE rules).  Note that it is not declared static. */
GString* quoted_string = NULL;
/* Record on df_lval the spelling of a deprecated operator that was seen. */
static void mark_lval_deprecated(const char *s);

%}

%x RANGE_INT
%x RANGE_PUNCT
%x DQUOTE

%%

[[:blank:]\n]+	/* ignore whitespace */

	/* Punctuation.  None of these tokens carries a value, so each one is
	 * returned through simple(), which only sanity-checks the token. */

"("				return simple(TOKEN_LPAREN);
")"				return simple(TOKEN_RPAREN);
","				return simple(TOKEN_COMMA);
"{"				return simple(TOKEN_LBRACE);
"}"				return simple(TOKEN_RBRACE);

	/* Comparison and logical operators.  Most have both a symbolic and
	 * a word spelling; both spellings yield the same token. */

"=="			return simple(TOKEN_TEST_EQ);
"eq"			return simple(TOKEN_TEST_EQ);
"!="			{
	/* Remember the spelling on the current lval as a deprecated token
	 * before returning the operator. */
	mark_lval_deprecated("!=");
	return simple(TOKEN_TEST_NE);
}
"ne"			{
	/* Same handling as "!=": the spelling is recorded as deprecated. */
	mark_lval_deprecated("ne");
	return simple(TOKEN_TEST_NE);
}
">"				return simple(TOKEN_TEST_GT);
"gt"			return simple(TOKEN_TEST_GT);
">="			return simple(TOKEN_TEST_GE);
"ge"			return simple(TOKEN_TEST_GE);
"<"				return simple(TOKEN_TEST_LT);
"lt"			return simple(TOKEN_TEST_LT);
"<="			return simple(TOKEN_TEST_LE);
"le"			return simple(TOKEN_TEST_LE);
"bitwise_and"	return simple(TOKEN_TEST_BITWISE_AND);
"&"				return simple(TOKEN_TEST_BITWISE_AND);
"contains"		return simple(TOKEN_TEST_CONTAINS);
"~"				return simple(TOKEN_TEST_MATCHES);
"matches"		return simple(TOKEN_TEST_MATCHES);
"!"				return simple(TOKEN_TEST_NOT);
"not"			return simple(TOKEN_TEST_NOT);
"&&"			return simple(TOKEN_TEST_AND);
"and"			return simple(TOKEN_TEST_AND);
"||"			return simple(TOKEN_TEST_OR);
"or"			return simple(TOKEN_TEST_OR);
"in"			return simple(TOKEN_TEST_IN);


"["					{
	/* An opening bracket starts a slice (range).  Switch to the
	 * exclusive RANGE_INT start condition, in which only the slice
	 * rules below are active, until the closing "]" is seen. */
	BEGIN(RANGE_INT);
	return simple(TOKEN_LBRACKET);
}

<RANGE_INT>[+-]?[[:digit:]]+		{
	/* Optionally signed decimal integer; punctuation is expected next.
	 * set_lval_int() returns SCAN_FAILED if the text cannot be stored
	 * as a gint32.  The start condition has already been switched to
	 * RANGE_PUNCT by then. */
	BEGIN(RANGE_PUNCT);
	return set_lval_int(global_dfw, TOKEN_INTEGER, yytext);
}

<RANGE_INT>[+-]?0x[[:xdigit:]]+		{
	/* Optionally signed hexadecimal integer; handled exactly like the
	 * decimal case above. */
	BEGIN(RANGE_PUNCT);
	return set_lval_int(global_dfw, TOKEN_INTEGER, yytext);
}

<RANGE_INT,RANGE_PUNCT>":"		{
	/* A colon is accepted both after an integer and where an integer
	 * was expected; an integer may follow it. */
	BEGIN(RANGE_INT);
	return simple(TOKEN_COLON);
}

<RANGE_PUNCT>"-"			{
	/* A hyphen directly after an integer is a separator.  In RANGE_INT
	 * a leading "-" is instead consumed as the sign of the integer. */
	BEGIN(RANGE_INT);
	return simple(TOKEN_HYPHEN);
}

<RANGE_INT,RANGE_PUNCT>","		{
	/* A comma separates entries inside the brackets; an integer is
	 * expected again afterwards. */
	BEGIN(RANGE_INT);
	return simple(TOKEN_COMMA);
}

<RANGE_INT,RANGE_PUNCT>"]"		{
	/* End of the slice: return to normal scanning. */
	BEGIN(INITIAL);
	return simple(TOKEN_RBRACKET);
}

	/* Error if none of the above while scanning a range (slice) */

	/* NOTE(review): the two error rules below, and a failing
	 * set_lval_int() above, return SCAN_FAILED without switching back
	 * to INITIAL.  Flex keeps the start condition between calls, so
	 * presumably the code that sets up the next scan resets it -
	 * TODO confirm. */

<RANGE_PUNCT>[^:\-,\]]+		{
	/* A run of characters containing none of ':', '-', ',' or ']'
	 * where punctuation was expected. */
	dfilter_fail(global_dfw, "Invalid string \"%s\" found while scanning slice.", yytext);
	return SCAN_FAILED;
}

	/* XXX It would be nice to be able to match an entire non-integer string,
	 * but beware of Flex's "match the most text" rule.
	 */

<RANGE_INT>.	{
	/* Any single character that did not start an integer or one of the
	 * punctuation tokens accepted in RANGE_INT. */
	dfilter_fail(global_dfw, "Invalid character \"%s\" found while scanning slice; expected integer.", yytext);
	return SCAN_FAILED;
}

\042				{
	/* start quote */
	/* The example of how to scan for strings was taken from
	the flex 2.5.4 manual, from the section "Start Conditions".
	See:
	http://www.gnu.org/software/flex/manual/html_node/flex_11.html */

	BEGIN(DQUOTE);
	/* Every exit from DQUOTE below releases quoted_string and
	sets it to NULL, so it is normally NULL here.  The check is
	kept as a safety net so that a stale buffer can never be
	leaked when a new quoted string starts. */
	if (quoted_string) {
		g_string_free(quoted_string, TRUE);
		/* Don't set quoted_string to NULL, as we
		do in other quoted_string-cleanup code, as we're
		about to set it in the next line. */
	}
	quoted_string = g_string_new("");
}

<DQUOTE><<EOF>>				{
	/* unterminated string */
	/* The example of how to handle unclosed strings was taken from
	the flex 2.5.4 manual, from the section "End-of-file rules".
	See:
	http://www.gnu.org/software/flex/manual/html_node/flex_13.html */

	/* Leave DQUOTE and release the partial string, like the other
	exits from DQUOTE do, so that quoted_string is non-NULL only
	while DQUOTE is the active start condition. */
	BEGIN(INITIAL);
	if (quoted_string) {
		g_string_free(quoted_string, TRUE);
		quoted_string = NULL;
	}
	dfilter_fail(global_dfw, "The final quote was missing from a quoted string.");
	return SCAN_FAILED;
}

<DQUOTE>\042			{
	/* end quote */
	int token;
	BEGIN(INITIAL);
	/* The GString is freed right after set_lval() returns, so this
	presumes that the node keeps its own copy of the string -
	TODO confirm against stnode_init(). */
	token = set_lval(TOKEN_STRING, quoted_string->str);
	g_string_free(quoted_string, TRUE);
	quoted_string = NULL;
	return token;
}

<DQUOTE>\\[0-7]{1,3} {
	/* octal sequence */
	unsigned long result;
	/* Skip the backslash; up to three octal digits can encode values
	as large as 0777, so the result still has to be range-checked. */
	result = strtoul(yytext + 1, NULL, 8);
	if (result > 0xff) {
		/* Leave DQUOTE before dropping the buffer: the other
		DQUOTE rules dereference quoted_string, so the scanner
		must not stay in this state once it is NULL. */
		BEGIN(INITIAL);
		g_string_free(quoted_string, TRUE);
		quoted_string = NULL;
		dfilter_fail(global_dfw, "%s is larger than 255.", yytext);
		return SCAN_FAILED;
	}
	g_string_append_c(quoted_string, (gchar) result);
}

<DQUOTE>\\x[[:xdigit:]]{1,2} {
	/* hex sequence */
	unsigned long result;
	/* Skip the backslash and the 'x'; at most two hex digits are
	matched, so the value always fits in one byte. */
	result = strtoul(yytext + 2, NULL, 16);
	g_string_append_c(quoted_string, (gchar) result);
}


<DQUOTE>\\.				{
	/* escaped character */
	/* Any other backslash escape stands for the character that
	follows the backslash. */
	g_string_append_c(quoted_string, yytext[1]);
}

<DQUOTE>[^\\\042]+			{
	/* non-escaped string */
	/* A run of characters containing neither a backslash nor a
	double quote is copied as-is. */
	g_string_append(quoted_string, yytext);
}


[-[:alnum:]_\.:]*\/[[:digit:]]+  {
        /* CIDR */
        /* A (possibly empty) run of alphanumerics, '-', '_', '.' and ':'
         * followed by '/' and decimal digits.  The text is handed on
         * unparsed. */
        return set_lval(TOKEN_UNPARSED, yytext);
}

[-\+[:alnum:]_.:]+	{
	/* Is it a field name? */
	/* The text is looked up first as a registered field and then as
	 * a display-filter function; the first lookup that succeeds
	 * decides the token type. */
	header_field_info *hfinfo;
	df_func_def_t *df_func_def;

	hfinfo = proto_registrar_get_byname(yytext);
	if (hfinfo) {
		/* Yes, it's a field name */
		return set_lval(TOKEN_FIELD, hfinfo);
	}
	else {
		/* Is it a function name? */
		df_func_def = df_func_lookup(yytext);
		if (df_func_def) {
		    /* yes, it's a dfilter function */
		    return set_lval(TOKEN_FUNCTION, df_func_def);
		}
		else {
		    /* No, so treat it as an unparsed string */
		    return set_lval(TOKEN_UNPARSED, yytext);
		}
	}
}

. {
	/* Default */
	/* Any single character not matched by an earlier rule is passed
	 * on as an unparsed one-character string. */
	return set_lval(TOKEN_UNPARSED, yytext);
}


%%

/*
 * Return a token that carries no semantic value.
 *
 * The switch exists purely as a sanity check: every token handed to this
 * function must be one of the value-less punctuation or operator tokens
 * listed below.  Anything else trips g_assert_not_reached().
 */
static int
simple(int token)
{
	switch (token) {
		/* Grouping, slice and list punctuation */
		case TOKEN_LPAREN:
		case TOKEN_RPAREN:
		case TOKEN_LBRACKET:
		case TOKEN_RBRACKET:
		case TOKEN_LBRACE:
		case TOKEN_RBRACE:
		case TOKEN_COLON:
		case TOKEN_COMMA:
		case TOKEN_HYPHEN:
		/* Comparison operators */
		case TOKEN_TEST_EQ:
		case TOKEN_TEST_NE:
		case TOKEN_TEST_GT:
		case TOKEN_TEST_GE:
		case TOKEN_TEST_LT:
		case TOKEN_TEST_LE:
		case TOKEN_TEST_BITWISE_AND:
		case TOKEN_TEST_CONTAINS:
		case TOKEN_TEST_MATCHES:
		/* Logical operators and set membership */
		case TOKEN_TEST_NOT:
		case TOKEN_TEST_AND:
		case TOKEN_TEST_OR:
		case TOKEN_TEST_IN:
			return token;
		default:
			break;
	}

	/* Not one of the tokens this helper is meant for. */
	g_assert_not_reached();
	return token;
}

/*
 * Initialize df_lval for a token that carries a value.
 *
 * token - TOKEN_STRING, TOKEN_FIELD, TOKEN_UNPARSED or TOKEN_FUNCTION;
 *         any other value trips g_assert_not_reached().
 * data  - the value handed to stnode_init() together with the syntax-tree
 *         node type that corresponds to 'token'.
 *
 * Returns 'token' so that a rule can simply "return set_lval(...)".
 */
static int
set_lval(int token, gpointer data)
{
	sttype_id_t	node_type;

	if (token == TOKEN_STRING) {
		node_type = STTYPE_STRING;
	}
	else if (token == TOKEN_FIELD) {
		node_type = STTYPE_FIELD;
	}
	else if (token == TOKEN_UNPARSED) {
		node_type = STTYPE_UNPARSED;
	}
	else if (token == TOKEN_FUNCTION) {
		node_type = STTYPE_FUNCTION;
	}
	else {
		/* Not a token this helper knows how to wrap. */
		node_type = STTYPE_UNINITIALIZED;
		g_assert_not_reached();
	}

	stnode_init(df_lval, node_type, data);
	return token;
}

/*
 * Initialize df_lval for a token whose value is an integer.
 *
 * dfw   - filter work area, passed on to str_to_gint32().
 * token - must be TOKEN_INTEGER; any other value trips
 *         g_assert_not_reached().
 * s     - text of the number to convert.
 *
 * Returns 'token' on success, or SCAN_FAILED if 's' could not be
 * converted to a gint32.  df_lval is left untouched in that case.
 */
static int
set_lval_int(dfwork_t *dfw, int token, char *s)
{
	gint32		parsed;
	sttype_id_t	node_type = STTYPE_UNINITIALIZED;

	/* Convert first: nothing is stored in df_lval for a bad number. */
	if (!str_to_gint32(dfw, s, &parsed)) {
		return SCAN_FAILED;
	}

	if (token == TOKEN_INTEGER) {
		node_type = STTYPE_INTEGER;
	}
	else {
		g_assert_not_reached();
	}

	stnode_init_int(df_lval, node_type, parsed);
	return token;
}


/*
 * Convert the string 's' to a gint32.
 *
 * The text is parsed by strtol() with base 0, so the base is taken from
 * the prefix of the string.  The whole string must be consumed.
 *
 * dfw  - filter work area that receives the error message on failure.
 * s    - NUL-terminated text to convert.
 * pint - receives the converted value; written only on success.
 *
 * Returns TRUE on success.  On failure an error is reported through
 * dfilter_fail() and FALSE is returned.
 */
static gboolean
str_to_gint32(dfwork_t *dfw, char *s, gint32* pint)
{
	char		*stop;
	long		parsed;
	gboolean	long_range_error;
	gboolean	too_large;
	gboolean	too_small;

	errno = 0;
	parsed = strtol(s, &stop, 0);

	/* Nothing consumed, trailing garbage, or strtol() rejected it. */
	if (stop == s || *stop != '\0' || errno == EINVAL) {
		/* This isn't a valid number. */
		dfilter_fail(dfw, "\"%s\" is not a valid number.", s);
		return FALSE;
	}

	/*
	 * Two ways to be out of range: the value did not even fit in a
	 * long (strtol() reports ERANGE and returns LONG_MAX/LONG_MIN),
	 * or it fits in a long but not in a gint32 (a long might be
	 * 64 bits).
	 */
	long_range_error = (errno == ERANGE);
	if (long_range_error) {
		too_large = (parsed == LONG_MAX);
		too_small = (parsed == LONG_MIN);
	}
	else {
		too_large = (parsed > G_MAXINT32);
		too_small = (parsed < G_MININT32);
	}

	if (too_large) {
		dfilter_fail(dfw, "\"%s\" causes an integer overflow.", s);
		return FALSE;
	}
	if (too_small) {
		dfilter_fail(dfw, "\"%s\" causes an integer underflow.", s);
		return FALSE;
	}
	if (long_range_error) {
		/*
		 * XXX - can "strtol()" set errno to ERANGE without
		 * returning LONG_MAX or LONG_MIN?
		 */
		dfilter_fail(dfw, "\"%s\" is not an integer.", s);
		return FALSE;
	}

	*pint = (gint32)parsed;
	return TRUE;
}

/*
 * Record on the current lval (df_lval) that a deprecated spelling was
 * scanned.
 *
 * s - the deprecated token text.  Only the pointer is stored, not a copy,
 *     so it must stay valid for as long as the node may be inspected.
 *     The callers in this file pass string literals.
 */
static void
mark_lval_deprecated(const char *s)
{
	df_lval->deprecated_token = s;
}

#include <lemonflex-tail.inc>