From b10db887ce9c5a4430d468eeebc8d7e42c6c6eb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Valverde?= <j@v6e.pt>
Date: Sat, 25 Jun 2022 11:25:36 +0100
Subject: [PATCH] dfilter: Remove unparsed syntax type and RHS literal bias

This removes unparsed name resolution during the semantic
check because it feels like a hack to work around limitations
in the language syntax, that should be solved at the lexical
level instead.

We were interpreting unparsed differently on the LHS and RHS.
Now an unparsed value is always a field if it matches a
registered field name (this matches the implementation in 3.6
and before).

This requires tightening a bit the allowed filter names for
protocols to avoid some common and potentially weird conflicting
cases.

Incidentally this extends set grammar to accept all entities.
That is experimental and may be reverted in the future.
---
 docbook/wsug_src/WSUG_chapter_work.adoc | 42 +++++------
 epan/dfilter/dfilter-int.h              |  7 --
 epan/dfilter/dfilter.c                  | 19 -----
 epan/dfilter/dfunctions.c               |  9 ---
 epan/dfilter/grammar.lemon              | 32 ++++++---
 epan/dfilter/scanner.l                  | 13 ++--
 epan/dfilter/semcheck.c                 | 95 +------------------------
 epan/dfilter/sttype-string.c            | 10 ---
 epan/dfilter/syntax-tree.c              |  4 +-
 epan/dfilter/syntax-tree.h              |  5 +-
 epan/proto.c                            | 26 +++++++
 test/suite_dfilter/group_syntax.py      | 11 +--
 12 files changed, 87 insertions(+), 186 deletions(-)

diff --git a/docbook/wsug_src/WSUG_chapter_work.adoc b/docbook/wsug_src/WSUG_chapter_work.adoc
index 7fe8378965..2e3fc238eb 100644
--- a/docbook/wsug_src/WSUG_chapter_work.adoc
+++ b/docbook/wsug_src/WSUG_chapter_work.adoc
@@ -869,36 +869,36 @@ to “dhcp.type” but Wireshark will show the warning “"bootp" is deprecated
 when you use it. Support for the deprecated fields may be removed in the future.
 
 ==== Some protocol names can be ambiguous
-In some uncommon cases relational display filter expressions (equal, less than, etc.)
+In some particular cases relational expressions (equal, less than, etc.)
 can be ambiguous. The filter name of a protocol or protocol field can contain
 any letter and digit in any order, possibly separated by dots. That can be
 indistinguishable from a literal value (usually numerical values in hexadecimal).
 For example the semantic value of `fc` can be the protocol Fibre Channel or the
-number 0xfc in hexadecimal. The 0x prefix is optional for literal values and
-furthermore "0xfc" would be a syntactically valid filter name for a protocol.
+number 0xFC in hexadecimal because the 0x prefix is optional for hexadecimal numbers.
 
-Wireshark will use a heuristic for parsing those expressions. A value on the
-right hand side of the expression is treated as literal value first. If that
-fails it is treated as a protocol or protocol field (if one exists with that
-name). If that fails it is a syntactical error. On the left hand side values
-are always treated as protocol fields. Literal values are not considered on the
-LHS. So the expression `fc == fc` is parsed as
-"Byte array of protocol with name 'fc' equals byte array { 0xfc } of length 1".
+Any value that matches a registered protocol or protocol field filter name is
+interpreted semantically as such. If it doesn't match a protocol name the normal
+rules for parsing literal values apply.
 
-Sometimes this may not be the intended meaning and there is additional syntax
-to resolve the ambiguity. Lexical tokens in-between angle brackets are always
-and only treated as literal values. Bytes arrays can also be prefixed with a
-colon. Tokens prefixed with a dot are always treated as a protocol name (the
-dot stands for the root of the protocol namespace and is optional).
+So in the case of 'fc' the lexical token is interpreted as "Fibre Channel" and
+not 0xFC. In the case of 'fd' it would be interpreted as 0xFD because it is a
+well-formed hexadecimal literal value (according to the rules of display filter
+language syntax) and there is no protocol registered with the filter name 'fd'.
+
+How ambiguous values are interpreted may change in the future. To avoid this
+problem and resolve the ambiguity there is additional syntax available.
+Values in-between angle brackets are always and only treated as literal
+values. Bytes arrays and numeric values can also be prefixed with a colon to
+force interpretation as a literal value. Values prefixed with a dot are always
+treated as a protocol name. The dot stands for the root of the protocol namespace
+and is optional)
 ----
 frame[10:] contains .fc or frame[10] == :fc and not frame contains <cafe.face>
 ----
-The most common use case for a display filter relational expression is to compare
-a field with a constant value on the right hand side and in that case the heuristic
-method used will parse the expression correctly. In the not-so-common case, or
-if you are writing a script, or you think your expression may not be giving the
-expected results it is advisable to use the explicit syntax to resolve any
-possible ambiguity in the meaning of filter expression.
+If you are writing a script, or you think your expression may not be giving the
+expected results because of the syntactical ambiguity of some filter expression
+it is advisable to use the explicit syntax to indicate the correct meaning for
+that expression.
 
 [#ChWorkFilterAddExpressionSection]
 
diff --git a/epan/dfilter/dfilter-int.h b/epan/dfilter/dfilter-int.h
index 2082a6d992..01251940ae 100644
--- a/epan/dfilter/dfilter-int.h
+++ b/epan/dfilter/dfilter-int.h
@@ -105,13 +105,6 @@ DfilterTrace(FILE *TraceFILE, char *zTracePrompt);
 header_field_info *
 dfilter_resolve_unparsed(dfwork_t *dfw, const char *name);
 
-gboolean
-dfw_resolve_unparsed(dfwork_t *dfw, stnode_t *st);
-
-fvalue_t *
-dfilter_fvalue_from_unparsed(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
-		gboolean allow_partial_value, header_field_info *hfinfo_value_string);
-
 WS_RETNONNULL fvalue_t*
 dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
 		gboolean allow_partial_value, header_field_info *hfinfo_value_string);
diff --git a/epan/dfilter/dfilter.c b/epan/dfilter/dfilter.c
index 6db6e78e3a..99cd04d9f5 100644
--- a/epan/dfilter/dfilter.c
+++ b/epan/dfilter/dfilter.c
@@ -94,10 +94,6 @@ dfw_set_error_location(dfwork_t *dfw, stloc_t *loc)
 	dfw->err_loc = *loc;
 }
 
-/*
- * Tries to convert an STTYPE_UNPARSED to a STTYPE_FIELD. If it's not registered as
- * a field pass UNPARSED to the semantic check.
- */
 header_field_info *
 dfilter_resolve_unparsed(dfwork_t *dfw, const char *name)
 {
@@ -121,21 +117,6 @@ dfilter_resolve_unparsed(dfwork_t *dfw, const char *name)
 	return NULL;
 }
 
-gboolean
-dfw_resolve_unparsed(dfwork_t *dfw, stnode_t *st)
-{
-	if (stnode_type_id(st) != STTYPE_UNPARSED)
-		return FALSE;
-
-	header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_data(st));
-	if (hfinfo != NULL) {
-		stnode_replace(st, STTYPE_FIELD, hfinfo);
-		return TRUE;
-	}
-	stnode_replace(st, STTYPE_LITERAL, g_strdup(stnode_data(st)));
-	return FALSE;
-}
-
 /* Initialize the dfilter module */
 void
 dfilter_init(void)
diff --git a/epan/dfilter/dfunctions.c b/epan/dfilter/dfunctions.c
index 80b335274d..66099fd9f4 100644
--- a/epan/dfilter/dfunctions.c
+++ b/epan/dfilter/dfunctions.c
@@ -274,8 +274,6 @@ ul_semcheck_is_field_string(dfwork_t *dfw, const char *func_name, ftenum_t lhs_f
     ws_assert(g_slist_length(param_list) == 1);
     stnode_t *st_node = param_list->data;
 
-    dfw_resolve_unparsed(dfw, st_node);
-
     if (stnode_type_id(st_node) == STTYPE_FIELD) {
         hfinfo = sttype_field_hfinfo(st_node);
         if (IS_FT_STRING(hfinfo->type)) {
@@ -292,8 +290,6 @@ ul_semcheck_is_field(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype _U
     ws_assert(g_slist_length(param_list) == 1);
     stnode_t *st_node = param_list->data;
 
-    dfw_resolve_unparsed(dfw, st_node);
-
     if (stnode_type_id(st_node) == STTYPE_FIELD)
         return FT_UINT32;
 
@@ -309,8 +305,6 @@ ul_semcheck_string_param(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftyp
     ws_assert(g_slist_length(param_list) == 1);
     stnode_t *st_node = param_list->data;
 
-    dfw_resolve_unparsed(dfw, st_node);
-
     if (stnode_type_id(st_node) == STTYPE_FIELD) {
         hfinfo = sttype_field_hfinfo(st_node);
         switch (hfinfo->type) {
@@ -366,7 +360,6 @@ ul_semcheck_compare(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype,
     fvalue_t *fv;
 
     arg = param_list->data;
-    dfw_resolve_unparsed(dfw, arg);
 
     if (stnode_type_id(arg) == STTYPE_ARITHMETIC) {
         ftype = check_arithmetic_expr(dfw, arg, lhs_ftype);
@@ -390,7 +383,6 @@ ul_semcheck_compare(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ftype,
 
     for (l = param_list->next; l != NULL; l = l->next) {
         arg = l->data;
-        dfw_resolve_unparsed(dfw, arg);
 
         if (stnode_type_id(arg) == STTYPE_ARITHMETIC) {
             ft_arg = check_arithmetic_expr(dfw, arg, ftype);
@@ -436,7 +428,6 @@ ul_semcheck_absolute_value(dfwork_t *dfw, const char *func_name, ftenum_t lhs_ft
     fvalue_t *fv;
 
     st_node = param_list->data;
-    dfw_resolve_unparsed(dfw, st_node);
 
     if (stnode_type_id(st_node) == STTYPE_ARITHMETIC) {
         ftype = check_arithmetic_expr(dfw, st_node, lhs_ftype);
diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon
index 6cdd383ad3..f00e457c8d 100644
--- a/epan/dfilter/grammar.lemon
+++ b/epan/dfilter/grammar.lemon
@@ -120,7 +120,6 @@ expr(X) ::= LPAREN expr(Y) RPAREN.  { X = Y; }
 /* Entities, or things that can be compared/tested/checked */
 atom(A) ::= STRING(S).      { A = S; }
 atom(A) ::= CHARCONST(N).   { A = N; }
-atom(A) ::= UNPARSED(S).    { A = S; }
 atom(A) ::= LITERAL(S).     { A = S; }
 
 layer(R) ::= HASH LBRACKET range_node_list(L) RBRACKET.
@@ -154,7 +153,7 @@ field(R) ::= FIELD(F) layer(L).
 
 field(R) ::= UNPARSED(U) layer(L).
 {
-    header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_data(U));
+    header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_token(U));
     if (hfinfo == NULL) {
         FAIL(dfw, U, "%s is not a valid field", stnode_token(U));
     }
@@ -174,7 +173,7 @@ reference(R) ::= DOLLAR LBRACE field(F) RBRACE.
 
 reference(R) ::= DOLLAR LBRACE UNPARSED(U) RBRACE.
 {
-    header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_data(U));
+    header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_token(U));
     if (hfinfo == NULL) {
         FAIL(dfw, U, "%s is not a valid field", stnode_token(U));
     }
@@ -187,6 +186,19 @@ entity(E) ::= slice(R).         { E = R; }
 entity(E) ::= function(F).      { E = F; }
 entity(E) ::= field(F).         { E = F; }
 entity(E) ::= reference(R).     { E = R; }
+entity(E) ::= UNPARSED(U).
+{
+    const char *token = stnode_token(U);
+    const stloc_t *loc = stnode_location(U);
+    header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, token);
+    if (hfinfo != NULL) {
+        E = stnode_new(STTYPE_FIELD, hfinfo, g_strdup(token), loc);
+    }
+    else {
+        E = stnode_new(STTYPE_LITERAL, g_strdup(token), g_strdup(token), loc);
+    }
+    stnode_free(U);
+}
 
 arithmetic_expr(T) ::= entity(N).
 {
@@ -338,20 +350,20 @@ set_list(L) ::= set_list(P) COMMA set_element(N).
     L = g_slist_concat(P, N);
 }
 
-set_entity(N) ::= atom(X).
+set_entity(N) ::= entity(E).
 {
-    N = X;
+    N = E;
 }
 
-set_entity(N) ::= MINUS(M) atom(X).
+set_entity(N) ::= MINUS(M) entity(E).
 {
     N = M;
-    sttype_test_set1(N, OP_UNARY_MINUS, X);
+    sttype_test_set1(N, OP_UNARY_MINUS, E);
 }
 
-set_entity(N) ::= PLUS atom(X).
+set_entity(N) ::= PLUS entity(E).
 {
-    N = X;
+    N = E;
 }
 
 set_element(N) ::= set_entity(X).
@@ -408,7 +420,7 @@ range_node_list(L) ::= range_node_list(P) COMMA RANGE_NODE(N).
     static stnode_t *
     new_function(dfwork_t *dfw, stnode_t *node)
     {
-        const char *name = stnode_data(node);
+        const char *name = stnode_token(node);
 
         df_func_def_t *def = df_func_lookup(name);
         if (!def) {
diff --git a/epan/dfilter/scanner.l b/epan/dfilter/scanner.l
index 0369ef78ab..c201dcb1da 100644
--- a/epan/dfilter/scanner.l
+++ b/epan/dfilter/scanner.l
@@ -419,19 +419,19 @@ hyphen-bytes	{hex2}(-{hex2})+
 {MacAddress}|{QuadMacAddress}		{
 	/* MAC Address. */
 	update_location(yyextra, yytext);
-	return set_lval_str(yyextra, TOKEN_UNPARSED, yytext);
+	return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED);
 }
 
 {IPv4address}{v4-cidr-prefix}?		{
 	/* IPv4 with or without prefix. */
 	update_location(yyextra, yytext);
-	return set_lval_str(yyextra, TOKEN_UNPARSED, yytext);
+	return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED);
 }
 
 {IPv6address}{v6-cidr-prefix}?		{
 	/* IPv6 with or without prefix. */
 	update_location(yyextra, yytext);
-	return set_lval_str(yyextra, TOKEN_UNPARSED, yytext);
+	return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED);
 }
 
 :?({colon-bytes}|{dot-bytes}|{hyphen-bytes})	{
@@ -439,7 +439,7 @@ hyphen-bytes	{hex2}(-{hex2})+
 	update_location(yyextra, yytext);
 	if (yytext[0] == ':')
 		return set_lval_str(yyextra, TOKEN_LITERAL, yytext); /* Keep leading colon. */
-	return set_lval_str(yyextra, TOKEN_UNPARSED, yytext);
+	return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED);
 }
 
 :[[:xdigit:]]+		{
@@ -464,7 +464,7 @@ hyphen-bytes	{hex2}(-{hex2})+
 		/* Skip leading dot. */
 		return set_lval_field(yyextra, TOKEN_FIELD, yytext + 1);
 	}
-	return set_lval_str(yyextra, TOKEN_UNPARSED, yytext);
+	return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED);
 }
 
 . {
@@ -522,9 +522,6 @@ set_lval_str(df_scanner_state_t *state, int token, const char *token_value)
 		case TOKEN_LITERAL:
 			type_id = STTYPE_LITERAL;
 			break;
-		case TOKEN_UNPARSED:
-			type_id = STTYPE_UNPARSED;
-			break;
 		default:
 			ws_assert_not_reached();
 	}
diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c
index c294390b27..d1e904043d 100644
--- a/epan/dfilter/semcheck.c
+++ b/epan/dfilter/semcheck.c
@@ -184,57 +184,6 @@ dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
 	return fv;
 }
 
-fvalue_t *
-dfilter_fvalue_from_unparsed(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
-		gboolean allow_partial_value, header_field_info *hfinfo_value_string)
-{
-	fvalue_t *fv;
-	const char *s = stnode_data(st);
-
-	/* Don't set the error message if it's already set. */
-	fv = fvalue_from_literal(ftype, s, allow_partial_value,
-		dfw->error_message == NULL ? &dfw->error_message : NULL);
-
-	if (fv != NULL) {
-		/* converted to fvalue successfully. */
-		return fv;
-	}
-
-	if (hfinfo_value_string) {
-		/* check value_string */
-		fv = mk_fvalue_from_val_string(dfw, hfinfo_value_string, s);
-
-		if (fv != NULL) {
-			/*
-			 * Ignore previous errors if this can be mapped
-			 * to an item from value_string.
-			 */
-			g_free(dfw->error_message);
-			dfw->error_message = NULL;
-			return fv;
-		}
-	}
-
-	header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, s);
-
-	if (hfinfo == NULL) {
-		/* This node is neither a valid fvalue nor a valid field. */
-		/* The parse failed. Error message is already set. */
-		dfw_set_error_location(dfw, stnode_location(st));
-		THROW(TypeError);
-	}
-
-	/* Successfully resolved to a field. */
-
-	/* Free the error message for the failed fvalue_from_literal() attempt. */
-	g_free(dfw->error_message);
-	dfw->error_message = NULL;
-
-	stnode_replace(st, STTYPE_FIELD, hfinfo);
-	/* Return NULL to signal we have a field. */
-	return NULL;
-}
-
 /* Gets an fvalue from a string, and sets the error message on failure. */
 WS_RETNONNULL
 fvalue_t *
@@ -503,8 +452,6 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1)
 {
 	LOG_NODE(st_arg1);
 
-	dfw_resolve_unparsed(dfw, st_arg1);
-
 	switch (stnode_type_id(st_arg1)) {
 		case STTYPE_FIELD:
 		case STTYPE_ARITHMETIC:
@@ -535,7 +482,6 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1)
 			break;
 
 		case STTYPE_SET:
-		case STTYPE_UNPARSED:
 		case STTYPE_UNINITIALIZED:
 		case STTYPE_NUM_TYPES:
 		case STTYPE_TEST:
@@ -556,7 +502,6 @@ check_slice_sanity(dfwork_t *dfw, stnode_t *st, ftenum_t lhs_ftype)
 
 	entity1 = sttype_slice_entity(st);
 	ws_assert(entity1);
-	dfw_resolve_unparsed(dfw, entity1);
 
 	if (stnode_type_id(entity1) == STTYPE_FIELD) {
 		hfinfo1 = sttype_field_hfinfo(entity1);
@@ -659,7 +604,6 @@ check_relation_LHS_FIELD(dfwork_t *dfw, test_op_t st_op,
 
 	LOG_NODE(st_node);
 
-again:
 	type2 = stnode_type_id(st_arg2);
 
 	ws_assert(stnode_type_id(st_arg1) == STTYPE_FIELD ||
@@ -687,7 +631,7 @@ again:
 					stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
 		}
 	}
-	else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL || type2 == STTYPE_UNPARSED) {
+	else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL) {
 		/* Skip incompatible fields */
 		while (hfinfo1->same_name_prev_id != -1 &&
 				((type2 == STTYPE_STRING && ftype1 != FT_STRING && ftype1!= FT_STRINGZ) ||
@@ -696,14 +640,7 @@ again:
 			ftype1 = hfinfo1->type;
 		}
 
-		if (type2 == STTYPE_UNPARSED) {
-			fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, hfinfo1);
-			if (fvalue == NULL) {
-				/* We have a protocol or protocol field. */
-				goto again;
-			}
-		}
-		else if (type2 == STTYPE_STRING) {
+		if (type2 == STTYPE_STRING) {
 			fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, hfinfo1);
 		}
 		else {
@@ -778,7 +715,6 @@ check_relation_LHS_SLICE(dfwork_t *dfw, test_op_t st_op,
 
 	check_slice_sanity(dfw, st_arg1, FT_NONE);
 
-again:
 	type2 = stnode_type_id(st_arg2);
 
 	if (IS_FIELD_ENTITY(type2)) {
@@ -799,14 +735,6 @@ again:
 		fvalue = dfilter_fvalue_from_string(dfw, FT_BYTES, st_arg2, NULL);
 		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
 	}
-	else if (type2 == STTYPE_UNPARSED) {
-		fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL);
-		if (fvalue == NULL) {
-			/* We have a protocol or protocol field. */
-			goto again;
-		}
-		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
-	}
 	else if (type2 == STTYPE_LITERAL) {
 		fvalue = dfilter_fvalue_from_literal(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL);
 		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
@@ -875,7 +803,6 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, test_op_t st_op,
 				stnode_todisplay(st_node));
 	}
 
-again:
 	type2 = stnode_type_id(st_arg2);
 
 	if (IS_FIELD_ENTITY(type2)) {
@@ -896,14 +823,6 @@ again:
 		fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL);
 		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
 	}
-	else if (type2 == STTYPE_UNPARSED) {
-		fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, NULL);
-		if (fvalue == NULL) {
-			/* We have a protocol or protocol field. */
-			goto again;
-		}
-		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
-	}
 	else if (type2 == STTYPE_LITERAL) {
 		fvalue = dfilter_fvalue_from_literal(dfw, ftype1, st_arg2, allow_partial_value, NULL);
 		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
@@ -1001,8 +920,6 @@ check_relation(dfwork_t *dfw, test_op_t st_op,
 {
 	LOG_NODE(st_node);
 
-	dfw_resolve_unparsed(dfw, st_arg1);
-
 	switch (stnode_type_id(st_arg1)) {
 		case STTYPE_FIELD:
 		case STTYPE_REFERENCE:
@@ -1033,8 +950,6 @@ check_relation_contains(dfwork_t *dfw, stnode_t *st_node,
 {
 	LOG_NODE(st_node);
 
-	dfw_resolve_unparsed(dfw, st_arg1);
-
 	switch (stnode_type_id(st_arg1)) {
 		case STTYPE_FIELD:
 		case STTYPE_REFERENCE:
@@ -1066,8 +981,6 @@ check_relation_matches(dfwork_t *dfw, stnode_t *st_node,
 
 	LOG_NODE(st_node);
 
-	dfw_resolve_unparsed(dfw, st_arg1);
-
 	if (stnode_type_id(st_arg2) != STTYPE_STRING) {
 		FAIL(dfw, st_arg2, "Matches requires a double quoted string on the right side.");
 	}
@@ -1113,8 +1026,6 @@ check_relation_in(dfwork_t *dfw, stnode_t *st_node _U_,
 
 	LOG_NODE(st_node);
 
-	dfw_resolve_unparsed(dfw, st_arg1);
-
 	if (stnode_type_id(st_arg1) != STTYPE_FIELD) {
 		FAIL(dfw, st_arg1, "Only a field may be tested for membership in a set.");
 	}
@@ -1216,7 +1127,6 @@ check_arithmetic_entity(dfwork_t *dfw, stnode_t *st_arg, ftenum_t lhs_ftype)
 	 * is none we must have been passed an entity with a definite type
 	 * (field, function, etc). */
 
-	dfw_resolve_unparsed(dfw, st_arg);
 	type = stnode_type_id(st_arg);
 
 	if (type == STTYPE_LITERAL) {
@@ -1264,7 +1174,6 @@ check_arithmetic_expr(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype)
 	}
 
 	sttype_test_get(st_node, &st_op, &st_arg1, &st_arg2);
-	dfw_resolve_unparsed(dfw, st_arg1);
 
 	/* On the LHS we require a field-like value as the first term. */
 	if (lhs_ftype == FT_NONE && node_is_constant(st_arg1)) {
diff --git a/epan/dfilter/sttype-string.c b/epan/dfilter/sttype-string.c
index 409c00a366..5d3e564298 100644
--- a/epan/dfilter/sttype-string.c
+++ b/epan/dfilter/sttype-string.c
@@ -61,15 +61,6 @@ sttype_register_string(void)
 		gstring_tostr
 	};
 
-	static sttype_t unparsed_type = {
-		STTYPE_UNPARSED,
-		"UNPARSED",
-		NULL,
-		string_free,
-		string_dup,
-		string_tostr
-	};
-
 	static sttype_t literal_type = {
 		STTYPE_LITERAL,
 		"LITERAL",
@@ -80,7 +71,6 @@ sttype_register_string(void)
 	};
 
 	sttype_register(&string_type);
-	sttype_register(&unparsed_type);
 	sttype_register(&literal_type);
 }
 
diff --git a/epan/dfilter/syntax-tree.c b/epan/dfilter/syntax-tree.c
index 5e35584488..0fcae1d2a4 100644
--- a/epan/dfilter/syntax-tree.c
+++ b/epan/dfilter/syntax-tree.c
@@ -102,7 +102,7 @@ stnode_clear(stnode_t *node)
 }
 
 void
-stnode_init(stnode_t *node, sttype_id_t type_id, gpointer data, char *token, stloc_t *loc)
+stnode_init(stnode_t *node, sttype_id_t type_id, gpointer data, char *token, const stloc_t *loc)
 {
 	sttype_t	*type;
 
@@ -149,7 +149,7 @@ stnode_replace(stnode_t *node, sttype_id_t type_id, gpointer data)
 }
 
 stnode_t*
-stnode_new(sttype_id_t type_id, gpointer data, char *token, stloc_t *loc)
+stnode_new(sttype_id_t type_id, gpointer data, char *token, const stloc_t *loc)
 {
 	stnode_t	*node;
 
diff --git a/epan/dfilter/syntax-tree.h b/epan/dfilter/syntax-tree.h
index b74fae4977..f64963fea2 100644
--- a/epan/dfilter/syntax-tree.h
+++ b/epan/dfilter/syntax-tree.h
@@ -24,7 +24,6 @@ typedef enum {
 	STTYPE_UNINITIALIZED,
 	STTYPE_TEST,
 	STTYPE_LITERAL,
-	STTYPE_UNPARSED,
 	STTYPE_REFERENCE,
 	STTYPE_STRING,
 	STTYPE_CHARCONST,
@@ -120,7 +119,7 @@ void
 sttype_register(sttype_t *type);
 
 stnode_t*
-stnode_new(sttype_id_t type_id, gpointer data, char *token, stloc_t *loc);
+stnode_new(sttype_id_t type_id, gpointer data, char *token, const stloc_t *loc);
 
 stnode_t*
 stnode_dup(const stnode_t *org);
@@ -129,7 +128,7 @@ void
 stnode_clear(stnode_t *node);
 
 void
-stnode_init(stnode_t *node, sttype_id_t type_id, gpointer data, char *token, stloc_t *loc);
+stnode_init(stnode_t *node, sttype_id_t type_id, gpointer data, char *token, const stloc_t *loc);
 
 void
 stnode_replace(stnode_t *node, sttype_id_t type_id, gpointer data);
diff --git a/epan/proto.c b/epan/proto.c
index 397c649bc9..c45073c303 100644
--- a/epan/proto.c
+++ b/epan/proto.c
@@ -7425,12 +7425,38 @@ proto_tree_set_appendix(proto_tree *tree, tvbuff_t *tvb, gint start,
 static void
 check_protocol_filter_name_or_fail(const char *filter_name)
 {
+	/* Require at least two characters. */
+	if (filter_name[0] == '\0' || filter_name[1] == '\0') {
+		REPORT_DISSECTOR_BUG("Protocol filter name \"%s\" cannot have length less than two.", filter_name);
+	}
+
 	if (proto_check_field_name(filter_name) != '\0') {
 		REPORT_DISSECTOR_BUG("Protocol filter name \"%s\" has one or more invalid characters."
 			" Allowed are letters, digits, '-', '_' and non-repeating '.'."
 			" This might be caused by an inappropriate plugin or a development error.", filter_name);
 	}
 
+	/* Check that it doesn't match some very common numeric forms. */
+	if (filter_name[0] == '0' &&
+				(filter_name[1] == 'x' || filter_name[1] == 'X' ||
+				filter_name[1] == 'b' || filter_name[1] == 'B')) {
+		REPORT_DISSECTOR_BUG("Protocol filter name \"%s\" cannot start with \"%c%c\".",
+						filter_name, filter_name[0], filter_name[1]);
+	}
+
+	/* Check that it doesn't have all decimal digits. */
+	bool all_digits = true;
+	for (const char *s = filter_name; *s != '\0'; s++) {
+		if (!g_ascii_isdigit(*s)) {
+			all_digits = false;
+			break;
+		}
+	}
+	if (all_digits) {
+		REPORT_DISSECTOR_BUG("Protocol filter name \"%s\" cannot be composed of all decimal digits.",
+						filter_name);
+	}
+
 	/* Check for reserved keywords. */
 	if (g_hash_table_contains(proto_reserved_filter_names, filter_name)) {
 		REPORT_DISSECTOR_BUG("Protocol filter name \"%s\" is invalid because it is a reserved keyword."
diff --git a/test/suite_dfilter/group_syntax.py b/test/suite_dfilter/group_syntax.py
index c15ce7dcfc..010a8a990e 100644
--- a/test/suite_dfilter/group_syntax.py
+++ b/test/suite_dfilter/group_syntax.py
@@ -167,20 +167,23 @@ class case_equality(unittest.TestCase):
         dfilter = "frame[0:10] contains :00-01-6c"
         checkDFilterCount(dfilter, 1)
 
-    def test_rhs_literal_bias_1(self, checkDFilterCount):
+    def test_rhs_bias_1(self, checkDFilterCount):
+        # Protocol "Fibre Channel" on the RHS
         dfilter = 'frame[37] == fc'
-        checkDFilterCount(dfilter, 1)
+        checkDFilterCount(dfilter, 0)
 
-    def test_rhs_literal_bias_2(self, checkDFilterCount):
+    def test_rhs_bias_2(self, checkDFilterCount):
+        # Byte 0xFC on the RHS
         dfilter = 'frame[37] == :fc'
         checkDFilterCount(dfilter, 1)
 
     def test_rhs_literal_bias_3(self, checkDFilterCount):
+        # Byte 0xFC on the RHS
         dfilter = 'frame[37] == <fc>'
         checkDFilterCount(dfilter, 1)
 
     def test_rhs_literal_bias_4(self, checkDFilterCount):
-        # This is Fibre Channel on the RHS
+        # Protocol "Fibre Channel" on the RHS
         dfilter = 'frame[37] == .fc'
         checkDFilterCount(dfilter, 0)