Protobuf: fix bug about parsing negative enum value number

Change the PT_DECIMALLIT, PT_OCTALLIT and PT_HEXLIT tokens to the uint64
type, and make PT_IDENT no longer match numbers with a leading '-'; the
sign is now parsed in protobuf_lang.y. As a result, negative enum numbers
and numeric constants are parsed correctly.
Note: intLit is treated as uint32 when parsing fieldNumber and enumNumber,
but may be uint64 when used as a constant.

close #16988


(cherry picked from commit 1fff3cb106)
This commit is contained in:
Huang Qiangxiong 2020-11-04 12:38:23 +00:00 committed by Pascal Quantin
parent 4e0dbcbc97
commit b97332b12e
2 changed files with 28 additions and 11 deletions

View File

@ -92,6 +92,7 @@ DIAG_OFF_BYACC
char* sval;
pbl_node_t* node;
int ival;
guint64 u64val;
};
/* operations or symbols tokens */
@ -104,7 +105,7 @@ DIAG_OFF_BYACC
%right <sval> PT_REPEATED PT_ONEOF PT_MAP PT_RESERVED PT_ENUM PT_GROUP PT_EXTEND PT_EXTENSIONS
%right <sval> PT_MESSAGE PT_SERVICE PT_RPC PT_STREAM PT_RETURNS PT_TO PT_PROTO2 PT_PROTO3 PT_IDENT PT_STRLIT
%token <ival> PT_DECIMALLIT PT_OCTALLIT PT_HEXLIT
%token <u64val> PT_DECIMALLIT PT_OCTALLIT PT_HEXLIT
%type <sval> optionName label type keyType messageName enumName
%type <sval> streamName fieldName oneofName mapName serviceName rpcName messageType
@ -114,7 +115,8 @@ DIAG_OFF_BYACC
%type <node> enum enumBody enumField service serviceBody stream streamDecl
%type <node> fieldOptions fieldOption oneof oneofBody mapField group extend extendBody
%type <ival> intLit fieldNumber
%type <u64val> intLit
%type <ival> fieldNumber enumNumber
/* We don't care about following nodes:
syntax import package option enumValueOptions enumValueOption rpcBody streamBody
@ -262,14 +264,20 @@ enumBody:
| enumBody emptyStatement
;
/* v2/v3: enumField = ident "=" intLit [ "[" enumValueOption { "," enumValueOption } "]" ]";" */
/* v2/v3: enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { "," enumValueOption } "]" ]";" */
enumField:
exIdent PT_ASSIGN intLit PT_LBRACKET enumValueOptions PT_RBRACKET PT_SEMICOLON
exIdent PT_ASSIGN enumNumber PT_LBRACKET enumValueOptions PT_RBRACKET PT_SEMICOLON
{ $$ = pbl_create_enum_value_node(state->file, $1, $3); }
| exIdent PT_ASSIGN intLit
| exIdent PT_ASSIGN enumNumber
{ $$ = pbl_create_enum_value_node(state->file, $1, $3); }
;
/* v2/v3: enumNumber = [ "+" | "-" ] intLit
 * The value must be in the range of a 32-bit integer; negative values are
 * not recommended. intLit carries a guint64, so it is narrowed to int here.
 */
enumNumber: intLit { $$ = (int)$1; }
| PT_PLUS intLit { $$ = (int)$2; }
/* Negate in unsigned 64-bit arithmetic and narrow afterwards. Writing
 * "-(int)$2" would negate INT_MIN for the literal 2147483648 (the smallest
 * 32-bit enum value), which is signed overflow and thus undefined behavior. */
| PT_MINUS intLit { $$ = (int)(0 - $2); }
;
/* v2/v3: enumValueOption { "," enumValueOption } */
enumValueOptions:
enumValueOption
@ -365,7 +373,10 @@ label: PT_REQUIRED | PT_OPTIONAL | PT_REPEATED;
*/
type: exIdent;
fieldNumber: intLit;
/* v2/v3: valid field numbers run from 1 up to 2^29 - 1 (536,870,911).
 * An optional leading "+" is accepted; the 64-bit intLit value is narrowed
 * to int for the rule's result. */
fieldNumber:
      intLit
        { $$ = (int)$1; }
    | PT_PLUS intLit
        { $$ = (int)$2; }
    ;
/* v2/v3: fieldOptions = fieldOption { "," fieldOption } */
fieldOptions:
@ -503,6 +514,12 @@ emptyStatement: PT_SEMICOLON;
/* constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | strLit | boolLit
 *
 * Integer literals are formatted back into decimal text (with a leading "-"
 * for negated values) and handed to pbl_store_string_token().
 */
constant:
      exIdent
    | strLit
    | intLit
        { $$ = pbl_store_string_token(state, g_strdup_printf("%" G_GUINT64_FORMAT, $1)); }
    | PT_PLUS intLit
        { $$ = pbl_store_string_token(state, g_strdup_printf("%" G_GUINT64_FORMAT, $2)); }
    | PT_MINUS intLit
        { $$ = pbl_store_string_token(state, g_strdup_printf("-%" G_GUINT64_FORMAT, $2)); }
    /* The two signed-exIdent alternatives below cover floatLit. In addition,
     * boolLit is parsed as a plain exIdent. */
    | PT_PLUS exIdent
        { $$ = pbl_store_string_token(state, g_strconcat("+", $2, NULL)); }
    | PT_MINUS exIdent
        { $$ = pbl_store_string_token(state, g_strconcat("-", $2, NULL)); }
    ;
exIdent: PT_IDENT

View File

@ -166,12 +166,12 @@ to yylval->sval = strdup_and_store(yyscanner, yytext); retur
["']proto3["'] yylval->sval = strdup_and_store(yyscanner, yytext); return PT_PROTO3;
/* intLit values */
0|[1-9][0-9]* yylval->ival = atoi(yytext); return PT_DECIMALLIT;
0[0-7]* sscanf(yytext+1, "%o", &yylval->ival); return PT_OCTALLIT;
0[xX][0-9a-fA-F]+ sscanf(yytext+2, "%x", &yylval->ival); return PT_HEXLIT;
0|[1-9][0-9]* yylval->u64val = g_ascii_strtoull(yytext, NULL, 10); return PT_DECIMALLIT;
0[0-7]* yylval->u64val = g_ascii_strtoull(yytext+1, NULL, 8); return PT_OCTALLIT;
0[xX][0-9a-fA-F]+ yylval->u64val = g_ascii_strtoull(yytext+2, NULL, 16); return PT_HEXLIT;
/* Using extended identifier because we care only about position */
[a-zA-Z0-9_.+-]+ yylval->sval = strdup_and_store(yyscanner, yytext); return PT_IDENT;
[a-zA-Z0-9_][a-zA-Z0-9_.+-]* yylval->sval = strdup_and_store(yyscanner, yytext); return PT_IDENT;
\"(\\.|\"\"|[^"\n"])*\" yylval->sval = g_strndup(yytext + 1, strlen(yytext) - 2); return PT_STRLIT;
\'(\\.|\'\'|[^"\n"])*\' yylval->sval = g_strndup(yytext + 1, strlen(yytext) - 2); return PT_STRLIT;
@ -196,4 +196,4 @@ strdup_and_store(void* yyscanner, const char* text) {
/*
* Turn diagnostics back on, so we check the code that we've written.
*/
DIAG_ON_FLEX
DIAG_ON_FLEX