Protobuf: rewrite parser of *.proto file from Bison to Lemon

To avoid Bison compatibility problems (see
https://code.wireshark.org/review/#/c/33771/),
the *.proto file parser is rewritten with Lemon (protobuf_lang.y.in is
renamed to protobuf_lang_parser.lemon).
This also improves how the line numbers of message, field, and enum
names are recorded.
This commit is contained in:
Huang Qiangxiong 2020-11-17 20:48:00 +08:00 committed by AndersBroman
parent 69e6a16ba4
commit 7906a2f6a8
8 changed files with 764 additions and 799 deletions

View File

@ -295,10 +295,7 @@ add_lex_files(LEX_FILES LIBWIRESHARK_FILES
add_lemon_files(LEMON_FILES LIBWIRESHARK_FILES
dtd_grammar.lemon
)
add_yacc_files(YACC_FILES LIBWIRESHARK_FILES
protobuf_lang.y
protobuf_lang_parser.lemon
)
set_source_files_properties(

View File

@ -1,6 +1,6 @@
/* protobuf-helper.c
*
* Wrapper of Protocol Buffers Language library which generated by protobuf_lang.y and protobuf_lang_scanner.l.
* Wrapper of Protocol Buffers Language library which generated by protobuf_lang_parser.lemon and protobuf_lang_scanner.l.
* Copyright 2019, Huang Qiangxiong <qiangxiong.huang@qq.com>
*
* Wireshark - Network traffic analyzer
@ -15,7 +15,7 @@
* - The names of MESSAGE, ENUM, FIELD, ENUM_VALUE;
* - The data type of FIELD which assuring the value of protobuf field of packet can be dissected correctly.
*
* At present, we use C Protocol Buffers Language Parser which generated by protobuf_lang.y and protobuf_lang_scanner.l.
* At present, we use C Protocol Buffers Language Parser which generated by protobuf_lang_parser.lemon and protobuf_lang_scanner.l.
* This is because Wireshark is mainly implemented in plain ANSI C, whereas the official protobuf library is implemented in C++.
*
* One day, if C++ libraries are allowed, we can create a protobuf-helper.cpp file that invokes the official protobuf C++ library directly,

View File

@ -15,7 +15,7 @@
* - The names of MESSAGE, ENUM, FIELD, ENUM_VALUE;
* - The data type of FIELD which assuring the value of protobuf field of packet can be dissected correctly.
*
* At present, we use C Protocol Buffers Language Parser which generated by protobuf_lang.y and protobuf_lang_scanner.l.
* At present, we use C Protocol Buffers Language Parser which generated by protobuf_lang_parser.lemon and protobuf_lang_scanner.l.
* This is because Wireshark is mainly implemented in plain ANSI C, whereas the official protobuf library is implemented in C++.
*
* One day, if C++ libraries are allowed, we can create a protobuf-helper.cpp file that invokes the official protobuf C++ library directly,

View File

@ -1,679 +0,0 @@
/*
* We want a reentrant parser.
* Berkeley YACC and older versions of Bison use "%pure-parser" and newer
* versions of Bison use "%define api.pure".
* As https://code.wireshark.org/review/#/c/33771/
* says, "This doesn't work with Berkeley YACC, and I'd *really* prefer not to require Bison."
*/
${YACC_PURE_PARSER_DIRECTIVE}
/*
* We also want a reentrant scanner, so we have to pass the
* handle for the reentrant scanner to the parser, and the
* parser has to pass it to the lexical analyzer.
*
* We use void * rather than yyscan_t because, at least with some
* versions of Flex and Bison, if you use yyscan_t in %parse-param and
* %lex-param, you have to include the protobuf_lang_scanner_lex.h before
* protobuf_lang.h to get yyscan_t declared, and you have to include protobuf_lang.h
* before protobuf_lang_scanner_lex.h to get YYSTYPE declared. Using void *
* breaks the cycle; the Flex documentation says yyscan_t is just a void *.
*/
%parse-param {void *yyscanner}
%lex-param {void *yyscanner}
/*
* And we need to pass the parser/scanner state to the parser.
*/
%parse-param {protobuf_lang_state_t *state}
%{
/* protobuf_lang.y
*
* C Protocol Buffers Language (PBL) Parser (for *.proto files)
* Copyright 2019, Huang Qiangxiong <qiangxiong.huang@qq.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
/* This parser is mainly to get MESSAGE, ENUM, and FIELD information from *.proto files.
* There are two formats of *.proto files:
* 1) Protocol Buffers Version 3 Language Specification:
* https://developers.google.com/protocol-buffers/docs/reference/proto3-spec
* 2) Protocol Buffers Version 2 Language Specification:
* https://developers.google.com/protocol-buffers/docs/reference/proto2-spec
* The published grammar contains some errors in the 'proto', 'option' (value) and 'reserved' (fieldName) definitions.
* This parser exists because Wireshark is mainly implemented in plain ANSI C, whereas the official
* Protocol Buffers Language parser is implemented in C++.
*/
#include "config.h"
#if defined(_MSC_VER) && !defined(__STDC_VERSION__)
/*
* MSVC doesn't, by default, define __STDC_VERSION__, which
* means that the code generated by newer versions of winflexbison3's
* Bison end up defining YYPTRDIFF_T as long, which is wrong on
* 64-bit Windows, as that's an LLP64 platform, not an LP64 platform,
* and causes warnings to be generated. Those warnings turn into
* errors.
*
* With MSVC, if __STDC_VERSION__ isn't defined, Forcibly include
* <stdint.h> here to work around that.
*/
#include <stdint.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <glib.h>
#include <wsutil/file_util.h>
#include "protobuf_lang_tree.h"
DIAG_OFF_BYACC
#include "protobuf_lang.h"
#include "protobuf_lang_scanner_lex.h"
DIAG_ON_BYACC
#define NAME_TO_BE_SET "<NAME_TO_BE_SET>"
#define NEED_NOT_NAME "<NEED_NOT_NAME>"
/* Error handling function for bison */
void protobuf_langerror(void* yyscanner, protobuf_lang_state_t *state, const char *msg);
/* Extended error handling function */
void protobuf_langerrorv(void* yyscanner, protobuf_lang_state_t *state, const char *fmt, ...);
DIAG_OFF_BYACC
%}
%expect 23 /* suppress the warning about these conflicts */
%union {
char* sval;
pbl_node_t* node;
int ival;
guint64 u64val;
};
/* operations or symbols tokens */
%token PT_QUOTE PT_LPAREN PT_RPAREN PT_LBRACKET PT_RBRACKET PT_LCURLY PT_RCURLY PT_EQUAL PT_NOTEQUAL PT_NOTEQUAL2
%token PT_GEQUAL PT_LEQUAL PT_ASSIGN_PLUS PT_ASSIGN PT_PLUS PT_MINUS PT_MULTIPLY PT_DIV PT_LOGIC_OR PT_OR PT_LOGIC_AND
%token PT_AND PT_NOT PT_NEG PT_XOR PT_SHL PT_SHR PT_PERCENT PT_DOLLAR PT_COND PT_SEMICOLON PT_DOT PT_COMMA PT_COLON PT_LESS PT_GREATER
/* key words tokens */
%right <sval> PT_SYNTAX PT_IMPORT PT_WEAK PT_PUBLIC PT_PACKAGE PT_OPTION PT_REQUIRED PT_OPTIONAL
%right <sval> PT_REPEATED PT_ONEOF PT_MAP PT_RESERVED PT_ENUM PT_GROUP PT_EXTEND PT_EXTENSIONS
%right <sval> PT_MESSAGE PT_SERVICE PT_RPC PT_STREAM PT_RETURNS PT_TO PT_PROTO2 PT_PROTO3 PT_IDENT PT_STRLIT
%token <u64val> PT_DECIMALLIT PT_OCTALLIT PT_HEXLIT
%type <sval> optionName label type keyType messageName enumName
%type <sval> streamName fieldName oneofName mapName serviceName rpcName messageType
%type <sval> groupName constant exIdent strLit
%type <node> protoBody topLevelDef message messageBody rpc rpcDecl field oneofField
%type <node> enum enumBody enumField service serviceBody stream streamDecl
%type <node> fieldOptions fieldOption oneof oneofBody mapField group extend extendBody
%type <u64val> intLit
%type <ival> fieldNumber enumNumber
/* We don't care about following nodes:
syntax import package option enumValueOptions enumValueOption rpcBody streamBody
extensions reserved ranges range quoteFieldNames emptyStatement
*/
%start proto
%%
/* v2/v3: proto = syntax { import | package | option | topLevelDef | emptyStatement } */
/* Correction to the official PBL grammar: proto = { syntax } { import | package | option | topLevelDef | emptyStatement }
The default syntax version is "proto2". */
proto:
syntax wholeProtoBody
| wholeProtoBody
;
/* Finalizes the package node built by protoBody: gives it the real package
   name and registers it in (or merges it into) the pool's package table. */
wholeProtoBody: protoBody
{
/* Replace the placeholder node name with the package name seen by the 'package' rule. */
pbl_set_node_name($1, state->file->package_name);
/* From here on, use the name stored in the package node instead of the token string. */
state->file->package_name = pbl_get_node_name($1);
/* Put this file's definitions into the package table, keyed by package name. */
pbl_node_t* packnode = (pbl_node_t*)g_hash_table_lookup(state->pool->packages, state->file->package_name);
if (packnode) {
/* Another file already registered this package: move the children over and drop the new node. */
/* NOTE(review): if pbl_free_node() also releases the node's name, state->file->package_name
   (set above from this node) would dangle after this branch - confirm against pbl_free_node(). */
pbl_merge_children(packnode, $1);
pbl_free_node($1);
} else {
/* First file of this package: the table takes the node, with its own copy of the key. */
g_hash_table_insert(state->pool->packages, g_strdup(state->file->package_name), $1);
}
}
;
/* v2: syntax = "syntax" "=" quote "proto2" quote ";" */
/* v3: syntax = "syntax" "=" quote "proto3" quote ";" */
syntax:
PT_SYNTAX PT_ASSIGN PT_PROTO2 PT_SEMICOLON { state->file->syntax_version = 2; }
| PT_SYNTAX PT_ASSIGN PT_PROTO3 PT_SEMICOLON { state->file->syntax_version = 3; }
;
/* Accumulates the statements of one file into a single package node.
   Only topLevelDef adds a child; the other alternatives just pass the node through. */
protoBody:
/* empty */ { $$ = pbl_create_node(state->file, PBL_PACKAGE, NAME_TO_BE_SET); } /* create an empty package node; its name is set in wholeProtoBody */
| protoBody import /* default action is { $$ = $1; } */
| protoBody package
| protoBody option
| protoBody topLevelDef { $$ = $1; pbl_add_child($$, $2); }
| protoBody emptyStatement
;
/* v2/v3: import = "import" [ "weak" | "public" ] strLit ";" */
import:
PT_IMPORT strLit PT_SEMICOLON { pbl_add_proto_file_to_be_parsed(state->pool, $2); } /* append file to todo list */
| PT_IMPORT PT_PUBLIC strLit PT_SEMICOLON { pbl_add_proto_file_to_be_parsed(state->pool, $3); }
| PT_IMPORT PT_WEAK strLit PT_SEMICOLON { pbl_add_proto_file_to_be_parsed(state->pool, $3); }
;
/* v2/v3: package = "package" fullIdent ";" */
/* Records the package name of the current file; it is applied to the package node in wholeProtoBody. */
package: PT_PACKAGE exIdent PT_SEMICOLON
{ /* The memory of $2 will be freed after parsing, but package_name is
replaced later (in wholeProtoBody) by the newly allocated name of the package node. */
state->file->package_name = $2;
}
;
/* v2/v3: option = "option" optionName "=" constant ";" */
/* Correction to the official PBL grammar: option = "option" optionName "=" ( constant | customOptionValue ) ";" */
option:
PT_OPTION optionName PT_ASSIGN constant PT_SEMICOLON
| PT_OPTION optionName PT_ASSIGN customOptionValue PT_SEMICOLON
;
/* v2/v3: optionName = ( ident | "(" fullIdent ")" ) { "." ident } */
optionName:
exIdent
| PT_LPAREN exIdent PT_RPAREN { $$ = pbl_store_string_token(state, g_strconcat("(", $2, ")", NULL)); }
| PT_LPAREN exIdent PT_RPAREN exIdent { $$ = pbl_store_string_token(state, g_strconcat("(", $2, ")", $4, NULL)); } /* exIdent contains "." */
;
/* Also accept formats that are not defined in the official PBL specification, such as:
option (google.api.http) = { post: "/v3alpha/kv/put" body: "*" };
option (google.api.http) = { post: "/v3alpha/kv/put", body: "*" };
option (google.api.http) = { post: "/v3alpha/kv/put" { any format } body: "*" };
*/
customOptionValue: PT_LCURLY customOptionBody PT_RCURLY
;
customOptionBody:
/* empty */
| customOptionBody exIdent
| customOptionBody strLit
| customOptionBody symbolsWithoutCurly
| customOptionBody intLit
| customOptionBody customOptionValue
;
symbolsWithoutCurly:
PT_LPAREN | PT_RPAREN | PT_LBRACKET | PT_RBRACKET | PT_EQUAL | PT_NOTEQUAL | PT_NOTEQUAL2 | PT_GEQUAL
| PT_LEQUAL | PT_ASSIGN_PLUS | PT_ASSIGN | PT_PLUS | PT_MINUS | PT_MULTIPLY | PT_DIV | PT_LOGIC_OR | PT_OR
| PT_LOGIC_AND | PT_AND | PT_NOT | PT_NEG | PT_XOR | PT_SHL | PT_SHR | PT_PERCENT | PT_DOLLAR | PT_COND
| PT_SEMICOLON | PT_DOT | PT_COMMA | PT_COLON | PT_LESS | PT_GREATER
;
/* v2: topLevelDef = message | enum | extend | service */
/* v3: topLevelDef = message | enum | service */
topLevelDef:
message
| enum
| extend /*v2 only */
| service
;
/* v2/v3: message = "message" messageName messageBody */
message: PT_MESSAGE messageName PT_LCURLY messageBody PT_RCURLY { $$ = $4; pbl_set_node_name($$, $2); }
;
/* v2: messageBody = "{" { field | enum | message | extend | extensions | group | option | oneof | mapField | reserved | emptyStatement } "}" */
/* v3: messageBody = "{" { field | enum | message | option | oneof | mapField | reserved | emptyStatement } "}" */
messageBody:
/* empty */ { $$ = pbl_create_node(state->file, PBL_MESSAGE, NAME_TO_BE_SET); }
| messageBody field { $$ = $1; pbl_add_child($$, $2); }
| messageBody enum { $$ = $1; pbl_add_child($$, $2); }
| messageBody message { $$ = $1; pbl_add_child($$, $2); }
| messageBody extend /* v2 only */
| messageBody extensions /* v2 only */
| messageBody group /* v2 only */ { $$ = $1; pbl_add_child($$, $2); }
| messageBody option
| messageBody oneof { $$ = $1; pbl_merge_children($$, $2); pbl_free_node($2); }
| messageBody mapField { $$ = $1; pbl_add_child($$, $2); }
| messageBody reserved
| messageBody emptyStatement
;
/* v2/v3: enum = "enum" enumName enumBody */
/* 1 2 3 4 5 */
enum: PT_ENUM enumName PT_LCURLY enumBody PT_RCURLY { $$ = $4; pbl_set_node_name($$, $2); }
;
/* v2/v3: enumBody = "{" { option | enumField | emptyStatement } "}" */
enumBody:
/* empty */ { $$ = pbl_create_node(state->file, PBL_ENUM, NAME_TO_BE_SET); }
| enumBody option
| enumBody enumField { $$ = $1; pbl_add_child($$, $2); }
| enumBody emptyStatement
;
/* v2/v3: enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { "," enumValueOption } "]" ]";" */
/* Creates one enum value node from "name = number", with or without bracketed options
   (the options themselves are parsed but not stored).
   Only the bracketed form consumes the trailing ";" itself. The plain form ends after
   enumNumber, so a ";" that follows it is matched by the emptyStatement alternative of enumBody. */
enumField:
exIdent PT_ASSIGN enumNumber PT_LBRACKET enumValueOptions PT_RBRACKET PT_SEMICOLON
{ $$ = pbl_create_enum_value_node(state->file, $1, $3); }
| exIdent PT_ASSIGN enumNumber
{ $$ = pbl_create_enum_value_node(state->file, $1, $3); }
;
/* v2/v3: must be in the range of a 32-bit integer. negative values are not recommended. */
/* Optionally signed enum value. intLit is a guint64 (%type <u64val>) while enumNumber is an
   int (%type <ival>): the casts below narrow the value without any range check, so a literal
   outside the int range is not diagnosed here. */
enumNumber: intLit { $$ = (int)$1; }
| PT_PLUS intLit { $$ = (int)$2; }
| PT_MINUS intLit { $$ = -(int)$2; }
;
/* v2/v3: enumValueOption { "," enumValueOption } */
enumValueOptions:
enumValueOption
| enumValueOptions PT_COMMA enumValueOption
/* v2/v3: enumValueOption = optionName "=" constant */
enumValueOption: optionName PT_ASSIGN constant
;
/* v2: service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}" */
/* v3: service = "service" serviceName "{" { option | rpc | emptyStatement } "}" */
service: PT_SERVICE serviceName PT_LCURLY serviceBody PT_RCURLY { $$ = $4; pbl_set_node_name($$, $2); }
;
serviceBody:
/* empty */ { $$ = pbl_create_node(state->file, PBL_SERVICE, NAME_TO_BE_SET); }
| serviceBody option
| serviceBody rpc { $$ = $1; pbl_add_child($$, $2); }
| serviceBody emptyStatement
| serviceBody stream /* v2 only */ { $$ = $1; pbl_add_child($$, $2); }
;
/* v2/v3: rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" (( "{" {option | emptyStatement } "}" ) | ";") */
rpc:
rpcDecl PT_SEMICOLON
| rpcDecl PT_LCURLY rpcBody PT_RCURLY
;
/* "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" */
rpcDecl:
/* 1 2 3 4 5 6 7 8 9 */
PT_RPC rpcName PT_LPAREN messageType PT_RPAREN PT_RETURNS PT_LPAREN messageType PT_RPAREN
{ $$ = pbl_create_method_node(state->file, $2, $4, FALSE, $8, FALSE); }
/* 1 2 3 4 5 6 7 8 9 10 */
| PT_RPC rpcName PT_LPAREN PT_STREAM messageType PT_RPAREN PT_RETURNS PT_LPAREN messageType PT_RPAREN
{ $$ = pbl_create_method_node(state->file, $2, $5, TRUE, $9, FALSE); }
/* 1 2 3 4 5 6 7 8 9 10 */
| PT_RPC rpcName PT_LPAREN messageType PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType PT_RPAREN
{ $$ = pbl_create_method_node(state->file, $2, $4, FALSE, $9, TRUE); }
/* 1 2 3 4 5 6 7 8 9 10 11 */
| PT_RPC rpcName PT_LPAREN PT_STREAM messageType PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType PT_RPAREN
{ $$ = pbl_create_method_node(state->file, $2, $5, TRUE, $10, TRUE); }
;
rpcBody:
/* empty */
| rpcBody option
| rpcBody emptyStatement
;
/* v2: stream = "stream" streamName "(" messageType "," messageType ")" (( "{" { option | emptyStatement } "}") | ";" ) */
stream:
streamDecl PT_SEMICOLON
| streamDecl PT_LCURLY streamBody PT_RCURLY
;
/* v2 only */
/* 1 2 3 4 5 6 7 */
streamDecl: PT_STREAM streamName PT_LPAREN messageType PT_COMMA messageType PT_RPAREN
{ $$ = pbl_create_method_node(state->file, $2, $4, TRUE, $6, TRUE); }
;
/* v2 only */
streamBody:
/* empty */
| streamBody option
| streamBody emptyStatement
;
/* v2: label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
/* v3: field = [ "repeated" ] type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
field:
/* 1 2 3 4 5 */
type fieldName PT_ASSIGN fieldNumber PT_SEMICOLON
{ $$ = pbl_create_field_node(state->file, NULL, $1, $2, $4, NULL); }
/* 1 2 3 4 5 6 7 8 */
| type fieldName PT_ASSIGN fieldNumber PT_LBRACKET fieldOptions PT_RBRACKET PT_SEMICOLON
{ $$ = pbl_create_field_node(state->file, NULL, $1, $2, $4, $6); }
/* 1 2 3 4 5 6 */
| label type fieldName PT_ASSIGN fieldNumber PT_SEMICOLON
{ $$ = pbl_create_field_node(state->file, $1, $2, $3, $5, NULL); }
/* 1 2 3 4 5 6 7 8 9 */
| label type fieldName PT_ASSIGN fieldNumber PT_LBRACKET fieldOptions PT_RBRACKET PT_SEMICOLON
{ $$ = pbl_create_field_node(state->file, $1, $2, $3, $5, $7); }
;
/* v2: label = "required" | "optional" | "repeated" */
label: PT_REQUIRED | PT_OPTIONAL | PT_REPEATED;
/* v2/v3: type = "double" | "float" | "int32" | "int64" | "uint32" | "uint64"
| "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64"
| "bool" | "string" | "bytes" | messageType | enumType
*/
type: exIdent;
/* v2/v3: The smallest field number is 1, and the largest is 2^29 - 1, or 536,870,911. */
/* Field number with an optional leading "+". intLit is a guint64 (%type <u64val>) and is
   narrowed to int (%type <ival>) without a range check; the limits quoted in the
   specification are not enforced by this rule. */
fieldNumber: intLit { $$ = (int)$1; }
| PT_PLUS intLit { $$ = (int)$2; }
;
/* v2/v3: fieldOptions = fieldOption { "," fieldOption } */
fieldOptions:
fieldOption
{ $$ = pbl_create_node(state->file, PBL_OPTIONS, NEED_NOT_NAME); pbl_add_child($$, $1); }
| fieldOptions PT_COMMA fieldOption
{ $$ = $1; pbl_add_child($$, $3); }
;
/* v2/v3: fieldOption = optionName "=" constant */
fieldOption: optionName PT_ASSIGN constant
{ $$ = pbl_create_option_node(state->file, $1, $3); }
;
/* v2 only: group = label "group" groupName "=" fieldNumber messageBody */
/* 1 2 3 4 5 6 7 8 */
group: label PT_GROUP groupName PT_ASSIGN fieldNumber PT_LCURLY messageBody PT_RCURLY
{ $$ = $7; pbl_set_node_name($$, $3); }
;
groupName: exIdent;
/* v2/v3: oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}" */
/* 1 2 3 4 5 */
oneof: PT_ONEOF oneofName PT_LCURLY oneofBody PT_RCURLY { $$ = $4; pbl_set_node_name($$, $2); }
;
oneofBody:
/* empty */ { $$ = pbl_create_node(state->file, PBL_ONEOF, NAME_TO_BE_SET); }
| oneofBody oneofField { $$ = $1; pbl_add_child($$, $2); }
| oneofBody emptyStatement
;
/* v2/v3: oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
oneofField:
/* 1 2 3 4 5 6 7 8 */
type fieldName PT_ASSIGN fieldNumber PT_LBRACKET fieldOptions PT_RBRACKET PT_SEMICOLON
{ $$ = pbl_create_field_node(state->file, NULL, $1, $2, $4, $6); }
/* 1 2 3 4 5 */
| type fieldName PT_ASSIGN fieldNumber PT_SEMICOLON
{ $$ = pbl_create_field_node(state->file, NULL, $1, $2, $4, NULL); }
;
/* v2/v3: mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
mapField:
/* 1 2 3 4 5 6 7 8 9 10 11 12 13 */
PT_MAP PT_LESS keyType PT_COMMA type PT_GREATER mapName PT_ASSIGN fieldNumber PT_LBRACKET fieldOptions PT_RBRACKET PT_SEMICOLON
{
$$ = pbl_create_map_field_node(state->file, $7, $9, $11);
pbl_add_child($$, pbl_create_field_node(state->file, NULL, $3, "key", 1, NULL)); /* add key field */
pbl_add_child($$, pbl_create_field_node(state->file, NULL, $5, "value", 2, NULL)); /* add value field */
}
/* 1 2 3 4 5 6 7 8 9 10 */
| PT_MAP PT_LESS keyType PT_COMMA type PT_GREATER mapName PT_ASSIGN fieldNumber PT_SEMICOLON
{
$$ = pbl_create_map_field_node(state->file, $7, $9, NULL);
pbl_add_child($$, pbl_create_field_node(state->file, NULL, $3, "key", 1, NULL)); /* add key field */
pbl_add_child($$, pbl_create_field_node(state->file, NULL, $5, "value", 2, NULL)); /* add value field */
}
;
/* keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
"fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" */
keyType: exIdent
;
/* v2 only: extensions = "extensions" ranges ";" */
extensions: PT_EXTENSIONS ranges PT_SEMICOLON
;
/* v2/v3: reserved = "reserved" ( ranges | fieldNames ) ";" */
reserved:
PT_RESERVED ranges PT_SEMICOLON
| PT_RESERVED quoteFieldNames PT_SEMICOLON
;
/* v2/v3: ranges = range { "," range } */
ranges:
range
| ranges PT_COMMA range
;
/* v2/v3: range = intLit [ "to" ( intLit | "max" ) ] */
range:
intLit
| intLit PT_TO intLit
| intLit PT_TO exIdent
;
/* v2/v3: fieldNames = fieldName { "," fieldName }
Note: There is an error in BNF definition about reserved fieldName. It's strLit not ident.
*/
quoteFieldNames:
strLit
| quoteFieldNames PT_COMMA strLit
;
/* v2 only: extend = "extend" messageType "{" {field | group | emptyStatement} "}" */
/* 1 2 3 4 5 */
extend: PT_EXTEND messageType PT_LCURLY extendBody PT_RCURLY
{ $$ = $4; pbl_set_node_name($$, pbl_store_string_token(state, g_strconcat($2, "Extend", NULL))); }
;
/* v2 only */
extendBody:
/* empty */ { $$ = pbl_create_node(state->file, PBL_MESSAGE, NAME_TO_BE_SET); }
| extendBody field { $$ = $1; pbl_add_child($$, $2); }
| extendBody group { $$ = $1; pbl_add_child($$, $2); }
| extendBody emptyStatement
;
messageName: exIdent;
enumName: exIdent;
streamName: exIdent;
fieldName: exIdent;
oneofName: exIdent;
mapName: exIdent;
serviceName: exIdent;
rpcName: exIdent;
/* messageType = [ "." ] { ident "." } messageName */
messageType: exIdent
;
/* enumType = [ "." ] { ident "." } enumName */
/*enumType: exIdent*/
;
/* intLit = decimalLit | octalLit | hexLit */
intLit: PT_DECIMALLIT | PT_OCTALLIT | PT_HEXLIT
;
/* emptyStatement = ";" */
emptyStatement: PT_SEMICOLON;
/* constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | strLit | boolLit */
constant: exIdent | strLit
| intLit { $$ = pbl_store_string_token(state, g_strdup_printf("%" G_GUINT64_FORMAT, $1)); }
| PT_PLUS intLit { $$ = pbl_store_string_token(state, g_strdup_printf("%" G_GUINT64_FORMAT, $2)); }
| PT_MINUS intLit { $$ = pbl_store_string_token(state, g_strdup_printf("-%" G_GUINT64_FORMAT, $2)); }
/* This cover floatLit. In addition, boolLit is parsed as exIdent */
| PT_PLUS exIdent { $$ = pbl_store_string_token(state, g_strconcat("+", $2, NULL)); }
| PT_MINUS exIdent { $$ = pbl_store_string_token(state, g_strconcat("-", $2, NULL)); }
;
exIdent: PT_IDENT
| PT_SYNTAX | PT_IMPORT | PT_WEAK | PT_PUBLIC | PT_PACKAGE | PT_OPTION
| PT_ONEOF | PT_MAP | PT_RESERVED | PT_ENUM | PT_GROUP | PT_EXTEND | PT_EXTENSIONS
| PT_MESSAGE | PT_SERVICE | PT_RPC | PT_STREAM | PT_RETURNS | PT_TO | label
;
strLit: PT_STRLIT | PT_PROTO2 | PT_PROTO3
;
%%
DIAG_ON_BYACC
/* Return the line number reported by protobuf_langget_lineno() for the given scanner. */
int
pbl_get_current_lineno(void* scanner)
{
    const int current_line = protobuf_langget_lineno(scanner);

    return current_line;
}
/**
 * Report a parser error through the pool's error callback.
 *
 * The message is prefixed with the name of the file being parsed and, when a
 * scanner is supplied, with the scanner's current line number.
 *
 * @param yyscanner  Scanner handle used to look up the line number; may be
 *                   NULL, in which case no line number is printed.
 * @param state      Parser state; may be NULL, and its file and pool members
 *                   may be NULL as well (pbl_clear_state() resets them).
 * @param msg        Error text to report.
 */
void
protobuf_langerror(void* yyscanner, protobuf_lang_state_t *state, const char *msg)
{
    int lineno;
    void(*error_cb)(const char *format, ...);
    const char* filepath = (state && state->file) ?
        state->file->filename : "UNKNOWN";

    /* state->pool is NULL once pbl_clear_state() has run, so it has to be
       checked before the callback is read; fall back to pbl_printf. */
    error_cb = (state && state->pool && state->pool->error_cb) ?
        state->pool->error_cb : pbl_printf;

    lineno = yyscanner ? protobuf_langget_lineno(yyscanner) : -1;

    if (lineno > -1) {
        error_cb("Protobuf: Parsing file [%s:%d] failed: %s\n", filepath, lineno, msg);
    } else {
        error_cb("Protobuf: Parsing file [%s] failed: %s\n", filepath, msg);
    }
}
/* printf-style front end of protobuf_langerror(): builds the message from
   fmt and the variable arguments, then forwards it with the given scanner
   and state. */
void
protobuf_langerrorv(void* yyscanner, protobuf_lang_state_t *state, const char *fmt, ...)
{
    va_list args;
    char* text;

    va_start(args, fmt);
    text = g_strdup_vprintf(fmt, args);
    va_end(args);

    protobuf_langerror(yyscanner, state, text);
    g_free(text);
}
/* printf-style error reporter for callers that only hold the parser state:
   the scanner is taken from state->scanner (NULL when state is NULL) so that
   protobuf_langerror() can add the line number when one is available. */
void
pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...)
{
    va_list args;
    char* text;
    void* scanner = NULL;

    if (state) {
        scanner = state->scanner;
    }

    va_start(args, fmt);
    text = g_strdup_vprintf(fmt, args);
    va_end(args);

    protobuf_langerror(scanner, state, text);
    g_free(text);
}
/* Reset a parser state: free the string tokens collected in
   lex_string_tokens, clear the pool/file/scanner references and, when a pool
   is given, detach the state from it. A NULL state is ignored. */
static void
pbl_clear_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool)
{
    if (!state) {
        return;
    }

    /* Release the token strings before dropping the list head. */
    if (state->lex_string_tokens != NULL) {
        g_slist_free_full(state->lex_string_tokens, g_free);
        state->lex_string_tokens = NULL;
    }

    state->scanner = NULL;
    state->file = NULL;
    state->pool = NULL;

    if (pool != NULL) {
        pool->parser_state = NULL;
    }
}
/**
 * Prepare a parser state for parsing another file.
 *
 * The state is cleared first (see pbl_clear_state()), then bound to the pool
 * and to the file descriptor registered in pool->proto_files under filepath.
 *
 * @param state     State to reinitialize; ignored when NULL.
 * @param pool      Descriptor pool; when NULL the state is only cleared and
 *                  left without pool and file.
 * @param filepath  Key of the file in pool->proto_files.
 */
static void
pbl_reinit_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool, const char* filepath)
{
    if (state == NULL) {
        return;
    }

    pbl_clear_state(state, pool);

    state->pool = pool;
    if (pool == NULL) {
        /* The original looked up pool->proto_files before testing pool;
           without a pool there is no file table to consult. */
        state->file = NULL;
        return;
    }

    state->file = (pbl_file_descriptor_t*) g_hash_table_lookup(pool->proto_files, filepath);
    pool->parser_state = state;
}
/**
 * Parse every file queued in pool->proto_files_to_be_parsed.
 *
 * Each file gets a fresh scanner and a reinitialized parser state. A file is
 * removed from the queue once it has been parsed successfully; parsing stops
 * at the first failure.
 *
 * @param pool  Descriptor pool holding the queue of files to parse.
 * @return 0 when all queued files were parsed, -1 when a file could not be
 *         opened, otherwise the non-zero status returned by the scanner
 *         initialization or by the parser.
 */
int run_pbl_parser(pbl_descriptor_pool_t* pool)
{
    protobuf_lang_state_t state = {0};
    yyscan_t scanner;
    GSList* it;
    FILE * fp;
    int status = 0;
    const char* filepath;

    it = pool->proto_files_to_be_parsed;
    while (it) {
        filepath = (const char*) it->data;
        /* reinit state and scanner */
        pbl_reinit_state(&state, pool, filepath);
        scanner = NULL;

        /* Note that filepath is absolute path in proto_files */
        fp = ws_fopen(filepath, "r");
        if (fp == NULL) {
            protobuf_langerrorv(NULL, &state, "File does not exists!");
            status = -1;
            break;
        }

        status = protobuf_langlex_init(&scanner);
        if (status != 0) {
            protobuf_langerrorv(NULL, &state, "Initialize Protocol Buffers Languange scanner failed!\n");
            fclose(fp);
            break;
        }

        /* associate the parser state with the lexical analyzer state */
        protobuf_langset_extra(&state, scanner);
        state.scanner = scanner;

        protobuf_langrestart(fp, scanner);
        status = protobuf_langparse(scanner, &state);
        fclose(fp);

        /* The scanner is per file: release it here, otherwise every parsed
           file leaks the buffers allocated by protobuf_langlex_init(). */
        state.scanner = NULL;
        protobuf_langlex_destroy(scanner);

        if (status != 0) {
            /* grammar errors should have been reported during parsing */
            break;
        }

        /* remove the parsed file from list */
        pool->proto_files_to_be_parsed = it = g_slist_delete_link(pool->proto_files_to_be_parsed, it);
    }

    /* Always detach the stack-allocated state from the pool and free the
       string tokens of the last file, on success as well as on failure;
       otherwise pool->parser_state would point at a dead stack frame. */
    pbl_clear_state(&state, pool);
    return status;
}
DIAG_OFF_BYACC

View File

@ -0,0 +1,632 @@
%include {
/* protobuf_lang_parser.lemon
*
* C Protocol Buffers Language (PBL) Parser (for *.proto files)
* Copyright 2020, Huang Qiangxiong <qiangxiong.huang@qq.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
/* This parser is mainly to get MESSAGE, ENUM, and FIELD information from *.proto files.
* There are two formats of *.proto files:
* 1) Protocol Buffers Version 3 Language Specification:
* https://developers.google.com/protocol-buffers/docs/reference/proto3-spec
* 2) Protocol Buffers Version 2 Language Specification:
* https://developers.google.com/protocol-buffers/docs/reference/proto2-spec
* The published grammar contains some errors in the 'proto', 'option' (value) and 'reserved' (fieldName) definitions.
* This parser exists because Wireshark is mainly implemented in plain ANSI C, whereas the official
* Protocol Buffers Language parser is implemented in C++.
*/
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <glib.h>
#include <assert.h>
#include <wsutil/file_util.h>
#include "protobuf_lang_tree.h"
#include "protobuf_lang_parser.h"
#include "protobuf_lang_scanner_lex.h"
#define NAME_TO_BE_SET "<NAME_TO_BE_SET>"
#define NEED_NOT_NAME "<NEED_NOT_NAME>"
/* Error handling function for parser */
void protobuf_lang_error(void* yyscanner, protobuf_lang_state_t *state, const char *msg);
/* Extended error handling function */
void pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...);
/* It's just the approximate line number which is gotten when a grammar rule is reduced
by the parser (lemon). That might be overridden by the lineno argument of
pbl_set_node_name() later. */
#define CUR_LINENO (protobuf_lang_get_lineno(state->scanner))
} /* end of %include */
%name ProtobufLangParser
%extra_argument { protobuf_lang_state_t *state }
%token_type { protobuf_lang_token_t* }
%token_destructor {
/* We manage memory allocated for token values by ourself */
(void) state; /* Mark unused, similar to Q_UNUSED */
(void) $$; /* Mark unused, similar to Q_UNUSED */
}
%syntax_error {
    /* yyminor is the value of the offending token and has the pointer type
       declared by %token_type. It is NULL whenever the driver feeds a token
       with a NULL value (NOTE(review): presumably the end-of-input token;
       confirm against the code that calls the parser), so it must not be
       dereferenced unchecked. */
    const char* unexpected = (yyminor && yyminor->v) ? yyminor->v : "<end of input>";
    pbl_parser_error(state, "Syntax Error: unexpected token \"%s\"!", unexpected);
    state->grammar_error = TRUE;
}
%parse_failure {
pbl_parser_error(state, "Parse Error");
state->grammar_error = TRUE;
}
/* Keywords such as 'syntax', 'message', etc. can also be used as the names of messages, fields or enums.
So we tell Lemon: "If you are unable to parse this keyword, try treating it as an identifier instead." */
%fallback PT_IDENT PT_SYNTAX PT_IMPORT PT_WEAK PT_PUBLIC PT_PACKAGE PT_OPTION PT_REQUIRED PT_OPTIONAL.
%fallback PT_IDENT PT_REPEATED PT_ONEOF PT_MAP PT_RESERVED PT_ENUM PT_GROUP PT_EXTEND PT_EXTENSIONS.
%fallback PT_IDENT PT_MESSAGE PT_SERVICE PT_RPC PT_STREAM PT_RETURNS PT_TO.
%type strLit { gchar* }
%type label { gchar* }
%type type { gchar* }
%type keyType { gchar* }
%type messageType { gchar* }
%type constant { gchar* }
%type exIdent { protobuf_lang_token_t* }
%type optionName { protobuf_lang_token_t* }
%type messageName { protobuf_lang_token_t* }
%type enumName { protobuf_lang_token_t* }
%type streamName { protobuf_lang_token_t* }
%type fieldName { protobuf_lang_token_t* }
%type oneofName { protobuf_lang_token_t* }
%type mapName { protobuf_lang_token_t* }
%type serviceName { protobuf_lang_token_t* }
%type rpcName { protobuf_lang_token_t* }
%type groupName { protobuf_lang_token_t* }
%type protoBody { pbl_node_t* }
%type topLevelDef { pbl_node_t* }
%type message { pbl_node_t* }
%type messageBody { pbl_node_t* }
%type rpc { pbl_node_t* }
%type rpcDecl { pbl_node_t* }
%type field { pbl_node_t* }
%type oneofField { pbl_node_t* }
%type enum { pbl_node_t* }
%type enumBody { pbl_node_t* }
%type enumField { pbl_node_t* }
%type service { pbl_node_t* }
%type serviceBody { pbl_node_t* }
%type stream { pbl_node_t* }
%type streamDecl { pbl_node_t* }
%type fieldOptions { pbl_node_t* }
%type fieldOption { pbl_node_t* }
%type oneof { pbl_node_t* }
%type oneofBody { pbl_node_t* }
%type mapField { pbl_node_t* }
%type group { pbl_node_t* }
%type extend { pbl_node_t* }
%type extendBody { pbl_node_t* }
%type intLit { guint64 }
%type fieldNumber { int }
%type enumNumber { int }
/* We don't care about the types of following nodes:
syntax import package option enumValueOptions enumValueOption rpcBody streamBody
extensions reserved ranges range quoteFieldNames emptyStatement
*/
%start_symbol proto
/* v2/v3: proto = syntax { import | package | option | topLevelDef | emptyStatement } */
/* Correction to the official PBL grammar: proto = { syntax } { import | package | option | topLevelDef | emptyStatement }
The default syntax version is "proto2". */
proto ::= wholeProtoBody.
proto ::= syntax wholeProtoBody.
/* Finalizes the package node built by protoBody: gives it the real package
   name and line number, then registers it in (or merges it into) the package table of the pool. */
wholeProtoBody ::= protoBody(B).
{
/* Replace the placeholder node name with the package name and line recorded by the package rule. */
pbl_set_node_name(B, state->file->package_name_lineno, state->file->package_name);
/* From here on, use the name stored in the package node instead of the token string. */
state->file->package_name = pbl_get_node_name(B);
/* Put the definitions of this file into the package table, keyed by package name. */
pbl_node_t* packnode = (pbl_node_t*)g_hash_table_lookup(state->pool->packages, state->file->package_name);
if (packnode) {
/* Another file already registered this package: move the children over and drop the new node. */
/* NOTE(review): if pbl_free_node() also releases the name of the node, state->file->package_name
   (set above from this node) would dangle after this branch - confirm against pbl_free_node(). */
pbl_merge_children(packnode, B);
pbl_free_node(B);
} else {
/* First file of this package: the table takes the node, with its own copy of the key. */
g_hash_table_insert(state->pool->packages, g_strdup(state->file->package_name), B);
}
}
/* v2: syntax = "syntax" "=" quote "proto2" quote ";" */
/* v3: syntax = "syntax" "=" quote "proto3" quote ";" */
/* The grammar accepts any string literal after "syntax ="; the action below
   validates it. "proto3" and "proto2" select the syntax version of the file,
   anything else is reported and flagged as a grammar error. */
syntax ::= PT_SYNTAX PT_ASSIGN strLit(B) PT_SEMICOLON.
{
if (!strcmp(B, "proto3")) {
state->file->syntax_version = 3;
} else if (!strcmp(B, "proto2")) {
state->file->syntax_version = 2;
} else {
/* Unknown version string: report it and mark the parse as failed. */
pbl_parser_error(state, "Unrecognized syntax identifier [%s]. This parser only recognizes \"proto3\" or \"proto2\"!", B);
state->grammar_error = TRUE;
}
}
/* Accumulates the statements of one file into a single package node.
   Only topLevelDef adds a child; the other rules just pass the node through. */
protoBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_PACKAGE, NAME_TO_BE_SET); } /* create an empty package node; its name is set in wholeProtoBody */
protoBody ::= protoBody import. /* default action is {A = B; } */
protoBody ::= protoBody package.
protoBody ::= protoBody option.
protoBody(A) ::= protoBody(B) topLevelDef(C). { A = B; pbl_add_child(A, C); }
protoBody ::= protoBody emptyStatement.
/* v2/v3: import = "import" [ "weak" | "public" ] strLit ";" */
import ::= PT_IMPORT strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); } /* append file to todo list */
import ::= PT_IMPORT PT_PUBLIC strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); }
import ::= PT_IMPORT PT_WEAK strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); }
/* v2/v3: package = "package" fullIdent ";" */
/* Remembers the package name and the line it was declared on in the file
   descriptor. The name is only borrowed from the token here. */
package ::= PT_PACKAGE exIdent(B) PT_SEMICOLON.
{ /* The memory of (B) will be freed after parsing, but package_name will
be replaced later (in the wholeProtoBody rule) by the newly allocated
name of the package node */
state->file->package_name = B->v;
state->file->package_name_lineno = B->ln;
}
/* v2/v3: option = "option" optionName "=" constant ";" */
/* Official PBL bugfix: option = "option" optionName "=" ( constant | customOptionValue ) ";" */
/* Option statements are only recognized syntactically: neither rule has an
   action, so no option node is created for them. */
option ::= PT_OPTION optionName PT_ASSIGN constant PT_SEMICOLON.
option ::= PT_OPTION optionName PT_ASSIGN customOptionValue PT_SEMICOLON.
/* v2/v3: optionName = ( ident | "(" fullIdent ")" ) { "." ident } */
/* The parenthesized forms reuse the token of the first identifier (B) as the
   result and replace its string value with the rebuilt name, e.g. "(a.b)" or
   "(a.b).c". The rebuilt string is registered with pbl_store_string_token(),
   which hands it to the parser state for later release. */
optionName ::= exIdent.
optionName(A) ::= PT_LPAREN exIdent(B) PT_RPAREN.
{ A = B; A->v = pbl_store_string_token(state, g_strconcat("(", B->v, ")", NULL)); }
optionName(A) ::= PT_LPAREN exIdent(B) PT_RPAREN exIdent(C). /* Note that the exIdent contains "." */
{ A = B; A->v = pbl_store_string_token(state, g_strconcat("(", B->v, ")", C->v, NULL)); }
/* Allow formats that are not defined in the official PBL specification, like:
option (google.api.http) = { post: "/v3alpha/kv/put" body: "*" };
option (google.api.http) = { post: "/v3alpha/kv/put", body: "*" };
option (google.api.http) = { post: "/v3alpha/kv/put" { any format } body: "*" };
*/
/* A custom option value is a brace-delimited body that is skipped rather than
   interpreted: the body accepts any sequence of identifiers, string literals,
   integer literals, non-brace symbols and nested brace-delimited values.
   None of these rules has an action. */
customOptionValue ::= PT_LCURLY customOptionBody PT_RCURLY.
customOptionBody ::= .
customOptionBody ::= customOptionBody exIdent.
customOptionBody ::= customOptionBody strLit.
customOptionBody ::= customOptionBody symbolsWithoutCurly.
customOptionBody ::= customOptionBody intLit.
/* nesting: braces must balance because they are only accepted through customOptionValue */
customOptionBody ::= customOptionBody customOptionValue.
/* every operator/punctuation token except PT_LCURLY and PT_RCURLY */
symbolsWithoutCurly ::= PT_LPAREN.
symbolsWithoutCurly ::= PT_RPAREN.
symbolsWithoutCurly ::= PT_LBRACKET.
symbolsWithoutCurly ::= PT_RBRACKET.
symbolsWithoutCurly ::= PT_EQUAL.
symbolsWithoutCurly ::= PT_NOTEQUAL.
symbolsWithoutCurly ::= PT_NOTEQUAL2.
symbolsWithoutCurly ::= PT_GEQUAL.
symbolsWithoutCurly ::= PT_LEQUAL.
symbolsWithoutCurly ::= PT_ASSIGN_PLUS.
symbolsWithoutCurly ::= PT_ASSIGN.
symbolsWithoutCurly ::= PT_PLUS.
symbolsWithoutCurly ::= PT_MINUS.
symbolsWithoutCurly ::= PT_MULTIPLY.
symbolsWithoutCurly ::= PT_DIV.
symbolsWithoutCurly ::= PT_LOGIC_OR.
symbolsWithoutCurly ::= PT_OR.
symbolsWithoutCurly ::= PT_LOGIC_AND.
symbolsWithoutCurly ::= PT_AND.
symbolsWithoutCurly ::= PT_NOT.
symbolsWithoutCurly ::= PT_NEG.
symbolsWithoutCurly ::= PT_XOR.
symbolsWithoutCurly ::= PT_SHL.
symbolsWithoutCurly ::= PT_SHR.
symbolsWithoutCurly ::= PT_PERCENT.
symbolsWithoutCurly ::= PT_DOLLAR.
symbolsWithoutCurly ::= PT_COND.
symbolsWithoutCurly ::= PT_SEMICOLON.
symbolsWithoutCurly ::= PT_DOT.
symbolsWithoutCurly ::= PT_COMMA.
symbolsWithoutCurly ::= PT_COLON.
symbolsWithoutCurly ::= PT_LESS.
symbolsWithoutCurly ::= PT_GREATER.
/* v2: topLevelDef = message | enum | extend | service */
/* v3: topLevelDef = message | enum | service */
/* Pass-through rules without an action: the node produced by the matched
   definition is what protoBody adds to the package node. */
topLevelDef ::= message.
topLevelDef ::= enum.
topLevelDef ::= extend. /*v2 only */
topLevelDef ::= service.
/* v2/v3: message = "message" messageName messageBody */
/* The node is created by messageBody; here it only receives its name and the
   line number of the name token (B). */
message(A) ::= PT_MESSAGE messageName(B) PT_LCURLY messageBody(C) PT_RCURLY.
{ A = C; pbl_set_node_name(A, B->ln, B->v); }
/* v2: messageBody = "{" { field | enum | message | extend | extensions | group | option | oneof | mapField | reserved | emptyStatement } "}" */
/* v3: messageBody = "{" { field | enum | message | option | oneof | mapField | reserved | emptyStatement } "}" */
/* Builds an unnamed PBL_MESSAGE node (named later by the message, group or
   similar enclosing rule) and collects its members. Fields, nested enums,
   nested messages, groups and map fields become children. The members of a
   oneof are merged directly into the message and the oneof node is freed.
   extend, extensions, option, reserved and empty statements have no action. */
messageBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); }
messageBody(A) ::= messageBody(B) field(C). { A = B; pbl_add_child(A, C); }
messageBody(A) ::= messageBody(B) enum(C). { A = B; pbl_add_child(A, C); }
messageBody(A) ::= messageBody(B) message(C). { A = B; pbl_add_child(A, C); }
/* NOTE(review): the node built by a nested 'extend' is neither added nor
   released by this rule; unless a %destructor for 'extend' exists elsewhere
   in the grammar, it looks like it is leaked - confirm. */
messageBody ::= messageBody extend. /* v2 only */
messageBody ::= messageBody extensions. /* v2 only */
messageBody(A) ::= messageBody(B) group(C). /* v2 only */ { A = B; pbl_add_child(A, C); }
messageBody ::= messageBody option.
messageBody(A) ::= messageBody(B) oneof(C). { A = B; pbl_merge_children(A, C); pbl_free_node(C); }
messageBody(A) ::= messageBody(B) mapField(C). { A = B; pbl_add_child(A, C); }
messageBody ::= messageBody reserved.
messageBody ::= messageBody emptyStatement.
/* v2/v3: enum = "enum" enumName enumBody */
/* The node is created by enumBody; here it receives its name and the line
   number of the name token (B). */
enum(A) ::= PT_ENUM enumName(B) PT_LCURLY enumBody(C) PT_RCURLY.
{ A = C; pbl_set_node_name(A, B->ln, B->v); }
/* v2/v3: enumBody = "{" { option | enumField | emptyStatement } "}" */
/* Builds an unnamed PBL_ENUM node; only enum fields become children, options
   and empty statements have no action. */
enumBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_ENUM, NAME_TO_BE_SET); }
enumBody ::= enumBody option.
enumBody(A) ::= enumBody(B) enumField(C). { A = B; pbl_add_child(A, C); }
enumBody ::= enumBody emptyStatement.
/* v2/v3: enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { "," enumValueOption } "]" ]";" */
/* Creates an enum value node from the name token (B: name and line number)
   and the number (C). The value options of the first form are parsed but not
   stored. The second form deliberately does not include the trailing ";":
   that semicolon is then consumed by enumBody as an emptyStatement. */
enumField(A) ::= exIdent(B) PT_ASSIGN enumNumber(C) PT_LBRACKET enumValueOptions PT_RBRACKET PT_SEMICOLON.
{ A = pbl_create_enum_value_node(state->file, B->ln, B->v, C); }
enumField(A) ::= exIdent(B) PT_ASSIGN enumNumber(C).
{ A = pbl_create_enum_value_node(state->file, B->ln, B->v, C); }
/* v2/v3: must be in the range of a 32-bit integer. negative values are not recommended. */
/* Narrows the unsigned 64-bit literal (B) to the int stored in the enum value
   node, applying an optional sign. */
enumNumber(A) ::= intLit(B). { A = (int)B; }
enumNumber(A) ::= PT_PLUS intLit(B). { A = (int)B; }
/* Negate in the unsigned domain before narrowing. The former "-(int)B" is a
   signed overflow (undefined behaviour) for B == 2147483648, i.e. for the
   valid enum value -2147483648. All other inputs give the same result as before. */
enumNumber(A) ::= PT_MINUS intLit(B). { A = (int)(0 - (guint64)B); }
/* v2/v3: enumValueOption { "," enumValueOption } */
/* Enum value options are recognized syntactically only; no rule here has an action. */
enumValueOptions ::= enumValueOption.
enumValueOptions ::= enumValueOptions PT_COMMA enumValueOption.
/* v2/v3: enumValueOption = optionName "=" constant */
enumValueOption ::= optionName PT_ASSIGN constant.
/* v2: service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}" */
/* v3: service = "service" serviceName "{" { option | rpc | emptyStatement } "}" */
/* The node is created by serviceBody; here it receives its name and the line
   number of the name token (B). */
service(A) ::= PT_SERVICE serviceName(B) PT_LCURLY serviceBody(C) PT_RCURLY.
{ A = C; pbl_set_node_name(A, B->ln, B->v); }
/* Builds an unnamed PBL_SERVICE node; rpc and stream method nodes become
   children, options and empty statements have no action. */
serviceBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_SERVICE, NAME_TO_BE_SET); }
serviceBody ::= serviceBody option.
serviceBody(A) ::= serviceBody(B) rpc(C). { A = B; pbl_add_child(A, C); }
serviceBody ::= serviceBody emptyStatement.
serviceBody(A) ::= serviceBody(B) stream(C). /* v2 only */ { A = B; pbl_add_child(A, C); }
/* v2/v3: rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" (( "{" {option | emptyStatement } "}" ) | ";") */
/* An rpc is its declaration followed by ";" or by a body; both rules pass the
   method node of rpcDecl through without an action. */
rpc ::= rpcDecl PT_SEMICOLON.
rpc ::= rpcDecl PT_LCURLY rpcBody PT_RCURLY.
/* "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" */
/* One rule per combination of the optional "stream" keywords. The two
   boolean arguments of pbl_create_method_node() are in_is_stream (request)
   and out_is_stream (response); C is the request type, D the response type. */
rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN messageType(D) PT_RPAREN.
{ A = pbl_create_method_node(state->file, B->ln, B->v, C, FALSE, D, FALSE); }
rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN PT_STREAM messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN messageType(D) PT_RPAREN.
{ A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, FALSE); }
rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType(D) PT_RPAREN.
{ A = pbl_create_method_node(state->file, B->ln, B->v, C, FALSE, D, TRUE); }
rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN PT_STREAM messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType(D) PT_RPAREN.
{ A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, TRUE); }
/* the body of an rpc may only hold options and empty statements; nothing is stored */
rpcBody ::= .
rpcBody ::= rpcBody option.
rpcBody ::= rpcBody emptyStatement.
/* v2: stream = "stream" streamName "(" messageType "," messageType ")" (( "{" { option | emptyStatement } "}") | ";" ) */
/* Like rpc: the method node of streamDecl is passed through without an action. */
stream ::= streamDecl PT_SEMICOLON.
stream ::= streamDecl PT_LCURLY streamBody PT_RCURLY.
/* v2 only */
/* A stream is stored as a method node whose request (C) and response (D) are
   both marked as streaming. */
streamDecl(A) ::= PT_STREAM streamName(B) PT_LPAREN messageType(C) PT_COMMA messageType(D) PT_RPAREN.
{ A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, TRUE); }
/* v2 only */
/* the body of a stream may only hold options and empty statements; nothing is stored */
streamBody ::= .
streamBody ::= streamBody option.
streamBody ::= streamBody emptyStatement.
/* v2: label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
/* v3: field = [ "repeated" ] type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
/* Four variants: with or without a label (B), with or without options (F).
   A missing label or missing options is passed to pbl_create_field_node() as
   NULL. The node's line number is that of the field name token (D). */
field(A) ::= type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
{ A = pbl_create_field_node(state->file, D->ln, NULL, C, D->v, E, NULL); }
field(A) ::= type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
{ A = pbl_create_field_node(state->file, D->ln, NULL, C, D->v, E, F); }
field(A) ::= label(B) type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
{ A = pbl_create_field_node(state->file, D->ln, B, C, D->v, E, NULL); }
field(A) ::= label(B) type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
{ A = pbl_create_field_node(state->file, D->ln, B, C, D->v, E, F); }
/* v2: label = "required" | "optional" | "repeated" */
/* The label value is the keyword text taken from the token. */
label(A) ::= PT_REQUIRED(B). { A = B->v; }
label(A) ::= PT_OPTIONAL(B). { A = B->v; }
label(A) ::= PT_REPEATED(B). { A = B->v; }
/* v2/v3: type = "double" | "float" | "int32" | "int64" | "uint32" | "uint64"
| "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64"
| "bool" | "string" | "bytes" | messageType | enumType
*/
/* The grammar does not distinguish the built-in type names from message or
   enum type names: any identifier is accepted and its text is the type name. */
type(A) ::= exIdent(B). { A = B->v; }
/* v2/v3: The smallest field number is 1, and the largest is 2^29 - 1, or 536,870,911. */
/* The literal is narrowed to int; the range stated above is not checked by these rules. */
fieldNumber(A) ::= intLit(B). { A = (int)B; }
fieldNumber(A) ::= PT_PLUS intLit(B). { A = (int)B; }
/* v2/v3: fieldOptions = fieldOption { "," fieldOption } */
/* The first option creates the PBL_OPTIONS container node; every option,
   including the first, is added to it as a child. */
fieldOptions(A) ::= fieldOption(B).
{ A = pbl_create_node(state->file, CUR_LINENO, PBL_OPTIONS, NEED_NOT_NAME); pbl_add_child(A, B); }
fieldOptions(A) ::= fieldOptions(B) PT_COMMA fieldOption(C).
{ A = B; pbl_add_child(A, C); }
/* v2/v3: fieldOption = optionName "=" constant */
/* B supplies the option name and line number, C is the constant's text. */
fieldOption(A) ::= optionName(B) PT_ASSIGN constant(C).
{ A = pbl_create_option_node(state->file, B->ln, B->v, C); }
/* v2 only: group = label "group" groupName "=" fieldNumber messageBody */
/* A group is represented by the message node built by messageBody, named
   after the group. The label and the field number are parsed but not used by
   the action. */
group(A) ::= label PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY.
{ A = C; pbl_set_node_name(A, B->ln, B->v); }
groupName ::= exIdent.
/* v2/v3: oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}" */
/* The node is created by oneofBody; here it receives its name and the line
   number of the name token (B). messageBody later merges its children into
   the enclosing message and frees the oneof node. */
oneof(A) ::= PT_ONEOF oneofName(B) PT_LCURLY oneofBody(C) PT_RCURLY.
{ A = C; pbl_set_node_name(A, B->ln, B->v); }
oneofBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_ONEOF, NAME_TO_BE_SET); }
oneofBody(A) ::= oneofBody(B) oneofField(C). { A = B; pbl_add_child(A, C); }
oneofBody ::= oneofBody emptyStatement.
/* v2/v3: oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
/* Same as a field without a label: the label argument is always NULL. */
oneofField(A) ::= type(B) fieldName(C) PT_ASSIGN fieldNumber(D) PT_LBRACKET fieldOptions(E) PT_RBRACKET PT_SEMICOLON.
{ A = pbl_create_field_node(state->file, C->ln, NULL, B, C->v, D, E); }
oneofField(A) ::= type(B) fieldName(C) PT_ASSIGN fieldNumber(D) PT_SEMICOLON.
{ A = pbl_create_field_node(state->file, C->ln, NULL, B, C->v, D, NULL); }
/* v2/v3: mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
/* Creates a map field node named after the map (D) and gives it two child
   fields: "key" with number 1 and type B, and "value" with number 2 and type
   C. Both children carry the line number of the map name. The two rules
   differ only in whether field options (F) are present. */
mapField(A) ::= PT_MAP PT_LESS keyType(B) PT_COMMA type(C) PT_GREATER mapName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
{
A = pbl_create_map_field_node(state->file, D->ln, D->v, E, F);
pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, B, "key", 1, NULL)); /* add key field */
pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, C, "value", 2, NULL)); /* add value field */
}
mapField(A) ::= PT_MAP PT_LESS keyType(B) PT_COMMA type(C) PT_GREATER mapName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
{
A = pbl_create_map_field_node(state->file, D->ln, D->v, E, NULL);
pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, B, "key", 1, NULL)); /* add key field */
pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, C, "value", 2, NULL)); /* add value field */
}
/* keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
"fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" */
/* Any identifier is accepted here; the list above is not enforced by this rule. */
keyType(A) ::= exIdent(B). { A = B->v; }
/* v2 only: extensions = "extensions" ranges ";" */
/* extensions and reserved statements are recognized syntactically only; none
   of the rules in this group has an action. */
extensions ::= PT_EXTENSIONS ranges PT_SEMICOLON.
/* v2/v3: reserved = "reserved" ( ranges | fieldNames ) ";" */
reserved ::= PT_RESERVED ranges PT_SEMICOLON.
reserved ::= PT_RESERVED quoteFieldNames PT_SEMICOLON.
/* v2/v3: ranges = range { "," range } */
ranges ::= range.
ranges ::= ranges PT_COMMA range.
/* v2/v3: range = intLit [ "to" ( intLit | "max" ) ] */
range ::= intLit.
range ::= intLit PT_TO intLit.
/* the upper bound "max" arrives as an ordinary identifier; any identifier is accepted */
range ::= intLit PT_TO exIdent.
/* v2/v3: fieldNames = fieldName { "," fieldName }
Note that there is an error in the BNF definition of reserved fieldName. It is a strLit, not an ident.
*/
quoteFieldNames ::= strLit.
quoteFieldNames ::= quoteFieldNames PT_COMMA strLit.
/* v2 only: extend = "extend" messageType "{" {field | group | emptyStatement} "}" */
/* An extend block is stored as a message node named "<messageType>Extend".
   Its line number is that of the "extend" keyword token (X). The concatenated
   name is registered with pbl_store_string_token(), which hands it to the
   parser state for later release. */
extend(A) ::= PT_EXTEND(X) messageType(B) PT_LCURLY extendBody(C) PT_RCURLY.
{ A = C; pbl_set_node_name(A, X->ln, pbl_store_string_token(state, g_strconcat(B, "Extend", NULL))); }
/* v2 only */
/* Builds the unnamed PBL_MESSAGE node; fields and groups become children. */
extendBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); }
extendBody(A) ::= extendBody(B) field(C). { A = B; pbl_add_child(A, C); }
extendBody(A) ::= extendBody(B) group(C). { A = B; pbl_add_child(A, C); }
extendBody ::= extendBody emptyStatement.
/* All kinds of names are plain (extended) identifiers. These rules have no
   action, so the identifier token, with its text (v) and line number (ln),
   is what the using rules read. */
messageName ::= exIdent.
enumName ::= exIdent.
streamName ::= exIdent.
fieldName ::= exIdent.
oneofName ::= exIdent.
mapName ::= exIdent.
serviceName ::= exIdent.
rpcName ::= exIdent.
/* messageType = [ "." ] { ident "." } messageName */
/* Unlike the name rules above, messageType yields only the identifier text. */
messageType(A) ::= exIdent(B). { A = B->v; }
/* enumType = [ "." ] { ident "." } enumName */
/*enumType ::= exIdent.*/
/* intLit = decimalLit | octalLit | hexLit */
/* Converts the token text to an unsigned 64-bit value. The octal form skips
   the leading "0" (v+1) and the hex form skips the "0x"/"0X" prefix (v+2)
   before conversion. Conversion errors are not checked (end pointer is NULL). */
intLit(A) ::= PT_DECIMALLIT(B). { A = g_ascii_strtoull(B->v, NULL, 10); }
intLit(A) ::= PT_OCTALLIT(B). { A = g_ascii_strtoull(B->v+1, NULL, 8); }
intLit(A) ::= PT_HEXLIT(B). { A = g_ascii_strtoull(B->v+2, NULL, 16); }
/* emptyStatement = ";" */
/* A lone semicolon; accepted in every body rule and ignored. */
emptyStatement ::= PT_SEMICOLON.
/* constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | strLit | boolLit */
/* Every constant is represented by its text. Integer literals are re-printed
   in decimal (a leading "+" is dropped, a leading "-" is kept). Signed
   identifiers keep their sign character. Newly built strings are registered
   with pbl_store_string_token(), which hands them to the parser state for
   later release. */
constant(A) ::= exIdent(B). { A = B->v; } /* boolLit is parsed as exIdent */
constant ::= strLit.
constant(A) ::= intLit(B). { A = pbl_store_string_token(state, g_strdup_printf("%" G_GUINT64_FORMAT, B)); }
constant(A) ::= PT_PLUS intLit(B). { A = pbl_store_string_token(state, g_strdup_printf("%" G_GUINT64_FORMAT, B)); }
constant(A) ::= PT_MINUS intLit(B). { A = pbl_store_string_token(state, g_strdup_printf("-%" G_GUINT64_FORMAT, B)); }
constant(A) ::= PT_PLUS exIdent(B). { A = pbl_store_string_token(state, g_strconcat("+", B->v, NULL)); } /* This covers floatLit. */
constant(A) ::= PT_MINUS exIdent(B). { A = pbl_store_string_token(state, g_strconcat("-", B->v, NULL)); }
/* exIdent is the scanner's "extended identifier" token, passed through unchanged. */
exIdent ::= PT_IDENT.
/* The token text includes the opening and closing quote characters; copy the
   text between them (skip 1 char, drop 2 from the length). */
strLit(A) ::= PT_STRLIT(B). { A = pbl_store_string_token(state, g_strndup(B->v + 1, strlen(B->v) - 2)); }
%code {
/* Report a parse error through the pool's error callback.
 *
 * The message is prefixed with the name of the file being parsed ("UNKNOWN"
 * if there is none) and, when a scanner is given, the scanner's current line
 * number. pbl_printf is used when no callback is configured.
 *
 * yyscanner: scanner used to look up the line number; may be NULL
 * state:     parser state; may be NULL
 * msg:       the already formatted message text
 */
void
protobuf_lang_error(void* yyscanner, protobuf_lang_state_t *state, const char *msg)
{
    int lineno;
    void(*error_cb)(const char *format, ...);
    const char* filepath = (state && state->file) ?
                            state->file->filename : "UNKNOWN";

    /* A cleared state has pool == NULL (see pbl_clear_state), so the pool
       must be checked before its callback is read. */
    error_cb = (state && state->pool && state->pool->error_cb) ?
                state->pool->error_cb : pbl_printf;

    lineno = yyscanner ? protobuf_lang_get_lineno(yyscanner) : -1;

    if (lineno > -1) {
        error_cb("Protobuf: Parsing file [%s:%d] failed: %s\n", filepath, lineno, msg);
    } else {
        error_cb("Protobuf: Parsing file [%s] failed: %s\n", filepath, msg);
    }
}
/* printf-style front end of protobuf_lang_error().
 *
 * Formats the message and reports it together with the scanner stored in the
 * parser state (no scanner, and therefore no line number, if state is NULL).
 */
void
pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...)
{
    va_list args;
    char* text;

    va_start(args, fmt);
    text = g_strdup_vprintf(fmt, args);
    va_end(args);

    protobuf_lang_error(state ? state->scanner : NULL, state, text);
    g_free(text);
}
/* Return the parser state to its idle condition and release what it owns.
 *
 * Destroys the flex scanner and the Lemon parser (if present), frees the
 * string and struct tokens collected during lexing, resets the bookkeeping
 * fields and detaches the state from the pool.
 *
 * state: state to clear; NULL is ignored
 * pool:  pool whose parser_state link is reset; may be NULL
 */
static void
pbl_clear_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool)
{
    if (state == NULL) {
        return;
    }

    state->pool = NULL;
    state->file = NULL;
    state->grammar_error = FALSE;
    state->tmp_token = NULL;

    /* The scanner is created with protobuf_lang_lex_init() in run_pbl_parser()
       and is not destroyed anywhere else; dropping the pointer alone would
       leak one scanner per parsed file. */
    if (state->scanner) {
        protobuf_lang_lex_destroy(state->scanner);
        state->scanner = NULL;
    }

    if (state->pParser) {
        ProtobufLangParserFree(state->pParser, g_free);
        state->pParser = NULL;
    }

    if (state->lex_string_tokens) {
        g_slist_free_full(state->lex_string_tokens, g_free);
        state->lex_string_tokens = NULL;
    }

    if (state->lex_struct_tokens) {
        g_slist_free_full(state->lex_struct_tokens, g_free);
        state->lex_struct_tokens = NULL;
    }

    if (pool) {
        pool->parser_state = NULL;
    }
}
/* Prepare the parser state for parsing one file.
 *
 * Clears whatever the state held before, binds it to the pool, looks up the
 * file descriptor registered for filepath in pool->proto_files and allocates
 * a fresh Lemon parser.
 *
 * state:    state to (re)initialize; NULL is ignored
 * pool:     descriptor pool; may be NULL, in which case no file is bound
 * filepath: key of the file in pool->proto_files
 */
static void
pbl_reinit_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool, const char* filepath)
{
    if (state == NULL) {
        return;
    }

    pbl_clear_state(state, pool);

    state->pool = pool;
    /* pool is allowed to be NULL (it is checked below), so it must not be
       dereferenced unconditionally for the lookup */
    state->file = (pool && filepath) ?
        (pbl_file_descriptor_t*) g_hash_table_lookup(pool->proto_files, filepath) : NULL;
    state->pParser = ProtobufLangParserAlloc(g_malloc);

    if (pool) {
        pool->parser_state = state;
    }
}
/* Parse every file queued in pool->proto_files_to_be_parsed.
 *
 * Each file is tokenized by the flex scanner and the tokens are pushed one by
 * one into the Lemon parser. A file is removed from the queue once it has
 * been parsed successfully; parsing it may append further files (imports) to
 * the queue. Processing stops at the first failure.
 *
 * Returns 0 on success, -1 if a file cannot be opened, -2 on a grammar error,
 * or the non-zero status of protobuf_lang_lex_init() if the scanner cannot be
 * created.
 */
int run_pbl_parser(pbl_descriptor_pool_t* pool)
{
    protobuf_lang_state_t state = {0};
    yyscan_t scanner;
    GSList* it;
    FILE * fp;
    int status = 0;
    int token_id;
    const char* filepath;

    it = pool->proto_files_to_be_parsed;
    while (it) {
        filepath = (const char*) it->data;
        /* reinit state and scanner */
        pbl_reinit_state(&state, pool, filepath);
        scanner = NULL;

        /* Note that filepath is absolute path in proto_files */
        fp = ws_fopen(filepath, "r");
        if (fp == NULL) {
            pbl_parser_error(&state, "File does not exist!");
            status = -1;
            goto finish;
        }

        status = protobuf_lang_lex_init(&scanner);
        if (status != 0) {
            /* no trailing newline: protobuf_lang_error() appends one itself */
            pbl_parser_error(&state, "Initialize Protocol Buffers Language scanner failed!");
            fclose(fp);
            goto finish;
        }

        /* associate the parser state with the lexical analyzer state */
        protobuf_lang_set_extra(&state, scanner);
        state.scanner = scanner;

        protobuf_lang_restart(fp, scanner);
        /* uncomment the next line for debugging */
        /* ProtobufLangParserTrace(stdout, ">>>"); */
        while (!state.grammar_error && (token_id = protobuf_lang_lex(scanner))) {
            /* state.tmp_token contains token string value and lineno information */
            ProtobufLangParser(state.pParser, token_id, state.tmp_token, &state);
        }
        fclose(fp);

        if (!state.grammar_error) {
            /* token 0 tells the parser that the input is complete */
            ProtobufLangParser(state.pParser, 0, NULL, &state);
        }

        /* Checked after the end-of-input token as well: an error can still be
           flagged while the parser finishes the file (for example by the
           action of the 'syntax' rule), and such a file must not be treated
           as parsed. */
        if (state.grammar_error) {
            status = -2;
            goto finish;
        }

        /* remove the parsed file from list */
        pool->proto_files_to_be_parsed = it = g_slist_delete_link(pool->proto_files_to_be_parsed, it);
    }

finish:
    pbl_clear_state(&state, pool);
    return status;
}
} /* end of %code block */

View File

@ -8,12 +8,6 @@
*/
%option reentrant
/*
* We want to generate code that can be used by a reentrant parser
* generated by Bison or Berkeley YACC.
*/
%option bison-bridge
/*
* We don't read interactively from the terminal.
*/
@ -30,10 +24,10 @@
%option extra-type="protobuf_lang_state_t *"
/*
* Prefix scanner routines with "protobuf_lang" rather than "yy", so this scanner
* Prefix scanner routines with "protobuf_lang_" rather than "yy", so this scanner
* can coexist with other scanners.
*/
%option prefix="protobuf_lang"
%option prefix="protobuf_lang_"
/*
* We have to override the memory allocators so that we don't get
@ -55,7 +49,7 @@
/* protobuf_lang_scanner.l
*
* C Protocol Buffers Language Lexer (for *.proto files)
* Copyright 2019, Huang Qiangxiong <qiangxiong.huang@qq.com>
* Copyright 2020, Huang Qiangxiong <qiangxiong.huang@qq.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
@ -65,7 +59,7 @@
#include <string.h>
#include <glib.h>
#include "protobuf_lang_tree.h"
#include "protobuf_lang.h"
#include "protobuf_lang_parser.h"
/*
* Disable diagnostics in the code generated by Flex.
@ -80,100 +74,100 @@ DIAG_OFF_FLEX
/*
* Macros for the allocators, to discard the extra argument.
*/
#define protobuf_langalloc(size, yyscanner) (void *)malloc(size)
#define protobuf_langrealloc(ptr, size, yyscanner) (void *)realloc((char *)(ptr), (size))
#define protobuf_langfree(ptr, yyscanner) free((char *)ptr)
#define protobuf_lang_alloc(size, yyscanner) (void *)malloc(size)
#define protobuf_lang_realloc(ptr, size, yyscanner) (void *)realloc((char *)(ptr), (size))
#define protobuf_lang_free(ptr, yyscanner) free((char *)ptr)
int old_status;
/* error handling function defined in bison (*.y) file */
extern void
protobuf_langerrorv(void* yyscanner, protobuf_lang_state_t *state, const char *fmt, ...);
/* Extended error handling function defined in protobuf_lang_parser.lemon */
void pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...);
/* duplicate the text and keep the pointer in parser state for freeing later automatically */
static gchar*
strdup_and_store(void* yyscanner, const char* text);
#define PROTOBUF_LANG_PARSE(token_type) \
protobuf_lang_get_extra(yyscanner)->tmp_token = g_new0(protobuf_lang_token_t, 1); \
protobuf_lang_get_extra(yyscanner)->tmp_token->v = strdup_and_store(yyscanner, yytext); \
protobuf_lang_get_extra(yyscanner)->tmp_token->ln = protobuf_lang_get_lineno(yyscanner); \
return (token_type);
%}
%x COMMENT
%%
/* operations or symbols (PT_ means PBL Token) */
\" return PT_QUOTE;
"(" return PT_LPAREN;
")" return PT_RPAREN;
"[" return PT_LBRACKET;
"]" return PT_RBRACKET;
"{" return PT_LCURLY;
"}" return PT_RCURLY;
"==" return PT_EQUAL;
"!=" return PT_NOTEQUAL;
"<>" return PT_NOTEQUAL2;
">=" return PT_GEQUAL;
"<=" return PT_LEQUAL;
"+=" return PT_ASSIGN_PLUS;
"=" return PT_ASSIGN;
"+" return PT_PLUS;
"-" return PT_MINUS;
"*" return PT_MULTIPLY;
"/" return PT_DIV;
"||" return PT_LOGIC_OR;
"|" return PT_OR;
"&&" return PT_LOGIC_AND;
"&" return PT_AND;
"!" return PT_NOT;
"~" return PT_NEG;
"^" return PT_XOR;
"<<" return PT_SHL;
">>" return PT_SHR;
"%" return PT_PERCENT;
"$" return PT_DOLLAR;
"?" return PT_COND;
";" return PT_SEMICOLON;
"." return PT_DOT;
"," return PT_COMMA;
":" return PT_COLON;
"<" return PT_LESS;
">" return PT_GREATER;
"(" PROTOBUF_LANG_PARSE(PT_LPAREN);
")" PROTOBUF_LANG_PARSE(PT_RPAREN);
"[" PROTOBUF_LANG_PARSE(PT_LBRACKET);
"]" PROTOBUF_LANG_PARSE(PT_RBRACKET);
"{" PROTOBUF_LANG_PARSE(PT_LCURLY);
"}" PROTOBUF_LANG_PARSE(PT_RCURLY);
"==" PROTOBUF_LANG_PARSE(PT_EQUAL);
"!=" PROTOBUF_LANG_PARSE(PT_NOTEQUAL);
"<>" PROTOBUF_LANG_PARSE(PT_NOTEQUAL2);
">=" PROTOBUF_LANG_PARSE(PT_GEQUAL);
"<=" PROTOBUF_LANG_PARSE(PT_LEQUAL);
"+=" PROTOBUF_LANG_PARSE(PT_ASSIGN_PLUS);
"=" PROTOBUF_LANG_PARSE(PT_ASSIGN);
"+" PROTOBUF_LANG_PARSE(PT_PLUS);
"-" PROTOBUF_LANG_PARSE(PT_MINUS);
"*" PROTOBUF_LANG_PARSE(PT_MULTIPLY);
"/" PROTOBUF_LANG_PARSE(PT_DIV);
"||" PROTOBUF_LANG_PARSE(PT_LOGIC_OR);
"|" PROTOBUF_LANG_PARSE(PT_OR);
"&&" PROTOBUF_LANG_PARSE(PT_LOGIC_AND);
"&" PROTOBUF_LANG_PARSE(PT_AND);
"!" PROTOBUF_LANG_PARSE(PT_NOT);
"~" PROTOBUF_LANG_PARSE(PT_NEG);
"^" PROTOBUF_LANG_PARSE(PT_XOR);
"<<" PROTOBUF_LANG_PARSE(PT_SHL);
">>" PROTOBUF_LANG_PARSE(PT_SHR);
"%" PROTOBUF_LANG_PARSE(PT_PERCENT);
"$" PROTOBUF_LANG_PARSE(PT_DOLLAR);
"?" PROTOBUF_LANG_PARSE(PT_COND);
";" PROTOBUF_LANG_PARSE(PT_SEMICOLON);
"." PROTOBUF_LANG_PARSE(PT_DOT);
"," PROTOBUF_LANG_PARSE(PT_COMMA);
":" PROTOBUF_LANG_PARSE(PT_COLON);
"<" PROTOBUF_LANG_PARSE(PT_LESS);
">" PROTOBUF_LANG_PARSE(PT_GREATER);
/* key words */
syntax yylval->sval = strdup_and_store(yyscanner, yytext); return PT_SYNTAX;
import yylval->sval = strdup_and_store(yyscanner, yytext); return PT_IMPORT;
weak yylval->sval = strdup_and_store(yyscanner, yytext); return PT_WEAK;
public yylval->sval = strdup_and_store(yyscanner, yytext); return PT_PUBLIC;
package yylval->sval = strdup_and_store(yyscanner, yytext); return PT_PACKAGE;
option yylval->sval = strdup_and_store(yyscanner, yytext); return PT_OPTION;
required yylval->sval = strdup_and_store(yyscanner, yytext); return PT_REQUIRED;
optional yylval->sval = strdup_and_store(yyscanner, yytext); return PT_OPTIONAL;
repeated yylval->sval = strdup_and_store(yyscanner, yytext); return PT_REPEATED;
oneof yylval->sval = strdup_and_store(yyscanner, yytext); return PT_ONEOF;
map yylval->sval = strdup_and_store(yyscanner, yytext); return PT_MAP;
reserved yylval->sval = strdup_and_store(yyscanner, yytext); return PT_RESERVED;
enum yylval->sval = strdup_and_store(yyscanner, yytext); return PT_ENUM;
group yylval->sval = strdup_and_store(yyscanner, yytext); return PT_GROUP;
extend yylval->sval = strdup_and_store(yyscanner, yytext); return PT_EXTEND;
extensions yylval->sval = strdup_and_store(yyscanner, yytext); return PT_EXTENSIONS;
message yylval->sval = strdup_and_store(yyscanner, yytext); return PT_MESSAGE;
service yylval->sval = strdup_and_store(yyscanner, yytext); return PT_SERVICE;
rpc yylval->sval = strdup_and_store(yyscanner, yytext); return PT_RPC;
stream yylval->sval = strdup_and_store(yyscanner, yytext); return PT_STREAM;
returns yylval->sval = strdup_and_store(yyscanner, yytext); return PT_RETURNS;
to yylval->sval = strdup_and_store(yyscanner, yytext); return PT_TO;
/* key values */
["']proto2["'] yylval->sval = strdup_and_store(yyscanner, yytext); return PT_PROTO2;
["']proto3["'] yylval->sval = strdup_and_store(yyscanner, yytext); return PT_PROTO3;
syntax PROTOBUF_LANG_PARSE(PT_SYNTAX);
import PROTOBUF_LANG_PARSE(PT_IMPORT);
weak PROTOBUF_LANG_PARSE(PT_WEAK);
public PROTOBUF_LANG_PARSE(PT_PUBLIC);
package PROTOBUF_LANG_PARSE(PT_PACKAGE);
option PROTOBUF_LANG_PARSE(PT_OPTION);
required PROTOBUF_LANG_PARSE(PT_REQUIRED);
optional PROTOBUF_LANG_PARSE(PT_OPTIONAL);
repeated PROTOBUF_LANG_PARSE(PT_REPEATED);
oneof PROTOBUF_LANG_PARSE(PT_ONEOF);
map PROTOBUF_LANG_PARSE(PT_MAP);
reserved PROTOBUF_LANG_PARSE(PT_RESERVED);
enum PROTOBUF_LANG_PARSE(PT_ENUM);
group PROTOBUF_LANG_PARSE(PT_GROUP);
extend PROTOBUF_LANG_PARSE(PT_EXTEND);
extensions PROTOBUF_LANG_PARSE(PT_EXTENSIONS);
message PROTOBUF_LANG_PARSE(PT_MESSAGE);
service PROTOBUF_LANG_PARSE(PT_SERVICE);
rpc PROTOBUF_LANG_PARSE(PT_RPC);
stream PROTOBUF_LANG_PARSE(PT_STREAM);
returns PROTOBUF_LANG_PARSE(PT_RETURNS);
to PROTOBUF_LANG_PARSE(PT_TO);
/* intLit values */
0|[1-9][0-9]* yylval->u64val = g_ascii_strtoull(yytext, NULL, 10); return PT_DECIMALLIT;
0[0-7]* yylval->u64val = g_ascii_strtoull(yytext+1, NULL, 8); return PT_OCTALLIT;
0[xX][0-9a-fA-F]+ yylval->u64val = g_ascii_strtoull(yytext+2, NULL, 16); return PT_HEXLIT;
0|[1-9][0-9]* PROTOBUF_LANG_PARSE(PT_DECIMALLIT);
0[0-7]* PROTOBUF_LANG_PARSE(PT_OCTALLIT);
0[xX][0-9a-fA-F]+ PROTOBUF_LANG_PARSE(PT_HEXLIT);
/* Using extended identifier because we care only about position */
[a-zA-Z0-9_][a-zA-Z0-9_.+-]* yylval->sval = strdup_and_store(yyscanner, yytext); return PT_IDENT;
\"(\\.|\"\"|[^"\n"])*\" yylval->sval = g_strndup(yytext + 1, strlen(yytext) - 2); return PT_STRLIT;
\'(\\.|\'\'|[^"\n"])*\' yylval->sval = g_strndup(yytext + 1, strlen(yytext) - 2); return PT_STRLIT;
[a-zA-Z0-9_.][a-zA-Z0-9_.+-]* PROTOBUF_LANG_PARSE(PT_IDENT);
\"(\\.|\"\"|[^"\n"])*\" PROTOBUF_LANG_PARSE(PT_STRLIT);
\'(\\.|\'\'|[^"\n"])*\' PROTOBUF_LANG_PARSE(PT_STRLIT);
/* comments */
"//"[^\r\n]*
@ -184,13 +178,13 @@ to yylval->sval = strdup_and_store(yyscanner, yytext); retur
/* space & tab */
[ \t\r\n]
/* prevent flex jam */
. { protobuf_langerrorv(yyscanner, protobuf_langget_extra(yyscanner), "unexpected token in proto file!\n"); }
. { pbl_parser_error(protobuf_lang_get_extra(yyscanner), "unexpected token in proto file!\n"); }
%%
static gchar*
strdup_and_store(void* yyscanner, const char* text) {
return pbl_store_string_token(protobuf_langget_extra(yyscanner), g_strdup(text));
return pbl_store_string_token(protobuf_lang_get_extra(yyscanner), g_strdup(text));
}
/*

View File

@ -17,9 +17,6 @@
#include "protobuf_lang_tree.h"
#include "protobuf-helper.h" /* only for PROTOBUF_TYPE_XXX enumeration */
extern int
pbl_get_current_lineno(void* scanner);
extern void
pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...);
@ -207,6 +204,7 @@ pbl_add_proto_file_to_be_parsed(pbl_descriptor_pool_t* pool, const char* filepat
file->filename = path;
file->syntax_version = 2;
file->package_name = PBL_DEFAULT_PACKAGE_NAME;
file->package_name_lineno = -1;
file->pool = pool;
/* store in hash table and list */
@ -782,18 +780,17 @@ pbl_foreach_message(const pbl_descriptor_pool_t* pool, void (*cb)(const pbl_mess
*/
static void
pbl_init_node(pbl_node_t* node, pbl_file_descriptor_t* file, pbl_node_type_t nodetype, const char* name)
pbl_init_node(pbl_node_t* node, pbl_file_descriptor_t* file, int lineno, pbl_node_type_t nodetype, const char* name)
{
node->nodetype = nodetype;
node->name = g_strdup(name);
node->file = file;
node->lineno = (file && file->pool && file->pool->parser_state && file->pool->parser_state->scanner) ?
pbl_get_current_lineno(file->pool->parser_state->scanner) : -1;
node->lineno = (lineno > -1) ? lineno : -1;
}
/* create a normal node */
pbl_node_t*
pbl_create_node(pbl_file_descriptor_t* file, pbl_node_type_t nodetype, const char* name)
pbl_create_node(pbl_file_descriptor_t* file, int lineno, pbl_node_type_t nodetype, const char* name)
{
pbl_node_t* node = NULL;
@ -813,15 +810,18 @@ pbl_create_node(pbl_file_descriptor_t* file, pbl_node_type_t nodetype, const cha
default:
node = g_new0(pbl_node_t, 1);
}
pbl_init_node(node, file, nodetype, name);
pbl_init_node(node, file, lineno, nodetype, name);
return node;
}
pbl_node_t*
pbl_set_node_name(pbl_node_t* node, const char* newname)
pbl_set_node_name(pbl_node_t* node, int lineno, const char* newname)
{
g_free(node->name);
node->name = g_strdup(newname);
if (lineno > -1) {
node->lineno = lineno;
}
return node;
}
@ -836,12 +836,12 @@ pbl_get_option_by_name(pbl_node_t* options, const char* name)
}
/* create a method (rpc or stream of service) node */
pbl_node_t* pbl_create_method_node(pbl_file_descriptor_t* file,
pbl_node_t* pbl_create_method_node(pbl_file_descriptor_t* file, int lineno,
const char* name, const char* in_msg_type,
gboolean in_is_stream, const char* out_msg_type, gboolean out_is_stream)
{
pbl_method_descriptor_t* node = g_new0(pbl_method_descriptor_t, 1);
pbl_init_node(&node->basic_info, file, PBL_METHOD, name);
pbl_init_node(&node->basic_info, file, lineno, PBL_METHOD, name);
node->in_msg_type = g_strdup(in_msg_type);
node->in_is_stream = in_is_stream;
@ -865,12 +865,12 @@ pbl_get_simple_type_enum_value_by_typename(const char* type_name)
}
/* create a field node */
pbl_node_t* pbl_create_field_node(pbl_file_descriptor_t* file, const char* label,
pbl_node_t* pbl_create_field_node(pbl_file_descriptor_t* file, int lineno, const char* label,
const char* type_name, const char* name, int number, pbl_node_t* options)
{
pbl_option_descriptor_t* default_option;
pbl_field_descriptor_t* node = g_new0(pbl_field_descriptor_t, 1);
pbl_init_node(&node->basic_info, file, PBL_FIELD, name);
pbl_init_node(&node->basic_info, file, lineno, PBL_FIELD, name);
node->number = number;
node->options_node = options;
@ -944,11 +944,11 @@ pbl_node_t* pbl_create_field_node(pbl_file_descriptor_t* file, const char* label
}
/* create a map field node */
pbl_node_t* pbl_create_map_field_node(pbl_file_descriptor_t* file,
pbl_node_t* pbl_create_map_field_node(pbl_file_descriptor_t* file, int lineno,
const char* name, int number, pbl_node_t* options)
{
pbl_field_descriptor_t* node = g_new0(pbl_field_descriptor_t, 1);
pbl_init_node(&node->basic_info, file, PBL_MAP_FIELD, name);
pbl_init_node(&node->basic_info, file, lineno, PBL_MAP_FIELD, name);
node->number = number;
node->type_name = g_strconcat(name, "MapEntry", NULL);
@ -961,21 +961,21 @@ pbl_node_t* pbl_create_map_field_node(pbl_file_descriptor_t* file,
/* create an enumeration field node */
pbl_node_t*
pbl_create_enum_value_node(pbl_file_descriptor_t* file, const char* name, int number)
pbl_create_enum_value_node(pbl_file_descriptor_t* file, int lineno, const char* name, int number)
{
pbl_enum_value_descriptor_t* node = g_new0(pbl_enum_value_descriptor_t, 1);
pbl_init_node(&node->basic_info, file, PBL_ENUM_VALUE, name);
pbl_init_node(&node->basic_info, file, lineno, PBL_ENUM_VALUE, name);
node->number = number;
return (pbl_node_t*)node;
}
/* create an option node */
pbl_node_t* pbl_create_option_node(pbl_file_descriptor_t* file,
pbl_node_t* pbl_create_option_node(pbl_file_descriptor_t* file, int lineno,
const char* name, const char* value)
{
pbl_option_descriptor_t* node = g_new0(pbl_option_descriptor_t, 1);
pbl_init_node(&node->basic_info, file, PBL_OPTION, name);
pbl_init_node(&node->basic_info, file, lineno, PBL_OPTION, name);
if (value)
node->value = g_strdup(value);
@ -993,7 +993,7 @@ pbl_add_child(pbl_node_t* parent, pbl_node_t* child)
/* add a message node for mapField first */
if (child->nodetype == PBL_MAP_FIELD) {
node = pbl_create_node(child->file, PBL_MESSAGE, ((pbl_field_descriptor_t*)child)->type_name);
node = pbl_create_node(child->file, child->lineno, PBL_MESSAGE, ((pbl_field_descriptor_t*)child)->type_name);
pbl_merge_children(node, child);
pbl_add_child(parent, node);
}

View File

@ -21,6 +21,7 @@ extern "C" {
#include <stdio.h>
#include <stdarg.h>
#include "ws_attributes.h"
#define PBL_DEFAULT_PACKAGE_NAME ""
@ -57,6 +58,7 @@ typedef struct {
const char* filename;
int syntax_version;
const char* package_name;
int package_name_lineno;
pbl_descriptor_pool_t* pool;
} pbl_file_descriptor_t;
@ -133,12 +135,22 @@ typedef struct {
char* value;
} pbl_option_descriptor_t;
/* The token structure used by the parser: the text of a token together
   with the line of the .proto file on which the token was found. */
typedef struct _protobuf_lang_token_t {
gchar* v; /* token string value */
int ln; /* line number of this token in the .proto file */
} protobuf_lang_token_t;
/* parser state: the context carried around while a .proto file is parsed */
typedef struct _protobuf_lang_state_t {
pbl_descriptor_pool_t* pool; /* pool will keep the parsing result */
pbl_file_descriptor_t* file; /* info of current parsing file */
GSList* lex_string_tokens; /* NOTE(review): presumably the strings kept by pbl_store_string_token() -- confirm */
GSList* lex_struct_tokens; /* protobuf_lang_token_t pointers kept by pbl_store_struct_token() */
void* scanner; /* NOTE(review): presumably the lexer instance -- confirm */
void* pParser; /* NOTE(review): presumably the generated parser instance -- confirm */
gboolean grammar_error; /* NOTE(review): presumably set once a grammar error has been seen -- confirm */
protobuf_lang_token_t* tmp_token; /* just for passing token value from protobuf_lang_lex() to ProtobufLangParser() */
} protobuf_lang_state_t;
/* Store chars created by strdup or g_strconcat into protobuf_lang_state_t temporarily,
@ -151,6 +163,15 @@ pbl_store_string_token(protobuf_lang_state_t* parser_state, char* dupstr)
return dupstr;
}
/* Remember a protobuf_lang_token_t in the parser state so that it can be
   freed when the protobuf_lang_state_t is released.
   The token is appended to parser_state->lex_struct_tokens and the very same
   pointer is handed back to the caller. */
static inline protobuf_lang_token_t*
pbl_store_struct_token(protobuf_lang_state_t* parser_state, protobuf_lang_token_t* newtoken)
{
    GSList* stored_tokens = parser_state->lex_struct_tokens;

    /* g_slist_append() returns the (possibly new) head of the list */
    stored_tokens = g_slist_append(stored_tokens, newtoken);
    parser_state->lex_struct_tokens = stored_tokens;

    return newtoken;
}
/* default error_cb */
static inline void
pbl_printf(const char* fmt, ...)
@ -355,11 +376,11 @@ pbl_foreach_message(const pbl_descriptor_pool_t* pool, void (*cb)(const pbl_mess
/* create a normal node */
pbl_node_t*
pbl_create_node(pbl_file_descriptor_t* file, pbl_node_type_t nodetype, const char* name);
pbl_create_node(pbl_file_descriptor_t* file, int lineno, pbl_node_type_t nodetype, const char* name);
/* change the name of node */
pbl_node_t*
pbl_set_node_name(pbl_node_t* node, const char* newname);
pbl_set_node_name(pbl_node_t* node, int lineno, const char* newname);
/* get the name of node */
static inline const char*
@ -378,7 +399,7 @@ pbl_add_child(pbl_node_t* parent, pbl_node_t* child);
/* create an enumeration value node */
pbl_node_t*
pbl_create_enum_value_node(pbl_file_descriptor_t* file, const char* name, int number);
pbl_create_enum_value_node(pbl_file_descriptor_t* file, int lineno, const char* name, int number);
/* merge one('from') node's children to another('to') node, and return the 'to' pointer */
pbl_node_t*
@ -386,19 +407,19 @@ pbl_merge_children(pbl_node_t* to, pbl_node_t* from);
/* create a field node */
pbl_node_t*
pbl_create_field_node(pbl_file_descriptor_t* file, const char* label, const char* type_name, const char* name, int number, pbl_node_t* options);
pbl_create_field_node(pbl_file_descriptor_t* file, int lineno, const char* label, const char* type_name, const char* name, int number, pbl_node_t* options);
/* create a map field node */
pbl_node_t*
pbl_create_map_field_node(pbl_file_descriptor_t* file, const char* name, int number, pbl_node_t* options);
pbl_create_map_field_node(pbl_file_descriptor_t* file, int lineno, const char* name, int number, pbl_node_t* options);
/* create a method (rpc or stream of service) node */
pbl_node_t*
pbl_create_method_node(pbl_file_descriptor_t* file, const char* name, const char* in_msg_type, gboolean in_is_stream, const char* out_msg_type, gboolean out_is_stream);
pbl_create_method_node(pbl_file_descriptor_t* file, int lineno, const char* name, const char* in_msg_type, gboolean in_is_stream, const char* out_msg_type, gboolean out_is_stream);
/* create an option node */
pbl_node_t*
pbl_create_option_node(pbl_file_descriptor_t* file, const char* name, const char* value);
pbl_create_option_node(pbl_file_descriptor_t* file, int lineno, const char* name, const char* value);
/* free a pbl_node_t and its children. */
void