diff --git a/tools/asn2eth.py b/tools/asn2eth.py
new file mode 100644
index 0000000000..9689a5246d
--- /dev/null
+++ b/tools/asn2eth.py
@@ -0,0 +1,3604 @@
+#!/usr/bin/env python
+
+#
+# competh.py
+# ASN.1 to Ethereal dissector compiler
+#   2004 Tomas Kukosa
+#
+# $Id: asn2eth.py,v 1.1 2004/05/24 08:33:09 sahlberg Exp $
+#
+
+"""ASN.1 to Ethereal PER dissector compiler"""
+
+#
+# Compiler from ASN.1 specification to the Ethereal PER dissector
+#
+# Based on ASN.1 to Python compiler from Aaron S. Lav's PyZ3950 package licensed under the X Consortium license
+# http://www.pobox.com/~asl2/software/PyZ3950/
+# (ASN.1 to Python compiler functionality is broken but not removed, it could be revived if necessary)
+#
+# It requires Dave Beazley's PLY parsing package licensed under the LGPL (tested with version 1.3.1)
+# http://systems.cs.uchicago.edu/ply/
+#
+#
+# ITU-T Recommendation X.680 (07/2002),
+#   Information technology – Abstract Syntax Notation One (ASN.1): Specification of basic notation
+#
+# ITU-T Recommendation X.682 (07/2002),
+#   Information technology – Abstract Syntax Notation One (ASN.1): Constraint specification
+#
+# ITU-T Recommendation X.683 (07/2002),
+#   Information technology – Abstract Syntax Notation One (ASN.1): Parameterization of ASN.1 specifications
+#
+
+from __future__ import nested_scopes
+
+import warnings
+
+class LexError(Exception): pass
+class ParseError(Exception): pass
+
+# 11 ASN.1 lexical items
+
+static_tokens = {
+    r'::='    : 'ASSIGNMENT',   # 11.16 Assignment lexical item
+    r'\.\.'   : 'RANGE',        # 11.17 Range separator
+    r'\.\.\.' : 'ELLIPSIS',     # 11.18 Ellipsis
+    #r'\[\['  : 'LVERBRACK',    # 11.19 Left version brackets
+    #r'\]\]'  : 'RVERBRACK',    # 11.20 Right version brackets
+    # 11.26 Single character lexical items
+    r'\{' : 'LBRACE',
+    r'\}' : 'RBRACE',
+    r'<'  : 'LT',
+    #r'>'  : 'GT',
+    r','  : 'COMMA',
+    r'\.' : 'DOT',
+    r'\(' : 'LPAREN',
+    r'\)' : 'RPAREN',
+    r'\[' : 'LBRACK',
+    r'\]' : 'RBRACK',
+    r'-'  : 'MINUS',
+    #r':'  : 'COLON',
+    #r'='  : 'EQ',
+    #r'"'  : 'QUOTATION',
+    #r"'"  : 'APOSTROPHE',
+    r';'  : 'SEMICOLON',
+    #r'@'  : 'AT',
+    #r'\!' : 'EXCLAMATION',
+    #r'\^' : 'CIRCUMFLEX'
+}
+
+# 11.27 Reserved words
+
+# all keys in reserved_words must start w/ upper case
+reserved_words = {
+    'TAGS' : 'TAGS',
+    'BOOLEAN' : 'BOOLEAN',
+    'INTEGER' : 'INTEGER',
+    'BIT' : 'BIT',
+    'CHARACTER' : 'CHARACTER',
+    'STRING' : 'STRING',
+    'OCTET' : 'OCTET',
+    'NULL' : 'NULL',
+    'SEQUENCE': 'SEQUENCE',
+    'OF' : 'OF',
+    'SET' : 'SET',
+    'IMPLICIT': 'IMPLICIT',
+    'CHOICE' : 'CHOICE',
+    'ANY' : 'ANY',
+    'EXTERNAL' : 'EXTERNAL', # XXX added over base
+    'OPTIONAL':'OPTIONAL',
+    'DEFAULT' : 'DEFAULT',
+    'COMPONENTS': 'COMPONENTS',
+    'UNIVERSAL' : 'UNIVERSAL',
+    'APPLICATION' : 'APPLICATION',
+    'PRIVATE' : 'PRIVATE',
+    'TRUE' : 'TRUE',
+    'FALSE' : 'FALSE',
+    'BEGIN' : 'BEGIN',
+    'END' : 'END',
+    'DEFINITIONS' : 'DEFINITIONS',
+    'EXPLICIT' : 'EXPLICIT',
+    'ENUMERATED' : 'ENUMERATED',
+    'EXPORTS' : 'EXPORTS',
+    'IMPORTS' : 'IMPORTS',
+    'REAL' : 'REAL',
+    'INCLUDES': 'INCLUDES',
+    'MIN' : 'MIN',
+    'MAX' : 'MAX',
+    'SIZE' : 'SIZE',
+    'FROM' : 'FROM',
+    'PATTERN' : 'PATTERN',
+    'WITH' : 'WITH',
+    'COMPONENT': 'COMPONENT',
+    'PRESENT' : 'PRESENT',
+    'ABSENT' : 'ABSENT',
+    'DEFINED' : 'DEFINED',
+    'CONSTRAINED' : 'CONSTRAINED',
+    'BY' : 'BY',
+    'PLUS-INFINITY' : 'PLUS_INFINITY',
+    'MINUS-INFINITY' : 'MINUS_INFINITY',
+    'GeneralizedTime' : 'GeneralizedTime',
+    'UTCTime' : 'UTCTime',
+    'ObjectDescriptor': 'ObjectDescriptor',
+    'AUTOMATIC': 'AUTOMATIC',
+    'OBJECT': 'OBJECT',
+    'IDENTIFIER': 'IDENTIFIER',
+#    'OPERATION' : 'OPERATION',
+#    'ARGUMENT' : 'ARGUMENT',
+#    'RESULT' : 'RESULT',
+#    'ERRORS' : 'ERRORS',
+#    'LINKED' : 'LINKED',
+#    'ERROR' : 'ERROR',
+#    'PARAMETER' : 'PARAMETER',
+#    'BIND' : 'BIND',
+#    'BIND-ERROR' : 'BIND_ERROR',
+#    'UNBIND' : 'UNBIND',
+#    'APPLICATION-CONTEXT' : 'AC',
+#    'APPLICATON-SERVICE-ELEMENTS' : 'ASES',
+#    'REMOTE' : 'REMOTE',
+#    'INITIATOR' : 'INITIATOR',
+#    'RESPONDER' : 'RESPONDER',
+#    'APPLICATION-SERVICE-ELEMENT' : 'ASE',
+#    'OPERATIONS' : None,
+#    'EXTENSION-ATTRIBUTE' : 'EXTENSION_ATTRIBUTE',
+#    'EXTENSIONS' : None,
+#    'CHOSEN' : None,
+#    'EXTENSION' : None,
+#    'CRITICAL': None,
+#    'FOR' : None,
+#    'SUBMISSION' : None,
+#    'DELIVERY' : None,
+#    'TRANSFER' : None,
+#    'OBJECT' : None,
+#    'PORTS' : None,
+#    'PORT' : None,
+#    r'ABSTRACT\s*OPERATIONS' : 'ABSTR_OPS',
+#    'REFINE' : None,
+#    'AS' : None,
+#    'RECURRING' : None
+    }
+
+for k in static_tokens.keys ():
+    if static_tokens [k] == None:
+        static_tokens [k] = k
+
+StringTypes = ['Numeric', 'Printable', 'IA5', 'BMP', 'Universal', 'UTF8',
+               'Teletex', 'T61', 'Videotex', 'Graphic', 'ISO646', 'Visible',
+               'General']
+
+for s in StringTypes:
+    reserved_words[s + 'String'] = s + 'String'
+
+tokens = static_tokens.values() \
+         + reserved_words.values() \
+         + ['BSTRING', 'HSTRING', 'QSTRING',
+            'UCASE_IDENT', 'LCASE_IDENT',
+            'NUMBER', 'PYQUOTE']
+
+import __main__ # XXX blech!
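The `import __main__` above and the loop that opens the next block exist only to feed these tables to PLY: `lex.lex()` builds the lexer by scanning the calling module for names of the form `t_<TOKEN>` whose value is either a regular-expression string or a rule function. A minimal sketch of that convention follows, assuming PLY is importable as `lex` the way this script does it (newer PLY releases would use `import ply.lex as lex`); the `WORD`/`NUMBER` tokens and the sample input are purely illustrative and are not part of the patch.

    import lex                          # PLY 1.x-style import, as used in this file

    tokens = ('WORD', 'NUMBER')         # hypothetical token list, which PLY requires
    t_WORD   = r'[A-Za-z][A-Za-z0-9-]*' # string-valued t_<NAME> rules: the same form the
    t_NUMBER = r'0|[1-9][0-9]*'         # loop below injects for every static_tokens entry
    t_ignore = ' \t'

    def t_error(t):                     # PLY insists on an error rule
        raise SyntaxError('illegal character %r' % t.value[0])

    lexer = lex.lex()
    lexer.input('Answer 42')            # hypothetical input
    # repeated lexer.token() calls now yield a WORD token and a NUMBER token, then None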
+
+for (k, v) in static_tokens.items ():
+    __main__.__dict__['t_' + v] = k
+
+# 11.10 Binary strings
+def t_BSTRING (t):
+    r"'[01]*'B"
+    return t
+
+# 11.12 Hexadecimal strings
+def t_HSTRING (t):
+    r"'[0-9A-Fa-f]*'H"
+    return t
+
+def t_QSTRING (t):
+    r'"([^"]|"")*"'
+    return t # XXX might want to un-""
+
+def t_UCASE_IDENT (t):
+    r"[A-Z](-[a-zA-Z0-9]|[a-zA-Z0-9])*" # can't end w/ '-'
+    t.type = reserved_words.get(t.value, "UCASE_IDENT")
+    #t.value = t.value.replace('-', '_') # XXX is it OK to do this during lex
+    return t
+
+def t_LCASE_IDENT (t):
+    r"[a-z](-[a-zA-Z0-9]|[a-zA-Z0-9])*" # can't end w/ '-'
+    #t.value = t.value.replace ('-', '_') # XXX is it OK to do this during lex
+    return t
+
+# 11.8 Numbers
+def t_NUMBER (t):
+    r"0|([1-9][0-9]*)"
+    return t
+
+# 11.9 Real numbers
+# not supported yet
+
+# 11.6 Comments
+pyquote_str = 'PYQUOTE'
+def t_COMMENT(t):
+    r"--(-[^\-\n]|[^\-\n])*(--|\n|-\n|$|-$)"
+    if (t.value.find("\n") >= 0) : t.lineno += 1
+    if t.value[2:2+len (pyquote_str)] == pyquote_str:
+        t.value = t.value[2+len(pyquote_str):]
+        t.value = t.value.lstrip ()
+        t.type = pyquote_str
+        return t
+    return None
+
+t_ignore = " \t\r"
+
+def t_NEWLINE(t):
+    r'\n+'
+    t.lineno += t.value.count("\n")
+
+def t_error(t):
+    print "Error", t.value[:100], t.lineno
+    raise LexError
+
+
+import lex
+lexer = lex.lex(debug=0)
+
+import yacc
+
+class Ctx:
+    def __init__ (self, defined_dict, indent = 0):
+        self.tags_def = 'EXPLICIT' # default = explicit
+        self.indent_lev = 0
+        self.assignments = {}
+        self.dependencies = {}
+        self.pyquotes = []
+        self.defined_dict = defined_dict
+        self.name_ctr = 0
+    def spaces (self):
+        return " " * (4 * self.indent_lev)
+    def indent (self):
+        self.indent_lev += 1
+    def outdent (self):
+        self.indent_lev -= 1
+        assert (self.indent_lev >= 0)
+    def register_assignment (self, ident, val, dependencies):
+        if self.assignments.has_key (ident):
+            raise "Duplicate assignment for " + ident
+        if self.defined_dict.has_key (ident):
+            raise "cross-module duplicates for " + ident
+        self.defined_dict [ident] = 1
+        self.assignments[ident] = val
+        self.dependencies [ident] = dependencies
+        return ""
+        # return "#%s depends on %s" % (ident, str (dependencies))
+    def register_pyquote (self, val):
+        self.pyquotes.append (val)
+        return ""
+    def output_assignments (self):
+        already_output = {}
+        text_list = []
+        assign_keys = self.assignments.keys()
+        to_output_count = len (assign_keys)
+        while 1:
+            any_output = 0
+            for (ident, val) in self.assignments.iteritems ():
+                if already_output.has_key (ident):
+                    continue
+                ok = 1
+                for d in self.dependencies [ident]:
+                    if (not already_output.has_key (d) and
+                        d in assign_keys):
+                        ok = 0
+                if ok:
+                    text_list.append ("%s=%s" % (ident,
+                                                 self.assignments [ident]))
+                    already_output [ident] = 1
+                    any_output = 1
+                    to_output_count -= 1
+                    assert (to_output_count >= 0)
+            if not any_output:
+                if to_output_count == 0:
+                    break
+                # OK, we detected a cycle
+                cycle_list = []
+                for ident in self.assignments.iterkeys ():
+                    if not already_output.has_key (ident):
+                        depend_list = [d for d in self.dependencies[ident] if d in assign_keys]
+                        cycle_list.append ("%s(%s)" % (ident, ",".join (depend_list)))
+
+                text_list.append ("# Cycle XXX " + ",".join (cycle_list))
+                for (ident, val) in self.assignments.iteritems ():
+                    if not already_output.has_key (ident):
+                        text_list.append ("%s=%s" % (ident, self.assignments [ident]))
+                break
+
+        return "\n".join (text_list)
+    def output_pyquotes (self):
+        return "\n".join (self.pyquotes)
+    def make_new_name (self):
self.name_ctr += 1 + return "_compiler_generated_name_%d" % (self.name_ctr,) + +#--- EthCtx ------------------------------------------------------------------- +class EthCtx: + def __init__(self, conform, indent = 0): + self.tags_def = 'EXPLICIT' # default = explicit + self.conform = conform + self.assign = {} + self.assign_ord = [] + self.field = {} + self.field_ord = [] + self.type = {} + self.type_ord = [] + self.type_imp = [] + self.type_dep = {} + + def pvp(self): # PER dissector version postfix + if (self.new): + return '_new' + else: + return '' + + # API type + def Org(self): return not self.new + def New(self): return self.new + def Per(self): return self.encoding == 'per' + def OPer(self): return self.Org() and self.Per() + def NPer(self): return self.New() and self.Per() + def Ber(self): return self.encoding == 'ber' + def OBer(self): return self.Org() and self.Ber() + def NBer(self): return self.New() and self.Ber() + + def dbg(self, d): + if (self.dbgopt.find(d) >= 0): + return True + else: + return False + + #--- eth_reg_assign --------------------------------------------------------- + def eth_reg_assign(self, ident, val): + #print "eth_reg_assign(ident='%s')" % (ident) + if self.assign.has_key(ident): + raise "Duplicate assignment for " + ident + self.assign[ident] = val + self.assign_ord.append(ident) + + #--- eth_import_type -------------------------------------------------------- + def eth_import_type(self, ident, mod, proto): + #print "eth_import_type(ident='%s', mod='%s', prot='%s')" % (ident, mod, prot) + if self.type.has_key(ident): + raise "Duplicate type for " + ident + self.type[ident] = {'import' : mod, 'proto' : proto, + 'ethname' : '', + 'ftype' : 'FT_NONE', 'display' : 'BASE_NONE', + 'strings' : 'NULL'} + self.type_imp.append(ident) + + #--- eth_dep_add ------------------------------------------------------------ + def eth_dep_add(self, type, dep): + if self.type_dep.has_key(type): + self.type_dep[type].append(dep) + else: + self.type_dep[type] = [dep] + + #--- eth_reg_type ----------------------------------------------------------- + def eth_reg_type(self, ident, val): + #print "eth_reg_type(ident='%s')" % (ident) + if self.type.has_key(ident): + raise "Duplicate type for " + ident + self.type[ident] = { 'val' : val, 'import' : None } + if len(ident.split('/')) > 1: + self.type[ident]['tname'] = val.eth_tname() + else: + self.type[ident]['tname'] = ident.replace('-', '_') + self.type[ident]['export'] = self.conform.use_export(ident) + self.type[ident]['tname'] = self.conform.use_type_rename(ident, self.type[ident]['tname']) + self.type[ident]['ethname'] = '' + self.type_ord.append(ident) + + #--- eth_reg_field ---------------------------------------------------------- + def eth_reg_field(self, ident, type, idx='', parent=None, impl=False): + #print "eth_reg_field(ident='%s', type='%s')" % (ident, type) + if self.field.has_key(ident): + raise "Duplicate field for " + ident + self.field[ident] = {'type' : type, 'idx' : idx, 'impl' : impl} + self.field_ord.append(ident) + if parent: self.eth_dep_add(parent, type) + + #--- eth_prepare ------------------------------------------------------------ + def eth_prepare(self): + #--- types ------------------- + self.eth_type = {} + self.eth_type_ord = [] + self.eth_export_ord = [] + self.eth_type_dupl = {} + self.named_bit = [] + + for t in self.type_imp: + nm = t + self.eth_type[nm] = { 'import' : self.type[t]['import'], 'proto' : self.type[t]['proto'], 'ref' : []} + self.type[t]['ethname'] = nm + for t in 
self.type_ord: + nm = self.type[t]['tname'] + if ((nm.find('#') >= 0) or + ((len(t.split('/'))>1) and self.conform.get_fn_presence(t) and not self.conform.exist_type_rename(t))): + if len(t.split('/')) == 2 and t.split('/')[1] == '_item': # Sequnce of type at the 1st level + nm = t.split('/')[0] + t.split('/')[1] + elif t.split('/')[-1] == '_item': # Sequnce of type at next levels + nm = 'T_' + t.split('/')[-2] + t.split('/')[-1] + else: + nm = 'T_' + t.split('/')[-1] + nm = nm.replace('-', '_') + if self.eth_type.has_key(nm): + if self.eth_type_dupl.has_key(nm): + self.eth_type_dupl[nm].append(t) + else: + self.eth_type_dupl[nm] = [self.eth_type[nm]['ref'][0], t] + nm += str(len(self.eth_type_dupl[nm])-1) + if self.eth_type.has_key(nm): + self.eth_type[nm]['ref'].append(t) + else: + self.eth_type_ord.append(nm) + self.eth_type[nm] = { 'import' : None, 'proto' : self.proto, 'export' : 0, + 'val' : self.type[t]['val'], 'ref' : [t]} + self.type[t]['ethname'] = nm + if (not self.eth_type[nm]['export'] and self.type[t]['export']): # new export + self.eth_export_ord.append(nm) + self.eth_type[nm]['export'] |= self.type[t]['export'] + for t in self.eth_type_ord: + bits = self.eth_type[t]['val'].eth_named_bits() + if (bits): + for (val, id) in bits: + self.named_bit.append({'name' : id, 'val' : val, + 'ethname' : 'hf_%s_%s_%s' % (self.proto, t, id), + 'ftype' : 'FT_BOOLEAN', 'display' : '8', + 'strings' : 'NULL', + 'bitmask' : '0x'+('80','40','20','10','08','04','02','01')[val]}) + if self.eth_type[t]['val'].eth_need_tree(): + self.eth_type[t]['tree'] = "ett_%s_%s" % (self.proto, t) + else: + self.eth_type[t]['tree'] = None + + #--- fields ------------------------- + self.eth_hf = {} + self.eth_hf_ord = [] + self.eth_hf_dupl = {} + + for f in self.field_ord: + if len(f.split('/')) > 1 and f.split('/')[-1] == '_item': # Sequnce of type + nm = f.split('/')[-2] + f.split('/')[-1] + name = 'Item' + else: + nm = f.split('/')[-1] + name = nm + name += self.field[f]['idx'] + abbrev = nm.replace('-', '_') + nm = self.conform.use_field_rename(f, nm) + nm = "hf_%s_%s" % (self.proto, nm.replace('-', '_')) + t = self.field[f]['type'] + if self.type.has_key(t): + ethtype = self.type[t]['ethname'] + else: # undefined type + # dummy imported + print "Dummy imported: ", t + self.type[t] = {'import' : 'xxx', 'proto' : 'xxx', + 'ethname' : t, + 'ftype' : 'FT_NONE', 'display' : 'BASE_NONE', + 'strings' : 'NULL'} + self.eth_type[t] = { 'import' : 'xxx', 'proto' : 'xxx' , 'ref' : []} + ethtype = t + if self.eth_hf.has_key(nm): + if self.eth_hf_dupl.has_key(nm): + if self.eth_hf_dupl[nm].has_key(ethtype): + nm = self.eth_hf_dupl[nm][ethtype] + self.eth_hf[nm]['ref'].append(f) + self.field[f]['ethname'] = nm + continue + else: + nmx = nm + str(len(self.eth_hf_dupl[nm])) + self.eth_hf_dupl[nm][ethtype] = nmx + nm = nmx + else: + if self.eth_hf[nm]['ethtype'] == ethtype: + self.eth_hf[nm]['ref'].append(f) + self.field[f]['ethname'] = nm + continue + else: + self.eth_hf_dupl[nm] = {self.eth_hf[nm]['ethtype'] : nm, \ + ethtype : nm+'1'} + nm += '1' + self.eth_hf_ord.append(nm) + type = self.field[f]['type'] + while (not self.type[type]['import'] + and self.type[type]['val'].type == 'Type_Ref'): + type = self.type[type]['val'].val + #print self.field[f]['type'], ' -> ', type + if (self.type[type]['import']): + ftype = self.type[type]['ftype'] + display = self.type[type]['display'] + strings = self.type[type]['strings'] + else: + (ftype, display) = self.type[type]['val'].eth_ftype() + strings = 
self.type[type]['val'].eth_strings() + if strings == '$$': + strings = 'VALS(%s_vals)' % (self.type[type]['ethname']) + self.eth_hf[nm] = {'ethtype' : ethtype, 'ref' : [f], + 'name' : name, 'abbrev' : abbrev, + 'type' : ftype, + 'display' : display, + 'strings' : strings, + 'bitmask' : '0'} + self.field[f]['ethname'] = nm + #--- dependencies ------------------- + self.eth_type_ord1 = [] + self.eth_dep_cycle = [] + x = {} # already emitted + #print '# Dependency computation' + for t in self.type_ord: + if x.has_key(self.type[t]['ethname']): + continue + stack = [t] + stackx = {t : self.type_dep.get(t, [])[:]} + #print 'Push: %s : %s' % (t, str(stackx[t])) + while stack: + if stackx[stack[-1]]: # has dependencies + d = stackx[stack[-1]].pop(0) + if x.has_key(self.type[d]['ethname']) or self.type[d]['import']: + continue + if stackx.has_key(d): # cyclic dependency + c = stack[:] + c.reverse() + c = [d] + c[0:c.index(d)+1] + c.reverse() + self.eth_dep_cycle.append(c) + #print 'Cyclic: %s ' % (' -> '.join(c)) + continue + stack.append(d) + stackx[d] = self.type_dep.get(d, [])[:] + #print 'Push: %s : %s' % (d, str(stackx[d])) + else: + #print 'Pop: %s' % (stack[-1]) + del stackx[stack[-1]] + e = self.type[stack.pop()]['ethname'] + self.eth_type_ord1.append(e) + x[e] = True + + #--- eth_vals --------------------------------------------------------------- + def eth_vals(self, tname, vals): + out = "" + if (not self.eth_type[tname]['export'] & 0x02): + out += "static " + out += "const value_string %s_vals[] = {\n" % (tname) + for (val, id) in vals: + out += ' { %3s, "%s" },\n' % (val, id) + out += " { 0, NULL }\n};\n" + return out + + #--- eth_bits --------------------------------------------------------------- + def eth_bits(self, tname, bits): + out = "" + out += "static " + out += "asn_namedbit %s_bits[] = {\n" % (tname) + for (val, id) in bits: + out += ' { %2d, &hf_%s_%s_%s, -1, -1, NULL, NULL },\n' % (val, self.proto, tname, id) + out += " { 0, NULL, 0, 0, NULL, NULL }\n};\n" + return out + + #--- eth_type_fn_h ---------------------------------------------------------- + def eth_type_fn_h(self, tname): + out = "" + if (not self.eth_type[tname]['export'] & 0x01): + out += "static " + out += "guint32 " + if (self.OBer()): + out += "dissect_%s_%s(gboolean implicit_tag, tvbuff_t *tvb, guint32 offset, packet_info *pinfo, proto_tree *tree, int hf_index)" % (self.proto, tname) + elif (self.NPer()): + out += "dissect_%s_%s(tvbuff_t *tvb, guint32 offset, packet_info *pinfo, proto_tree *tree, int hf_index, proto_item **item, void *private_data)" % (self.proto, tname) + elif (self.OPer()): + out += "dissect_%s_%s(tvbuff_t *tvb, guint32 offset, packet_info *pinfo, proto_tree *tree, int hf_index)" % (self.proto, tname) + out += ";\n" + return out + + #--- eth_fn_call ------------------------------------------------------------ + def eth_fn_call(self, fname, ret=None, indent=2, par=None): + out = indent * ' ' + if (ret): + if (ret == 'return'): + out += 'return ' + else: + out += ret + ' = ' + out += fname + '(' + ind = len(out) + for i in range(len(par)): + if (i>0): out += ind * ' ' + out += ', '.join(par[i]) + if (i<(len(par)-1)): out += ',\n' + out += ');\n' + return out + + #--- eth_type_fn_hdr -------------------------------------------------------- + def eth_type_fn_hdr(self, tname): + out = '\n' + if (not self.eth_type[tname]['export'] & 0x01): + out += "static " + out += "guint32\n" + if (self.OBer()): + out += "dissect_%s_%s(gboolean implicit_tag, tvbuff_t *tvb, guint32 offset, packet_info 
*pinfo, proto_tree *tree, int hf_index) {\n" % (self.proto, tname) + elif (self.NPer()): + out += "dissect_%s_%s(tvbuff_t *tvb, guint32 offset, packet_info *pinfo, proto_tree *tree, int hf_index, proto_item **item, void *private_data) {\n" % (self.proto, tname) + elif (self.OPer()): + out += "dissect_%s_%s(tvbuff_t *tvb, guint32 offset, packet_info *pinfo, proto_tree *tree, int hf_index) {\n" % (self.proto, tname) + if self.conform.get_fn_presence(self.eth_type[tname]['ref'][0]): + out += self.conform.get_fn_text(self.eth_type[tname]['ref'][0], 'FN_HDR') + return out + + #--- eth_type_fn_ftr -------------------------------------------------------- + def eth_type_fn_ftr(self, tname): + out = '\n' + if self.conform.get_fn_presence(self.eth_type[tname]['ref'][0]): + out += self.conform.get_fn_text(self.eth_type[tname]['ref'][0], 'FN_FTR') + out += " return offset;\n" + out += "}\n" + return out + + #--- eth_type_fn_body ------------------------------------------------------- + def eth_type_fn_body(self, tname, body, pars=None): + if self.conform.get_fn_body_presence(self.eth_type[tname]['ref'][0]): + out = self.conform.get_fn_text(self.eth_type[tname]['ref'][0], 'FN_BODY') + elif pars: + out = body % pars + else: + out = body + return out + + #--- eth_output_fname ------------------------------------------------------- + def eth_output_fname (self, ftype, ext='c'): + fn = 'packet-' + self.outnm + if (ftype): + fn += '-' + ftype + fn += '.' + ext + return fn + + #--- eth_output_hf ---------------------------------------------------------- + def eth_output_hf (self): + fn = self.eth_output_fname('hf') + fx = file(fn, 'w') + fx.write(eth_fhdr(fn)) + for f in self.eth_hf_ord: + fx.write("%-50s/* %s */\n" % ("static int %s = -1; " % (f), self.eth_hf[f]['ethtype'])) + if (self.named_bit): + fx.write('/* named bits */\n') + for nb in self.named_bit: + fx.write("static int %s = -1;\n" % (nb['ethname'])) + fx.close() + + #--- eth_output_hf_arr ------------------------------------------------------ + def eth_output_hf_arr (self): + fn = self.eth_output_fname('hfarr') + fx = file(fn, 'w') + fx.write(eth_fhdr(fn)) + for f in self.eth_hf_ord: + if len(self.eth_hf[f]['ref']) == 1: + blurb = self.eth_hf[f]['ref'][0] + else: + blurb = '' + fx.write(' { &%s,\n' % (f)) + fx.write(' { "%s", "%s.%s",\n' % (self.eth_hf[f]['name'], self.proto, self.eth_hf[f]['abbrev'])) + fx.write(' %s, %s, %s, %s,\n' % (self.eth_hf[f]['type'], self.eth_hf[f]['display'], self.eth_hf[f]['strings'], self.eth_hf[f]['bitmask'])) + fx.write(' "%s", HFILL }},\n' % (blurb)) + for nb in self.named_bit: + blurb = '' + fx.write(' { &%s,\n' % (nb['ethname'])) + fx.write(' { "%s", "%s.%s",\n' % (nb['name'], self.proto, nb['name'])) + fx.write(' %s, %s, %s, %s,\n' % (nb['ftype'], nb['display'], nb['strings'], nb['bitmask'])) + fx.write(' "%s", HFILL }},\n' % (blurb)) + fx.close() + + #--- eth_output_ett --------------------------------------------------------- + def eth_output_ett (self): + fn = self.eth_output_fname('ett') + fx = file(fn, 'w') + fx.write(eth_fhdr(fn)) + #fx.write("static gint ett_%s = -1;\n" % (self.proto)) + for t in self.eth_type_ord: + if self.eth_type[t]['tree']: + fx.write("static gint %s = -1;\n" % (self.eth_type[t]['tree'])) + fx.close() + + #--- eth_output_ett_arr ----------------------------------------------------- + def eth_output_ett_arr(self): + fn = self.eth_output_fname('ettarr') + fx = file(fn, 'w') + fx.write(eth_fhdr(fn)) + #fx.write(" &ett_%s,\n" % (self.proto)) + for t in self.eth_type_ord: + if 
self.eth_type[t]['tree']: + fx.write(" &%s,\n" % (self.eth_type[t]['tree'])) + fx.close() + + def eth_output_export(self): + if (not len(self.eth_export_ord)): return + fn = self.eth_output_fname('exp', ext='h') + fx = file(fn, 'w') + fx.write(eth_fhdr(fn)) + for t in self.eth_export_ord: # vals + if (self.eth_type[t]['export'] & 0x02): + fx.write("const value_string %s_vals[];\n" % (t)) + for t in self.eth_export_ord: # functions + if (self.eth_type[t]['export'] & 0x01): + fx.write(self.eth_type_fn_h(t)) + fx.close() + + def eth_output_types(self): + def out_field(f): + t = self.eth_hf[f]['ethtype'] + if (self.Ber()): + x = {} + for r in self.eth_hf[f]['ref']: + x[self.field[r]['impl']] = self.field[r]['impl'] + else: + x = {False : False} + x = x.values() + x.sort() + for i in x: + if (i): + postfix = '_impl' + impl = 'TRUE' + else: + postfix = '' + impl = 'FALSE' + if (self.Ber()): + if (i): postfix = '_impl'; impl = 'TRUE' + else: postfix = ''; impl = 'FALSE' + out = 'static guint32 dissect_'+f+postfix+'(packet_info *pinfo, proto_tree *tree, tvbuff_t *tvb, int offset) {\n' + par=((impl, 'tvb', 'offset', 'pinfo', 'tree', f),) + else: + out = 'static guint32 dissect_'+f+'(tvbuff_t *tvb, int offset, packet_info *pinfo, proto_tree *tree) {\n' + par=(('tvb', 'offset', 'pinfo', 'tree', f),) + out += self.eth_fn_call('dissect_%s_%s' % (self.eth_type[t]['proto'], t), ret='return', + par=par) + out += '}\n' + return out + #end out_field() + fn = self.eth_output_fname('fn') + fx = file(fn, 'w') + fx.write(eth_fhdr(fn)) + if self.eth_dep_cycle: + fx.write('/* Cyclic dependencies */\n') + for c in self.eth_dep_cycle: + fx.write('/* %s */\n' % ' -> '.join(c)) + fx.write('\n') + fx.write(self.eth_type_fn_h(self.type[c[0]]['ethname'])) + if (not self.new): # fields for imported type + for f in self.eth_hf_ord: + if (self.eth_type[self.eth_hf[f]['ethtype']]['import']): + fx.write(out_field(f)) + for t in self.eth_type_ord1: + if self.eth_type[t]['import']: + continue + fx.write(self.eth_type[t]['val'].eth_type_fn(self.proto, t, self)) + if (not self.new): + for f in self.eth_hf_ord: + if (self.eth_hf[f]['ethtype'] == t): + fx.write(out_field(f)) + fx.write('\n') + fx.close() + + def dupl_report(self): + # types + tmplist = self.eth_type_dupl.keys() + tmplist.sort() + for t in tmplist: + msg = "The same type names for different types. Explicit renaming is recommended.\n" + msg += t + "\n" + x = '' + for tt in self.eth_type_dupl[t]: + msg += " %-20s %s\n" % (t+str(x), tt) + if not x: x = 1 + else: x += 1 + warnings.warn(msg) + # fields + tmplist = self.eth_hf_dupl.keys() + tmplist.sort() + for f in tmplist: + msg = "The same field names for different types. Explicit renaming is recommended.\n" + msg += f + "\n" + for tt in self.eth_hf_dupl[f].keys(): + msg += " %-20s %-20s " % (self.eth_hf_dupl[f][tt], tt) + msg += ", ".join(self.eth_hf[self.eth_hf_dupl[f][tt]]['ref']) + msg += "\n" + warnings.warn(msg) + +#--- EthCnf ------------------------------------------------------------------- +import re +class EthCnf: + def __init__(self): + self.export = {} + self.module_import = {} + self.type_rename = {} + self.field_rename = {} + self.fn = {} + + def add_export(self, asn_name, fn, lineno, flag=1): + if self.export.has_key(asn_name): + warnings.warn_explicit("Duplicated export for %s. 
Previous one is at %s:%d" % + (asn_name, self.export[asn_name]['fn'], self.export[asn_name]['lineno']), + UserWarning, fn, lineno) + return + self.export[asn_name] = {'flag' : flag, 'used' : False, + 'fn' : fn, 'lineno' : lineno} + def use_export(self, asn_name): + if self.export.has_key(asn_name): + self.export[asn_name]['used'] = True + return self.export[asn_name]['flag'] + return 0 + + def add_module_import(self, module, proto, fn, lineno): + if self.module_import.has_key(module): + warnings.warn_explicit("Duplicated module import for %s" % (module), + UserWarning, fn, lineno) + return + self.module_import[module] = proto + def use_module_import(self, module, proto): + return self.module_import.get(module, proto) + + def add_type_rename(self, asn_name, eth_name, fn, lineno): + if self.type_rename.has_key(asn_name): + warnings.warn_explicit("Duplicated type rename for %s. Previous one is at %s:%d" % + (asn_name, self.type_rename[asn_name]['fn'], self.type_rename[asn_name]['lineno']), + UserWarning, fn, lineno) + return + self.type_rename[asn_name] = {'name' : eth_name, 'used' : False, + 'fn' : fn, 'lineno' : lineno} + def exist_type_rename(self, asn_name): + return self.type_rename.has_key(asn_name) + def use_type_rename(self, asn_name, eth_name): + if self.type_rename.has_key(asn_name): + self.type_rename[asn_name]['used'] = True + return self.type_rename[asn_name]['name'] + return eth_name + + def add_field_rename(self, asn_name, eth_name, fn, lineno): + if self.field_rename.has_key(asn_name): + warnings.warn_explicit("Duplicated field rename for %s. Previous one is at %s:%d" % + (asn_name, self.field_rename[asn_name]['fn'], self.field_rename[asn_name]['lineno']), + UserWarning, fn, lineno) + return + self.field_rename[asn_name] = {'name' : eth_name, 'used' : False, + 'fn' : fn, 'lineno' : lineno} + def use_field_rename(self, asn_name, eth_name): + if self.field_rename.has_key(asn_name): + self.field_rename[asn_name]['used'] = True + return self.field_rename[asn_name]['name'] + return eth_name + + def add_fn_line(self, name, ctx, line, fn, lineno): + if not self.fn.has_key(name): + self.fn[name] = {'FN_HDR' : None, 'FN_FTR' : None, 'FN_BODY' : None} + if (self.fn[name][ctx]): + self.fn[name][ctx]['text'] += line + else: + self.fn[name][ctx] = {'text' : line, 'used' : False, + 'fn' : fn, 'lineno' : lineno} + def get_fn_presence(self, name): + #print "get_fn_presence('%s'):%s" % (name, str(self.fn.has_key(name))) + #if self.fn.has_key(name): print self.fn[name] + return self.fn.has_key(name) + def get_fn_body_presence(self, name): + return self.fn.has_key(name) and self.fn[name]['FN_BODY'] + def get_fn_text(self, name, ctx): + if (not self.fn.has_key(name)): + return ''; + if (not self.fn[name][ctx]): + return ''; + return self.fn[name][ctx]['text'] + + def read(self, fn): + def get_par(line, pmin, pmax, fn, lineno): + par = line.split(None, pmax) + for i in range(len(par)): + if par[i][0] == '#': + par[i:] = [] + break + if len(par) < pmin: + warnings.warn_explicit("Too few parameters. At least %d parameters are required" % (pmin), UserWarning, fn, lineno) + return None + if len(par) > pmax: + warnings.warn_explicit("Too many parameters. 
Only %d parameters are allowed" % (pmax), UserWarning, fn, lineno) + return par[0:pmax] + return par + + f = open(fn, "r") + directive = re.compile(r'^\s*#\.(?P[A-Z_]+)\s+') + comment = re.compile(r'^\s*#[^.]') + empty = re.compile(r'^\s*$') + lineno = 0 + ctx = '' + name = '' + while 1: + line = f.readline() + if not line: break + lineno += 1 + if comment.search(line): continue + result = directive.search(line) + if result: # directive + if result.group('name') in ('EXPORTS', 'MODULE_IMPORT', 'TYPE_RENAME', 'FIELD_RENAME'): + ctx = result.group('name') + elif result.group('name') in ('FN_HDR', 'FN_FTR', 'FN_BODY'): + par = get_par(line[result.end():], 1, 1, fn=fn, lineno=lineno) + if not par: continue + ctx = result.group('name') + name = par[0] + elif result.group('name') == 'END': + ctx = '' + else: + warnings.warn_explicit("Unknown directive '%s'" % (result.group('name')), UserWarning, fn, lineno) + continue + if not ctx: + if not empty.search(line): + warnings.warn_explicit("Non-empty line in empty context", UserWarning, fn, lineno) + elif ctx == 'EXPORTS': + if empty.search(line): continue + par = get_par(line, 1, 2, fn=fn, lineno=lineno) + if not par: continue + flag = 0x01 + if (len(par)>=2): + if (par[1] == 'WITH_VALS'): + flag = 0x03 + elif (par[1] == 'WITHOUT_VALS'): + flag = 0x01 + elif (par[1] == 'ONLY_VALS'): + flag = 0x02 + else: + warnings.warn_explicit("Unknown parameter value '%s'" % (par[1]), UserWarning, fn, lineno) + self.add_export(par[0], flag=flag, fn=fn, lineno=lineno) + elif ctx == 'MODULE_IMPORT': + if empty.search(line): continue + par = get_par(line, 2, 2, fn=fn, lineno=lineno) + if not par: continue + self.add_module_import(par[0], par[1], fn=fn, lineno=lineno) + elif ctx == 'TYPE_RENAME': + if empty.search(line): continue + par = get_par(line, 2, 2, fn=fn, lineno=lineno) + if not par: continue + self.add_type_rename(par[0], par[1], fn=fn, lineno=lineno) + elif ctx == 'FIELD_RENAME': + if empty.search(line): continue + par = get_par(line, 2, 2, fn=fn, lineno=lineno) + if not par: continue + self.add_field_rename(par[0], par[1], fn=fn, lineno=lineno) + elif ctx in ('FN_HDR', 'FN_FTR', 'FN_BODY'): + self.add_fn_line(name, ctx, line, fn=fn, lineno=lineno) + f.close() + + def unused_report(self): + # export + keys = self.export.keys() + for k in keys: + if not self.export[k]['used']: + warnings.warn_explicit("Unused export for %s" % (k), + UserWarning, self.export[k]['fn'], self.export[k]['lineno']) + # type rename + keys = self.type_rename.keys() + for k in keys: + if not self.type_rename[k]['used']: + warnings.warn_explicit("Unused type rename for %s" % (k), + UserWarning, self.type_rename[k]['fn'], self.type_rename[k]['lineno']) + # field rename + keys = self.field_rename.keys() + for k in keys: + if not self.field_rename[k]['used']: + warnings.warn_explicit("Unused field rename for %s" % (k), + UserWarning, self.field_rename[k]['fn'], self.field_rename[k]['lineno']) + +#--- Node --------------------------------------------------------------------- +class Node: + def __init__(self,*args, **kw): + if len (args) == 0: + self.type = self.__class__.__name__ + else: + assert (len(args) == 1) + self.type = args[0] + self.__dict__.update (kw) + def str_child (self, key, child, depth): + indent = " " * (2 * depth) + keystr = indent + key + ": " + if key == 'type': # already processed in str_depth + return "" + if isinstance (child, Node): # ugh + return keystr + "\n" + child.str_depth (depth+1) + if type (child) == type ([]): + l = [] + for x in child: + if isinstance 
(x, Node): + l.append (x.str_depth (depth+1)) + else: + l.append (indent + " " + str(x) + "\n") + return keystr + "[\n" + ''.join (l) + indent + "]\n" + else: + return keystr + str (child) + "\n" + def str_depth (self, depth): # ugh + indent = " " * (2 * depth) + l = ["%s%s" % (indent, self.type)] + l.append ("".join (map (lambda (k,v): self.str_child (k, v, depth + 1), + self.__dict__.items ()))) + return "\n".join (l) + def __str__(self): + return "\n" + self.str_depth (0) + def to_python (self, ctx): + return self.str_depth (ctx.indent_lev) + + def eth_reg(self, ident, ectx): + pass + + +#--- Type --------------------------------------------------------------------- +class Type (Node): + def __init__(self,*args, **kw) : + self.name = None + self.constr = None + Node.__init__ (self,*args, **kw) + + def IsNamed(self): + if self.name is None : + return False + else: + return True + + def HasConstraint(self): + if self.constr is None : + return False + else : + return True + + def HasOwnTag(self): + return self.__dict__.has_key('tag') + + def HasImplicitTag(self): + return self.HasOwnTag() and (self.tag.mode == 'IMPLICIT') + + def IndetermTag(self, ectx): + return False + + def SetTag(self, tag): + self.tag = tag + + def GetTag(self, ectx): + if (self.HasOwnTag()): + return self.tag.GetTag(ectx) + else: + return self.GetTTag(ectx) + + def GetTTag(self, ectx): + print "#Unhandled GetTTag() in %s" % (self.type) + print self.str_depth(1) + return ('BER_CLASS_unknown', 'TAG_unknown') + + def SetName(self, name) : + self.name = name + + def AddConstraint(self, constr): + if not self.HasConstraint(): + self.constr = constr + else: + self.constr = Constraint(type = 'Intersection', subtype = [self.constr, constr]) + + def eth_tname(self): + return '#' + self.type + '_' + str(id(self)) + + def eth_ftype(self): + return ('FT_NONE', 'BASE_NONE') + + def eth_strings(self): + return 'NULL' + + def eth_need_tree(self): + return False + + def eth_named_bits(self): + return None + + def eth_reg_sub(self, ident, ectx): + pass + + def eth_reg(self, ident, ectx, idx='', parent=None): + nm = '' + if ident and self.IsNamed (): + nm = ident + '/' + self.name + elif self.IsNamed(): + nm = self.name + elif ident: + nm = ident + if not ident: # Assignment + ectx.eth_reg_assign(nm, self) + if self.type == 'Type_Ref': + ectx.eth_reg_type(nm, self) + if self.type == 'Type_Ref': + if ectx.conform.exist_type_rename(nm) or ectx.conform.get_fn_presence(nm): + ectx.eth_reg_type(nm, self) # new type + trnm = nm + else: + trnm = self.val + else: + ectx.eth_reg_type(nm, self) + if ident: + if self.type == 'Type_Ref': + ectx.eth_reg_field(nm, trnm, idx=idx, parent=parent, impl=self.HasImplicitTag()) + else: + ectx.eth_reg_field(nm, nm, idx=idx, parent=parent, impl=self.HasImplicitTag()) + self.eth_reg_sub(nm, ectx) + + def eth_get_size_constr(self, ): + minv = '-1' + maxv = '-1' + ext = 'FALSE' + if not self.HasConstraint(): + minv = '-1' + maxv = '-1' + ext = 'FALSE' + elif self.constr.type == 'Size' and (self.constr.subtype.type == 'SingleValue' or self.constr.subtype.type == 'ValueRange'): + if self.constr.subtype.type == 'SingleValue': + minv = self.constr.subtype.subtype + maxv = self.constr.subtype.subtype + else: + minv = self.constr.subtype.subtype[0] + maxv = self.constr.subtype.subtype[1] + if hasattr(self.constr.subtype, 'ext') and self.constr.subtype.ext: + ext = 'TRUE' + else: + ext = 'FALSE' + return (minv, maxv, ext) + + def eth_type_fn(self, proto, tname, ectx): + print "#Unhandled eth_type_fn('%s', '%s') in 
%s" % (proto, tname, self.type) + print self.str_depth(1) + return '' + +#--- Constraint --------------------------------------------------------------- +class Constraint (Node): + def to_python (self, ctx): + print "Ignoring constraint:", self.type + return self.subtype.typ.to_python (ctx) + def __str__ (self): + return "Constraint: type=%s, subtype=%s" % (self.type, self.subtype) + + def eth_constrname(self): + ext = '' + if hasattr(self, 'ext') and self.ext: + ext = '_' + if self.type == 'SingleValue': + return str(self.subtype) + ext + elif self.type == 'ValueRange': + return str(self.subtype[0]) + '_' + str(self.subtype[1]) + ext + elif self.type == 'Size': + return 'SIZE_' + self.subtype.eth_constrname() + ext + else: + return 'CONSTR' + str(id(self)) + ext + + +class Module (Node): + def to_python (self, ctx): + ctx.tag_def = self.tag_def.dfl_tag + return """#%s +%s""" % (self.ident, self.body.to_python (ctx)) + + def to_eth (self, ectx): + self.body.to_eth(ectx) + +class Module_Body (Node): + def to_python (self, ctx): + # XXX handle exports, imports. + l = map (lambda x: x.to_python (ctx), self.assign_list) + l = [a for a in l if a <> ''] + return "\n".join (l) + + def to_eth(self, ectx): + for i in self.imports: + mod = i.module.val + proto = ectx.conform.use_module_import(mod, mod.replace('-', '_')) + for s in i.symbol_list: + if isinstance(s, Type_Ref): + ectx.eth_import_type(s.val, mod, proto) + for a in self.assign_list: + a.eth_reg('', ectx) + +class Default_Tags (Node): + def to_python (self, ctx): # not to be used directly + assert (0) + +# XXX should just calculate dependencies as we go along. +def calc_dependencies (node, dict, trace = 0): + if not hasattr (node, '__dict__'): + if trace: print "#returning, node=", node + return + if isinstance (node, Type_Ref): + dict [node.val] = 1 + if trace: print "#Setting", node.val + return + for (a, val) in node.__dict__.items (): + if trace: print "# Testing node ", node, "attr", a, " val", val + if a[0] == '_': + continue + elif isinstance (val, Node): + calc_dependencies (val, dict, trace) + elif isinstance (val, type ([])): + for v in val: + calc_dependencies (v, dict, trace) + + +class Type_Assign (Node): + def __init__ (self, *args, **kw): + Node.__init__ (self, *args, **kw) + if isinstance (self.val, Tag): # XXX replace with generalized get_typ_ignoring_tag (no-op for Node, override in Tag) + to_test = self.val.typ + else: + to_test = self.val + if isinstance (to_test, SequenceType): + to_test.sequence_name = self.name.name + + def to_python (self, ctx): + dep_dict = {} + calc_dependencies (self.val, dep_dict, 0) + depend_list = dep_dict.keys () + return ctx.register_assignment (self.name.name, + self.val.to_python (ctx), + depend_list) + + def to_eth(self, ctx): + ctx.eth_reg_type(self.name.val, self.val) + ctx.eth_reg_assign(self.name.val, self.val) + +class PyQuote (Node): + def to_python (self, ctx): + return ctx.register_pyquote (self.val) + +#--- Type_Ref ----------------------------------------------------------------- +class Type_Ref (Type): + def to_python (self, ctx): + return self.val + + def eth_reg_sub(self, ident, ectx): + ectx.eth_dep_add(ident, self.val) + + def eth_tname(self): + return self.val + + def GetTTag(self, ectx): + if (ectx.type[self.val]['import']): + return ('imp', 'imp') + else: + return ectx.type[self.val]['val'].GetTag(ectx) + + def IndetermTag(self, ectx): + if (ectx.type[self.val]['import']): + return False + else: + return ectx.type[self.val]['val'].IndetermTag(ectx) + + def 
eth_type_fn(self, proto, tname, ectx): + out = ectx.eth_type_fn_hdr(tname) + t = ectx.type[self.val]['ethname'] + if (ectx.OBer()): + body = ectx.eth_fn_call('dissect_%s_%s' % (ectx.eth_type[t]['proto'], t), ret='offset', + par=(('implicit_tag', 'tvb', 'offset', 'pinfo', 'tree', 'hf_index'),)) + elif (ectx.NPer()): + body = ectx.eth_fn_call('dissect_%s_%s' % (ectx.eth_type[t]['proto'], t), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'private_data'))) + elif (ectx.OPer()): + body = ectx.eth_fn_call('dissect_%s_%s' % (ectx.eth_type[t]['proto'], t), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'),)) + else: + body = '#error Can not decode %s' % (tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +#--- SqType ----------------------------------------------------------- +class SqType (Type): + def out_item(self, f, val, optional, ext, ectx): + ef = ectx.field[f]['ethname'] + efd = ef + if (ectx.OBer() and ectx.field[f]['impl']): + efd += '_impl' + if (ectx.encoding == 'ber'): + #print "optional=%s, e.val.HasOwnTag()=%s, e.val.IndetermTag()=%s" % (str(e.optional), str(e.val.HasOwnTag()), str(e.val.IndetermTag(ectx))) + #print val.str_depth(1) + opt = '' + if (optional): + opt = 'BER_FLAGS_OPTIONAL' + if (not val.HasOwnTag()): + if (opt): opt += '|' + opt += 'BER_FLAGS_NOOWNTAG' + elif (val.HasImplicitTag()): + if (opt): opt += '|' + opt += 'BER_FLAGS_IMPLTAG' + if (val.IndetermTag(ectx)): + if (opt): opt += '|' + opt += 'BER_FLAGS_NOTCHKTAG' + if (not opt): opt = '0' + else: + if optional: + opt = 'ASN1_OPTIONAL' + else: + opt = 'ASN1_NOT_OPTIONAL' + if (ectx.OBer()): + (tc, tn) = val.GetTag(ectx) + out = ' { %-13s, %s, %s, dissect_%s },\n' \ + % (tc, tn, opt, efd) + elif (ectx.NPer()): + out = ' { &%-30s, %-23s, %-17s, dissect_%s_%s },\n' \ + % (ef, ext, opt, ectx.eth_type[ectx.eth_hf[ef]['ethtype']]['proto'], ectx.eth_hf[ef]['ethtype']) + elif (ectx.OPer()): + out = ' { %-30s, %-23s, %-17s, dissect_%s },\n' \ + % ('"'+val.name+'"', ext, opt, efd) + return out + +#--- SequenceOfType ----------------------------------------------------------- +class SequenceOfType (SqType): + def to_python (self, ctx): + # name, tag (None for no tag, EXPLICIT() for explicit), typ) + # or '' + (1,) for optional + sizestr = '' + if self.size_constr <> None: + print "#Ignoring size constraint:", self.size_constr.subtype + return "%sasn1.SEQUENCE_OF (%s%s)" % (ctx.spaces (), + self.val.to_python (ctx), + sizestr) + + def eth_reg_sub(self, ident, ectx): + itmnm = ident + if not self.val.IsNamed (): + itmnm += '/' + '_item' + self.val.eth_reg(itmnm, ectx, idx='[##]', parent=ident) + + def eth_tname(self): + return "SEQUNCE_OF_" + self.val.eth_tname() + + def eth_ftype(self): + return ('FT_UINT32', 'BASE_DEC') + + def eth_need_tree(self): + return True + + def eth_type_fn(self, proto, tname, ectx): + fname = ectx.eth_type[tname]['ref'][0] + if self.val.IsNamed (): + f = fname + '/' + self.val.name + else: + f = fname + '/' + '_item' + ef = ectx.field[f]['ethname'] + out = '' + if (ectx.Ber()): + out = "static ber_sequence %s_sequence_of[1] = {\n" % (tname) + out += self.out_item(f, self.val, False, '', ectx) + out += "};\n" + out += ectx.eth_type_fn_hdr(tname) + if (ectx.OBer()): + body = ectx.eth_fn_call('dissect_ber_sequence_of' + ectx.pvp(), ret='offset', + par=(('implicit_tag', 'pinfo', 'tree', 'tvb', 'offset'), + (tname+'_sequence_of', 'hf_index', ectx.eth_type[tname]['tree']))) + elif (ectx.NPer()): + body = 
ectx.eth_fn_call('dissect_per_sequence_of' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'private_data'), + (ectx.eth_type[tname]['tree'], ef, 'dissect_%s_%s' % (ectx.eth_type[ectx.eth_hf[ef]['ethtype']]['proto'], ectx.eth_hf[ef]['ethtype'])))) + elif (ectx.OPer()): + body = ectx.eth_fn_call('dissect_per_sequence_of' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'), + (ectx.eth_type[tname]['tree'], 'dissect_'+ef))) + else: + body = '#error Can not decode %s' % (tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + + +#--- SetOfType ---------------------------------------------------------------- +class SetOfType (SqType): + def eth_reg_sub(self, ident, ectx): + itmnm = ident + if not self.val.IsNamed (): + itmnm += '/' + '_item' + self.val.eth_reg(itmnm, ectx, idx='(##)', parent=ident) + + def eth_tname(self): + return "SET_OF_" + self.val.eth_tname() + + def eth_ftype(self): + return ('FT_UINT32', 'BASE_DEC') + + def eth_need_tree(self): + return True + + def eth_type_fn(self, proto, tname, ectx): + fname = ectx.eth_type[tname]['ref'][0] + f = fname + '/' + '_item' + ef = ectx.field[f]['ethname'] + out = ectx.eth_type_fn_hdr(tname) + body = " offset = dissect_per_set_of_new(tvb, offset, pinfo, tree,\n" \ + " hf_index, item, private_data,\n" + body += ' %s, %s, dissect_%s_%s);\n' \ + % (ectx.eth_type[tname]['tree'], ef, ectx.eth_type[ectx.eth_hf[ef]['ethtype']]['proto'], ectx.eth_hf[ef]['ethtype']) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +def mk_tag_str (ctx, cls, typ, num): + + # XXX should do conversion to int earlier! + val = int (num) + typ = typ.upper() + if typ == 'DEFAULT': + typ = ctx.tags_def + return 'asn1.%s(%d,cls=asn1.%s_FLAG)' % (typ, val, cls) # XXX still ned + +class Tag (Node): + def to_python (self, ctx): + return 'asn1.TYPE(%s,%s)' % (mk_tag_str (ctx, self.tag.cls, + self.tag_typ, + self.tag.num), + self.typ.to_python (ctx)) + def GetTag(self, ectx): + tc = '' + if (self.cls == 'UNIVERSAL'): tc = 'BER_CLASS_UNI' + elif (self.cls == 'APPLICATION'): tc = 'BER_CLASS_APP' + elif (self.cls == 'CONTEXT'): tc = 'BER_CLASS_CON' + elif (self.cls == 'PRIVATE'): tc = 'BER_CLASS_PRI' + return (tc, self.num) + +#--- SequenceType ------------------------------------------------------------- +class SequenceType (SqType): + def to_python (self, ctx): + # name, tag (None for no tag, EXPLICIT() for explicit), typ) + # or '' + (1,) for optional + # XXX should also collect names for SEQUENCE inside SEQUENCE or + # CHOICE or SEQUENCE_OF (where should the SEQUENCE_OF name come + # from? for others, element or arm name would be fine) + seq_name = getattr (self, 'sequence_name', None) + if seq_name == None: + seq_name = 'None' + else: + seq_name = "'" + seq_name + "'" + if self.__dict__.has_key('ext_list'): + return "%sasn1.SEQUENCE ([%s], ext=[%s], seq_name = %s)" % (ctx.spaces (), + self.elts_to_py (self.elt_list, ctx), + self.elts_to_py (self.ext_list, ctx), seq_name) + else: + return "%sasn1.SEQUENCE ([%s]), seq_name = %s" % (ctx.spaces (), + self.elts_to_py (self.elt_list, ctx), seq_name) + def elts_to_py (self, list, ctx): + # we have elt_type, val= named_type, maybe default=, optional= + # named_type node: either ident = or typ = + # need to dismember these in order to generate Python output syntax. 
+ ctx.indent () + def elt_to_py (e): + assert (e.type == 'elt_type') + nt = e.val + optflag = e.optional +# assert (not hasattr (e, 'default')) # XXX add support for DEFAULT! + assert (nt.type == 'named_type') + tagstr = 'None' + identstr = nt.ident + if hasattr (nt.typ, 'type') and nt.typ.type == 'tag': # ugh + tagstr = mk_tag_str (ctx,nt.typ.tag.cls, + nt.typ.tag.tag_typ,nt.typ.tag.num) + + + nt = nt.typ + return "('%s',%s,%s,%d)" % (identstr, tagstr, + nt.typ.to_python (ctx), optflag) + indentstr = ",\n" + ctx.spaces () + rv = indentstr.join ([elt_to_py (e) for e in list]) + ctx.outdent () + return rv + + def eth_reg_sub(self, ident, ectx): + for e in (self.elt_list): + e.val.eth_reg(ident, ectx, parent=ident) + if hasattr(self, 'ext_list'): + for e in (self.ext_list): + e.val.eth_reg(ident, ectx, parent=ident) + + def eth_need_tree(self): + return True + + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_SEQUENCE') + + def eth_type_fn(self, proto, tname, ectx): + fname = ectx.eth_type[tname]['ref'][0] + if (ectx.encoding == 'ber'): + out = "static ber_sequence %s_sequence[] = {\n" % (tname) + else: + out = "static per_sequence%s_t %s_sequence%s[] = {\n" % (ectx.pvp(), tname, ectx.pvp()) + if hasattr(self, 'ext_list'): + ext = 'ASN1_EXTENSION_ROOT' + else: + ext = 'ASN1_NO_EXTENSIONS' + for e in (self.elt_list): + f = fname + '/' + e.val.name + out += self.out_item(f, e.val, e.optional, ext, ectx) + if hasattr(self, 'ext_list'): + for e in (self.ext_list): + f = fname + '/' + e.val.name + out += self.out_item(f, e.val, e.optional, 'ASN1_NOT_EXTENSION_ROOT', ectx) + if (ectx.encoding == 'ber'): + out += " { 0, 0, 0, NULL }\n};\n" + else: + out += " { NULL, 0, 0, NULL }\n};\n" + out += ectx.eth_type_fn_hdr(tname) + if (ectx.OBer()): + body = ectx.eth_fn_call('dissect_ber_sequence' + ectx.pvp(), ret='offset', + par=(('implicit_tag', 'pinfo', 'tree', 'tvb', 'offset'), + (tname+'_sequence', 'hf_index', ectx.eth_type[tname]['tree']))) + elif (ectx.NPer()): + body = ectx.eth_fn_call('dissect_per_sequence' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'private_data'), + (ectx.eth_type[tname]['tree'], tname+'_sequence'+ectx.pvp(), '"'+tname+'"'))) + elif (ectx.OPer()): + body = ectx.eth_fn_call('dissect_per_sequence' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'), + (ectx.eth_type[tname]['tree'], tname+'_sequence'+ectx.pvp()))) + else: + body = '#error Can not decode %s' % (tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +#--- SetType ------------------------------------------------------------------ +class SetType(SqType): + def eth_reg_sub(self, ident, ectx): + for e in (self.elt_list): + e.val.eth_reg(ident, ectx, parent=ident) + if hasattr(self, 'ext_list'): + for e in (self.ext_list): + e.val.eth_reg(ident, ectx, parent=ident) + + def eth_need_tree(self): + return True + + def eth_type_fn(self, proto, tname, ectx): + out = "static per_set_new_t %s_sequence_new[] = {\n" % (tname) + fname = ectx.eth_type[tname]['ref'][0] + if hasattr(self, 'ext_list'): + ext = 'ASN1_EXTENSION_ROOT' + else: + ext = 'ASN1_NO_EXTENSIONS' + for e in (self.elt_list): + f = fname + '/' + e.val.name + out += self.out_item(f, e.val, e.optional, ext, ectx) + if hasattr(self, 'ext_list'): + for e in (self.ext_list): + f = fname + '/' + e.val.name + out += self.out_item(f, e.val, e.optional, 'ASN1_NOT_EXTENSION_ROOT', ectx) + out += " { NULL, 0, 0, NULL }\n};\n" + out 
+= ectx.eth_type_fn_hdr(tname) + body = " offset = dissect_per_set_new(tvb, offset, pinfo, tree,\n" \ + " hf_index, item, private_data,\n" + body += ' %s, %s_sequence_new, "%s");\n' \ + % (ectx.eth_type[tname]['tree'], tname, tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +#--- ChoiceType --------------------------------------------------------------- +class ChoiceType (Type): + def to_python (self, ctx): + # name, tag (None for no tag, EXPLICIT() for explicit), typ) + # or '' + (1,) for optional + if self.__dict__.has_key('ext_list'): + return "%sasn1.CHOICE ([%s], ext=[%s])" % (ctx.spaces (), + self.elts_to_py (self.elt_list, ctx), + self.elts_to_py (self.ext_list, ctx)) + else: + return "%sasn1.CHOICE ([%s])" % (ctx.spaces (), self.elts_to_py (self.elt_list, ctx)) + def elts_to_py (self, list, ctx): + ctx.indent () + def elt_to_py (nt): + assert (nt.type == 'named_type') + tagstr = 'None' + if hasattr (nt, 'ident'): + identstr = nt.ident + else: + if hasattr (nt.typ, 'val'): + identstr = nt.typ.val # XXX, making up name + elif hasattr (nt.typ, 'name'): + identstr = nt.typ.name + else: + identstr = ctx.make_new_name () + + if hasattr (nt.typ, 'type') and nt.typ.type == 'tag': # ugh + tagstr = mk_tag_str (ctx,nt.typ.tag.cls, + nt.typ.tag.tag_typ,nt.typ.tag.num) + + + nt = nt.typ + return "('%s',%s,%s)" % (identstr, tagstr, + nt.typ.to_python (ctx)) + indentstr = ",\n" + ctx.spaces () + rv = indentstr.join ([elt_to_py (e) for e in list]) + ctx.outdent () + return rv + + def eth_reg_sub(self, ident, ectx): + #print "eth_reg_sub(ident='%s')" % (ident) + for e in (self.elt_list): + e.eth_reg(ident, ectx, parent=ident) + if hasattr(self, 'ext_list'): + for e in (self.ext_list): + e.eth_reg(ident, ectx, parent=ident) + + def eth_ftype(self): + return ('FT_UINT32', 'BASE_DEC') + + def eth_strings(self): + return '$$' + + def eth_need_tree(self): + return True + + def GetTTag(self, ectx): + return (-1, -1) + + def IndetermTag(self, ectx): + #print "Choice IndetermTag()=%s" % (str(not self.HasOwnTag())) + return not self.HasOwnTag() + + def eth_type_fn(self, proto, tname, ectx): + def out_item(val, e, ext, ectx): + f = fname + '/' + e.name + ef = ectx.field[f]['ethname'] + efd = ef + if (ectx.field[f]['impl']): + efd += '_impl' + if (ectx.encoding == 'ber'): + opt = '' + if (not e.HasOwnTag()): + opt = 'BER_FLAGS_NOOWNTAG' + elif (e.tag.mode == 'IMPLICIT'): + if (opt): opt += '|' + opt += 'BER_FLAGS_IMPLTAG' + if (not opt): opt = '0' + if (ectx.OBer()): + (tc, tn) = e.GetTag(ectx) + out = ' { %3s, %-13s, %s, %s, dissect_%s },\n' \ + % (val, tc, tn, opt, efd) + elif (ectx.NPer()): + out = ' { %3s, &%-30s, %-23s, dissect_%s_%s },\n' \ + % (val, ef, ext, ectx.eth_type[ectx.eth_hf[ef]['ethtype']]['proto'], ectx.eth_hf[ef]['ethtype']) + elif (ectx.OPer()): + out = ' { %3s, %-30s, %-23s, dissect_%s },\n' \ + % (val, '"'+e.name+'"', ext, efd) + return out + # end out_item() + fname = ectx.eth_type[tname]['ref'][0] + out = '\n' + tagval = False + if (ectx.Ber()): + lst = self.elt_list + if hasattr(self, 'ext_list'): + lst.extend(self.ext_list) + if (len(lst) > 0): + t = lst[0].GetTag(ectx)[0] + tagval = True + if (t == 'BER_CLASS_UNI'): + tagval = False + for e in (lst): + if (e.GetTag(ectx)[0] != t): + tagval = False + vals = [] + cnt = 0 + for e in (self.elt_list): + if (tagval): val = e.GetTag(ectx)[1] + else: val = str(cnt) + vals.append((val, e.name)) + cnt += 1 + if hasattr(self, 'ext_list'): + for e in (self.ext_list): + if (tagval): val = 
e.GetTag(ectx)[1] + else: val = str(cnt) + vals.append((val, e.name)) + cnt += 1 + out += ectx.eth_vals(tname, vals) + if (ectx.encoding == 'ber'): + out += "static ber_choice %s_choice[] = {\n" % (tname) + else: + out += "static per_choice%s_t %s_choice%s[] = {\n" % (ectx.pvp(), tname, ectx.pvp()) + cnt = 0 + if hasattr(self, 'ext_list'): + ext = 'ASN1_EXTENSION_ROOT' + else: + ext = 'ASN1_NO_EXTENSIONS' + for e in (self.elt_list): + if (tagval): val = e.GetTag(ectx)[1] + else: val = str(cnt) + out += out_item(val, e, ext, ectx) + cnt += 1 + if hasattr(self, 'ext_list'): + for e in (self.ext_list): + if (tagval): val = e.GetTag(ectx)[1] + else: val = str(cnt) + out += out_item(val, e, 'ASN1_NOT_EXTENSION_ROOT', ectx) + cnt += 1 + if (ectx.encoding == 'ber'): + out += " { 0, 0, 0, 0, NULL }\n};\n" + else: + out += " { 0, NULL, 0, NULL }\n};\n" + out += ectx.eth_type_fn_hdr(tname) + if (ectx.Ber()): + body = ectx.eth_fn_call('dissect_ber_choice' + ectx.pvp(), ret='offset', + par=(('pinfo', 'tree', 'tvb', 'offset'), + (tname+'_choice', 'hf_index', ectx.eth_type[tname]['tree']))) + elif (ectx.NPer()): + body = ectx.eth_fn_call('dissect_per_choice' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'private_data'), + (ectx.eth_type[tname]['tree'], tname+'_choice'+ectx.pvp(), '"'+tname+'"'), + ('NULL',))) + elif (ectx.OPer()): + body = ectx.eth_fn_call('dissect_per_choice' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'), + (ectx.eth_type[tname]['tree'], tname+'_choice'+ectx.pvp(), '"'+tname+'"'), + ('NULL',))) + else: + body = '#error Can not decode %s' % (tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + + +#--- EnumeratedType ----------------------------------------------------------- +class EnumeratedType (Type): + def to_python (self, ctx): + def strify_one (named_num): + return "%s=%s" % (named_num.ident, named_num.val) + return "asn1.ENUM(%s)" % ",".join (map (strify_one, self.val)) + + def eth_ftype(self): + return ('FT_UINT32', 'BASE_DEC') + + def eth_strings(self): + return '$$' + + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_ENUMERATED') + + def eth_type_fn(self, proto, tname, ectx): + fname = ectx.eth_type[tname]['ref'][0] + out = '' + vals = [] + lastv = 0 + used = {} + maxv = 0 + for e in (self.val): + if e.type == 'NamedNumber': + used[int(e.val)] = True + for e in (self.val): + if e.type == 'NamedNumber': + val = int(e.val) + else: + while used.has_key(lastv): + lastv += 1 + val = lastv + used[val] = True + vals.append((val, e.ident)) + if val > maxv: + maxv = val + if self.ext is not None: + for e in (self.ext): + if e.type == 'NamedNumber': + used[int(e.val)] = True + for e in (self.ext): + if e.type == 'NamedNumber': + val = int(e.val) + else: + while used.has_key(lastv): + lastv += 1 + val = lastv + vals.append((val, e.ident)) + if val > maxv: + maxv = val + out += ectx.eth_vals(tname, vals) + if self.ext is None: + ext = 'FALSE' + else: + ext = 'TRUE' + out += ectx.eth_type_fn_hdr(tname) + if (ectx.Ber()): + body = ectx.eth_fn_call('dissect_ber_integer' + ectx.pvp(), ret='offset', + par=(('pinfo', 'tree', 'tvb', 'offset', 'hf_index', 'NULL'),)) + else: + body = " offset = dissect_per_constrained_integer_new(tvb, offset, pinfo, tree,\n" + body += " %s, %s, %s,\n" \ + % (0, maxv, ext) + body += " NULL);\n" + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +class Literal (Node): + def 
to_python (self, ctx): + return self.val + +#--- NullType ----------------------------------------------------------------- +class NullType (Type): + def to_python (self, ctx): + return 'asn1.NULL' + + def eth_tname(self): + return 'NULL' + + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_NULL') + + def eth_type_fn(self, proto, tname, ectx): + out = ectx.eth_type_fn_hdr(tname) + if (ectx.new): + body = ectx.eth_fn_call('dissect_per_null' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'NULL'))) + else: + body = ' { proto_item *ti_tmp;\n'; + body += ectx.eth_fn_call('proto_tree_add_item', 'ti_tmp', + par=(('tree', 'hf_index', 'tvb', 'offset>>8', '0', 'FALSE'),)) + body += ectx.eth_fn_call('proto_item_append_text', + par=(('ti_tmp', '": NULL"'),)) + body += ' }\n'; + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +#--- RealType ----------------------------------------------------------------- +class RealType (Type): + def to_python (self, ctx): + return 'asn1.REAL' + + def eth_tname(self): + return 'REAL' + + def eth_type_fn(self, proto, tname, ectx): + out = ectx.eth_type_fn_hdr(tname) + #out += " offset = dissect_per_real_new(tvb, offset, pinfo, tree,\n" \ + # " hf_index, item, NULL);\n" + body = 'NOT_DECODED_YET("%s");\n' % (tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +#--- BooleanType -------------------------------------------------------------- +class BooleanType (Type): + def to_python (self, ctx): + return 'asn1.BOOLEAN' + + def eth_tname(self): + return 'BOOLEAN' + + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_BOOLEAN') + + def eth_ftype(self): + return ('FT_BOOLEAN', '8') + + def eth_type_fn(self, proto, tname, ectx): + out = ectx.eth_type_fn_hdr(tname) + if (ectx.Ber()): + body = ectx.eth_fn_call('dissect_ber_boolean' + ectx.pvp(), ret='offset', + par=(('pinfo', 'tree', 'tvb', 'offset', 'hf_index'),)) + elif (ectx.NPer()): + body = ectx.eth_fn_call('dissect_per_boolean' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'NULL'))) + elif (ectx.OPer()): + body = ectx.eth_fn_call('dissect_per_boolean' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'), + ('NULL', 'NULL'))) + else: + body = '#error Can not decode %s' % (tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +#--- OctetStringType ---------------------------------------------------------- +class OctetStringType (Type): + def to_python (self, ctx): + return 'asn1.OCTSTRING' + + def eth_tname(self): + if not self.HasConstraint(): + return 'OCTET_STRING' + elif self.constr.type == 'Size' and (self.constr.subtype.type == 'SingleValue' or self.constr.subtype.type == 'ValueRange'): + return 'OCTET_STRING' + '_' + self.constr.eth_constrname() + else: + return '#' + self.type + '_' + str(id(self)) + + def eth_ftype(self): + return ('FT_BYTES', 'BASE_HEX') + + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_OCTETSTRING') + + def eth_type_fn(self, proto, tname, ectx): + out = ectx.eth_type_fn_hdr(tname) + (minv, maxv, ext) = self.eth_get_size_constr() + if (ectx.OBer()): + body = ectx.eth_fn_call('dissect_ber_octet_string' + ectx.pvp(), ret='offset', + par=(('implicit_tag', 'pinfo', 'tree', 'tvb', 'offset', 'hf_index'), + ('NULL',))) + elif (ectx.NPer()): + body = 
ectx.eth_fn_call('dissect_per_octet_string' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'private_data'), + (minv, maxv, ext), + ('NULL', 'NULL'))) + elif (ectx.OPer()): + body = ectx.eth_fn_call('dissect_per_octet_string' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'), + (minv, maxv), + ('NULL', 'NULL'))) + else: + body = '#error Can not decode %s' % (tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +#--- CharacterStringType ------------------------------------------------------ +class CharacterStringType (Type): + def eth_tname(self): + if not self.HasConstraint(): + return self.eth_tsname() + elif self.constr.type == 'Size' and (self.constr.subtype.type == 'SingleValue' or self.constr.subtype.type == 'ValueRange'): + return self.eth_tsname() + '_' + self.constr.eth_constrname() + else: + return '#' + self.type + '_' + str(id(self)) + + def eth_ftype(self): + return ('FT_STRING', 'BASE_NONE') + +class RestrictedCharacterStringType (CharacterStringType): + def to_python (self, ctx): + return 'asn1.' + self.eth_tsname() + + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_' + self.eth_tsname()) + + def eth_type_fn(self, proto, tname, ectx): + out = ectx.eth_type_fn_hdr(tname) + (minv, maxv, ext) = self.eth_get_size_constr() + if (ectx.Ber()): + body = ectx.eth_fn_call('dissect_ber_restricted_string' + ectx.pvp(), ret='offset', + par=(('implicit_tag', self.GetTTag(ectx)[1]), + ('pinfo', 'tree', 'tvb', 'offset', 'hf_index'), + ('NULL',))) + elif (ectx.NPer()): + body = ectx.eth_fn_call('dissect_per_' + self.eth_tsname() + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'private_data'), + (minv, maxv, ext), + ('NULL', 'NULL'))) + elif (ectx.OPer()): + body = ectx.eth_fn_call('dissect_per_' + self.eth_tsname() + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'), + (minv, maxv))) + else: + body = '#error Can not decode %s' % (tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +class BMPStringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'BMPString' + +class GeneralStringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'GeneralString' + +class GraphicStringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'GraphicString' + +class IA5StringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'IA5String' + +class NumericStringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'NumericString' + +class PrintableStringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'PrintableString' + +class TeletexStringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'TeletexString' + +class T61StringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'T61String' + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_Teletext') + +class UniversalStringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'UniversalString' + +class UTF8StringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'UTF8String' + +class VideotexStringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'VideotexString' + +class VisibleStringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'VisibleString' + 
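For orientation, here is a small standalone sketch (illustrative only, not part of the compiler; the helper names below are invented) of the convention the string classes above and below rely on: each subclass only supplies eth_tsname(), and RestrictedCharacterStringType derives both the BER universal tag constant and the PER dissector helper name from it, with T61StringType and ISO646StringType overriding the tag.

# Sketch only: the derived names, mirroring GetTTag() and eth_type_fn() above.
def ber_tag_for(tsname):
    # generic rule from RestrictedCharacterStringType.GetTTag()
    return ('BER_CLASS_UNI', 'BER_UNI_TAG_' + tsname)

def per_dissector_for(tsname, pvp_suffix=''):
    # generic rule from RestrictedCharacterStringType.eth_type_fn()
    return 'dissect_per_' + tsname + pvp_suffix

print ber_tag_for('IA5String')        # ('BER_CLASS_UNI', 'BER_UNI_TAG_IA5String')
print per_dissector_for('IA5String')  # dissect_per_IA5String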
+class ISO646StringType (RestrictedCharacterStringType): + def eth_tsname(self): + return 'ISO646String' + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_VisibleString') + +class UnrestrictedCharacterStringType (CharacterStringType): + def to_python (self, ctx): + return 'asn1.UnrestrictedCharacterString' + def eth_tsname(self): + return 'CHARACTER_STRING' + +#--- UsefulType --------------------------------------------------------------- +class GeneralizedTime (RestrictedCharacterStringType): + def eth_tsname(self): + return 'GeneralizedTime' + + def eth_type_fn(self, proto, tname, ectx): + if (ectx.Ber()): + out = ectx.eth_type_fn_hdr(tname) + body = ectx.eth_fn_call('dissect_ber_generalized_time' + ectx.pvp(), ret='offset', + par=(('pinfo', 'tree', 'tvb', 'offset', 'hf_index'),)) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + else: + return RestrictedCharacterStringType(self, proto, tname, ectx) + +class UTCTime (RestrictedCharacterStringType): + def eth_tsname(self): + return 'UTCTime' + +class ObjectDescriptor (RestrictedCharacterStringType): + def eth_tsname(self): + return 'ObjectDescriptor' + + +#--- ObjectIdentifierType ----------------------------------------------------- +class ObjectIdentifierType (Type): + def to_python (self, ctx): + return 'asn1.OBJECT_IDENTIFIER' + + def eth_tname(self): + return 'OBJECT_IDENTIFIER' + + def eth_ftype(self): + return ('FT_STRING', 'BASE_NONE') + + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_OID') + + def eth_type_fn(self, proto, tname, ectx): + out = ectx.eth_type_fn_hdr(tname) + if (ectx.OBer()): + body = ectx.eth_fn_call('dissect_ber_object_identifier' + ectx.pvp(), ret='offset', + par=(('implicit_tag', 'pinfo', 'tree', 'tvb', 'offset'), + ('hf_index', 'NULL'))) + elif (ectx.NPer()): + body = ectx.eth_fn_call('dissect_per_object_identifier' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'NULL'))) + elif (ectx.OPer()): + body = ectx.eth_fn_call('dissect_per_object_identifier' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'), + ('NULL',))) + else: + body = '#error Can not decode %s' % (tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +class NamedNumber (Node): + def to_python (self, ctx): + return "('%s',%s)" % (self.ident, self.val) + +class NamedNumListBase(Node): + def to_python (self, ctx): + return "asn1.%s_class ([%s])" % (self.asn1_typ,",".join ( + map (lambda x: x.to_python (ctx), self.named_list))) + +#--- IntegerType -------------------------------------------------------------- +class IntegerType (Type): + def to_python (self, ctx): + return "asn1.INTEGER_class ([%s])" % (",".join ( + map (lambda x: x.to_python (ctx), self.named_list))) + + def eth_tname(self): + if self.named_list: + return Type.eth_tname(self) + if not self.HasConstraint(): + return 'INTEGER' + elif self.constr.type == 'SingleValue' or self.constr.type == 'ValueRange': + return 'INTEGER' + '_' + self.constr.eth_constrname() + else: + return 'INTEGER' + '_' + self.constr.eth_tname() + + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_INTEGER') + + def eth_ftype(self): + if self.HasConstraint(): + if self.constr.type == 'SingleValue': + if self.constr.subtype >= 0: + return ('FT_UINT32', 'BASE_DEC') + elif self.constr.type == 'ValueRange': + if self.constr.subtype[0] >= 0: + return ('FT_UINT32', 'BASE_DEC') + return ('FT_INT32', 
'BASE_DEC') + + def eth_strings(self): + if (self.named_list): + return '$$' + else: + return 'NULL' + + def eth_type_fn(self, proto, tname, ectx): + out = '' + vals = [] + if (self.named_list): + for e in (self.named_list): + vals.append((int(e.val), e.ident)) + out += ectx.eth_vals(tname, vals) + out += ectx.eth_type_fn_hdr(tname) + if (ectx.Ber()): + body = ectx.eth_fn_call('dissect_ber_integer' + ectx.pvp(), ret='offset', + par=(('pinfo', 'tree', 'tvb', 'offset', 'hf_index', 'NULL'),)) + elif (not self.HasConstraint()): + if (ectx.New()): + body = ectx.eth_fn_call('dissect_per_integer' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'private_data'), + ('NULL',))) + else: + body = ectx.eth_fn_call('dissect_per_integer' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'), + ('NULL', 'NULL'))) + elif ((self.constr.type == 'SingleValue') or (self.constr.type == 'ValueRange')): + if self.constr.type == 'SingleValue': + minv = self.constr.subtype + maxv = self.constr.subtype + else: + minv = self.constr.subtype[0] + maxv = self.constr.subtype[1] + if hasattr(self.constr, 'ext') and self.constr.ext: + ext = 'TRUE' + else: + ext = 'FALSE' + if (ectx.New()): + body = ectx.eth_fn_call('dissect_per_constrained_integer' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'private_data'), + (minv, maxv, ext), + ('NULL',))) + else: + body = ectx.eth_fn_call('dissect_per_constrained_integer' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'), + (minv, maxv, 'NULL', 'NULL', ext))) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + +#--- BitStringType ------------------------------------------------------------ +class BitStringType (Type): + def to_python (self, ctx): + return "asn1.BITSTRING_class ([%s])" % (",".join ( + map (lambda x: x.to_python (ctx), self.named_list))) + + def eth_tname(self): + if self.named_list: + return Type.eth_tname(self) + elif not self.HasConstraint(): + return 'BIT_STRING' + elif self.constr.type == 'Size' and (self.constr.subtype.type == 'SingleValue' or self.constr.subtype.type == 'ValueRange'): + return 'BIT_STRING' + '_' + self.constr.eth_constrname() + else: + return '#' + self.type + '_' + str(id(self)) + + def GetTTag(self, ectx): + return ('BER_CLASS_UNI', 'BER_UNI_TAG_BITSTRING') + + def eth_ftype(self): + return ('FT_BYTES', 'BASE_HEX') + + def eth_need_tree(self): + return self.named_list + + def eth_named_bits(self): + bits = [] + if (self.named_list): + for e in (self.named_list): + bits.append((int(e.val), e.ident)) + return bits + + def eth_type_fn(self, proto, tname, ectx): + out = '' + bits = [] + bitsp = 'NULL' + if (self.named_list): + for e in (self.named_list): + bits.append((int(e.val), e.ident)) + out += ectx.eth_bits(tname, bits) + bitsp = tname + '_bits' + out += ectx.eth_type_fn_hdr(tname) + (minv, maxv, ext) = self.eth_get_size_constr() + tree = 'NULL' + if (ectx.eth_type[tname]['tree']): + tree = ectx.eth_type[tname]['tree'] + if (ectx.OBer()): + body = ectx.eth_fn_call('dissect_ber_bitstring' + ectx.pvp(), ret='offset', + par=(('implicit_tag', 'pinfo', 'tree', 'tvb', 'offset'), + (bitsp, 'hf_index', tree), + ('NULL',))) + elif (ectx.NPer()): + body = ectx.eth_fn_call('dissect_per_bit_string' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree'), + ('hf_index', 'item', 'private_data'), + (minv, maxv, ext), + ('NULL', 'NULL'))) + elif 
(ectx.OPer()): + body = ectx.eth_fn_call('dissect_per_bit_string' + ectx.pvp(), ret='offset', + par=(('tvb', 'offset', 'pinfo', 'tree', 'hf_index'), + (minv, maxv))) + else: + body = '#error Can not decode %s' % (tname) + out += ectx.eth_type_fn_body(tname, body) + out += ectx.eth_type_fn_ftr(tname) + return out + + +#============================================================================== + +def p_module_list_1 (t): + 'module_list : module_list module_def' + t[0] = t[1] + [t[2]] + +def p_module_list_2 (t): + 'module_list : module_def' + t[0] = [t[1]] + + +#--- ITU-T Recommendation X.680 ----------------------------------------------- + +# 12 Module definition -------------------------------------------------------- + +# 12.1 +def p_module_def (t): + 'module_def : module_ident DEFINITIONS TagDefault ASSIGNMENT BEGIN module_body END' + t[0] = Module (ident = t[1], tag_def = t[3], body = t[6]) + +def p_TagDefault_1 (t): + '''TagDefault : EXPLICIT TAGS + | IMPLICIT TAGS + | AUTOMATIC TAGS''' + t[0] = Default_Tags (dfl_tag = t[1]) + +def p_TagDefault_2 (t): + 'TagDefault : ' + # 12.2 The "TagDefault" is taken as EXPLICIT TAGS if it is "empty". + t[0] = Default_Tags (dfl_tag = 'EXPLICIT') + +def p_module_ident (t): + 'module_ident : type_ref assigned_ident' # name, oid + # XXX coerce type_ref to module_ref + t [0] = Node('module_ident', val = t[1].val, ident = t[2]) + + +# XXX originally we had both type_ref and module_ref, but that caused +# a reduce/reduce conflict (because both were UCASE_IDENT). Presumably +# this didn't cause a problem in the original ESNACC grammar because it +# was LALR(1) and PLY is (as of 1.1) only SLR. + +#def p_module_ref (t): +# 'module_ref : UCASE_IDENT' +# t[0] = t[1] + +def p_assigned_ident_1 (t): + 'assigned_ident : oid_val' + t[0] = t[1] + +def p_assigned_ident_2 (t): + 'assigned_ident : LCASE_IDENT' + t[0] = t[1] + +def p_assigned_ident_3 (t): + 'assigned_ident : ' + pass + +def p_module_body_1 (t): + 'module_body : exports Imports assign_list' + t[0] = Module_Body (exports = t[1], imports = t[2], assign_list = t[3]) + +def p_module_body_2 (t): + 'module_body : ' + t[0] = Node ('module_body', exports = [], imports = [], + assign_list = []) + +def p_exports_1 (t): + 'exports : EXPORTS syms_exported SEMICOLON' + t[0] = t[2] + +def p_exports_2 (t): + 'exports : ' + t[0] = [] + +def p_syms_exported_1 (t): + 'syms_exported : exp_sym_list' + t[0] = t[1] + +def p_syms_exported_2 (t): + 'syms_exported : ' + t[0] = [] + +def p_exp_sym_list_1 (t): + 'exp_sym_list : Symbol' + t[0] = [t[1]] + +def p_exp_sym_list_2 (t): + 'exp_sym_list : exp_sym_list COMMA Symbol' + t[0] = t[1] + [t[3]] + + +def p_Imports_1(t): + 'Imports : IMPORTS SymbolsImported SEMICOLON' + t[0] = t[2] + +def p_Imports_2 (t): + 'Imports : ' + t[0] = [] + +def p_SymbolsImported_1(t): + 'SymbolsImported : ' + t[0] = [] + +def p_SymbolsImported_2 (t): + 'SymbolsImported : SymbolsFromModuleList' + t[0] = t[1] + +def p_SymbolsFromModuleList_1 (t): + 'SymbolsFromModuleList : SymbolsFromModuleList SymbolsFromModule' + t[0] = t[1] + [t[2]] + +def p_SymbolsFromModuleList_2 (t): + 'SymbolsFromModuleList : SymbolsFromModule' + t[0] = [t[1]] + +def p_SymbolsFromModule (t): + 'SymbolsFromModule : SymbolList FROM module_ident' + t[0] = Node ('SymbolList', symbol_list = t[1], module = t[3]) + +def p_SymbolList_1 (t): + 'SymbolList : Symbol' + t[0] = [t[1]] + +def p_SymbolList_2 (t): + 'SymbolList : SymbolList COMMA Symbol' + t[0] = t[1] + [t[3]] + +def p_Symbol (t): + '''Symbol : type_ref + | 
ParameterizedReference + | identifier''' # XXX omit DefinedMacroName + t[0] = t[1] + +def p_assign_list_1 (t): + 'assign_list : assign_list assign' + t[0] = t[1] + [t[2]] + +def p_assign_list_2 (t): + 'assign_list : assign SEMICOLON' + t[0] = [t[1]] + +def p_assign_list_3 (t): + 'assign_list : assign' + t[0] = [t[1]] + +def p_assign (t): + '''assign : TypeAssignment + | value_assign + | pyquote + | ParameterizedTypeAssignment''' + t[0] = t[1] + +def p_pyquote (t): + '''pyquote : PYQUOTE''' + t[0] = PyQuote (val = t[1]) + + +# 13 Referencing type and value definitions ----------------------------------- + +# 13.1 +def p_DefinedType (t): + '''DefinedType : ext_type_ref + | type_ref + | ParameterizedType''' + t[0] = t[1] + + +# 15 Assigning types and values ----------------------------------------------- + +# 15.1 +def p_TypeAssignment (t): + 'TypeAssignment : UCASE_IDENT ASSIGNMENT Type' + t[0] = t[3] + t[0].SetName(t[1]) + + +# 16 Definition of types and values ------------------------------------------- + +# 16.1 +def p_Type (t): + '''Type : BuiltinType + | ReferencedType + | ConstrainedType''' + t[0] = t[1] + +# 16.2 +def p_BuiltinType (t): + '''BuiltinType : BitStringType + | BooleanType + | CharacterStringType + | ChoiceType + | EnumeratedType + | IntegerType + | NullType + | ObjectIdentifierType + | OctetStringType + | RealType + | SequenceType + | SequenceOfType + | SetType + | SetOfType + | selection_type + | any_type + | TaggedType''' + t[0] = t[1] + +# 16.3 +def p_ReferencedType (t): + '''ReferencedType : DefinedType + | UsefulType''' + t[0] = t[1] + +def p_ext_type_ref (t): + 'ext_type_ref : type_ref DOT type_ref' + # XXX coerce 1st type_ref to module_ref + t[0] = Node ('ext_type_ref', module = t[1], typ = t[3]) + +# 16.5 +def p_NamedType (t): + 'NamedType : identifier Type' + t[0] = t[2] + t[0].SetName (t[1]) + + +# 17 Notation for the boolean type -------------------------------------------- + +# 17.1 +def p_BooleanType (t): + 'BooleanType : BOOLEAN' + t[0] = BooleanType () + +# 17.2 +def p_BooleanValue (t): + '''BooleanValue : TRUE + | FALSE''' + t[0] = t[1] + + +# 18 Notation for the integer type -------------------------------------------- + +# 18.1 +def p_IntegerType_1 (t): + 'IntegerType : INTEGER' + t[0] = IntegerType (named_list = None) + +def p_IntegerType_2 (t): + 'IntegerType : INTEGER LBRACE NamedNumberList RBRACE' + t[0] = IntegerType (named_list = t[3]) + +def p_NamedNumberList_1 (t): + 'NamedNumberList : NamedNumber' + t[0] = [t[1]] + +def p_NamedNumberList_2 (t): + 'NamedNumberList : NamedNumberList COMMA NamedNumber' + t[0] = t[1] + [t[3]] + +def p_NamedNumber (t): + '''NamedNumber : identifier LPAREN SignedNumber RPAREN + | identifier LPAREN defined_value RPAREN''' + t[0] = NamedNumber (ident = t[1], val = t[3]) + +def p_SignedNumber_1 (t): + 'SignedNumber : NUMBER' + t[0] = t [1] + +def p_SignedNumber_2 (t): + 'SignedNumber : MINUS NUMBER' + t[0] = '-' + t[2] + + +# 19 Notation for the enumerated type ----------------------------------------- + +# 19.1 +def p_EnumeratedType (t): + 'EnumeratedType : ENUMERATED LBRACE Enumerations RBRACE' + t[0] = EnumeratedType (val = t[3]['val'], ext = t[3]['ext']) + +def p_Enumerations_1 (t): + 'Enumerations : Enumeration' + t[0] = { 'val' : t[1], 'ext' : None } + +def p_Enumerations_2 (t): + 'Enumerations : Enumeration COMMA ELLIPSIS ExceptionSpec' + t[0] = { 'val' : t[1], 'ext' : [] } + +def p_Enumerations_3 (t): + 'Enumerations : Enumeration COMMA ELLIPSIS ExceptionSpec COMMA Enumeration' + t[0] = { 'val' : t[1], 'ext' : 
t[6] } + +def p_Enumeration_1 (t): + 'Enumeration : EnumerationItem' + t[0] = [t[1]] + +def p_Enumeration_2 (t): + 'Enumeration : Enumeration COMMA EnumerationItem' + t[0] = t[1] + [t[3]] + +def p_EnumerationItem (t): + '''EnumerationItem : Identifier + | NamedNumber''' + t[0] = t[1] + +def p_Identifier (t): + 'Identifier : identifier' + t[0] = Node ('Identifier', ident = t[1]) + + +# 20 Notation for the real type ----------------------------------------------- + +# 20.1 +def p_RealType (t): + 'RealType : REAL' + t[0] = RealType () + +# 21 Notation for the bitstring type ------------------------------------------ + +# 21.1 +def p_BitStringType_1 (t): + 'BitStringType : BIT STRING' + t[0] = BitStringType (named_list = None) + +def p_BitStringType_2 (t): + 'BitStringType : BIT STRING LBRACE NamedBitList RBRACE' + t[0] = BitStringType (named_list = t[4]) + +def p_NamedBitList_1 (t): + 'NamedBitList : NamedBit' + t[0] = [t[1]] + +def p_NamedBitList_2 (t): + 'NamedBitList : NamedBitList COMMA NamedBit' + t[0] = t[1] + [t[3]] + +def p_NamedBit (t): + '''NamedBit : identifier LPAREN NUMBER RPAREN + | identifier LPAREN defined_value RPAREN''' + t[0] = NamedNumber (ident = t[1], val = t[3]) + + +# 22 Notation for the octetstring type ---------------------------------------- + +# 22.1 +def p_OctetStringType (t): + 'OctetStringType : OCTET STRING' + t[0] = OctetStringType () + + +# 23 Notation for the null type ----------------------------------------------- + +# 23.1 +def p_NullType (t): + 'NullType : NULL' + t[0] = NullType () + +# 23.3 +def p_NullValue (t): + 'NullValue : NULL' + t[0] = t[1] + + +# 24 Notation for sequence types ---------------------------------------------- + +# 24.1 +def p_SequenceType_1 (t): + 'SequenceType : SEQUENCE LBRACE RBRACE' + t[0] = SequenceType (elt_list = []) + +def p_SequenceType_2 (t): + 'SequenceType : SEQUENCE LBRACE ComponentTypeLists RBRACE' + if t[3].has_key('ext_list'): + t[0] = SequenceType (elt_list = t[3]['elt_list'], ext_list = t[3]['ext_list']) + else: + t[0] = SequenceType (elt_list = t[3]['elt_list']) + +def p_ExtensionAndException_1 (t): + 'ExtensionAndException : ELLIPSIS' + t[0] = [] + +def p_OptionalExtensionMarker_1 (t): + 'OptionalExtensionMarker : COMMA ELLIPSIS' + t[0] = True + +def p_OptionalExtensionMarker_2 (t): + 'OptionalExtensionMarker : ' + t[0] = False + +def p_ComponentTypeLists_1 (t): + 'ComponentTypeLists : element_type_list' + t[0] = {'elt_list' : t[1]} + +def p_ComponentTypeLists_2 (t): + 'ComponentTypeLists : element_type_list COMMA ExtensionAndException extension_additions OptionalExtensionMarker' + t[0] = {'elt_list' : t[1], 'ext_list' : t[4]} + +def p_ComponentTypeLists_3 (t): + 'ComponentTypeLists : ExtensionAndException extension_additions OptionalExtensionMarker' + t[0] = {'elt_list' : [], 'ext_list' : t[2]} + +def p_extension_additions_1 (t): + 'extension_additions : extension_addition_list' + t[0] = t[1] + +def p_extension_additions_2 (t): + 'extension_additions : ' + t[0] = [] + +def p_extension_addition_list_1 (t): + 'extension_addition_list : COMMA extension_addition' + t[0] = [t[2]] + +def p_extension_addition_list_2 (t): + 'extension_addition_list : extension_addition_list COMMA extension_addition' + t[0] = t[1] + [t[3]] + +def p_extension_addition_1 (t): + 'extension_addition : element_type' + t[0] = t[1] + +def p_element_type_list_1 (t): + 'element_type_list : element_type' + t[0] = [t[1]] + +def p_element_type_list_2 (t): + 'element_type_list : element_type_list COMMA element_type' + t[0] = t[1] + [t[3]] + +def 
p_element_type_1 (t): + 'element_type : NamedType' + t[0] = Node ('elt_type', val = t[1], optional = 0) + +def p_element_type_2 (t): + 'element_type : NamedType OPTIONAL' + t[0] = Node ('elt_type', val = t[1], optional = 1) + +def p_element_type_3 (t): + 'element_type : NamedType DEFAULT named_value' + t[0] = Node ('elt_type', val = t[1], optional = 1, default = t[3]) +# /* +# * this rules uses NamedValue instead of Value +# * for the stupid choice value syntax (fieldname value) +# * it should be like a set/seq value (ie with +# * enclosing { } +# */ + +# XXX get to COMPONENTS later + +# 25 Notation for sequence-of types ------------------------------------------- + +# 25.1 +def p_SequenceOfType (t): + '''SequenceOfType : SEQUENCE OF Type + | SEQUENCE OF NamedType''' + t[0] = SequenceOfType (val = t[3], size_constr = None) + + +# 26 Notation for set types --------------------------------------------------- + +# 26.1 +def p_SetType_1 (t): + 'SetType : SET LBRACE RBRACE' + if t[3].has_key('ext_list'): + t[0] = SetType (elt_list = []) + +def p_SetType_2 (t): + 'SetType : SET LBRACE ComponentTypeLists RBRACE' + if t[3].has_key('ext_list'): + t[0] = SetType (elt_list = t[3]['elt_list'], ext_list = t[3]['ext_list']) + else: + t[0] = SetType (elt_list = t[3]['elt_list']) + + +# 27 Notation for set-of types ------------------------------------------------ + +# 27.1 +def p_SetOfType (t): + '''SetOfType : SET OF Type + | SET OF NamedType''' + t[0] = SetOfType (val = t[3]) + +# 28 Notation for choice types ------------------------------------------------ + +# 28.1 +def p_ChoiceType (t): + 'ChoiceType : CHOICE LBRACE alternative_type_lists RBRACE' + if t[3].has_key('ext_list'): + t[0] = ChoiceType (elt_list = t[3]['elt_list'], ext_list = t[3]['ext_list']) + else: + t[0] = ChoiceType (elt_list = t[3]['elt_list']) + +def p_alternative_type_lists_1 (t): + 'alternative_type_lists : alternative_type_list' + t[0] = {'elt_list' : t[1]} + +def p_alternative_type_lists_2 (t): + '''alternative_type_lists : alternative_type_list COMMA ExtensionAndException extension_addition_alternatives OptionalExtensionMarker''' + t[0] = {'elt_list' : t[1], 'ext_list' : t[4]} + +def p_extension_addition_alternatives_1 (t): + 'extension_addition_alternatives : extension_addition_alternatives_list' + t[0] = t[1] + +def p_extension_addition_alternatives_2 (t): + 'extension_addition_alternatives : ' + t[0] = [] + +def p_extension_addition_alternatives_list_1 (t): + 'extension_addition_alternatives_list : COMMA extension_addition_alternative' + t[0] = [t[2]] + +def p_extension_addition_alternatives_list_2 (t): + 'extension_addition_alternatives_list : extension_addition_alternatives_list COMMA extension_addition_alternative' + t[0] = t[1] + [t[3]] + +def p_extension_addition_alternative_1 (t): + 'extension_addition_alternative : NamedType' + t[0] = t[1] + +def p_alternative_type_list_1 (t): + 'alternative_type_list : NamedType' + t[0] = [t[1]] + +def p_alternative_type_list_2 (t): + 'alternative_type_list : alternative_type_list COMMA NamedType' + t[0] = t[1] + [t[3]] + +def p_selection_type (t): # XXX what is this? 
+ 'selection_type : identifier LT Type' + return Node ('seltype', ident = t[1], typ = t[3]) + +# 30 Notation for tagged types ------------------------------------------------ + +# 30.1 +def p_TaggedType_1 (t): + 'TaggedType : Tag Type' + t[1].mode = 'default' + t[0] = t[2] + t[0].SetTag(t[1]) + +def p_TaggedType_2 (t): + '''TaggedType : Tag IMPLICIT Type + | Tag EXPLICIT Type''' + t[1].mode = t[2] + t[0] = t[3] + t[0].SetTag(t[1]) + +def p_Tag (t): + 'Tag : LBRACK Class ClassNumber RBRACK' + t[0] = Tag(cls = t[2], num = t[3]) + +def p_ClassNumber_1 (t): + 'ClassNumber : number' + t[0] = t[1] + +def p_ClassNumber_2 (t): + 'ClassNumber : defined_value' + t[0] = t[1] + +def p_Class_1 (t): + '''Class : UNIVERSAL + | APPLICATION + | PRIVATE''' + t[0] = t[1] + +def p_Class_2 (t): + 'Class :' + t[0] = 'CONTEXT' + + +def p_any_type_1 (t): + 'any_type : ANY' + t[0] = Literal (val='asn1.ANY') + +def p_any_type_2 (t): + 'any_type : ANY DEFINED BY identifier' + t[0] = Literal (val='asn1.ANY_constr(def_by="%s")' % t[4]) # XXX + + +# 31 Notation for the object identifier type ---------------------------------- + +# 31.1 +def p_ObjectIdentifierType (t): + 'ObjectIdentifierType : OBJECT IDENTIFIER' + t[0] = ObjectIdentifierType () # XXX + + +# 36 Notation for character string types -------------------------------------- + +# 36.1 +def p_CharacterStringType (t): + '''CharacterStringType : RestrictedCharacterStringType + | UnrestrictedCharacterStringType''' + t[0] = t[1] + + +# 37 Definition of restricted character string types -------------------------- + +def p_RestrictedCharacterStringType_1 (t): + 'RestrictedCharacterStringType : BMPString' + t[0] = BMPStringType () +def p_RestrictedCharacterStringType_2 (t): + 'RestrictedCharacterStringType : GeneralString' + t[0] = GeneralStringType () +def p_RestrictedCharacterStringType_3 (t): + 'RestrictedCharacterStringType : GraphicString' + t[0] = GraphicStringType () +def p_RestrictedCharacterStringType_4 (t): + 'RestrictedCharacterStringType : IA5String' + t[0] = IA5StringType () +def p_RestrictedCharacterStringType_5 (t): + 'RestrictedCharacterStringType : ISO646String' + t[0] = ISO646StringType () +def p_RestrictedCharacterStringType_6 (t): + 'RestrictedCharacterStringType : NumericString' + t[0] = NumericStringType () +def p_RestrictedCharacterStringType_7 (t): + 'RestrictedCharacterStringType : PrintableString' + t[0] = PrintableStringType () +def p_RestrictedCharacterStringType_8 (t): + 'RestrictedCharacterStringType : TeletexString' + t[0] = TeletexStringType () +def p_RestrictedCharacterStringType_9 (t): + 'RestrictedCharacterStringType : T61String' + t[0] = T61StringType () +def p_RestrictedCharacterStringType_10 (t): + 'RestrictedCharacterStringType : UniversalString' + t[0] = UniversalStringType () +def p_RestrictedCharacterStringType_11 (t): + 'RestrictedCharacterStringType : UTF8String' + t[0] = UTF8StringType () +def p_RestrictedCharacterStringType_12 (t): + 'RestrictedCharacterStringType : VideotexString' + t[0] = VideotexStringType () +def p_RestrictedCharacterStringType_13 (t): + 'RestrictedCharacterStringType : VisibleString' + t[0] = VisibleStringType () + + +# 40 Definition of unrestricted character string types ------------------------ + +# 40.1 +def p_UnrestrictedCharacterStringType (t): + 'UnrestrictedCharacterStringType : CHARACTER STRING' + t[0] = UnrestrictedCharacterStringType () + + +# 41 Notation for types defined in clauses 42 to 44 --------------------------- + +# 42 Generalized time 
--------------------------------------------------------- + +def p_UsefulType_1 (t): + 'UsefulType : GeneralizedTime' + t[0] = GeneralizedTime() + +# 43 Universal time ----------------------------------------------------------- + +def p_UsefulType_2 (t): + 'UsefulType : UTCTime' + t[0] = UTCTime() + +# 44 The object descriptor type ----------------------------------------------- + +def p_UsefulType_3 (t): + 'UsefulType : ObjectDescriptor' + t[0] = ObjectDescriptor() + + +# 45 Constrained types -------------------------------------------------------- + +# 45.1 +def p_ConstrainedType_1 (t): + 'ConstrainedType : Type Constraint' + t[0] = t[1] + t[0].AddConstraint(t[2]) + +def p_ConstrainedType_2 (t): + 'ConstrainedType : TypeWithConstraint' + t[0] = t[1] + +# 45.5 +def p_TypeWithConstraint_1 (t): + '''TypeWithConstraint : SET Constraint OF Type + | SET SizeConstraint OF Type''' + t[0] = SetOfType (val = t[4], constr = t[2]) + +def p_TypeWithConstraint_2 (t): + '''TypeWithConstraint : SEQUENCE Constraint OF Type + | SEQUENCE SizeConstraint OF Type''' + t[0] = SequenceOfType (val = t[4], constr = t[2]) + +def p_TypeWithConstraint_3 (t): + '''TypeWithConstraint : SET Constraint OF NamedType + | SET SizeConstraint OF NamedType''' + t[0] = SetOfType (val = t[4], constr = t[2]) + +def p_TypeWithConstraint_4 (t): + '''TypeWithConstraint : SEQUENCE Constraint OF NamedType + | SEQUENCE SizeConstraint OF NamedType''' + t[0] = SequenceOfType (val = t[4], constr = t[2]) + +# 45.6 +# 45.7 +def p_Constraint (t): + 'Constraint : LPAREN ConstraintSpec ExceptionSpec RPAREN' + t[0] = t[2] + +def p_ConstraintSpec (t): + '''ConstraintSpec : ElementSetSpecs + | GeneralConstraint''' + t[0] = t[1] + +# 46 Element set specification ------------------------------------------------ + +# 46.1 +def p_ElementSetSpecs_1 (t): + 'ElementSetSpecs : RootElementSetSpec' + t[0] = t[1] + +def p_ElementSetSpecs_2 (t): + 'ElementSetSpecs : RootElementSetSpec COMMA ELLIPSIS' + t[0] = t[1] + t[0].ext = True + +# skip compound constraints, only simple ones are supported + +def p_RootElementSetSpec_1 (t): + 'RootElementSetSpec : SubtypeElements' + t[0] = t[1] + + +# 47 Subtype elements --------------------------------------------------------- + +# 47.1 General +def p_SubtypeElements (t): + '''SubtypeElements : SingleValue + | ContainedSubtype + | ValueRange + | PermittedAlphabet + | SizeConstraint + | InnerTypeConstraints + | PatternConstraint''' + t[0] = t[1] + +# 47.2 Single value +# 47.2.1 +def p_SingleValue (t): + 'SingleValue : value' + t[0] = Constraint(type = 'SingleValue', subtype = t[1]) + +# 47.3 Contained subtype +# 47.3.1 +def p_ContainedSubtype (t): + 'ContainedSubtype : Includes Type' + t[0] = Constraint(type = 'ContainedSubtype', subtype = t[2]) + +def p_Includes (t): + '''Includes : INCLUDES + | ''' + +# 47.4 Value range +# 47.4.1 +def p_ValueRange (t): + 'ValueRange : lower_end_point RANGE upper_end_point' + t[0] = Constraint(type = 'ValueRange', subtype = [t[1], t[3]]) + +# 47.4.3 +def p_lower_end_point_1 (t): + 'lower_end_point : lower_end_value ' + t[0] = t[1] + +def p_lower_end_point_2 (t): + 'lower_end_point : lower_end_value LT' # XXX LT first? 
+ t[0] = t[1] # but not inclusive range + +def p_upper_end_point_1 (t): + 'upper_end_point : upper_end_value' + t[0] = t[1] + +def p_upper_end_point_2 (t): + 'upper_end_point : LT upper_end_value' + t[0] = t[1] # but not inclusive range + +def p_lower_end_value (t): + '''lower_end_value : value + | MIN''' + t[0] = t[1] # XXX + +def p_upper_end_value (t): + '''upper_end_value : value + | MAX''' + t[0] = t[1] + +# 47.5 Size constraint +# 47.5.1 +def p_SizeConstraint (t): + 'SizeConstraint : SIZE Constraint' + t[0] = Constraint (type = 'Size', subtype = t[2]) + +# 47.6 Type constraint +# 47.6.1 +#def p_TypeConstraint (t): +# 'TypeConstraint : Type' +# t[0] = Constraint (type = 'Type', subtype = t[2]) + +# 47.7 Permitted alphabet +# 47.7.1 +def p_PermittedAlphabet (t): + 'PermittedAlphabet : FROM Constraint' + t[0] = Constraint (type = 'From', subtype = t[2]) + +# 47.8 Inner subtyping +# 47.8.1 +def p_InnerTypeConstraints (t): + '''InnerTypeConstraints : WITH COMPONENT SingleTypeConstraint + | WITH COMPONENTS MultipleTypeConstraints''' + pass # ignore PER invisible constraint + +# 47.8.3 +def p_SingleTypeConstraint (t): + 'SingleTypeConstraint : Constraint' + t[0] = t[1] + +# 47.8.4 +def p_MultipleTypeConstraints (t): + '''MultipleTypeConstraints : FullSpecification + | PartialSpecification''' + t[0] = t[1] + +def p_FullSpecification (t): + 'FullSpecification : LBRACE TypeConstraints RBRACE' + t[0] = t[2] + +def p_PartialSpecification (t): + 'PartialSpecification : LBRACE ELLIPSIS COMMA TypeConstraints RBRACE' + t[0] = t[4] + +def p_TypeConstraints_1 (t): + 'TypeConstraints : named_constraint' + t [0] = [t[1]] + +def p_TypeConstraints_2 (t): + 'TypeConstraints : TypeConstraints COMMA named_constraint' + t[0] = t[1] + [t[3]] + +def p_named_constraint_1 (t): + 'named_constraint : identifier constraint' + return Node ('named_constraint', ident = t[1], constr = t[2]) + +def p_named_constraint_2 (t): + 'named_constraint : constraint' + return Node ('named_constraint', constr = t[1]) + +def p_constraint (t): + 'constraint : value_constraint presence_constraint' + t[0] = Node ('constraint', value = t[1], presence = t[2]) + +def p_value_constraint_1 (t): + 'value_constraint : Constraint' + t[0] = t[1] + +def p_value_constraint_2 (t): + 'value_constraint : ' + pass + +def p_presence_constraint_1 (t): + '''presence_constraint : PRESENT + | ABSENT + | OPTIONAL''' + t[0] = t[1] + +def p_presence_constraint_2 (t): + '''presence_constraint : ''' + pass + +# 47.9 Pattern constraint +# 47.9.1 +def p_PatternConstraint (t): + 'PatternConstraint : PATTERN value' + t[0] = Constraint (type = 'Pattern', subtype = t[2]) + +# 49 The exception identifier + +# 49.4 +def p_ExceptionSpec (t): + 'ExceptionSpec : ' + pass + +# /*-----------------------------------------------------------------------*/ +# /* Value Notation Productions */ +# /*-----------------------------------------------------------------------*/ + + +def p_value_assign (t): + 'value_assign : identifier Type ASSIGNMENT value' + t[0] = Node('value_assign', ident = t[1], typ = t[2], val = t[4]) + +def p_value (t): + '''value : builtin_value + | defined_value''' + t[0] = t[1] + +def p_defined_value(t): + '''defined_value : ext_val_ref + | identifier''' + t[0] = t[1] + +def p_ext_val_ref (t): + 'ext_val_ref : type_ref DOT identifier' + # XXX coerce type_ref to module_ref + return Node ('ext_val_ref', module = t[1], ident = t[3]) + +def p_builtin_value_1 (t): + '''builtin_value : BooleanValue + | NullValue + | special_real_val + | SignedNumber + | hex_string + 
| binary_string + | char_string''' # XXX we don't support {data} here + t[0] = t[1] + +def p_special_real_val (t): + '''special_real_val : PLUS_INFINITY + | MINUS_INFINITY''' + t[0] = t[1] + +def p_named_value_1 (t): + 'named_value : value' + t[0] = t[1] + +def p_named_value_2 (t): + 'named_value : identifier value' + t[0] = Node ('named_value', ident = t[1], value = t[2]) + +def p_oid_val (t): + 'oid_val : LBRACE oid_comp_list RBRACE' + t[0] = t[2] + +def p_oid_comp_list_1 (t): + 'oid_comp_list : oid_comp_list oid_component' + t[0] = t[1] + [t[2]] + +def p_oid_comp_list_2 (t): + 'oid_comp_list : oid_component' + t[0] = [t[1]] + +def p_oid_component (t): + '''oid_component : number_form + | name_form + | name_and_number_form''' + t[0] = t[1] + +def p_number_form (t): + 'number_form : NUMBER' + t [0] = t[1] + +# Note that Z39.50 v3 spec has upper-case here for, e.g., SUTRS. +# I've hacked the grammar to be liberal about what it accepts. +# XXX should have -strict command-line flag to only accept lowercase +# here, since that's what X.208 says. +def p_name_form (t): + '''name_form : type_ref + | identifier''' + t[0] = t[1] + +def p_name_and_number_form_1 (t): + '''name_and_number_form : identifier LPAREN number_form RPAREN + | type_ref LPAREN number_form RPAREN''' + t[0] = Node ('name_and_number', ident = t[1], number = t[3]) + +def p_name_and_number_form_2 (t): + 'name_and_number_form : identifier LPAREN defined_value RPAREN' + t[0] = Node ('name_and_number', ident = t[1], val = t[3]) + +# see X.208 if you are dubious about lcase only for identifier +def p_identifier (t): + 'identifier : LCASE_IDENT' + t[0] = t[1] + + +def p_binary_string (t): + 'binary_string : BSTRING' + t[0] = t[1] + +def p_hex_string (t): + 'hex_string : HSTRING' + t[0] = t[1] + +def p_char_string (t): + 'char_string : QSTRING' + t[0] = t[1] + +def p_number (t): + 'number : NUMBER' + t[0] = t[1] + + +def p_type_ref (t): + 'type_ref : UCASE_IDENT' + t[0] = Type_Ref(val=t[1]) + + +#--- ITU-T Recommendation X.682 ----------------------------------------------- + +# 8 General constraint specification ------------------------------------------ + +# 8.1 +def p_GeneralConstraint (t): + '''GeneralConstraint : UserDefinedConstraint''' +# | TableConstraint +# | ContentsConstraint'' + t[0] = t[1] + +# 9 User-defined constraints -------------------------------------------------- + +# 9.1 +def p_UserDefinedConstraint (t): + 'UserDefinedConstraint : CONSTRAINED BY LBRACE UserDefinedConstraintParameterList RBRACE' + t[0] = Constraint(type = 'UserDefined', subtype = t[4]) + +def p_UserDefinedConstraintParameterList_1 (t): + 'UserDefinedConstraintParameterList : ' + t[0] = [] + +def p_UserDefinedConstraintParameterList_2 (t): + 'UserDefinedConstraintParameterList : UserDefinedConstraintParameter' + t[0] = [t[1]] + +def p_UserDefinedConstraintParameterList_3 (t): + 'UserDefinedConstraintParameterList : UserDefinedConstraintParameterList COMMA UserDefinedConstraintParameter' + t[0] = t[1] + [t[3]] + +# 9.3 +def p_UserDefinedConstraintParameter (t): + 'UserDefinedConstraintParameter : type_ref' + t[0] = t[1] + + +#--- ITU-T Recommendation X.683 ----------------------------------------------- + +# 8 Parameterized assignments ------------------------------------------------- + +# 8.1 + +# 8.2 +def p_ParameterizedTypeAssignment (t): + 'ParameterizedTypeAssignment : UCASE_IDENT ParameterList ASSIGNMENT Type' + t[0] = t[4] + t[0].SetName(t[1] + 'xxx') + +# 8.3 +def p_ParameterList (t): + 'ParameterList : LBRACE Parameters RBRACE' + t[0] = t[2] 
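The X.683 productions above and below do not really expand parameterized definitions; assignments, references and uses are simply flattened to the same mangled name. A minimal standalone sketch of that rule (illustrative only; the ASN.1 fragment in the comment is hypothetical):

# Sketch only: p_ParameterizedTypeAssignment, p_ParameterizedReference and
# p_ParameterizedType all drop the parameter list and append a fixed suffix,
# so the definition and every reference meet again under one name.
def mangle_parameterized(name):
    return name + 'xxx'

# hypothetical input:  Reject {ERROR} ::= SEQUENCE {...}   used as   Reject {SomeError}
# both sides resolve to 'Rejectxxx', so all instantiations share one generated type.
print mangle_parameterized('Reject')   # Rejectxxx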
+ +def p_Parameters_1 (t): + 'Parameters : Parameter' + t[0] = [t[1]] + +def p_Parameters_2 (t): + 'Parameters : Parameters COMMA Parameter' + t[0] = t[1] + [t[3]] + +def p_Parameter (t): + 'Parameter : type_ref' + t[0] = t[1] + + +# 9 Referencing parameterized definitions ------------------------------------- + +# 9.1 +def p_ParameterizedReference (t): + 'ParameterizedReference : type_ref LBRACE RBRACE' + t[0] = t[1] + t[0].val += 'xxx' + +# 9.2 +def p_ParameterizedType (t): + 'ParameterizedType : type_ref ActualParameterList' + t[0] = t[1] + t[0].val += 'xxx' + +# 9.5 +def p_ActualParameterList (t): + 'ActualParameterList : LBRACE ActualParameters RBRACE' + t[0] = t[2] + +def p_ActualParameters_1 (t): + 'ActualParameters : ActualParameter' + t[0] = [t[1]] + +def p_ActualParameters_2 (t): + 'ActualParameters : ActualParameters COMMA ActualParameter' + t[0] = t[1] + [t[3]] + +def p_ActualParameter (t): + 'ActualParameter : Type' + t[0] = t[1] + + +def p_error(t): + raise ParseError(str(t)) + +yacc.yacc () + +def testlex (s): + lexer.input (s) + while 1: + token = lexer.token () + if not token: + break + print token + + +def do_module (ast, defined_dict): + assert (ast.type == 'Module') + ctx = Ctx (defined_dict) + print ast.to_python (ctx) + print ctx.output_assignments () + print ctx.output_pyquotes () + +def eth_do_module (ast, ectx): + assert (ast.type == 'Module') + if ectx.dbg('s'): print ast.str_depth(0) + ast.to_eth(ectx) + ectx.eth_prepare() + if ectx.dbg('t'): + print "\n# Assignments" + print "\n".join(ectx.assign_ord) + print "\n# Imported Types" + print "%-49s %-24s %-24s" % ("ASN.1 name", "Module", "Protocol") + print "-" * 100 + for t in ectx.type_imp: + print "%-49s %-24s %-24s" % (t, ectx.type[t]['import'], ectx.type[t]['proto']) + print "\n# Exported Types" + print "Ethereal type Export Flag" + print "-" * 100 + for t in ectx.eth_export_ord: + print "%-31s %d" % (t, ectx.eth_type[t]['export']) + print "\n# ASN.1 Types" + print "%-49s %-24s %-24s" % ("ASN.1 unique name", "'tname'", "Ethereal type") + print "-" * 100 + for t in ectx.type_ord: + print "%-49s %-24s %-24s" % (t, ectx.type[t]['tname'], ectx.type[t]['ethname']) + print "\n# Ethereal Types" + print "Ethereal type References (ASN.1 types)" + print "-" * 100 + for t in ectx.eth_type_ord: + print "%-31s %d" % (t, len(ectx.eth_type[t]['ref'])), + print ', '.join(ectx.eth_type[t]['ref']) + print "\n# ASN.1 Fields" + print "ASN.1 unique name Ethereal name ASN.1 type" + print "-" * 100 + for f in ectx.field_ord: + print "%-40s %-20s %s" % (f, ectx.field[f]['ethname'], ectx.field[f]['type']) + print "\n# Ethereal Fields" + print "Ethereal name Ethereal type References (ASN.1 fields)" + print "-" * 100 + for f in ectx.eth_hf_ord: + print "%-30s %-20s %s" % (f, ectx.eth_hf[f]['ethtype'], len(ectx.eth_hf[f]['ref'])), + print ', '.join(ectx.eth_hf[f]['ref']) + #print '\n'.join(ectx.eth_type_ord1) + print "\n# Cyclic dependencies" + for c in ectx.eth_dep_cycle: + print ' -> '.join(c) + ectx.dupl_report() + ectx.conform.unused_report() + ectx.eth_output_hf() + ectx.eth_output_ett() + ectx.eth_output_types() + ectx.eth_output_hf_arr() + ectx.eth_output_ett_arr() + ectx.eth_output_export() + +import time +def testyacc (s, fn, defined_dict): + ast = yacc.parse (s, debug=0) + time_str = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) + print """#!/usr/bin/env python +# Auto-generated from %s at %s +from PyZ3950 import asn1""" % (fn, time_str) + for module in ast: + eth_do_module (module, defined_dict) + +import sys 
+import os.path +import getopt + +# Ethereal compiler +def eth_usage(): + print """ +competh [-h|?] [-d dbg] [-p proto] [-c conform_file] input_file + -h|? : usage + -d dbg : debug output, dbg = [l][y][s][t] + l - lex + y - yacc + s - internal ASN structure + t - tables + -b : BER (default is PER) + -X : original dissector API (see Note) + -p proto : protocol name (default is basenam of without extension) + -o name : output files name (default is ) + -c conform_file : conformation file + -s template : single file output (templete is input file without .c/.h extension) + input_file : input ASN.1 file + +Note: It can create output for an original or a new PER/BER dissectors API, + but the new PER/BER dissectors API is not implemented now. +""" + +def eth_fhdr(fn): + def outln(ln): + return '/* ' + ('%-74s' % (ln)) + ' */\n' + out = '' + out += outln('Do not modify this file.') + out += outln('It is created automatically by the ASN.1 to Ethereal dissector compiler') + out += outln(fn) + out += outln(' '.join(sys.argv)) + out += '\n' + return out + +def make_include(out_nm, in_nm, inc_nms, remove_inc=False): + fin = file(in_nm, "r") + fout = file(out_nm, "w") + fout.write(eth_fhdr(out_nm)) + fout.write('/* Input file: ' + in_nm +' */\n') + fout.write('/* Include files: ' + ', '.join(inc_nms) + ' */\n') + fout.write('\n') + + include = re.compile(r'^\s*#\s*include\s+[<"](?P[^>"]+)[>"]', re.IGNORECASE) + + while (True): + line = fin.readline() + if (line == ''): break + result = include.search(line) + if (result and + (result.group('fname') in inc_nms) and + os.path.exists(result.group('fname'))): + fout.write('\n') + fout.write('/*--- Included file: ' + result.group('fname') + ' ---*/\n') + fout.write('\n') + finc = file(result.group('fname'), "r") + fout.write(finc.read()) + fout.write('\n') + fout.write('/*--- End of included file: ' + result.group('fname') + ' ---*/\n') + fout.write('\n') + finc.close() + if (remove_inc): os.unlink(result.group('fname')) + else: + fout.write(line) + + fout.close() + fin.close() + +def eth_main(): + print "ASN.1 to Ethereal dissector compiler"; + try: + opts, args = getopt.getopt(sys.argv[1:], "h?bXd:p:o:c:s:"); + except getopt.GetoptError: + eth_usage(); sys.exit(2) + if len(args) != 1: + eth_usage(); sys.exit(2) + + fn = args[0]; + conform = EthCnf() + ectx = EthCtx(conform) + ectx.encoding = 'per' + ectx.proto = os.path.splitext(os.path.basename(fn))[0].lower() + ectx.outnm = ectx.proto + ectx.new = True + ectx.dbgopt = '' + single_file = None + for o, a in opts: + if o in ("-h", "-?"): + eth_usage(); sys.exit(2) + if o in ("-b",): + ectx.encoding = 'ber' + if o in ("-p",): + ectx.proto = a + ectx.outnm = ectx.proto + if o in ("-o",): + ectx.outnm = a + if o in ("-c",): + ectx.conform.read(a) + if o in ("-X",): + ectx.new = False + if o in ("-d",): + ectx.dbgopt = a + if o in ("-s",): + single_file = a + + f = open(fn, "r") + s = f.read(); + f.close() + lexer.debug=ectx.dbg('l') + ast = yacc.parse (s, debug=ectx.dbg('y')) + for module in ast: + eth_do_module(module, ectx) + + if (single_file): + in_nm = single_file + '.c' + out_nm = ectx.eth_output_fname('') + inc_nms = map (lambda x: ectx.eth_output_fname(x), ('hf', 'ett', 'fn', 'hfarr', 'ettarr')) + make_include(out_nm, in_nm, inc_nms, remove_inc=True) + in_nm = single_file + '.h' + if (os.path.exists(in_nm)): + out_nm = ectx.eth_output_fname('', ext='h') + inc_nms = map (lambda x: ectx.eth_output_fname(x, ext='h'), ('exp',)) + make_include(out_nm, in_nm, inc_nms, remove_inc=True) + + +# Python compiler 
+def main(): + testfn = testyacc + if len (sys.argv) == 1: + while 1: + s = raw_input ('Query: ') + if len (s) == 0: + break + testfn (s, 'console', {}) + else: + defined_dict = {} + for fn in sys.argv [1:]: + f = open (fn, "r") + testfn (f.read (), fn, defined_dict) + f.close () + lexer.lineno = 1 + + +#--- BODY --------------------------------------------------------------------- + +if __name__ == '__main__': + if ('asn2eth' == os.path.splitext(os.path.basename(sys.argv[0]))[0].lower()): + eth_main() + else: + main() + +#------------------------------------------------------------------------------ diff --git a/tools/lex.py b/tools/lex.py new file mode 100644 index 0000000000..7033e8355a --- /dev/null +++ b/tools/lex.py @@ -0,0 +1,681 @@ +#----------------------------------------------------------------------------- +# ply: lex.py +# +# Author: David M. Beazley (beazley@cs.uchicago.edu) +# Department of Computer Science +# University of Chicago +# Chicago, IL 60637 +# +# Copyright (C) 2001, David M. Beazley +# +# $Header: /svn/cvsroot/ethereal/tools/lex.py,v 1.1 2004/05/24 08:33:09 sahlberg Exp $ +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +# +# +# This module automatically constructs a lexical analysis module from regular +# expression rules defined in a user-defined module. The idea is essentially the same +# as that used in John Aycock's Spark framework, but the implementation works +# at the module level rather than requiring the use of classes. +# +# This module tries to provide an interface that is closely modeled after +# the traditional lex interface in Unix. It also differs from Spark +# in that: +# +# - It provides more extensive error checking and reporting if +# the user supplies a set of regular expressions that can't +# be compiled or if there is any other kind of a problem in +# the specification. +# +# - The interface is geared towards LALR(1) and LR(1) parser +# generators. That is tokens are generated one at a time +# rather than being generated in advanced all in one step. +# +# There are a few limitations of this module +# +# - The module interface makes it somewhat awkward to support more +# than one lexer at a time. Although somewhat inelegant from a +# design perspective, this is rarely a practical concern for +# most compiler projects. +# +# - The lexer requires that the entire input text be read into +# a string before scanning. I suppose that most machines have +# enough memory to make this a minor issues, but it makes +# the lexer somewhat difficult to use in interactive sessions +# or with streaming data. +# +#----------------------------------------------------------------------------- + +r""" +lex.py + +This module builds lex-like scanners based on regular expression rules. 
+To use the module, simply write a collection of regular expression rules +and actions like this: + +# lexer.py +import lex + +# Define a list of valid tokens +tokens = ( + 'IDENTIFIER', 'NUMBER', 'PLUS', 'MINUS' + ) + +# Define tokens as functions +def t_IDENTIFIER(t): + r' ([a-zA-Z_](\w|_)* ' + return t + +def t_NUMBER(t): + r' \d+ ' + return t + +# Some simple tokens with no actions +t_PLUS = r'\+' +t_MINUS = r'-' + +# Initialize the lexer +lex.lex() + +The tokens list is required and contains a complete list of all valid +token types that the lexer is allowed to produce. Token types are +restricted to be valid identifiers. This means that 'MINUS' is a valid +token type whereas '-' is not. + +Rules are defined by writing a function with a name of the form +t_rulename. Each rule must accept a single argument which is +a token object generated by the lexer. This token has the following +attributes: + + t.type = type string of the token. This is initially set to the + name of the rule without the leading t_ + t.value = The value of the lexeme. + t.lineno = The value of the line number where the token was encountered + +For example, the t_NUMBER() rule above might be called with the following: + + t.type = 'NUMBER' + t.value = '42' + t.lineno = 3 + +Each rule returns the token object it would like to supply to the +parser. In most cases, the token t is returned with few, if any +modifications. To discard a token for things like whitespace or +comments, simply return nothing. For instance: + +def t_whitespace(t): + r' \s+ ' + pass + +For faster lexing, you can also define this in terms of the ignore set like this: + +t_ignore = ' \t' + +The characters in this string are ignored by the lexer. Use of this feature can speed +up parsing significantly since scanning will immediately proceed to the next token. + +lex requires that the token returned by each rule has an attribute +t.type. Other than this, rules are free to return any kind of token +object that they wish and may construct a new type of token object +from the attributes of t (provided the new object has the required +type attribute). + +If illegal characters are encountered, the scanner executes the +function t_error(t) where t is a token representing the rest of the +string that hasn't been matched. If this function isn't defined, a +LexError exception is raised. The .text attribute of this exception +object contains the part of the string that wasn't matched. + +The t.skip(n) method can be used to skip ahead n characters in the +input stream. This is usually only used in the error handling rule. +For instance, the following rule would print an error message and +continue: + +def t_error(t): + print "Illegal character in input %s" % t.value[0] + t.skip(1) + +Of course, a nice scanner might wish to skip more than one character +if the input looks very corrupted. + +The lex module defines a t.lineno attribute on each token that can be used +to track the current line number in the input. The value of this +variable is not modified by lex so it is up to your lexer module +to correctly update its value depending on the lexical properties +of the input language. To do this, you might write rules such as +the following: + +def t_newline(t): + r' \n+ ' + t.lineno += t.value.count("\n") + +To initialize your lexer so that it can be used, simply call the lex.lex() +function in your rule file. If there are any errors in your +specification, warning messages or an exception will be generated to +alert you to the problem. 
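Putting the pieces above together, a minimal self-contained rule module might look like this (token names, rules and the input string are examples only):

import lex

tokens = ('NUMBER', 'PLUS')

t_PLUS   = r'\+'
t_ignore = ' \t'

def t_NUMBER(t):
    r'\d+'
    return t

def t_newline(t):
    r'\n+'
    t.lineno += t.value.count("\n")

def t_error(t):
    print "Illegal character '%s'" % t.value[0]
    t.skip(1)

lex.lex()                        # build the lexer from this module's rules
lex.input("1 + 2 + 3")
while 1:
    tok = lex.token()
    if not tok: break
    print tok                    # e.g. LexToken(NUMBER,'1',1)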
+ +(dave: this needs to be rewritten) +To use the newly constructed lexer from another module, simply do +this: + + import lex + import lexer + plex.input("position = initial + rate*60") + + while 1: + token = plex.token() # Get a token + if not token: break # No more tokens + ... do whatever ... + +Assuming that the module 'lexer' has initialized plex as shown +above, parsing modules can safely import 'plex' without having +to import the rule file or any additional imformation about the +scanner you have defined. +""" + +# ----------------------------------------------------------------------------- + + +__version__ = "1.3" + +import re, types, sys, copy + +# Exception thrown when invalid token encountered and no default +class LexError(Exception): + def __init__(self,message,s): + self.args = (message,) + self.text = s + +# Token class +class LexToken: + def __str__(self): + return "LexToken(%s,%r,%d)" % (self.type,self.value,self.lineno) + def __repr__(self): + return str(self) + def skip(self,n): + try: + self._skipn += n + except AttributeError: + self._skipn = n + +# ----------------------------------------------------------------------------- +# Lexer class +# +# input() - Store a new string in the lexer +# token() - Get the next token +# ----------------------------------------------------------------------------- + +class Lexer: + def __init__(self): + self.lexre = None # Master regular expression + self.lexdata = None # Actual input data (as a string) + self.lexpos = 0 # Current position in input text + self.lexlen = 0 # Length of the input text + self.lexindexfunc = [ ] # Reverse mapping of groups to functions and types + self.lexerrorf = None # Error rule (if any) + self.lextokens = None # List of valid tokens + self.lexignore = None # Ignored characters + self.lineno = 1 # Current line number + self.debug = 0 # Debugging mode + self.optimize = 0 # Optimized mode + self.token = self.errtoken + + def __copy__(self): + c = Lexer() + c.lexre = self.lexre + c.lexdata = self.lexdata + c.lexpos = self.lexpos + c.lexlen = self.lexlen + c.lenindexfunc = self.lexindexfunc + c.lexerrorf = self.lexerrorf + c.lextokens = self.lextokens + c.lexignore = self.lexignore + c.lineno = self.lineno + c.optimize = self.optimize + c.token = c.realtoken + + # ------------------------------------------------------------ + # input() - Push a new string into the lexer + # ------------------------------------------------------------ + def input(self,s): + if not isinstance(s,types.StringType): + raise ValueError, "Expected a string" + self.lexdata = s + self.lexpos = 0 + self.lexlen = len(s) + self.token = self.realtoken + + # Change the token routine to point to realtoken() + global token + if token == self.errtoken: + token = self.token + + # ------------------------------------------------------------ + # errtoken() - Return error if token is called with no data + # ------------------------------------------------------------ + def errtoken(self): + raise RuntimeError, "No input string given with input()" + + # ------------------------------------------------------------ + # token() - Return the next token from the Lexer + # + # Note: This function has been carefully implemented to be as fast + # as possible. 
Don't make changes unless you really know what + # you are doing + # ------------------------------------------------------------ + def realtoken(self): + # Make local copies of frequently referenced attributes + lexpos = self.lexpos + lexlen = self.lexlen + lexignore = self.lexignore + lexdata = self.lexdata + + while lexpos < lexlen: + # This code provides some short-circuit code for whitespace, tabs, and other ignored characters + if lexdata[lexpos] in lexignore: + lexpos += 1 + continue + + # Look for a regular expression match + m = self.lexre.match(lexdata,lexpos) + if m: + i = m.lastindex + lexpos = m.end() + tok = LexToken() + tok.value = m.group() + tok.lineno = self.lineno + tok.lexer = self + func,tok.type = self.lexindexfunc[i] + if not func: + self.lexpos = lexpos + return tok + + # If token is processed by a function, call it + self.lexpos = lexpos + newtok = func(tok) + self.lineno = tok.lineno # Update line number + + # Every function must return a token, if nothing, we just move to next token + if not newtok: continue + + # Verify type of the token. If not in the token map, raise an error + if not self.optimize: + if not self.lextokens.has_key(newtok.type): + raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( + func.func_code.co_filename, func.func_code.co_firstlineno, + func.__name__, newtok.type),lexdata[lexpos:]) + + return newtok + + # No match. Call t_error() if defined. + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = "error" + tok.lexer = self + oldpos = lexpos + newtok = self.lexerrorf(tok) + lexpos += getattr(tok,"_skipn",0) + if oldpos == lexpos: + # Error method didn't change text position at all. This is an error. + self.lexpos = lexpos + raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) + if not newtok: continue + self.lexpos = lexpos + return newtok + + self.lexpos = lexpos + raise LexError, ("No match found", lexdata[lexpos:]) + + # No more input data + self.lexpos = lexpos + 1 + return None + + +# ----------------------------------------------------------------------------- +# validate_file() +# +# This checks to see if there are duplicated t_rulename() functions or strings +# in the parser input file. This is done using a simple regular expression +# match on each line in the filename. +# ----------------------------------------------------------------------------- + +def validate_file(filename): + import os.path + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea what the file is. Return OK + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + return 1 # Oh well + + fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + counthash = { } + linen = 1 + noerror = 1 + for l in lines: + m = fre.match(l) + if not m: + m = sre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + print "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev) + noerror = 0 + linen += 1 + return noerror + +# ----------------------------------------------------------------------------- +# _read_lextab(module) +# +# Reads lexer table from a lextab file instead of using introspection. 
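+#
+# For reference, the lextab module generated by lex() further below contains
+# module-level data of roughly the following shape (a sketch; the actual
+# values depend on the rule file):
+#
+#     _lexre     = '(?P<t_NUMBER>\\d+)|(?P<t_PLUS>\\+)'
+#     _lextab    = [ None, ('t_NUMBER','NUMBER'), (None,'PLUS') ]
+#     _lextokens = {'NUMBER': None, 'PLUS': None}
+#     _lexignore = ' \t'
+#     _lexerrorf = 't_error'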
+# ----------------------------------------------------------------------------- + +def _read_lextab(lexer, fdict, module): + exec "import %s as lextab" % module + lexer.lexre = re.compile(lextab._lexre, re.VERBOSE) + lexer.lexindexfunc = lextab._lextab + for i in range(len(lextab._lextab)): + t = lexer.lexindexfunc[i] + if t: + if t[0]: + lexer.lexindexfunc[i] = (fdict[t[0]],t[1]) + lexer.lextokens = lextab._lextokens + lexer.lexignore = lextab._lexignore + if lextab._lexerrorf: + lexer.lexerrorf = fdict[lextab._lexerrorf] + +# ----------------------------------------------------------------------------- +# lex(module) +# +# Build all of the regular expression rules from definitions in the supplied module +# ----------------------------------------------------------------------------- +def lex(module=None,debug=0,optimize=0,lextab="lextab"): + ldict = None + regex = "" + error = 0 + files = { } + lexer = Lexer() + lexer.debug = debug + lexer.optimize = optimize + global token,input + + if module: + if not isinstance(module, types.ModuleType): + raise ValueError,"Expected a module" + + ldict = module.__dict__ + + else: + # No module given. We might be able to get information from the caller. + try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + f = f.f_back # Walk out to our calling function + ldict = f.f_globals # Grab its globals dictionary + + if optimize and lextab: + try: + _read_lextab(lexer,ldict, lextab) + if not lexer.lexignore: lexer.lexignore = "" + token = lexer.token + input = lexer.input + return lexer + + except ImportError: + pass + + # Get the tokens map + tokens = ldict.get("tokens",None) + if not tokens: + raise SyntaxError,"lex: module does not define 'tokens'" + if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): + raise SyntaxError,"lex: tokens must be a list or tuple." + + # Build a dictionary of valid token names + lexer.lextokens = { } + if not optimize: + + # Utility function for verifying tokens + def is_identifier(s): + for c in s: + if not (c.isalnum() or c == '_'): return 0 + return 1 + + for n in tokens: + if not is_identifier(n): + print "lex: Bad token name '%s'" % n + error = 1 + if lexer.lextokens.has_key(n): + print "lex: Warning. Token '%s' multiply defined." % n + lexer.lextokens[n] = None + else: + for n in tokens: lexer.lextokens[n] = None + + + if debug: + print "lex: tokens = '%s'" % lexer.lextokens.keys() + + # Get a list of symbols with the t_ prefix + tsymbols = [f for f in ldict.keys() if f[:2] == 't_'] + + # Now build up a list of functions and a list of strings + fsymbols = [ ] + ssymbols = [ ] + for f in tsymbols: + if isinstance(ldict[f],types.FunctionType): + fsymbols.append(ldict[f]) + elif isinstance(ldict[f],types.StringType): + ssymbols.append((f,ldict[f])) + else: + print "lex: %s not defined as a function or string" % f + error = 1 + + # Sort the functions by line number + fsymbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno)) + + # Sort the strings by regular expression length + ssymbols.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) + + # Check for non-empty symbols + if len(fsymbols) == 0 and len(ssymbols) == 0: + raise SyntaxError,"lex: no rules of the form t_rulename are defined." 
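+
+    # The loops below combine every rule into a single master regular
+    # expression of the form (?P<t_rule1>regex1)|(?P<t_rule2>regex2)|...,
+    # so that one match() call both finds the lexeme and identifies the
+    # rule (via the named group) that produced it.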
+ + # Add all of the rules defined with actions first + for f in fsymbols: + + line = f.func_code.co_firstlineno + file = f.func_code.co_filename + files[file] = None + + if not optimize: + if f.func_code.co_argcount > 1: + print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__) + error = 1 + continue + + if f.func_code.co_argcount < 1: + print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__) + error = 1 + continue + + if f.__name__ == 't_ignore': + print "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__) + error = 1 + continue + + if f.__name__ == 't_error': + lexer.lexerrorf = f + continue + + if f.__doc__: + if not optimize: + try: + c = re.compile(f.__doc__, re.VERBOSE) + except re.error,e: + print "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e) + error = 1 + continue + + if debug: + print "lex: Adding rule %s -> '%s'" % (f.__name__,f.__doc__) + + # Okay. The regular expression seemed okay. Let's append it to the master regular + # expression we're building + + if (regex): regex += "|" + regex += "(?P<%s>%s)" % (f.__name__,f.__doc__) + else: + print "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__) + + # Now add all of the simple rules + for name,r in ssymbols: + + if name == 't_ignore': + lexer.lexignore = r + continue + + if not optimize: + if name == 't_error': + raise SyntaxError,"lex: Rule 't_error' must be defined as a function" + error = 1 + continue + + if not lexer.lextokens.has_key(name[2:]): + print "lex: Rule '%s' defined for an unspecified token %s." % (name,name[2:]) + error = 1 + continue + try: + c = re.compile(r,re.VERBOSE) + except re.error,e: + print "lex: Invalid regular expression for rule '%s'. %s" % (name,e) + error = 1 + continue + if debug: + print "lex: Adding rule %s -> '%s'" % (name,r) + + if regex: regex += "|" + regex += "(?P<%s>%s)" % (name,r) + + if not optimize: + for f in files.keys(): + if not validate_file(f): + error = 1 + try: + if debug: + print "lex: regex = '%s'" % regex + lexer.lexre = re.compile(regex, re.VERBOSE) + + # Build the index to function map for the matching engine + lexer.lexindexfunc = [ None ] * (max(lexer.lexre.groupindex.values())+1) + for f,i in lexer.lexre.groupindex.items(): + handle = ldict[f] + if isinstance(handle,types.FunctionType): + lexer.lexindexfunc[i] = (handle,handle.__name__[2:]) + else: + # If rule was specified as a string, we build an anonymous + # callback function to carry out the action + lexer.lexindexfunc[i] = (None,f[2:]) + + # If a lextab was specified, we create a file containing the precomputed + # regular expression and index table + + if lextab and optimize: + lt = open(lextab+".py","w") + lt.write("# %s.py. This file automatically created by PLY. Don't edit.\n" % lextab) + lt.write("_lexre = %s\n" % repr(regex)) + lt.write("_lextab = [\n"); + for i in range(0,len(lexer.lexindexfunc)): + t = lexer.lexindexfunc[i] + if t: + if t[0]: + lt.write(" ('%s',%s),\n"% (t[0].__name__, repr(t[1]))) + else: + lt.write(" (None,%s),\n" % repr(t[1])) + else: + lt.write(" None,\n") + + lt.write("]\n"); + lt.write("_lextokens = %s\n" % repr(lexer.lextokens)) + lt.write("_lexignore = %s\n" % repr(lexer.lexignore)) + if (lexer.lexerrorf): + lt.write("_lexerrorf = %s\n" % repr(lexer.lexerrorf.__name__)) + else: + lt.write("_lexerrorf = None\n") + lt.close() + + except re.error,e: + print "lex: Fatal error. Unable to compile regular expression rules. 
%s" % e + error = 1 + if error: + raise SyntaxError,"lex: Unable to build lexer." + if not lexer.lexerrorf: + print "lex: Warning. no t_error rule is defined." + + if not lexer.lexignore: lexer.lexignore = "" + + # Create global versions of the token() and input() functions + token = lexer.token + input = lexer.input + + return lexer + +# ----------------------------------------------------------------------------- +# run() +# +# This runs the lexer as a main program +# ----------------------------------------------------------------------------- + +def runmain(lexer=None,data=None): + if not data: + try: + filename = sys.argv[1] + f = open(filename) + data = f.read() + f.close() + except IndexError: + print "Reading from standard input (type EOF to end):" + data = sys.stdin.read() + + if lexer: + _input = lexer.input + else: + _input = input + _input(data) + if lexer: + _token = lexer.token + else: + _token = token + + while 1: + tok = _token() + if not tok: break + print "(%s,'%s',%d)" % (tok.type, tok.value, tok.lineno) + + + + diff --git a/tools/yacc.py b/tools/yacc.py new file mode 100644 index 0000000000..44298df93d --- /dev/null +++ b/tools/yacc.py @@ -0,0 +1,1844 @@ +#----------------------------------------------------------------------------- +# ply: yacc.py +# +# Author: David M. Beazley (beazley@cs.uchicago.edu) +# Department of Computer Science +# University of Chicago +# Chicago, IL 60637 +# +# Copyright (C) 2001, David M. Beazley +# +# $Header: /svn/cvsroot/ethereal/tools/yacc.py,v 1.1 2004/05/24 08:33:09 sahlberg Exp $ +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +# +# +# This implements an LR parser that is constructed from grammar rules defined +# as Python functions. Roughly speaking, this module is a cross between +# John Aycock's Spark system and the GNU bison utility. +# +# Disclaimer: This is a work in progress. SLR parsing seems to work fairly +# well and there is extensive error checking. LALR(1) is in progress. The +# rest of this file is a bit of a mess. Please pardon the dust. +# +# The current implementation is only somewhat object-oriented. The +# LR parser itself is defined in terms of an object (which allows multiple +# parsers to co-exist). However, most of the variables used during table +# construction are defined in terms of global variables. Users shouldn't +# notice unless they are trying to define multiple parsers at the same +# time using threads (in which case they should have their head examined). 
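+#
+# Typical use (an illustrative sketch, not part of this module; 'calclex' is
+# assumed to be a lex rule file defining the token list):
+#
+#     import yacc
+#     from calclex import tokens        # the token list is shared with the lexer
+#
+#     def p_expr_plus(p):
+#         'expr : expr PLUS term'
+#         p[0] = p[1] + p[3]
+#
+#     def p_expr_term(p):
+#         'expr : term'
+#         p[0] = p[1]
+#
+#     def p_term_number(p):
+#         'term : NUMBER'
+#         p[0] = p[1]
+#
+#     def p_error(p):
+#         if p: print "Syntax error at token", p.type
+#         else: print "Syntax error at EOF"
+#
+#     yacc.yacc()                       # Build the parsing tables
+#     result = yacc.parse("3 + 4 + 5")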
+#----------------------------------------------------------------------------- + +__version__ = "1.3" + +#----------------------------------------------------------------------------- +# === User configurable parameters === +# +# Change these to modify the default behavior of yacc (if you wish) +#----------------------------------------------------------------------------- + +yaccdebug = 1 # Debugging mode. If set, yacc generates a + # a 'parser.out' file in the current directory + +debug_file = 'parser.out' # Default name of the debugging file +tab_module = 'parsetab' # Default name of the table module +default_lr = 'SLR' # Default LR table generation method + +error_count = 3 # Number of symbols that must be shifted to leave recovery mode + +import re, types, sys, cStringIO, md5, os.path + +# Exception raised for yacc-related errors +class YaccError(Exception): pass + +#----------------------------------------------------------------------------- +# === LR Parsing Engine === +# +# The following classes are used for the LR parser itself. These are not +# used during table construction and are independent of the actual LR +# table generation algorithm +#----------------------------------------------------------------------------- + +# This class is used to hold non-terminal grammar symbols during parsing. +# It normally has the following attributes set: +# .type = Grammar symbol type +# .value = Symbol value +# .lineno = Starting line number +# .endlineno = Ending line number (optional, set automatically) + +class YaccSymbol: + def __str__(self): return self.type + def __repr__(self): return str(self) + +# This class is a wrapper around the objects actually passed to each +# grammar rule. Index lookup and assignment actually assign the +# .value attribute of the underlying YaccSymbol object. +# The lineno() method returns the line number of a given +# item (or 0 if not defined). The linespan() method returns +# a tuple of (startline,endline) representing the range of lines +# for a symbol. + +class YaccSlice: + def __init__(self,s): + self.slice = s + self.pbstack = [] + + def __getitem__(self,n): + return self.slice[n].value + + def __setitem__(self,n,v): + self.slice[n].value = v + + def lineno(self,n): + return getattr(self.slice[n],"lineno",0) + + def linespan(self,n): + startline = getattr(self.slice[n],"lineno",0) + endline = getattr(self.slice[n],"endlineno",startline) + return startline,endline + + def pushback(self,n): + if n <= 0: + raise ValueError, "Expected a positive value" + if n > (len(self.slice)-1): + raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1) + for i in range(0,n): + self.pbstack.append(self.slice[-i-1]) + +# The LR Parsing engine. This is defined as a class so that multiple parsers +# can exist in the same process. A user never instantiates this directly. +# Instead, the global yacc() function should be used to create a suitable Parser +# object. + +class Parser: + def __init__(self,magic=None): + + # This is a hack to keep users from trying to instantiate a Parser + # object directly. + + if magic != "xyzzy": + raise YaccError, "Can't instantiate Parser. Use yacc() instead." 
+ + # Reset internal state + self.productions = None # List of productions + self.errorfunc = None # Error handling function + self.action = { } # LR Action table + self.goto = { } # LR goto table + self.require = { } # Attribute require table + self.method = "Unknown LR" # Table construction method used + + def errok(self): + self.errorcount = 0 + + def restart(self): + del self.statestack[:] + del self.symstack[:] + sym = YaccSymbol() + sym.type = '$' + self.symstack.append(sym) + self.statestack.append(0) + + def parse(self,input=None,lexer=None,debug=0): + lookahead = None # Current lookahead symbol + lookaheadstack = [ ] # Stack of lookahead symbols + actions = self.action # Local reference to action table + goto = self.goto # Local reference to goto table + prod = self.productions # Local reference to production list + pslice = YaccSlice(None) # Slice object passed to grammar rules + pslice.parser = self # Parser object + self.errorcount = 0 # Used during error recovery + + # If no lexer was given, we will try to use the lex module + if not lexer: + import lex as lexer + + pslice.lexer = lexer + + # If input was supplied, pass to lexer + if input: + lexer.input(input) + + # Tokenize function + get_token = lexer.token + + statestack = [ ] # Stack of parsing states + self.statestack = statestack + symstack = [ ] # Stack of grammar symbols + self.symstack = symstack + + errtoken = None # Err token + + # The start state is assumed to be (0,$) + statestack.append(0) + sym = YaccSymbol() + sym.type = '$' + symstack.append(sym) + + while 1: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$' + if debug: + print "%-20s : %s" % (lookahead, [xx.type for xx in symstack]) + + # Check the action table + s = statestack[-1] + ltype = lookahead.type + t = actions.get((s,ltype),None) + + if t is not None: + if t > 0: + # shift a symbol on the stack + if ltype == '$': + # Error, end of input + print "yacc: Parse error. 
EOF" + return + statestack.append(t) + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if self.errorcount > 0: + self.errorcount -= 1 + + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + try: + sym.lineno = targ[1].lineno + sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno) + except AttributeError: + sym.lineno = 0 + del symstack[-plen:] + del statestack[-plen:] + else: + sym.lineno = 0 + targ = [ sym ] + pslice.slice = targ + pslice.pbstack = [] + # Call the grammar rule with our special slice object + p.func(pslice) + + # Validate attributes of the resulting value attribute +# if require: +# try: +# t0 = targ[0] +# r = Requires.get(t0.type,None) +# t0d = t0.__dict__ +# if r: +# for field in r: +# tn = t0 +# for fname in field: +# try: +# tf = tn.__dict__ +# tn = tf.get(fname) +# except StandardError: +# tn = None +# if not tn: +# print "%s:%d: Rule %s doesn't set required attribute '%s'" % \ +# (p.file,p.line,p.name,".".join(field)) +# except TypeError,LookupError: +# print "Bad requires directive " % r +# pass + + + # If there was a pushback, put that on the stack + if pslice.pbstack: + lookaheadstack.append(lookahead) + for _t in pslice.pbstack: + lookaheadstack.append(_t) + lookahead = None + + symstack.append(sym) + statestack.append(goto[statestack[-1],pname]) + continue + + if t == 0: + n = symstack[-1] + return getattr(n,"value",None) + + if t == None: + # We have some kind of parsing error here. To handle this, + # we are going to push the current token onto the tokenstack + # and replace it with an 'error' token. If there are any synchronization + # rules, they may catch it. + # + # In addition to pushing the error token, we call call the user defined p_error() + # function if this is the first syntax error. This function is only called + # if errorcount == 0. + + if not self.errorcount: + self.errorcount = error_count + errtoken = lookahead + if errtoken.type == '$': + errtoken = None # End of file! + if self.errorfunc: + global errok,token,restart + errok = self.errok # Set some special functions available in error recovery + token = get_token + restart = self.restart + tok = self.errorfunc(errtoken) + del errok, token, restart # Delete special functions + + if not self.errorcount: + # User must have done some kind of panic mode recovery on their own. The returned token + # is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken,"lineno"): lineno = lookahead.lineno + else: lineno = 0 + if lineno: + print "yacc: Syntax error at line %d, token=%s" % (lineno, errtoken.type) + else: + print "yacc: Syntax error, token=%s" % errtoken.type + else: + print "yacc: Parse error in input. EOF" + return + + else: + self.errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$': + lookahead = None + errtoken = None + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. 
nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + lookahead = None + continue + t = YaccSymbol() + t.type = 'error' + if hasattr(lookahead,"lineno"): + t.lineno = lookahead.lineno + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + symstack.pop() + statestack.pop() + + continue + + # Call an error function here + raise RuntimeError, "yacc: internal parser error!!!\n" + +# ----------------------------------------------------------------------------- +# === Parser Construction === +# +# The following functions and variables are used to implement the yacc() function +# itself. This is pretty hairy stuff involving lots of error checking, +# construction of LR items, kernels, and so forth. Although a lot of +# this work is done using global variables, the resulting Parser object +# is completely self contained--meaning that it is safe to repeatedly +# call yacc() with different grammars in the same application. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# validate_file() +# +# This function checks to see if there are duplicated p_rulename() functions +# in the parser module file. Without this function, it is really easy for +# users to make mistakes by cutting and pasting code fragments (and it's a real +# bugger to try and figure out why the resulting parser doesn't work). Therefore, +# we just do a little regular expression pattern matching of def statements +# to try and detect duplicates. +# ----------------------------------------------------------------------------- + +def validate_file(filename): + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea. Assume it's okay. + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + return 1 # Oh well + + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + counthash = { } + linen = 1 + noerror = 1 + for l in lines: + m = fre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + print "%s:%d: Function %s redefined. Previously defined on line %d" % (filename,linen,name,prev) + noerror = 0 + linen += 1 + return noerror + +# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix. +def validate_dict(d): + for n,v in d.items(): + if n[0:2] == 'p_' and isinstance(v,types.FunctionType): continue + if n[0:2] == 't_': continue + + if n[0:2] == 'p_': + print "yacc: Warning. '%s' not defined as a function" % n + if isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1: + try: + doc = v.__doc__.split(" ") + if doc[1] == ':': + print "%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix." % (v.func_code.co_filename, v.func_code.co_firstlineno,n) + except StandardError: + pass + +# ----------------------------------------------------------------------------- +# === GRAMMAR FUNCTIONS === +# +# The following global variables and functions are used to store, manipulate, +# and verify the grammar rules specified by the user. 
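+#
+# Each p_* rule function supplies its productions in its documentation string,
+# using ':' (or '::=') and '|' for alternatives, for example (sketch):
+#
+#     def p_expr(p):
+#         '''expr : expr PLUS term
+#                 | expr MINUS term
+#                 | term'''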
+# ----------------------------------------------------------------------------- + +# Initialize all of the global variables used during grammar construction +def initialize_vars(): + global Productions, Prodnames, Prodmap, Terminals + global Nonterminals, First, Follow, Precedence, LRitems + global Errorfunc, Signature, Requires + + Productions = [None] # A list of all of the productions. The first + # entry is always reserved for the purpose of + # building an augmented grammar + + Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + Prodmap = { } # A dictionary that is only used to detect duplicate + # productions. + + Terminals = { } # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + Nonterminals = { } # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. + + First = { } # A dictionary of precomputed FIRST(x) symbols + + Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + + Precedence = { } # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + LRitems = [ ] # A list of all LR items for the grammar. These are the + # productions with the "dot" like E -> E . PLUS E + + Errorfunc = None # User defined error handler + + Signature = md5.new() # Digital signature of the grammar rules, precedence + # and other information. Used to determined when a + # parsing table needs to be regenerated. + + Requires = { } # Requires list + + # File objects used when creating the parser.out debugging file + global _vf, _vfc + _vf = cStringIO.StringIO() + _vfc = cStringIO.StringIO() + +# ----------------------------------------------------------------------------- +# class Production: +# +# This class stores the raw information about a single production or grammar rule. +# It has a few required attributes: +# +# name - Name of the production (nonterminal) +# prod - A list of symbols making up its production +# number - Production number. +# +# In addition, a few additional attributes are used to help with debugging or +# optimization of table generation. +# +# file - File where production action is defined. +# lineno - Line number where action is defined +# func - Action function +# prec - Precedence level +# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E' +# then lr_next refers to 'E -> E PLUS . E' +# lr_index - LR item index (location of the ".") in the prod list. 
+# len - Length of the production (number of symbols on right hand side) +# ----------------------------------------------------------------------------- + +class Production: + def __init__(self,**kw): + for k,v in kw.items(): + setattr(self,k,v) + self.lr_index = -1 + self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure + self.usyms = [ ] + + def __str__(self): + if self.prod: + s = "%s -> %s" % (self.name," ".join(self.prod)) + else: + s = "%s -> " % self.name + return s + + def __repr__(self): + return str(self) + + # Compute lr_items from the production + def lr_item(self,n): + if n > len(self.prod): return None + p = Production() + p.name = self.name + p.prod = list(self.prod) + p.number = self.number + p.lr_index = n + p.prod.insert(n,".") + p.prod = tuple(p.prod) + p.len = len(p.prod) + p.usyms = self.usyms + + # Precompute list of productions immediately following + try: + p.lrafter = Prodnames[p.prod[n+1]] + except (IndexError,KeyError),e: + p.lrafter = [] + try: + p.lrbefore = p.prod[n-1] + except IndexError: + p.lrbefore = None + + return p + +class MiniProduction: + pass + +# Utility function +def is_identifier(s): + for c in s: + if not (c.isalnum() or c == '_'): return 0 + return 1 + +# ----------------------------------------------------------------------------- +# add_production() +# +# Given an action function, this function assembles a production rule. +# The production rule is assumed to be found in the function's docstring. +# This rule has the general syntax: +# +# name1 ::= production1 +# | production2 +# | production3 +# ... +# | productionn +# name2 ::= production1 +# | production2 +# ... +# ----------------------------------------------------------------------------- + +def add_production(f,file,line,prodname,syms): + + if Terminals.has_key(prodname): + print "%s:%d: Illegal rule name '%s'. Already defined as a token." % (file,line,prodname) + return -1 + if prodname == 'error': + print "%s:%d: Illegal rule name '%s'. error is a reserved word." % (file,line,prodname) + return -1 + + if not is_identifier(prodname): + print "%s:%d: Illegal rule name '%s'" % (file,line,prodname) + return -1 + + for s in syms: + if not is_identifier(s) and s != '%prec': + print "%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname) + return -1 + + # See if the rule is already in the rulemap + map = "%s -> %s" % (prodname,syms) + if Prodmap.has_key(map): + m = Prodmap[map] + print "%s:%d: Duplicate rule %s." % (file,line, m) + print "%s:%d: Previous definition at %s:%d" % (file,line, m.file, m.line) + return -1 + + p = Production() + p.name = prodname + p.prod = syms + p.file = file + p.line = line + p.func = f + p.number = len(Productions) + + + Productions.append(p) + Prodmap[map] = p + if not Nonterminals.has_key(prodname): + Nonterminals[prodname] = [ ] + + # Add all terminals to Terminals + i = 0 + while i < len(p.prod): + t = p.prod[i] + if t == '%prec': + try: + precname = p.prod[i+1] + except IndexError: + print "%s:%d: Syntax error. Nothing follows %%prec." % (p.file,p.line) + return -1 + + prec = Precedence.get(precname,None) + if not prec: + print "%s:%d: Nothing known about the precedence of '%s'" % (p.file,p.line,precname) + return -1 + else: + p.prec = prec + del p.prod[i] + del p.prod[i] + continue + + if Terminals.has_key(t): + Terminals[t].append(p.number) + # Is a terminal. 
We'll assign a precedence to p based on this + if not hasattr(p,"prec"): + p.prec = Precedence.get(t,('right',0)) + else: + if not Nonterminals.has_key(t): + Nonterminals[t] = [ ] + Nonterminals[t].append(p.number) + i += 1 + + if not hasattr(p,"prec"): + p.prec = ('right',0) + + # Set final length of productions + p.len = len(p.prod) + p.prod = tuple(p.prod) + + # Calculate unique syms in the production + p.usyms = [ ] + for s in p.prod: + if s not in p.usyms: + p.usyms.append(s) + + # Add to the global productions list + try: + Prodnames[p.name].append(p) + except KeyError: + Prodnames[p.name] = [ p ] + return 0 + +# Given a raw rule function, this function rips out its doc string +# and adds rules to the grammar + +def add_function(f): + line = f.func_code.co_firstlineno + file = f.func_code.co_filename + error = 0 + + if f.func_code.co_argcount > 1: + print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__) + return -1 + + if f.func_code.co_argcount < 1: + print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__) + return -1 + + if f.__doc__: + # Split the doc string into lines + pstrings = f.__doc__.splitlines() + lastp = None + dline = line + for ps in pstrings: + dline += 1 + p = ps.split() + if not p: continue + try: + if p[0] == '|': + # This is a continuation of a previous rule + if not lastp: + print "%s:%d: Misplaced '|'." % (file,dline) + return -1 + prodname = lastp + if len(p) > 1: + syms = p[1:] + else: + syms = [ ] + else: + prodname = p[0] + lastp = prodname + assign = p[1] + if len(p) > 2: + syms = p[2:] + else: + syms = [ ] + if assign != ':' and assign != '::=': + print "%s:%d: Syntax error. Expected ':'" % (file,dline) + return -1 + e = add_production(f,file,dline,prodname,syms) + error += e + except StandardError: + print "%s:%d: Syntax error in rule '%s'" % (file,dline,ps) + error -= 1 + else: + print "%s:%d: No documentation string specified in function '%s'" % (file,line,f.__name__) + return error + + +# Cycle checking code (Michael Dyck) + +def compute_reachable(): + ''' + Find each symbol that can be reached from the start symbol. + Print a warning for any nonterminals that can't be reached. + (Unused terminals have already had their warning.) + ''' + Reachable = { } + for s in Terminals.keys() + Nonterminals.keys(): + Reachable[s] = 0 + + mark_reachable_from( Productions[0].prod[0], Reachable ) + + for s in Nonterminals.keys(): + if not Reachable[s]: + print "yacc: Symbol '%s' is unreachable." % s + +def mark_reachable_from(s, Reachable): + ''' + Mark all symbols that are reachable from symbol s. + ''' + if Reachable[s]: + # We've already reached symbol s. + return + Reachable[s] = 1 + for p in Prodnames.get(s,[]): + for r in p.prod: + mark_reachable_from(r, Reachable) + +# ----------------------------------------------------------------------------- +# compute_terminates() +# +# This function looks at the various parsing rules and tries to detect +# infinite recursion cycles (grammar rules where there is no possible way +# to derive a string of only terminals). +# ----------------------------------------------------------------------------- +def compute_terminates(): + ''' + Raise an error for any symbols that don't terminate. 
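+
+    For example, if the only production for 'a' is  a : a B , then no
+    derivation of 'a' can ever reach a string consisting only of terminals,
+    so 'a' is reported here as non-terminating.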
+ ''' + Terminates = {} + + # Terminals: + for t in Terminals.keys(): + Terminates[t] = 1 + + Terminates['$'] = 1 + + # Nonterminals: + + # Initialize to false: + for n in Nonterminals.keys(): + Terminates[n] = 0 + + # Then propagate termination until no change: + while 1: + some_change = 0 + for (n,pl) in Prodnames.items(): + # Nonterminal n terminates iff any of its productions terminates. + for p in pl: + # Production p terminates iff all of its rhs symbols terminate. + for s in p.prod: + if not Terminates[s]: + # The symbol s does not terminate, + # so production p does not terminate. + p_terminates = 0 + break + else: + # didn't break from the loop, + # so every symbol s terminates + # so production p terminates. + p_terminates = 1 + + if p_terminates: + # symbol n terminates! + if not Terminates[n]: + Terminates[n] = 1 + some_change = 1 + # Don't need to consider any more productions for this n. + break + + if not some_change: + break + + some_error = 0 + for (s,terminates) in Terminates.items(): + if not terminates: + if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. + pass + else: + print "yacc: Infinite recursion detected for symbol '%s'." % s + some_error = 1 + + return some_error + +# ----------------------------------------------------------------------------- +# verify_productions() +# +# This function examines all of the supplied rules to see if they seem valid. +# ----------------------------------------------------------------------------- +def verify_productions(cycle_check=1): + error = 0 + for p in Productions: + if not p: continue + + for s in p.prod: + if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': + print "%s:%d: Symbol '%s' used, but not defined as a token or a rule." % (p.file,p.line,s) + error = 1 + continue + + unused_tok = 0 + # Now verify all of the tokens + if yaccdebug: + _vf.write("Unused terminals:\n\n") + for s,v in Terminals.items(): + if s != 'error' and not v: + print "yacc: Warning. Token '%s' defined, but not used." % s + if yaccdebug: _vf.write(" %s\n"% s) + unused_tok += 1 + + # Print out all of the productions + if yaccdebug: + _vf.write("\nGrammar\n\n") + for i in range(1,len(Productions)): + _vf.write("Rule %-5d %s\n" % (i, Productions[i])) + + unused_prod = 0 + # Verify the use of all productions + for s,v in Nonterminals.items(): + if not v: + p = Prodnames[s][0] + print "%s:%d: Warning. Rule '%s' defined, but not used." % (p.file,p.line, s) + unused_prod += 1 + + + if unused_tok == 1: + print "yacc: Warning. There is 1 unused token." + if unused_tok > 1: + print "yacc: Warning. There are %d unused tokens." % unused_tok + + if unused_prod == 1: + print "yacc: Warning. There is 1 unused rule." + if unused_prod > 1: + print "yacc: Warning. There are %d unused rules." 
% unused_prod + + if yaccdebug: + _vf.write("\nTerminals, with rules where they appear\n\n") + ks = Terminals.keys() + ks.sort() + for k in ks: + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]]))) + _vf.write("\nNonterminals, with rules where they appear\n\n") + ks = Nonterminals.keys() + ks.sort() + for k in ks: + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]]))) + + if (cycle_check): + compute_reachable() + error += compute_terminates() +# error += check_cycles() + return error + +# ----------------------------------------------------------------------------- +# build_lritems() +# +# This function walks the list of productions and builds a complete set of the +# LR items. The LR items are stored in two ways: First, they are uniquely +# numbered and placed in the list _lritems. Second, a linked list of LR items +# is built for each production. For example: +# +# E -> E PLUS E +# +# Creates the list +# +# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] +# ----------------------------------------------------------------------------- + +def build_lritems(): + for p in Productions: + lastlri = p + lri = p.lr_item(0) + i = 0 + while 1: + lri = p.lr_item(i) + lastlri.lr_next = lri + if not lri: break + lri.lr_num = len(LRitems) + LRitems.append(lri) + lastlri = lri + i += 1 + + # In order for the rest of the parser generator to work, we need to + # guarantee that no more lritems are generated. Therefore, we nuke + # the p.lr_item method. (Only used in debugging) + # Production.lr_item = None + +# ----------------------------------------------------------------------------- +# add_precedence() +# +# Given a list of precedence rules, add to the precedence table. +# ----------------------------------------------------------------------------- + +def add_precedence(plist): + plevel = 0 + error = 0 + for p in plist: + plevel += 1 + try: + prec = p[0] + terms = p[1:] + if prec != 'left' and prec != 'right' and prec != 'nonassoc': + print "yacc: Invalid precedence '%s'" % prec + return -1 + for t in terms: + if Precedence.has_key(t): + print "yacc: Precedence already specified for terminal '%s'" % t + error += 1 + continue + Precedence[t] = (prec,plevel) + except: + print "yacc: Invalid precedence table." + error += 1 + + return error + +# ----------------------------------------------------------------------------- +# augment_grammar() +# +# Compute the augmented grammar. This is just a rule S' -> start where start +# is the starting symbol. +# ----------------------------------------------------------------------------- + +def augment_grammar(start=None): + if not start: + start = Productions[1].name + Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None) + Productions[0].usyms = [ start ] + Nonterminals[start].append(0) + + +# ------------------------------------------------------------------------- +# first() +# +# Compute the value of FIRST1(beta) where beta is a tuple of symbols. +# +# During execution of compute_first1, the result may be incomplete. +# Afterward (e.g., when called from compute_follow()), it will be complete. +# ------------------------------------------------------------------------- +def first(beta): + + # We are computing First(x1,x2,x3,...,xn) + result = [ ] + for x in beta: + x_produces_empty = 0 + + # Add all the non- symbols of First[x] to the result. 
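+        # (an empty string '' in a FIRST set is the marker used here to
+        # record that the symbol can derive the empty string)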
+ for f in First[x]: + if f == '': + x_produces_empty = 1 + else: + if f not in result: result.append(f) + + if x_produces_empty: + # We have to consider the next x in beta, + # i.e. stay in the loop. + pass + else: + # We don't have to consider any further symbols in beta. + break + else: + # There was no 'break' from the loop, + # so x_produces_empty was true for all x in beta, + # so beta produces empty as well. + result.append('') + + return result + + +# FOLLOW(x) +# Given a non-terminal. This function computes the set of all symbols +# that might follow it. Dragon book, p. 189. + +def compute_follow(start=None): + # Add '$' to the follow list of the start symbol + for k in Nonterminals.keys(): + Follow[k] = [ ] + + if not start: + start = Productions[1].name + + Follow[start] = [ '$' ] + + while 1: + didadd = 0 + for p in Productions[1:]: + # Here is the production set + for i in range(len(p.prod)): + B = p.prod[i] + if Nonterminals.has_key(B): + # Okay. We got a non-terminal in a production + fst = first(p.prod[i+1:]) + hasempty = 0 + for f in fst: + if f != '' and f not in Follow[B]: + Follow[B].append(f) + didadd = 1 + if f == '': + hasempty = 1 + if hasempty or i == (len(p.prod)-1): + # Add elements of follow(a) to follow(b) + for f in Follow[p.name]: + if f not in Follow[B]: + Follow[B].append(f) + didadd = 1 + if not didadd: break + + if 0 and yaccdebug: + _vf.write('\nFollow:\n') + for k in Nonterminals.keys(): + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]]))) + +# ------------------------------------------------------------------------- +# compute_first1() +# +# Compute the value of FIRST1(X) for all symbols +# ------------------------------------------------------------------------- +def compute_first1(): + + # Terminals: + for t in Terminals.keys(): + First[t] = [t] + + First['$'] = ['$'] + First['#'] = ['#'] # what's this for? + + # Nonterminals: + + # Initialize to the empty set: + for n in Nonterminals.keys(): + First[n] = [] + + # Then propagate symbols until no change: + while 1: + some_change = 0 + for n in Nonterminals.keys(): + for p in Prodnames[n]: + for f in first(p.prod): + if f not in First[n]: + First[n].append( f ) + some_change = 1 + if not some_change: + break + + if 0 and yaccdebug: + _vf.write('\nFirst:\n') + for k in Nonterminals.keys(): + _vf.write("%-20s : %s\n" % + (k, " ".join([str(s) for s in First[k]]))) + +# ----------------------------------------------------------------------------- +# === SLR Generation === +# +# The following functions are used to construct SLR (Simple LR) parsing tables +# as described on p.221-229 of the dragon book. +# ----------------------------------------------------------------------------- + +# Global variables for the LR parsing engine +def lr_init_vars(): + global _lr_action, _lr_goto, _lr_method + global _lr_goto_cache + + _lr_action = { } # Action table + _lr_goto = { } # Goto table + _lr_method = "Unknown" # LR method used + _lr_goto_cache = { } + +# Compute the LR(0) closure operation on I, where I is a set of LR(0) items. +# prodlist is a list of productions. 
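+#
+# For example (sketch): starting from the kernel item  S' -> . expr , the
+# closure also pulls in  expr -> . expr PLUS term ,  expr -> . term , and so
+# on, following every nonterminal that appears immediately to the right of
+# the dot.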
+ +_add_count = 0 # Counter used to detect cycles + +def lr0_closure(I): + global _add_count + + _add_count += 1 + prodlist = Productions + + # Add everything in I to J + J = I[:] + didadd = 1 + while didadd: + didadd = 0 + for j in J: + for x in j.lrafter: + if x.lr0_added == _add_count: continue + # Add B --> .G to J + J.append(x.lr_next) + x.lr0_added = _add_count + didadd = 1 + + return J + +# Compute the LR(0) goto function goto(I,X) where I is a set +# of LR(0) items and X is a grammar symbol. This function is written +# in a way that guarantees uniqueness of the generated goto sets +# (i.e. the same goto set will never be returned as two different Python +# objects). With uniqueness, we can later do fast set comparisons using +# id(obj) instead of element-wise comparison. + +def lr0_goto(I,x): + # First we look for a previously cached entry + g = _lr_goto_cache.get((id(I),x),None) + if g: return g + + # Now we generate the goto set in a way that guarantees uniqueness + # of the result + + s = _lr_goto_cache.get(x,None) + if not s: + s = { } + _lr_goto_cache[x] = s + + gs = [ ] + for p in I: + n = p.lr_next + if n and n.lrbefore == x: + s1 = s.get(id(n),None) + if not s1: + s1 = { } + s[id(n)] = s1 + gs.append(n) + s = s1 + g = s.get('$',None) + if not g: + if gs: + g = lr0_closure(gs) + s['$'] = g + else: + s['$'] = gs + _lr_goto_cache[(id(I),x)] = g + return g + +# Compute the kernel of a set of LR(0) items +def lr0_kernel(I): + KI = [ ] + for p in I: + if p.name == "S'" or p.lr_index > 0 or p.len == 0: + KI.append(p) + + return KI + +_lr0_cidhash = { } + +# Compute the LR(0) sets of item function +def lr0_items(): + + C = [ lr0_closure([Productions[0].lr_next]) ] + i = 0 + for I in C: + _lr0_cidhash[id(I)] = i + i += 1 + + # Loop over the items in C and each grammar symbols + i = 0 + while i < len(C): + I = C[i] + i += 1 + + # Collect all of the symbols that could possibly be in the goto(I,X) sets + asyms = { } + for ii in I: + for s in ii.usyms: + asyms[s] = None + + for x in asyms.keys(): + g = lr0_goto(I,x) + if not g: continue + if _lr0_cidhash.has_key(id(g)): continue + _lr0_cidhash[id(g)] = len(C) + C.append(g) + + return C + +# ----------------------------------------------------------------------------- +# slr_parse_table() +# +# This function constructs an SLR table. +# ----------------------------------------------------------------------------- +def slr_parse_table(): + global _lr_method + goto = _lr_goto # Goto array + action = _lr_action # Action array + actionp = { } # Action production array (temporary) + + _lr_method = "SLR" + + n_srconflict = 0 + n_rrconflict = 0 + + print "yacc: Generating SLR parsing table..." + if yaccdebug: + _vf.write("\n\nParsing method: SLR\n\n") + + # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items + # This determines the number of states + + C = lr0_items() + + # Build the parser table, state by state + st = 0 + for I in C: + # Loop over each production in I + actlist = [ ] # List of actions + + if yaccdebug: + _vf.write("\nstate %d\n\n" % st) + for p in I: + _vf.write(" (%d) %s\n" % (p.number, str(p))) + _vf.write("\n") + + for p in I: + try: + if p.prod[-1] == ".": + if p.name == "S'": + # Start symbol. Accept! + action[st,"$"] = 0 + actionp[st,"$"] = p + else: + # We are at the end of a production. Reduce! + for a in Follow[p.name]: + actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) + r = action.get((st,a),None) + if r is not None: + # Whoa. 
Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + sprec,slevel = Productions[actionp[st,a].number].prec + rprec,rlevel = Precedence.get(a,('right',0)) + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + action[st,a] = -p.number + actionp[st,a] = p + if not slevel and not rlevel: + _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) + n_srconflict += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + action[st,a] = None + else: + # Hmmm. Guess we'll keep the shift + if not slevel and not rlevel: + _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) + n_srconflict +=1 + elif r < 0: + # Reduce/reduce conflict. In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + action[st,a] = -p.number + actionp[st,a] = p + # print "Reduce/reduce conflict in state %d" % st + n_rrconflict += 1 + _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a])) + _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a])) + else: + print "Unknown conflict in state %d" % st + else: + action[st,a] = -p.number + actionp[st,a] = p + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if Terminals.has_key(a): + g = lr0_goto(I,a) + j = _lr0_cidhash.get(id(g),-1) + if j >= 0: + # We are in a shift state + actlist.append((a,p,"shift and go to state %d" % j)) + r = action.get((st,a),None) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + print "Shift/shift conflict in state %d" % st + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + rprec,rlevel = Productions[actionp[st,a].number].prec + sprec,slevel = Precedence.get(a,('right',0)) + if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')): + # We decide to shift here... highest precedence to shift + action[st,a] = j + actionp[st,a] = p + if not slevel and not rlevel: + n_srconflict += 1 + _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + action[st,a] = None + else: + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + n_srconflict +=1 + _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) + + else: + print "Unknown conflict in state %d" % st + else: + action[st,a] = j + actionp[st,a] = p + + except StandardError,e: + raise YaccError, "Hosed in slr_parse_table", e + + # Print the actions associated with each terminal + if yaccdebug: + for a,p,m in actlist: + if action.has_key((st,a)): + if p is actionp[st,a]: + _vf.write(" %-15s %s\n" % (a,m)) + _vf.write("\n") + for a,p,m in actlist: + if action.has_key((st,a)): + if p is not actionp[st,a]: + _vf.write(" ! 
%-15s [ %s ]\n" % (a,m)) + + # Construct the goto table for this state + if yaccdebug: + _vf.write("\n") + nkeys = { } + for ii in I: + for s in ii.usyms: + if Nonterminals.has_key(s): + nkeys[s] = None + for n in nkeys.keys(): + g = lr0_goto(I,n) + j = _lr0_cidhash.get(id(g),-1) + if j >= 0: + goto[st,n] = j + if yaccdebug: + _vf.write(" %-15s shift and go to state %d\n" % (n,j)) + + st += 1 + + if n_srconflict == 1: + print "yacc: %d shift/reduce conflict" % n_srconflict + if n_srconflict > 1: + print "yacc: %d shift/reduce conflicts" % n_srconflict + if n_rrconflict == 1: + print "yacc: %d reduce/reduce conflict" % n_rrconflict + if n_rrconflict > 1: + print "yacc: %d reduce/reduce conflicts" % n_rrconflict + + +# ----------------------------------------------------------------------------- +# ==== LALR(1) Parsing ==== +# **** UNFINISHED! 6/16/01 +# ----------------------------------------------------------------------------- + + +# Compute the lr1_closure of a set I. I is a list of tuples (p,a) where +# p is a LR0 item and a is a terminal + +_lr1_add_count = 0 + +def lr1_closure(I): + global _lr1_add_count + + _lr1_add_count += 1 + + J = I[:] + + # Loop over items (p,a) in I. + ji = 0 + while ji < len(J): + p,a = J[ji] + # p = [ A -> alpha . B beta] + + # For each production B -> gamma + for B in p.lr1_after: + f = tuple(p.lr1_beta + (a,)) + + # For each terminal b in first(Beta a) + for b in first(f): + # Check if (B -> . gamma, b) is in J + # Only way this can happen is if the add count mismatches + pn = B.lr_next + if pn.lr_added.get(b,0) == _lr1_add_count: continue + pn.lr_added[b] = _lr1_add_count + J.append((pn,b)) + ji += 1 + + return J + +def lalr_parse_table(): + + # Compute some lr1 information about all of the productions + for p in LRitems: + try: + after = p.prod[p.lr_index + 1] + p.lr1_after = Prodnames[after] + p.lr1_beta = p.prod[p.lr_index + 2:] + except LookupError: + p.lr1_after = [ ] + p.lr1_beta = [ ] + p.lr_added = { } + + # Compute the LR(0) items + C = lr0_items() + CK = [] + for I in C: + CK.append(lr0_kernel(I)) + + print CK + +# ----------------------------------------------------------------------------- +# ==== LR Utility functions ==== +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# _lr_write_tables() +# +# This function writes the LR parsing tables to a file +# ----------------------------------------------------------------------------- + +def lr_write_tables(modulename=tab_module): + filename = modulename + ".py" + try: + f = open(filename,"w") + + f.write(""" +# %s +# This file is automatically generated. Do not edit. 
+ +_lr_method = %s + +_lr_signature = %s +""" % (filename, repr(_lr_method), repr(Signature.digest()))) + + # Change smaller to 0 to go back to original tables + smaller = 1 + + # Factor out names to try and make smaller + if smaller: + items = { } + + for k,v in _lr_action.items(): + i = items.get(k[1]) + if not i: + i = ([],[]) + items[k[1]] = i + i[0].append(k[0]) + i[1].append(v) + + f.write("\n_lr_action_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_action = { } +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + _lr_action[(_x,_k)] = _y +del _lr_action_items +""") + + else: + f.write("\n_lr_action = { "); + for k,v in _lr_action.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + if smaller: + # Factor out names to try and make smaller + items = { } + + for k,v in _lr_goto.items(): + i = items.get(k[1]) + if not i: + i = ([],[]) + items[k[1]] = i + i[0].append(k[0]) + i[1].append(v) + + f.write("\n_lr_goto_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_goto = { } +for _k, _v in _lr_goto_items.items(): + for _x,_y in zip(_v[0],_v[1]): + _lr_goto[(_x,_k)] = _y +del _lr_goto_items +""") + else: + f.write("\n_lr_goto = { "); + for k,v in _lr_goto.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + # Write production table + f.write("_lr_productions = [\n") + for p in Productions: + if p: + if (p.func): + f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line)) + else: + f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len)) + else: + f.write(" None,\n") + f.write("]\n") + f.close() + + except IOError,e: + print "Unable to create '%s'" % filename + print e + return + +def lr_read_tables(module=tab_module,optimize=0): + global _lr_action, _lr_goto, _lr_productions, _lr_method + try: + exec "import %s as parsetab" % module + + if (optimize) or (Signature.digest() == parsetab._lr_signature): + _lr_action = parsetab._lr_action + _lr_goto = parsetab._lr_goto + _lr_productions = parsetab._lr_productions + _lr_method = parsetab._lr_method + return 1 + else: + return 0 + + except (ImportError,AttributeError): + return 0 + +# ----------------------------------------------------------------------------- +# yacc(module) +# +# Build the parser module +# ----------------------------------------------------------------------------- + +def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0): + global yaccdebug + yaccdebug = debug + + initialize_vars() + files = { } + error = 0 + + # Add starting symbol to signature + if start: + Signature.update(start) + + # Try to figure out what module we are working with + if module: + # User supplied a module object. + if not isinstance(module, types.ModuleType): + raise ValueError,"Expected a module" + + ldict = module.__dict__ + + else: + # No module given. We might be able to get information from the caller. 
+ # Throw an exception and unwind the traceback to get the globals + + try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + f = f.f_back # Walk out to our calling function + ldict = f.f_globals # Grab its globals dictionary + + # If running in optimized mode. We're going to + + if (optimize and lr_read_tables(tabmodule,1)): + # Read parse table + del Productions[:] + for p in _lr_productions: + if not p: + Productions.append(None) + else: + m = MiniProduction() + m.name = p[0] + m.len = p[1] + m.file = p[3] + m.line = p[4] + if p[2]: + m.func = ldict[p[2]] + Productions.append(m) + + else: + # Get the tokens map + tokens = ldict.get("tokens",None) + + if not tokens: + raise YaccError,"module does not define a list 'tokens'" + if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): + raise YaccError,"tokens must be a list or tuple." + + # Check to see if a requires dictionary is defined. + requires = ldict.get("require",None) + if requires: + if not (isinstance(requires,types.DictType)): + raise YaccError,"require must be a dictionary." + + for r,v in requires.items(): + try: + if not (isinstance(v,types.ListType)): + raise TypeError + v1 = [x.split(".") for x in v] + Requires[r] = v1 + except StandardError: + print "Invalid specification for rule '%s' in require. Expected a list of strings" % r + + + # Build the dictionary of terminals. We a record a 0 in the + # dictionary to track whether or not a terminal is actually + # used in the grammar + + if 'error' in tokens: + print "yacc: Illegal token 'error'. Is a reserved word." + raise YaccError,"Illegal token name" + + for n in tokens: + if Terminals.has_key(n): + print "yacc: Warning. Token '%s' multiply defined." % n + Terminals[n] = [ ] + + Terminals['error'] = [ ] + + # Get the precedence map (if any) + prec = ldict.get("precedence",None) + if prec: + if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)): + raise YaccError,"precedence must be a list or tuple." + add_precedence(prec) + Signature.update(repr(prec)) + + for n in tokens: + if not Precedence.has_key(n): + Precedence[n] = ('right',0) # Default, right associative, 0 precedence + + # Look for error handler + ef = ldict.get('p_error',None) + if ef: + if not isinstance(ef,types.FunctionType): + raise YaccError,"'p_error' defined, but is not a function." + eline = ef.func_code.co_firstlineno + efile = ef.func_code.co_filename + files[efile] = None + + if (ef.func_code.co_argcount != 1): + raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline) + global Errorfunc + Errorfunc = ef + else: + print "yacc: Warning. no p_error() function is defined." + + # Get the list of built-in functions with p_ prefix + symbols = [ldict[f] for f in ldict.keys() + if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] == 'p_' + and ldict[f].__name__ != 'p_error')] + + # Check for non-empty symbols + if len(symbols) == 0: + raise YaccError,"no rules of the form p_rulename are defined." + + # Sort the symbols by line number + symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno)) + + # Add all of the symbols to the grammar + for f in symbols: + if (add_function(f)) < 0: + error += 1 + else: + files[f.func_code.co_filename] = None + + # Make a signature of the docstrings + for f in symbols: + if f.__doc__: + Signature.update(f.__doc__) + + lr_init_vars() + + if error: + raise YaccError,"Unable to construct parser." 
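+
+    # Only rebuild the parsing tables when lr_read_tables() below cannot load
+    # a cached tab module whose signature matches the current grammar; the
+    # rebuild validates the rule files, augments the grammar with the start
+    # symbol, computes the LR items and FIRST/FOLLOW sets, builds the SLR
+    # (or the still-unfinished LALR(1)) table and writes it out again with
+    # lr_write_tables().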
+ + if not lr_read_tables(tabmodule): + + # Validate files + for filename in files.keys(): + if not validate_file(filename): + error = 1 + + # Validate dictionary + validate_dict(ldict) + + if start and not Prodnames.has_key(start): + raise YaccError,"Bad starting symbol '%s'" % start + + augment_grammar(start) + error = verify_productions(cycle_check=check_recursion) + otherfunc = [ldict[f] for f in ldict.keys() + if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] != 'p_')] + + if error: + raise YaccError,"Unable to construct parser." + + build_lritems() + compute_first1() + compute_follow(start) + + if method == 'SLR': + slr_parse_table() + elif method == 'LALR1': + lalr_parse_table() + return + else: + raise YaccError, "Unknown parsing method '%s'" % method + + lr_write_tables(tabmodule) + + if yaccdebug: + try: + f = open(debug_file,"w") + f.write(_vfc.getvalue()) + f.write("\n\n") + f.write(_vf.getvalue()) + f.close() + except IOError,e: + print "yacc: can't create '%s'" % debug_file,e + + # Made it here. Create a parser object and set up its internal state. + # Set global parse() method to bound method of parser object. + + p = Parser("xyzzy") + p.productions = Productions + p.errorfunc = Errorfunc + p.action = _lr_action + p.goto = _lr_goto + p.method = _lr_method + p.require = Requires + + global parse + parse = p.parse + + # Clean up all of the globals we created + if (not optimize): + yacc_cleanup() + return p + +# yacc_cleanup function. Delete all of the global variables +# used during table construction + +def yacc_cleanup(): + global _lr_action, _lr_goto, _lr_method, _lr_goto_cache + del _lr_action, _lr_goto, _lr_method, _lr_goto_cache + + global Productions, Prodnames, Prodmap, Terminals + global Nonterminals, First, Follow, Precedence, LRitems + global Errorfunc, Signature, Requires + + del Productions, Prodnames, Prodmap, Terminals + del Nonterminals, First, Follow, Precedence, LRitems + del Errorfunc, Signature, Requires + + global _vf, _vfc + del _vf, _vfc + + +# Stub that raises an error if parsing is attempted without first calling yacc() +def parse(*args,**kwargs): + raise YaccError, "yacc: No parser built with yacc()" +
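+
+# Illustrative usage sketch (comment only, not executed): a grammar module is
+# expected to define a 'tokens' sequence, optional 'precedence', p_* rule
+# functions and p_error(), and then call yacc() above.  The token and rule
+# names below are hypothetical, and a lexer for them (e.g. one built with the
+# companion lex module) is assumed to exist already.
+#
+#   tokens = ('NUMBER', 'PLUS')
+#
+#   def p_expr_plus(t):
+#       'expr : expr PLUS NUMBER'
+#       t[0] = t[1] + t[3]
+#
+#   def p_expr_number(t):
+#       'expr : NUMBER'
+#       t[0] = t[1]
+#
+#   def p_error(t):
+#       print "Syntax error at", t
+#
+#   parser = yacc(method='SLR', tabmodule='expr_tab')  # SLR is the finished method
+#   result = parse('1 + 2')   # module-level parse() bound by yacc() above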