# -*- coding: UTF-8 -*-
#/**
# * Software Name : pycrate
# * Version : 0.4
# *
# * Copyright 2018. Benoit Michau. ANSSI.
# *
# * This library is free software; you can redistribute it and/or
# * modify it under the terms of the GNU Lesser General Public
# * License as published by the Free Software Foundation; either
# * version 2.1 of the License, or (at your option) any later version.
# *
# * This library is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# * Lesser General Public License for more details.
# *
# * You should have received a copy of the GNU Lesser General Public
# * License along with this library; if not, write to the Free Software
# * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# * MA 02110-1301 USA
# *
# *--------------------------------------------------------
# * File Name : pycrate_asn1c/tokenizer.py
# * Created : 2018-03-13
# * Authors : Benoit Michau
# *--------------------------------------------------------
#*/
import re
from pycrate_asn1c.err import *
from pycrate_asn1c.utils import *
from pycrate_asn1c.dictobj import *
# white space and new line
_NL = '\x0a\x0b\x0c\x0d'
_SNL = '\x09\x0a\x0b\x0c\x0d\x20' + '\xa0' # 0xa0 is the no-break space (not a valid standalone UTF-8 byte)
REScannerSNL = '[%s]{1,}' % _SNL
# negative lookahead excluding identifier characters, so that keywords do not
# match as prefixes of longer identifiers
_EXC = '(?![a-zA-Z0-9\-]{1,})'
# native types
REScannerNTypes = '|'.join((
'NULL',
'BOOLEAN',
'INTEGER',
'REAL',
'ENUMERATED',
'OBJECT IDENTIFIER',
'RELATIVE-OID',
'OID-IRI',
'RELATIVE-OID-IRI',
'BIT STRING',
'OCTET STRING',
'NumericString',
'PrintableString',
'VisibleString',
'ISO646String',
'IA5String',
'TeletexString',
'T61String',
'VideotexString',
'GraphicString',
'GeneralString',
'UniversalString',
'BMPString',
'UTF8String',
'ObjectDescriptor',
'GeneralizedTime',
'UTCTime',
'TIME',
'SEQUENCE',
'SET',
'CHOICE',
'EXTERNAL',
'EMBEDDED PDV',
'CHARACTER STRING',
'ANY',
'CLASS',
'TYPE-IDENTIFIER',
'ABSTRACT-SYNTAX',
'INSTANCE OF',
'MACRO'
))
# integer
REScannerInt = '([+\-](?:[%s]{0,})){0,1}[0-9]{1,}' % _SNL
# real number: integer part, optional decimal part, optional exponent
REScannerReal = '(%s){1}(?:\.([0-9]{1,})){0,1}(?:[eE](%s)){0,1}'\
% (REScannerInt, REScannerInt)
# bstring
REScannerBStr = '\'[%s01]{0,}\'B' % _SNL
# hstring
REScannerHStr = '\'[%s0-9A-F]{0,}\'H' % _SNL
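# illustrative matches (not exhaustive): REScannerInt matches '42' or '- 42'
# (sign and digits may be separated by whitespace); REScannerReal matches
# '3.14' or '-2E10'; REScannerBStr matches "'0101'B"; REScannerHStr matches
# "'C0FFEE'H"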
# tokens' identifiers
# comments and character string
TOK_CMT = 'CMT' # comment
TOK_CSTR = 'CSTR' # chars string
# definition and tag related
TOK_DEFI = 'DEFI' # DEFINITIONS
TOK_EXTI = 'EXTI' # EXTENSIBILITY IMPLIED
TOK_BEG = 'BEG' # BEGIN
TOK_END = 'END' # END
TOK_TAGS = 'TAGS' # TAGS
TOK_TUNI = 'TUNI' # UNIVERSAL
TOK_TAPP = 'TAPP' # APPLICATION
TOK_TPRI = 'TPRI' # PRIVATE
TOK_TEXP = 'TEXP' # EXPLICIT
TOK_TIMP = 'TIMP' # IMPLICIT
# set and value related
TOK_MINF = 'MINF' # MINUS-INFINITY
TOK_PINF = 'PINF' # PLUS-INFINITY
TOK_NAN = 'NAN' # NOT-A-NUMBER
TOK_ALL = 'ALL' # ALL
TOK_MIN = 'MIN' # MIN
TOK_MAX = 'MAX' # MAX
TOK_EXCE = 'EXCE' # EXCEPT
TOK_NULL = 'NULL' # NULL
TOK_TRUE = 'TRUE' # TRUE
TOK_FALS = 'FALS' # FALSE
TOK_REAL = 'REAL' # real number
TOK_INT = 'INT' # integer
TOK_BSTR = 'BSTR' # binary string
TOK_HSTR = 'HSTR' # hexa string
# other various keywords
TOK_ABS = 'ABS' # ABSENT
TOK_AUTO = 'AUTO' # AUTOMATIC
TOK_BY = 'BY' # BY
TOK_COMP = 'COMP' # COMPONENT
TOK_COMPS = 'COMPS' # COMPONENTS
TOK_CONST = 'CONST' # CONSTRAINED
TOK_CONT = 'CONT' # CONTAINING
TOK_DEF = 'DEF' # DEFAULT
TOK_ENC = 'ENC' # ENCODED
TOK_EXP = 'EXP' # EXPORTS
TOK_FROM = 'FROM' # FROM
TOK_IMP = 'IMP' # IMPORTS
TOK_INCL = 'INCL' # INCLUDES
TOK_OF = 'OF' # OF
TOK_OPT = 'OPT' # OPTIONAL
TOK_PAT = 'PAT' # PATTERN
TOK_PRES = 'PRES' # PRESENT
TOK_SIZE = 'SIZE' # SIZE
TOK_WSYN = 'WSYN' # WITH SYNTAX
TOK_UNIQ = 'UNIQ' # UNIQUE
# identifier related
TOK_NTYPE = 'NTYPE' # native type
TOK_CLAID = 'CLAID' # &[iI]dentifier
TOK_HID = 'HID' # IDENTIFIER
TOK_ID = 'ID' # Identifier
TOK_LID = 'LID' # identifier
# special (series of) characters
TOK_ASSI = 'ASSI' # ::=
TOK_COL = 'COL' # :
TOK_SCOL = 'SCOL' # ;
TOK_EQU = 'EQU' # =
TOK_COM = 'COM' # ,
TOK_PARO = 'PARO' # (
TOK_PARC = 'PARC' # )
TOK_DBRAO = 'DBRAO' # [[
TOK_DBRAC = 'DBRAC' # ]]
TOK_BRAO = 'BRAO' # [
TOK_BRAC = 'BRAC' # ]
TOK_CBRAO = 'CBRAO' # {
TOK_CBRAC = 'CBRAC' # }
TOK_TDOT = 'TDOT' # ...
TOK_DDOT = 'DDOT' # ..
TOK_DOT = 'DOT' # .
TOK_DOTA = 'DOTA' # .&
TOK_UNIO = 'UNIO' # |
TOK_INTER = 'INTER' # ^
TOK_LTHAN = 'LTHAN' # <
TOK_GTHAN = 'GTHAN' # >
TOK_ARRO = 'ARRO' # @
TOK_EXCL = 'EXCL' # !
TOKS_OBJS = {TOK_NULL, TOK_NTYPE, TOK_HID, TOK_ID, TOK_LID}
TOKS_TYPES = {TOK_NULL, TOK_NTYPE, TOK_HID, TOK_ID}
TOKS_OBJS_EXT = {TOK_NULL, TOK_NTYPE, TOK_HID, TOK_ID, TOK_LID, TOK_CLAID}
TOKS_TYPES_EXT = {TOK_HID, TOK_ID, TOK_CLAID}
REScannerASN1 = re.Scanner([
#
(r'(--).*?([%s]|(--)|$)' % _NL, lambda s, t: (TOK_CMT, t)),
(r'(/\*).*?(\*/)', lambda s, t: (TOK_CMT, t)),
(r'".*?(?<!")"(?!")', lambda s, t: (TOK_CSTR, t)),
#
(r'::=', lambda s, t: TOK_ASSI),
(r':', lambda s, t: TOK_COL),
(r';', lambda s, t: TOK_SCOL),
(r'=', lambda s, t: TOK_EQU),
(r',', lambda s, t: TOK_COM),
(r'\(', lambda s, t: TOK_PARO),
(r'\)', lambda s, t: TOK_PARC),
(r'\[{2}', lambda s, t: TOK_DBRAO),
(r'\]{2}', lambda s, t: TOK_DBRAC),
(r'\[', lambda s, t: TOK_BRAO),
(r'\]', lambda s, t: TOK_BRAC),
(r'\{', lambda s, t: TOK_CBRAO),
(r'\}', lambda s, t: TOK_CBRAC),
(r'\.\.\.', lambda s, t: TOK_TDOT),
(r'\.\.', lambda s, t: TOK_DDOT),
(r'\.', lambda s, t: TOK_DOT),
(r'\||(?:UNION%s)' % _EXC, lambda s, t: TOK_UNIO),
(r'\^|(?:INTERSECTION%s)' % _EXC, lambda s, t: TOK_INTER),
(r'<', lambda s, t: TOK_LTHAN),
(r'>', lambda s, t: TOK_GTHAN),
(r'@', lambda s, t: TOK_ARRO),
(r'\!', lambda s, t: TOK_EXCL),
#
(r'ABSENT%s' % _EXC, lambda s, t: TOK_ABS),
(r'ALL%s' % _EXC, lambda s, t: TOK_ALL),
(r'APPLICATION%s' % _EXC, lambda s, t: TOK_TAPP),
(r'AUTOMATIC%s' % _EXC, lambda s, t: TOK_AUTO),
(r'BEGIN%s' % _EXC, lambda s, t: TOK_BEG),
(r'BY%s' % _EXC, lambda s, t: TOK_BY),
(r'COMPONENT%s' % _EXC, lambda s, t: TOK_COMP),
(r'COMPONENTS%s' % _EXC, lambda s, t: TOK_COMPS),
(r'CONSTRAINED%s' % _EXC, lambda s, t: TOK_CONST),
(r'CONTAINING%s' % _EXC, lambda s, t: TOK_CONT),
(r'DEFAULT%s' % _EXC, lambda s, t: TOK_DEF),
(r'DEFINITIONS%s' % _EXC, lambda s, t: TOK_DEFI),
(r'ENCODED%s' % _EXC, lambda s, t: TOK_ENC),
(r'END%s' % _EXC, lambda s, t: TOK_END),
(r'EXCEPT%s' % _EXC, lambda s, t: TOK_EXCE),
(r'EXPLICIT%s' % _EXC, lambda s, t: TOK_TEXP),
(r'EXPORTS%s' % _EXC, lambda s, t: TOK_EXP),
(r'EXTENSIBILITY%sIMPLIED%s' % (REScannerSNL, _EXC), lambda s, t: TOK_EXTI),
(r'FALSE%s' % _EXC, lambda s, t: TOK_FALS),
(r'FROM%s' % _EXC, lambda s, t: TOK_FROM),
(r'IMPLICIT%s' % _EXC, lambda s, t: TOK_TIMP),
(r'IMPORTS%s' % _EXC, lambda s, t: TOK_IMP),
(r'INCLUDES%s' % _EXC, lambda s, t: TOK_INCL),
(r'MAX%s' % _EXC, lambda s, t: TOK_MAX),
(r'MIN%s' % _EXC, lambda s, t: TOK_MIN),
(r'MINUS-INFINITY%s' % _EXC, lambda s, t: TOK_MINF),
(r'NOT-A-NUMBER%s' % _EXC, lambda s, t: TOK_NAN),
(r'NULL%s' % _EXC, lambda s, t: (TOK_NULL, t)),
(r'OF%s' % _EXC, lambda s, t: TOK_OF),
(r'OPTIONAL%s' % _EXC, lambda s, t: TOK_OPT),
(r'PATTERN%s' % _EXC, lambda s, t: TOK_PAT),
(r'PLUS-INFINITY%s' % _EXC, lambda s, t: TOK_PINF),
(r'PRESENT%s' % _EXC, lambda s, t: TOK_PRES),
(r'PRIVATE%s' % _EXC, lambda s, t: TOK_TPRI),
(r'SIZE%s' % _EXC, lambda s, t: TOK_SIZE),
(r'TAGS%s' % _EXC, lambda s, t: TOK_TAGS),
(r'TRUE%s' % _EXC, lambda s, t: TOK_TRUE),
(r'UNIQUE%s' % _EXC, lambda s, t: TOK_UNIQ),
(r'UNIVERSAL%s' % _EXC, lambda s, t: TOK_TUNI),
(r'WITH%sSYNTAX%s' % (REScannerSNL, _EXC), lambda s, t: TOK_WSYN),
#
(r'%s' % REScannerReal, lambda s, t: (TOK_REAL, t)),
(r'%s' % REScannerInt, lambda s, t: (TOK_INT, t)),
(r'%s' % REScannerBStr, lambda s, t: (TOK_BSTR, t)),
(r'%s' % REScannerHStr, lambda s, t: (TOK_HSTR, t)),
#
(r'(%s)%s' % (REScannerNTypes, _EXC), lambda s, t: (TOK_NTYPE, t)),
(r'&[a-zA-Z](?:\-{0,1}[a-zA-Z0-9]{1,}){0,}%s' % _EXC, lambda s, t: (TOK_CLAID, t)),
(r'[A-Z](?:\-{0,1}[A-Z0-9]{1,}){0,}%s' % _EXC, lambda s, t: (TOK_HID, t)),
(r'[A-Z](?:\-{0,1}[a-zA-Z0-9]{1,}){0,}%s' % _EXC, lambda s, t: (TOK_ID, t)),
(r'[a-z](?:\-{0,1}[a-zA-Z0-9]{1,}){0,}%s' % _EXC, lambda s, t: (TOK_LID, t)),
#
(r'%s' % REScannerSNL, None)
],
flags=re.DOTALL
)
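# usage sketch (illustrative, not part of the module API): scanning a small
# fragment returns a list mixing bare token identifiers (keywords and
# punctuation) with (identifier, text) 2-tuples, plus the unscanned remainder:
#
#   toks, rest = REScannerASN1.scan('MyType ::= SEQUENCE { id INTEGER }')
#   # toks ~ [('ID', 'MyType'), 'ASSI', ('NTYPE', 'SEQUENCE'), 'CBRAO',
#   #         ('LID', 'id'), ('NTYPE', 'INTEGER'), 'CBRAC'] and rest == ''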
class Tokenizer(object):
"""handles consciously ASN.1 tokens, forward and backward, while ignoring
ASN.1 comments
"""
REPR_OFF = 10
GROUP = {
TOK_PARO : TOK_PARC, # ( )
TOK_DBRAO : TOK_DBRAC, # [[ ]]
TOK_BRAO : TOK_BRAC, # [ ]
TOK_CBRAO : TOK_CBRAC, # { }
TOK_BEG : TOK_END # BEGIN END
}
def __init__(self, tokens=[]):
self.toks = tokens
# cursor
self.cur = -1
# stack of previous cursor value
self.curp = []
def __repr__(self):
cur = self.get_cur()
return repr(self.toks[max(0, cur-self.REPR_OFF):cur+self.REPR_OFF])
def get_cur(self):
return self.cur
def set_cur(self, cur):
if not -1 <= cur < len(self.toks):
raise(ASN1TokenizerErr('invalid cursor'))
else:
self.cur = cur
def count(self):
return len(self.toks) - self.cur
def get_tok(self):
try:
return self.toks[self.cur]
except:
raise(ASN1TokenizerErr('invalid cursor'))
def get_next(self, off=1):
ind, cnt, curp = 0, 0, self.cur
for tok in self.toks[1+self.cur:]:
if tok[0] == TOK_CMT:
pass
else:
ind += 1
cnt += 1
if ind == off:
break
if ind < off:
raise(ASN1TokenizerErr('not enough tokens'))
self.cur += cnt
self.curp.append(curp)
return tok
def has_next(self):
for tok in self.toks[1+self.cur:]:
if tok[0] == TOK_CMT:
pass
else:
return True
return False
def get_prev(self, off=1):
ind, cnt, curp = 0, 0, self.cur
for tok in self.toks[:self.cur][::-1]:
if tok[0] == TOK_CMT:
pass
else:
ind += 1
cnt += 1
if ind == off:
break
if ind < off:
raise(ASN1TokenizerErr('not enough tokens'))
self.cur -= cnt
self.curp.append(curp)
return tok
def get_upto(self, target):
curp = self.cur
while self.get_next() != target:
# do not extend the stack with previous cursor value
del self.curp[-1]
self.curp.append(curp)
self.cur += 1
return self.__class__(self.toks[max(0, curp):self.cur-1])
def get_group(self, wbnd=True):
tok, curp = self.toks[self.cur], self.cur
if tok in self.GROUP:
op, clo = tok, self.GROUP[tok]
else:
raise(ASN1TokenizerErr('invalid group opening token, %s' % tok))
depth = 1
while depth > 0:
tok = self.get_next()
# do not extend the stack with previous cursor value
del self.curp[-1]
if tok == op:
depth += 1
elif tok == clo:
depth -= 1
if depth == 0:
break
self.curp.append(curp)
if wbnd:
return self.__class__(self.toks[curp:1+self.cur])
else:
return self.__class__(self.toks[1+curp:self.cur])
def get_comps(self, sep=TOK_COM):
comps, curp, curlast = [], self.cur, self.cur
while True:
try:
tok = self.get_next()
except:
break
if tok in self.GROUP:
# jump over the group
grp = self.get_group()
# do not extend the stack with previous cursor value
del self.curp[-1]
elif tok == sep:
comps.append(self.__class__(self.toks[curlast:self.cur-1]))
curlast = self.cur
else:
pass
self.curp.append(curp)
return comps
def undo(self):
if not self.curp:
raise(ASN1TokenizerErr('no previous cursor value to undo'))
self.cur = self.curp[-1]
del self.curp[-1]
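# usage sketch (illustrative): the Tokenizer walks the scanned tokens while
# get_next() / get_prev() silently skip comment tokens, and get_group()
# returns a sub-Tokenizer over a balanced group of tokens:
#
#   toks, _ = REScannerASN1.scan('-- comment\n{ a, b } trailer')
#   T = Tokenizer(toks)
#   T.get_next()       # 'CBRAO': the leading comment token is skipped
#   G = T.get_group()  # sub-Tokenizer over '{ a, b }', cursor on the closing brace
#   T.undo()           # move the cursor back to the opening brace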
# ASN.1 module global structure:
# ModName ModOID DEFINITIONS ModOpts ::= BEGIN ModExports ModImports ModObjects END
#
# ASN.1 object structure:
# ObjName ObjParam ObjType ::= ObjVal
# ObjName ObjParam ObjType ::= ObjSet
# ObjName ObjParam ::= ObjType
# ObjName MACRO ::= BEGIN ... END
#
# ASN.1 object type structure:
# ObjTags ObjType ObjParamAct ObjConsts ObjCont
# CLASS ObjParamAct ObjCont WITH SYNTAX ObjSynt
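# e.g. a module matching this structure (illustrative):
#
#   MyModule { iso(1) identified-organization(3) } DEFINITIONS AUTOMATIC TAGS ::=
#   BEGIN
#       MyType ::= INTEGER (0..255)
#   END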
def tokenize_text(text=u'', **kwargs):
"""tokenize the provided textual ASN.1 specification
"""
#
if isinstance(text, (list, tuple)):
text = u'\n\n'.join(text)
elif not isinstance(text, str_types):
raise(ASN1Err('need some textual definition'))
#
toks, rest = REScannerASN1.scan(text)
if rest:
asnlog('%i remaining chars at the end of spec' % len(rest))
# build the handler for the tokens
Tok = Tokenizer(toks)
modules = ASN1Dict()
#
# scan the tokens for all ASN.1 modules defined
while True:
module = ASN1Dict()
#
# 1) scan tokens for module declaration with DEFINITIONS
try:
TokDecl = Tok.get_upto(TOK_DEFI)
except:
# no more DEFINITIONS
break
#
name, oid = scan_module_decl(TokDecl)
module['_name_'] = name
if oid:
module['_oidtok_'] = oid
# TODO: parse the OID value
module['_oid_'] = []
else:
module['_oidtok_'] = []
module['_oid_'] = []
#
# 2) scan tokens for module options before assignment ::=
if Tok.get_tok() != TOK_ASSI:
try:
TokOpt = Tok.get_upto(TOK_ASSI)
except:
raise(ASN1ProcTextErr('module assignment not found'))
#
module['_tag_'], module['_ext_'] = scan_module_opt(TokOpt)
else:
module['_tag_'], module['_ext_'] = None, False
Tok.get_next()
if 'autotags' in kwargs and kwargs['autotags']:
module['_tag_'] = TOK_AUTO
if 'extimpl' in kwargs and kwargs['extimpl']:
module['_ext_'] = True
#asnlog('[proc] module %s, tags: %r' % (name, module['_tag_']))
#asnlog('[proc] module %s, extensibility implied: %r' % (name, module['_ext_']))
#
# 3) scan tokens for BEGIN - END block
if Tok.get_tok() != TOK_BEG:
raise(ASN1ProcTextErr('missing BEGIN statement'))
TokDef = Tok.get_group(wbnd=False)
module['_tok_'] = TokDef
#asnlog('[proc] module %s: %i tokens' % (name, TokDef.count()))
if Tok.has_next():
Tok.get_next()
#
# 4) scan the module definition block for exports
tok = TokDef.get_next()
if tok == TOK_EXP:
module['_exp_'] = scan_module_exp(TokDef)
#asnlog('[proc] module %s: %i tokens' % (name, TokDef.count()))
else:
TokDef.undo()
#
# 5) scan the module definition block for imports
tok = TokDef.get_next()
if tok == TOK_IMP:
module['_imp_'] = scan_module_imp(TokDef)
module['_resolv_'] = {}
for d in module['_imp_']:
for sym in d['sym']:
module['_resolv_'][sym] = d['name']
#asnlog('[proc] module %s: %i tokens' % (name, TokDef.count()))
#if module['_imp_']:
# asnlog('[proc] module %s: imports parsed' % name)
else:
TokDef.undo()
#
# 6) scan the module definition block for objects
objs = scan_objs(TokDef)
#
# 7) init objects types for the module
module['_obj_'] = objs
module['_type_'] = []
module['_set_'] = []
module['_val_'] = []
module['_class_'] = []
module['_param_'] = []
#
# register each object name in the module's per-kind lists
for obj in objs.values():
if obj['mode'] == MODE_TYPE:
module['_type_'].append(obj['name'])
elif obj['mode'] == MODE_SET:
module['_set_'].append(obj['name'])
elif obj['mode'] == MODE_VALUE:
module['_val_'].append(obj['name'])
else:
assert()
if obj['typedef']['type'] == ['CLASS']:
module['_class_'].append(obj['name'])
if obj['param']:
module['_param_'].append(obj['name'])
#
modules[name] = module
#
return modules
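# usage sketch (illustrative): tokenizing a minimal one-module specification;
# the returned ASN1Dict maps each module name to its module dict:
#
#   mods = tokenize_text('M DEFINITIONS ::= BEGIN T ::= INTEGER END')
#   # list(mods) ~ ['M'] and mods['M']['_obj_'] holds the parsed T object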
def scan_module_decl(Tok):
"""extract module name and OID from given tokens
"""
# scan ModuleIdentifier
tok = Tok.get_next()
if tok[0] not in (TOK_HID, TOK_ID):
raise(ASN1ProcTextErr('invalid module declaration, invalid name %r' % tok))
name = tok[1]
if Tok.has_next():
if Tok.get_next() == TOK_CBRAO:
oid = Tok.get_group()
else:
raise(ASN1ProcTextErr('invalid module declaration'))
else:
oid = None
return name, oid
def scan_module_opt(Tok):
"""extract module options from given tokens
"""
# scan TagDefault and ExtensionDefault
# TODO: scan EncodingReferenceDefault first
tag, ext = None, False
if not Tok.has_next():
return tag, ext
tok = Tok.get_next()
if tok in (TOK_AUTO, TOK_TEXP, TOK_TIMP):
tag = Tok.get_tok()
if Tok.get_next() != TOK_TAGS:
raise(ASN1ProcTextErr('invalid module options, missing TAGS keyword'))
if not Tok.has_next():
return tag, ext
tok = Tok.get_next()
if tok == TOK_EXTI:
ext = True
else:
raise(ASN1ProcTextErr('invalid module options'))
if Tok.has_next():
raise(ASN1ProcTextErr('invalid module options'))
return tag, ext
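# e.g. (illustrative): for the tokens of 'AUTOMATIC TAGS EXTENSIBILITY IMPLIED',
# scan_module_opt() returns (TOK_AUTO, True); for an empty option block it
# returns (None, False)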
def scan_module_exp(Tok):
"""consume the tokens searching for module exports declaration
"""
tok = Tok.get_next()
if tok == TOK_ALL:
if Tok.get_next() != TOK_SCOL:
raise(ASN1ProcTextErr('invalid module export'))
else:
return None
elif tok[0] in TOKS_OBJS:
exp = []
while tok != TOK_SCOL:
if tok[0] in TOKS_OBJS:
exp.append(tok[1])
elif tok == TOK_CBRAO:
tok = Tok.get_next()
if tok != TOK_CBRAC:
raise(ASN1ProcTextErr('invalid module export, parameterized reference'))
elif tok != TOK_COM:
raise(ASN1ProcTextErr('invalid module export'))
tok = Tok.get_next()
return exp
else:
raise(ASN1ProcTextErr('invalid module export'))
def scan_module_imp(Tok):
"""consume the tokens searching for module imports declaration
"""
sym, imp = [], []
tok = Tok.get_next()
while tok != TOK_SCOL:
if tok[0] in TOKS_OBJS:
sym.append(tok[1])
elif tok == TOK_CBRAO:
# parameterized ref: ignoring it
if Tok.get_next() != TOK_CBRAC:
raise(ASN1ProcTextErr('invalid module import, parameterized reference'))
elif tok == TOK_FROM:
tok = Tok.get_next()
if tok[0] not in (TOK_HID, TOK_ID) or not sym:
raise(ASN1ProcTextErr('invalid module import'))
imp.append({'name': tok[1], 'sym': sym})
sym, rev, tok = [], True, Tok.get_next()
if tok == TOK_CBRAO:
# module OID
imp[-1]['oidtok'] = Tok.get_group()
# TODO: parse the OID value
rev = False
elif tok[0] == TOK_LID:
asnlog('imported module OID reference is ambiguous, %s' % tok[1])
# will be dealt with at the end
if rev:
Tok.undo()
elif tok != TOK_COM:
raise(ASN1ProcTextErr('invalid module import'))
tok = Tok.get_next()
if sym:
if len(sym) == 1 and sym[0][0].islower():
asnlog('imported module ambiguous OID references were actually OID references')
# this means all those ambiguous OID ref were actually OID ref for
# the previous module instead of imported symbols
for i in range(len(imp)-1):
if 'oidtok' not in imp[i] and imp[i+1]['sym'][0][0].islower():
# transfer the symbol as the OID ref of the previous module
imp[i]['oidtok'] = imp[i+1]['sym'][0]
del imp[i+1]['sym'][0]
imp[-1]['oidtok'] = sym[0]
else:
raise(ASN1ProcTextErr('invalid module import'))
return imp
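# e.g. (illustrative, assuming the IMPORTS keyword was already consumed): for
# the tokens of 'a, B-Type FROM Other-Module ;', scan_module_imp() returns
# [{'name': 'Other-Module', 'sym': ['a', 'B-Type']}]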
def scan_objs(Tok):
"""consume the tokens searching for objects declaration
"""
objs = ASN1Dict()
while Tok.has_next():
objdict = scan_obj(Tok)
if objdict['name'] in objs:
asnlog('multiple definitions of %s' % objdict['name'])
objs[objdict['name']] = objdict
return objs
def scan_obj(Tok):
"""consume the tokens searching for the complete declaration of a single object
"""
# ASN.1 object structure:
# ObjName ObjParam ObjType ::= ObjVal
# ObjName ObjParam ObjType ::= ObjSet
# ObjName ObjParam ::= ObjType
# ObjName MACRO ::= BEGIN ... END
#
param, typedef, mode, val = None, {}, None, None
tok = Tok.get_next()
if tok[0] == TOK_LID:
mode = MODE_VALUE
elif tok[0] in (TOK_ID, TOK_HID):
mode = MODE_TYPE
else:
raise(ASN1ProcTextErr('invalid object name, %r' % (tok, )))
name = tok[1]
tok = Tok.get_next()
if tok == TOK_CBRAO:
# formal parameters
param = Tok.get_group()
tok = Tok.get_next()
if tok == TOK_BRAO or tok[0] in TOKS_TYPES:
if tok[1] == 'MACRO':
# MACRO
if Tok.get_next() != TOK_ASSI or Tok.get_next() != TOK_BEG:
raise(ASN1ProcTextErr('%s invalid MACRO definition' % name))
typedef['type'] = 'MACRO'
typedef['cont'] = Tok.get_group()
else:
# object value or set
if mode == MODE_TYPE:
mode = MODE_SET
# object type will be rescanned in scan_typedef()
Tok.undo()
try:
typedef = scan_typedef(Tok)
except Exception as Err:
Err.args = ('%s (%s) invalid definition, %s' % (name, mode, Err.args[0]), )
raise(Err)
if Tok.get_next() != TOK_ASSI:
raise(ASN1ProcTextErr('%s (%s) invalid definition' % (name, mode)))
try:
val = scan_val(Tok)
except Exception as Err:
Err.args = ('%s (%s) invalid definition, %s' % (name, mode, Err.args[0]), )
raise(Err)
elif tok == TOK_ASSI:
# object type
if mode == MODE_VALUE:
raise(ASN1ProcTextErr('%s (%s) invalid definition' % (name, mode)))
try:
typedef = scan_typedef(Tok)
except Exception as Err:
Err.args = ('%s (%s) invalid definition, %s' % (name, mode, Err.args[0]), )
raise(Err)
else:
raise(ASN1ProcTextErr('%s invalid definition' % name))
return {'name': name, 'param': param, 'typedef': typedef, 'mode': mode, 'val': val}
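# e.g. (illustrative): for the tokens of 'T ::= INTEGER (0..255)', scan_obj()
# returns {'name': 'T', 'param': None, 'mode': MODE_TYPE, 'val': None,
# 'typedef': {'tags': [], 'type': ['INTEGER'], 'const': [...]}}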
def scan_val(Tok):
"""consume the tokens searching for the complete value of a single object
"""
if Tok.get_next() == TOK_CBRAO:
val = [Tok.get_group()]
return val
else:
val = [Tok.get_tok()]
while Tok.has_next():
tok = Tok.get_next()
if tok == TOK_DOT:
val.append(tok)
tok = Tok.get_next()
if tok[0] not in TOKS_OBJS_EXT:
raise(ASN1ProcTextErr('invalid value definition'))
val.append(tok)
elif tok == TOK_COL:
val.append(tok)
val.extend( scan_val(Tok) )
elif tok == TOK_CBRAO:
# parameterized value
val.append(Tok.get_group())
return val
else:
Tok.undo()
return val
return val
def scan_typedef(Tok):
"""consume the tokens searching for the complete type declaration of a single object
"""
# ASN.1 object type structure:
# ObjTags ObjType ObjParamAct ObjConsts [OF] ObjCont
# CLASS ObjCont WITH SYNTAX ObjSynt
#
typedict = {'tags': [], 'type': None}
tok = Tok.get_next()
if tok == TOK_BRAO:
# tag(s)
typedict['tags'] = scan_tags(Tok)
tok = Tok.get_next()
if tok[0] in TOKS_TYPES:
typedict['type'] = scan_type(Tok)
if not Tok.has_next():
return typedict
else:
try:
if typedict['type'] == ['CLASS']:
_scan_typedef_class(Tok, typedict)
elif typedict['type'] in (['SET'], ['SEQUENCE']):
_scan_typedef_seq(Tok, typedict)
else:
_scan_typedef_std(Tok, typedict)
except Exception as Err:
Err.args = ('invalid type definition, %s' % Err.args[0], )
raise(Err)
return typedict
else:
raise(ASN1ProcTextErr('invalid type definition'))
def scan_tags(Tok):
tags = []
while True:
tags.append( scan_tag(Tok) )
if Tok.get_next() != TOK_BRAO:
Tok.undo()
return tags
def scan_tag(Tok):
tag = {'val': Tok.get_group(), 'mode': None}
tok = Tok.get_next()
if tok in (TOK_TEXP, TOK_TIMP):
tag['mode'] = tok
else:
Tok.undo()
return tag
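# e.g. (illustrative): for the tokens of '[APPLICATION 1] IMPLICIT INTEGER',
# scan_tags() returns [{'val': <Tokenizer over '[APPLICATION 1]'>, 'mode': TOK_TIMP}]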
def scan_type(Tok):
typ = [Tok.get_tok()[1]]
if Tok.has_next():
tok = Tok.get_next()
while tok == TOK_DOT:
tok = Tok.get_next()
if tok[0] not in TOKS_TYPES_EXT:
raise(ASN1ProcTextErr('invalid composite type definition'))
typ.append(tok[1])
if Tok.has_next():
tok = Tok.get_next()
else:
return typ
Tok.undo()
return typ
def _scan_typedef_class(Tok, typedict):
# CLASS ObjCont WITH SYNTAX ObjSynt
tok = Tok.get_next()
if tok != TOK_CBRAO:
raise(ASN1ProcTextErr('invalid CLASS object definition'))
typedict['cont'] = Tok.get_group()
if Tok.has_next():
tok = Tok.get_next()
if tok == TOK_WSYN:
tok = Tok.get_next()
if tok != TOK_CBRAO:
raise(ASN1ProcTextErr('invalid CLASS object SYNTAX definition'))
typedict['synt'] = Tok.get_group()
else:
Tok.undo()
def _scan_typedef_seq(Tok, typedict):
# SEQUENCE / SET ObjCont ObjConsts
# SEQUENCE / SET ObjConsts [SIZE (...)] OF ObjType
tok = Tok.get_next()
if tok == TOK_CBRAO:
# ObjCont
typedict['cont'] = Tok.get_group()
if Tok.has_next():
tok = Tok.get_next()
if tok == TOK_PARO:
typedict['const'] = scan_const(Tok)
else:
Tok.undo()
elif tok in (TOK_PARO, TOK_SIZE):
if tok == TOK_SIZE:
# special case of the SIZE constraint outside of a constraint notation
if Tok.get_next() != TOK_PARO:
raise(ASN1ProcTextErr('invalid SEQ / SET OF SIZE definition'))
typedict['const_sz'] = scan_const(Tok)
else:
# ObjConsts
typedict['const'] = scan_const(Tok)
tok = Tok.get_next()
if tok != TOK_OF:
raise(ASN1ProcTextErr('invalid SEQ / SET OF definition'))
_scan_typedef_seqof(Tok, typedict)
elif tok == TOK_OF:
# OF
_scan_typedef_seqof(Tok, typedict)
else:
raise(ASN1ProcTextErr('invalid SEQ / SET definition'))
def _scan_typedef_seqof(Tok, typedict):
typedict['type'][0] = typedict['type'][0] + ' OF'
# can have a component name
tok = Tok.get_next()
if tok[0] == TOK_LID:
# component name
typedict['cont_name'] = tok[1]
else:
Tok.undo()
try:
typedict['cont'] = scan_typedef(Tok)
except Exception as Err:
Err.args = ('invalid SEQ / SET OF definition, %s' % Err.args[0], )
raise(Err)
def _scan_typedef_std(Tok, typedict):
# ObjParamAct | ObjCont ObjConsts
tok = Tok.get_next()
if tok == TOK_CBRAO:
typedict['cont'] = Tok.get_group()
if not Tok.has_next():
return
tok = Tok.get_next()
if tok == TOK_PARO:
typedict['const'] = scan_const(Tok)
else:
Tok.undo()
def scan_const(Tok):
const = []
while True:
const.append( Tok.get_group() )
if Tok.has_next():
if Tok.get_next() != TOK_PARO:
Tok.undo()
return const
else:
return const
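# e.g. (illustrative): for the tokens of '(0..255) (1..10)', scan_const()
# returns a list of two Tokenizer instances, one per parenthesized constraint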
def test():
import os
from pycrate_asn1c.specdir import ASN_SPECS
p = os.path.dirname(__file__) + os.path.sep + '..' + os.path.sep + 'pycrate_asn1dir' + os.path.sep
M = ASN1Dict()
for S in ASN_SPECS.values():
if isinstance(S, (list, tuple)):
S = S[0]
if S != 'IETF_SNMP':
for fn in os.listdir( '%s%s/' % (p, S)):
if fn[-4:] == '.asn':
fp = '%s%s/%s' % (p, S, fn)
print(fp)
if python_version < 3:
mods = tokenize_text(open(fp).read().decode('utf-8'))
else:
mods = tokenize_text(open(fp).read())
for modname, moddict in mods.items():
M[modname] = moddict
return M
if __name__ == '__main__':
import sys
M = test()
sys.exit(0)