# -*- coding: UTF-8 -*-
|
|
#/**
|
|
# * Software Name : pycrate
|
|
# * Version : 0.4
|
|
# *
|
|
# * Copyright 2018. Benoit Michau. ANSSI.
|
|
# *
|
|
# * This library is free software; you can redistribute it and/or
|
|
# * modify it under the terms of the GNU Lesser General Public
|
|
# * License as published by the Free Software Foundation; either
|
|
# * version 2.1 of the License, or (at your option) any later version.
|
|
# *
|
|
# * This library is distributed in the hope that it will be useful,
|
|
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
# * Lesser General Public License for more details.
|
|
# *
|
|
# * You should have received a copy of the GNU Lesser General Public
|
|
# * License along with this library; if not, write to the Free Software
|
|
# * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
# * MA 02110-1301 USA
|
|
# *
|
|
# *--------------------------------------------------------
|
|
# * File Name : pycrate_asn1c/tokenizer.py
|
|
# * Created : 2018-03-13
|
|
# * Authors : Benoit Michau
|
|
# *--------------------------------------------------------
|
|
#*/
|
|
|
|
import re
|
|
from pycrate_asn1c.err import *
|
|
from pycrate_asn1c.utils import *
|
|
from pycrate_asn1c.dictobj import *
|
|
|
|
|
|
# white space and new line characters
_NL  = '\x0a\x0b\x0c\x0d'
_SNL = '\x09\x0a\x0b\x0c\x0d\x20' + '\xa0' # a0 is not a valid UTF-8 char
REScannerSNL = '[%s]{1,}' % _SNL

# negative lookahead ensuring a keyword is not matched as the prefix of a
# longer identifier (e.g. END inside END-OF-DAY)
_EXC = r'(?![a-zA-Z0-9\-]{1,})'

# native types
REScannerNTypes = '|'.join((
    'NULL',
    'BOOLEAN',
    'INTEGER',
    'REAL',
    'ENUMERATED',
    'OBJECT IDENTIFIER',
    'RELATIVE-OID',
    'OID-IRI',
    'RELATIVE-OID-IRI',
    'BIT STRING',
    'OCTET STRING',
    'NumericString',
    'PrintableString',
    'VisibleString',
    'ISO646String',
    'IA5String',
    'TeletexString',
    'T61String',
    'VideotexString',
    'GraphicString',
    'GeneralString',
    'UniversalString',
    'BMPString',
    'UTF8String',
    'ObjectDescriptor',
    'GeneralizedTime',
    'UTCTime',
    'TIME',
    'SEQUENCE',
    'SET',
    'CHOICE',
    'EXTERNAL',
    'EMBEDDED PDV',
    'CHARACTER STRING',
    'ANY',
    'CLASS',
    'TYPE-IDENTIFIER',
    'ABSTRACT-SYNTAX',
    'INSTANCE OF',
    'MACRO'
    ))

# integer: optional sign (possibly separated by spaces / newlines) and digits
REScannerInt = r'([+\-](?:[%s]{0,})){0,1}[0-9]{1,}' % _SNL

# real number: int part, optional decimal part, optional exponent
# NOTE: the decimal part and exponent are both optional, hence this pattern
# also matches plain integers
REScannerReal = r'(%s){1}(?:\.([0-9]{1,})){0,1}(?:[eE](%s)){0,1}'\
                % (REScannerInt, REScannerInt)

# bstring: binary string, e.g. '0101'B
REScannerBStr = '\'[%s01]{0,}\'B' % _SNL

# hstring: hexadecimal string, e.g. 'C0FFEE'H
REScannerHStr = '\'[%s0-9A-F]{0,}\'H' % _SNL


# tokens' identifiers

# comments and character string
TOK_CMT   = 'CMT'   # comment
TOK_CSTR  = 'CSTR'  # chars string

# definition and tag related
TOK_DEFI  = 'DEFI'  # DEFINITIONS
TOK_EXTI  = 'EXTI'  # EXTENSIBILITY IMPLIED
TOK_BEG   = 'BEG'   # BEGIN
TOK_END   = 'END'   # END
TOK_TAGS  = 'TAGS'  # TAGS
TOK_TUNI  = 'TUNI'  # UNIVERSAL
TOK_TAPP  = 'TAPP'  # APPLICATION
TOK_TPRI  = 'TPRI'  # PRIVATE
TOK_TEXP  = 'TEXP'  # EXPLICIT
TOK_TIMP  = 'TIMP'  # IMPLICIT

# set and value related
TOK_MINF  = 'MINF'  # MINUS-INFINITY
TOK_PINF  = 'PINF'  # PLUS-INFINITY
TOK_NAN   = 'NAN'   # NOT-A-NUMBER
TOK_ALL   = 'ALL'   # ALL
TOK_MIN   = 'MIN'   # MIN
TOK_MAX   = 'MAX'   # MAX
TOK_EXCE  = 'EXCE'  # EXCEPT
TOK_NULL  = 'NULL'  # NULL
TOK_TRUE  = 'TRUE'  # TRUE
TOK_FALS  = 'FALS'  # FALSE
TOK_REAL  = 'REAL'  # real number
TOK_INT   = 'INT'   # integer
TOK_BSTR  = 'BSTR'  # binary string
TOK_HSTR  = 'HSTR'  # hexa string

# other various keywords
TOK_ABS   = 'ABS'   # ABSENT
TOK_AUTO  = 'AUTO'  # AUTOMATIC
TOK_BY    = 'BY'    # BY
TOK_COMP  = 'COMP'  # COMPONENT
TOK_COMPS = 'COMPS' # COMPONENTS
TOK_CONST = 'CONST' # CONSTRAINED
TOK_CONT  = 'CONT'  # CONTAINING
TOK_DEF   = 'DEF'   # DEFAULT
TOK_ENC   = 'ENC'   # ENCODED
TOK_EXP   = 'EXP'   # EXPORTS
TOK_FROM  = 'FROM'  # FROM
TOK_IMP   = 'IMP'   # IMPORTS
TOK_INCL  = 'INCL'  # INCLUDES
TOK_OF    = 'OF'    # OF
TOK_OPT   = 'OPT'   # OPTIONAL
TOK_PAT   = 'PAT'   # PATTERN
TOK_PRES  = 'PRES'  # PRESENT
TOK_SIZE  = 'SIZE'  # SIZE
TOK_WSYN  = 'WSYN'  # WITH SYNTAX
TOK_UNIQ  = 'UNIQ'  # UNIQUE

# identifier related
TOK_NTYPE = 'NTYPE' # native type
TOK_CLAID = 'CLAID' # &[iI]dentifier
TOK_HID   = 'HID'   # IDENTIFIER
TOK_ID    = 'ID'    # Identifier
TOK_LID   = 'LID'   # identifier

# special (series of) characters
TOK_ASSI  = 'ASSI'  # ::=
TOK_COL   = 'COL'   # :
TOK_SCOL  = 'SCOL'  # ;
TOK_EQU   = 'EQU'   # =
TOK_COM   = 'COM'   # ,
TOK_PARO  = 'PARO'  # (
TOK_PARC  = 'PARC'  # )
TOK_DBRAO = 'DBRAO' # [[
TOK_DBRAC = 'DBRAC' # ]]
TOK_BRAO  = 'BRAO'  # [
TOK_BRAC  = 'BRAC'  # ]
TOK_CBRAO = 'CBRAO' # {
TOK_CBRAC = 'CBRAC' # }
TOK_TDOT  = 'TDOT'  # ...
TOK_DDOT  = 'DDOT'  # ..
TOK_DOT   = 'DOT'   # .
TOK_DOTA  = 'DOTA'  # .&
TOK_UNIO  = 'UNIO'  # |
TOK_INTER = 'INTER' # ^
TOK_LTHAN = 'LTHAN' # <
TOK_GTHAN = 'GTHAN' # >
TOK_ARRO  = 'ARRO'  # @
TOK_EXCL  = 'EXCL'  # !


# token identifier sets used to recognize object / type references
TOKS_OBJS      = {TOK_NULL, TOK_NTYPE, TOK_HID, TOK_ID, TOK_LID}
TOKS_TYPES     = {TOK_NULL, TOK_NTYPE, TOK_HID, TOK_ID}
TOKS_OBJS_EXT  = {TOK_NULL, TOK_NTYPE, TOK_HID, TOK_ID, TOK_LID, TOK_CLAID}
TOKS_TYPES_EXT = {TOK_HID, TOK_ID, TOK_CLAID}


# the ASN.1 lexer: patterns are tried in order at each position, so the
# more specific patterns (comments, multi-char symbols, keywords) come first
REScannerASN1 = re.Scanner([
    # comments and character strings
    (r'(--).*?([%s]|(--)|$)' % _NL,  lambda s, t: (TOK_CMT, t)),
    (r'(/\*).*?(\*/)',               lambda s, t: (TOK_CMT, t)),
    (r'".*?(?<!")"(?!")',            lambda s, t: (TOK_CSTR, t)),
    # special (series of) characters
    (r'::=',        lambda s, t: TOK_ASSI),
    (r':',          lambda s, t: TOK_COL),
    (r';',          lambda s, t: TOK_SCOL),
    (r'=',          lambda s, t: TOK_EQU),
    (r',',          lambda s, t: TOK_COM),
    (r'\(',         lambda s, t: TOK_PARO),
    (r'\)',         lambda s, t: TOK_PARC),
    (r'\[{2}',      lambda s, t: TOK_DBRAO),
    (r'\]{2}',      lambda s, t: TOK_DBRAC),
    (r'\[',         lambda s, t: TOK_BRAO),
    (r'\]',         lambda s, t: TOK_BRAC),
    (r'\{',         lambda s, t: TOK_CBRAO),
    (r'\}',         lambda s, t: TOK_CBRAC),
    (r'\.\.\.',     lambda s, t: TOK_TDOT),
    (r'\.\.',       lambda s, t: TOK_DDOT),
    (r'\.',         lambda s, t: TOK_DOT),
    (r'\||(?:UNION%s)' % _EXC,        lambda s, t: TOK_UNIO),
    (r'\^|(?:INTERSECTION%s)' % _EXC, lambda s, t: TOK_INTER),
    (r'<',          lambda s, t: TOK_LTHAN),
    (r'>',          lambda s, t: TOK_GTHAN),
    (r'@',          lambda s, t: TOK_ARRO),
    (r'\!',         lambda s, t: TOK_EXCL),
    # keywords
    (r'ABSENT%s' % _EXC,        lambda s, t: TOK_ABS),
    (r'ALL%s' % _EXC,           lambda s, t: TOK_ALL),
    (r'APPLICATION%s' % _EXC,   lambda s, t: TOK_TAPP),
    (r'AUTOMATIC%s' % _EXC,     lambda s, t: TOK_AUTO),
    (r'BEGIN%s' % _EXC,         lambda s, t: TOK_BEG),
    (r'BY%s' % _EXC,            lambda s, t: TOK_BY),
    (r'COMPONENT%s' % _EXC,     lambda s, t: TOK_COMP),
    (r'COMPONENTS%s' % _EXC,    lambda s, t: TOK_COMPS),
    (r'CONSTRAINED%s' % _EXC,   lambda s, t: TOK_CONST),
    (r'CONTAINING%s' % _EXC,    lambda s, t: TOK_CONT),
    (r'DEFAULT%s' % _EXC,       lambda s, t: TOK_DEF),
    (r'DEFINITIONS%s' % _EXC,   lambda s, t: TOK_DEFI),
    (r'ENCODED%s' % _EXC,       lambda s, t: TOK_ENC),
    (r'END%s' % _EXC,           lambda s, t: TOK_END),
    (r'EXCEPT%s' % _EXC,        lambda s, t: TOK_EXCE),
    (r'EXPLICIT%s' % _EXC,      lambda s, t: TOK_TEXP),
    (r'EXPORTS%s' % _EXC,       lambda s, t: TOK_EXP),
    (r'EXTENSIBILITY%sIMPLIED%s' % (REScannerSNL, _EXC), lambda s, t: TOK_EXTI),
    (r'FALSE%s' % _EXC,         lambda s, t: TOK_FALS),
    (r'FROM%s' % _EXC,          lambda s, t: TOK_FROM),
    (r'IMPLICIT%s' % _EXC,      lambda s, t: TOK_TIMP),
    (r'IMPORTS%s' % _EXC,       lambda s, t: TOK_IMP),
    (r'INCLUDES%s' % _EXC,      lambda s, t: TOK_INCL),
    (r'MAX%s' % _EXC,           lambda s, t: TOK_MAX),
    (r'MIN%s' % _EXC,           lambda s, t: TOK_MIN),
    (r'MINUS-INFINITY%s' % _EXC, lambda s, t: TOK_MINF),
    (r'NOT-A-NUMBER%s' % _EXC,  lambda s, t: TOK_NAN),
    (r'NULL%s' % _EXC,          lambda s, t: (TOK_NULL, t)),
    (r'OF%s' % _EXC,            lambda s, t: TOK_OF),
    (r'OPTIONAL%s' % _EXC,      lambda s, t: TOK_OPT),
    (r'PATTERN%s' % _EXC,       lambda s, t: TOK_PAT),
    (r'PLUS-INFINITY%s' % _EXC, lambda s, t: TOK_PINF),
    (r'PRESENT%s' % _EXC,       lambda s, t: TOK_PRES),
    (r'PRIVATE%s' % _EXC,       lambda s, t: TOK_TPRI),
    (r'SIZE%s' % _EXC,          lambda s, t: TOK_SIZE),
    (r'TAGS%s' % _EXC,          lambda s, t: TOK_TAGS),
    (r'TRUE%s' % _EXC,          lambda s, t: TOK_TRUE),
    (r'UNIQUE%s' % _EXC,        lambda s, t: TOK_UNIQ),
    (r'UNIVERSAL%s' % _EXC,     lambda s, t: TOK_TUNI),
    (r'WITH%sSYNTAX%s' % (REScannerSNL, _EXC), lambda s, t: TOK_WSYN),
    # numbers
    # bugfix: the token identifiers were swapped (real pattern tagged
    # TOK_INT, int pattern tagged TOK_REAL); moreover the real pattern
    # also matches plain integers (fraction and exponent are optional),
    # so the matched text is inspected to choose the token identifier
    (r'%s' % REScannerReal, lambda s, t: (TOK_REAL, t) if ('.' in t or 'e' in t or 'E' in t) else (TOK_INT, t)),
    (r'%s' % REScannerInt,  lambda s, t: (TOK_INT, t)),
    (r'%s' % REScannerBStr, lambda s, t: (TOK_BSTR, t)),
    (r'%s' % REScannerHStr, lambda s, t: (TOK_HSTR, t)),
    # identifiers
    (r'(%s)%s' % (REScannerNTypes, _EXC),                lambda s, t: (TOK_NTYPE, t)),
    (r'&[a-zA-Z](?:\-{0,1}[a-zA-Z0-9]{1,}){0,}%s' % _EXC, lambda s, t: (TOK_CLAID, t)),
    (r'[A-Z](?:\-{0,1}[A-Z0-9]{1,}){0,}%s' % _EXC,       lambda s, t: (TOK_HID, t)),
    (r'[A-Z](?:\-{0,1}[a-zA-Z0-9]{1,}){0,}%s' % _EXC,    lambda s, t: (TOK_ID, t)),
    (r'[a-z](?:\-{0,1}[a-zA-Z0-9]{1,}){0,}%s' % _EXC,    lambda s, t: (TOK_LID, t)),
    # white space and new lines are dropped
    (r'%s' % REScannerSNL, None)
    ],
    flags=re.DOTALL
    )
|
|
|
|
|
|
class Tokenizer(object):
    """handles consciously ASN.1 tokens, forward and backward, while ignoring
    ASN.1 comments
    """

    # number of tokens shown on each side of the cursor in __repr__()
    REPR_OFF = 10

    # matching closing token for each group-opening token
    GROUP = {
        TOK_PARO  : TOK_PARC,  # ( )
        TOK_DBRAO : TOK_DBRAC, # [[ ]]
        TOK_BRAO  : TOK_BRAC,  # [ ]
        TOK_CBRAO : TOK_CBRAC, # { }
        TOK_BEG   : TOK_END    # BEGIN END
        }

    def __init__(self, tokens=None):
        """initialize the handler with a list of tokens

        bugfix: the default was a mutable [], shared between every instance
        created without argument
        """
        self.toks = tokens if tokens is not None else []
        # cursor: index of the current token, -1 before any token is consumed
        self.cur = -1
        # stack of previous cursor values, consumed by undo()
        self.curp = []

    def __repr__(self):
        cur = self.get_cur()
        # clamp the lower slice bound: a negative index would wrap around
        # the token list and display tokens from the end
        return repr(self.toks[max(0, cur-self.REPR_OFF):cur+self.REPR_OFF])

    def get_cur(self):
        """return the current cursor value"""
        return self.cur

    def set_cur(self, cur):
        """set the cursor value, raises ASN1TokenizerErr if out of range"""
        if not -1 <= cur < len(self.toks):
            raise(ASN1TokenizerErr('invalid cursor'))
        else:
            self.cur = cur

    def count(self):
        """return the number of tokens from the cursor to the end"""
        return len(self.toks) - self.cur

    def get_tok(self):
        """return the token under the cursor, raises ASN1TokenizerErr if the
        cursor is not set on a token
        """
        # an explicit range check is required: with cur == -1, toks[-1]
        # would silently return the last token instead of raising
        if not 0 <= self.cur < len(self.toks):
            raise(ASN1TokenizerErr('invalid cursor'))
        return self.toks[self.cur]

    def get_next(self, off=1):
        """advance the cursor to the `off`-th next non-comment token and
        return it, raises ASN1TokenizerErr if not enough tokens remain
        """
        ind, cnt, curp = 0, 0, self.cur
        for tok in self.toks[1+self.cur:]:
            if tok[0] == TOK_CMT:
                pass
            else:
                ind += 1
            # cnt counts every scanned token, comments included, so that
            # the cursor lands exactly on the returned token
            cnt += 1
            if ind == off:
                break
        if ind < off:
            raise(ASN1TokenizerErr('not enough tokens'))
        self.cur += cnt
        self.curp.append(curp)
        return tok

    def has_next(self):
        """return True if at least one non-comment token remains after the
        cursor, False otherwise
        """
        for tok in self.toks[1+self.cur:]:
            if tok[0] == TOK_CMT:
                pass
            else:
                return True
        return False

    def get_prev(self, off=1):
        """move the cursor back to the `off`-th previous non-comment token and
        return it, raises ASN1TokenizerErr if not enough tokens remain
        """
        ind, cnt, curp = 0, 0, self.cur
        for tok in self.toks[:self.cur][::-1]:
            if tok[0] == TOK_CMT:
                pass
            else:
                ind += 1
            # cnt counts every scanned token, comments included
            cnt += 1
            if ind == off:
                break
        if ind < off:
            raise(ASN1TokenizerErr('not enough tokens'))
        self.cur -= cnt
        self.curp.append(curp)
        return tok

    def get_upto(self, target):
        """consume tokens until `target` is found, and return a new instance
        wrapping the consumed tokens (target excluded)
        """
        curp = self.cur
        while self.get_next() != target:
            # do not extend the stack with previous cursor value
            del self.curp[-1]
        self.curp.append(curp)
        self.cur += 1
        return self.__class__(self.toks[max(0, curp):self.cur-1])

    def get_group(self, wbnd=True):
        """consume a balanced group of tokens (cursor must be on an opening
        token from GROUP) and return a new instance wrapping it

        wbnd: if True, the opening / closing tokens are included in the
        returned instance
        """
        tok, curp = self.toks[self.cur], self.cur
        if tok in self.GROUP:
            op, clo = tok, self.GROUP[tok]
        else:
            raise(ASN1TokenizerErr('invalid group opening token, %s' % tok))
        depth = 1
        while depth > 0:
            tok = self.get_next()
            # do not extend the stack with previous cursor value
            del self.curp[-1]
            if tok == op:
                depth += 1
            elif tok == clo:
                depth -= 1
                if depth == 0:
                    break
        self.curp.append(curp)
        if wbnd:
            return self.__class__(self.toks[curp:1+self.cur])
        else:
            return self.__class__(self.toks[1+curp:self.cur])

    def get_comps(self, sep=TOK_COM):
        """consume the remaining tokens and split them on the `sep` token,
        ignoring separators nested inside groups; return the list of
        components, each wrapped in a new instance
        """
        comps, curp, curlast = [], self.cur, self.cur
        while True:
            try:
                tok = self.get_next()
            except ASN1TokenizerErr:
                # end of the token list (was a bare except)
                break
            if tok in self.GROUP:
                # jump over the group
                grp = self.get_group()
                # do not extend the stack with previous cursor value
                del self.curp[-1]
            elif tok == sep:
                comps.append(self.__class__(self.toks[curlast:self.cur-1]))
                curlast = self.cur
            else:
                pass
        self.curp.append(curp)
        return comps

    def undo(self):
        """restore the cursor to its value before the last cursor-moving call

        bugfix: was a bare raise() which raises a cryptic TypeError
        """
        if not self.curp:
            raise(ASN1TokenizerErr('no previous cursor value'))
        self.cur = self.curp[-1]
        del self.curp[-1]
|
|
|
|
|
|
# ASN.1 module global structure:
|
|
# ModName ModOID DEFINITIONS ModOpts ::= BEGIN ModExports ModImports ModObjects END
|
|
#
|
|
# ASN.1 object structure:
|
|
# ObjName ObjParam ObjType ::= ObjVal
|
|
# ObjName ObjParam ObjType ::= ObjSet
|
|
# ObjName ObjParam ::= ObjType
|
|
# ObjName MACRO ::= BEGIN ... END
|
|
#
|
|
# ASN.1 object type structure:
|
|
# ObjTags ObjType ObjParamAct ObjConsts ObjCont
|
|
# CLASS ObjParamAct ObjCont WITH SYNTAX ObjSynt
|
|
|
|
def tokenize_text(text=u'', **kwargs):
    """tokenize the provided textual ASN.1 specification

    text: str, or list / tuple of str (joined with blank lines)
    kwargs: autotags (bool) forces AUTOMATIC TAGS on every module,
            extimpl (bool) forces EXTENSIBILITY IMPLIED on every module

    returns an ASN1Dict of module name -> module dict
    """
    #
    if isinstance(text, (list, tuple)):
        text = u'\n\n'.join(text)
    elif not isinstance(text, str_types):
        raise(ASN1Err('need some textual definition'))
    #
    toks, rest = REScannerASN1.scan(text)
    if rest:
        asnlog('%i remaining chars at the end of spec' % len(rest))
    # build the handler for the tokens
    Tok = Tokenizer(toks)
    modules = ASN1Dict()
    #
    # scan the tokens for all ASN.1 modules defined
    while True:
        module = ASN1Dict()
        #
        # 1) scan tokens for module declaration with DEFINITIONS
        try:
            TokDecl = Tok.get_upto(TOK_DEFI)
        except ASN1TokenizerErr:
            # no more DEFINITIONS
            break
        #
        name, oid = scan_module_decl(TokDecl)
        module['_name_'] = name
        if oid:
            module['_oidtok_'] = oid
            # TODO: parse the OID value
            module['_oid_'] = []
        else:
            module['_oidtok_'] = []
            module['_oid_'] = []
        #
        # 2) scan tokens for module options before assignment ::=
        if Tok.get_tok() != TOK_ASSI:
            try:
                TokOpt = Tok.get_upto(TOK_ASSI)
            except ASN1TokenizerErr:
                raise(ASN1ProcTextErr('module assignment not found'))
            #
            module['_tag_'], module['_ext_'] = scan_module_opt(TokOpt)
        else:
            module['_tag_'], module['_ext_'] = None, False
            # step over the ::= token
            Tok.get_next()
        if 'autotags' in kwargs and kwargs['autotags']:
            module['_tag_'] = TOK_AUTO
        if 'extimpl' in kwargs and kwargs['extimpl']:
            module['_ext_'] = True
        #asnlog('[proc] module %s, tags: %r' % (name, module['_tag_']))
        #asnlog('[proc] module %s, extensibility implied: %r' % (name, module['_ext_']))
        #
        # 3) scan tokens for BEGIN - END block
        if Tok.get_tok() != TOK_BEG:
            raise(ASN1ProcTextErr('missing BEGIN statement'))
        TokDef = Tok.get_group(wbnd=False)
        module['_tok_'] = TokDef
        #asnlog('[proc] module %s: %i tokens' % (name, TokDef.count()))
        if Tok.has_next():
            Tok.get_next()
        #
        # 4) scan the module definition block for exports
        tok = TokDef.get_next()
        if tok == TOK_EXP:
            module['_exp_'] = scan_module_exp(TokDef)
            #asnlog('[proc] module %s: %i tokens' % (name, TokDef.count()))
        else:
            TokDef.undo()
        #
        # 5) scan the module definition block for imports
        tok = TokDef.get_next()
        if tok == TOK_IMP:
            module['_imp_'] = scan_module_imp(TokDef)
            # build the symbol -> providing module resolver
            module['_resolv_'] = {}
            for d in module['_imp_']:
                for sym in d['sym']:
                    module['_resolv_'][sym] = d['name']
            #asnlog('[proc] module %s: %i tokens' % (name, TokDef.count()))
            #if module['_imp_']:
            #    asnlog('[proc] module %s: imports parsed' % name)
        else:
            TokDef.undo()
        #
        # 6) scan the module definition block for objects
        objs = scan_objs(TokDef)
        #
        # 7) init objects types for the module
        module['_obj_'] = objs
        module['_type_'] = []
        module['_set_'] = []
        module['_val_'] = []
        module['_class_'] = []
        module['_param_'] = []
        #
        for obj in objs.values():
            # bugfix: those per-kind collections are lists, the object names
            # were assigned instead of appended, keeping only the last one
            # and turning the lists into plain str
            if obj['mode'] == MODE_TYPE:
                module['_type_'].append(obj['name'])
            elif obj['mode'] == MODE_SET:
                module['_set_'].append(obj['name'])
            elif obj['mode'] == MODE_VALUE:
                module['_val_'].append(obj['name'])
            else:
                # unreachable: scan_obj() only produces the 3 modes above
                assert()
            # NOTE(review): scan_typedef() stores 'type' as a list (e.g.
            # ['CLASS']) while the MACRO path stores a plain str; this str
            # comparison hence never matches a scanned CLASS type - confirm
            if obj['typedef']['type'] == 'CLASS':
                module['_class_'].append(obj['name'])
            if obj['param']:
                module['_param_'].append(obj['name'])
        #
        modules[name] = module
    #
    return modules
|
|
|
|
|
|
def scan_module_decl(Tok):
    """extract the module name and optional OID tokens from given tokens

    returns a 2-tuple (name, oid) where oid is a Tokenizer over the braced
    OID group, or None when absent
    """
    # the module identifier must be an (all-)uppercase-starting identifier
    tok = Tok.get_next()
    if tok[0] not in (TOK_HID, TOK_ID):
        raise(ASN1ProcTextErr('invalid module declaration, invalid name %r' % tok))
    name = tok[1]
    # an optional { ... } OID group may follow the name
    oid = None
    if Tok.has_next():
        if Tok.get_next() != TOK_CBRAO:
            raise(ASN1ProcTextErr('invalid module declaration'))
        oid = Tok.get_group()
    return name, oid
|
|
|
|
|
|
def scan_module_opt(Tok):
    """extract module options from given tokens

    returns a 2-tuple (tag, ext):
    tag: None, or the TagDefault token (AUTOMATIC / EXPLICIT / IMPLICIT)
    ext: bool, True if EXTENSIBILITY IMPLIED is declared
    """
    # scan TagDefault and ExtensionDefault
    # TODO: scan EncodingReferenceDefault first
    tag, ext = None, False
    if not Tok.has_next():
        # bugfix: was `return tag, next`, returning the next() builtin
        # instead of the ext flag
        return tag, ext
    tok = Tok.get_next()
    if tok in (TOK_AUTO, TOK_TEXP, TOK_TIMP):
        tag = Tok.get_tok()
        if Tok.get_next() != TOK_TAGS:
            raise(ASN1ProcTextErr('invalid module options, missing TAGS keyword'))
        if not Tok.has_next():
            return tag, ext
        tok = Tok.get_next()
    if tok == TOK_EXTI:
        ext = True
    else:
        raise(ASN1ProcTextErr('invalid module options'))
    if Tok.has_next():
        raise(ASN1ProcTextErr('invalid module options'))
    return tag, ext
|
|
|
|
|
|
def scan_module_exp(Tok):
    """consume the tokens searching for module exports declaration

    returns None for EXPORTS ALL, or the list of exported symbol names
    """
    tok = Tok.get_next()
    # EXPORTS ALL ;
    if tok == TOK_ALL:
        if Tok.get_next() != TOK_SCOL:
            raise(ASN1ProcTextErr('invalid module export'))
        return None
    # otherwise a comma-separated list of references, terminated by ;
    if tok[0] not in TOKS_OBJS:
        raise(ASN1ProcTextErr('invalid module export'))
    exported = []
    while tok != TOK_SCOL:
        if tok[0] in TOKS_OBJS:
            exported.append(tok[1])
        elif tok == TOK_CBRAO:
            # parameterized reference: an empty {} must follow the name
            if Tok.get_next() != TOK_CBRAC:
                raise(ASN1ProcTextErr('invalid module export, parameterized reference'))
        elif tok != TOK_COM:
            raise(ASN1ProcTextErr('invalid module export'))
        tok = Tok.get_next()
    return exported
|
|
|
|
|
|
def scan_module_imp(Tok):
    """consume the tokens searching for module imports declaration

    returns a list of dicts, one per imported module:
    {'name': module name, 'sym': list of imported symbol names,
     'oidtok': optional module OID tokens or OID reference}
    """
    # sym accumulates the symbols of the import group being scanned;
    # imp accumulates one dict per `... FROM ModName` group
    sym, imp = [], []
    tok = Tok.get_next()
    while tok != TOK_SCOL:
        if tok[0] in TOKS_OBJS:
            sym.append(tok[1])
        elif tok == TOK_CBRAO:
            # parameterized ref: ignoring it
            if Tok.get_next() != TOK_CBRAC:
                raise(ASN1ProcTextErr('invalid module import, parameterized reference'))
        elif tok == TOK_FROM:
            # end of an import group: the providing module name follows
            tok = Tok.get_next()
            if tok[0] not in (TOK_HID, TOK_ID) or not sym:
                raise(ASN1ProcTextErr('invalid module import'))
            imp.append({'name': tok[1], 'sym': sym})
            # rev: whether to rewind the token just read after the module name
            sym, rev, tok = [], True, Tok.get_next()
            if tok == TOK_CBRAO:
                # module OID
                imp[-1]['oidtok'] = Tok.get_group()
                # TODO: parse the OID value
                rev = False
            elif tok[0] == TOK_LID:
                # a lowercase identifier here is either an OID valueref for
                # this module, or the first symbol of the next import group
                asnlog('imported module OID reference is ambiguous, %s' % tok[1])
                # will be dealt with at the end
            if rev:
                # push the token back so the main loop rescans it
                Tok.undo()
        elif tok != TOK_COM:
            raise(ASN1ProcTextErr('invalid module import'))
        tok = Tok.get_next()
    if sym:
        # symbols left over after the last FROM group
        if len(sym) == 1 and sym[0][0].islower():
            asnlog('imported module ambiguous OID references were actually OID references')
            # this means all those ambiguous OID ref were actually OID ref for
            # the previous module instead of imported symbols
            for i in range(len(imp)-1):
                # NOTE(review): sym entries are plain str (tok[1]), so
                # sym[0][0] is a single char and can never equal TOK_LID
                # ('LID') - this condition looks dead, confirm intent
                if 'oidtok' not in imp[i] and imp[i+1]['sym'][0][0] == TOK_LID:
                    # transfer the symbol as the OID ref of the previous module
                    imp[i]['oidtok'] = imp[i+1]['sym'][0][1]
                    del imp[i+1]['sym'][0]
            imp[-1]['oidtok'] = sym[0]
        else:
            raise(ASN1ProcTextErr('invalid module import'))
    return imp
|
|
|
|
|
|
def scan_objs(Tok):
    """consume the tokens searching for objects declaration

    returns an ASN1Dict mapping object name -> object dict (see scan_obj)
    """
    collected = ASN1Dict()
    while Tok.has_next():
        entry = scan_obj(Tok)
        # a duplicate definition silently overrides the previous one
        if entry['name'] in collected:
            asnlog('multiple definitions of %s' % entry['name'])
        collected[entry['name']] = entry
    return collected
|
|
|
|
|
|
def scan_obj(Tok):
    """consume the tokens searching for the complete declaration of a single object

    returns a dict with keys:
    name: object name, param: formal parameters tokens or None,
    typedef: type definition dict, mode: MODE_TYPE / MODE_SET / MODE_VALUE,
    val: value tokens or None
    """
    # ASN.1 object structure:
    # ObjName ObjParam ObjType ::= ObjVal
    # ObjName ObjParam ObjType ::= ObjSet
    # ObjName ObjParam ::= ObjType
    # ObjName MACRO ::= BEGIN ... END
    #
    param, typedef, mode, val = None, {}, None, None
    # the case of the object name decides its provisional mode:
    # lowercase-first -> value, uppercase-first -> type
    tok = Tok.get_next()
    if tok[0] == TOK_LID:
        mode = MODE_VALUE
    elif tok[0] in (TOK_ID, TOK_HID):
        mode = MODE_TYPE
    else:
        raise(ASN1ProcTextErr('invalid object name, %r' % (tok, )))
    name = tok[1]
    tok = Tok.get_next()
    if tok == TOK_CBRAO:
        # formal parameters
        param = Tok.get_group()
        tok = Tok.get_next()
    if tok == TOK_BRAO or tok[0] in TOKS_TYPES:
        # a type (or tag) before ::= means the object carries a value or set
        if tok[1] == 'MACRO':
            # MACRO
            if Tok.get_next() != TOK_ASSI or Tok.get_next() != TOK_BEG:
                raise(ASN1ProcTextErr('%s invalid MACRO definition' % name))
            typedef['type'] = 'MACRO'
            typedef['cont'] = Tok.get_group()
        else:
            # object value or set
            if mode == MODE_TYPE:
                mode = MODE_SET
            # object type will be rescanned in scan_typedef()
            Tok.undo()
            try:
                typedef = scan_typedef(Tok)
            except Exception as Err:
                # prefix the error with the object name / mode for context
                Err.args = ('%s (%s) invalid definition, %s' % (name, mode, Err.args[0]), )
                raise(Err)
            if Tok.get_next() != TOK_ASSI:
                raise(ASN1ProcTextErr('%s (%s) invalid definition' % (name, mode)))
            try:
                val = scan_val(Tok)
            except Exception as Err:
                Err.args = ('%s (%s) invalid definition, %s' % (name, mode, Err.args[0]), )
                raise(Err)
    elif tok == TOK_ASSI:
        # object type
        if mode == MODE_VALUE:
            raise(ASN1ProcTextErr('%s (%s) invalid definition' % (name, mode)))
        try:
            typedef = scan_typedef(Tok)
        except Exception as Err:
            Err.args = ('%s (%s) invalid definition, %s' % (name, mode, Err.args[0]), )
            raise(Err)
    else:
        raise(ASN1ProcTextErr('%s invalid definition' % name))
    return {'name': name, 'param': param, 'typedef': typedef, 'mode': mode, 'val': val}
|
|
|
|
|
|
def scan_val(Tok):
    """consume the tokens searching for the complete value of a single object

    returns the list of value tokens (nested Tokenizer instances for braced
    groups)
    """
    # a braced value is captured as a single group
    if Tok.get_next() == TOK_CBRAO:
        return [Tok.get_group()]
    parts = [Tok.get_tok()]
    while Tok.has_next():
        nxt = Tok.get_next()
        if nxt == TOK_DOT:
            # dotted reference: keep the dot and the following identifier
            parts.append(nxt)
            nxt = Tok.get_next()
            if nxt[0] not in TOKS_OBJS_EXT:
                raise(ASN1ProcTextErr('invalid value definition'))
            parts.append(nxt)
        elif nxt == TOK_COL:
            # CHOICE-like `ident : value` notation, recurse for the value
            parts.append(nxt)
            parts.extend( scan_val(Tok) )
        elif nxt == TOK_CBRAO:
            # parameterized value
            parts.append(Tok.get_group())
            break
        else:
            # not part of the value anymore: rewind and stop
            Tok.undo()
            break
    return parts
|
|
|
|
|
|
def scan_typedef(Tok):
    """consume the tokens searching for the complete type declaration of a single object

    returns a dict with at least keys 'tags' (list of tag dicts) and 'type'
    (list of type name components), plus content / constraint keys filled by
    the per-kind helpers
    """
    # ASN.1 object type structure:
    # ObjTags ObjType ObjParamAct ObjConsts [OF] ObjCont
    # CLASS ObjCont WITH SYNTAX ObjSynt
    typedict = {'tags': [], 'type': None}
    tok = Tok.get_next()
    if tok == TOK_BRAO:
        # one or more leading [ ... ] tags
        typedict['tags'] = scan_tags(Tok)
        tok = Tok.get_next()
    if tok[0] not in TOKS_TYPES:
        raise(ASN1ProcTextErr('invalid type definition'))
    typedict['type'] = scan_type(Tok)
    if not Tok.has_next():
        return typedict
    # dispatch on the base type for content / constraints scanning
    if typedict['type'] == ['CLASS']:
        handler = _scan_typedef_class
    elif typedict['type'] in (['SET'], ['SEQUENCE']):
        handler = _scan_typedef_seq
    else:
        handler = _scan_typedef_std
    try:
        handler(Tok, typedict)
    except Exception as Err:
        Err.args = ('invalid type definition, %s' % Err.args[0], )
        raise(Err)
    return typedict
|
|
|
|
|
|
def scan_tags(Tok):
    """collect every consecutive [ ... ] tag group, cursor being on the
    first opening bracket; returns the list of tag dicts
    """
    collected = [scan_tag(Tok)]
    while Tok.get_next() == TOK_BRAO:
        collected.append(scan_tag(Tok))
    # the last token read does not open a tag: rewind it
    Tok.undo()
    return collected
|
|
|
|
|
|
def scan_tag(Tok):
    """scan a single tag, cursor being on its opening bracket

    returns {'val': Tokenizer over the bracketed tokens,
             'mode': EXPLICIT / IMPLICIT token or None}
    """
    tagdict = {'val': Tok.get_group(), 'mode': None}
    nxt = Tok.get_next()
    if nxt in (TOK_TEXP, TOK_TIMP):
        tagdict['mode'] = nxt
    else:
        # no tagging mode keyword: rewind
        Tok.undo()
    return tagdict
|
|
|
|
|
|
def scan_type(Tok):
    """scan a (possibly dotted) type reference, cursor being on its first
    token; returns the list of name components
    """
    comps = [Tok.get_tok()[1]]
    if not Tok.has_next():
        return comps
    nxt = Tok.get_next()
    while nxt == TOK_DOT:
        # composite reference: Type.Component...
        nxt = Tok.get_next()
        if nxt[0] not in TOKS_TYPES_EXT:
            raise(ASN1ProcTextErr('invalid composite type definition'))
        comps.append(nxt[1])
        if not Tok.has_next():
            return comps
        nxt = Tok.get_next()
    # the last token read is not a dot: rewind it
    Tok.undo()
    return comps
|
|
|
|
|
|
def _scan_typedef_class(Tok, typedict):
    """scan a CLASS content and optional WITH SYNTAX group into typedict"""
    # CLASS { ... } [WITH SYNTAX { ... }]
    if Tok.get_next() != TOK_CBRAO:
        raise(ASN1ProcTextErr('invalid CLASS object definition'))
    typedict['cont'] = Tok.get_group()
    if not Tok.has_next():
        return
    if Tok.get_next() == TOK_WSYN:
        if Tok.get_next() != TOK_CBRAO:
            raise(ASN1ProcTextErr('invalid CLASS object SYNTAX definition'))
        typedict['synt'] = Tok.get_group()
    else:
        # not a WITH SYNTAX clause: rewind
        Tok.undo()
|
|
|
|
|
|
def _scan_typedef_seq(Tok, typedict):
    """scan the content and constraints of a SEQUENCE / SET (OF) type into
    typedict
    """
    # SEQUENCE / SET ObjCont ObjConsts
    # SEQUENCE / SET ObjConsts [SIZE (...)] OF ObjType
    tok = Tok.get_next()
    if tok == TOK_CBRAO:
        # ObjCont: constructed content { ... }, optionally constrained
        typedict['cont'] = Tok.get_group()
        if Tok.has_next():
            tok = Tok.get_next()
            if tok == TOK_PARO:
                typedict['const'] = scan_const(Tok)
            else:
                Tok.undo()
    elif tok in (TOK_PARO, TOK_SIZE):
        # constraint(s) before an OF: SEQUENCE / SET OF notation
        if tok == TOK_SIZE:
            # special case of the SIZE constraint outside of a constraint notation
            if Tok.get_next() != TOK_PARO:
                raise(ASN1ProcTextErr('invalid SEQ / SET OF SIZE definition'))
            typedict['const_sz'] = scan_const(Tok)
        else:
            # ObjConsts
            typedict['const'] = scan_const(Tok)
        tok = Tok.get_next()
        if tok != TOK_OF:
            raise(ASN1ProcTextErr('invalid SEQ / SET OF definition'))
        _scan_typedef_seqof(Tok, typedict)
    elif tok == TOK_OF:
        # OF: unconstrained SEQUENCE / SET OF
        _scan_typedef_seqof(Tok, typedict)
    else:
        raise(ASN1ProcTextErr('invalid SEQ / SET definition'))
|
|
|
|
|
|
def _scan_typedef_seqof(Tok, typedict):
    """scan the inner component of a SEQUENCE / SET OF into typedict"""
    # mark the base type as SEQUENCE OF / SET OF
    typedict['type'][0] += ' OF'
    nxt = Tok.get_next()
    if nxt[0] == TOK_LID:
        # optional component name before the inner type
        typedict['cont_name'] = nxt[1]
    else:
        Tok.undo()
    try:
        typedict['cont'] = scan_typedef(Tok)
    except Exception as Err:
        # prefix the error for context
        Err.args = ('invalid SEQ / SET OF definition, %s' % Err.args[0], )
        raise(Err)
|
|
|
|
|
|
def _scan_typedef_std(Tok, typedict):
    """scan the optional content and constraints of a standard type into
    typedict
    """
    # ObjParamAct | ObjCont ObjConsts
    tok = Tok.get_next()
    if tok == TOK_CBRAO:
        # { ... } content or actual parameters
        typedict['cont'] = Tok.get_group()
        if not Tok.has_next():
            return
        tok = Tok.get_next()
    if tok == TOK_PARO:
        # ( ... ) constraint(s)
        typedict['const'] = scan_const(Tok)
    else:
        Tok.undo()
|
|
|
|
|
|
def scan_const(Tok):
    """collect consecutive parenthesized constraint groups, cursor being on
    the first opening parenthesis; returns the list of groups
    """
    groups = [Tok.get_group()]
    while Tok.has_next():
        if Tok.get_next() != TOK_PARO:
            # not another constraint: rewind and stop
            Tok.undo()
            break
        groups.append(Tok.get_group())
    return groups
|
|
|
|
|
|
|
|
def test():
    """tokenize every bundled ASN.1 specification file as a smoke test

    returns an ASN1Dict of all parsed modules
    """
    import os
    from pycrate_asn1c.specdir import ASN_SPECS

    base = os.path.dirname(__file__) + os.path.sep + '..' + os.path.sep + 'pycrate_asn1dir' + os.path.sep
    allmods = ASN1Dict()

    for spec in ASN_SPECS.values():
        if isinstance(spec, (list, tuple)):
            spec = spec[0]
        if spec == 'IETF_SNMP':
            # IETF_SNMP uses the MACRO notation, skipped here
            continue
        for fn in os.listdir( '%s%s/' % (base, spec)):
            if fn[-4:] != '.asn':
                continue
            fp = '%s%s/%s' % (base, spec, fn)
            print(fp)
            # Python 2 requires an explicit UTF-8 decoding of the file content
            if python_version < 3:
                content = open(fp).read().decode('utf-8')
            else:
                content = open(fp).read()
            for modname, moddict in tokenize_text(content).items():
                allmods[modname] = moddict
    return allmods
|
|
|
|
|
|
if __name__ == '__main__':
    # running this module directly tokenizes all bundled ASN.1 specs
    # as a smoke test
    import sys
    M = test()
    sys.exit(0)
|