construct: add adapter Utf8Adapter to safely interpret utf8 text

Uninitialized files, file records, or fields in a file or file record
usually contain a string of 0xff bytes. This becomes a problem when the
content is normally encoded/decoded as utf8 by the construct
parser. The parser will throw an exception when it tries to decode the
0xff string as utf8. This is an especially serious problem in pySim-trace,
where an exception stops the parser.

Let's fix this by interpreting a string of 0xff as an empty string.

Related: OS#6094
Change-Id: Id114096ccb8b7ff8fcc91e1ef3002526afa09cb7
This commit is contained in:
Philipp Maier 2023-07-26 17:01:37 +02:00
parent fec721fcb1
commit 791f80a44f
3 changed files with 25 additions and 10 deletions

View File

@ -6,6 +6,7 @@ from construct.core import evaluate, BitwisableString
from construct.lib import integertypes
from pySim.utils import b2h, h2b, swap_nibbles
import gsm0338
import codecs
"""Utility code related to the integration of the 'construct' declarative parser."""
@ -34,6 +35,18 @@ class HexAdapter(Adapter):
def _encode(self, obj, context, path):
return h2b(obj)
class Utf8Adapter(Adapter):
    """Adapter translating between UTF-8 encoded bytes and readable text.

    A buffer made up solely of 0xff bytes (an empty buffer included) is
    decoded as the empty string instead of being handed to the UTF-8
    decoder, which would raise on such input.
    """

    def _decode(self, obj, context, path):
        """Return the text for ``obj``; an all-0xff buffer yields ``""``."""
        # Reference buffer of the same length consisting only of 0xff bytes.
        all_ff = b'\xff' * len(obj)
        return "" if obj == all_ff else codecs.decode(obj, "utf-8")

    def _encode(self, obj, context, path):
        """Return ``obj`` encoded as UTF-8 bytes."""
        utf8_bytes = codecs.encode(obj, "utf-8")
        return utf8_bytes
class BcdAdapter(Adapter):
"""convert a bytes() type to a string of BCD nibbles."""

View File

@ -872,7 +872,7 @@ class EF_ePDGId(TransparentEF):
class ePDGId(BER_TLV_IE, tag=0x80, nested=[]):
_construct = Struct('type_of_ePDG_address'/Enum(Byte, FQDN=0, IPv4=1, IPv6=2),
'ePDG_address'/Switch(this.type_of_address,
{'FQDN': GreedyString("utf8"),
{'FQDN': Utf8Adapter(GreedyBytes),
'IPv4': HexAdapter(GreedyBytes),
'IPv6': HexAdapter(GreedyBytes)}))
@ -951,15 +951,15 @@ class EF_OPL5G(LinFixedEF):
class EF_SUPI_NAI(TransparentEF):
class NetworkSpecificIdentifier(TLV_IE, tag=0x80):
# RFC 7542 encoded as UTF-8 string
_construct = GreedyString("utf8")
_construct = Utf8Adapter(GreedyBytes)
class GlobalLineIdentifier(TLV_IE, tag=0x81):
# TS 23.003 clause 28.16.2
_construct = GreedyString("utf8")
_construct = Utf8Adapter(GreedyBytes)
class GlobalCableIdentifier(TLV_IE, tag=0x82):
# TS 23.003 clause 28.15.2
_construct = GreedyString("utf8")
_construct = Utf8Adapter(GreedyBytes)
class NAI_TLV_Collection(TLV_IE_Collection,
nested=[NetworkSpecificIdentifier, GlobalLineIdentifier, GlobalCableIdentifier]):
@ -984,7 +984,7 @@ class EF_Routing_Indicator(TransparentEF):
# TS 31.102 Section 4.4.11.13
class EF_TN3GPPSNN(TransparentEF):
class ServingNetworkName(BER_TLV_IE, tag=0x80):
_construct = GreedyString("utf8")
_construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='4f0c', sfid=0x0c, name='EF.TN3GPPSNN',
desc='Trusted non-3GPP Serving network names list', **kwargs):
super().__init__(fid, sfid=sfid, name=name, desc=desc, **kwargs)

View File

@ -30,6 +30,7 @@ from pySim.ts_31_102 import ADF_USIM, EF_FromPreferred
from pySim.ts_31_102_telecom import EF_UServiceTable
import pySim.ts_102_221
from pySim.ts_102_221 import EF_ARR
from pySim.construct import *
# Mapping between ISIM Service Number and its description
EF_IST_map = {
@ -62,8 +63,9 @@ class EF_IMPI(TransparentEF):
( '803137333830303630303030303031303140696d732e6d6e633030302e6d63633733382e336770706e6574776f726b2e6f7267',
{ "nai": "738006000000101@ims.mnc000.mcc738.3gppnetwork.org" } ),
]
class nai(BER_TLV_IE, tag=0x80):
_construct = GreedyString("utf8")
_construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='6f02', sfid=0x02, name='EF.IMPI', desc='IMS private user identity', **kwargs):
super().__init__(fid=fid, sfid=sfid, name=name, desc=desc, **kwargs)
@ -76,7 +78,7 @@ class EF_DOMAIN(TransparentEF):
{ "domain": "ims.mnc000.mcc738.3gppnetwork.org" } ),
]
class domain(BER_TLV_IE, tag=0x80):
_construct = GreedyString("utf8")
_construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='6f03', sfid=0x05, name='EF.DOMAIN', desc='Home Network Domain Name', **kwargs):
super().__init__(fid=fid, sfid=sfid, name=name, desc=desc, **kwargs)
@ -89,7 +91,7 @@ class EF_IMPU(LinFixedEF):
{ "impu": "sip:738006000000101@ims.mnc000.mcc738.3gppnetwork.org" } ),
]
class impu(BER_TLV_IE, tag=0x80):
_construct = GreedyString("utf8")
_construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='6f04', sfid=0x04, name='EF.IMPU', desc='IMS public user identity', **kwargs):
super().__init__(fid=fid, sfid=sfid, name=name, desc=desc, **kwargs)
@ -165,7 +167,7 @@ class EF_NAFKCA(LinFixedEF):
# TS 31.103 Section 4.2.16
class EF_UICCIARI(LinFixedEF):
class iari(BER_TLV_IE, tag=0x80):
_construct = GreedyString("utf8")
_construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='6fe7', sfid=None, name='EF.UICCIARI', desc='UICC IARI', **kwargs):
super().__init__(fid=fid, sfid=sfid, name=name, desc=desc, **kwargs)
@ -232,7 +234,7 @@ class EF_XCAPConfigData(BerTlvEF):
# TS 31.103 Section 4.2.20
class EF_WebRTCURI(TransparentEF):
class uri(BER_TLV_IE, tag=0x80):
_construct = GreedyString("utf8")
_construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='6ffa', sfid=None, name='EF.WebRTCURI', desc='WebRTC URI', **kwargs):
super().__init__(fid=fid, sfid=sfid, name=name, desc=desc, **kwargs)