pycrate/pycrate_mobile/TS23038.py

# -*- coding: UTF-8 -*-
#/**
# * Software Name : pycrate
# * Version : 0.4
# *
# * Copyright 2017. Benoit Michau. ANSSI.
# *
# * This library is free software; you can redistribute it and/or
# * modify it under the terms of the GNU Lesser General Public
# * License as published by the Free Software Foundation; either
# * version 2.1 of the License, or (at your option) any later version.
# *
# * This library is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# * Lesser General Public License for more details.
# *
# * You should have received a copy of the GNU Lesser General Public
# * License along with this library; if not, write to the Free Software
# * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# * MA 02110-1301  USA
# *
# *--------------------------------------------------------
# * File Name : pycrate_mobile/TS23038.py
# * Created : 2017-10-23
# * Authors : Benoit Michau
# *--------------------------------------------------------
#*/

# names exported by a wildcard import of this module: the 3 charset
# identifiers, the 2 Data Coding Scheme structures and the GSM 7 bit codecs
__all__ = [
    'DCS_7B',
    'DCS_8B',
    'DCS_UCS',
    'SMS_DCS',
    'CBS_DCS',
    'encode_7b',
    'decode_7b',
    'encode_7b_cbs',
    'decode_7b_cbs'
    ]

#------------------------------------------------------------------------------#
# 3GPP TS 23.038: Alphabets and language-specific information
# release 13 (d00)
#------------------------------------------------------------------------------#

from pycrate_core.utils  import python_version, pack_val, TYPE_UINT, PycrateErr
from pycrate_core.charpy import Charpy
from pycrate_core.elt    import Envelope
from pycrate_core.base   import Uint

# label shared by all the dictionary entries corresponding to reserved values
_str_reserved = 'reserved'
# charset identifiers: the values are the keys 0, 1 and 2 of
# _SMSDCSCharset_dict (GSM 7 bit default alphabet, 8 bit data, UCS2)
DCS_7B  = 0
DCS_8B  = 1
DCS_UCS = 2

#------------------------------------------------------------------------------#
# SMS Data Coding Scheme
# TS 23.038, section 4
#------------------------------------------------------------------------------#

# names for the 4-bit Group field of SMS_DCS
# the groups below 8 flagged with "no class meaning" are those for which
# SMS_DCS._set_cla_dic() does not provide any dictionary for the Class field
_SMSDCSGroup_dict = {
    0 : 'general data coding, uncompressed', # no class meaning
    1 : 'general data coding, uncompressed',
    2 : 'general data coding, compressed', # no class meaning
    3 : 'general data coding, compressed',
    4 : 'message for automatic deletion group, uncompressed', # no class meaning
    5 : 'message for automatic deletion group, uncompressed',
    6 : 'message for automatic deletion group, compressed', # no class meaning
    7 : 'message for automatic deletion group, compressed',
    8 : _str_reserved,
    9 : _str_reserved,
    10: _str_reserved,
    11: _str_reserved,
    12: 'message waiting indication group: discard message',
    13: 'message waiting indication group: store message',
    14: 'message waiting indication group: store message',
    15: 'data coding / message class'
    }

# names for the 2-bit Charset field, selected by SMS_DCS._set_cs_dic() for
# the groups 0 to 7 and 15 (also reused by CBS_DCS._set_cs2_dic())
_SMSDCSCharset_dict = {
    0 : 'GSM 7 bit default alphabet',
    1 : '8 bit data',
    2 : 'UCS2 (16 bit)',
    3 : _str_reserved
    }

# names for the 2-bit Class field, selected by SMS_DCS._set_cla_dic() for
# the groups 1, 3, 5, 7 and 15 (also reused by CBS_DCS._set_cla_dic())
_SMSDCSClass_dict = {
    0 : 'Class 0',
    1 : 'Class 1 - default meaning: ME-specific',
    2 : 'Class 2 - (U)SIM specific message',
    3 : 'Class 3 - default meaning: TE specific',
    }

# names for the 2-bit Charset field within the groups 12 and 13
# only the values 0 and 2 are named
_SMSDCSIndSense_dict = {
    0 : 'GSM 7 bit default alphabet, set indication inactive',
    2 : 'GSM 7 bit default alphabet, set indication active'
    }

# UCS2 counterpart of _SMSDCSIndSense_dict, referenced by
# SMS_DCS._set_cs_dic() for the group 14
_SMSDCSIndSenseUCS_dict = {
    0 : 'UCS2 (16 bit), set indication inactive',
    2 : 'UCS2 (16 bit), set indication active'
    }

# names for the 2-bit Class field, selected by SMS_DCS._set_cla_dic() for
# the groups 12, 13 and 14
_SMSDCSIndType_dict = {
    0 : 'Voicemail Message Waiting',
    1 : 'Fax Message Waiting',
    2 : 'Electronic Mail Message Waiting',
    3 : 'Other Message Waiting',
    }

class SMS_DCS(Envelope):
    """SMS Data Coding Scheme, TS 23.038, section 4

    8-bit structure made of 3 fields: Group (4 bits), Charset (2 bits) and
    Class (2 bits). The dictionaries naming the values of Charset and Class
    depend on the current value of Group.
    """
    _GEN = (
        Uint('Group', bl=4, dic=_SMSDCSGroup_dict),
        Uint('Charset', bl=2),
        Uint('Class', bl=2)
        )
    def __init__(self, *args, **kwargs):
        Envelope.__init__(self, *args, **kwargs)
        # Charset and Class get their dictionary from a callback reading Group
        # NOTE(review): presumably evaluated by pycrate each time the
        # dictionary is required - confirm against set_dicauto()
        self[1].set_dicauto(self._set_cs_dic)
        self[2].set_dicauto(self._set_cla_dic)

    def _set_cs_dic(self):
        """returns the dictionary naming the Charset values, according to the
        value of Group; an empty dict is returned for the groups 8 to 11
        """
        grp = self[0]()
        if grp < 8 or grp == 15:
            return _SMSDCSCharset_dict
        elif grp in (12, 13):
            return _SMSDCSIndSense_dict
        elif grp == 14:
            # the return statement was missing here, making the method return
            # None instead of the UCS2 indication dictionary
            return _SMSDCSIndSenseUCS_dict
        else:
            return {}

    def _set_cla_dic(self):
        """returns the dictionary naming the Class values, according to the
        value of Group; an empty dict is returned when Group is neither in
        (1, 3, 5, 7, 15) nor in (12, 13, 14)
        """
        grp = self[0]()
        if grp in (1, 3, 5, 7, 15):
            return _SMSDCSClass_dict
        elif grp in (12, 13, 14):
            return _SMSDCSIndType_dict
        else:
            return {}


#------------------------------------------------------------------------------#
# CBS Data Coding Scheme
# TS 23.038, section 5
#------------------------------------------------------------------------------#

# names for the 4-bit Group field of CBS_DCS
# the values 1, 2, 10, 11 and 12 have no entry in this dictionary
_CBSDCSGroup_dict = {
    0 : 'Language using the GSM 7 bit default alphabet',
    3 : 'Reserved for other languages using the GSM 7 bit default alphabet, '\
        'with unspecified handling at the MS',
    4 : 'general data coding, uncompressed', # no class meaning
    5 : 'general data coding, uncompressed',
    6 : 'general data coding, compressed', # no class meaning
    7 : 'general data coding, compressed',
    8 : _str_reserved,
    9 : 'Message with User Data Header (UDH) structure',
    13: 'I1 protocol message defined in 3GPP TS 24.294',
    14: 'Defined by the WAP Forum',
    15: 'data coding / message class'
    }

# names for the 4-bit Charset field when Group is 0, selected by
# CBS_DCS._set_cs4_dic()
_CBSDCSCs0_dict = {
    0 : 'German',
    1 : 'English',
    2 : 'Italian',
    3 : 'French',
    4 : 'Spanish',
    5 : 'Dutch',
    6 : 'Swedish',
    7 : 'Danish',
    8 : 'Portuguese',
    9 : 'Finnish',
    10: 'Norwegian',
    11: 'Greek',
    12: 'Turkish',
    13: 'Hungarian',
    14: 'Polish',
    15: 'Language unspecified'
    }

# names for the 4-bit Charset field when Group is 1, selected by
# CBS_DCS._set_cs4_dic()
_CBSDCSCs1_dict = {
    0 : 'GSM 7 bit default alphabet; message preceded by language indication',
    1 : 'UCS2; message preceded by language indication'
    }

# names for the 4-bit Charset field when Group is 2, selected by
# CBS_DCS._set_cs4_dic()
_CBSDCSCs2_dict = {
    0 : 'Czech',
    1 : 'Hebrew',
    2 : 'Arabic',
    3 : 'Russian',
    4 : 'Icelandic'
    }

class CBS_DCS(Envelope):
    """CBS Data Coding Scheme, TS 23.038, section 5

    The 4-bit Group is followed by one of 2 alternative layouts:
    - a single 4-bit Charset, or
    - a 2-bit Charset and a 2-bit Class.
    The transparency callbacks below switch between both layouts according
    to the value of Group.
    """
    ENV_SEL_TRANS = False
    _GEN = (
        Uint('Group', bl=4, dic=_CBSDCSGroup_dict),
        Uint('Charset', bl=4),
        Uint('Charset', bl=2),
        Uint('Class', bl=2)
        )
    def __init__(self, *args, **kwargs):
        Envelope.__init__(self, *args, **kwargs)
        # (index, transparency callback, dictionary callback) for each field
        # following Group
        automations = (
            (1, self._set_cs4_trans, self._set_cs4_dic),
            (2, self._set_cs2_trans, self._set_cs2_dic),
            (3, self._set_cs2_trans, self._set_cla_dic),
            )
        for index, get_trans, get_dic in automations:
            field = self[index]
            field.set_transauto(get_trans)
            field.set_dicauto(get_dic)

    def _set_cs4_trans(self):
        """returns False for the groups using the 4-bit Charset, True otherwise
        """
        return self[0]() not in (0, 1, 2, 3, 8, 10, 11, 12, 13, 14)

    def _set_cs4_dic(self):
        """returns the dictionary naming the 4-bit Charset values for the
        groups 0, 1 and 2, an empty dict otherwise
        """
        per_group = {
            0 : _CBSDCSCs0_dict,
            1 : _CBSDCSCs1_dict,
            2 : _CBSDCSCs2_dict
            }
        return per_group.get(self[0](), {})

    def _set_cs2_trans(self):
        """returns False for the groups using the 2-bit Charset and the Class,
        True otherwise
        """
        return self[0]() not in (4, 5, 6, 7, 9, 15)

    def _set_cs2_dic(self):
        """returns the dictionary naming the 2-bit Charset values for the
        groups 4, 5, 6, 7, 9 and 15, an empty dict otherwise
        """
        return _SMSDCSCharset_dict if self[0]() in (4, 5, 6, 7, 9, 15) else {}

    def _set_cla_dic(self):
        """returns the dictionary naming the Class values for the groups
        5, 7, 9 and 15, an empty dict otherwise
        """
        return _SMSDCSClass_dict if self[0]() in (5, 7, 9, 15) else {}


#------------------------------------------------------------------------------#
# GSM 7 bit Default Alphabet
# TS 23.038, section 6.2.1
#------------------------------------------------------------------------------#

# GSM 7 bit default alphabet: septet value -> unicode character
# the table is built from 8 rows of 16 characters, the septet value of a
# character being its position within the concatenated rows
_GSM7bLUT = dict(enumerate(
    u'@£$¥èéùìòÇ\nØø\rÅå'       #   0 ..  15
    u'Δ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ'      #  16 ..  31, 27 is the escape char
    u' !"#¤%&\'()*+,-./'        #  32 ..  47
    u'0123456789:;<=>?'         #  48 ..  63
    u'¡ABCDEFGHIJKLMNO'         #  64 ..  79
    u'PQRSTUVWXYZÄÖÑÜ§'         #  80 ..  95
    u'¿abcdefghijklmno'         #  96 .. 111
    u'pqrstuvwxyzäöñüà'         # 112 .. 127
    ))

# inverse table: unicode character -> septet value
_GSM7bLUTInv = {char: code for code, char in _GSM7bLUT.items()}

# GSM 7 bit extension table: septet value following the escape char (27)
# -> unicode character
_GSM7bExtLUT = dict((
    (10,  u'\x0c'),
    (13,  u'\x11'), # no real equivalent to CR2 in the ascii table
    (20,  u'^'),
    (27,  u'\x0e'), # no real equivalent to SS2 in the ascii table
    (40,  u'{'),
    (41,  u'}'),
    (47,  u'\\'),
    (60,  u'['),
    (61,  u'~'),
    (62,  u']'),
    (64,  u'|'),
    (101, u'€'),
    ))

# inverse table: unicode character -> septet value to be sent after the escape
_GSM7bExtLUTInv = {char: code for code, char in _GSM7bExtLUT.items()}


def encode_7b(txt, off=0):
    """encodes the unicode string `txt' into a buffer of packed GSM 7 bit
    characters

    Each character is looked up in the GSM 7 bit default alphabet first, then
    in the extension table; a character of the extension table takes
    2 septets: the escape char (27) followed by its extension code.

    Args:
        txt (utf8 str): text string to encode
        off (uint): bit offset, taken into account to compute the padding

    Returns:
        encoded buffer and septet count (bytes, uint)

    Raises:
        PycrateErr: if a character of `txt' is in none of the 2 tables
    """
    # septets are collected starting from the last character, because the
    # packed buffer is byte-reversed at the end
    septets = []
    for char in reversed(txt):
        if char in _GSM7bLUTInv:
            septets.append(_GSM7bLUTInv[char])
        elif char in _GSM7bExtLUTInv:
            # extension code first, escape char second: the escape char ends
            # up in front of the extension code in the final buffer
            septets.extend((_GSM7bExtLUTInv[char], 27))
        else:
            raise(PycrateErr('invalid GSM 7 bit char: %r' % char))
    cnt = len(septets)
    # padding bits go first, so that the offset, the septets and the padding
    # fill a whole number of bytes
    pad = (8 - (7 * cnt + off) % 8) % 8
    fields = [(TYPE_UINT, 0, pad)]
    fields.extend((TYPE_UINT, val, 7) for val in septets)
    packed = pack_val(*fields)[0]
    if python_version < 3:
        return ''.join(reversed(packed)), cnt
    return bytes(reversed(packed)), cnt


def decode_7b(buf, off=0):
    """translates the GSM 7 bit characters buffer `buf' to a unicode string

    An escape char (27) followed by a septet listed in the extension table is
    decoded to the corresponding extended character. An escape char followed
    by any other septet, or ending the buffer, is decoded to a space.

    WNG: in case of 7 bits padding, we will have an @ at the end

    Args:
        buf (bytes): buffer to decode
        off (uint): bit offset

    Returns:
        decoded text string (utf8 str)
    """
    # the buffer is byte-reversed, hence septets are read from the last one
    # to the first one
    if python_version < 3:
        char = Charpy(''.join(reversed(buf)))
    else:
        char = Charpy(bytes(reversed(buf)))
    # jump over the padding bits
    bits = 8 * len(buf) - off
    chars_num = bits // 7
    char._cur = bits - 7 * chars_num
    # get all chars
    arr = [char.get_uint(7) for _ in range(chars_num)]
    chars = []
    for i, v in enumerate(arr):
        if v != 27:
            chars.append(_GSM7bLUT[v])
        elif i > 0 and arr[i-1] in _GSM7bExtLUT:
            # escape char: arr[i-1] is the septet following it in the text,
            # already decoded with the default alphabet, replace it with the
            # extended content
            # chars is never empty here, as every previous iteration left at
            # least one char in it
            chars[-1] = _GSM7bExtLUT[arr[i-1]]
        else:
            # escape char ending the text (i == 0, for which arr[i-1] would
            # wrap around to the first septet of the text) or followed by a
            # septet which is not in the extension table
            # these explicit conditions replace a bare except clause, which
            # was catching any kind of exception
            chars.append(u' ')
    return u''.join(reversed(chars))


def encode_7b_cbs(txt):
    """splits the unicode string `txt' into a tuple of page(s) of GSM 7 bit
    characters, ready for broadcast

    a page is a 2-tuple: 82-bytes buffer, message length (<= 82)
    a page holds at most 93 septets, a character of the extension table
    counting for 2

    raises PycrateErr if a character of `txt' is neither in the GSM 7 bit
    default alphabet nor in the extension table
    """
    def _to_page(chars):
        # encode the chars and pad the buffer with CR up to 82 bytes
        # the returned length is the one before padding
        buf = encode_7b(''.join(chars))[0]
        used = len(buf)
        return buf + (82 - used) * b'\x0d', used
    #
    pages, current, septets = [], [], 0
    for char in txt:
        # number of septets required for this char
        if char in _GSM7bLUTInv:
            width = 1
        elif char in _GSM7bExtLUTInv:
            width = 2
        else:
            raise(PycrateErr('invalid GSM 7 bit char: %r' % char))
        if septets + width >= 94:
            # the char does not fit: close the current page, start a new one
            pages.append(_to_page(current))
            current, septets = [], 0
        current.append(char)
        septets += width
    # close the last page
    if current:
        pages.append(_to_page(current))
    return tuple(pages)


def decode_7b_cbs(pages):
    """decodes a tuple of `pages' of GSM 7 bit characters and concatenates the
    result into a single unicode string

    a page is a 2-tuple: 82-bytes buffer, message length (<= 82)
    only the first message length bytes of each buffer are decoded
    """
    return u''.join([decode_7b(buf[:used]) for buf, used in pages])