"""Utility code related to the integration of the 'construct' declarative parser."""

import typing
import codecs
import ipaddress

import gsm0338

from construct.lib.containers import Container, ListContainer
from construct.core import EnumIntegerString
from construct import Adapter, Prefixed, Int8ub, GreedyBytes, Default, Flag, Byte, Construct, Enum
from construct import BitsInteger, BitStruct, Bytes, StreamError, stream_read_entire, stream_write
from construct import SizeofError, IntegerError, swapbytes
from construct.core import evaluate
from construct.lib import integertypes

from pySim.utils import b2h, h2b, swap_nibbles

# (C) 2021-2022 by Harald Welte <laforge@osmocom.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


class HexAdapter(Adapter):
    """convert a bytes() type to a string of hex nibbles."""

    def _decode(self, obj, context, path):
        return b2h(obj)

    def _encode(self, obj, context, path):
        return h2b(obj)

class Utf8Adapter(Adapter):
    """convert a bytes() type that contains utf8 encoded text to human readable text."""

    def _decode(self, obj, context, path):
        # In case the string contains only 0xff bytes we interpret it as an empty string
        if obj == b'\xff' * len(obj):
            return ""
        return codecs.decode(obj, "utf-8")

    def _encode(self, obj, context, path):
        return codecs.encode(obj, "utf-8")

class GsmOrUcs2Adapter(Adapter):
    """Try to encode into a GSM 03.38 string; if that fails, fall back to UCS-2 as described
    in TS 102 221 Annex A."""
    def _decode(self, obj, context, path):
        # In case the string contains only 0xff bytes we interpret it as an empty string
        if obj == b'\xff' * len(obj):
            return ""
        # one of the magic bytes of TS 102 221 Annex A
        if obj[0] in [0x80, 0x81, 0x82]:
            ad = Ucs2Adapter(GreedyBytes)
        else:
            ad = GsmString(GreedyBytes)
        return ad._decode(obj, context, path)

    def _encode(self, obj, context, path):
        # first try GSM 03.38; then fall back to TS 102 221 Annex A UCS-2
        try:
            ad = GsmString(GreedyBytes)
            return ad._encode(obj, context, path)
        except:
            ad = Ucs2Adapter(GreedyBytes)
            return ad._encode(obj, context, path)

class Ucs2Adapter(Adapter):
    """convert a bytes() type that contains UCS2 encoded characters encoded as defined in TS 102 221
    Annex A to normal python string representation (and back)."""
    def _decode(self, obj, context, path):
        # In case the string contains only 0xff bytes we interpret it as an empty string
        if obj == b'\xff' * len(obj):
            return ""
        if obj[0] == 0x80:
            # TS 102 221 Annex A Variant 1
            return codecs.decode(obj[1:], 'utf_16_be')
        elif obj[0] == 0x81:
            # TS 102 221 Annex A Variant 2
            out = ""
            # second byte contains a value indicating the number of characters
            num_of_chars = obj[1]
            # the third byte contains an 8 bit number which defines bits 15 to 8 of a 16 bit base
            # pointer, where bit 16 is set to zero, and bits 7 to 1 are also set to zero. These
            # sixteen bits constitute a base pointer to a "half-page" in the UCS2 code space
            base_ptr = obj[2] << 7
            for ch in obj[3:3+num_of_chars]:
                # if bit 8 of the byte is set to zero, the remaining 7 bits of the byte contain a
                # GSM Default Alphabet character, whereas if bit 8 of the byte is set to one, then
                # the remaining seven bits are an offset value added to the 16 bit base pointer
                # defined earlier, and the resultant 16 bit value is a UCS2 code point
                if ch & 0x80:
                    codepoint = (ch & 0x7f) + base_ptr
                    out += codecs.decode(codepoint.to_bytes(2, byteorder='big'), 'utf_16_be')
                else:
                    out += codecs.decode(bytes([ch]), 'gsm03.38')
            return out
        elif obj[0] == 0x82:
            # TS 102 221 Annex A Variant 3
            out = ""
            # second byte contains a value indicating the number of characters
            num_of_chars = obj[1]
            # third and fourth bytes contain a 16 bit number which defines the complete 16 bit base
            # pointer to a half-page in the UCS2 code space, for use with some or all of the
            # remaining bytes in the string
            base_ptr = obj[2] << 8 | obj[3]
            for ch in obj[4:4+num_of_chars]:
                # if bit 8 of the byte is set to zero, the remaining 7 bits of the byte contain a
                # GSM Default Alphabet character, whereas if bit 8 of the byte is set to one, the
                # remaining seven bits are an offset value added to the base pointer defined in
                # bytes three and four, and the resultant 16 bit value is a UCS2 code point, else: #
                # GSM default alphabet
                if ch & 0x80:
                    codepoint = (ch & 0x7f) + base_ptr
                    out += codecs.decode(codepoint.to_bytes(2, byteorder='big'), 'utf_16_be')
                else:
                    out += codecs.decode(bytes([ch]), 'gsm03.38')
            return out
        else:
            raise ValueError('First byte of TS 102 221 UCS-2 must be 0x80, 0x81 or 0x82')

    def _encode(self, obj, context, path):
        def encodable_in_gsm338(instr: str) -> bool:
            """Determine if given input string is encode-ale in gsm03.38."""
            try:
                # TODO: figure out if/how we can constrain to default alphabet.  The gsm0338
                # library seems to include the spanish lock/shift table
                codecs.encode(instr, 'gsm03.38')
            except ValueError:
                return False
            return True

        def codepoints_not_in_gsm338(instr: str) -> typing.List[int]:
            """Return an integer list of UCS2 codepoints for all characters of 'inster'
            which are not representable in the GSM 03.38 default alphabet."""
            codepoint_list = []
            for c in instr:
                if encodable_in_gsm338(c):
                    continue
                c_codepoint = int.from_bytes(codecs.encode(c, 'utf_16_be'), byteorder='big')
                codepoint_list.append(c_codepoint)
            return codepoint_list

        def diff_between_min_and_max_of_list(inlst: typing.List) -> int:
            return max(inlst) - min(inlst)

        def encodable_in_variant2(instr: str) -> bool:
            codepoint_prefix = None
            for c in instr:
                if encodable_in_gsm338(c):
                    continue
                c_codepoint = int.from_bytes(codecs.encode(c, 'utf_16_be'), byteorder='big')
                if c_codepoint >= 0x8000:
                    return False
                c_prefix = c_codepoint >> 7
                if codepoint_prefix is None:
                    codepoint_prefix = c_prefix
                else:
                    if c_prefix != codepoint_prefix:
                        return False
            return True

        def encodable_in_variant3(instr: str) -> bool:
            codepoint_list = codepoints_not_in_gsm338(instr)
            # compute delta between max and min; check if it's encodable in 7 bits
            if diff_between_min_and_max_of_list(codepoint_list) >= 0x80:
                return False
            return True

        def _encode_variant1(instr: str) -> bytes:
            """Encode according to TS 102 221 Annex A Variant 1"""
            return b'\x80' + codecs.encode(instr, 'utf_16_be')

        def _encode_variant2(instr: str) -> bytes:
            """Encode according to TS 102 221 Annex A Variant 2"""
            codepoint_prefix = None
            # second byte contains a value indicating the number of characters
            hdr = b'\x81' + len(instr).to_bytes(1, byteorder='big')
            chars = b''
            for c in instr:
                try:
                    enc = codecs.encode(c, 'gsm03.38')
                except ValueError:
                    c_codepoint = int.from_bytes(codecs.encode(c, 'utf_16_be'), byteorder='big')
                    c_prefix = c_codepoint >> 7
                    if codepoint_prefix is None:
                        codepoint_prefix = c_prefix
                    assert codepoint_prefix == c_prefix
                    enc = (0x80 + (c_codepoint & 0x7f)).to_bytes(1, byteorder='big')
                chars += enc
            if codepoint_prefix is None:
                codepoint_prefix = 0
            return hdr + codepoint_prefix.to_bytes(1, byteorder='big') + chars

        def _encode_variant3(instr: str) -> bytes:
            """Encode according to TS 102 221 Annex A Variant 3"""
            # second byte contains a value indicating the number of characters
            hdr = b'\x82' + len(instr).to_bytes(1, byteorder='big')
            chars = b''
            codepoint_list = codepoints_not_in_gsm338(instr)
            codepoint_base = min(codepoint_list)
            for c in instr:
                try:
                    # if bit 8 of the byte is set to zero, the remaining 7 bits of the byte contain a GSM
                    # Default # Alphabet character
                    enc = codecs.encode(c, 'gsm03.38')
                except ValueError:
                    # if bit 8 of the byte is set to one, the remaining seven bits are an offset
                    # value added to the base pointer defined in bytes three and four, and the
                    # resultant 16 bit value is a UCS2 code point
                    c_codepoint = int.from_bytes(codecs.encode(c, 'utf_16_be'), byteorder='big')
                    c_codepoint_delta = c_codepoint - codepoint_base
                    assert c_codepoint_delta < 0x80
                    enc = (0x80 + c_codepoint_delta).to_bytes(1, byteorder='big')
                chars += enc
            # third and fourth bytes contain a 16 bit number which defines the complete 16 bit base
            # pointer to a half-page in the UCS2 code space
            return hdr + codepoint_base.to_bytes(2, byteorder='big') + chars

        if encodable_in_variant2(obj):
            return _encode_variant2(obj)
        elif encodable_in_variant3(obj):
            return _encode_variant3(obj)
        else:
            return _encode_variant1(obj)

class BcdAdapter(Adapter):
    """convert a bytes() type to a string of BCD nibbles."""

    def _decode(self, obj, context, path):
        return swap_nibbles(b2h(obj))

    def _encode(self, obj, context, path):
        return h2b(swap_nibbles(obj))

class PlmnAdapter(BcdAdapter):
    """convert a bytes(3) type to BCD string like 262-02 or 262-002."""
    def _decode(self, obj, context, path):
        bcd = super()._decode(obj, context, path)
        if bcd[3] == 'f':
            return '-'.join([bcd[:3], bcd[4:]])
        else:
            return '-'.join([bcd[:3], bcd[3:]])

    def _encode(self, obj, context, path):
        l = obj.split('-')
        if len(l[1]) == 2:
            bcd = l[0] + 'f' + l[1]
        else:
            bcd = l[0] + l[1]
        return super()._encode(bcd, context, path)

class InvertAdapter(Adapter):
    """inverse logic (false->true, true->false)."""
    @staticmethod
    def _invert_bool_in_obj(obj):
        for k,v in obj.items():
            # skip all private entries
            if k.startswith('_'):
                continue
            if v is False:
                obj[k] = True
            elif v is True:
                obj[k] = False
        return obj

    def _decode(self, obj, context, path):
        return self._invert_bool_in_obj(obj)

    def _encode(self, obj, context, path):
        return self._invert_bool_in_obj(obj)

class Rpad(Adapter):
    """
    Encoder appends padding bytes (b'\\xff') or characters up to target size.
    Decoder removes trailing padding bytes/characters.

    Parameters:
        subcon: Subconstruct as defined by construct library
        pattern: set padding pattern (default: b'\\xff')
        num_per_byte: number of 'elements' per byte. E.g. for hex nibbles: 2
    """

    def __init__(self, subcon, pattern=b'\xff', num_per_byte=1):
        super().__init__(subcon)
        self.pattern = pattern
        self.num_per_byte = num_per_byte

    def _decode(self, obj, context, path):
        return obj.rstrip(self.pattern)

    def _encode(self, obj, context, path):
        target_size = self.sizeof() * self.num_per_byte
        if len(obj) > target_size:
            raise SizeofError("Input ({}) exceeds target size ({})".format(
                len(obj), target_size))
        return obj + self.pattern * (target_size - len(obj))

class MultiplyAdapter(Adapter):
    """
    Decoder multiplies by multiplicator
    Encoder divides by multiplicator

    Parameters:
        subcon: Subconstruct as defined by construct library
        multiplier: Multiplier to apply to raw encoded value
    """

    def __init__(self, subcon, multiplicator):
        super().__init__(subcon)
        self.multiplicator = multiplicator

    def _decode(self, obj, context, path):
        return obj * 8

    def _encode(self, obj, context, path):
        return obj // 8


class GsmStringAdapter(Adapter):
    """Convert GSM 03.38 encoded bytes to a string."""

    def __init__(self, subcon, codec='gsm03.38', err='strict'):
        super().__init__(subcon)
        self.codec = codec
        self.err = err

    def _decode(self, obj, context, path):
        return obj.decode(self.codec)

    def _encode(self, obj, context, path):
        return obj.encode(self.codec, self.err)

class Ipv4Adapter(Adapter):
    """
    Encoder converts from 4 bytes to string representation (A.B.C.D).
    Decoder converts from string representation (A.B.C.D) to four bytes.
    """
    def _decode(self, obj, context, path):
        ia = ipaddress.IPv4Address(obj)
        return ia.compressed

    def _encode(self, obj, context, path):
        ia = ipaddress.IPv4Address(obj)
        return ia.packed

class Ipv6Adapter(Adapter):
    """
    Encoder converts from 16 bytes to string representation.
    Decoder converts from string representation to 16 bytes.
    """
    def _decode(self, obj, context, path):
        ia = ipaddress.IPv6Address(obj)
        return ia.compressed

    def _encode(self, obj, context, path):
        ia = ipaddress.IPv6Address(obj)
        return ia.packed

class StripTrailerAdapter(Adapter):
    """
    Encoder removes all trailing bytes matching the default_value
    Decoder pads input data up to total_length with default_value

    This is used in constellations like "FlagsEnum(StripTrailerAdapter(GreedyBytes, 3), ..."
    where you have a bit-mask that may have 1, 2 or 3 bytes, depending on whether or not any
    of the LSBs are actually set.
    """
    def __init__(self, subcon, total_length:int, default_value=b'\x00', min_len=1):
        super().__init__(subcon)
        assert len(default_value) == 1
        self.total_length = total_length
        self.default_value = default_value
        self.min_len = min_len

    def _decode(self, obj, context, path):
        assert isinstance(obj, bytes)
        # pad with suppressed/missing bytes
        if len(obj) < self.total_length:
            obj += self.default_value * (self.total_length - len(obj))
        return int.from_bytes(obj, 'big')

    def _encode(self, obj, context, path):
        assert isinstance(obj, int)
        obj = obj.to_bytes(self.total_length, 'big')
        # remove trailing bytes if they are zero
        while len(obj) > self.min_len and obj[-1] == self.default_value[0]:
            obj = obj[:-1]
        return obj


def filter_dict(d, exclude_prefix='_'):
    """filter the input dict to ensure no keys starting with 'exclude_prefix' remain."""
    if not isinstance(d, dict):
        return d
    res = {}
    for (key, value) in d.items():
        if key.startswith(exclude_prefix):
            continue
        if isinstance(value, dict):
            res[key] = filter_dict(value)
        else:
            res[key] = value
    return res


def normalize_construct(c, exclude_prefix: str = '_'):
    """Convert a construct specific type to a related base type, mostly useful
    so we can serialize it."""
    # we need to include the filter_dict as we otherwise get elements like this
    # in the dict: '_io': <_io.BytesIO object at 0x7fdb64e05860> which we cannot json-serialize
    c = filter_dict(c, exclude_prefix)
    if isinstance(c, (Container, dict)):
        r = {k: normalize_construct(v) for (k, v) in c.items()}
    elif isinstance(c, ListContainer):
        r = [normalize_construct(x) for x in c]
    elif isinstance(c, list):
        r = [normalize_construct(x) for x in c]
    elif isinstance(c, EnumIntegerString):
        r = str(c)
    else:
        r = c
    return r


def parse_construct(c, raw_bin_data: bytes, length: typing.Optional[int] = None, exclude_prefix: str = '_', context: dict = {}):
    """Helper function to wrap around normalize_construct() and filter_dict()."""
    if not length:
        length = len(raw_bin_data)
    try:
        parsed = c.parse(raw_bin_data, total_len=length, **context)
    except StreamError as e:
        # if the input is all-ff, this means the content is undefined.  Let's avoid passing StreamError
        # exceptions in those situations (which might occur if a length field 0xff is 255 but then there's
        # actually less bytes in the remainder of the file.
        if all(v == 0xff for v in raw_bin_data):
            return None
        else:
            raise e
    return normalize_construct(parsed, exclude_prefix)

def build_construct(c, decoded_data, context: dict = {}):
    """Helper function to handle total_len."""
    return c.build(decoded_data, total_len=None, **context)

# here we collect some shared / common definitions of data types
LV = Prefixed(Int8ub, HexAdapter(GreedyBytes))

# Default value for Reserved for Future Use (RFU) bits/bytes
# See TS 31.101 Sec. "3.4 Coding Conventions"
__RFU_VALUE = 0

# Field that packs Reserved for Future Use (RFU) bit
FlagRFU = Default(Flag, __RFU_VALUE)

# Field that packs Reserved for Future Use (RFU) byte
ByteRFU = Default(Byte, __RFU_VALUE)

# Field that packs all remaining Reserved for Future Use (RFU) bytes
GreedyBytesRFU = Default(GreedyBytes, b'')


def BitsRFU(n=1):
    '''
    Field that packs Reserved for Future Use (RFU) bit(s)
    as defined in TS 31.101 Sec. "3.4 Coding Conventions"

    Use this for (currently) unused/reserved bits whose contents
    should be initialized automatically but should not be cleared
    in the future or when restoring read data (unlike padding).

    Parameters:
        n (Integer): Number of bits (default: 1)
    '''
    return Default(BitsInteger(n), __RFU_VALUE)


def BytesRFU(n=1):
    '''
    Field that packs Reserved for Future Use (RFU) byte(s)
    as defined in TS 31.101 Sec. "3.4 Coding Conventions"

    Use this for (currently) unused/reserved bytes whose contents
    should be initialized automatically but should not be cleared
    in the future or when restoring read data (unlike padding).

    Parameters:
        n (Integer): Number of bytes (default: 1)
    '''
    return Default(Bytes(n), __RFU_VALUE)


def GsmString(n):
    '''
    GSM 03.38 encoded byte string of fixed length n.
    Encoder appends padding bytes (b'\\xff') to maintain
    length. Decoder removes those trailing bytes.

    Exceptions are raised for invalid characters
    and length excess.

    Parameters:
        n (Integer): Fixed length of the encoded byte string
    '''
    return GsmStringAdapter(Rpad(Bytes(n), pattern=b'\xff'), codec='gsm03.38')

def GsmOrUcs2String(n):
    '''
    GSM 03.38 or UCS-2 (TS 102 221 Annex A) encoded byte string of fixed length n.
    Encoder appends padding bytes (b'\\xff') to maintain
    length. Decoder removes those trailing bytes.

    Exceptions are raised for invalid characters
    and length excess.

    Parameters:
        n (Integer): Fixed length of the encoded byte string
    '''
    return GsmOrUcs2Adapter(Rpad(Bytes(n), pattern=b'\xff'))

class GreedyInteger(Construct):
    """A variable-length integer implementation, think of combining GrredyBytes with BytesInteger."""
    def __init__(self, signed=False, swapped=False, minlen=0):
        super().__init__()
        self.signed = signed
        self.swapped = swapped
        self.minlen = minlen

    def _parse(self, stream, context, path):
        data = stream_read_entire(stream, path)
        if evaluate(self.swapped, context):
            data = swapbytes(data)
        try:
            return int.from_bytes(data, byteorder='big', signed=self.signed)
        except ValueError as e:
            raise IntegerError(str(e), path=path)

    def __bytes_required(self, i, minlen=0):
        if self.signed:
            raise NotImplementedError("FIXME: Implement support for encoding signed integer")

        # compute how many bytes we need
        nbytes = 1
        while True:
            i = i >> 8
            if i == 0:
                break
            else:
                nbytes = nbytes + 1

        # round up to the minimum number
        # of bytes we anticipate
        nbytes = max(nbytes, minlen)

        return nbytes

    def _build(self, obj, stream, context, path):
        if not isinstance(obj, integertypes):
            raise IntegerError(f"value {obj} is not an integer", path=path)
        length = self.__bytes_required(obj, self.minlen)
        try:
            data = obj.to_bytes(length, byteorder='big', signed=self.signed)
        except ValueError as e:
            raise IntegerError(str(e), path=path) from e
        if evaluate(self.swapped, context):
            data = swapbytes(data)
        stream_write(stream, data, length, path)
        return obj

# merged definitions of 24.008 + 23.040
TypeOfNumber = Enum(BitsInteger(3), unknown=0, international=1, national=2, network_specific=3,
                    short_code=4, alphanumeric=5, abbreviated=6, reserved_for_extension=7)
NumberingPlan = Enum(BitsInteger(4), unknown=0, isdn_e164=1, data_x121=3, telex_f69=4,
                     sc_specific_5=5, sc_specific_6=6, national=8, private=9,
                     ermes=10, reserved_cts=11, reserved_for_extension=15)
TonNpi = BitStruct('ext'/Flag, 'type_of_number'/TypeOfNumber, 'numbering_plan_id'/NumberingPlan)