#!/usr/bin/env python3
# Generate the array of Certificate Transparency Log ID to description mappings
# for the TLS dissector.
#
# To update the TLS dissector source file, run this from the source directory:
#
#   python3 tools/make-tls-ct-logids.py --update
#

import argparse
from base64 import b64decode, b64encode
from enum import Enum
import itertools
import os
from hashlib import sha256

# Begin of comment, followed by the actual array definition
HEADER = "/* Generated by tools/make-tls-ct-logids.py\n"

# See also https://www.certificate-transparency.org/known-logs
CT_JSON_URL = 'https://www.gstatic.com/ct/log_list/v3/all_logs_list.json'

# File to be patched
SOURCE_FILE = os.path.join('epan', 'dissectors', 'packet-tls-utils.c')

# Maximum elements per line in the value array. 11 is chosen because it results
# in output consistent with clang-format.
BYTES_PER_LINE = 11


class SourceStage(Enum):
    """Parser states while scanning the dissector source file.

    The generated section of the file looks like:

        /* Generated by tools/make-tls-ct-logids.py      <- IN_METAINFO starts
         * Last-Modified ..., N entries. */              <- IN_BLOCK starts after
        static const bytes_string ct_logids[] = { ...    <- accumulated as block
        };                                               <- END starts after
    """
    BEGIN = 1        # before the generated comment
    IN_METAINFO = 2  # inside the "/* Generated by ..." comment
    IN_BLOCK = 3     # inside the ct_logids array definition
    END = 4          # after the closing brace of the array


def escape_c(s):
    """Escape a string for use inside a C double-quoted string literal."""
    # Backslashes must be escaped first, or the quote escapes would be doubled.
    return s.replace('\\', '\\\\').replace('"', '\\"')


def byteshex(b):
    """Format bytes as comma-terminated C hex literals: b'\\x00' -> '0x00,'."""
    return " ".join("0x%02x," % b for b in bytearray(b))


def process_json(obj, lastmod):
    """Convert the parsed known-logs JSON into C source text.

    :param obj: decoded JSON object with an 'operators' list, each operator
        carrying a 'logs' list of {'description', 'key'(base64 DER)} entries.
    :param lastmod: value of the HTTP Last-Modified header, embedded in the
        generated comment so updates can be tracked.
    :return: (metainfo, block) — the generated comment and the
        ``static const bytes_string ct_logids[]`` array definition.
    """
    # Flatten the per-operator log lists into a single list of log entries.
    logs = list(itertools.chain(*[op['logs'] for op in obj['operators']]))
    metainfo, block = HEADER, ''
    metainfo += " * Last-Modified %s, %s entries. */\n" % (lastmod, len(logs))
    block += "static const bytes_string ct_logids[] = {\n"
    for entry in logs:
        desc = entry["description"]
        pubkey_der = b64decode(entry["key"])
        # RFC 6962: the Log ID is the SHA-256 hash of the log's public key.
        key_id = sha256(pubkey_der).digest()
        block += ' { (const guint8[]){\n'
        for offset in range(0, len(key_id), BYTES_PER_LINE):
            block += ' %s\n' % \
                byteshex(key_id[offset:offset+BYTES_PER_LINE])
        block += ' },\n'
        block += ' %d, "%s" },\n' % (len(key_id), escape_c(desc))
    block += " { NULL, 0, NULL }\n"
    block += "};\n"
    return metainfo, block


def parse_source(source_path):
    """Read the source file and split it around the generated section.

    :param source_path: path to the dissector source file to scan.
    :return: (begin, metainfo, block, end) — text before the generated
        comment, the comment itself, the array definition, and the remainder.
    :raises RuntimeError: if the generated section cannot be located.
    """
    begin, metainfo, block, end = '', '', '', ''
    # Stages: BEGIN (before block), IN_METAINFO, IN_BLOCK (skip), END
    stage = SourceStage.BEGIN
    # Explicit UTF-8: the dissector source is UTF-8 and log descriptions may
    # contain non-ASCII characters; don't depend on the locale encoding.
    with open(source_path, encoding='utf-8') as f:
        for line in f:
            if line.startswith('/* Generated by '):
                stage = SourceStage.IN_METAINFO
            if stage == SourceStage.BEGIN:
                begin += line
            elif stage == SourceStage.IN_METAINFO:
                metainfo += line
            elif stage == SourceStage.IN_BLOCK:
                block += line
                # The array's closing "};" marks the end of the section.
                if line.startswith('}'):
                    stage = SourceStage.END
            elif stage == SourceStage.END:
                end += line
            # Checked after appending, so the Last-Modified line itself stays
            # part of metainfo while the NEXT line starts the block.
            if line.startswith(' * Last-Modified '):
                stage = SourceStage.IN_BLOCK
    if stage != SourceStage.END:
        raise RuntimeError("Could not parse file (in stage %s)" % stage.name)
    return begin, metainfo, block, end


parser = argparse.ArgumentParser()
parser.add_argument("--update", action="store_true",
                    help="Update %s as needed instead of writing to stdout" % SOURCE_FILE)


def main():
    """Fetch the known-logs list and print or patch the dissector source."""
    # Imported here so the module (and its pure helpers) can be used even
    # when the third-party 'requests' package is not installed.
    import requests

    args = parser.parse_args()
    this_dir = os.path.dirname(__file__)
    r = requests.get(CT_JSON_URL)
    # Fail loudly on HTTP errors instead of feeding an error page to .json().
    r.raise_for_status()
    j_metainfo, j_block = process_json(r.json(),
                                       lastmod=r.headers['Last-Modified'])
    source_path = os.path.join(this_dir, '..', SOURCE_FILE)

    if args.update:
        s_begin, _, s_block, s_end = parse_source(source_path)
        if s_block == j_block:
            print("File is up-to-date")
        else:
            # Rewrite the file with the freshly generated comment and array,
            # keeping everything before and after the generated section.
            with open(source_path, "w", encoding='utf-8') as f:
                f.write(s_begin)
                f.write(j_metainfo)
                f.write(j_block)
                f.write(s_end)
            print("Updated %s" % source_path)
    else:
        print(j_metainfo, j_block)


if __name__ == '__main__':
    main()