import pickle
from collections import defaultdict, OrderedDict
from pprint import pprint


def get_statistics(mapping, rule_count,
                   including_terminal: bool):
    """Summarize rule and instance counts, bucketed by external-point count.

    Args:
        mapping: Dict keyed by sequences whose first element ``k[0]`` is a
            string ending in ``"#<field>"``. ``<field>`` is parsed as the
            external-point count: an integer is used as-is, the literal
            ``"None"`` counts as 0, and any other text counts as its length.
            Each value is a dict whose values are numeric instance counts
            (presumably HRG rule -> occurrence count; confirm with the
            code that produces the pickle).
        rule_count: Any sized collection; only ``len(rule_count)`` is used.
        including_terminal: When false, only entries whose key has more than
            one element and whose ``k[1][0]`` is a ``str`` are kept.

    Returns:
        An ``OrderedDict`` with the totals, the per-bucket rule counts, the
        per-bucket instance counts (plus an ``"ALL"`` row), the per-bucket
        instance ratios, and the instance-weighted average external-point
        count. When there are no instances at all, the ratios and the
        average are reported as 0.0 instead of raising
        ``ZeroDivisionError``.

    Raises:
        IndexError: If some ``k[0]`` contains no ``"#"`` separator.
    """
    if not including_terminal:
        mapping = {k: v for k, v in mapping.items()
                   if len(k) > 1 and isinstance(k[1][0], str)}

    # Counts at or above this limit are merged into a single "<limit>+" bucket.
    bucket_limit = 5

    def format_key(count, limit):
        return str(count) if count < limit else str(limit) + "+"

    def parse_point_count(label):
        field = label.rsplit("#", 1)[1]
        try:
            return int(field)
        except ValueError:
            # Non-numeric field: "None" means no external points; otherwise
            # the number of characters is taken as the count.
            return 0 if field == "None" else len(field)

    def ratio(numerator):
        # Guard against an empty (or fully filtered-out) mapping.
        return numerator / total_instances if total_instances else 0.0

    result = OrderedDict()
    result["Total HRG Rule Count"] = len(rule_count)
    result["Total SHRG Rule Count"] = sum(len(v) for v in mapping.values())

    statistics_by_points = defaultdict(int)
    instance_statistics_by_points = defaultdict(int)
    total_instances = 0
    total_instance_x_point = 0

    for k, v in mapping.items():
        ep_count = parse_point_count(k[0])
        bucket = format_key(ep_count, bucket_limit)
        instance_count = sum(v.values())

        statistics_by_points[bucket] += len(v)
        instance_statistics_by_points[bucket] += instance_count
        instance_statistics_by_points["ALL"] += instance_count
        total_instances += instance_count
        total_instance_x_point += instance_count * ep_count

    result["Rules by points"] = sorted(statistics_by_points.items())
    result["Instances by points"] = sorted(instance_statistics_by_points.items())
    result["Instances by points/%"] = sorted(
        (k, ratio(v)) for k, v in instance_statistics_by_points.items())
    result["Average External Point"] = ratio(total_instance_x_point)
    return result


def get_statistics_from_name(name, include_terminal=True):
    """Load the pickled data for ``name`` and pretty-print its statistics.

    Reads ``cfg_hrg_mapping-<name>.pickle`` and ``count-<name>.pickle`` from
    the ``deepbank-preprocessed`` directory (relative to the current working
    directory), passes both to ``get_statistics`` and prints the result with
    ``pprint``. Nothing is returned.

    NOTE: ``pickle.load`` can execute arbitrary code; only use this on
    trusted, locally generated files.
    """
    mapping_path = "deepbank-preprocessed/cfg_hrg_mapping-{}.pickle".format(name)
    with open(mapping_path, "rb") as mapping_file:
        grammar_mapping = pickle.load(mapping_file)

    count_path = "deepbank-preprocessed/count-{}.pickle".format(name)
    with open(count_path, "rb") as count_file:
        rule_counts = pickle.load(count_file)

    summary = get_statistics(grammar_mapping, rule_counts, include_terminal)
    pprint(summary)

