import sys
import os

import json
import argparse
from collections import Counter

def getParser():
    """Build the command-line argument parser for this script.

    Returns:
        argparse.ArgumentParser: a parser that accepts ``--typefile``
        (required), ``--topfile`` (required) and ``--k`` (int, default 100).
    """
    parser = argparse.ArgumentParser(
        description="parser for arguments",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--typefile",
        type=str,
        help="input file containing tail types[json]",
        required=True,
    )
    parser.add_argument(
        "--topfile",
        type=str,
        help="output file to store top k types",
        required=True,
    )
    parser.add_argument(
        "--k",
        type=int,
        help="Number of top types to consider",
        default=100,
    )
    return parser

def getTopk(params):
    """Count type frequencies and write the top-k types to a JSON file.

    Reads ``params.typefile``, a JSON object whose values are iterables of
    '/'-separated type strings. The type 'common/topic' is ignored. Each
    remaining type is counted once per entry (duplicates inside one entry
    are collapsed), and its root (the text before the first '/') is counted
    once per distinct type in that entry.

    The result is printed and written to ``params.topfile`` as a JSON object
    with the keys ``topTypes``, ``topRootTypes``, ``typeCounts`` and
    ``rootTypeCounts``.

    Args:
        params: object with attributes ``typefile`` (input path),
            ``topfile`` (output path) and ``k`` (number of top types to keep).
    """
    with open(params.typefile, 'r', encoding='utf-8') as fin:
        ent2type = json.load(fin)

    ignore_types = {'common/topic'}
    typeCounter = Counter()
    rootTypeCounter = Counter()
    for types in ent2type.values():
        # Sort the de-duplicated types so Counter insertion order -- which
        # most_common() uses to order equal counts -- does not depend on
        # per-process string hash randomisation of set iteration.
        kept = sorted(set(types).difference(ignore_types))
        typeCounter.update(kept)
        rootTypeCounter.update(typ.split('/')[0] for typ in kept)

    topTypes = [typ for typ, _ in typeCounter.most_common(params.k)]
    topRootTypes = [typ for typ, _ in rootTypeCounter.most_common(params.k)]
    print("Top Types")
    print(topTypes)
    print("Top Root Types")
    print(topRootTypes)

    outdict = {
        'topTypes': topTypes,
        'topRootTypes': topRootTypes,
        'typeCounts': typeCounter.most_common(),
        'rootTypeCounts': rootTypeCounter.most_common(),
    }
    with open(params.topfile, 'w', encoding='utf-8') as fout:
        json.dump(outdict, fout)

def main():
    """Parse command-line arguments and run the top-k type extraction.

    Exits with status 1 when argument parsing fails. A successful early exit
    requested by argparse (e.g. ``--help``) keeps its own exit status.
    """
    parser = getParser()
    try:
        params = parser.parse_args()
    except SystemExit as exc:
        # argparse signals both "--help" (status 0) and usage errors
        # (non-zero status) via SystemExit; only errors map to status 1.
        if exc.code in (0, None):
            raise
        sys.exit(1)
    getTopk(params)
    

# Run the command-line entry point only when executed as a script,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()
