import json
import argparse

# Maps a type-pair key to a dict of {predicate: number of occurrences}.
preds_bucket = {}
# Running count of every relation entry read from the input.
total_num_rels = 0

# Command-line interface: both options are optional and keep their historical defaults.
parser = argparse.ArgumentParser(
	description=(
		"Count predicates per type pair in a line-delimited JSON file and report "
		"how many of them reach various instance-count thresholds."
	),
)
parser.add_argument(
	'--input',
	default='./clue_typed_triples_tacl.json',
	type=str,
	help='Path to the input file: one JSON object per line, each with a "rels" list (default: %(default)s).',
)
parser.add_argument(
	'--ceiling',
	default=80000,
	type=int,
	help=(
		"Ceiling on the number of predicates per type pair, used to cap each type "
		"pair's contribution to the reported totals (default: %(default)s)."
	),
)
args = parser.parse_args()

# Stream the input one line at a time and tally, for every type pair, how often
# each predicate occurs. Each non-blank line must be a JSON object with a "rels"
# list whose items carry the relation string under the key "r".
with open(args.input, 'r', encoding='utf8') as fp:
	for lidx, raw_line in enumerate(fp):
		# Progress report: line index and number of type pairs seen so far.
		if lidx % 100000 == 0:
			print(f"{lidx}; {len(preds_bucket)}")
		# Tolerate blank lines (e.g. a trailing newline at end of file) rather than
		# letting json.loads abort the whole run.
		if not raw_line.strip():
			continue
		entry = json.loads(raw_line)
		for rel in entry["rels"]:
			# Drop the first and last character of the relation string, then split
			# it into its '::'-separated fields.
			fields = rel["r"][1:-1].split('::')
			if len(fields) != 8:
				raise AssertionError(
					f"line {lidx + 1}: expected 8 '::'-separated fields, "
					f"got {len(fields)}: {fields}"
				)
			# Field 0 is the predicate key; fields 6 and 7 form the type-pair key.
			predicate = fields[0]
			type_names = f"{fields[6]}-{fields[7]}"
			pred_counts = preds_bucket.setdefault(type_names, {})
			pred_counts[predicate] = pred_counts.get(predicate, 0) + 1
			total_num_rels += 1

# Report how many relation entries were tallied in total.
print(f"Total number of relations: {total_num_rels}!")
# Re-order the type pairs by ascending number of distinct predicates; sorted() is
# stable, so ties keep their original insertion order.
preds_bucket = dict(sorted(preds_bucket.items(), key=lambda pair: len(pair[1])))

# Minimum-instance thresholds for which statistics are reported.
THRESHOLDS = (2, 3, 4, 5, 6, 10, 11, 12, 13, 14, 15, 20)
# Type pairs with fewer distinct predicates than this are listed but excluded
# from every statistic computed below.
MIN_PREDS_PER_TYPE_PAIR = 50000
# Exclusive upper bound of the instance thresholds tried when searching for the cap.
MAX_CEILING_THRESHOLD = 100


def _count_at_least(counts, threshold):
	"""Return how many values in *counts* are greater than or equal to *threshold*."""
	return sum(1 for count in counts if count >= threshold)


def _ceiled_size(counts, ceiling):
	"""Return the cap applied to one type pair's contribution to the totals.

	If the number of predicates does not exceed *ceiling*, the cap is that number.
	Otherwise the instance threshold is raised from 2 upwards until the number of
	predicates meeting it drops to *ceiling* or below; the cap is the count at the
	last threshold that still exceeded *ceiling* (the full size when threshold 2
	already fits under the ceiling).
	"""
	size = len(counts)
	if size <= ceiling:
		return size
	for threshold in range(2, MAX_CEILING_THRESHOLD):
		remaining = _count_at_least(counts, threshold)
		if remaining <= ceiling:
			break
		size = remaining
	return size


# Per threshold: capped number of qualifying predicates summed over type pairs.
total_num_preds_passed = {threshold: 0 for threshold in THRESHOLDS}
# Per threshold: number of type pairs with at least one qualifying predicate.
num_tp_with_preds_passed = {threshold: 0 for threshold in THRESHOLDS}

for type_names, pred_counts in preds_bucket.items():
	print(f"Number of all predicates between {type_names}: {len(pred_counts)};")

	if len(pred_counts) < MIN_PREDS_PER_TYPE_PAIR:
		continue

	counts = list(pred_counts.values())
	# Number of predicates of this type pair that reach each threshold.
	num_preds_passed = {
		threshold: _count_at_least(counts, threshold) for threshold in THRESHOLDS
	}
	ceiled_size = _ceiled_size(counts, args.ceiling)

	for threshold in THRESHOLDS:
		total_num_preds_passed[threshold] += min(num_preds_passed[threshold], ceiled_size)
		if num_preds_passed[threshold] > 0:
			num_tp_with_preds_passed[threshold] += 1

	# The counts use ">=", so the wording is "at least" rather than "more than".
	for threshold in THRESHOLDS:
		print(
			f"Number of all predicates between {type_names} "
			f"with at least {threshold} instances: {num_preds_passed[threshold]}"
		)
	print("")
	print("")

for threshold in THRESHOLDS:
	print(
		f"Total number of predicates with at least {threshold} instances: "
		f"{total_num_preds_passed[threshold]}"
	)

for threshold in THRESHOLDS:
	print(
		f"Total number of type pairs with preds passed {threshold}: "
		f"{num_tp_with_preds_passed[threshold]}"
	)

print(f"Total number of type pairs: {len(preds_bucket)}")
