import json
import random

INPUT_PATH = './clue_typed_triples_tacl.json'
OUTPUT_PATH = './clue_typed_triples_tacl_filtered_.json'
# Relations whose predicate contains this substring are dropped.
BAD_PREDICATE_PATTERN = "是·X·的·的"


def _exception_percentage(exception_count, kept_count):
    """Return dropped relations as a percentage of kept relations.

    The ratio is undefined while nothing has been kept yet; report 0.0 if
    nothing was dropped either, and infinity otherwise, instead of relying
    on a tiny float seed in the denominator.
    """
    if kept_count:
        return 100.0 * exception_count / kept_count
    return float("inf") if exception_count else 0.0


def filter_triples(
    in_path: str = INPUT_PATH,
    out_path: str = OUTPUT_PATH,
    bad_pattern: str = BAD_PREDICATE_PATTERN,
    progress_every: int = 100000,
    sample_rate: float = 0.0001,
) -> tuple:
    """Copy a JSON-lines file, dropping relations with an unwanted predicate.

    Each non-blank input line is parsed as a JSON object with a "rels" list.
    Every relation has an "r" string; its first and last characters are
    stripped and the text before the first '::' is taken as the predicate
    (presumably the "r" value is wrapped in brackets -- confirm against the
    data). Relations whose predicate contains ``bad_pattern`` are removed and
    the object is written back out, one JSON document per line.

    Args:
        in_path: Path of the JSON-lines file to read.
        out_path: Path of the JSON-lines file to write (overwritten).
        bad_pattern: Substring that marks a predicate for removal.
        progress_every: Print a progress line every this many input lines.
        sample_rate: Probability of printing any given dropped predicate.

    Returns:
        A ``(exception_count, kept_count)`` tuple: the number of relations
        dropped and the number of relations kept.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an object lacks "rels" or a relation lacks "r".
    """
    exception_count = 0
    kept_count = 0

    # Open the input first so a missing input file does not truncate an
    # existing output; the context manager closes both files on any error.
    with open(in_path, 'r', encoding='utf8') as in_fp, \
            open(out_path, 'w', encoding='utf8') as out_fp:
        for lidx, line in enumerate(in_fp):
            if lidx % progress_every == 0:
                percentage = _exception_percentage(exception_count, kept_count)
                print(
                    f"{lidx}; exception count: {exception_count}; "
                    f"total number of rels after filter: {kept_count}; "
                    f"percentage of exceptions: {percentage:.3f} percent"
                )

            line = line.strip('\n')
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # rather than crashing in json.loads.
                continue

            item = json.loads(line)
            new_rels = []
            for rel in item["rels"]:
                pred = rel["r"][1:-1].split('::')[0]
                if bad_pattern not in pred:
                    new_rels.append(rel)
                    continue
                # Occasionally echo a dropped predicate for spot checking.
                if random.random() < sample_rate:
                    print(pred)
                exception_count += 1

            item["rels"] = new_rels
            kept_count += len(new_rels)
            out_fp.write(json.dumps(item, ensure_ascii=False) + '\n')

    return exception_count, kept_count


def main():
    """Run the filter on the default paths and report the number dropped."""
    exception_count, _ = filter_triples()
    print(f"Number of 是·X·的·的: {exception_count}.")


if __name__ == "__main__":
    main()