import argparse
import json


def _check_id_order(article_id, line_id, last_article_id, last_line_id):
	"""Raise AssertionError unless the ids continue the expected ordering.

	Article ids must be non-decreasing and line ids strictly increasing.
	Raised explicitly (instead of via `assert`) so the check is not stripped
	when Python runs with -O.
	"""
	if article_id < last_article_id:
		raise AssertionError(
			f"articleId {article_id} is smaller than the previous articleId {last_article_id}")
	if line_id <= last_line_id:
		raise AssertionError(
			f"lineId {line_id} is not greater than the previous lineId {last_line_id}")


def merge_en(
		en_fn='/Users/teddy/eclipse-workspace/entGraph_mod/downloaded/news_gen8_p.json',
		zh_fn='./webhose_news_gen.json',
		merged_fn='./NW_news_gen.json'):
	"""Merge the NewsSpike and Webhose JSON-lines files into a single file.

	Every input line is a JSON object that carries at least the keys
	'articleId', 'lineId' and 'rels'. NewsSpike lines are copied to the
	output unchanged (apart from whitespace stripping). Webhose lines are
	re-numbered so that their ids continue after the last NewsSpike ids:
	the articleId is shifted by the last NewsSpike articleId, and the lineId
	by the last NewsSpike lineId plus one. The shifted ids are written back
	as strings.

	Progress and summary statistics are printed to stdout.

	Args:
		en_fn: Path of the NewsSpike input file (read first).
		zh_fn: Path of the Webhose input file (appended after re-numbering).
		merged_fn: Path of the merged output file; it is overwritten.

	Raises:
		AssertionError: If article ids ever decrease or line ids fail to
			strictly increase across the merged stream.
	"""
	en_total_num_rels = 0
	zh_total_num_rels = 0

	# Sentinels for the ordering checks: the first record must have
	# articleId >= 1 and lineId > -1.
	global_article_id = 1
	global_line_id = -1

	# The context manager guarantees the output file is flushed and closed
	# even when a malformed record aborts the merge half-way.
	with open(merged_fn, 'w', encoding='utf8') as output_fp:
		print("Handling NewsSpike!")
		with open(en_fn, 'r', encoding='utf8') as fp:
			for lidx, line in enumerate(fp):
				if lidx % 100000 == 0:
					print(lidx)
				item = json.loads(line)
				article_id = int(item['articleId'])
				line_id = int(item['lineId'])
				_check_id_order(article_id, line_id, global_article_id, global_line_id)
				global_article_id = article_id
				global_line_id = line_id
				en_total_num_rels += len(item['rels'])
				# NewsSpike records keep their ids, so the raw line is reused.
				output_fp.write(line.strip()+'\n')

		newsspike_last_article_id = global_article_id
		newsspike_last_line_id = global_line_id

		print(f"Last articleId of NewsSpike: {newsspike_last_article_id};")
		print(f"Last lineId of NewsSpike: {newsspike_last_line_id};")

		print("Handling Webhose!")
		with open(zh_fn, 'r', encoding='utf8') as fp:
			for lidx, line in enumerate(fp):
				if lidx % 100000 == 0:
					print(lidx)
				item = json.loads(line)
				# Shift the Webhose ids past the NewsSpike ones; the extra +1
				# on lineId keeps it strictly greater than the last NewsSpike
				# lineId even when the Webhose lineId is 0.
				new_article_id = newsspike_last_article_id + int(item['articleId'])
				new_line_id = newsspike_last_line_id + int(item['lineId']) + 1
				_check_id_order(new_article_id, new_line_id, global_article_id, global_line_id)
				global_article_id = new_article_id
				global_line_id = new_line_id
				zh_total_num_rels += len(item['rels'])

				# Ids are stored as strings in the output records.
				item['articleId'] = str(new_article_id)
				item['lineId'] = str(new_line_id)

				# ensure_ascii must be the boolean False: the string 'False'
				# is truthy and would escape all non-ASCII text as \uXXXX.
				out_line = json.dumps(item, ensure_ascii=False)
				output_fp.write(out_line+'\n')

	print(f"Last articleId overall: {global_article_id};")
	print(f"Last lineId overall: {global_line_id}")

	print(f"Total number of English relations: {en_total_num_rels} from {newsspike_last_article_id} articles and {newsspike_last_line_id} lines;")
	print(f"Total number of Chinese relations: {zh_total_num_rels} from {global_article_id-newsspike_last_article_id} articles and {global_line_id-newsspike_last_line_id} lines!")



if __name__ == '__main__':
	# Command-line entry point: --lang picks which merge routine to run
	# (defaults to 'en').
	parser = argparse.ArgumentParser()
	parser.add_argument('--lang', type=str, default='en')
	args = parser.parse_args()

	# Reject unknown languages first, then the known-but-unsupported one,
	# so that only the supported case falls through to the merge.
	if args.lang not in ('en', 'zh'):
		raise AssertionError
	if args.lang == 'zh':
		raise NotImplementedError
	merge_en()