import json

# Paths of the untyped input file and of its typed counterpart. Both are read
# line by line below.
no_type_fn = '/Users/teddy/eclipse-workspace/entGraph_mod/downloaded/news_gen8_p.json'
with_type_fn = './news_gen8_p_typed.json'  # this typed file results from a piece of java code in entGraph (in Ubuntu VM)

# Alternative pair of files; swap with the assignments above to check them instead.
# no_type_fn = './webhose_news_gen.json'
# with_type_fn = './webhose_news_gen_typed.json'


def _count_lines(path):
	"""Return the number of lines in the UTF-8 text file at *path*.

	The file is streamed, so it is never held in memory as a whole.
	"""
	with open(path, 'r', encoding='utf8') as fp:
		return sum(1 for _ in fp)


no_type_len = _count_lines(no_type_fn)
print(f"number of lines no type: {no_type_len}!")

with_type_len = _count_lines(with_type_fn)
print(f"number of lines with type: {with_type_len}!")

# Raise explicitly instead of using a bare `assert`: assert statements are
# removed when Python runs with -O, which would silently skip this check.
# AssertionError is kept so the failure type is the same as before.
if no_type_len != with_type_len:
	raise AssertionError(
		f"line count mismatch: {no_type_len} lines in {no_type_fn!r} "
		f"vs {with_type_len} lines in {with_type_fn!r}"
	)


# Walk both files in lockstep and verify that the record on each line of the
# typed file carries the same 's', 'date', 'articleId' and 'lineId' values as
# the record on the same line of the untyped file.
#
# The `with` statement guarantees both handles are closed even when a check
# fails or a line is not valid JSON. Note that zip() stops at the shorter
# file; the line-count check earlier in this script is what catches files of
# different length.
with open(no_type_fn, 'r', encoding='utf8') as no_type_fp, \
		open(with_type_fn, 'r', encoding='utf8') as with_type_fp:
	for lidx, (line_untyped, line_typed) in enumerate(zip(no_type_fp, with_type_fp)):
		if lidx % 500000 == 0:
			print(lidx)  # progress indicator
		untyped_item = json.loads(line_untyped)
		typed_item = json.loads(line_typed)

		# Explicit raises (rather than bare asserts) keep the checks active
		# under `python -O` and report where the two files diverge.
		for key in ('s', 'date', 'articleId', 'lineId'):
			if untyped_item[key] != typed_item[key]:
				raise AssertionError(
					f"mismatch at line index {lidx}, field {key!r}: "
					f"{untyped_item[key]!r} != {typed_item[key]!r}"
				)

print("Test passed!")
