import json
import argparse

# Every emitted entry carries this same article id; only 'lineId' varies.
ARTICLE_ID = 1


def _build_parser():
    """Build the command-line parser (flags and defaults of the script)."""
    parser = argparse.ArgumentParser()
    # Template filled with (subset, subset, slice index) to locate each input slice.
    parser.add_argument('--input_fn', default='./%s/%s_in_lines_backtranslated_%d.txt', type=str)
    parser.add_argument('--subset', default='dev', type=str)
    parser.add_argument('--num_slices', default=12, type=int)
    # Template filled with the subset name to produce the output path.
    parser.add_argument('--output_fn', default='./levyholt_backtranslation_input_%s.json', type=str)
    return parser


def _convert_slices(input_fn, subset, num_slices, out_fp):
    """Write one JSON object per input line to ``out_fp`` and return the line count.

    Slices ``0 .. num_slices - 1`` are read in order from
    ``input_fn % (subset, subset, slice_index)``. Each line (with its newline
    removed) becomes ``{'s': ..., 'date': '', 'articleId': 1, 'lineId': n}``,
    where ``n`` keeps counting across slices rather than restarting per file.

    Raises:
        OSError: if a slice file cannot be opened.
    """
    line_id = 0
    for sid in range(num_slices):
        slice_path = input_fn % (subset, subset, sid)
        with open(slice_path, 'r', encoding='utf8') as in_fp:
            for lidx, line in enumerate(in_fp):
                # Progress indicator; lidx restarts at 0 for every slice.
                if lidx % 1000 == 0:
                    print(lidx)
                out_entry = {
                    's': line.strip('\n'),
                    'date': '',
                    'articleId': ARTICLE_ID,
                    'lineId': line_id,
                }
                out_fp.write(json.dumps(out_entry) + '\n')
                line_id += 1
    return line_id


def main(argv=None):
    """Convert sliced line files into a single JSON-lines file.

    Args:
        argv: Argument list to parse; ``None`` means use ``sys.argv[1:]``.

    Returns:
        The total number of lines written.
    """
    args = _build_parser().parse_args(argv)
    # The context manager guarantees the output is flushed and closed even if
    # reading one of the slices fails part-way through.
    with open(args.output_fn % args.subset, 'w', encoding='utf8') as out_fp:
        total_lines = _convert_slices(args.input_fn, args.subset, args.num_slices, out_fp)
    print('total number of lines: ', total_lines)
    return total_lines


if __name__ == '__main__':
    main()
