
import os
import json
import jsonlines
import tqdm

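# Alternative configuration for the in-language data; uncomment this pair
# (and comment out the active pair) to process those files instead.
#dir_path_input = './in-language/'
#dir_path_output = './concat-in-language/'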

# Directory read for per-passage JSON Lines files, and directory receiving the
# merged (one record per query) files under the same file names.
dir_path_input = './in-english/'
dir_path_output = './concat-in-english/'

# Per-passage annotation fields that are gathered into parallel lists in the
# merged record. The 'intrepetability' spelling is the key used by the data
# and must not be "corrected" here.
_LIST_FIELDS = ('intrepetability_vote', 'ais_vote', 'intrepetability', 'ais')


def _merge_by_query(items):
    """Merge per-passage items that share a query into one record per query.

    Args:
        items: iterable of dicts, one per (query, passage) pair, carrying the
            keys read below ('query', 'passage_en', 'ais', ...).

    Returns:
        A dict mapping each query string to its merged record, in order of
        first appearance. In a merged record:
          * 'passage_in_language' is the concatenation of all passages, each
            prefixed with its position marker '<0> ', '<1> ', ...;
          * 'passage_retrieved_language' is always 'multi';
          * 'split_passages_en' and the fields in _LIST_FIELDS are lists with
            one entry per passage, in input order;
          * 'idxs' lists the positions of the contributing items in ``items``;
          * the remaining scalar fields come from the first item of the query.

    Raises:
        KeyError: if an item lacks one of the required keys.
    """
    record = {}
    for idx, item in enumerate(items):
        # Use the English passage only when English is what was retrieved.
        if item['passage_retrieved_language'] == 'en':
            passage = item['passage_en']
        else:
            passage = item['passage_in_language']

        query = item['query']
        merged = record.get(query)

        if merged is None:
            merged = {
                'query': query,
                'query_language': item['query_language'],
                'answers': item['answers'],
                'prediction': item['prediction'],
                'prediction_correct': item['prediction_correct'],
                'passage_in_language': '<0> ' + passage,
                'passage_retrieved_language': 'multi',
                'split_passages_en': [item['passage_en']],
            }
            for field in _LIST_FIELDS:
                merged[field] = [item[field]]
            merged['idxs'] = [idx]
            record[query] = merged
            continue

        text = merged['passage_in_language']
        # Separate passages with exactly one space unless the text gathered
        # so far already ends with one (e.g. after an empty passage).
        if len(text) > 0 and text[-1] != ' ':
            text += ' '
        # The marker number is the count of passages merged so far.
        num_passages = len(merged['idxs'])
        merged['passage_in_language'] = text + '<' + str(num_passages) + '> ' + passage

        merged['split_passages_en'].append(item['passage_en'])
        for field in _LIST_FIELDS:
            merged[field].append(item[field])
        merged['idxs'].append(idx)

    return record


# Create the destination up front; opening an output file inside a missing
# directory would otherwise raise FileNotFoundError.
os.makedirs(dir_path_output, exist_ok=True)

# Sorted so that the processing (and log) order is reproducible.
files = sorted(os.listdir(dir_path_input))

for fname in files:
    in_path = os.path.join(dir_path_input, fname)
    if not os.path.isfile(in_path):
        # os.listdir also returns sub-directories, which cannot be parsed.
        continue

    print('*****************************')
    print(fname)

    # JSON Lines files are UTF-8; do not depend on the platform default
    # encoding, which would corrupt or reject non-English passages.
    with open(in_path, encoding='utf-8') as f:
        record = _merge_by_query(jsonlines.Reader(f))

    with jsonlines.open(os.path.join(dir_path_output, fname), 'w') as f:
        for merged in record.values():
            f.write(merged)
