import json

# Set the train annotation paths below (train_anno_path / train_anno_to_save).
from tqdm import tqdm

# Merge the split train question files into a single JSON file, keeping only
# a subset of fields for each question.

# Directory holding the split annotation files; the trailing slash is required
# because file names are appended by plain string concatenation.
train_anno_path = "/data/GQA/train_all_questions/"
# Destination of the merged, trimmed annotation file.
train_anno_to_save = "/data/GQA/train_all.json"

# The input is read from ten files numbered 0 through 9.
train_anno_files = [f"train_all_questions_{i}.json" for i in range(10)]

# Fields copied from every question entry; all other fields are dropped.
# A missing field raises KeyError rather than being silently skipped.
kept_fields = ("imageId", "question", "answer", "fullAnswer", "isBalanced")

data_to_save = {}
for f in tqdm(train_anno_files):
    # Use a context manager so each input handle is closed as soon as it has
    # been parsed, instead of being left open until garbage collection.
    with open(train_anno_path + f, encoding="utf-8") as fin:
        qa_s = json.load(fin)
    for q_id, qa in qa_s.items():
        # A question id that appears in more than one file keeps the entry
        # from the file processed last.
        data_to_save[q_id] = {field: qa[field] for field in kept_fields}

with open(train_anno_to_save, 'w', encoding="utf-8") as fp:
    json.dump(data_to_save, fp)