import gzip
import json
import argparse
from google.protobuf import text_format
from tapas.protos import interaction_pb2
from google.protobuf.json_format import MessageToJson


def get_args():
    """Parse the command-line flags of this script.

    Both ``--input_file`` and ``--output_file`` are mandatory string flags.

    Returns:
        The ``argparse.Namespace`` holding ``input_file`` and ``output_file``.
    """
    cli = argparse.ArgumentParser()
    # The two flags share the same configuration, so register them in a loop.
    for flag in ('--input_file', '--output_file'):
        cli.add_argument(flag, type=str, required=True)
    return cli.parse_args()


if __name__ == '__main__':
    # Script entry point: read a gzipped file in which every line is parsed as
    # a text-format Interaction proto and write one JSON object per line to
    # the output file.
    args = get_args()
    # Open the input first so that a bad input path fails before the output
    # file is created/truncated. Decode explicitly as UTF-8 so the result does
    # not depend on the platform's locale encoding (the output is UTF-8 too).
    with gzip.open(args.input_file, mode='rt', encoding="utf-8") as fin:
        with open(args.output_file, mode="w", encoding="utf-8") as fout:
            for example_count, line in enumerate(fin, start=1):
                interaction = text_format.Parse(line, interaction_pb2.Interaction())
                # MessageToJson output is re-serialized through the json
                # module with indent=None so each example stays on one line.
                json_data = json.dumps(json.loads(MessageToJson(interaction)), indent=None)
                fout.write(json_data)
                fout.write('\n')
                # Periodic progress report for long inputs.
                if example_count % 10000 == 0:
                    print("Preprocess %d examples... " % example_count)
