import json 
import os

def cola():
    """Rewrite the CoLA train/dev JSON files as text-to-text records.

    Reads ``../glue_data/CoLA/{train,dev}.json`` (lists of dicts with
    ``text1`` and ``label`` keys) and writes ``glue_data/CoLA/{train,dev}.json``,
    all relative to the current working directory.  Each output record has
    the keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'ungrammatical', 1: 'grammatical'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("../glue_data/CoLA/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("glue_data/CoLA", exist_ok=True)
    for split, items in raw.items():
        # Both splits share one prompt template; previously the dev prompt
        # was "cola text:{}" (no space) and did not match the train prompt.
        records = [{"input": "cola text: {}".format(item['text1']),
                    "target": label_map[item['label']],
                    "task": "cola"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("glue_data/CoLA/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def mnli():
    """Rewrite the MNLI train and both dev JSON files as text-to-text records.

    Reads ``../glue_data/MNLI/{train,dev_matched,dev_mismatched}.json`` (lists
    of dicts with ``text1``, ``text2`` and ``label`` keys) and writes files of
    the same names under ``glue_data/MNLI/``, all relative to the current
    working directory.  Each output record has the keys ``input``, ``target``
    and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0, 1 or 2.
    """
    label_map = {0: 'neutral', 1: 'contradiction', 2: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev_matched", "dev_mismatched"):
        with open("../glue_data/MNLI/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("glue_data/MNLI", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "mnli sentence1: {} sentence2: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "mnli"}
                   for item in items]
        # Each split goes to its own file.  The mismatched dev set used to be
        # written to dev_matched.json, overwriting the matched output.
        with open("glue_data/MNLI/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def mrpc():
    """Rewrite the MRPC train/dev JSON files as text-to-text records.

    Reads ``../glue_data/MRPC/{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``glue_data/MRPC/{train,dev}.json``, all relative to the current working
    directory.  Each output record has the keys ``input``, ``target`` and
    ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'not equivalent', 1: 'equivalent'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("../glue_data/MRPC/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("glue_data/MRPC", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "mrpc sentence1: {} sentence2: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "mrpc"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("glue_data/MRPC/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def qnli():
    """Rewrite the QNLI train/dev JSON files as text-to-text records.

    Reads ``../glue_data/QNLI/{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``glue_data/QNLI/{train,dev}.json``, all relative to the current working
    directory.  Each output record has the keys ``input``, ``target`` and
    ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'not entailment', 1: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("../glue_data/QNLI/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("glue_data/QNLI", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "qnli sentence1: {} sentence2: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "qnli"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("glue_data/QNLI/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def qqp():
    """Rewrite the QQP train/dev JSON files as text-to-text records.

    Reads ``../glue_data/QQP/{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``glue_data/QQP/{train,dev}.json``, all relative to the current working
    directory.  Each output record has the keys ``input``, ``target`` and
    ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'not duplicates', 1: 'duplicates'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("../glue_data/QQP/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("glue_data/QQP", exist_ok=True)
    for split, items in raw.items():
        # NOTE(review): "quertion" looks like a typo for "question".  It is
        # kept verbatim because changing the prompt changes the generated
        # data -- confirm with downstream users before fixing.
        records = [{"input": "qqp quertion1: {} quertion2: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "qqp"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("glue_data/QQP/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def rte():
    """Rewrite the GLUE RTE train/dev JSON files as text-to-text records.

    Reads ``../glue_data/RTE/{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``glue_data/RTE/{train,dev}.json``, all relative to the current working
    directory.  Each output record has the keys ``input``, ``target`` and
    ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'not entailment', 1: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("../glue_data/RTE/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("glue_data/RTE", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "rte sentence1: {} sentence2: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "rte"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("glue_data/RTE/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def sst2():
    """Rewrite the SST-2 train/dev JSON files as text-to-text records.

    Reads ``../glue_data/SST-2/{train,dev}.json`` (lists of dicts with
    ``text1`` and ``label`` keys) and writes
    ``glue_data/SST-2/{train,dev}.json``, all relative to the current working
    directory.  Each output record has the keys ``input``, ``target`` and
    ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'negative', 1: 'positive'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("../glue_data/SST-2/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("glue_data/SST-2", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "sst2 sentence: {}".format(item['text1']),
                    "target": label_map[item['label']],
                    "task": "sst2"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("glue_data/SST-2/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def stsb():
    """Rewrite the STS-B train/dev JSON files as text-to-text records.

    Reads ``../glue_data/STS-B/{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``glue_data/STS-B/{train,dev}.json``, all relative to the current working
    directory.  Each output record has the keys ``input``, ``target`` and
    ``task``; the target is ``str(label)`` rather than a mapped class name.

    Raises:
        KeyError: if an item lacks one of the expected fields.
    """
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("../glue_data/STS-B/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("glue_data/STS-B", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "stsb sentence1: {} sentence2: {}".format(item['text1'], item['text2']),
                    "target": str(item['label']),
                    "task": "stsb"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("glue_data/STS-B/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def wnli():
    """Rewrite the WNLI train/dev JSON files as text-to-text records.

    Reads ``../glue_data/WNLI/{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``glue_data/WNLI/{train,dev}.json``, all relative to the current working
    directory.  Each output record has the keys ``input``, ``target`` and
    ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'not entailment', 1: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("../glue_data/WNLI/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("glue_data/WNLI", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "wnli sentence1: {} sentence2: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "wnli"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("glue_data/WNLI/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def paws():
    """Rewrite the PAWS train/dev JSON files as text-to-text records.

    Reads ``../PAWS/{train,dev}.json`` (lists of dicts with ``text1``,
    ``text2`` and ``label`` keys) and writes ``PAWS/{train,dev}.json``, all
    relative to the current working directory.  Each output record has the
    keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'not entailment', 1: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("../PAWS/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("PAWS", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "paws sentence1: {} sentence2: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "paws"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("PAWS/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def imdb():
    """Rewrite the IMDB train/val JSON files as text-to-text records.

    Reads ``../imdb/train.json`` and ``../imdb/val.json`` (lists of dicts
    with ``text1`` and ``label`` keys) and writes ``imdb/train.json`` and
    ``imdb/dev.json``, all relative to the current working directory.  Each
    output record has the keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'negative', 1: 'positive'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.  The source "val"
    # split is written out under the name "dev".
    raw = {}
    for src_name, out_name in (("train", "train"), ("val", "dev")):
        with open("../imdb/{}.json".format(src_name), encoding='utf-8') as f:
            raw[out_name] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("imdb", exist_ok=True)
    for out_name, items in raw.items():
        # Both splits share one prompt template; previously the dev prompt
        # was "imdb sentence:{}" (no space) and did not match the train prompt.
        records = [{"input": "imdb sentence: {}".format(item['text1']),
                    "target": label_map[item['label']],
                    "task": "imdb"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("imdb/{}.json".format(out_name), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def snli():
    """Rewrite the SNLI train/dev JSON files as text-to-text records.

    Reads ``../snli_1.0/snli_1.0_{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``snli/{train,dev}.json``, all relative to the current working directory.
    Each output record has the keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0, 1 or 2.
    """
    label_map = {0: 'neutral', 1: 'contradiction', 2: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("../snli_1.0/snli_1.0_{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("snli", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "snli sentence1: {} sentence2: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "snli"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("snli/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def boolq():
    """Rewrite the BoolQ train/val JSON files as text-to-text records.

    Reads ``../SuperGlue/BoolQ/train.json`` and ``../SuperGlue/BoolQ/val.json``
    (lists of dicts with ``text1``, ``text2`` and ``label`` keys) and writes
    ``SuperGlue/BoolQ/train.json`` and ``SuperGlue/BoolQ/dev.json``, all
    relative to the current working directory.  ``text1`` fills the
    ``question`` slot of the prompt and ``text2`` the ``passage`` slot.  Each
    output record has the keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'no', 1: 'yes'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.  The source "val"
    # split is written out under the name "dev".
    raw = {}
    for src_name, out_name in (("train", "train"), ("val", "dev")):
        with open("../SuperGlue/BoolQ/{}.json".format(src_name), encoding='utf-8') as f:
            raw[out_name] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("SuperGlue/BoolQ", exist_ok=True)
    for out_name, items in raw.items():
        records = [{"input": "boolq question: {} passage: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "boolq"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("SuperGlue/BoolQ/{}.json".format(out_name), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def cb():
    """Rewrite the CB train/val JSON files as text-to-text records.

    Reads ``../SuperGlue/CB/train.json`` and ``../SuperGlue/CB/val.json``
    (lists of dicts with ``text1``, ``text2`` and ``label`` keys) and writes
    ``SuperGlue/CB/train.json`` and ``SuperGlue/CB/dev.json``, all relative to
    the current working directory.  ``text1`` fills the ``premise`` slot of
    the prompt and ``text2`` the ``hypothesis`` slot.  Each output record has
    the keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0, 1 or 2.
    """
    label_map = {0: 'neutral', 1: 'contradiction', 2: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.  The source "val"
    # split is written out under the name "dev".
    raw = {}
    for src_name, out_name in (("train", "train"), ("val", "dev")):
        with open("../SuperGlue/CB/{}.json".format(src_name), encoding='utf-8') as f:
            raw[out_name] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("SuperGlue/CB", exist_ok=True)
    for out_name, items in raw.items():
        records = [{"input": "cb premise: {} hypothesis: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "cb"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("SuperGlue/CB/{}.json".format(out_name), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def copa():
    """Rewrite the COPA train/val JSONL files as text-to-text records.

    Reads ``../SuperGlue/COPA/train.jsonl`` and ``../SuperGlue/COPA/val.jsonl``
    (one JSON object per line with ``question``, ``premise``, ``choice1``,
    ``choice2`` and ``label`` keys) and writes ``SuperGlue/COPA/train.json``
    and ``SuperGlue/COPA/dev.json``, all relative to the current working
    directory.  Each output record has the keys ``input``, ``target`` and
    ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'choice1', 1: 'choice2'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.  The source "val"
    # split is written out under the name "dev".
    raw = {}
    for src_name, out_name in (("train", "train"), ("val", "dev")):
        with open("../SuperGlue/COPA/{}.jsonl".format(src_name), encoding='utf-8') as f:
            # Blank lines (e.g. a trailing newline) are skipped instead of
            # crashing the JSON parser.
            raw[out_name] = [json.loads(line) for line in f if line.strip()]
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("SuperGlue/COPA", exist_ok=True)
    for out_name, items in raw.items():
        records = [{"input": "copa question: {} premise: {} choice1: {} choice2: {}".format(
                        item['question'], item['premise'], item['choice1'], item['choice2']),
                    "target": label_map[item['label']],
                    "task": "copa"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("SuperGlue/COPA/{}.json".format(out_name), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def mrc():
    """Flatten the MultiRC train/val JSONL files into text-to-text records.

    Reads ``../SuperGlue/MultiRC/train.jsonl`` and
    ``../SuperGlue/MultiRC/val.jsonl`` and writes
    ``SuperGlue/MultiRC/train.json`` and ``SuperGlue/MultiRC/dev.json``, all
    relative to the current working directory.  Each input line is a JSON
    object whose ``passage`` holds a ``text`` and a list of ``questions``;
    every question holds a ``question`` string and a list of ``answers`` with
    ``text`` and ``label``.  One output record (keys ``input``, ``target``,
    ``task``) is emitted per answer.  The prompt prefix is ``multirc`` while
    the task tag is ``mrc``.

    Raises:
        KeyError: if a field is missing or an answer label is not 0 or 1.
    """
    label_map = {0: 'false', 1: 'true'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.  The source "val"
    # split is written out under the name "dev".
    raw = {}
    for src_name, out_name in (("train", "train"), ("val", "dev")):
        with open("../SuperGlue/MultiRC/{}.jsonl".format(src_name), encoding='utf-8') as f:
            # Blank lines (e.g. a trailing newline) are skipped instead of
            # crashing the JSON parser.
            raw[out_name] = [json.loads(line) for line in f if line.strip()]
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("SuperGlue/MultiRC", exist_ok=True)
    for out_name, entries in raw.items():
        records = []
        for entry in entries:
            passage = entry['passage']
            content = passage['text']
            for q in passage['questions']:
                question = q['question']
                for a in q['answers']:
                    records.append({"input": "multirc paragraph: {} question: {} answer: {}".format(content, question, a['text']),
                                    'target': label_map[a['label']],
                                    'task': 'mrc'})
        # A context manager guarantees the output file is flushed and closed.
        with open("SuperGlue/MultiRC/{}.json".format(out_name), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def srte():
    """Rewrite the SuperGLUE RTE train/val JSON files as text-to-text records.

    Reads ``../SuperGlue/RTE/train.json`` and ``../SuperGlue/RTE/val.json``
    (lists of dicts with ``text1``, ``text2`` and ``label`` keys) and writes
    ``SuperGlue/RTE/train.json`` and ``SuperGlue/RTE/dev.json``, all relative
    to the current working directory.  Each output record has the keys
    ``input``, ``target`` and ``task``.  The prompt prefix is ``rte`` while
    the task tag is ``srte``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'not entailment', 1: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.  The source "val"
    # split is written out under the name "dev".
    raw = {}
    for src_name, out_name in (("train", "train"), ("val", "dev")):
        with open("../SuperGlue/RTE/{}.json".format(src_name), encoding='utf-8') as f:
            raw[out_name] = json.load(f)
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("SuperGlue/RTE", exist_ok=True)
    for out_name, items in raw.items():
        records = [{"input": "rte sentence1: {} sentence2: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "srte"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("SuperGlue/RTE/{}.json".format(out_name), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def wic():
    """Rewrite the WiC train/val JSONL files as text-to-text records.

    Reads ``../SuperGlue/WiC/train.jsonl`` and ``../SuperGlue/WiC/val.jsonl``
    (one JSON object per line with ``word``, ``sentence1``, ``sentence2`` and
    ``label`` keys) and writes ``SuperGlue/WiC/train.json`` and
    ``SuperGlue/WiC/dev.json``, all relative to the current working
    directory.  Each output record has the keys ``input``, ``target`` and
    ``task``.

    Raises:
        KeyError: if an item lacks a field or ``int(label)`` is not 0 or 1.
    """
    label_map = {0: 'false', 1: 'true'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.  The source "val"
    # split is written out under the name "dev".
    raw = {}
    for src_name, out_name in (("train", "train"), ("val", "dev")):
        with open("../SuperGlue/WiC/{}.jsonl".format(src_name), encoding='utf-8') as f:
            # Blank lines (e.g. a trailing newline) are skipped instead of
            # crashing the JSON parser.
            raw[out_name] = [json.loads(line) for line in f if line.strip()]
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("SuperGlue/WiC", exist_ok=True)
    for out_name, items in raw.items():
        # int() normalises the label before lookup, so JSON booleans work too.
        records = [{"input": "wic word: {} sentence1: {} sentence2: {}".format(
                        item['word'], item['sentence1'], item['sentence2']),
                    "target": label_map[int(item['label'])],
                    "task": "wic"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("SuperGlue/WiC/{}.json".format(out_name), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def wsc():
    """Rewrite the WSC train/val JSONL files as text-to-text records.

    Reads ``../SuperGlue/WSC/train.jsonl`` and ``../SuperGlue/WSC/val.jsonl``
    (one JSON object per line with ``text``, ``label`` and a ``target`` dict
    holding ``span1_text`` and ``span2_text``) and writes
    ``SuperGlue/WSC/train.json`` and ``SuperGlue/WSC/dev.json``, all relative
    to the current working directory.  Each output record has the keys
    ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or ``int(label)`` is not 0 or 1.
    """
    label_map = {0: 'false', 1: 'true'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.  The source "val"
    # split is written out under the name "dev".
    raw = {}
    for src_name, out_name in (("train", "train"), ("val", "dev")):
        with open("../SuperGlue/WSC/{}.jsonl".format(src_name), encoding='utf-8') as f:
            # Blank lines (e.g. a trailing newline) are skipped instead of
            # crashing the JSON parser.
            raw[out_name] = [json.loads(line) for line in f if line.strip()]
    # Create the output directory so the script also works on a fresh checkout.
    os.makedirs("SuperGlue/WSC", exist_ok=True)
    for out_name, items in raw.items():
        # int() normalises the label before lookup, so JSON booleans work too.
        records = [{"input": "wsc paragraph: {} word1: {} word2: {}".format(
                        item['text'], item['target']['span1_text'], item['target']['span2_text']),
                    "target": label_map[int(item['label'])],
                    "task": "wsc"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("SuperGlue/WSC/{}.json".format(out_name), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def anli_r1():
    """Rewrite the ANLI round-1 train/dev JSON files as text-to-text records.

    Reads ``raw_data/anli/r1/{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``anli/r1/{train,dev}.json``, all relative to the current working
    directory; the output directory is created if needed.  ``text1`` fills
    the ``context`` slot of the prompt and ``text2`` the ``hypothesis`` slot.
    Each output record has the keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0, 1 or 2.
    """
    label_map = {0: 'neutral', 1: 'contradiction', 2: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("raw_data/anli/r1/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    os.makedirs("anli/r1", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "anli R1 context: {} hypothesis: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "anlir1"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("anli/r1/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def anli_r2():
    """Rewrite the ANLI round-2 train/dev JSON files as text-to-text records.

    Reads ``raw_data/anli/r2/{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``anli/r2/{train,dev}.json``, all relative to the current working
    directory; the output directory is created if needed.  ``text1`` fills
    the ``context`` slot of the prompt and ``text2`` the ``hypothesis`` slot.
    Each output record has the keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0, 1 or 2.
    """
    label_map = {0: 'neutral', 1: 'contradiction', 2: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("raw_data/anli/r2/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    os.makedirs("anli/r2", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "anli R2 context: {} hypothesis: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "anlir2"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("anli/r2/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def anli_r3():
    """Rewrite the ANLI round-3 train/dev JSON files as text-to-text records.

    Reads ``raw_data/anli/r3/{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``anli/r3/{train,dev}.json``, all relative to the current working
    directory; the output directory is created if needed.  ``text1`` fills
    the ``context`` slot of the prompt and ``text2`` the ``hypothesis`` slot.
    Each output record has the keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0, 1 or 2.
    """
    label_map = {0: 'neutral', 1: 'contradiction', 2: 'entailment'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("raw_data/anli/r3/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    os.makedirs("anli/r3", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "anli R3 context: {} hypothesis: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "anlir3"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("anli/r3/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def scitail():
    """Rewrite the SciTail train/dev JSON files as text-to-text records.

    Reads ``raw_data/scitail/{train,dev}.json`` (lists of dicts with
    ``text1``, ``text2`` and ``label`` keys) and writes
    ``scitail/{train,dev}.json``, all relative to the current working
    directory; the output directory is created if needed.  Each output record
    has the keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'neutral', 1: 'entails'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.
    raw = {}
    for split in ("train", "dev"):
        with open("raw_data/scitail/{}.json".format(split), encoding='utf-8') as f:
            raw[split] = json.load(f)
    os.makedirs("scitail", exist_ok=True)
    for split, items in raw.items():
        records = [{"input": "SciTail sentence1: {} sentence2: {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "scitail"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("scitail/{}.json".format(split), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def winogrande():
    """Rewrite the WinoGrande train/dev JSON files as text-to-text records.

    Reads ``raw_data/winogrande/train_l.json`` and
    ``raw_data/winogrande/dev.json`` (lists of dicts with ``text1``,
    ``text2`` and ``label`` keys) and writes ``winogrande/train.json`` and
    ``winogrande/dev.json``, all relative to the current working directory;
    the output directory is created if needed.  Each output record has the
    keys ``input``, ``target`` and ``task``.

    Raises:
        KeyError: if an item lacks a field or its label is not 0 or 1.
    """
    label_map = {0: 'option1', 1: 'option2'}
    # Load every split before writing anything, so a missing input aborts
    # the run without leaving partial output behind.  The "train_l" source
    # file is written out as plain "train".
    raw = {}
    for src_name, out_name in (("train_l", "train"), ("dev", "dev")):
        with open("raw_data/winogrande/{}.json".format(src_name), encoding='utf-8') as f:
            raw[out_name] = json.load(f)
    os.makedirs("winogrande", exist_ok=True)
    for out_name, items in raw.items():
        records = [{"input": "winogrande sentence: {} {}".format(item['text1'], item['text2']),
                    "target": label_map[item['label']],
                    "task": "winogrande"}
                   for item in items]
        # A context manager guarantees the output file is flushed and closed.
        with open("winogrande/{}.json".format(out_name), "w", encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

# Script entry: runs at module load.  Only the SciTail and WinoGrande
# converters are invoked here; none of the other converter functions is
# called in the visible part of this file.
scitail()
winogrande()