import json, os, argparse
import sys
# from settings import parse_args
import random
import shutil
# Seed the global RNG so repeated runs draw the same random subsets.
random.seed(0)

# Base unit for subset sizes: each written subset holds R * K examples.
# Also embedded in the data directory name ("label_<K>").
# NOTE(review): presumably the number of labeled examples per task -- confirm.
K = 100

# Tasks to process; each one is a sub-directory of ``out_dir``.
# Alternative task lists kept for reference:
#   ['ag', 'dbpedia', 'yelp', 'yahoo', 'amazon']
#   ['woz.en', 'squad', 'srl', 'sst', 'wikisql']
task_names = [
    'woz.en',
    'srl',
    'sst',
    'wikisql',
    'squad',
]

# Not referenced anywhere in the visible part of this script.
ori_dir = 'lamol_data'

# Root directory holding the per-task data for this value of K.
# Alternative root kept for reference: '../../DATA/LAMOL_DIVIDED/TC/label_<K>'
out_dir = f'../../DATA/LAMOL_DIVIDED/decaNLP/label_{K}'

# For every task, load its full unlabeled pool and write several randomly
# sampled subsets of it (R * K examples each) as separate JSON files in the
# same task directory.
for task in task_names:
    all_unlabel_path = os.path.join(out_dir, task, 'unlabel_train.json')
    # Read explicitly as UTF-8 (the JSON interchange encoding) so the result
    # does not depend on the platform's default locale; the subsets below are
    # written as UTF-8 as well.
    with open(all_unlabel_path, 'r', encoding='utf-8') as fr:
        data = json.load(fr)['data']

    # Other multipliers that have been used: [5, 10, 20, 100, 500]
    for R in [5, 10, 20]:
        print(f'Number of the all unlabeled data is {len(data)}')

        sample_size = R * K
        if sample_size > len(data):
            # random.sample would raise ValueError here anyway; fail with a
            # message that says which task and sizes are involved.
            raise ValueError(
                f'Cannot sample {sample_size} examples for task {task!r}: '
                f'only {len(data)} unlabeled examples in {all_unlabel_path}'
            )

        idx_list = random.sample(range(len(data)), sample_size)
        # Emit the chosen examples in their original file order.  Walking the
        # sorted indices yields exactly what filtering every position with
        # ``i in idx_list`` would, without a linear list scan per position.
        unlabel_data = [data[i] for i in sorted(idx_list)]

        unlabel_path = os.path.join(
            out_dir, task, str(sample_size) + '_unlabel_train.json'
        )
        with open(unlabel_path, 'w', encoding='utf-8') as fw:
            res = {'data': unlabel_data}
            # print() appends the trailing newline to the single JSON line.
            print(json.dumps(res, ensure_ascii=False), file=fw)

    print('Finishing dealing with ', task, flush=True)

