import os
import myutils
import json

# Make sure the output directory for the generated dataset configs exists.
# exist_ok=True avoids the check-then-create race of isdir() + mkdir().
os.makedirs('configs/', exist_ok=True)

def _baseConfig(train, dev):
    """Return a fresh dataset config holding the data paths and an empty task dict.

    Key insertion order (train path, optional dev path, word_idx, tasks) is
    kept stable because it determines the layout of the dumped JSON.
    """
    config = {'train_data_path': '../' + train}
    if dev != '':
        config['dev_data_path'] = '../' + dev
    config['word_idx'] = 1
    config['tasks'] = {}
    return config


def _writeConfig(jsonPath, dataConfig):
    """Dump dataConfig to jsonPath as indented JSON and close the file."""
    with open(jsonPath, 'w') as outFile:
        json.dump(dataConfig, outFile, indent=4)


def _printTrainCmds(jsonPath, nameSuffix):
    """Print one training command per language model that has no trained model yet.

    nameSuffix is the part of the model name that follows 'tok.<mlm>.'.
    """
    for mlm in myutils.mlms:
        modelName = 'tok.' + mlm.replace('/', '_') + '.' + nameSuffix
        # makeParams is called unconditionally, exactly as before, in case it
        # has side effects beyond returning the parameters path.
        hyperParams = myutils.makeParams('machamp/configs/params.json', mlm)
        if myutils.getModel(modelName) == '':
            print('python3 train.py --dataset_config ../' + jsonPath + ' --parameters_config ../' + hyperParams + ' --name ' + modelName)


## single-dataset models
for udVersion in myutils.udVersions:
    # Config paths collected for the multi-dataset commands printed below;
    # only the attention=False variants are collected.
    dataConfigsSplit = []
    dataConfigsNoSplit = []
    udPath = 'data/ud-treebanks-v' + udVersion + '.singleToken/'
    for UDdir in sorted(os.listdir(udPath)):
        if not UDdir.startswith("UD") or not os.path.isdir(udPath + UDdir):
            continue
        train, dev, _ = myutils.getTrainDevTest(udPath + UDdir)
        # Nothing is generated for treebanks without training data or without
        # (enough) word forms in column 1.
        if train == '' or not myutils.hasColumn(train, 1, threshold=.1):
            #print('noWords ', train)
            continue

        # These probes depend only on the training file, so evaluate them once
        # per treebank instead of once per (splits, attention) combination.
        # Assumes myutils.hasColumn is a pure function of its arguments.
        hasUpos = myutils.hasColumn(train, 3, threshold=.1)
        hasLemma = myutils.hasColumn(train, 2, threshold=.95)
        hasFeats = myutils.hasColumn(train, 5, threshold=.95)

        # The combination splits=False with attention=True is intentionally not generated.
        for splits, attention in [(True, True), (True, False), (False, False)]:
            config = _baseConfig(train, dev)
            tasks = config['tasks']
            if hasUpos:
                tasks['upos'] = {'task_type':'seq', 'column_idx':3}
            if hasLemma:
                tasks['lemma'] = {'task_type':'string2string', 'column_idx':2}
            if hasFeats:
                tasks['feats'] = {'task_type':'seq', 'column_idx':5}
            tasks['tokenization'] = {'task_type':'tok', 'column_idx':-1}
            if not splits:
                tasks['tokenization']['pre_split'] = False
            tasks['dependency'] = {'task_type':'dependency', 'column_idx':6}

            if attention:
                # Every task gets the same list of layer indices 0..12.
                for task in tasks:
                    tasks[task]['layers_to_use'] = [0,1,2,3,4,5,6,7,8,9,10,11,12]

            nameSuffix = UDdir + '.' + str(splits) + '.' + str(attention) + '.' + udVersion
            jsonPath = 'configs/tok.' + nameSuffix + '.json'
            if not attention:
                if splits:
                    dataConfigsSplit.append('../' + jsonPath)
                else:
                    dataConfigsNoSplit.append('../' + jsonPath)
            _writeConfig(jsonPath, {UDdir: config})
            _printTrainCmds(jsonPath, nameSuffix)

        # Single task models; splits=true attention=False
        config = _baseConfig(train, dev)
        config['tasks']['tokenization'] = {'task_type':'tok', 'column_idx':-1}

        nameSuffix = UDdir + '.single.' + udVersion
        jsonPath = 'configs/tok.' + nameSuffix + '.json'
        _writeConfig(jsonPath, {UDdir: config})
        _printTrainCmds(jsonPath, nameSuffix)

    # Multi-dataset models
    # NOTE(review): unlike the single-dataset commands, these commands pass no
    # --parameters_config although the model name is per language model.
    # Confirm whether that is intended.
    for mlm in myutils.mlms:
        for splits in [True, False]:
            modelName = 'multi.' + mlm.replace('/', '_') + '.' + str(splits) + '.' + udVersion
            if myutils.getModel(modelName) == '':
                dataConfigs = dataConfigsSplit if splits else dataConfigsNoSplit
                cmd = 'python3 train.py --dataset_configs ' + ' '.join(dataConfigs) + ' --name ' + modelName
                print(cmd)

