import os
import myutils
import json

# Make sure the output directory for the generated dataset configs exists.
# exist_ok=True replaces the separate isdir() check, so there is no window
# between "check" and "create" in which another process could create it.
os.makedirs('configs/', exist_ok=True)

## single-dataset models
# For every UD version and every treebank directory that has a training
# split, write a dataset config to configs/ and print one train.py command
# per language model for which myutils.getModel() returns ''.
for udVersion in myutils.udVersions:
    udPath = 'data/ud-treebanks-v' + udVersion + '.singleToken/'
    for UDdir in sorted(os.listdir(udPath)):
        # Only treebank directories (name starts with "UD") are considered.
        if not UDdir.startswith("UD") or not os.path.isdir(udPath + UDdir):
            continue
        train, dev, test = myutils.getTrainDevTest(udPath + UDdir)

        # An empty string for train means there is nothing to train on.
        if train == '':
            continue
        # Skip treebanks whose word column (index 1) does not pass the
        # hasColumn check; the exact meaning of `threshold` is defined in
        # myutils.hasColumn -- presumably a minimum fraction of filled
        # cells, TODO confirm.
        if not myutils.hasColumn(train, 1, threshold=.1):
            continue

        # Data paths get a '../' prefix, matching the '../' used for the
        # config paths in the printed command -- presumably because
        # train.py is run from a subdirectory; verify against the caller.
        config = {}
        config['train_data_path'] = '../' + train
        if dev != '':
            config['dev_data_path'] = '../' + dev
        config['word_idx'] = 1

        # Optional tasks are only added when their column passes hasColumn;
        # the dependency task is always added.
        config['tasks'] = {}
        if myutils.hasColumn(train, 3, threshold=.1):
            config['tasks']['upos'] = {'task_type':'seq', 'column_idx':3}
        if myutils.hasColumn(train, 2, threshold=.95):
            config['tasks']['lemma'] = {'task_type':'string2string', 'column_idx':2}
        if myutils.hasColumn(train, 5, threshold=.95):
            config['tasks']['feats'] = {'task_type':'seq', 'column_idx':5}
        config['tasks']['dependency'] = {'task_type':'dependency', 'column_idx':6}

        dataConfig = {UDdir: config}
        jsonPath = 'configs/notok.' + UDdir + '-' + udVersion + '.json'
        # Use a context manager so the file is flushed and closed right
        # away instead of leaking one open handle per treebank.
        with open(jsonPath, 'w') as jsonFile:
            json.dump(dataConfig, jsonFile, indent=4)

        for mlm in myutils.mlms:
            modelName = 'notok.' + mlm.replace('/', '_') + '.' + UDdir + '.' + udVersion
            # makeParams is called for every mlm, before the getModel check,
            # exactly as before (it may have side effects not visible here).
            hyperParams = myutils.makeParams('machamp/configs/params.json', mlm)
            # getModel() returning '' is taken to mean that no model with
            # this name exists yet, so a training command is emitted.
            if myutils.getModel(modelName) == '':
                print('python3 train.py --dataset_config ../' + jsonPath + ' --parameters_config ../' + hyperParams + ' --name ' + modelName)


