import os
import conll18_ud_eval
import myutils
# Root directories of the two treebank collections that are compared below
# (presumably UD releases 2.2 and 2.10, judging by the directory names).
# Both end with '/' because treebank directory names are appended to them
# by plain string concatenation.
root_old = 'data/ud-treebanks-v2.2.singleToken/'
root_new = 'data/ud-treebanks-v2.10.singleToken/'

def count(path):
    """Count short lines and non-comment lines in the text file at ``path``.

    Args:
        path: Path of the file to scan; it is decoded as UTF-8.

    Returns:
        A ``(lines, words)`` tuple. ``lines`` is the number of lines shorter
        than two characters (in practice a bare newline), and ``words`` is
        the number of remaining lines whose first character is not ``#``.
    """
    lines = 0
    words = 0
    # The context manager guarantees the handle is closed, even on errors.
    with open(path, encoding='utf-8') as handle:
        for line in handle:
            if len(line) < 2:
                lines += 1
            elif line[0] != '#':
                words += 1
    return lines, words
            

# Directory names available under root_new. Listed once and kept in a set so
# the membership test in the loop does not re-read the directory each time.
newDirs = set(os.listdir(root_new))

# For every treebank directory present under both roots, evaluate the old
# test file against the new test file and print the 'Tokens' F1 score.
for udDir in os.listdir(root_old):
    train, dev, test = myutils.getTrainDevTest(root_old + udDir)
    # An empty train path presumably means no training file was found
    # (verify against myutils.getTrainDevTest); such treebanks are skipped.
    if train == '':
        continue
    if udDir not in newDirs:
        continue
    newTrain, newDev, newTest = myutils.getTrainDevTest(root_new + udDir)
    # Only referenced by the optional debug print at the end of the loop body.
    oldLines, oldWords = count(train)
    newLines, newWords = count(newTrain)

    # The new test file is passed as gold and the old one as the prediction.
    # Context managers make sure both handles are closed after loading.
    with open(newTest, encoding='utf-8') as goldFile:
        goldSent = conll18_ud_eval.load_conllu(goldFile)
    with open(test, encoding='utf-8') as predFile:
        predSent = conll18_ud_eval.load_conllu(predFile)
    try:
        scores = conll18_ud_eval.evaluate(goldSent, predSent)
        tokScore = scores['Tokens'].f1
    except Exception:
        # Best effort: a failed evaluation is reported as a score of 0.0.
        # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
        # still stop the script.
        print(udDir, 0.0)
        continue
    print(udDir, tokScore)
    #print('update', dev, oldWords, newWords)
        

