import sys
import glob
import os

#import numpy as np

from data import Task1File

import unicodedata

def strip_accents(s):
    """Return *s* with all nonspacing combining marks removed.

    The string is first decomposed with Unicode NFD normalization so that
    precomposed characters (e.g. U+00E9) split into a base character plus
    combining marks; every code point in category ``Mn`` is then dropped.
    The result is left in decomposed form (it is not recomposed to NFC).
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

# `normalize` is the transformation applied to both the reference and the
# system target string before they are compared for equality in main().
# It is currently bound to strip_accents, so strings that differ only in
# combining marks count as identical.  To require exact matches instead,
# swap in the identity function below:
#def normalize(s): return s
normalize = strip_accents

def _score_pair(ref_file, sys_file):
    """Score one system output file against its reference file.

    Both files are loaded with Task1File.  Entries are compared pairwise, in
    order, on the middle element of each 3-tuple in ``.data`` (presumably the
    target form -- confirm against Task1File), after applying ``normalize``
    to both sides.

    Returns:
        A ``(language, accuracy)`` tuple, where ``language`` is the reference
        file's ``.language`` and ``accuracy`` is the fraction of matching
        entries in [0, 1]; or ``None`` if the reference file has no entries
        (accuracy is undefined in that case).

    Raises:
        ValueError: if the two files contain a different number of entries.
    """
    ref_data = Task1File(ref_file)
    sys_data = Task1File(sys_file)

    n_total = len(ref_data.data)
    # Explicit check rather than `assert`: asserts are stripped under -O,
    # and zip() below would then silently truncate to the shorter file.
    if n_total != len(sys_data.data):
        raise ValueError(
            'entry count mismatch: %s has %d entries but %s has %d'
            % (ref_file, n_total, sys_file, len(sys_data.data)))
    if n_total == 0:
        return None

    n_identical = sum(
        normalize(ref_trg) == normalize(sys_trg)
        for (_, ref_trg, _), (_, sys_trg, _)
        in zip(ref_data.data, sys_data.data))
    return ref_data.language, n_identical / n_total


def main():
    """Print per-language accuracies of one or more systems as LaTeX table rows.

    Command line: ``REF_DIR SYS_DIR [SYS_DIR ...]``.

    Every file matching ``*-dev`` in REF_DIR is compared with the file of the
    same name in each SYS_DIR (reference files with no counterpart in a
    system directory are skipped for that system).  Only languages scored
    for *every* system are printed, sorted by name, one row per language:
    the language name with its first letter upper-cased followed by each
    system's accuracy as a percentage with one decimal, joined by `` & ``
    and terminated by ``\\\\``.

    Exits with a usage message if fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        sys.exit('usage: %s REF_DIR SYS_DIR [SYS_DIR ...]' % sys.argv[0])
    ref_dir = sys.argv[1]
    sys_dirs = sys.argv[2:]

    # The set of reference files does not depend on the system directory,
    # so list it once.
    ref_files = glob.glob(os.path.join(ref_dir, '*-dev'))

    # One {language: accuracy} dict per system directory, in argument order.
    results = []
    for sys_dir in sys_dirs:
        scores = {}
        for ref_file in ref_files:
            sys_file = os.path.join(sys_dir, os.path.basename(ref_file))
            if not os.path.exists(sys_file):
                continue
            scored = _score_pair(ref_file, sys_file)
            if scored is None:
                print('warning: %s has no entries, skipping' % ref_file,
                      file=sys.stderr)
                continue
            language, accuracy = scored
            scores[language] = accuracy
        results.append(scores)

    # Keep only the languages that every system produced output for, so each
    # printed row has a value in every column.
    languages = set(results[0])
    for result in results[1:]:
        languages &= set(result)

    for language in sorted(languages):
        # Slicing (rather than language[0]) tolerates an empty language name;
        # str.capitalize() is avoided because it would lower-case the rest.
        row = [language[:1].upper() + language[1:]] + [
            '%.1f' % (100 * result[language]) for result in results]
        print(' ' * 8 + ' & '.join(row) + r' \\')

# Run the evaluation only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()

