import codecs
import os
import subprocess

import pandas as pd

#---------------------------------------
def separate_labels(name):
    """Load one label per line from a UTF-8 text file.

    Each line has its surrounding whitespace stripped and every occurrence
    of the ``__label__`` marker removed.

    Args:
        name: Path of the file to read.

    Returns:
        A list with one cleaned string per line of the file, in file order.
        Blank lines yield empty strings.
    """
    with codecs.open(name, "r", encoding="utf-8") as handle:
        return [row.strip().replace("__label__", "") for row in handle]
    
#----------------------------------------
def compare_labels(model1, model2):
    """Return the fraction of positions at which two prediction lists agree.

    Args:
        model1: Sequence of predictions from the first model.
        model2: Sequence of predictions from the second model, aligned
            position-by-position with ``model1``.

    Returns:
        The share of positions whose predictions are equal, as a float in
        ``[0.0, 1.0]``. Two empty sequences yield ``0.0`` instead of raising
        ``ZeroDivisionError``.

    Raises:
        ValueError: If the two sequences differ in length, since a
            position-wise comparison would then be misaligned or truncated.
    """
    total = len(model1)

    if total != len(model2):
        raise ValueError(
            "Cannot compare predictions of different lengths: "
            "{} vs {}".format(total, len(model2))
        )

    # Nothing to compare: report no agreement rather than dividing by zero.
    if total == 0:
        return 0.0

    same = sum(1 for pred1, pred2 in zip(model1, model2) if pred1 == pred2)
    return same / total
#---------------------------------------

# Location of the fastText executable and of the directory of test files,
# both relative to the current working directory.
_FASTTEXT_BIN = "./fastText/fasttext"
_TEST_DIR = os.path.join(".", "TW_Test")


def _run_prediction(model_path, test_path, output_path):
    """Run ``fasttext predict`` and store its standard output in a file.

    The command is passed as an argument list (no shell), so file names that
    contain spaces or shell metacharacters are handled safely.

    Args:
        model_path: Path of the model file given to ``fasttext predict``.
        test_path: Path of the file with the samples to classify.
        output_path: Path of the file that receives the command's stdout.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status, so a failed run is reported here rather than surfacing
            later as an empty predictions file.
    """
    argv = [_FASTTEXT_BIN, "predict", model_path, test_path]
    # Echo the equivalent shell command line for traceability.
    print(" ".join(argv) + " > " + output_path)
    with open(output_path, "wb") as sink:
        subprocess.run(argv, stdout=sink, check=True)


results = []

# os.listdir returns entries in arbitrary order; sort so the rows of the
# saved table come out in a reproducible order.
for file_name in sorted(os.listdir(_TEST_DIR)):
    # Only raw test files are evaluated; prediction outputs written by
    # earlier runs contain "results" in their name and are skipped.
    if not file_name.endswith(".txt") or "results" in file_name:
        continue

    # Region and country are the second and third dot-separated fields
    # of the file name.
    meta = file_name.split(".")
    if len(meta) < 3:
        print("Skipping " + file_name + ": cannot read region and country from its name")
        continue
    region = meta[1]
    country = meta[2]
    print(region, country, file_name)

    test_path = os.path.join(_TEST_DIR, file_name)
    geo_name = "round5." + region
    baseline_name = "round5.baseline"

    # Each entry: (tag used in the output file name, model file to run).
    # For both the region-specific ("geo") model and the baseline model,
    # the full-size and the quantized variant are evaluated.
    runs = [
        ("geo.full", geo_name + ".full.bin"),
        ("geo.ftz", geo_name + ".reduced.ftz"),
        ("baseline.full", baseline_name + ".full.bin"),
        ("baseline.ftz", baseline_name + ".reduced.ftz"),
    ]

    # Run every model on the test file and read back its predictions.
    predictions = {}
    for tag, model_path in runs:
        output_path = test_path + "." + tag + "_results.txt"
        _run_prediction(model_path, test_path, output_path)
        predictions[tag] = separate_labels(output_path)

    # Agreement between geo and baseline models (full and quantized).
    geo_non_full = compare_labels(predictions["geo.full"], predictions["baseline.full"])
    geo_non_ftz = compare_labels(predictions["geo.ftz"], predictions["baseline.ftz"])
    # Agreement between the full and quantized variant of the same model.
    geo_compression = compare_labels(predictions["geo.full"], predictions["geo.ftz"])
    baseline_compression = compare_labels(predictions["baseline.full"], predictions["baseline.ftz"])

    print("Geo:", geo_non_full, geo_non_ftz, "Compression:", geo_compression, baseline_compression)
    results.append([region, country, geo_non_full, geo_non_ftz, geo_compression, baseline_compression])

# Now save results
df = pd.DataFrame(
    results,
    columns=["Region", "Country", "Geo (Full)", "Geo (FTZ)", "Compression (geo)", "Compression (base)"],
)
print(df)
df.to_csv("evaluation.v1.csv")
        