import os
import itertools
import pandas as pd
from nlp_safety_prop.properties.check_consistency import ModelInputRepresenter, TrainedRobertaClassifier, check_consistency, WordRelationTransitivity
from nlp_safety_prop.utils.paths import get_data_path
from .transrelation import Transrelation


def main():
    """Run the Italian synonymy transitivity-consistency experiment.

    Reads word pairs from the CogALex-VI Italian test file, selects up to
    100 distinct words, builds every ordered (word_1, word_2, word_3) triple
    from them and passes the triples to ``check_consistency`` together with
    a ``ModelInputRepresenter``, a ``TrainedRobertaClassifier`` and the
    ``WordRelationTransitivity`` rule. The ``"hypotheses_dataframe"`` entry
    of the result is written as a tab-separated file.

    The input and output paths are relative, so the script has to be started
    from the directory that contains ``data/`` and ``experiments/``.
    """
    print("Run synonymy experiment...")
    data = pd.read_csv('./data/raw/synonymy/CogALex_VI/test/test_italian_data.txt', sep='\t',
                       header=None, names=['word_1', 'word_2'])

    # Missing cells are dropped so that NaN never ends up in the vocabulary.
    vocabulary = set(data.word_1.dropna()).union(data.word_2.dropna())
    # Set iteration order for strings changes between interpreter runs (hash
    # randomization); sorting before slicing makes the selected sample, and
    # therefore the experiment, reproducible.
    max_words = 100
    words = sorted(vocabulary)[:max_words]

    # The words are distinct, so the Cartesian product cannot contain
    # duplicate rows and no de-duplication pass is required.
    # Append e.g. `.sample(500)` here to run on a random subset of triples.
    triple_data = pd.DataFrame.from_records(
        itertools.product(words, words, words),
        columns=['word_1', 'word_2', 'word_3'],
        index=None)

    model_path = get_data_path("models/xlm_roberta_synonymy/")
    if not os.path.exists(model_path):
        # NOTE(review): the previous code bound this object to `representer`
        # and then overwrote it unconditionally, so the value was never used.
        # The call is kept only in case constructing Transrelation has side
        # effects that the steps below rely on -- confirm, then either make
        # that explicit or remove the call.
        Transrelation()
    representer = ModelInputRepresenter()
    # NOTE(review): the classifier receives the raw relative model path, not
    # `model_path` -- presumably it resolves the path itself; verify.
    hypothesized_labeler = TrainedRobertaClassifier("models/xlm_roberta_synonymy/")
    rule = WordRelationTransitivity()
    results = check_consistency(triple_data,
                                representer,
                                hypothesized_labeler,
                                rule)

    output_path = './experiments/synonymy/hypotheses_italian.csv'
    # Create the target directory so a long run cannot fail at the last step
    # merely because the folder is missing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # Let pandas write the file itself: the encoding is pinned to UTF-8
    # (the words may contain accented characters) and line terminators are
    # not translated a second time by a text-mode file handle.
    results["hypotheses_dataframe"].to_csv(output_path, sep='\t', encoding='utf-8')


# Run the experiment only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()
