# semantic_analogy_generator.py
# This file handles generating semantic analogies for training the genetic embedding


# Internal Imports

# External Imports
import json
from alive_progress import alive_bar
from gensim.parsing.preprocessing import remove_stopwords
import random
from PyDictionary import PyDictionary
import requests
from bs4 import BeautifulSoup

# Global Variables


def generate_semantic_analogies(name, num_analogies_per_word=10, word_limit=1000):
    """Generate an analogy file from the vocabulary of the dataset ``name``.

    The vocabulary is read from ``<data_dir><name>/<name>.vocab`` where
    ``data_dir`` comes from the ``general`` section of
    ``src/genetic_embedding/core/config.json`` (paths are built by plain string
    concatenation, so ``data_dir`` presumably ends with a path separator).
    A line is kept when, after stop-word removal, it is longer than two
    characters and both ``get_synonyms`` and ``get_antonyms`` return a
    non-empty result for it.  For every kept word, each function in
    ``analogy_functions`` is called once and its result is written as one line
    of ``<data_dir><name>/analogies.txt`` (any existing file is overwritten).

    Args:
        name: Dataset name; used as the sub-directory of ``data_dir`` and as
            the stem of the ``.vocab`` file.
        num_analogies_per_word: Currently unused; kept so existing callers
            that pass it keep working.
        word_limit: Maximum number of vocabulary words to collect.
    """
    # Get some configuration parameters
    with open("src/genetic_embedding/core/config.json") as config_file:
        config = json.load(config_file)["general"]
    src_dir = config['data_dir'] + name + '/'
    src = src_dir + name + '.vocab'

    # Open the source file & start reading in words
    words = []
    print("DATA: Reading vocabulary from file")
    with open(src) as vocab_file, \
            alive_bar(word_limit, bar="smooth", spinner="classic") as bar:
        for line in vocab_file:
            # Remove it if it's a stop word
            line = remove_stopwords(line)
            # Keep the word only if it is long enough and has both synonyms and
            # antonyms (each lookup performs a web request)
            if len(line) > 2 and get_synonyms(line) and get_antonyms(line):
                words.append(line)
                bar()
            # If the counter meets/exceeds the limit, break the loop
            if len(words) >= word_limit:
                break

    # For each word, generate some analogies
    print("ANALOGY: Generating analogies from supplied vocabulary")
    with open(src_dir + 'analogies.txt', 'w') as output_file, \
            alive_bar(len(words), bar="smooth", spinner="classic") as bar:
        for word in words:
            # Build every analogy for this word before writing any of them, so
            # a failing generator never leaves a partially written word
            analogies = [make_analogy(word, words) for make_analogy in analogy_functions]
            output_file.writelines(analogy + "\n" for analogy in analogies)
            # Flush after each word so finished work reaches the file even if a
            # later word fails
            output_file.flush()
            # Update the progress bar
            bar()


def get_synonyms(term):
    """Scrape the thesaurus.com page for ``term`` and return synonym strings.

    Link texts are collected by CSS class, one class at a time, and collection
    stops as soon as more than one entry has been gathered.  The result may
    therefore be empty or hold a single entry when the page offers no more.

    Args:
        term: Word to look up; inserted verbatim into the request URL.

    Returns:
        list of str: Text of the matching ``<a>`` elements, unstripped.
    """
    # TODO: synonym/antonym fetching is very naive; fix later with
    # PoS-invariance in synonym/antonym generation.
    # NOTE(review): no timeout is passed, so a stalled connection blocks here.
    response = requests.get('https://www.thesaurus.com/browse/{}'.format(term))
    soup = BeautifulSoup(response.text, 'lxml')

    # Anchor classes in lookup order.  Per the original notes the second class
    # marks less relevant synonyms; the third is the last-resort fallback.
    synonym_link_classes = (
        'css-1kg1yv8 eh475bn0',
        'css-1gyuw4i eh475bn0',
        'css-1n6g4vv eh475bn0',
    )

    synonyms = []
    for link_class in synonym_link_classes:
        synonyms += [link.text for link in soup.find_all('a', {'class': link_class})]
        # More than one hit is enough; skip the remaining fallback classes
        if len(synonyms) > 1:
            break
    return synonyms


def get_antonyms(term):
    """Scrape the thesaurus.com page for ``term`` and return antonym strings.

    Args:
        term: Word to look up; inserted verbatim into the request URL.

    Returns:
        list of str: Text of every ``<a>`` element carrying the antonym link
        class, unstripped; empty when the page has no such links.
    """
    # NOTE(review): no timeout is passed, so a stalled connection blocks here.
    response = requests.get('https://www.thesaurus.com/browse/{}'.format(term))
    soup = BeautifulSoup(response.text, 'lxml')
    return [link.text for link in soup.find_all('a', {'class': 'css-15bafsg eh475bn0'})]


def synonym_analogy_unrelated(word, words):
    """Build the analogy ``word : synonym :: random word : its synonym``.

    The second pair uses a word drawn at random from ``words`` (it may happen
    to be ``word`` itself).  For each word the first synonym that is part of
    the vocabulary is preferred; when none is, the first synonym returned by
    the lookup is used instead.

    Args:
        word: Word the analogy is built around.
        words: Vocabulary list; source of the random word and of the
            in-vocabulary preference.

    Returns:
        str: Four space-separated terms.

    Raises:
        IndexError: If no synonym is found for either word.
    """
    rand_word = random.choice(words)
    # Strip before the vocabulary check: a padded entry would otherwise never
    # match a vocabulary word and the in-vocabulary preference would be lost
    word_synonyms = [entry.strip() for entry in get_synonyms(word)]
    rand_synonyms = [entry.strip() for entry in get_synonyms(rand_word)]
    if not word_synonyms:
        raise IndexError("no synonyms found for {!r}".format(word))
    if not rand_synonyms:
        raise IndexError("no synonyms found for {!r}".format(rand_word))

    # Prefer in-vocabulary synonyms, otherwise fall back to the first one found
    word_pick = ([s for s in word_synonyms if s in words] or word_synonyms)[0]
    rand_pick = ([s for s in rand_synonyms if s in words] or rand_synonyms)[0]

    # Return the completed string
    return "{} {} {} {}".format(word, word_pick, rand_word.strip(), rand_pick)

def synonym_analogy_antonym(word, words):
    """Build the analogy ``word : synonym :: antonym : antonym's synonym``.

    The antonym is the first antonym of ``word`` for which a synonym lookup
    returns anything.  For both words the first synonym that is part of the
    vocabulary is preferred; when none is, the first synonym returned by the
    lookup is used instead.

    Args:
        word: Word the analogy is built around.
        words: Vocabulary list used for the in-vocabulary preference.

    Returns:
        str: Four space-separated terms.

    Raises:
        IndexError: If no antonym of ``word`` has synonyms, or ``word`` itself
            has no synonyms.
    """
    # Pick the first antonym that has synonyms, keeping the synonyms already
    # fetched for it so the same page is not requested twice
    antonym = None
    antonym_synonyms = []
    for candidate in get_antonyms(word):
        candidate = candidate.strip()
        candidate_synonyms = get_synonyms(candidate)
        if candidate_synonyms:
            antonym, antonym_synonyms = candidate, candidate_synonyms
            break
    if antonym is None:
        # Previously the whole antonym list was passed on as if it were a word
        raise IndexError("no antonym of {!r} has synonyms".format(word))

    # Strip before the vocabulary check so padded entries can still match
    word_synonyms = [entry.strip() for entry in get_synonyms(word)]
    if not word_synonyms:
        raise IndexError("no synonyms found for {!r}".format(word))
    antonym_synonyms = [entry.strip() for entry in antonym_synonyms]

    # Prefer in-vocabulary synonyms, otherwise fall back to the first one found
    word_pick = ([s for s in word_synonyms if s in words] or word_synonyms)[0]
    antonym_pick = ([s for s in antonym_synonyms if s in words] or antonym_synonyms)[0]

    # Return the completed string
    return "{} {} {} {}".format(word, word_pick, antonym, antonym_pick)

def antonym_analogy_unrelated(word, words):
    """Build the analogy ``word : antonym :: random word : its antonym``.

    The second pair uses a word drawn at random from ``words`` (it may happen
    to be ``word`` itself).  For each word the first antonym that is part of
    the vocabulary is preferred; when none is, the first antonym returned by
    the lookup is used instead.

    Args:
        word: Word the analogy is built around.
        words: Vocabulary list; source of the random word and of the
            in-vocabulary preference.

    Returns:
        str: Four space-separated terms.

    Raises:
        IndexError: If no antonym is found for either word.
    """
    rand_word = random.choice(words)
    # Strip before the vocabulary check: a padded entry would otherwise never
    # match a vocabulary word and the in-vocabulary preference would be lost
    word_antonyms = [entry.strip() for entry in get_antonyms(word)]
    rand_antonyms = [entry.strip() for entry in get_antonyms(rand_word)]
    if not word_antonyms:
        raise IndexError("no antonyms found for {!r}".format(word))
    if not rand_antonyms:
        raise IndexError("no antonyms found for {!r}".format(rand_word))

    # Prefer in-vocabulary antonyms, otherwise fall back to the first one found
    word_pick = ([a for a in word_antonyms if a in words] or word_antonyms)[0]
    rand_pick = ([a for a in rand_antonyms if a in words] or rand_antonyms)[0]

    # Return the completed string
    return "{} {} {} {}".format(word, word_pick, rand_word.strip(), rand_pick)

def antonym_analogy_synonym(word, words):
    """Build the analogy ``word : antonym :: synonym : synonym's antonym``.

    The synonym is the first single-word synonym of ``word`` for which an
    antonym lookup returns anything.  For both words the first antonym that is
    part of the vocabulary is preferred; when none is, the first antonym
    returned by the lookup is used instead.

    Args:
        word: Word the analogy is built around.
        words: Vocabulary list used for the in-vocabulary preference.

    Returns:
        str: Four space-separated terms.

    Raises:
        IndexError: If no single-word synonym of ``word`` has antonyms, or
            ``word`` itself has no antonyms.
    """
    # Pick the first single-word synonym that has antonyms, keeping the
    # antonyms already fetched for it so the same page is not requested twice
    synonym = None
    synonym_antonyms = []
    for candidate in get_synonyms(word):
        candidate = candidate.strip()
        # Cheap check first: multi-word synonyms are rejected without a request
        if len(candidate.split()) != 1:
            continue
        candidate_antonyms = get_antonyms(candidate)
        if candidate_antonyms:
            synonym, synonym_antonyms = candidate, candidate_antonyms
            break
    if synonym is None:
        # Previously the whole synonym list was passed on as if it were a word
        raise IndexError("no single-word synonym of {!r} has antonyms".format(word))

    # Strip before the vocabulary check so padded entries can still match
    word_antonyms = [entry.strip() for entry in get_antonyms(word)]
    if not word_antonyms:
        raise IndexError("no antonyms found for {!r}".format(word))
    synonym_antonyms = [entry.strip() for entry in synonym_antonyms]

    # Prefer in-vocabulary antonyms, otherwise fall back to the first one found
    word_pick = ([a for a in word_antonyms if a in words] or word_antonyms)[0]
    synonym_pick = ([a for a in synonym_antonyms if a in words] or synonym_antonyms)[0]

    # Return the completed string
    return "{} {} {} {}".format(word, word_pick, synonym, synonym_pick)

def gendered_analogy_PoS_invariant(word, words):
    """Placeholder for the PoS-invariant gendered analogy generator.

    Not implemented yet: both arguments are ignored and ``None`` is returned.
    """
    return None

def gendered_analogy_PoS_variant(word, words):
    """Placeholder for the PoS-variant gendered analogy generator.

    Not implemented yet: both arguments are ignored and ``None`` is returned.
    """
    return None

# Analogy generators used by generate_semantic_analogies: each one is called
# once per vocabulary word, in the order listed here.  The gendered generators
# are still stubs and are therefore not registered.
analogy_functions = [
    synonym_analogy_unrelated,
    synonym_analogy_antonym,
    antonym_analogy_unrelated,
    antonym_analogy_synonym,
]


if __name__ == "__main__":
    # Script entry point: process the sample "top-10k" dataset, collecting at
    # most 2000 vocabulary words
    generate_semantic_analogies(name="top-10k", word_limit=2000)