import os
import json
import math
import nltk
import random
import numpy as np
import torch
from collections import OrderedDict

# Inventory of connective words/phrases (lower-case). Every value is the
# constant 1, so only key membership carries information here.
# NOTE(review): one key ('what’s more') uses a typographic apostrophe (U+2019),
# not an ASCII "'"; matching text must use the same character.
conj_dict = {'accordingly': 1, 'after all': 1, 'although': 1, 'and': 1, 'as': 1, 'as a result': 1, 'because': 1, 'but': 1, 
    'by': 1, 'eventually': 1, 'first': 1, 'for example': 1, 'for instance': 1, 'for one thing': 1, 'further': 1, 
    'furthermore': 1, 'however': 1, 'in': 1, 'in addition': 1, 'in fact': 1, 'in more detail': 1, 'in other words': 1, 
    'in particular': 1, 'in short': 1, 'in sum': 1, 'in the end': 1, 'indeed': 1, 'instead': 1, 'more specifically': 1, 
    'more to the point': 1, 'moreover': 1, 'namely': 1, 'on the contrary': 1, 'on the one hand': 1, 'on the whole': 1, 
    'or': 1, 'overall': 1, 'particularly': 1, 'since': 1, 'so': 1, 'specifically': 1, 
    'then': 1,  'thereby': 1, 'by means of': 1, 'in that': 1, 'with': 1, 'afterward': 1, 
    'afterwards': 1, 'as of now': 1, 'before': 1, 'finally': 1, 'later': 1, 'next': 1, 'now': 1, 'previously': 1, 
    'since then': 1, 'soon': 1, 'subsequently': 1, 'thereafter': 1, 'ultimately': 1, 'when': 1, 
    'while': 1, 'after': 1,  'before that': 1, 'earlier': 1, 'prior to this': 1, 'upon': 1,
    'all the while': 1, 'at that time': 1, 'at the same time': 1, 'at the time': 1, 'during that time': 1, 
    'in the meantime': 1, 'meanwhile': 1, 'simultaneously': 1, 'if': 1, 
    'in order': 1, 'so as': 1, 'for that purpose': 1, 'through': 1, 
    'for': 1, 'for the purpose of': 1,  'in order to': 1, 'so that': 1, 'thus': 1, 'with the goal': 1, 
    'with the goal of': 1, 'with the purpose of': 1, 'additionally': 1, 'also': 1, 'as a matter of fact': 1, 
    'as it turns out': 1, 'besides': 1, 'by comparison': 1, 'in addition to': 1, 'in response': 1, 
    'in return': 1, 'incidentally': 1, 'likewise': 1, 'more ever': 1, 'nevertheless': 1, 'plus': 1, 'second': 1, 
    'separately': 1, 'similarly': 1, 'that is': 1, 'third': 1, 'what’s more': 1, 'whereas': 1, 'yet': 1, 'rather': 1, 
    'alternatively': 1, 'otherwise': 1, 'generally': 1, 'as part of that': 1, 'for one': 1, 'in this case': 1,
    'in general': 1, 'in summary': 1, 'even though': 1, 'granted': 1, 'by contrast': 1, 'despite': 1, 
    'despite this': 1, 'nonetheless': 1, 'regardless': 1, 'still': 1, 'though': 1, 'conversely': 1, 'in comparison': 1, 
    'in comparison to the fact': 1, 'in contrast': 1, 'on the other hand': 1, 'as a result of': 1,
    'as evidence': 1, 'because of': 1, 'considering that': 1, 'given': 1, 'given that': 1, 
    'inasmuch as': 1, 'this is because': 1, 'as such': 1, 'consequently': 1, 
    'therefore': 1,  'for the reason that': 1, 'insofar as': 1, 'it is because': 1,
    'as a consequence': 1, 'because of that': 1, 'for that reason': 1, 'hence': 1, 'to this end': 1}

# Connective phrase -> its token sequence. Entries are written longest phrase
# first (five-token phrases down to single words).
# Tokens are not always a plain whitespace split: 'what’s more' is stored as
# ['what', '’s', 'more'] — presumably to match the upstream tokenizer's output;
# TODO confirm which tokenizer.
conj_token = {'as a matter of fact': ['as', 'a', 'matter', 'of', 'fact'], 'in comparison to the fact': ['in', 'comparison', 'to', 'the', 'fact'], 
    'more to the point': ['more', 'to', 'the', 'point'], 'on the one hand': ['on', 'the', 'one', 'hand'], 
    'at the same time': ['at', 'the', 'same', 'time'], 'for the purpose of': ['for', 'the', 'purpose', 'of'], 
    'with the goal of': ['with', 'the', 'goal', 'of'], 'with the purpose of': ['with', 'the', 'purpose', 'of'], 
    'as it turns out': ['as', 'it', 'turns', 'out'], 'as part of that': ['as', 'part', 'of', 'that'], 
    'on the other hand': ['on', 'the', 'other', 'hand'], 'as a result of': ['as', 'a', 'result', 'of'], 
    'for the reason that': ['for', 'the', 'reason', 'that'], 'as a result': ['as', 'a', 'result'], 
    'for one thing': ['for', 'one', 'thing'], 'in more detail': ['in', 'more', 'detail'], 
    'in other words': ['in', 'other', 'words'], 'in the end': ['in', 'the', 'end'], 'on the contrary': ['on', 'the', 'contrary'], 
    'on the whole': ['on', 'the', 'whole'], 'by means of': ['by', 'means', 'of'], 'as of now': ['as', 'of', 'now'], 
    'prior to this': ['prior', 'to', 'this'], 'all the while': ['all', 'the', 'while'], 'at that time': ['at', 'that', 'time'], 
    'at the time': ['at', 'the', 'time'], 'during that time': ['during', 'that', 'time'], 
    'in the meantime': ['in', 'the', 'meantime'], 'for that purpose': ['for', 'that', 'purpose'], 
    'in order to': ['in', 'order', 'to'], 'with the goal': ['with', 'the', 'goal'], 'in addition to': ['in', 'addition', 'to'], 
    'what’s more': ['what', '’s', 'more'], 'in this case': ['in', 'this', 'case'], 'this is because': ['this', 'is', 'because'], 
    'it is because': ['it', 'is', 'because'], 'as a consequence': ['as', 'a', 'consequence'], 
    'because of that': ['because', 'of', 'that'], 'for that reason': ['for', 'that', 'reason'], 
    'to this end': ['to', 'this', 'end'], 'after all': ['after', 'all'], 'for example': ['for', 'example'], 
    'for instance': ['for', 'instance'], 'in addition': ['in', 'addition'], 'in fact': ['in', 'fact'], 
    'in particular': ['in', 'particular'], 'in short': ['in', 'short'], 'in sum': ['in', 'sum'], 
    'more specifically': ['more', 'specifically'], 'in that': ['in', 'that'], 'since then': ['since', 'then'], 
    'before that': ['before', 'that'], 'in order': ['in', 'order'], 'so as': ['so', 'as'], 'so that': ['so', 'that'], 
    'by comparison': ['by', 'comparison'], 'in response': ['in', 'response'], 'in return': ['in', 'return'], 
    'more ever': ['more', 'ever'], 'that is': ['that', 'is'], 'for one': ['for', 'one'], 'in general': ['in', 'general'], 
    'in summary': ['in', 'summary'], 'even though': ['even', 'though'], 'by contrast': ['by', 'contrast'], 
    'despite this': ['despite', 'this'], 'in comparison': ['in', 'comparison'], 'in contrast': ['in', 'contrast'], 
    'as evidence': ['as', 'evidence'], 'because of': ['because', 'of'], 'considering that': ['considering', 'that'], 
    'given that': ['given', 'that'], 'inasmuch as': ['inasmuch', 'as'], 'as such': ['as', 'such'], 
    'insofar as': ['insofar', 'as'], 'accordingly': ['accordingly'], 'although': ['although'], 'and': ['and'], 
    'as': ['as'], 'because': ['because'], 'but': ['but'], 'by': ['by'], 'eventually': ['eventually'], 'first': ['first'], 
    'further': ['further'], 'furthermore': ['furthermore'], 'however': ['however'], 'in': ['in'], 'indeed': ['indeed'], 
    'instead': ['instead'], 'moreover': ['moreover'], 'namely': ['namely'], 'or': ['or'], 'overall': ['overall'], 
    'particularly': ['particularly'], 'since': ['since'], 'so': ['so'], 'specifically': ['specifically'], 'then': ['then'], 
    'thereby': ['thereby'], 'with': ['with'], 'afterward': ['afterward'], 'afterwards': ['afterwards'], 'before': ['before'], 
    'finally': ['finally'], 'later': ['later'], 'next': ['next'], 'now': ['now'], 'previously': ['previously'], 'soon': ['soon'], 
    'subsequently': ['subsequently'], 'thereafter': ['thereafter'], 'ultimately': ['ultimately'], 'when': ['when'], 
    'while': ['while'], 'after': ['after'], 'earlier': ['earlier'], 'upon': ['upon'], 'meanwhile': ['meanwhile'], 
    'simultaneously': ['simultaneously'], 'if': ['if'], 'through': ['through'], 'for': ['for'], 'thus': ['thus'], 
    'additionally': ['additionally'], 'also': ['also'], 'besides': ['besides'], 'incidentally': ['incidentally'], 
    'likewise': ['likewise'], 'nevertheless': ['nevertheless'], 'plus': ['plus'], 'second': ['second'], 
    'separately': ['separately'], 'similarly': ['similarly'], 'third': ['third'], 'whereas': ['whereas'], 'yet': ['yet'], 
    'rather': ['rather'], 'alternatively': ['alternatively'], 'otherwise': ['otherwise'], 'generally': ['generally'], 
    'granted': ['granted'], 'despite': ['despite'], 'nonetheless': ['nonetheless'], 'regardless': ['regardless'], 
    'still': ['still'], 'though': ['though'], 'conversely': ['conversely'], 'given': ['given'], 'consequently': ['consequently'], 
    'therefore': ['therefore'], 'hence': ['hence']}

# Integer count per connective; read by sample_distribution(), sample_number()
# and conj_freq_dict(). Presumably corpus occurrence counts — TODO confirm the
# source corpus.
# NOTE(review): several keys are truncated phrases that do not appear in
# conj_dict (e.g. 'even', 'inasmuch', 'as a matter of', 'for the purpose',
# 'on the'), so the key sets of the two tables differ.
# sample_distribution() requires the key 'and' to be present.
conj_number_dict = {'or': 422014, 'if': 1029752, 'and': 7394456, 'then': 944705, 'for': 355613, 'also': 336416, 'so': 702825, 'but': 2046122, 
                    'first': 164014, 'as': 1944986, 'while': 407284, 'in': 494250, 'when': 1278887, 'second': 51198, 'before': 574736, 
                    'earlier': 42059, 'by': 220723, 'through': 67943, 'still': 408180, 'simultaneously': 8695, 'instead': 104397, 'yet': 156960, 
                    'after': 262409, 'because': 306090, 'third': 9014, 'since': 149986, 'now': 559849, 'later': 162010, 'finally': 168509, 'soon': 165358, 
                    'next': 119440, 'further': 59392, 'with': 142093, 'though': 232900, 'eventually': 65421, 'even': 49728, 'indeed': 25381, 'therefore': 45712, 
                    'thus': 44345, 'however': 144912, 'upon': 22727, 'plus': 8553, 'although': 83650, 'despite': 8764, 'generally': 15325, 'besides': 15449, 
                    'afterwards': 11665, 'rather': 45777, 'in order': 47781, 'furthermore': 8799, 'consequently': 4776, 'whereas': 8373, 'additionally': 3650, 
                    'otherwise': 22700, 'hence': 7327, 'likewise': 5539, 'meanwhile': 12957, 'thereafter': 3673, 'separately': 2536, 'afterward': 5050, 
                    'moreover': 9372, 'nevertheless': 11201, 'regardless': 10050, 'accordingly': 4833, 'particularly': 7097, 'ultimately': 10416, 'similarly': 6662, 
                    'because of': 192, 'specifically': 8757, 'as a result': 7237, 'nonetheless': 3547, 'previously': 17085, 'subsequently': 4543, 
                    'in addition': 8190, 'with the goal': 223, 'overall': 2767, 'as a matter of': 1267, 'thereby': 6337, 'inasmuch': 467, 'insofar': 414, 
                    'alternatively': 1373, 'conversely': 1511, 'for the purpose': 1662, 'by means': 2648, 'incidentally': 714, 'with the purpose': 184, 
                    'namely': 1057, 'more to the': 280, 'all the': 423, 'for the reason': 84, 'as a': 188, 'given': 6, 'prior to': 2, 'as part of': 64, 
                    'in more': 5, 'on the': 61, 'as of': 10, 'in comparison to the': 3, 'with the': 1, 'on the other': 2, 'for one': 3, 'in this': 1,
                    'at the same': 1, 'on the one': 2}
    
# Class id (as a string, '0' .. '30') -> connectives belonging to that class.
# generate_conj_can_retrieval_conj() indexes this table with str(class_id).
# A connective may appear in several classes.
# NOTE(review): the meaning of each numeric class is not stated in this file —
# presumably discourse-relation senses; confirm against the data pipeline.
conj_class_list = {'0': ['although', 'but', 'or'], '1': ['although', 'but', 'however', 'while'],
                     '2': ['although', 'but', 'despite', 'furthermore', 'however', 'nevertheless', 'nonetheless', 'regardless', 'still', 'though', 'whereas', 'while', 'yet'], '3': ['although', 'and', 'but', 'conversely', 'however', 'meanwhile', 'nevertheless', 'regardless', 'still', 'when', 'whereas', 'while', 'yet'], '4': ['also', 'likewise', 'similarly'], '5': ['as', 'because', 'because of', 'given', 'in', 'indeed', 'since', 'so', 'thus'], '6': ['as a result', 'consequently', 'so', 'therefore', 'thus'], '7': ['because'], '8': ['so'], '9': ['although', 'as', 'because', 'because of', 'for', 'given', 'however', 'in', 'indeed', 'since', 'so', 'specifically', 'with'], '10': ['accordingly', 'and', 'as', 'as a result', 'but', 'consequently', 'finally', 'furthermore', 'hence', 'indeed', 'so', 'then', 'therefore', 'thus', 'ultimately'], '11': ['so'], '12': ['then'], '13': ['but', 'if', 'in order', 'when', 'with'], '14': ['by', 'through'], '15': ['for', 'in', 'in order', 'thus', 'with the goal'], '16': ['accordingly', 'additionally', 'after', 'also', 'although', 'and', 'as', 'besides', 'but', 'first', 'further', 'furthermore', 'however', 'in addition', 'incidentally', 'indeed', 'instead', 'likewise', 'meanwhile', 'moreover', 'nevertheless', 'or', 'overall', 'plus', 'second', 'separately', 'similarly', 'so', 'specifically', 'then', 'third', 'whereas', 'while', 'with', 'yet'], '17': ['or'], '18': ['indeed', 'namely', 'or', 'rather', 'specifically'], '19': ['alternatively', 'otherwise'], '20': ['however', 'rather'], '21': ['generally'], '22': ['and', 'first', 'for one', 'indeed', 'specifically', 'while', 'with'], '23': ['in', 'indeed', 'overall', 'rather', 'so', 'specifically', 'ultimately'], '24': ['accordingly', 'although', 'and', 'as', 'as a result', 'because', 'but', 'by', 'eventually', 'first', 'further', 'furthermore', 'however', 'in', 'in addition', 'indeed', 'instead', 'moreover', 'namely', 'or', 'overall', 'particularly', 'since', 'so', 'specifically', 'then', 
'thus', 'ultimately', 'when', 'with'], '25': ['in', 'thereby'], '26': ['and', 'by', 'specifically', 'with'], '27': ['but', 'instead', 'rather'], '28': ['afterward', 'afterwards', 'and', 'before', 'eventually', 'finally', 'later', 'next', 'now', 'previously', 'soon', 'subsequently', 'then', 'thereafter', 'ultimately', 'when', 'while'], '29': ['after', 'and', 'before', 'earlier', 'first', 'previously', 'upon', 'when', 'with'], '30': ['and', 'as', 'in', 'meanwhile', 'simultaneously', 'when', 'while', 'with']}

conj_can_retrieval = {'although': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 
                        'but': [4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 28, 29], 'or': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'however': [0, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 17, 18, 19, 21, 22, 23, 25, 26, 27, 28, 29], 'while': [0, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 26, 27, 29], 'despite': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 26, 29, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 7, 14, 17, 19, 20, 21, 25], 'furthermore': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 26, 29], 'nevertheless': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 26, 29], 'nonetheless': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 26, 29, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 7, 14, 17, 19, 20, 21, 25], 'regardless': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 26, 29], 'still': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 26, 29], 'though': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 26, 29, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 7, 14, 17, 19, 20, 21, 25], 'whereas': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 26, 29], 'yet': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 
23, 25, 26, 29], 'and': [0, 1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'conversely': [4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 21, 23, 25, 4, 7, 14, 17, 19, 20, 21, 25, 4, 5, 6, 7, 8, 11, 12, 14, 17, 18, 19, 20, 21, 23, 25, 7, 14, 15, 19, 21, 25], 'meanwhile': [0, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'when': [0, 1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'also': [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], 'likewise': [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], 'similarly': [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], 'as': [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'because': [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'because of': [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'given': [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'in': [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'indeed': [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'since': [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'so': [0, 1, 2, 3, 4, 7, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 25, 26, 27, 28, 29], 'thus': [0, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 25, 26, 27, 28, 29], 'as a result': [0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 18, 19, 20, 21, 22, 25, 26, 27, 28, 29], 'consequently': [0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 18, 19, 20, 21, 22, 25, 26, 27, 28, 29], 'therefore': [0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 18, 19, 20, 
21, 22, 25, 26, 27, 28, 29], 'for': [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'specifically': [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'with': [0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27, 28], 'accordingly': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 29], 'finally': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 29], 'hence': [4, 7, 14, 17, 19, 20, 21, 25, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 29, 7, 14, 15, 19, 21, 25, 0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 19, 21, 29, 0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27], 'then': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 29], 'ultimately': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 29], 'if': [4, 5, 6, 7, 8, 11, 12, 14, 17, 18, 19, 20, 21, 23, 25, 0, 1, 2, 3, 4, 7, 8, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 26, 27, 28, 29], 'in order': [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29], 'by': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 27, 28, 29], 'through': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 27, 28, 29, 4, 19, 21], 'with the goal': [0, 1, 2, 3, 4, 7, 8, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 26, 27, 28, 29], 'additionally': [7, 14, 15, 19, 21, 25, 0, 4, 6, 7, 8, 11, 12, 14, 15, 17, 19, 20, 21, 25, 27, 0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 19, 21, 29, 4, 19, 21, 4, 19, 21, 4, 19, 21, 4, 19, 21, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'after': [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27, 29], 'besides': [7, 14, 15, 19, 21, 25, 0, 4, 6, 7, 8, 11, 12, 14, 
15, 17, 19, 20, 21, 25, 27, 0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 19, 21, 29, 4, 19, 21, 4, 19, 21, 4, 19, 21, 4, 19, 21, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'first': [0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'further': [0, 1, 2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 17, 19, 20, 21, 25, 27, 29], 'in addition': [0, 1, 2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 17, 19, 20, 21, 25, 27, 29], 'incidentally': [7, 14, 15, 19, 21, 25, 0, 4, 6, 7, 8, 11, 12, 14, 15, 17, 19, 20, 21, 25, 27, 0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 19, 21, 29, 4, 19, 21, 4, 19, 21, 4, 19, 21, 4, 19, 21, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'instead': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 17, 19, 20, 21, 22, 25, 26, 27, 28, 29], 'moreover': [0, 1, 2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 17, 19, 20, 21, 25, 27, 29], 'overall': [0, 1, 2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 17, 19, 20, 21, 25, 27, 29], 'plus': [7, 14, 15, 19, 21, 25, 0, 4, 6, 7, 8, 11, 12, 14, 15, 17, 19, 20, 21, 25, 27, 0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 19, 21, 29, 4, 19, 21, 4, 19, 21, 4, 19, 21, 4, 19, 21, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'second': [7, 14, 15, 19, 21, 25, 0, 4, 6, 7, 8, 11, 12, 14, 15, 17, 19, 20, 21, 25, 27, 0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 19, 21, 29, 4, 19, 21, 4, 19, 21, 4, 19, 21, 4, 19, 21, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'separately': [7, 14, 15, 19, 21, 25, 0, 4, 6, 7, 8, 11, 12, 14, 15, 17, 19, 20, 21, 25, 27, 0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 19, 21, 29, 4, 19, 21, 4, 19, 21, 4, 19, 21, 4, 19, 21, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'third': [7, 14, 15, 19, 21, 25, 0, 4, 6, 7, 8, 11, 12, 14, 15, 17, 19, 20, 21, 25, 27, 0, 1, 2, 3, 4, 7, 12, 13, 14, 17, 19, 21, 29, 4, 19, 21, 4, 19, 21, 4, 19, 21, 4, 19, 21, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'namely': 
[0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 17, 19, 21, 22, 25, 26, 28, 29], 'rather': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 19, 21, 22, 25, 26, 28, 29], 'alternatively': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], 'otherwise': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], 'generally': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29], 'for one': [0, 4, 6, 7, 8, 11, 12, 14, 15, 17, 19, 20, 21, 25, 27], 'eventually': [0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27], 'particularly': [4, 19, 21, 0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27], 'thereby': [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 26, 27, 28, 29], 'afterward': [0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'afterwards': [0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'before': [0, 1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'later': [0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'next': [0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'now': [0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27, 0, 1, 2, 4, 
5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'previously': [0, 1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'soon': [0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'subsequently': [0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'thereafter': [0, 4, 5, 6, 7, 8, 9, 11, 14, 15, 17, 18, 19, 20, 21, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27, 0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'earlier': [0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'upon': [0, 1, 2, 4, 5, 6, 7, 8, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 25, 27], 'simultaneously': [0, 4, 6, 7, 8, 11, 12, 14, 17, 18, 19, 20, 21, 27]}


def generate_conj_can_retrieval_conj():
    """Expand ``conj_can_retrieval`` from class ids to connective strings.

    For every connective key in ``conj_can_retrieval`` the listed class ids
    are looked up in ``conj_class_list`` (keyed by ``str(class_id)``) and the
    members of all those classes are merged without duplicates.

    Returns:
        dict mapping each connective to a list of unique connectives. The
        list is sorted alphabetically so the result is identical on every
        interpreter run; a bare ``list(set(...))`` of strings would change
        order between processes because string hashing is randomised.
    """
    conj_can_retrieval_conj = {}
    for conj, class_ids in conj_can_retrieval.items():
        members = set()
        for class_id in class_ids:
            # class ids repeat heavily in the table; the set absorbs that
            members.update(conj_class_list[str(class_id)])
        conj_can_retrieval_conj[conj] = sorted(members)
    return conj_can_retrieval_conj

def path_file_list(path):
    """Return the entry names found directly inside directory ``path``.

    Thin wrapper around ``os.listdir``; names are returned without the
    directory prefix and in whatever order the OS reports them.
    """
    return os.listdir(path)

def get_all_file_path(path):
    """List every entry located exactly one directory level below ``path``.

    Args:
        path: root directory. A trailing separator is optional; it is added
            when missing so that string concatenation yields valid paths.

    Returns:
        list of ``path + sub_dir + '/' + entry`` strings, one per entry of
        each immediate sub-directory. Plain files sitting directly in
        ``path`` are skipped, because they cannot be listed as directories.
    """
    # Without this, 'data' + 'sub' would produce the bogus path 'datasub/'.
    if path and not path.endswith(('/', os.sep)):
        path = path + '/'

    file_list = []
    for sub_name in os.listdir(path):
        sub_path = path + sub_name + '/'
        if not os.path.isdir(sub_path):
            continue  # stray file at the top level
        for entry in os.listdir(sub_path):
            file_list.append(sub_path + entry)
    return file_list

def read_txt(file_name):
    """Read a text file and return its lines.

    Args:
        file_name: path of the file to read (opened with the platform's
            default text encoding, as before).

    Returns:
        list of lines in file order; each line keeps its trailing newline
        when the file has one.
    """
    # ``with`` guarantees the handle is closed even if reading raises.
    with open(file_name) as f:
        return list(f)

def list2string(sent_list):
    """Join the strings in ``sent_list`` with single spaces.

    Returns an empty string for an empty input. Uses ``str.join`` rather than
    repeated ``+`` so that the cost stays linear in the total length.
    """
    return ' '.join(sent_list)

def sort_dict(dic, ids=1, reverse=True):
    """Return the ``(key, value)`` pairs of ``dic`` as a sorted list.

    Args:
        dic: mapping to sort.
        ids: position inside each pair used as the sort key
            (0 = key, 1 = value).
        reverse: sort in descending order when true.
    """
    def pick(pair):
        return pair[ids]

    pairs = list(dic.items())
    pairs.sort(key=pick, reverse=reverse)
    return pairs

def del_overlap(a_list):
    """Drop spans that are properly nested inside another span.

    Each element of ``a_list`` is indexed as ``span[0]`` (start) and
    ``span[1]`` (end). A span is removed when some other element covers it
    (start not smaller, end not larger) and differs in at least one bound.
    Exact duplicates are reported only once. Input order is preserved.
    """
    kept = []
    for idx, span in enumerate(a_list):
        if span in kept:
            continue
        nested = any(
            other_idx != idx
            and span[0] >= other[0]
            and span[1] <= other[1]
            and (span[0] > other[0] or span[1] < other[1])
            for other_idx, other in enumerate(a_list)
        )
        if not nested:
            kept.append(span)
    return kept

def find_token_id(sent_list, conj_list):
    """Locate the first occurrence of ``conj_list`` inside ``sent_list``.

    Args:
        sent_list: sequence of sentence tokens.
        conj_list: sequence of tokens to search for.

    Returns:
        ``[start, end]`` with ``end`` exclusive (``end - start`` equals
        ``len(conj_list)``), or ``[]`` when there is no match or the pattern
        is empty.

    The end index is exclusive for every pattern length. Previously a
    multi-token match returned ``start + len - 1`` while a single-token match
    returned ``start + 1``, so a two-token phrase was indistinguishable from
    a one-token hit at the same position.
    """
    conj_len = len(conj_list)
    if conj_len == 0:
        return []

    for start in range(len(sent_list) - conj_len + 1):
        # element-wise comparison so list/tuple mixes still match
        if all(sent_list[start + k] == conj_list[k] for k in range(conj_len)):
            return [start, start + conj_len]
    return []
   
def remove_(sent):
    """Replace underscores in ``sent`` with spaces.

    The replacements run in a fixed order: ``' _'`` first, then ``'_ '``,
    then any remaining ``'_'``; each becomes a single space, so an underscore
    that already touches a space does not produce a double space.
    """
    for pattern in (' _', '_ ', '_'):
        sent = sent.replace(pattern, ' ')
    return sent

def expand_conj_dict(conj_dict):
    """Build a lookup from delimiter-wrapped connectives to the bare form.

    For every key ``k`` of ``conj_dict`` five surface variants are generated,
    in this order: ``' k '``, ``' k,'``, ``',k '``, ``' k.'``, ``'.k '``.
    Each variant maps back to ``k``. The values of ``conj_dict`` are ignored.
    """
    wrappers = ((' ', ' '), (' ', ','), (',', ' '), (' ', '.'), ('.', ' '))
    conj_exp_dict = {}
    for conj in conj_dict:
        for before, after in wrappers:
            conj_exp_dict[before + conj + after] = conj
    return conj_exp_dict

def deleteByStartAndEnd(s1, s2, token):
    """Splice ``s1`` and ``s2`` together at the first occurrence of ``token``.

    Returns everything in ``s1`` before ``token`` followed by ``s2`` from
    ``token`` onwards. ``index`` raises ``ValueError`` if ``token`` is missing
    from either argument.
    """
    head = s1[:s1.index(token)]
    tail = s2[s2.index(token):]
    return head + tail

def getstartandend(misc):
    """Extract the start/end character offsets from a ``misc`` string.

    Takes the text after the first ``'start_char='`` marker and splits it on
    ``'|end_char='``. The pieces are returned as strings, not integers.
    Raises ``IndexError`` when ``'start_char='`` is absent.
    """
    after_start = misc.split('start_char=')[1]
    return after_start.split('|end_char=')

def write_json(file_name, data_list):
    """Append ``data_list`` to ``file_name`` in JSON Lines format.

    Args:
        file_name: target path; opened in append mode, so existing content is
            kept and the file is created when missing.
        data_list: iterable of JSON-serialisable objects; each is written as
            one line.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(file_name, "a") as outfile:
        for record in data_list:
            json.dump(record, outfile)
            outfile.write('\n')

def read_json(file_name, start_num=-1, end_num=-1):
    """Read a JSON Lines file, optionally restricted to a line window.

    Args:
        file_name: path of the file; every line read must be valid JSON.
        start_num: zero-based index of the first line to load. The default
            ``-1`` means "from the beginning".
        end_num: zero-based index one past the last line to load. The default
            ``-1`` means "until end of file", so passing only ``start_num``
            reads from there to the end instead of returning nothing.

    Returns:
        list of decoded objects in file order.
    """
    json_list = []
    with open(file_name, 'r') as fp:
        for ids, line in enumerate(fp):
            if end_num != -1 and ids >= end_num:
                break  # past the window: no need to scan the rest
            if ids >= start_num:
                json_list.append(json.loads(line))
    return json_list

def filter_conj_find_id(conj_id, words_list, tags_list):
    """Compute a [start_id, end_id) token window around the first 'B-V' tag.

    The window starts as the single token tagged 'B-V' and is widened left
    and right over neighbouring non-'O' tags. If widening runs into the
    token at ``conj_id``, the boundary is pulled back so that the run of
    tokens sharing the connective's tag label is excluded.

    Args:
        conj_id: index of the connective token in ``tags_list``.
        words_list: not used by this function.
        tags_list: per-token tag strings. Tags are compared after dropping
            their first two characters — presumably BIO-prefixed labels such
            as 'B-ARG1' / 'I-ARG1'; TODO confirm the tag scheme.

    Returns:
        ``(start_id, end_id)``, or ``(None, None)`` when no 'B-V' tag exists.
    """
    sign = 0
    start_id = None
    end_id = None

    # Seed the window with the first verb tag only.
    if 'B-V' in tags_list:
        start_id = tags_list.index('B-V')
        end_id = tags_list.index('B-V') + 1
    else:
        return None, None

    # set start id
    # Walk left until an 'O' tag, the sentence start, or the connective.
    sign = 0
    while start_id > 0:
        if tags_list[start_id-1]  == 'O':
            break
        elif start_id-1 == conj_id:
            sign = 1
            break
        else:
            start_id -= 1
    if sign == 1:
        # The connective sits immediately left of the window: move the start
        # forward to the first token whose label differs from the
        # connective's label (or to the last token if none differs).
        tmp_str = tags_list[start_id-1][2:]
        for i in range(start_id, len(tags_list)):
            if tags_list[i][2:] != tmp_str:
                break
        start_id = i

    # set end id
    # Walk right until an 'O' tag, the sentence end, or the connective.
    sign = 0
    while end_id < len(tags_list):
        if tags_list[end_id]  == 'O':
            break
        elif end_id == conj_id:
            sign = 1
            break
        else:
            end_id += 1
    if sign == 1:
        # The connective sits at the right edge: move the end backwards past
        # every token that shares the connective's label.
        tmp_str = tags_list[end_id][2:]
        for i in range(end_id, 0, -1):
            if tags_list[i-1][2:] != tmp_str:
                break
        end_id = i

    return start_id, end_id

def event_span_extraction(sent_result, real_conj_list):
    """Derive one [start, end) token span per verb from a parsed sentence.

    Args:
        sent_result: per-token records indexed by position. Field 2 is
            compared with 'VERB', field 3 is used as the index of the token's
            head, and field 5 is compared with relation names ('xcomp',
            'punct', 'conj', 'nmod') — presumably a dependency parse with
            zero-based head indices; TODO confirm the parser output format.
        real_conj_list: connective spans as (start, end) pairs, end
            exclusive; tokens inside them are never treated as verbs and stop
            span growth.

    Returns:
        list of ``[left, right + 1]`` spans, one per remaining verb, after
        adjacent spans linked by an 'xcomp' relation have been merged.
    """
    conj_position = []
    verb_position = []
    event_list = []

    xcomp_list = []

    # Flatten the connective spans into a list of token indices.
    for i in real_conj_list:
        for j in range(i[0], i[1]):
            conj_position.append(j)

    # Verbs outside any connective span are the event anchors.
    for ids, i in enumerate(sent_result):
        if i[2] == 'VERB':
            if ids not in conj_position:
                verb_position.append(ids)

    # Record verb pairs linked by xcomp. The stored values are positions
    # inside verb_position (i.e. event_list indices), smaller one first.
    for ids, i in enumerate(verb_position):
        if sent_result[i][5] == 'xcomp':
            if sent_result[sent_result[i][3]][2] == 'VERB':
                if sent_result[i][3] in verb_position:
                    other_position = verb_position.index(sent_result[i][3])
                    if ids < other_position:
                        xcomp_list.append((ids, other_position))
                    else:
                        xcomp_list.append((other_position, ids))

    for verb_id in verb_position:
        left = []
        right = []
        # Direct dependents of the verb, excluding other verbs, punctuation
        # and 'conj' relations, split by side.
        for ids, i in enumerate(sent_result): 
            if ids == verb_id:
                continue

            if (i[3] == verb_id):
                if (i[2] != 'VERB') and (i[5] != 'punct') and (i[5] != 'conj'):
                    if ids < verb_id:
                        left.append(ids)
                    else:
                        right.append(ids)
        
        # check conj
        # Scan outwards from the verb; stop at the first dependent that lies
        # inside a connective span, so the event never crosses a connective.
        temp = []
        for i in range(len(left)-1, -1, -1):
            if left[i] not in conj_position:
                temp.append(left[i])
            else:
                break
        left = min(temp+[verb_id])

        temp = []
        for i in range(len(right)):
            if right[i] not in conj_position:
                temp.append(right[i])
            else:
                break
        right = max(temp+[verb_id])

        # add nmod
        # Extend the right edge over 'nmod' dependents of the rightmost
        # token; `right` is updated inside the loop, so chains are followed.
        if right != verb_id:
            for i in range(right, len(sent_result)):
                if (sent_result[i][3] == right) and (sent_result[i][5] == 'nmod'):
                    right = i

        event_list.append([left, right+1])

    # Merge xcomp-linked events whose spans touch (end of first == start of
    # second), walking the pair list backwards.
    # NOTE(review): pop() shifts the indices of later events while xcomp_list
    # still holds the old positions — verify this is safe with several pairs.
    for ids in range(len(xcomp_list)-1, -1, -1):
        i = xcomp_list[ids]
        if event_list[i[0]][1] == event_list[i[1]][0]:
            temp = [event_list[i[0]][0], event_list[i[1]][1]]
            event_list.pop(i[1])
            event_list[i[0]] = temp

    return event_list

def output_dict_thed(dic, thed=0.2, num=100):
    """Prune low-scoring entries and return the rest sorted by score.

    While the mapping holds at least ``num`` entries, entries scoring below
    ``thed`` are dropped. If that still leaves ``num`` or more, the threshold
    rises by 0.1 and pruning repeats. If pruning would leave fewer than three
    entries (and fewer than ``num``), the last pruning step is discarded.

    Returns:
        list of ``(key, score)`` pairs in descending score order.
    """
    current = dic
    while len(current) >= num:
        survivors = {key: score for key, score in current.items() if score >= thed}
        if len(survivors) < num and len(survivors) < 3:
            break
        current = survivors
        if len(survivors) >= num:
            thed += 0.1

    return sorted(current.items(), key=lambda pair: pair[1], reverse=True)

def tokens2pos_seq(tokens):
    """Return the POS tags of ``tokens`` joined by '-'.

    Tags come from ``nltk.pos_tag``; only the tag (second element of each
    returned pair) is kept.
    """
    tagged = nltk.pos_tag(tokens)
    return '-'.join(pair[1] for pair in tagged)

def seed_all(seed_value):
    """Seed Python's, NumPy's and PyTorch's random number generators.

    When CUDA is available a notice is printed, the CUDA generators are
    seeded as well, and cuDNN benchmark mode is switched on.
    """
    # CPU-side generators: Python, NumPy, PyTorch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed_value)

    if not torch.cuda.is_available():
        return

    print ('CUDA is available')
    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)  # every visible GPU
    # NOTE(review): benchmark mode lets cuDNN auto-select kernels, which may
    # make GPU results differ between runs despite the seeding above; the
    # deterministic flag is intentionally left untouched here — confirm.
    torch.backends.cudnn.benchmark = True

def sample_distribution():
    """Derive sampling ratios from ``conj_number_dict``.

    Counts are turned into relative frequencies, flattened by raising them to
    the power 0.3, and renormalised. Everything is then expressed relative to
    the probability of 'and'.

    Returns:
        ``(dic, eval_dic)`` where
        ``dic[conj] = int(p_and / p_conj)`` for every connective, and
        ``eval_dic[conj] = int(p_conj / p_and * 5000)`` for connectives whose
        flattened probability is at least 0.00824.
        Both dicts are ordered by descending raw count.
    """
    ranked = sorted(conj_number_dict.items(), key=lambda x: x[1], reverse=True)
    total = sum(entry[1] for entry in ranked)  # integer sum, exact

    # Flatten the distribution; accumulate the normaliser in ranked order.
    flattened = {}
    norm = 0.0
    for conj, n in ranked:
        weight = (n / total) ** 0.3
        flattened[conj] = weight
        norm += weight

    probs = {conj: weight / norm for conj, weight in flattened.items()}
    max_prob = probs['and']

    eval_dic = {
        conj: int(p / max_prob * 5000)
        for conj, p in probs.items()
        if p >= 0.00824
    }
    dic = {conj: int(max_prob / p) for conj, p in probs.items()}

    return dic, eval_dic

def sample_number(dic_freq):
    """Cap per-connective counts and pick a frequency factor for each.

    Args:
        dic_freq: mapping consulted for every connective whose count in
            ``conj_number_dict`` is below 10000 (a missing key raises
            ``KeyError``).

    Returns:
        ``(dic, new_freq)`` where ``dic`` holds each count capped at 100000
        and ``new_freq`` holds ``dic_freq[conj]`` for connectives with fewer
        than 10000 occurrences and 1 for all others.
    """
    capped = {}
    new_freq = {}
    for conj, n in conj_number_dict.items():
        capped[conj] = min(n, 100000)
        new_freq[conj] = dic_freq[conj] if n < 10000 else 1
    return capped, new_freq

def conj_freq_dict():
    """Return a copy of ``conj_number_dict`` ordered by ascending count."""
    ascending = sorted(conj_number_dict.items(), key=lambda x: x[1])
    return dict(ascending)

def modify_state_dict(state_dict):
    """Strip a leading ``module.`` from the keys of ``state_dict``.

    Args:
        state_dict: mapping of parameter names to values.

    Returns:
        a new ``OrderedDict`` with the same values and order. Keys starting
        with ``module.`` lose that prefix; other keys are copied unchanged
        (previously the first seven characters of every key were removed
        regardless of their content).
    """
    prefix = 'module.'
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[len(prefix):] if k.startswith(prefix) else k
        new_state_dict[name] = v
    return new_state_dict

def len_mask(_lens, max_len=None):
    """Build a 0/1 mask marking the first ``_lens[...]`` positions.

    Args:
        _lens: integer tensor of lengths, any shape ``[...]``.
        max_len: size of the new trailing axis; when omitted (or falsy) the
            largest value in ``_lens`` is used.

    Returns:
        ``(mask, max_len)`` where ``mask`` is a ``torch.long`` tensor of shape
        ``[..., max_len]`` with ones at positions smaller than the length.
    """
    if not max_len:
        max_len = _lens.max().item()

    positions = torch.arange(0, max_len, dtype=torch.long, device=_lens.device)
    # prepend one singleton axis per dimension of _lens, then broadcast
    positions = positions.reshape([1] * _lens.dim() + [max_len])
    positions = positions.expand(list(_lens.size()) + [max_len])

    limits = _lens.unsqueeze(-1).expand_as(positions)
    return (positions < limits).to(torch.long), max_len

def zero_mask(_mask, _val, high_rank=False):
    """Multiply ``_val`` by ``_mask`` after casting the mask to ``_val``'s dtype.

    With ``high_rank=True`` the mask first gains a trailing axis and is
    expanded to the shape of ``_val``.
    """
    factor = _mask.to(_val.dtype)
    if high_rank:
        factor = factor.unsqueeze(-1).expand_as(_val)
    product = factor * _val
    return product

def slice_tensor_v2(rep_input, rep_se):
    """
    Gather, for every (start, end) pair, the matching slice of ``rep_input``.

    :param rep_input: [bs,sl,hn]
    :param rep_se: [bs,nl,2] -- last axis holds (start, end), end exclusive
    :return: (rep_node, node_len_mask) shaped [bs,nl,pl,hn] and [bs,nl,pl],
        where pl is the longest span length; positions beyond a span's
        length are zeroed in rep_node and 0 in the mask
    """
    bs, sl, hn = rep_input.shape
    _, nl = rep_se.shape[:2]

    span_starts = rep_se[..., 0]  # bs,nl
    span_lens = rep_se[..., 1] - span_starts  # bs,nl

    node_len_mask, max_node_len = len_mask(span_lens)  # [bs,nl,pl], []
    # spans with non-positive length contribute nothing
    has_tokens = (span_lens > 0).to(torch.long).unsqueeze(-1)
    node_len_mask = node_len_mask * has_tokens  # [bs,nl,pl]

    offsets = torch.arange(0, max_node_len, dtype=torch.long, device=rep_input.device)
    offsets = offsets.unsqueeze(0).unsqueeze(0).expand([bs, nl, max_node_len])  # bs,nl,pl

    # absolute token index per slot; masked slots fall back to index 0
    node_indices = ((offsets + span_starts.unsqueeze(-1)) * node_len_mask).contiguous()

    flat_len = nl * max_node_len
    gather_index = node_indices.view(bs, flat_len).unsqueeze(-1).expand(bs, flat_len, hn)  # bs, nl*pl, hn
    gathered = torch.gather(rep_input, dim=1, index=gather_index)
    rep_node = gathered.view(bs, nl, max_node_len, hn)

    # wipe the values picked up by masked slots
    rep_node = zero_mask(node_len_mask, rep_node, high_rank=True)

    return rep_node, node_len_mask  # [bs,nl,pl,hn] & [bs,nl,pl]

# Script entry point: computes the sampling tables once; the results are not
# printed or stored, so this only checks that sample_distribution() runs.
if __name__ == '__main__':
    dic, eval_dic = sample_distribution()
