from matplotlib import pylab
from scipy import spatial
from pyvttbl import Anova1way
import numpy as np
import itertools


def generateAnalogies(analogyTemplates, keyedVecs):
	"""Build and rank "A is to X as B is to Y" analogy sentences.

	analogyTemplates maps each class word to a list of stereotype words.
	For every ordered pair of distinct class words (A, B) and every
	stereotype X listed for A, keyedVecs.most_similar is called with
	positive=[A, X] and negative=[B]; each (word, score) pair it yields
	becomes one analogy sentence. A query that raises KeyError is skipped.

	keyedVecs only needs a most_similar(positive=..., negative=...) method
	(presumably a gensim KeyedVectors instance -- confirm against callers).

	Returns a list of [score, sentence] lists ordered by descending score.
	"""
	queries = [
		([source, trait], [other])
		for source, traits in analogyTemplates.items()
		for other in analogyTemplates
		if source != other
		for trait in traits
	]

	ranked = []
	for positive, negative in queries:
		try:
			neighbours = keyedVecs.most_similar(positive=positive, negative=negative)
		except KeyError:
			# The lookup failed for this query, so it contributes no analogies.
			continue

		for word, score in neighbours:
			sentence = "".join([
				str(positive[0]), " is to ", str(positive[1]),
				" as ", str(negative[0]), " is to ", str(word),
			])
			ranked.append([score, sentence])

	# Sorting on the negated score yields descending order while keeping
	# equal-scored analogies in the order they were generated.
	ranked.sort(key=lambda entry: -entry[0])
	return ranked

def multiclass_evaluation(embeddings, targets, attributes):
	"""Score every target word against every attribute set.

	targets is an iterable of target-word collections and attributes an
	iterable of attribute-word collections. For each target word, in order,
	_unary_s is evaluated once per attribute set.

	Returns a tuple (mean_score, scores): scores is the flat list of
	per-(target, attribute set) values and mean_score is their np.mean.
	"""
	scores = [
		_unary_s(embeddings, word, attribute_set)
		for target_set in targets
		for word in target_set
		for attribute_set in attributes
	]
	return np.mean(scores), scores

def _unary_s(embeddings, target, attributes):
	return np.mean([ spatial.distance.cosine(embeddings[target], embeddings[ai]) for ai in attributes ])

# NOTE: In the binary case the original Bath paper did not include absolute-value notation, so this approach is a slight improvement on theirs.
def binary_weat(embeddings, targets, attributes):
	"""Compute a two-class WEAT-style score and its effect size.

	targets holds two collections of target words (targets[0], targets[1])
	and attributes holds two collections of attribute words. Every target
	word is scored with _binary_s against the attribute pair.

	Returns a tuple (weat_score, effect_size):
	  weat_score  -- absolute difference between the summed scores of the
	                 two target sets.
	  effect_size -- absolute difference between the mean scores of the two
	                 target sets, divided by np.std of all scores pooled.

	Raises ZeroDivisionError when either target set is empty.
	"""
	targetOne = [_binary_s(embeddings, x, attributes) for x in targets[0]]
	targetTwo = [_binary_s(embeddings, y, attributes) for y in targets[1]]

	weat_score = np.absolute(sum(targetOne) - sum(targetTwo))

	# The pooled scores are exactly the per-set scores concatenated, so reuse
	# them rather than calling _binary_s a second time for every target word.
	effect_std = np.std(targetOne + targetTwo)
	num = np.absolute(sum(targetOne) / float(len(targetOne)) - sum(targetTwo) / float(len(targetTwo)))
	# Kept for parity with the existing console output; formatted as a single
	# string so it prints the same text under Python 2 and Python 3.
	print("%s %s" % (num, effect_std))
	effect_size = num / effect_std
	return weat_score, effect_size

def _binary_s(embeddings, target, attributes):
	groupOne = []
	groupTwo = []
	for ai in attributes[0]:
		groupOne.append(spatial.distance.cosine(embeddings[target], embeddings[ai]))
	for aj in attributes[1]:
		groupTwo.append(spatial.distance.cosine(embeddings[target], embeddings[aj]))
	return np.absolute(sum(groupOne)/float(len(groupOne)) - sum(groupTwo)/float(len(groupTwo)))







	