package handlers;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import utils.enums.Metric;


/**
 * Orthographic similarity measures between two strings: normalized edit
 * (Levenshtein) similarity, longest common subsequence ratio (LCSR) and an
 * XDice-style coefficient over bigrams and extended bigrams.
 * 
 * All three measures are computed from the arguments alone; none of the methods
 * in this class reads or writes {@link #distancesMap}.
 */
public class OrthographicMetricHandler 
{
	/**
	 * Legacy edit-distance cache, retained only so that existing code referencing
	 * this public field keeps compiling. This class no longer reads from or writes
	 * to it: the former keys of the form {@code s1 + "__" + s2} were ambiguous for
	 * inputs that themselves contain "__" and could therefore return the distance
	 * of a different pair of strings.
	 */
	public static Map<String, Double> distancesMap = new HashMap<String, Double>();
		
	/**
	 * compute orthographic metric between w1 and w2
	 * @param w1 first input string 
	 * @param w2 second input string
	 * @param metric orthographic metric
	 * @return value of the 'metric' similarity between w1 and w2, or 0 when 'metric'
	 * is null or is not one of EDIT, LCSR, XDICE
	 */
	public double computeMetric(String w1, String w2, Metric metric)
	{
		// constant-first equals keeps a null metric from throwing
		if (Metric.EDIT.equals(metric))
			return computeNormalizedLevenshteinSimilarity(w1, w2);
		
		if (Metric.LCSR.equals(metric))
			return computeNormalizedLcsrSimilarity(w1, w2);
		
		if (Metric.XDICE.equals(metric))
			return computeNormalizedXDicePlusSimilarity(w1, w2);
		
		return 0.;
	}	
	
	/**
	 * compute the normalized edit similarity = 1 - normalized edit distance
	 * for normalization, divide the edit distance by the maximum length of the two input strings
	 * the comparison is case-insensitive: both strings are lower-cased with Locale.ROOT, so the 
	 * result does not depend on the default locale of the JVM
	 * @param s1 first input string (must not be null)
	 * @param s2 second input string (must not be null)
	 * @return the normalized edit similarity between s1 and s2, in [0, 1]; 
	 * 0 when both strings are empty
	 */
	public static double computeNormalizedLevenshteinSimilarity(String s1, String s2)
	{
		String lower1 = s1.toLowerCase(Locale.ROOT);
		String lower2 = s2.toLowerCase(Locale.ROOT);
		
		// normalize by the lengths of the strings that are actually compared:
		// lower-casing can change the length of some strings
		int maxLength = Math.max(lower1.length(), lower2.length());

		// avoid 0/0; two empty strings are reported as similarity 0
		if (maxLength == 0)
			return 0;
		
		int levenshteinDistance = computeLevenshteinDistance(lower1, lower2);
				
		return 1 - (double)levenshteinDistance/maxLength;
	}
	
	/**
	 * compute the edit distance (unit cost for insertion, deletion and substitution)
	 * iterative dynamic programming over two rows: no recursion and no shared state
	 * @param s1 first input string
	 * @param s2 second input string
	 * @return the edit distance between s1 and s2
	 */
	private static int computeLevenshteinDistance(String s1, String s2)
	{
		int length1 = s1.length();
		int length2 = s2.length();
		
		if (length1 == 0)
			return length2;
		
		if (length2 == 0)
			return length1;
		
		// previous[j] = distance between the first i-1 chars of s1 and the first j chars of s2
		int[] previous = new int[length2 + 1];
		int[] current = new int[length2 + 1];
		
		for (int j = 0; j <= length2; j++)
			previous[j] = j;
		
		for (int i = 1; i <= length1; i++)
		{
			char c1 = s1.charAt(i - 1);
			
			current[0] = i;
			
			for (int j = 1; j <= length2; j++)
			{
				int cost = (c1 == s2.charAt(j - 1)) ? 0 : 1;
				
				int deletion = previous[j] + 1;
				int insertion = current[j - 1] + 1;
				int substitution = previous[j - 1] + cost;
				
				current[j] = Math.min(Math.min(deletion, insertion), substitution);
			}
			
			// the row just filled becomes the reference row of the next iteration
			int[] swap = previous;
			previous = current;
			current = swap;
		}
		
		return previous[length2];
	}
	
	/**
	 * compute the longest common subsequence ratio (LCSR): the length of the longest 
	 * common subsequence divided by the maximum length of the two strings
	 * the comparison is case-sensitive
	 * @param s1 first input string (must not be null)
	 * @param s2 second input string (must not be null)
	 * @return the longest common subsequence ratio between s1 and s2, in [0, 1]; 
	 * 0 when both strings are empty
	 */
	public static double computeNormalizedLcsrSimilarity(String s1, String s2)
	{
		int maxLength = Math.max(s1.length(), s2.length());

		// avoid 0/0; two empty strings are reported as similarity 0
		if (maxLength == 0)
			return 0;

		return computeLcsrSimilarity(s1, s2)/maxLength;
	}

	/**
	 * compute the length of the longest common subsequence
	 * @param s1 first input string
	 * @param s2 second input string
	 * @return the length of the longest common subsequence of s1 and s2
	 */
	private static double computeLcsrSimilarity(String s1, String s2)
	{
		int length1 = s1.length();
		int length2 = s2.length();
		
		// only the previous row of the table is needed to fill the current one;
		// new int arrays are zero-filled, which is the value of row 0 and column 0
		int[] previous = new int[length2 + 1];
		int[] current = new int[length2 + 1];
		
		for (int i = 1; i <= length1; i++)
		{
			char c1 = s1.charAt(i - 1);
			
			for (int j = 1; j <= length2; j++)
			{
				if (c1 == s2.charAt(j - 1))
					current[j] = previous[j - 1] + 1;
				else
					current[j] = Math.max(current[j - 1], previous[j]);
			}
			
			// current[0] is never written, so it stays 0 after the swap
			int[] swap = previous;
			previous = current;
			current = swap;
		}
				
		return (double)previous[length2];
	}
	
	/**
	 * compute the xdice similarity over the sets of distinct bigrams and extended bigrams: 
	 * 2 * |common| / (|bigrams of s1| + |bigrams of s2|)
	 * the comparison is case-sensitive
	 * NOTE: when neither string yields a bigram (both strings have at most one character) 
	 * the result is 1, even if the strings differ
	 * @param s1 first input string (must not be null)
	 * @param s2 second input string (must not be null)
	 * @return the xdice similarity between s1 and s2, in [0, 1]
	 */
	public static double computeNormalizedXDicePlusSimilarity(String s1, String s2)
	{
		Set<String> bigramsS1 = new HashSet<String>(Arrays.asList(getXPlusBigrams(s1)));
		Set<String> bigramsS2 = new HashSet<String>(Arrays.asList(getXPlusBigrams(s2)));
		
		int totalBigrams = bigramsS1.size() + bigramsS2.size();

		if (totalBigrams == 0)
			return 1;
		
		// set intersection on a copy, so bigramsS1 keeps its original size
		Set<String> commonBigrams = new HashSet<String>(bigramsS1);
		commonBigrams.retainAll(bigramsS2);
		
		return 2 * (double)commonBigrams.size() / totalBigrams;
	}
	
	/**
	 * determine bigrams and extended bigrams for xdice distance
	 * a bigram is a pair of adjacent characters, an extended bigram is the pair of characters 
	 * at positions i and i + 2; extended bigrams come first in the output and duplicates are kept
	 * for example, for word 'language' the output contains 
	 * {'la', 'an', 'ng', 'gu', 'ua', 'ag', 'ge', 'ln', 'ag', 'nu', 'ga', 'ug', 'ae'} 
	 * @param s input string
	 * @return array of bigrams and extended bigrams; empty when s has fewer than 2 characters
	 */
	private static String[] getXPlusBigrams(String s) 
	{
		List<String> xBigrams = new ArrayList<String>();
		int length = s.length();
		
		// extended bigrams: the loop body never runs when length < 3
		for (int i = 0; i + 2 < length; i++)
			xBigrams.add(s.charAt(i) + "" + s.charAt(i + 2));
		
		// regular bigrams: the loop body never runs when length < 2
		for (int i = 0; i + 1 < length; i++)
			xBigrams.add(s.charAt(i) + "" + s.charAt(i + 1));
		
		return xBigrams.toArray(new String[xBigrams.size()]);
	}
}