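"""
Functions:
    This module offers edit distance calculation: it generates the dictionary
    words that lie within a given edit distance of a word, and computes the
    edit distance between two strings.
Requirements:
    A dictionary, given either as the path to a text file (one word per line)
    or as a list of words.
"""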
import os
import time
import types

class editingDistance:
   """Dictionary-backed edit-distance helper.

   Holds a set of lowercase dictionary words and offers:
     * isInDictionary  -- membership test against that set
     * getEditVariants -- dictionary words reachable from a word within a
                          given number of single-character edits
     * getEditDistance -- dynamic-programming edit distance of two strings
   """

   def __init__(self, dictData):
      """Build the dictionary set.

      dictData is either
        * a string: path of a dictionary file, one word per line, or
        * a list / tuple / set of words (e.g., soundex codes).
      All words are lowercased before being stored.

      Raises TypeError (an Exception subclass) for any other input type.
      """
      # Cost added for each edit step in getEditDistance.
      self.subWeight = 1
      # Alphabet used for insertions and substitutions when generating variants.
      self.varTypes = "abcdefghijklmnopqrstuvwxyz'"

      # `basestring` only exists on Python 2 (covers str and unicode);
      # fall back to `str` elsewhere so the check works on both lines.
      try:
         stringTypes = basestring
      except NameError:
         stringTypes = str

      if isinstance(dictData, stringTypes):
         # `with` guarantees the file is closed even if reading fails.
         with open(dictData, 'r') as fdict:
            words = (line.rstrip().lower() for line in fdict)
            # Skip blank lines so the empty string never becomes a "word".
            self.dictSet = set(word for word in words if word)
      elif isinstance(dictData, (list, tuple, set, frozenset)):
         self.dictSet = set(dictKVP.lower() for dictKVP in dictData)
      else:
         raise TypeError("Not supported dictionary type")

   def isInDictionary(self, word):
      """Return True if word (compared as given, no lowercasing) is known."""
      return word in self.dictSet

   def getEditVariants(self, word, eDist=2):
      """Return the set of dictionary words within eDist edits of word.

      word is right-stripped and lowercased first. An edit is one insertion,
      deletion or substitution of a character from self.varTypes. The word
      itself is part of the result when it is in the dictionary.
      """
      word = word.rstrip().lower()
      # Schema: iterative generation-and-test.
      # Variants generation: `seen` collects every candidate produced so far,
      # `frontier` holds the candidates that still have to be expanded.
      seen = set([word])
      frontier = set([word])
      for _ in range(eDist):
         generated = set()
         for source in frontier:
            generated.update(self.__genVariants(source))
         # Strings already in `seen` were expanded in an earlier round (or in
         # this one), so expanding them again could not add anything new.
         frontier = generated - seen
         seen |= generated
      # Variants filter: keep only real dictionary words.
      return seen & self.dictSet

   def __genVariants(self, w):
      """Return every string one insertion, deletion or substitution from w.

      Substituting a character by itself is not excluded, so w is part of the
      result whenever one of its characters is in self.varTypes.
      """
      wordLen = len(w)
      variants = set()
      # insertion
      variants.update(w[0:i] + v + w[i:] for i in range(wordLen + 1) for v in self.varTypes)
      # deletion
      variants.update(w[0:i] + w[i + 1:] for i in range(wordLen))
      # substitution
      variants.update(w[0:i] + v + w[i + 1:] for i in range(wordLen) for v in self.varTypes)
      return variants

   def getEditDistance(self, s, t):
      """Return the edit distance between sequences s and t.

      Classic dynamic-programming table: matrix[i][j] is the distance between
      the first i items of s and the first j items of t. Every edit step
      (insertion, deletion or substitution) adds self.subWeight; the first
      row and column are initialised with unit steps.
      """
      m = len(s)
      n = len(t)
      matrix = [[0] * (n + 1) for _ in range(m + 1)] # matrix[m][n]
      # Borders: turning a prefix into the empty string (and vice versa).
      for i in range(m + 1):
         matrix[i][0] = i
      for j in range(n + 1):
         matrix[0][j] = j

      for i in range(1, m + 1):
         for j in range(1, n + 1):
            if s[i - 1] == t[j - 1]:
               # Matching items cost nothing.
               matrix[i][j] = matrix[i - 1][j - 1]
            else:
               matrix[i][j] = min(matrix[i - 1][j], matrix[i][j - 1], matrix[i - 1][j - 1]) + self.subWeight
      return matrix[m][n]
                                                               

if __name__ == "__main__":
   # Smoke test: each misspelled key must have its correction among the
   # dictionary words returned by getEditVariants (default edit distance).
   # The dictionary path is relative to the current working directory.
   # Single-argument print(...) and dict.items() behave the same on
   # Python 2 and Python 3, unlike the print statement and iteritems().
   print("Unit test for edit distance")
   testDict = {'earthquak': 'earthquake', 'yu': 'you'}
   right = 0
   wrong = 0
   ed = editingDistance('../data/dict.dict')
   for (k, v) in testDict.items():
      if v in ed.getEditVariants(k):
         right += 1
      else:
         wrong += 1
   print("right %d, wrong %d" % (right, wrong))
