###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

############################################
## A Tree object consists of a string "head" (e.g. NP, S, JJ),
## a Tree object which is its parent (possibly nil)
## and an array of children that are Tree objects
## You initialize a tree by passing it a string
## containing parentheses delimited tree structure.
## For files containing multi-line trees (like treebank)
## you need to use the tree slurper class
##
###########################################

## Category labels that nominalBinarize! skips when it collects the children
## of a node.  Stored as a label => 1 hash so membership is a has_key? lookup.
$disallowed = ["-NONE-", "-DFL-", "DISFL", "XX", "N_S", "E_S"].each_with_object({}) { |label, table|
  table[label] = 1
}

## A node of a bracketed syntax tree.
##   str       - the string this node was constructed from ("" for nodes
##               created programmatically)
##   head      - the label of the node (category, or the word for a leaf)
##   children  - Array of child Tree objects (empty for a leaf)
##   parent    - the parent Tree, or nil
##   num_rules - cached result of the last getNumRules call
##   prob      - String captured from a trailing ": <number>" in str, or nil
class Tree
  attr_accessor :str, :head, :children, :num_rules, :parent, :prob

  ## Build a tree from a bracket-delimited string such as
  ## "(S (NP (DT the) (NN dog)) (VP (VBD barked)))".
  ## With the default empty string an empty node (head "", no children)
  ## is created.  Raises a RuntimeError wrapping any parse error.
  def initialize(str="", parent=nil)
    @str = str
    @children = Array.new
    @parent = parent
    @num_rules = 0
    @prob = nil
    ## Every node starts with an empty-string head so that the string
    ## methods used on head elsewhere never see nil.
    @head = ""
    if str != ""
      ## If the input uses square brackets instead of parentheses, parse a
      ## converted copy; the caller's string is left untouched.
      if str.count(")") < str.count("]")
        @str = str.tr("[]", "()")
      end
      if @str =~ /: ([0-9\.]+)$/
        @prob = $1
      end
      begin
        buildStructure(@str)
      rescue
        raise "Caught exception when tree #{@str} was passed in...\nError message:\n #{$!}"
      end
    end
    @num_rules = getNumRules
  end

  ## Parse one bracketed constituent from the front of str into this node
  ## (setting head and appending children) and return the unparsed remainder.
  ## Returns "" for an empty string and nil when str does not start with
  ## "( LABEL".  Raises a RuntimeError when the constituent is malformed.
  def buildStructure(str)
    if str == ""
      @head = ""
      return ""
    end

    ## Some switchboard trees start with 2 open parens: ( (S ... ) )
    ## Strip the outer pair.
    if str =~ /^ *\( *\((.*)\) *$/
      str = "(" + $1
    end

    ## Pluck off the head if it's there
    return nil unless str =~ /^ *\( *([^ ()]+)/
    @head = $1
    str = $' #'
    loop do
      if str =~ /^ *\(/
        ## A nested constituent: let a fresh child consume it
        child = Tree.new("", self)
        str = child.buildStructure(str)
        @children << child
      elsif str =~ /^ *([^ ()]+) *\)/
        ## we've reached a leaf - i.e. a word and its close paren
        child = Tree.new("", self)
        child.head = $1
        child.num_rules = 1
        @children << child
        return $' #'
      elsif str =~ /^ *\)/
        ## End of a tree
        return $' #'
      else
        raise "Erroneouse part of tree: #{str}."
      end
    end
  end

  ## Convert a chomsky normal form (cnf) tree to a right-corner (rc) tree.
  ## Returns the root of the converted tree.  The conversion works in place:
  ## it repeatedly detaches the rightmost single-leaf subtree, wraps self in a
  ## new node together with that subtree, and rewrites the heads along the
  ## right spine of self as "X/Y".  It relies on the parent pointers of the
  ## nodes on the right spine being correct.
  def cnf2rc
    if getNumLeafs() == 1
      return self
    end
    if @children[0].class == Tree and @children.size() == 1
      @children[0] = @children[0].cnf2rc
      return self
    end
    curTree = nil
    next_rc = nil
    rc = nil

    ## Outer loop over all right children
    while true
      if getNumLeafs() == 1
        break
      end

      if next_rc != nil
        rc = next_rc
        next_rc = rc.parent
      else
        ## Inner loop to find current right corner
        rc = self
        while true
          if rc.getNumLeafs() == 1
            break
          end
          rc = rc.children.last
        end
        next_rc = rc.parent
      end

      if rc == self
        @children[0] = @children[0].cnf2rc
        break
      end

      ## Okay, now we have the "right corner"
      ## Get its parent, remove it, and add it to new children list
      tempParent = rc.parent
      if tempParent != nil
          tempParent.children.pop()
      end

      newBigTree = Tree.new
      newBigTree.head = String.new(@head)
      newBigTree.prepend(rc.cnf2rc)
      newBigTree.prepend(self)
      if curTree != nil
        curTree.children[0] = newBigTree
        newBigTree.parent = curTree
      end
      curTree = newBigTree

      ## Find out what we're "missing" by relocating the right corner
      if rc.head =~ /(.*)\/(.*)/
        newHead = $1
      else
        newHead = String.new(rc.head)
      end

      ## Now adjust the labels in the tree to reflect the missing item
      temp = self
      while temp != nil
        if temp.head =~ /.*\/.*/
          temp.head.gsub!(/(.*)\/(.*)/, '\1/' + newHead)
        else
          temp.head += "/" + newHead
        end
        if temp.children.size() < 2
          break
        end
        temp = temp.children.last
      end
    end

    ## Walk up from the innermost wrapper to the outermost one
    while curTree.parent != nil
      curTree = curTree.parent
    end
    return curTree
  end

  ## Rebuild a tree from the structure produced by cnf2rc (presumably the
  ## inverse conversion - confirm against callers).  Returns self for a
  ## single-leaf tree, otherwise a newly created root node.  Every branch of
  ## the loop either returns or descends into the left child.
  def rc2cnf

    if getNumLeafs == 1
        return self #@children[0]
    end

    r_sub = self.children[1]
    cur_tree = self.children[0]
    while true
      if cur_tree.getNumLeafs == 1
        newTree = Tree.new
        newTree.head = @head
        newTree.children << cur_tree.children[0]
        newTree.children << r_sub
        return newTree
      end

      if cur_tree.children.size() == 1
        newTree = Tree.new
        newTree.head = cur_tree.head.gsub(/(.*)\/(.*)/, '\1')
        newTree.children << cur_tree.children[0].rc2cnf
        newTree.children << r_sub
        return newTree
      else
        cur_tree.children[1].parent = nil
        newTree = Tree.new
        newTree.head = cur_tree.children[0].head.gsub(/(.*)\/(.*)/, '\2')
        newTree.children << cur_tree.children[1]
        newTree.children << r_sub
        cur_tree.children[1].parent = newTree
        r_sub.parent = newTree
        r_sub = newTree
        cur_tree.children.pop()
        if r_sub.children[0].class == Tree
            r_sub.children[0] = r_sub.children[0].rc2cnf
        end
        cur_tree = cur_tree.children[0]
      end
    end
    return cur_tree
  end

  ## Binarize, in place, every node that has more than two children whose
  ## heads are not in $disallowed.  Such a node keeps its first allowed child
  ## and gets a new second child whose head is the remaining allowed heads
  ## joined with "_" and whose children are those remaining children.
  ## Disallowed children of a node rewritten this way are dropped; nodes with
  ## two or fewer allowed children are left as they are.
  def nominalBinarize!()
    if @children.size > 2
      kept = @children.reject { |child| $disallowed.has_key?(child.head) }
      if kept.size > 2
        rest = kept[1..-1]
        nt = Tree.new
        nt.head = rest.map { |child| child.head }.join("_")
        nt.children = rest
        nt.parent = self
        rest.each { |child| child.parent = nt }
        ## Keep the first *allowed* child; using @children[0] here would
        ## lose it whenever the first child is a disallowed label.
        @children = [kept[0], nt]
      end
    end
    @children.each{ |child|
      child.nominalBinarize!
    }
  end

  ## Undo nominalBinarize!: every non-leaf child whose head contains "_" is
  ## replaced, in place, by its own children.  Children are processed first so
  ## nested artificial nodes are flattened completely.
  ## NOTE: any non-leaf label containing "_" is treated as artificial.
  def nominalUnbinarize!()
    @children = @children.flat_map { |child|
      child.nominalUnbinarize!
      if child.head =~ /_/ && !child.children.empty?
        child.children.each { |grandchild| grandchild.parent = self }
        child.children
      else
        [child]
      end
    }
  end

  #####################################
  # unbinarize
  # Takes a treebank tree that has been binarized with the magerman head
  # rules and unbinarizes them for easy comparison to treebank gold standard.
  # Every non-leaf child whose head contains "-bin" is replaced in place by
  # its own (already unbinarized) children.
  #####################################
  def unbinarize
    return if getNumLeafs == 1
    @children = @children.flat_map { |child|
      child.unbinarize
      if child.head =~ /-bin/ && !child.children.empty?
        child.children.each { |grandchild| grandchild.parent = self }
        child.children
      else
        [child]
      end
    }
  end

  ## Bracketed string form: a leaf is just its head, any other node is
  ## "(head child child ...)" with a space inserted between ")(".
  def to_s
    return "#{head}" if @children.length == 0
    inner = @children.map { |child| child.to_s }.join
    return "(#{@head} #{inner})".gsub(/\)\(/, ") (")
  end

  ## Insert t as the first child.  The parent pointer of t is deliberately
  ## not touched (cnf2rc relies on that).
  def prepend(t)
    @children.unshift(t)
  end

  ## getNumRules
  ## Computes the total number of rules in this tree, i.e. the number of
  ## nodes with more than one child, and caches it in num_rules.
  ## If this tree is the gold standard, it is the denominator
  ## in the recall calculation.
  ## If this tree is the hypothesis, it is the denominator
  ## in the precision calculation.
  def getNumRules()
    @num_rules = @children.size() > 1 ? 1 : 0
    @children.each{ |child|
      @num_rules += child.getNumRules
    }
    return @num_rules
  end

  ## getNumCorrect - This method treats the tree it belongs
  ## to as a gold standard, takes in a tree argument as a
  ## hypothesis, and returns the number correct, as the
  ## numerator for the labeled precision/recall calculation.
  ## Only hypothesis rules spanning more than one position are counted.
  def getNumCorrect(t)
    gold_rules = getRulesHash
    return t.getRulesArray(0).count { |v|
      (v.j - v.i) > 1 && gold_rules.has_key?(v.hash)
    }
  end

  ## All rules of this tree, keyed by Rule#hash.
  def getRulesHash
    return getRulesArray(0).each_with_object(Hash.new) { |v, table|
      table[v.hash] = v
    }
  end

  ## Collect Rule objects for this subtree, numbering leaf positions from
  ## start_ind.  A leaf yields one Rule spanning a single position unless its
  ## head starts with "*" or a digit or ends with "-".  A node with more than
  ## one child additionally yields a Rule covering the span of its children.
  def getRulesArray(start_ind)
    rules = Array.new
    ind = start_ind
    if(@children.size > 0)
      @children.each_index{ |i|
        i_rules = @children[i].getRulesArray(ind)
        if i_rules.size > 0
          rules << i_rules
          rules.flatten!
          ind = rules.last.j
        end
      }
    else
      ## Don't increment count for 0, *T*-1, and word fragments
      if not @head =~ /^[\*\d]/ and not @head =~ /\-$/
        rules << Rule.new(@head, start_ind, start_ind+1)
      end
      return rules
    end
    if rules.size > 0
      end_ind = rules.last.j
    else
      end_ind = start_ind
    end
    if(@children.size() > 1)
      rules << Rule.new(@head, start_ind, end_ind, @children[0].head)
    end
    return rules
  end

  ## Labeled precision of hypothesis t against this (gold) tree, as a Float:
  ## correct rules divided by the number of rules in t (0.0 if t has none).
  def getPrecision(t)
    total = t.getNumRules
    return 0.0 if total == 0
    return getNumCorrect(t).to_f / total
  end

  ## Labeled recall of hypothesis t against this (gold) tree, as a Float:
  ## correct rules divided by the number of rules in this tree (0.0 if none).
  def getRecall(t)
    total = self.getNumRules
    return 0.0 if total == 0
    return getNumCorrect(t).to_f / total
  end

  ## The leaves of this tree joined by single spaces.  If a StandardError
  ## occurs while collecting them, a message is written to stderr and nil is
  ## returned.
  def getWordString
    return @head if @children.size == 0
    str = ""
    begin
      @children.each{ |child|
        str += child.getWordString + " "
      }
      return str.gsub(/ +/, " ").gsub(/ $/, "")
    rescue StandardError => e
      $stderr.puts "Exception caught when str = #{str}: #{e}"
      return nil
    end
  end

  ## Number of leaves below this node (a leaf counts as 1).
  def getNumLeafs
    return 1 if @children.size() == 0
    return @children.inject(0) { |sum, child| sum + child.getNumLeafs }
  end

  # get the depth of the current tree (a leaf has depth 1)
  def getDepth
    return 1 if @children.size == 0
    ## Each child's depth is computed exactly once
    return 1 + @children.map { |child| child.getDepth }.max
  end

  # returns true if the tree includes the given word, i.e. some head of the
  # form "POS#word" has exactly str after the "#"
  def include?(str)
    return true if head.include?("#") && head.split("#")[1] == str
    return @children.any? { |child| child.include?(str) }
  end

  # returns true if the tree includes the given POS, i.e. some head of the
  # form "POS#word" contains str before the "#"
  def posinclude?(str)
    return true if head.include?("#") && (head.split("#")[0]).include?(str)
    return @children.any? { |child| child.posinclude?(str) }
  end

  # Returns an Array of the topmost nodes whose head ends with "#str" or has
  # str in the part before a "#" (str is interpolated into a regexp), or nil
  # when there is no such node.
  def find(str)
    if(head =~ /##{str}$/ or head =~ /^\S*#{str}\S*#/) then
      return [self]
    end
    return nil if @children.size == 0
    matches = Array.new
    @children.each { |child|
      sub = child.find(str)
      matches += sub if sub != nil
    }
    return matches.size == 0 ? nil : matches
  end

  # gets the words in this tree that are contained in the given pos
  def getWords(pos)
    if(head.include?("#") and (head.split("#")[0]).include?(pos)) then
      return [head.split("#")[1]]
    end
    return @children.inject(Array.new) { |words, child|
      words + child.getWords(pos)
    }
  end

  # get the depth of the stack if it were interpreted as a right-corner tree.
  # Without an argument: the maximum over children of the child's value, plus
  # one for every child that is not the first.  With str: the same, but only
  # children that include? / posinclude? str are considered, and a leaf that
  # does not match yields -1.
  def getRCStackDepth(str = "")
    if str != ""
      if head.include?("#" + str + " )") || head.include?(str + "#")
        return self.getRCStackDepth
      elsif @children.size == 0
        return -1
      else
        max = 0
        @children.each_with_index { |child, x|
          depth = child.getRCStackDepth(str)
          depth += 1 if x != 0
          if depth > max && (child.include?(str) || child.posinclude?(str))
            max = depth
          end
        }
        return max
      end
    elsif @children.size == 0
      return 0
    else
      max = 0
      @children.each_with_index { |child, x|
        depth = child.getRCStackDepth
        depth += 1 if x != 0
        max = depth if depth > max
      }
      return max
    end
  end

  ## Placeholder; does nothing and returns nil.
  def getOps
  end

  ## Number of leaves dominated by the topmost nodes that carry an EDITED
  ## label.
  def getNumEditedLeafs
    return getNumLeafs if editedLabel?
    return @children.inject(0) { |count, child| count + child.getNumEditedLeafs }
  end

  ## One entry per leaf, left to right: 1 if the leaf is below a node with an
  ## EDITED label, 0 otherwise.
  def getEditedArray
    return Array.new(getNumLeafs, 1) if editedLabel?
    return [0] if @children.size == 0
    return @children.flat_map { |child| child.getEditedArray }
  end

  ## Number of leaf positions marked as edited both in this (gold) tree and
  ## in the hypothesis tree t2.
  def getNumEditedCorrect(t2)
    gold = getEditedArray
    hypoth = t2.getEditedArray
    return hypoth.each_index.count { |i| hypoth[i] == 1 && gold[i] == 1 }
  end

  private

  ## True when the head contains "EDITED" but not "/EDITED".
  def editedLabel?
    return !(@head =~ /\/EDITED/) && @head =~ /EDITED/
  end
end

##################################
## TreeSlurper
## This class is used to read in files
## containing trees spanning multiple
## lines.  getNext reads just to the next
## tree and returns a Tree object.  
## getAll returns an array containing all
## trees in the file.  This may be prohibitively
## large for certain large corpora.
##
###################################

## Reads trees that may span several lines from a file, one tree per call.
class TreeSlurper
  ## Opens file_name for reading.  Call close when done.
  def initialize(file_name)
    @file_name = file_name
    @file = File.open(@file_name)
  end

  ## Read lines until the parentheses seen so far balance on a non-empty
  ## line (or the file ends), concatenate them without the line breaks and
  ## return the result as a Tree.  Returns nil when nothing but empty lines
  ## (or nothing at all) was left to read.
  def getNext
    open_parens = 0
    str = ""
    while (line = (@file.gets)) != nil
      line.chomp!
      str += line
      open_parens += line.count("(") - line.count(")")
      break if open_parens == 0 && line != ""
    end
    return str != "" ? Tree.new(str) : nil
  end

  ## Read every remaining tree and return them as an Array.
  def getAll
    ra = Array.new
    while (t = getNext) != nil
      ra << t
    end
    return ra
  end

  ## Close the underlying file handle.
  def close
    ## File has no class-level close; the handle opened in initialize is
    ## what has to be closed.
    @file.close
  end

end

## A labeled span: head covers positions i (inclusive) to j (exclusive);
## first is the label of the first child, or "" when none was given.
class Rule
  attr_reader :head, :i, :j, :first

  def initialize(name, i, j, first="")
    @head, @i, @j, @first = name, i, j, first
  end

  ## "i head j first", separated by single spaces.
  def to_s
    "#{i} " + @head + " #{j} " + @first
  end

  ## String key built from the start index, the head and the end index
  ## (first is not part of it).  Note that this returns a String, not the
  ## Integer Ruby's Hash expects from #hash.
  def hash
    "#{@i}" + @head + "#{@j}"
  end
end

########################
# class HypothPath
#
# Represents the output of a DBN viterbi trace, all the
# operations at each stack depth and time step.
# This is most importantly used for recreating a rc tree
# that one can compare to a gold standard for computing
# accuracy results
#
##########################

## Holds the R, F and S values of one utterance read from a viterbi trace
## file, indexed by time step, and turns them into a Tree.
class HypothPath
  attr_reader :S, :R, :F

  ## Open hypoth_file and read the trace of the first utterance.
  def initialize(hypoth_file)
    @file = File.open(hypoth_file, "r")
    readNextTrace
  end

  ## Build a Tree from the trace currently held in R/F/S, then advance to the
  ## trace of the next utterance.  Returns an empty Tree when the current
  ## utterance had no most likely sequence, and nil when no trace is left.
  def nextTree
    treeSoFar = nil #Tree.new
    subTree = nil
    subsubTree = nil
    ## Iterate over time steps
    @R.each_index { |t|
      if @R[0] == nil
        treeSoFar = Tree.new
        break
      end
      if t == 0
        next
      end
      ## This is the 011 case or 111 case - the latter just being a special 
      ## case of the former for the end of utterance
      if @F[t][1].to_i == 1 
        ## Create a new tree at depth 0 with the old subtree as the left and the new stuff as the right
        newBigTree = Tree.new
        newBigTree.head = @R[t][0]
        ## Add the tree so far as the left child, if it exists
        if treeSoFar != nil
          newBigTree.children << treeSoFar
        end

        ## Create a new tree at depth 1 that will be the right child of the newBigTree
        newSmallTree = Tree.new
        newSmallTree.head = @R[t][1] #Tree.new("( #{@R[t][1]} )")
        ## If we've just finished a bunch of stuff at level 1, add it as the left child        
        if subTree != nil
          ## FIXME? (this _is_ a test fix... not sure it's right)
          if subsubTree != nil
            superSubTree = Tree.new
            superSubTree.head = @R[t-1][1]
            superSubTree.children << subTree
            superSubTree.children << subsubTree
            subTree = superSubTree
          end
          newSmallTree.children << subTree
        end
        
        ## If there is any non-trivial thing going on between levels 1 and 2 in R, make
        ## a tree out of it (trivial case is a word going to the same word), and add that
        ## tree to the new small tree.
        ## Non-trivial case is NT going to another NT, or NT/word going to word basically
        if @R[t][1] =~ /[A-Z]/ || @R[t][1] != @R[t][2]
          newSmallTree.children << Tree.new("( #{@R[t][2]} )")
          if @R[t][2] != @S[t-1][3]
            newSmallTree.children.last.children << Tree.new("( #{@S[t-1][3]} )")
          end
        end
        ## Append the right hand (small) tree to the new big tree, and make the
        ## treeSoFar point at it
        newBigTree.children << newSmallTree
        if not @R[t][0].eql?(@S[t][0]) and @F[t][0].to_i == 0
          temp = Tree.new
          temp.head = @S[t][0]
          temp.children << newBigTree
          newBigTree = temp
        end
        subTree = nil
        subsubTree = nil
        treeSoFar = newBigTree
      elsif @F[t][2].to_i == 1  ## Here we have 0 0 1
        if subTree == nil
          ## Just starting a new subtree off the main tree
          subTree = Tree.new
          subTree.head = @R[t][1]
          subTree.children << Tree.new("( #{@R[t][2]} )")
          if @S[t-1][3] != @R[t][2]
            subTree.children[0].children << Tree.new("( #{@S[t-1][3]} )")
          end
        elsif subsubTree == nil
          ## Continuing a subtree
          newSubTree = Tree.new
          newSubTree.head = @R[t][1]
          newSubTree.children << subTree
          newSubTree.children << Tree.new("( #{@R[t][2]} )")
          if @S[t-1][3] != @R[t][2]
            newSubTree.children[1].children << Tree.new("( #{@S[t-1][3]} )")
          end
          subTree = newSubTree
        else ## We have to deal with a subsubtree
          newSubTree = Tree.new
          newSubTree.head = @R[t][1]
          newSubTree.children << subTree
          newSubTree.children << Tree.new("( #{@R[t][2]} )")
          newSubTree.children[1].children << subsubTree
          newsubstr = "( " + @S[t-1][2].gsub(/^[^\/]+\/(.*)$/, '\1') + " " + @S[t-1][3] + " )"
          newSubTree.children[1].children << Tree.new(newsubstr)
          subTree = newSubTree
        end
        if not @R[t][1].eql?(@S[t][1])
          temp = Tree.new
          temp.head = @S[t][1]
          temp.children << subTree
          subTree = temp
        end
        subsubTree = nil
      else   ## Here we have 0 0 0 for the F nodes
        if subsubTree == nil
          subsubTree = Tree.new
          subsubTree.head = @R[t][2]
          subsubTree.children << Tree.new("( #{@S[t-1][3]} )")
        else
          newsubsubTree = Tree.new
          newsubsubTree.head = @R[t][2]
          newsubsubTree.children << subsubTree;
          newsubsubTree.children << Tree.new("( #{@S[t-1][3]} )")
          subsubTree = newsubsubTree
        end
        if not @R[t][2].eql?(@S[t][2])
          temp = Tree.new
          temp.head = @S[t][2]
          temp.children << subsubTree
          subsubTree = temp
        end        
      end
    }
    ## Advance to the next tree
    readNextTrace
    return treeSoFar
  end

  private

  ## Reset R, F and S and fill them from the following lines of the file.
  ## Each "HYPOTH t> R: ... F: ... S: ..." line sets index t of the three
  ## arrays to its ";"-separated fields (with an Integer 1 appended to F[t]).
  ## Reading stops at end of file, after a line containing "S: null", or at
  ## a "no most likely sequence" line; if that line comes before any HYPOTH
  ## line, R[0], F[0] and S[0] are set to nil to mark a failed utterance.
  def readNextTrace
    @S = Array.new
    @R = Array.new
    @F = Array.new
    t = -1
    while (line = @file.gets) != nil
      line.chomp!
      if line =~ /no most likely sequence/
        unless t > -1
          @R[0] = nil
          @F[0] = nil
          @S[0] = nil
        end
        break
      end
      if (line =~ /HYPOTH (\d+)> R: ([^ ]+) *F: ([^ ]+) *S: ([^ ]+)/)
        rs = $2
        fs = $3
        ss = $4
        t = $1.to_i
        @R[t] = rs.split(";")
        @F[t] = fs.split(";")
        @F[t] << 1          ## Add a 1 to the end
        @S[t] = ss.split(";")  # S already has the LX at the end
        if line =~ /S: null/
          break
        end
      end
    end
  end
end


## A value paired with its probability; both can be read and reassigned.
class ValueProb
  attr_accessor :value, :prob

  def initialize(v,p)
    @value, @prob = v, p
  end
end

## A Hash whose [] stores 0 under a key that is not present yet and then
## returns it, so counters can be incremented without initialisation.
class CounterHash < Hash
  def [](key)
    fetch(key) { |missing| store(missing, 0) }
  end
end

## A Hash whose [] assigns the next integer (1, 2, 3, ...) to a key that is
## not present yet, stores it and returns it.
class ArrayHash < Hash
  def initialize
    super
    @max_val = 0
  end

  def [](key)
    fetch(key) { |missing| store(missing, @max_val += 1) }
  end
end
