#!/usr/bin/ruby
###################
# File: lexicalizeVerbs.rb
# Usage: Takes in a newline-separated list of trees,
# tallies most common verbs, and creates lexicalized heads of those
# most commonly used verbs.
# e.g. (VP (VBZ is) (...)) => (VPheadis (VBZheadis is) (...))
# or something like that.
#
# Author: Tim Miller
#
#######################

require 'scripts/umnlp.rb'

# Chomped input lines, kept so each tree can be re-parsed on the second pass.
$sentences = []

# Extensions to Tree for verb counting and verb-tag lexicalization.
# Both methods treat a node as a preterminal when it has exactly one child
# and that child has no children of its own.
class Tree
  # Tally the word under every preterminal whose label starts with "V".
  #
  # vHash:: counter keyed by word; must support `vHash[word] += 1` for unseen
  #         words (this script passes a CounterHash).
  def countVerbs(vHash)
    unless @children.size == 1 && @children.first.children.empty?
      # Internal node (or bare leaf): keep descending.
      return @children.each { |kid| kid.countVerbs(vHash) }
    end

    # Preterminal reached: count it if it is a verb tag, and stop here.
    vHash[@children.first.head] += 1 if @head =~ /^V/
  end

  # Append "head<word>" to the label of every preterminal whose label starts
  # with "V" and whose word is a key of vHash.  Mutates labels in place.
  #
  # vHash:: any object answering has_key?(word) (this script passes a Hash).
  def lexicalizeVerbs!(vHash)
    # Visit the whole subtree first, then handle this node.
    @children.each { |kid| kid.lexicalizeVerbs!(vHash) }

    return unless @children.size == 1 && @children.first.children.empty?
    return unless @head =~ /^V/ && vHash.has_key?(@children.first.head)

    @head = @head + "head" + @children.first.head
  end
end

# ---- Driver ---------------------------------------------------------------
# Pass 1: read trees from stdin and tally the verbs they contain.
# Pass 2: re-parse each stored line, lexicalize the tags of the num_verbs
#         most frequent verbs, and print the resulting trees to stdout.
# Progress and the chosen verbs are reported on stderr.

# Number of top verbs to lexicalize.  A missing or non-numeric argument
# converts to 0, which selects the pass-through mode below.
num_verbs = ARGV[0].to_i
$stderr.puts "Lexicalizing with #{num_verbs} verbs..."
$stderr.puts "Reading in sentences..."
verbs = CounterHash.new
while (line = $stdin.gets)
  if num_verbs == 0
    # Pass-through: echo the input unchanged.  Use chomp, not chomp!, because
    # chomp! returns nil when there is no trailing newline to strip (e.g. the
    # last line of a file without a final newline), which printed a blank line.
    puts line.chomp
  else
    t = Tree.new(line)
    t.countVerbs(verbs)
    # chomp (not chomp!) so a line without a trailing newline is stored as
    # itself rather than as nil.
    $sentences << line.chomp
  end
end

if num_verbs == 0
  $stderr.puts "Exiting early because of 0 input"
  exit
end

topHash = Hash.new

$stderr.puts "Sorting by verb count..."

# Most frequent verbs first; each element is a [verb, count] pair.
sortedArrays = verbs.sort { |a, b| b[1] <=> a[1] }

# Take at most num_verbs entries.  first(n) stops at the end of the list, so
# asking for more verbs than were seen no longer indexes past the array.
# A negative request selects nothing.
top_count = [num_verbs, 0].max
sortedArrays.first(top_count).each do |verb, count|
  $stderr.puts "#{verb} => #{count}"
  topHash[verb] = 1
end

$stderr.puts "Propagating lexicalization up the tree..."

$sentences.each do |sent|
  tree = Tree.new(sent)
  tree.lexicalizeVerbs!(topHash)
  puts tree.to_s
end

