###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

#!/usr/bin/ruby
#
#####################
# buildModel.rb
#
######################

require "scripts/umnlp.rb"
require 'optparse'
require 'ostruct'

# Lookup table of part-of-speech tags (tag => 1).  The tree transforms below
# use has_key? on it to tell POS tags apart from non-terminal categories.
$posTags = %w[
  BES CC CD DT EX FW GW HVS IN JJ JJR JJS LS MD NN NNP NNPS NNS PDT POS
  PRP PRP$ RB RBR RBS RP SYM TO UH VB VBD VBG VBN VBP VBZ WDT WP WP$ WRB
].each_with_object({}) { |tag, table| table[tag] = 1 }

class Tree
  ## fixEdited is a way of re-structuring the speech repairs in the switchboard
  ## corpus.  I translate a tree like
  ## => EDITED EDITING_TERMS X
  ##   where:
  ## EDITED => X
  ## (which is context sensitive)
  ## and move it to:
  ## => X
  ##   and:
  ## X => EDITED-X EDITING_TERMS X
  ##
  ## Rewrites every sibling sequence "EDITED [INTJ] [PRN] X" -- where the
  ## EDITED node has exactly one daughter whose category is X (or X + "UNF")
  ## -- into a single new node labelled X whose children are the EDITED node
  ## (its label extended with X), the editing terms, and the original X.
  ## Children are visited right-to-left and each (possibly new) child is then
  ## processed recursively.  Mutates the tree in place.
  def fixEdited
    # A pre-terminal (one child that is itself a leaf) has nothing to rewrite.
    return if @children.size == 1 && @children[0].children.size == 0

    # Iterate over an index range fixed before the loop starts.  The previous
    # each_index loop re-checked the (shrinking) array length on every step,
    # so as soon as a repair collapsed some siblings the loop stopped early
    # and the left-most children were never restructured or recursed into.
    # Replacements only touch indices >= i, so the lower indices stay valid.
    (@children.size - 1).downto(0) { |i|
      # cat: category of the first daughter of child i, up to the first dash
      # or space.  A leaf child has no daughter, so cat stays nil and no
      # rewrite can match.
      firstDaughter = @children[i].children[0]
      cat = nil
      if firstDaughter && firstDaughter.head =~ /([^- ]*)-?/
        cat = $1
      end

      # n1..n3: categories of up to three right siblings of child i.
      following = @children.length - i - 1
      n1 = n2 = n3 = nil
      if following > 0 && @children[i+1].head =~ /([^- ]*)-?/
        n1 = $1
      end
      if following > 1 && @children[i+2].head =~ /([^- ]*)-?/
        n2 = $1
      end
      if following > 2 && @children[i+3].head =~ /([^- ]*)-?/
        n3 = $1
      end

      # Decide how many editing terms sit between EDITED and the repair X.
      terms = nil
      repairCat = nil
      if @children[i].head == "EDITED" && @children[i].children.length == 1
        if following > 2 && n1 == "INTJ" && n2 == "PRN" && (cat == n3 || cat == n3+"UNF")
          #    i     i+1 i+2 i+3
          # EDITED INTJ PRN  X
          terms = 2
          repairCat = n3
        elsif following > 1 && (n1 =~ /INTJ|PRN/) && (cat == n2 || cat == n2+"UNF")
          # EDITED INTJ/PRN X
          terms = 1
          repairCat = n2
        elsif following > 0 && (cat == n1 || cat == n1+"UNF")
          # EDITED X
          terms = 0
          repairCat = n1
        end
      end

      if terms
        # Build X => EDITED-X [editing terms] X; prepending from right to
        # left leaves the children in their original order.
        t = Tree.new("")
        t.head = repairCat
        (terms + 1).downto(0) { |k| t.prepend(@children[i+k]) }
        @children[i].head += repairCat
        # Drop EDITED and the editing terms, then overwrite X with the new node.
        (terms + 1).times { @children.delete_at(i) }
        @children[i] = t
      end

      @children[i].fixEdited
    }
  end
  
  ## Function: annotateRepaired!
  ## Rewrites each sibling sequence "EDITED [INTJ] [PRN] X" (EDITED having a
  ## single daughter of category X or X + "UNF") into one node labelled X.
  ## The reparandum's material becomes the left part of that node and the
  ## editing terms plus the repair are grouped under a REPAIRED_* node:
  ##   * reparandum is a unary phrase over a non-POS node: its daughter and
  ##     the REPAIRED_* node become the two children of X;
  ##   * reparandum's last daughter is a POS tag: the reparandum phrase and
  ##     the REPAIRED_* node become the two children of X;
  ##   * otherwise X takes over the reparandum's daughters and the REPAIRED_*
  ##     node is appended under the last of them.
  ## An EDITED node whose single daughter is a unary node labelled with a POS
  ## tag is only relabelled ("EDITED" + daughter label).  Mutates in place.
  def annotateRepaired!
    return if @children.size == 0

    # Iterate right-to-left over an index range fixed before the loop starts.
    # The previous each_index loop recomputed i from the current (shrinking)
    # array size, so after a rewrite it skipped siblings and/or ended early,
    # leaving children to the left unprocessed.  Rewrites only touch indices
    # >= i, so the lower indices remain valid.
    (@children.size - 1).downto(0) { |i|
      if not @children[i].head =~ /EDITED/
        @children[i].annotateRepaired!
        next
      end

      # cat: category of the EDITED node's first daughter, up to a dash/space.
      @children[i].children[0].head =~ /([^- ]*)-?/
      cat = $1

      # n1..n3: categories of up to three right siblings.
      following = @children.length - i - 1
      n1 = n2 = n3 = nil
      if following > 0 && @children[i+1].head =~ /([^- ]*)-?/
        n1 = $1
      end
      if following > 1 && @children[i+2].head =~ /([^- ]*)-?/
        n2 = $1
      end
      if following > 2 && @children[i+3].head =~ /([^- ]*)-?/
        n3 = $1
      end
      onlyChild = @children[i].children.length == 1

      # terms: number of editing terms between EDITED and the repair X;
      # repairCat: label of the new node; label: label of the REPAIRED node.
      terms = nil
      repairCat = nil
      label = nil
      if following > 2 && n1 == "INTJ" && n2 == "PRN" && onlyChild && (cat == n3 || cat == n3+"UNF")
        #    i     i+1 i+2 i+3
        # EDITED INTJ PRN  X
        terms = 2
        repairCat = n3
        label = "REPAIRED_INTJ_PRN_" + n3
      elsif following > 1 && (n1 =~ /INTJ|PRN/) && onlyChild && (cat == n2 || cat == n2+"UNF")
        #    i     i+1     i+2
        # EDITED INTJ/PRN   X
        terms = 1
        repairCat = n2
        label = "REPAIRED_#{n1}_" + n2
      elsif following > 0 && onlyChild && (cat == n1 || cat == n1+"UNF")
        #    i     i+1
        # EDITED   X
        terms = 0
        repairCat = n1
        label = "REPAIRED_" + n1
      elsif @children[i].head == "EDITED" && onlyChild
        # No matching repair to the right: just record the category.
        @children[i].head += cat
      end

      if terms
        edited = @children[i].children[0]   # the reparandum phrase

        # Something like EDITED -> NN -> word: the reparandum phrase is itself
        # labelled with a POS tag, so only relabel the EDITED node.
        if edited.children.size == 1 && $posTags.has_key?(edited.head.gsub(/([A-Z]+).*/, '\1'))
          @children[i].head = "EDITED" + edited.head
          @children[i].annotateRepaired!
          next
        end

        # REPAIRED node: with editing terms it holds the terms and the repair
        # node X; without them it takes over X's children directly.
        repaired = Tree.new
        repaired.head = label
        if terms == 0
          repaired.children = @children[i+1].children
        else
          (1..terms+1).each { |k| repaired.children[k-1] = @children[i+k] }
        end

        t = Tree.new
        t.head = repairCat
        if edited.children.size == 1
          # Unary reparandum over a non-POS node: lift its daughter.
          t.children[0] = edited.children[0]
          t.children[1] = repaired
        elsif $posTags.has_key?(edited.children.last.head.gsub(/([A-Z]+).*/, '\1'))
          # Reparandum ends in a POS tag: keep it whole next to REPAIRED.
          t.children[0] = edited
          t.children[1] = repaired
        else
          # Reparandum consists of several phrases: hang REPAIRED under the
          # last one.
          t.children = edited.children
          t.children.last.children << repaired
        end

        # Drop EDITED and the editing terms, then overwrite X with the new node.
        (terms + 1).times { @children.delete_at(i) }
        @children[i] = t
      end

      @children[i].annotateRepaired!
    }

    ## Catch the case where we create unary nodes
    if @children.size == 1 and @head == @children[0].head and @children[0].children.size > 0
      @children = @children[0].children
    end
  end
  
  ## swapEditedRepaired! is a way of re-structuring the speech repairs in the
  ## switchboard corpus.  I translate a tree like
  ## => EDITED EDITING_TERMS X
  ##   where:
  ## EDITED => X
  ## (which is context sensitive)
  ## and move it to:
  ## => X
  ##   and:
  ## X => X EDITING_TERMS REPAIREDX
  ## (the former EDITED node takes the plain label X, and the repair that
  ##  followed it is relabelled REPAIREDX)
  # Walks the children left-to-right; for each "EDITED [INTJ] [PRN] X"
  # sequence whose EDITED node has a single daughter of category X (or
  # X + "UNF"), replaces the sequence with one new node labelled X.  Inside
  # that node the former EDITED child is relabelled X and the repair is
  # relabelled "REPAIRED" + X.  Recurses into every (possibly new) child and
  # mutates the tree in place.
  def swapEditedRepaired!
    # A pre-terminal (one child that is itself a leaf) has nothing to rewrite.
    if @children.size == 1 && @children[0].children.size == 0
      return
    end
    @children.each_index{ |i|
      #puts "My head is #{head}"
      # cat: category of child i's first daughter, up to the first dash/space.
      @children[i].children[0].head =~ /([^- ]*)-?/
      cat = $1
      # n1..n3: categories of up to three right siblings of child i.
      if (@children.length - i) > 1 && @children[i+1].head =~ /([^- ]*)-?/
        n1 = $1
      end
      if (@children.length - i) > 2 && @children[i+2].head =~ /([^- ]*)-?/
        n2 = $1
      end
      if (@children.length - i) > 3 && @children[i+3].head =~ /([^- ]*)-?/
        n3 = $1
      end
      if (@children.length - i) > 3 && @children[i].head =~ /EDITED/ && n1 == "INTJ" && n2 == "PRN" && @children[i].children.length == 1 && (cat == n3 || cat == n3+"UNF")
         #                       i     i+1 i+2 i+3
         # we have the rule => EDITED INTJ PRN  X and EDITED => X
         # which we want to rewrite as: => X and X => EDITED-X INTJ PRN X
         t = Tree.new("")
         t.head = n3
         t.prepend(@children[i+3])
         # NOTE(review): relies on prepend (defined outside this chunk)
         # putting the node at t.children[0] -- confirm.
         t.children[0].head = "REPAIRED" + n3
         t.prepend(@children[i+2])
         #@children[i+2].head += "ET"
         t.prepend(@children[i+1])
         #@children[i+1].head += "ET"
         t.prepend(@children[i])
         # The former EDITED node now carries the plain category label.
         @children[i].head = n3
         @children.delete_at(i) ## delete EDITED
         @children.delete_at(i) ## delete INTJ
         @children.delete_at(i) ## delete PRN
         @children[i] = t
       elsif (@children.length - i) > 2 && @children[i].head =~ /EDITED/ && (n1 =~ /INTJ|PRN/) && @children[i].children.length == 1 && (cat == n2 || cat == n2+"UNF")
         # Same rewrite with a single editing term (INTJ or PRN).
         t = Tree.new("")
         t.head = n2
         t.prepend(@children[i+2])
         t.children[0].head = "REPAIRED" + n2
         t.prepend(@children[i+1])
         #@children[i+1].head += "ET"
         t.prepend(@children[i])
         @children[i].head = n2
         @children.delete_at(i) ## delete EDITED
         @children.delete_at(i) ## delet INTJ/PRN
         @children[i] = t
       elsif (@children.length - i) > 1 && @children[i].head =~ /EDITED/ && @children[i].children.length == 1 && (cat == n1 || cat == n1+"UNF")
         # Same rewrite with no editing terms: EDITED is directly followed by X.
         t = Tree.new("")
         t.head = n1
         t.prepend(@children[i+1])
         t.children[0].head = "REPAIRED" + n1
         t.prepend(@children[i])
         @children[i].head = n1
         @children.delete_at(i) ## delete EDITED
         @children[i] = t
       elsif @children[i].head == "EDITED" && @children[i].children.length == 1
         # No matching repair to the right: just append the category.
         @children[i].head += cat
       end
       # Recurse into whatever now sits at position i (the new node, if any).
       @children[i].swapEditedRepaired!
         
    }
    ## Catch the case where we create unary nodes
    if @children.size == 1 and @head == @children[0].head and @children[0].children.size > 0
      @children = @children[0].children
    end
  end

  ## Function: binarize
  ## This turns S -> X1 X2 X3 NP VP
  ## into: S -> X1 X2 X3 S
  ## and : S -> NP VP
  ## and then applies the same rule to every child.  The newly created S has
  ## only two children, so the rule never fires on it again.
  ##
  ## fn is accepted but ignored.  The file used to define binarize(fn) as an
  ## empty stub and then redefine binarize() right after it, which silently
  ## replaced the stub; the two definitions are merged here so that either
  ## call form works.  The commented-out call below suggests fn was meant to
  ## name a head-rules file.
  def binarize(fn = nil)
    #binarize("model/head_rules.txt")
    return if @children.size == 0

    if @children.size > 2 && @children[-1].head == "VP" && @children[-2].head[0,2] =~ /^NP/ && @head =~ /^S/
      # Wrap the trailing NP VP pair in a fresh S node.
      newLastChild = Tree.new
      newLastChild.head = "S"
      newLastChild.children << @children[-2]
      newLastChild.children << @children[-1]
      @children.delete_at(@children.size - 1)
      @children.delete_at(@children.size - 1)
      @children << newLastChild
    end

    @children.each { |child|
      child.binarize
    }
  end
  
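  ## Function: annotateDaughters!
  ## This function propagates the category of an EDITED node's daughter up
  ## into the EDITED node's own label (e.g. EDITED -> NP becomes EDITEDNP).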
  # Bottom-up pass: after the children have been processed, a node labelled
  # exactly EDITED gets a category appended to its label.  The category is
  # read from
  #   * its only child (EDITED -> VP), or
  #   * its second child when there are two children and the first one is
  #     itself an EDITED node (nested reparanda, EDITED -> EDITED(NP) NP).
  # A trailing "UNF" / "-UNF" on the source label is not copied.
  def annotateDaughters!
    return if @children.size == 0

    @children.each { |kid| kid.annotateDaughters! }

    return unless @head =~ /^EDITED$/

    source =
      if @children.size == 1
        @children[0]
      elsif @children.size == 2 and @children[0].head =~ /^EDITED/
        @children[1]
      end
    return if source.nil?

    # Try the UNF-aware pattern first so that the UNF marker is stripped.
    if source.head =~ /^([^ -]+)-?UNF/ or source.head =~ /^([^ -]+)/
      @head += $1
    end
  end
  
  # Collapses a unary EDITED node onto its child: when this node has exactly
  # one child, spans more than one leaf, is labelled EDITED and the child's
  # label contains UNF, the node takes over the child's label and children.
  # Then recurses into the (possibly replaced) children.
  def fixEditedUnary()
    return if @children.size() == 0

    only_child = @children[0]
    if @children.size() == 1 and getNumLeafs() > 1 and @head =~ /EDITED/ and only_child.head =~ /UNF/
      @head = only_child.head
      @children = only_child.children
    end

    @children.each { |kid| kid.fixEditedUnary }
  end

  # Propagates the UNF marker upwards: after the children have been
  # processed, "UNF" is appended to this node's label when its last child's
  # label contains UNF -- unless this node is an EDITED node, already ends in
  # UNF, or is labelled X, NAC or S1.
  def propUnf
    return if @children.size < 1

    @children.each { |kid| kid.propUnf }

    return unless @children[-1].head =~ /UNF/
    return if @head =~ /EDITED/ or @head =~ /UNF$/
    return if @head == "X" or @head == "NAC" or @head == "S1"

    @head += "UNF"
  end
  
  # Older INTJ binarization, kept alongside binarizeIntj!.  For a node with
  # more than two children, an INTJ child that is followed by a non-EDITED
  # sibling is grouped with that sibling under a new node carrying the
  # sibling's label.
  # NOTE(review): the recursion calls `binarizeIntj` (no bang), which is not
  # defined in the visible part of this file -- presumably defined elsewhere
  # or a stale name; confirm before using this method.
  def binarizeIntjOld
    if @children.size <= 1
      return
    end
    @children.reverse_each {|child|
      child.binarizeIntj
    }
    # Right-to-left scan; i is recomputed from the current size each step.
    # NOTE(review): the array can shrink inside this loop, which changes both
    # i and the number of iterations -- verify that no child is skipped.
    @children.each_index{ |i_inv|
      i = @children.size() - i_inv -1
      # The last child has no right sibling to attach to.
      if i+1 == @children.size()
        #break
        next
      end
      if @children[i].head == "INTJ" and not @children[i+1].head =~ /EDITED/ and @children.size > 2
        # New node labelled like the right sibling, holding INTJ + sibling.
        t = Tree.new
        t.head = @children[i+1].head
        t.children << @children[i]
        t.children << @children[i+1]
        @children.delete_at(i)
        @children[i] = t
      end
    }
  end
  
  # Binarizes interjections (INTJ), then recurses into the children.
  #
  # Rule #1: an S-type node with more than two children that starts with an
  #   INTJ keeps the INTJ and pushes all remaining children under a new node
  #   with the same label:  Sjunk -> INTJ alpha beta ...  becomes
  #   Sjunk -> INTJ Sjunk,  Sjunk -> alpha beta ...
  # Rule #2: in any node that still has more than two children, an INTJ is
  #   grouped with its right sibling under a new node labelled like that
  #   sibling:  S -> NP INTJ VP  becomes  S -> NP VP, VP -> INTJ VP.
  def binarizeIntj!
    return if @children.size == 0

    if @head =~ /^S/ and @children.size > 2 and @children[0].head == "INTJ"
      remainder = Tree.new
      remainder.head = @head
      remainder.children = @children.slice!(1..-1)
      # Only the INTJ is left in @children at this point.
      @children << remainder
    end

    if @children.size > 2
      # Work on the reversed list so that "right sibling" is the element at
      # the lower index:  NP INTJ VP  is scanned as  VP INTJ NP.
      @children.reverse!

      pos = 0
      # The list may shrink while scanning, so the bound is re-read each time;
      # the final element has nothing after it and is never inspected.
      while pos < @children.size - 1
        if @children[pos + 1].head == "INTJ"
          group = Tree.new
          group.head = @children[pos].head
          # Stored in real (un-reversed) order: INTJ first, sibling second.
          group.children[0] = @children[pos + 1]
          group.children[1] = @children[pos]
          @children[pos] = group
          @children.delete_at(pos + 1)
        end
        pos += 1
      end

      @children.reverse!
    end

    @children.each { |kid| kid.binarizeIntj! }
  end
  
  # Bottom-up pass: every node whose label contains UNF gets the lower-cased
  # label of its first child appended to its own label.  Children are handled
  # first, so the appended text already includes whatever the child received.
  def savePOS
    return if @children.size < 1

    @children.each { |kid| kid.savePOS }

    @head += @children[0].head.downcase if @head =~ /UNF/
  end
  
  ## Annotate POS tag to front of word for tagger training
  ## argument is whether to add it to the end as well (for input) 
  # Recursively rewrites every word (the leaf under a pre-terminal) as
  # "<tag>#<word>", where <tag> is derived from the pre-terminal's label:
  #   * label contains EDITED      -> the text following "EDITED"
  #   * label contains UNF         -> text before UNF + text after UNF
  #   * label contains a dash      -> the text before the dash
  #   * otherwise                  -> the whole label
  # addPOS: when not false, a tag is also appended after the word
  #         ("<tag>#<word><tag>").
  def tag(addPOS)
    if @children.size < 1
      return
    end

    @children.each {|child|
      child.tag(addPOS)
    }

    # Only pre-terminals (single child that is a leaf) are rewritten.
    if @children.size == 1 and @children[0].children.size == 0
      if addPOS == false
        if @head =~ /EDITED([^ -]*)/
          @children[0].head = "#{$1}##{@children[0].head}"
        elsif @head =~ /(.*)UNF([^ -]*)/
          @children[0].head = "#{$1}#{$2}##{@children[0].head}"
        elsif @head =~ /^([^-]*)-/
          @children[0].head = "#{$1}##{@children[0].head}"
        else
          @children[0].head = "#{@head}##{@children[0].head}"
        end
      else
        # NOTE(review): each read of $1 yields a new String, so gsub! cannot
        # change the capture itself.  Any effect on the `#{$1}` / `#{$2}`
        # interpolated on the following lines would have to come from gsub!
        # replacing the last-match data with its own match (which has a
        # single group, so `$2` in the UNF branch would presumably be empty).
        # Confirm that this is the intended output before touching this code.
        if @head =~ /EDITED([^ -]*)/
          $1.gsub!(/([A-Z\$]+).*/, '\1')
          @children[0].head ="#{$1}##{@children[0].head}#{$1}"
        elsif @head =~ /(.*)UNF([^ -]*)/
          $1.gsub!(/([A-Z\$]+).*/, '\1')
          @children[0].head = "#{$1}#{$2}##{@children[0].head}#{$1}#{$2}"
        elsif @head =~ /([^-]*)-/
          $1.gsub!(/([A-Z\$]+).*/, '\1')
          @children[0].head = "#{$1}##{@children[0].head}#{$1}"
        else
          # Reduce the label to its first run of capitals / '$' characters.
          headBase = @head.gsub(/([A-Z\$]+).*/, '\1')
          @children[0].head = "#{headBase}##{@children[0].head}#{headBase}"
#          $stderr.puts "I'm in here with @head=#{@head} and headbase=#{headBase} and child #{@children[0].head}"
        end
      end
    end
  end
  
  # Removes unary chains: while this node has a single child that is neither
  # a leaf nor a pre-terminal, the child is skipped and this node adopts the
  # child's children.  Then recurses into the resulting children.
  def fixUnary!()
    return if @children.size < 1

    loop do
      break unless @children.size == 1
      only = @children[0]
      break if only.children.size == 0
      ## Make sure we're not getting rid of a Pos/pre-terminal
      break if only.children[0].children.size == 0
      @children = only.children
    end

    @children.each { |kid| kid.fixUnary! }
  end
  
  ## Function unpropagateEdited!
  ## Normally when you do the right-corner transform, the EDITED
  ## tag gets propagated down the left spine of the sub-tree, which
  ## means you put all these normal rules into the EDITED rule set which
  ## is dumb.  This fixes that...
  ## EDITEDNP -> EDITEDNP/NN NNunf =>
  ## EDITEDNP -> NP/NN NNunf
  ##
  ## Works bottom-up recursively using the principle:
  ## Am I edited as well as my parent?  If so, remove my EDITED label.
  ##
  # Bottom-up pass.  When this node's label starts with EDITED<cat> and its
  # first child's label starts with EDITED followed by the same non-empty
  # <cat>, every "EDITED" is stripped from the first child's label (the label
  # string is modified in place).  <cat> is the text after EDITED up to the
  # first space or slash.
  def unpropagateEdited!
    return if @children.size == 0

    @children.each { |kid| kid.unpropagateEdited! }

    return unless @head =~ /^EDITED([^ \/]*)/
    own_cat = $1

    first_child = @children[0]
    return unless first_child.head =~ /^EDITED([^ \/]*)/
    child_cat = $1

    if own_cat == child_cat and own_cat != ""
      first_child.head.gsub!(/EDITED/, "")
    end
  end
  
  # Deletes filled-pause interjections from this node's children and then
  # recurses into the children that remain.
  #
  # We don't want to remove interjections like "Good grief" or "great" or
  # "righty" and so on, so a child is only dropped when it is an INTJ with a
  # single UH child whose word is exactly "oh", "uh" or "um".
  def removeIntj!
    return if @children.size == 0

    @children.delete_if { |kid|
      kid.head == "INTJ" and kid.children.size == 1 and kid.children[0].head == "UH" and kid.children[0].children[0].head =~ /^(oh|uh|um)$/
    }

    @children.each { |kid| kid.removeIntj! }
  end
  
  ## This function takes trees that have nested repairs and flattens the edited 
  ## structure so that the repairs right-cornerize better.  (The nested 
  ## structure isn't any more linguistically coherent than my representation)
  # Flattens nested repairs: when a child is an EDITED node whose first
  # daughter is also an EDITED node, all of that child's daughters except the
  # last are moved up and inserted directly in front of it.
  #
  # Why all but the last, and not just the first?  Because it is _always_
  # the case that it is either binary EDITED X-UNF, or that there are some
  # intervening categories but they are all editing terms.
  #
  # The scan runs from the right; both the position and the number of passes
  # are derived from the current child count, which grows whenever daughters
  # are moved up.
  def flattenEdited!
    return if @children.size == 0

    pass = 0
    while pass < @children.size
      pos = @children.size - pass - 1
      node = @children[pos]

      if node.head =~ /EDITED/ and node.children[0].head =~ /EDITED/
        # slice! hands back the removed daughters as an array ...
        @children.insert(pos, node.children.slice!(0..-2))
      end

      # ... which is spliced into the child list here.
      @children.flatten!

      ## Now recurse into whatever sits at this position
      @children[pos].flattenEdited!
      pass += 1
    end

    @children
  end
  
  ## Lexicalize parentheticals - the hypothesis is that much of the difference
  ## in lexicalized parsers (charniak result form hale 06) and non-lexicalized
  ## (cyk result from hale 06) is due to the easy identification of stock 
  ## phrases annotated as PRN (parenthetical), which easily get screwed up
  ## by the parser trying to give them complementary phrases, as in:
  ## "I mean that's what I'm saying.." (`I mean' is a PRN)
  # Lexicalizes the verbs typical of stock parentheticals: a node whose label
  # starts with V and that directly dominates the word "know", "mean",
  # "guess" or "think" gets "head<word>" appended to its label (apostrophes
  # in the word are dropped).  Children are processed before their parent.
  #
  # domprn and parent are accepted but not used: the code that consumed them
  # (appending the dominating PRN's label to nodes below it) is disabled, and
  # nothing is done here for children that already carry a "head" annotation
  # -- per the original note, treebinarize propagates the head up to the VP.
  def lexprn!(domprn = false, parent="")
    return if @children.size == 0

    @children.each do |kid|
      kid.lexprn!
      next unless kid.children.size == 0 and @head =~ /^V/
      next unless kid.head == "know" or kid.head == "mean" or kid.head == "guess" or kid.head == "think"
      @head += "head" + kid.head.gsub(/'/, '\1')
    end
  end
  
  # Marks stock parentheticals of the shape
  #   PRN -> S,  S -> <subject> VP,  VP -> <verb node> -> know|mean|guess|think
  # by appending "-extrS" to the VP label and "-argS" to the label of the
  # VP's only child.  Then recurses into all children.
  def extrprn!()
    return if @children.size == 0

    if @head == "PRN" and @children.size == 1
      clause = @children[0]
      if clause.head == "S" and clause.children.size == 2
        vp = clause.children[1]
        if vp.head =~ /^VP/ and vp.children.size == 1
          verb = vp.children[0].children[0].head
          if verb == "know" or verb == "mean" or verb == "guess" or verb == "think"
            vp.head += "-extrS"
            vp.children[0].head += "-argS"
          end
        end
      end
    end

    @children.each { |kid| kid.extrprn! }
  end

  # In an NP-type node with more than two children that starts with PDT DT,
  # groups those two under a new node labelled DT.  Then recurses into all
  # children.
  def binarizeDet!
    return if @children.size == 0

    if @head =~ /^NP/ and @children.size > 2 and @children[0].head == "PDT" and @children[1].head == "DT"
      det = Tree.new
      det.head = "DT"
      # Move the PDT and the DT out of this node, in order, into the group.
      2.times { det.children << @children.shift }
      @children.unshift(det)
    end

    @children.each { |kid| kid.binarizeDet! }
  end
  
  ##################
  # This function looks for an INTJ category (filled pause usually),
  # extracts the word ('uh' usually), attaches it with a caret to the
  # previous word (right-most word of left-sibling), then
  # removes the INTJ from the tree.
  ##################
  # For every INTJ child other than the first child: appends
  # "^<label of the INTJ's left-most leaf's parent>^<that leaf's word>" to
  # the last word of the sibling on its left, then removes the INTJ.
  # Finally recurses into the remaining children.
  def glomIntj!
    pos = 1
    while pos < @children.size
      node = @children[pos]
      if node.head == "INTJ"
        leaf = node.getLeftLeaf
        @children[pos - 1].glomOntoLastWord("^" + leaf.parent.head + "^" + leaf.head)
        # Do not advance: the next sibling has moved into this slot.
        @children.delete_at(pos)
      else
        pos += 1
      end
    end

    @children.each { |kid| kid.glomIntj! }
  end
  
  ###############
  # Same as glomIntj, but also does the same thing for 2-word prns. and edits
  ###############
  # Scans the children left-to-right (starting at the second child) and
  #   * INTJ: appends "^<first word of the INTJ>" to the last word of the
  #     left sibling and removes the INTJ;
  #   * two-word PRN "you know" / "i think" / "i mean" / "i guess": appends
  #     "^you_know" (etc.) to the last word of the left sibling and removes
  #     the PRN;
  #   * left sibling is a one-word EDITED node whose word (up to any "^"
  #     suffix) equals the first word of this child: appends the EDITED
  #     word's "^" suffix plus "^REPEAT" to this child's first word and
  #     removes the EDITED node;
  #   * left sibling is any other multi-word EDITED node: appends "^1" to its
  #     last word.
  # Finally recurses into the remaining children.
  #
  # getLeftLeaf returns a node, not a String, so the word is read through
  # .head (as glomIntj! does).  Previously the node itself was concatenated
  # to "^" and matched against a regexp, which cannot yield the word and left
  # word2 holding the capture of the preceding match.
  def glomIntjPrn!
    numChildren = @children.size
    ind = 1
    while ind < numChildren
      if @children[ind].head == "INTJ"
        @children[ind-1].glomOntoLastWord("^" + @children[ind].getLeftLeaf.head)
        numChildren -= 1
        @children.slice!(ind)
      elsif @children[ind].head == "PRN" and @children[ind].getNumLeafs == 2 and
      (@children[ind].getWordString.eql?("you know") || @children[ind].getWordString.eql?("i think") || @children[ind].getWordString.eql?("i mean") || @children[ind].getWordString.eql?("i guess"))
        words = @children[ind].getWordString.gsub(/ /, '_')
        @children[ind-1].glomOntoLastWord("^" + words)
        numChildren -= 1
        @children.slice!(ind)
      elsif @children[ind-1].head =~ /^EDITED/ and @children[ind-1].getNumLeafs == 1
        # word1: the edited word; fps: anything already glommed onto it.
        @children[ind-1].getWordString =~ /([^^]+)(.*)/
        word1 = $1
        fps = $2
        word2 = nil
        if @children[ind].getLeftLeaf.head =~ /([^^]+).*/
          word2 = $1
        end
        ## For one word repairs...
        if word1.eql? word2
          # final=true: the marker goes after the word, not in front of it.
          @children[ind].glomOntoFirstWord(fps + "^" + "REPEAT", true)
          numChildren -= 1
          @children.slice!(ind-1)
        else
          ind += 1
        end
      elsif @children[ind-1].head =~ /^EDITED/
        @children[ind-1].glomOntoLastWord("^1")
        ind += 1
      else
        ind += 1
      end
    end

    @children.each{ |child|
      child.glomIntjPrn!
    }
  end
  
  ## Adds a string to the right-most leaf of this subtree
  # Appends substr to the label of the right-most leaf of this subtree.
  def glomOntoLastWord(substr)
    return @children[-1].glomOntoLastWord(substr) unless @children.size == 0

    @head = @head + substr
  end
  
  # Attaches substr to the left-most leaf of this subtree: in front of the
  # leaf's label by default, or after it when final is set.
  def glomOntoFirstWord(substr, final=false)
    return @children[0].glomOntoFirstWord(substr, final) unless @children.size == 0

    @head = final ? @head + substr : substr + @head
  end
  
  # Returns the left-most leaf node of this subtree (the node itself when it
  # has no children).
  def getLeftLeaf
    @children.size == 0 ? self : @children[0].getLeftLeaf
  end
end

# Command-line switches.  Every transform is off by default except
# binarizeDet, which is on and can be switched off with -Y.
options = OpenStruct.new(
  :propUnf           => false,
  :fixEdited         => false,
  :fixUnary          => false,
  :binarizeIntj      => false,
  :glomIntj          => false,
  :propArgs          => false,
  :binarize          => false,
  :savePOS           => false,
  :addPOS            => false,
  :tagging           => false,
  :glomIntjPrn       => false,
  :haleify           => false,
  :fixUnfDashes      => false,
  :unpropagateEdited => false,
  :annotateRepaired  => false,
  :swapEdRep         => false,
  :nothing           => false,
  :flattenEdited     => false,
  :fixEditedUnary    => false,
  :removeIntj        => false,
  :binarizeDet       => true,
  :extrprn           => false,
  :lexprn            => false
)
opts = OptionParser.new

# [switch, option field, value stored when the switch is given]
# daughterAnnotation (-c) and nomBin (-n) have no default above, so they
# read as nil until their switch is seen.
[
  ["-a", :propArgs,           true],
  ["-b", :binarize,           true],
  ["-c", :daughterAnnotation, true],
  ["-d", :fixUnfDashes,       true],
  ["-e", :fixEdited,          true],
  ["-f", :fixUnary,           true],
  ["-g", :fixEditedUnary,     true],
  ["-h", :haleify,            true],
  ["-i", :binarizeIntj,       true],
  ["-I", :glomIntj,           true],
  ["-j", :removeIntj,         true],
  ["-k", :extrprn,            true],
  ["-l", :lexprn,             true],
  ["-n", :nomBin,             true],
  ["-q", :flattenEdited,      true],
  ["-p", :addPOS,             true],
  ["-P", :glomIntjPrn,        true],
  ["-r", :annotateRepaired,   true],
  ["-s", :savePOS,            true],
  ["-t", :tagging,            true],
  ["-u", :propUnf,            true],
  ["-w", :swapEdRep,          true],
  ["-x", :nothing,            true],
  ["-Y", :binarizeDet,        false],
  ["-z", :unpropagateEdited,  true],
].each { |switch, field, value|
  opts.on(switch) { |val| options[field] = value }
}

opts.parse!(ARGV)
# Count of input lines processed so far (1-based).
line_num = 1
$trees = Array.new
# Lookup table of verb forms, stored as word => 1.
# NOTE(review): its consumers are not visible in this part of the file --
# presumably used by tree methods defined earlier; confirm before changing.
# The literal previously listed "get" twice, which makes Ruby warn about a
# duplicated hash key; the contents are unchanged.
$verbs = {"know" => 1, "do" => 1, "have" => 1, "was" => 1, "is" => 1,
          "think" => 1, "'re" => 1, "are" => 1, "be" => 1, "'ve" => 1,
          "'m" => 1, "get" => 1, "had" => 1, "did" => 1, #'
          "going" => 1, "go" => 1, "got" => 1, "mean" => 1, "were" => 1,
          "been" => 1, "see" => 1, "guess" => 1, "like" => 1, "has" => 1,
          "does" => 1, "want" => 1, "say" => 1, "take" => 1, "said" => 1,
          "put" => 1, "doing" => 1, "went" => 1, "make" => 1, "work" => 1,
          "come" => 1, "getting" => 1, "used" => 1, "let" => 1, "need" => 1,
          "thought" => 1, "feel" => 1, "done" => 1, "talking" => 1, "read" => 1,
          "use" => 1, "being" => 1, "try" => 1, "seems" => 1, "live" => 1} #,
          #"keep" => 1, "trying" => 1, "find" => 1, "seen" => 1, "look" => 1,
          #"having" => 1, "started" => 1, "watch" => 1, "tell" => 1, "am" => 1,
          #"remember" => 1, "start" => 1, "heard" => 1, "pay" => 1, "enjoy" => 1,
          #"came" => 1, "made" => 1, "working" => 1, "give" => 1, "talk" => 1,
          #"believe" => 1, "sounds" => 1, "buy" => 1, "took" => 1, "play" => 1,
          #"goes" => 1, "wanted" => 1, "called" => 1, "looking" => 1, "gets" => 1,
          #"understand" => 1, "agree" => 1, "call" => 1, "love" => 1, "saw" => 1,
          #"coming" => 1, "makes" => 1, "bought" => 1, "hear" => 1, "gone" => 1, 
          #"stay" => 1, "spend" => 1, "lived" => 1, "saying" => 1, "supposed" => 1,
          #"gotten" => 1, "thinking" => 1, "found" => 1, "seem" => 1, "worked" => 1}
          
          
#"do" => 1, "have" => 1, "was" => 1, "is" => 1}#, 
#          "think" => 1, "'re" => 1, "are" => 1, "be" => 1, "'ve" => 1}
          

# Main loop: read the input one line at a time with Kernel#gets, build a Tree
# from each line, apply the transformations selected on the command line in
# the fixed order below, and print the resulting tree (followed by its
# probability when the tree carries one).
while (line = gets)
# begin
  # -d: fold a dashed "-UNF" suffix into the category label,
  # e.g. "(NP-UNF" becomes "(NPUNF".
  line.gsub!(/\(([A-Z]+)[^ ()]*-UNF/, '(\1UNF') if options.fixUnfDashes

  if options.nothing
    ## Force no changes: echo the line without building a tree
    line.chomp!
    print line + "\n"
#    print t.to_s + "\n"
#    line_num += 1
    next
  end

  t = Tree.new(line)

  t.binarizeDet! if options.binarizeDet
  t.removeIntj! if options.removeIntj
  t.flattenEdited! if options.flattenEdited
  t.lexprn! if options.lexprn
  t.extrprn! if options.extrprn
  t.annotateDaughters! if options.daughterAnnotation

  t.propUnf if options.propUnf
  if options.fixEdited
    t.fixEdited
    #t.fixEditedUnary
  end
  t.fixEditedUnary if options.fixEditedUnary
  t.binarize if options.binarize
  #if options.fixEdited
    #t.fixEdited
    #t.fixEditedUnary
  #end
  t.binarizeIntj! if options.binarizeIntj
  # -I takes precedence over -P when both are given.
  if options.glomIntj
    t.glomIntj!
  elsif options.glomIntjPrn
    t.glomIntjPrn!
  end
  ## TODO
  t.propArgs if options.propArgs
  t.savePOS if options.savePOS
  t.tag(options.addPOS) if options.tagging
  # The -n switch stores its flag in options.nomBin. This test used to read
  # options.numBin, which nothing ever sets, so -n had no effect.
  # NOTE(review): confirm Tree#nominalBinarize! is defined; this branch was
  # unreachable before the fix.
  t.nominalBinarize! if options.nomBin
  t.fixUnary! if options.fixUnary
  t.unpropagateEdited! if options.unpropagateEdited
  # -r takes precedence over -w when both are given.
  if options.annotateRepaired
    #t.binarize
    #s = t.to_s
    #s.gsub!(/UNF/, '')
    #t = Tree.new(s)
    t.annotateRepaired!
  elsif options.swapEdRep
    t.swapEditedRepaired!
  end

  if t.prob.nil?
    print t.to_s + "\n"
  else
    print "#{t.to_s} : #{t.prob}\n"
  end
  line_num += 1
# rescue
#  $stderr.puts "Error caught at line number #{line_num}"
#  Process.exit
# end
end
