###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################


require 'scripts/umnlp.rb'
require 'optparse'
require 'ostruct'

class Tree
  # Undoes INTJ-specific binarization on this node and then on every
  # descendant, modifying the tree in place.  Two patterns are reversed:
  #
  # 1. Internal: a child X whose two children are INTJ and a node with the
  #    same head as X is replaced by those two children.
  # 2. Initial: an S* node with exactly two children, INTJ followed by a
  #    node carrying the same head as this one, gets that right child's
  #    children pulled up in place of the right child.
  #
  # Returns nil for a node without children, otherwise the child list.
  def unbinarizeIntj!
    return if @children.size == 0

    ## First undo the internal binarization of INTJs.
    ## The child list grows while we walk it, so the index is managed by hand.
    i = 0
    while i < @children.size
      node = @children[i]
      kids = node.children
      if kids.size == 2 and kids[0].head == "INTJ" and node.head == kids[1].head
        ## Replace the node by its two children ...
        @children[i, 1] = kids
        ## ... and step over exactly those two.  (Advancing by three, as the
        ## old loop did, silently skipped the sibling that follows them.)
        i += 2
      else
        i += 1
      end
    end

    ## Now undo the initial binarization of INTJs
    if @children.size == 2 and @head =~ /^S/ and @children[0].head == "INTJ" and @head == @children[1].head
      lifted = @children.delete_at(1).children
      @children.concat(lifted)
    end

    @children.each { |child| child.unbinarizeIntj! }
  end
  
  
  # treebinarize does a bunch of stuff that should be undone...
  # shown above each section...
  #
  # Applies the inverse of the treebinarize transformations to this node and
  # then to every descendant, modifying the tree in place.  The steps below
  # run in a fixed order and later steps see the result of earlier ones, so
  # they should not be reordered casually.
  #
  # Returns nil for a node without children, otherwise the child list.
  def unbinarize!
    return if @children.size == 0

    ##  S -> X1 X2 X3 NP VP  was binarized
    ##  into: S -> X1 X2 X3 S
    ##  and : S -> NP VP
    ##
    ## So under an S*/UTT* node, a child  S -> NP VP  is replaced by its
    ## NP and VP.  The promoted right child is the next node visited.
    if @head =~ /^S/ or @head =~ /^UTT/
      i = 0
      while i < @children.size
        node = @children[i]
        kids = node.children
        if node.head == "S" and kids.size == 2 and kids[0].head =~ /^NP/ and kids[1].head =~ /VP/
          @children[i, 1] = kids
        end
        i += 1
      end
    end

    ## Take VP -> ADVP VP and flatten it to ADVP VP.  Because the promoted VP
    ## is visited next, nested ADVP layers are peeled off one after another.
    i = 0
    while i < @children.size
      node = @children[i]
      kids = node.children
      if node.head == "VP" and kids.size == 2 and kids[0].head == "ADVP" and kids[1].head == "VP"
        @children[i, 1] = kids
      end
      i += 1
    end

    ## Take -> S -> TO VP
    ## => S -> VP -> TO VP (automatic unary projection)
    if @head == "S" and @children.size == 2 and
       @children[0].head =~ /^TO/ and @children[1].head == "VP"
      t = Tree.new
      t.head = "VP"
      t.children[0] = @children[0]
      t.children[1] = @children[1]
      @children[0] = t
      @children.delete_at(1)
    end

    ## Take child INTJ -> INTJ INTJ and splice the two INTJs into this node.
    ## Only children are inspected here, so a node is never flattened by
    ## this step unless it has a parent that is visited.
    i = 0
    while i < @children.size
      node = @children[i]
      kids = node.children
      if node.head == "INTJ" and kids.size == 2 and kids[0].head == "INTJ" and kids[1].head == "INTJ"
        @children[i, 1] = kids
      end
      i += 1
    end

    # Take EDITEDS -> NP VP =>
    # EDITEDS -> S -> NP VP
    # undone in trees2cnftrees, but losing points to baseline for no reason...
    if @head == "EDITEDS" and @children.size == 2 and
       @children[0].head == "NP" and @children[1].head =~ /^VP/
      t = Tree.new
      t.head = "S"
      t.children[0] = @children[0]
      t.children[1] = @children[1]
      @children[0] = t
      @children.delete_at(1)
    end

    ## EDITEDNP over a single non-NP child: put an NP layer in between
    if @head =~ /EDITEDNP/ and @children.size == 1 and @children[0].head != "NP"
      t = Tree.new
      t.head = "NP"
      t.children[0] = @children[0]
      @children[0] = t
    end

    ## EDITEDX -> ...  becomes  EDITED -> X -> ...
    if @head =~ /EDITED([^ _]+)/
      inner_label = $1
      @head = "EDITED"
      t = Tree.new
      t.head = inner_label
      t.children = @children.slice!(0..-1)
      @children[0] = t
    end

    ## PRN -> NP VP is written as
    ## PRN -> S -> NP VP in the gold standard (gets removed in cnftrees i believe)
    if @head == "PRN" and @children.size == 2 and @children[0].head == "NP" and @children[1].head =~ /^VP/
      t = Tree.new
      t.head = "S"
      t.children[0] = @children[0]
      t.children[1] = @children[1]
      @children[0] = t
      @children.delete_at(1)
    end

    ## Some things are both POS tags and pre-POS tags, and will show up
    ## twice in hypoths. (e.g. IN).  Collapse X -> X into a single X.
    if @children.size == 1 and @head == @children[0].head
      @children = @children[0].children
    end

    ## In the gold evalform, SBAR -> ... NP VP is always expanded to
    ## SBAR -> S -> ... NP VP.  That expansion is deliberately NOT attempted
    ## here: it also depends on things being INTJ or PRN before NP VP.

    ## Turn S -> X S and
    ## S -> INTJ NP VP
    ## into S -> X INTJ NP VP (or whatever)
    if @children.size == 2 and @head == "S" and @children[1].head == "S"
      inner = @children[1].children
      ## A childless right-hand node has nothing to pull up; without the
      ## size check this dereferenced nil and raised NoMethodError.
      if inner.size > 0 and inner[0].children.size != 0
        @children.delete_at(1)
        @children.concat(inner)
      end
    end

    ## Turn VP -> MD VP and
    ## VP -> RB NP or whatever into
    ## VP -> MD RB NP
    ## Special case: When VP2 goes to VB something, it is not flattened.
    if @children.size == 2 and @head =~ /^VP/ and @children[0].head =~ /MD/ and @children[1].head =~ /^VP/
      inner = @children[1].children
      ## Same guard as above for a childless right-hand node.
      if inner.size > 0 and inner[0].children.size != 0 and inner[0].head =~ /^[^V]/
        @children.delete_at(1)
        @children.concat(inner)
      end
    end

    ## BES is left-binarized and called a NT; un-binarizing it is handled
    ## in unbinarize.pl rather than here.

    @children.each { |child| child.unbinarize! }
  end
  
  # Dissolves the extra layer around an edited/repair group, top-down and
  # in place.  For each child C, the category X is the first run of
  # characters in C's head that contains neither a space nor a hyphen.
  # C is replaced by its own children when they have one of these shapes:
  #
  #   EDITEDX INTJ PRN X...
  #   EDITEDX (INTJ|PRN) X...
  #   EDITEDX X...
  #
  # where "X..." is any head that starts with X.  Errors raised while
  # processing a child are reported on stderr and otherwise ignored.
  def unfixEdited!
    ## Return if we're at a POS tag
    return if @children.size == 1 && @children[0].children.size == 0

    ## The list grows as groups are spliced in, hence the manual index.
    ## Spliced-in nodes (from the second one on) are examined in turn.
    i = 0
    while i < @children.size
      node = @children[i]
      kids = node.children

      ## A head with no usable category cannot match anything; skipping it
      ## avoids building "EDITED" + nil.
      if node.head =~ /([^ -]+)/
        cat = $1
        ## Escape the category: labels such as PRP$ contain regex
        ## metacharacters and must be matched literally.
        starts_with_cat = /^#{Regexp.escape(cat)}/

        middle_ok = case kids.size
                    when 2 then true
                    when 3 then kids[1].head =~ /INTJ|PRN/
                    when 4 then kids[1].head == "INTJ" && kids[2].head == "PRN"
                    else false
                    end

        if middle_ok and kids.first.head == ("EDITED" + cat) and kids.last.head =~ starts_with_cat
          @children[i, 1] = kids
        end
      end
      i += 1
    end

    @children.each{ |child|
      begin
        child.unfixEdited!
      rescue
        $stderr.puts "Error in unfixEdited with tree: #{to_s}"
      end
    }
  end
   
  ## This function will work its way up 
  ## a syntax tree looking for the REPAIREDX marker,
  ## mark it if found, and return it.
  ## Then when the similar X category is recursed back to, it will
  ## be noticed and the tree will be transformed to reflect edit and repair
  # Post-order pass that turns REPAIREDX annotations into an
  # EDITEDX / X pair, modifying the tree in place.
  #
  # Return value: the category X (a String) of a REPAIREDX node found
  # below that has not yet been matched with an ancestor whose head is X;
  # nil when nothing is pending.  When a pending X reaches a node whose
  # head equals X, that node is rewritten and the pending value is cleared.
  def unannotateRepaired!()
    return nil if @children.size == 0

    ## Only the last child's answer is preserved, but that is all we need,
    ## since only the right child could be repaired...
    repairedNode = nil
    @children.each { |child| repairedNode = child.unannotateRepaired! }

    if repairedNode.nil?
      ## Nothing pending from below.  If this node is itself REPAIREDX,
      ## start passing X up the tree; otherwise there is nothing to report.
      return $1 if @head =~ /^REPAIRED([^ ]*)/
      return nil
    end

    ## We're not the matching category, but we have to pass this up the tree...
    return repairedNode unless repairedNode == @head

    last = @children.last
    if last.head == "REPAIRED" + @head
      ## Something like NP -> PRP REPAIREDNP
      t = Tree.new
      t.head = "EDITED" + @head
      ## This removes all but the last elem. of children and gives it to t
      t.children = @children.slice!(0..-2)
      ## Make t the first elem. and correct the NT label of the repair
      @children.insert(0, t)
      @children[1].head.gsub!(/REPAIRED/, '')
    elsif last.children.last.head == "REPAIRED" + @head
      ## The repair is the last grandchild, e.g. X -> A (Y -> B REPAIREDX).
      ## Everything before it, including a copy of Y without the repair,
      ## becomes the EDITEDX; the repair becomes our second child.
      ##
      ## Both nodes are captured BEFORE the child list is modified: with a
      ## single child, storing t in slot 0 would otherwise replace the very
      ## node the repair is read from, and the repair would be lost.
      repaired = last.children.last

      remainder = Tree.new
      remainder.head = last.head
      remainder.children = last.children.slice(0..-2)

      t = Tree.new
      t.head = "EDITED" + @head
      t.children = @children.slice(0..-2)
      t.children << remainder

      @children[0] = t
      @children[1] = repaired
      @children[1].head.gsub!(/REPAIRED/, '')
      @children.slice!(2..-1)
    end

    ## We clear the repairedNode out...
    nil
  end
   
  # Rewrites  X... [INTJ] [PRN] REPAIREDX  groups into EDITEDX ... X and
  # dissolves the layer around them, top-down and in place.  For each
  # child C, the category X is the first run of characters in C's head
  # that contains neither a space nor a hyphen.  When C's children are
  #
  #   X... INTJ PRN REPAIREDX
  #   X... (INTJ|PRN) REPAIREDX
  #   X... REPAIREDX
  #
  # ("X..." being any head that starts with X), the first is relabelled
  # EDITEDX, "REPAIRED" is stripped from the last, and C is replaced by
  # its children.  Errors raised while processing a child are reported on
  # stderr and otherwise ignored.
  def unswapEditedRepaired!
    ## Return if we're at a leaf
    return if @children.size == 0

    ## The list grows as groups are spliced in, hence the manual index.
    i = 0
    while i < @children.size
      node = @children[i]
      kids = node.children

      ## A head with no usable category cannot match anything; skipping it
      ## avoids building "REPAIRED" + nil.
      if node.head =~ /([^ -]+)/
        cat = $1
        ## Escape the category: labels such as WP$ contain regex
        ## metacharacters and must be matched literally.
        starts_with_cat = /^#{Regexp.escape(cat)}/

        middle_ok = case kids.size
                    when 2 then true
                    when 3 then kids[1].head =~ /INTJ|PRN/
                    when 4 then kids[1].head == "INTJ" && kids[2].head == "PRN"
                    else false
                    end

        if middle_ok and kids.first.head =~ starts_with_cat and kids.last.head == "REPAIRED" + cat
          kids.first.head = "EDITED" + cat
          kids.last.head.gsub!(/REPAIRED/, '')
          @children[i, 1] = kids
        end
      end
      i += 1
    end

    @children.each{ |child|
      begin
        child.unswapEditedRepaired!
      rescue
        $stderr.puts "Error in unswapEditedRepaired! with tree: #{child.to_s}"
      end
    }
  end
  
  
  # Placeholder: performs no transformation on any node and always
  # returns nil.  Only the leaf guard was ever written; the original
  # sketch stopped at a test of the head against /EDITED([^ ]+)/.
  def unannotateChildren!
    return if @children.empty?

    nil
  end
  
  # Bottom-up pass that converts "__eli" markers into UNF/EDITED structure,
  # in place:
  #
  # * every child is processed first; a child whose head then contains
  #   "UNF" is wrapped in a new EDITED node;
  # * afterwards, if this node has more than one child and the last child
  #   has a single child headed "__eli", that last child is removed and
  #   "UNF" is appended to this node's head.
  def unelide!
    return if @children.empty?

    @children.each_with_index do |kid, idx|
      kid.unelide!
      next unless kid.head =~ /UNF/

      wrapper = Tree.new
      wrapper.head = "EDITED"
      wrapper.children << kid
      @children[idx] = wrapper
    end

    tail = @children[-1]
    if @children.size > 1 and tail.children.size == 1 and tail.children[0].head == "__eli"
      @head += "UNF"
      @children.delete_at(-1)
    end
  end
  
   ## Unnecessary - automatically done by reverse RCT
#  def repropagateEdited!
#    
#  end

  # Removes PRN lexicalisation from node labels, top-down and in place:
  #
  # * a head of the form PRN<suffix> (suffix without underscores) becomes
  #   PRN over a new node labelled with the upper-cased suffix, which
  #   takes over all of the original children;
  # * a head of the form <prefix>PRN<suffix> is cut back to <prefix>.
  #
  # Nodes without children are left alone.
  def unlexprn!
    return if @children.empty?

    lexicalised = /^PRN([^_]+)$/.match(@head)
    if lexicalised
      inner = Tree.new
      inner.head = lexicalised[1].upcase
      inner.children = @children
      @head = "PRN"
      @children = [inner]
    end

    embedded = /^(.+)PRN(.+)$/.match(@head)
    @head = embedded[1] if embedded

    @children.each { |kid| kid.unlexprn! }
  end
  
  ## Unelide right-corner trees
  # Top-down, in place.  When a node has exactly two children and the
  # right one carries a slash label FULL/LACK, that child is replaced by
  #
  #   FULL -> (FULL/LACK ...) (LACK -> __eli)
  #
  # i.e. the incomplete constituent is kept and an explicit "__eli"
  # marker is added for the part it lacks.  The label is split at its
  # last slash.
  def unelideRC!
    return if @children.empty?

    ## If right-child is a slash (incomplete) category it's an UNF
    slash = @children.size == 2 ? /(.+)\/(.+)/.match(@children[1].head) : nil
    if slash
      gap = Tree.new
      gap.head = "__eli"

      missing = Tree.new
      missing.head = slash[2]
      missing.children << gap

      completed = Tree.new
      completed.head = slash[1]
      completed.children << @children[1] << missing

      @children[1] = completed
    end

    @children.each { |kid| kid.unelideRC! }
  end
end

## Command-line switches.  Every transformation is opt-in except
## unbinarize, which is on by default b/c treebinarize is on by default
## (so -b merely re-asserts the default).
options = OpenStruct.new(
  :unbinarize         => true,
  :unbinarizeIntj     => false,
  :unfixEdited        => false,
  :unannotateRepaired => false,
  :unannotateChildren => false,
  :unswapEditRep      => false,
  :unelide            => false,
  :nothing            => false,
  :unlexprn           => false,
  :unelideRC          => false
)

opts = OptionParser.new

## flag => option field it switches on.
## -c (unannotateChildren) and -x (nothing) are accepted, but the loop
## below never reads them.
[
  ["-b", :unbinarize],
  ["-c", :unannotateChildren],
  ["-d", :unelide],
  ["-e", :unfixEdited],
  ["-i", :unbinarizeIntj],
  ["-l", :unlexprn],
  ["-r", :unannotateRepaired],
  ["-s", :unelideRC],
  ["-w", :unswapEditRep],
  ["-x", :nothing]
].each do |flag, field|
  opts.on(flag) { |val| options.send("#{field}=", true) }
end

opts.parse!(ARGV)

## One tree per input line; transformations are applied in this fixed
## order and the resulting tree is printed.
while (line = gets)
  t = Tree.new(line)

  t.unlexprn!       if options.unlexprn
  t.unbinarizeIntj! if options.unbinarizeIntj
  t.unfixEdited!    if options.unfixEdited

  ## -r takes precedence over -w
  if options.unannotateRepaired
    t.unannotateRepaired!
    t.unfixEdited!
  elsif options.unswapEditRep
    t.unswapEditedRepaired!
  end

  if options.unbinarize
    begin
      t.unbinarize!
    rescue
      ## An error here stops the whole run: the offending tree is reported
      ## on stderr and no further input lines are processed.
      $stderr.puts "ERROR in unbinarize with tree: #{t}"
      break
    end
  end

  t.unelide!   if options.unelide
  t.unelideRC! if options.unelideRC

  puts t.to_s
end
