#!/usr/bin/ruby
#######################
# unglomIntj.rb
#
# The reason for this script has changed.
# Its purpose now is to renormalize trees onto which interjections have
# been glommed.
# Sequence: 1) Parse with hhmmparser-fp
# 2) Glom interjections onto end of previous word with hypoths2rctrees-ciqf.pl
# 3) Undo all tree operations like normal
# 4) Add INTJ right-siblings for every glommed on word (this script)
########################

require 'scripts/umnlp.rb'

class Tree
    ## Undoes "glomming" in place: material that was attached to a word as
    ## "word^x^y" is lifted off the word, carried up the tree on the node
    ## labels, and finally re-inserted as a separate right sibling.
    ##
    ## Relies on the rest of Tree (head, children, to_s, getWordString and
    ## Tree.new with/without a bracketed string), which is defined outside
    ## this file section.  Returns nil.
    def unglomIntj!
        ## Post-order walk: every subtree is cleaned up before this node
        ## inspects the labels of its children.
        @children.each do |kid|
            kid.unglomIntj!
        end

        ## Pre-terminal: a single childless child is the word itself.  Move
        ## everything after the first '^' from the word onto my own label and
        ## stop; my parent decides where it finally goes.
        if @children.size == 1
            word = @children[0]
            if word.children.size == 0 && word.head =~ /([^\^]+)\^(.+)/
                bare_word = Regexp.last_match(1)
                glommed = Regexp.last_match(2)
                @head += "^#{glommed}"
                word.head = bare_word
                return
            end
        end

        ## Internal node: visit the children left to right.  The position
        ## only advances once the child in front of us carries no more '^'
        ## suffixes, so several suffixes on one child are peeled off one at
        ## a time, rightmost first.
        pos = 0
        while pos < @children.size
            node = @children[pos]

            unless node.children.size > 0 && node.head =~ /(.+)\^(.+)/
                pos += 1
                next
            end

            ## The greedy match splits at the LAST '^': 'fp' is the rightmost
            ## suffix, 'remainder' is the label plus any earlier suffixes, and
            ## 'bare_cat' is the label with every suffix removed.
            remainder = Regexp.last_match(1)
            fp = Regexp.last_match(2)
            bare_cat = remainder[/([^^]+)\^(.+)/, 1] || remainder
            node.head = remainder

            ## Rightmost child: nothing follows it inside this phrase, so the
            ## suffix moves up onto my own label (placed ahead of suffixes
            ## already there, which keeps the original left-to-right order).
            if pos == @children.size - 1
                if @head =~ /\^/
                    @head.gsub!(/([^\^]+)\^(.+)/, '\1^' + fp + '^\2')
                else
                    @head += "^#{fp}"
                end
                next
            end

            ## Mid-phrase child: materialize the suffix as the next sibling.
            if fp.eql?(node.getWordString)
                ## Repeat: the suffix equals this child's word string.  A copy
                ## of the child becomes the new sibling and the child itself
                ## goes under EDITED unless its label already matches EDITED.
                node.head = bare_cat
                @children.insert(pos + 1, Tree.new(node.to_s))
                unless node.head =~ /EDITED/
                    wrapper = Tree.new
                    wrapper.head = "EDITED"
                    wrapper.children[0] = node
                    @children[pos] = wrapper
                end
                ## Suffixes still pending stay on whatever now sits at this
                ## position so that later iterations handle them.
                if remainder =~ /\^(.+)/
                    @children[pos].head += "^#{Regexp.last_match(1)}"
                end
            elsif fp =~ /(.+)_(.+)/
                ## Two parts joined by '_' become a small parenthetical clause.
                @children.insert(pos + 1, Tree.new("(PRN (S (NP (prp #{Regexp.last_match(1)})) (VP (vb #{Regexp.last_match(2)}))))"))
            else
                ## Anything else is treated as a plain interjection.
                @children.insert(pos + 1, Tree.new("(INTJ (UH #{fp}))"))
            end
        end
    end
end

## Builds the subtree that stands in for one un-glommed suffix: a small
## parenthetical clause when the suffix has the form "a_b", otherwise a
## plain interjection.
sibling_for = lambda do |suffix|
    if suffix =~ /(.+)_(.+)/
        Tree.new("(PRN (S (NP (prp #{Regexp.last_match(1)})) (VP (vb #{Regexp.last_match(2)}))))")
    else
        Tree.new("(INTJ (UH #{suffix}))")
    end
end

## Filter: lines without '^' are echoed untouched; every other line is parsed
## as a tree, un-glommed, and printed back out.
while (raw = gets)
    unless raw =~ /\^/
        puts raw
        next
    end

    ## Parentheticals that were naively jammed together look like
    ## ...^you_^know^...; merge the two halves into a single "you_know" suffix.
    raw.gsub!(/\^([^^_]+)_\^([^^]+)/, '^\1_\2')
    root = Tree.new(raw)
    root.unglomIntj!

    ## Suffixes that bubbled all the way up to a root labelled INTJ: put a
    ## new INTJ node above the old root and hang the first suffix beside it.
    ## Any further suffixes stay on the new root's label for the loop below.
    if root.head =~ /INTJ\^(.+)/
        pending = Regexp.last_match(1)
        outer = Tree.new
        outer.head = "INTJ"
        outer.children[0] = root
        root.head = outer.head
        if pending =~ /([^\^]+)\^(.+)/
            first_fp = Regexp.last_match(1)
            pending.gsub!(/([^\^]+)\^(.+)/, '\2')
            outer.head += "^#{pending}"
        else
            first_fp = pending
        end
        outer.children[1] = sibling_for.call(first_fp)
        root = outer
    end

    ## Whatever is still glommed onto the root label has no parent to receive
    ## it, so peel the suffixes off left to right and append each one as a new
    ## last child of the root.
    while root.head =~ /([^\^]+)\^(.+)/
        label = Regexp.last_match(1)
        pending = Regexp.last_match(2)

        ## If the root is a bare pre-terminal, push its word down one level
        ## (under a node with the same label) so the new siblings attach next
        ## to a pre-terminal rather than next to a raw word.
        if root.children.size == 1 && root.children[0].children.size == 0
            lowered = Tree.new
            lowered.head = label
            lowered.children[0] = root.children[0]
            root.children[0] = lowered
        end

        if pending =~ /([^\^]+)\^(.+)/
            first_fp = Regexp.last_match(1)
            pending.gsub!(/([^\^]+)\^(.+)/, '\2')
            root.head = label + "^" + pending
        else
            first_fp = pending
            root.head = label
        end

        root.children << sibling_for.call(first_fp)
    end

    puts root.to_s
end
