###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

#!/usr/bin/ruby

#####################################################################
# trees2dat-sr.rb
# This script converts right-corner transformed trees into the
# operations in an HHMM parser.  Variable names:
# Qe - A syntactic category as initiated from above (recursively)
# Qt - A syntactic category as initiated from the left (next step in an FSA)
# Fr - A reduce category (1|0) which determines whether the FSA at that depth has completed
# Pg - POS given G (category from original grammar)
# Pw - POS given word
# IP - Interruption Point node which decides whether a reparandum has ended
#
######################################################################

require "scripts/umnlp.rb"

# Set to true by Tree#toDat when a Pw line is emitted with an empty word;
# the main loop reports it on stderr and resets it after each tree.
$error = false
# The input line currently being processed (assigned by the main loop);
# echoed in one of Tree#toDat's error messages.
$line = ""
# Value printed after "=" on the emitted lines; the main loop sets it once
# per tree (the tree's own prob, or 1.0 when the tree has none).
$prob = 0.0

class Tree
  # Prints the model-operation lines (Qe, Qt, Fr, IP, Pg, Pw) for this subtree
  # on stdout.  Every line has the form "<op> <condition> : <value> = <weight>";
  # the weight is the global $prob, except on the "IP ... : 0" line of the
  # binary case, which is hard-coded to 1.0.
  #
  # Parameters:
  #   depth       - depth index written on the Qe/Qt/Fr/IP lines; the right
  #                 child of a binary node is emitted at depth+1.
  #   cexpParent  - category expected by the parent; used in the Fr test and
  #                 as part of the Qt condition.
  #   cfullParent - full parent label; used as the condition of the Qe lines.
  #   clackParent - only forwarded to recursive calls; not read otherwise.
  #
  # Returns a two-element array [compos, contin]:
  #   left-terminal  -> ["C/C|X", "C="]  (C = child head, X = C up to its last "/")
  #   right-terminal -> ["-/-|-", ""]
  #   unary          -> [child's compos, "<child head>="]
  #   binary         -> [rewritten left-child head, ""]
  # Returns nil (after an ERROR line on stderr) for a childless node, for a
  # unary chain whose single grandchild is not a leaf, and for nodes with more
  # than two children.  A nil coming back from a child is passed through by
  # the unary case but makes the binary case raise NoMethodError.
  #
  # Side effects:
  #   * the binary case rewrites the left child's head to "<head>|<right cat>";
  #   * $error is set to true when a Pw line with an empty word is printed;
  #   * a unary node whose head matches /REPAIRED/ terminates the process with
  #     exit status 1.
  def toDat(depth, cexpParent, cfullParent, clackParent)
    if @children.size == 0
      $stderr.puts "ERROR: EXPECT (PRETERM), POS, WORD NODES IN RCTREE!!! curr:" + @head + "\nLine=#{$line}"
      return
    end

    if @children.size == 1
      child = @children[0]

      ## a single grandchild has to be the POS#word leaf
      if child.children.size == 1 && child.children[0].children.size != 0
        $stderr.puts "ERROR: EXPECT (PRETERM), POS, WORD NODES IN RCTREE!!! curr:" + @head + " child:" + child.head + " granch:" + child.children[0].head
        return
      end

      if head =~ /REPAIRED/
        $stderr.puts "ERROR: Odd sentence has REPAIRED node as the head of a unary tree: #{to_s}"
        ## fatal: exit non-zero so the caller can tell the output is incomplete
        Process.exit(1)
      end

      ## terminal case, left child post-transform
      ## (the only grandchild is known to be a leaf at this point)
      if child.children.size == 1 && child.children[0].children.size == 0
        leaf = child.children[0].head
        label = child.head
        pos = leaf.gsub(/([^\#]*)\#.*/, '\1')
        qexp_pseudo = label.gsub(/(.*)\/.*/, '\1')
        compos = "#{label}/#{label}|#{qexp_pseudo}"
        if depth < 5
          puts "Qe #{depth} #{cfullParent} : #{compos} = #{$prob}"
          puts "Qe #{depth + 1} #{compos} : -/-|- = #{$prob}"
          puts "Pg #{label} : #{pos} = #{$prob}"
        elsif depth == 5
          ## borderline depth: complain, but still emit the Pg line
          $stderr.puts "ERROR: TREE TOO BIG!!!"
          puts "Pg #{label} : #{pos} = #{$prob}"
        else
          ## beyond max depth: complain only
          $stderr.puts "ERROR: TREE TOO BIG!!!"
        end
        word = leaf.gsub(/.*\#(.*)/, '\1')
        puts "Pw #{word} : #{pos} = #{$prob}"
        $error = true if word == ""
        return compos, label + "="
      end

      ## terminal case, right child post-transform
      if child.children.size == 0
        leaf = child.head
        pos = leaf.gsub(/([^\#]*)\#.*/, '\1')
        puts "Qe #{depth} #{cfullParent} : -/-|- = #{$prob}"
        puts "Pg #{@head} : #{pos} = #{$prob}"
        word = leaf.gsub(/.*\#(.*)/, '\1')
        puts "Pw #{word} : #{pos} = #{$prob}"
        $error = true if word == ""
        return "-/-|-", ""
      end

      ## unary case: recurse to the only child with unchanged context
      composL, _continL = child.toDat(depth, cexpParent, cfullParent, clackParent)
      return composL, child.head + "="
    end

    ## binary case
    if @children.size == 2
      left = @children[0]
      right = @children[1]
      ## propagate curr: tag the left child with the right child's category
      left.head = left.head + "|" + right.head.gsub(/(.*)\/.*/, '\1')
      ## recurse to left (or unary) child...
      composL, continL = left.toDat(depth, cexpParent, cfullParent, clackParent)
      ## context handed to the right child
      cexpCurr = left.head.gsub(/.*\|(.*)/, '\1')
      cfullCurr = left.head
      clackCurr = left.head.gsub(/.*\/(.*)/, '\1')
      if cexpParent == composL.gsub(/(.*)\/.*/, '\1') &&
         composL.gsub(/.*\/(.*)\|.*/, '\1') == composL.gsub(/.*\|(.*)/, '\1')
        puts "Fr #{depth} #{composL} : 0 = #{$prob}"
      end
      ## NOTE(review): this weight is the literal 1.0, not $prob -- confirm intended
      puts "IP #{depth} #{composL} : 0 = 1.0"
      puts "Qt #{depth} #{cexpParent} #{composL} : #{continL}#{left.head} = #{$prob}"
      ## recurse to right child, one level deeper...
      composR, _continR = right.toDat(depth + 1, cexpCurr, cfullCurr, clackCurr)
      puts "Fr #{depth + 1} #{composR} : 1 = #{$prob}"
      ## Check for repair: the right child produced a category other than the
      ## expected one (case-insensitive), or its head contains a "/"
      if composR != "-/-|-" &&
         (cexpCurr.casecmp(composR.gsub(/(.*)\/.*/, '\1')) != 0 || right.head =~ /\//)
        puts "IP #{depth + 1} #{composR} : 1+ = #{$prob}"
      end
      return left.head, ""
    end

    $stderr.puts "ERROR: N-ARY BRANCH IN RCTREE!!!"
    nil
  end
end

## Driver: turn every input line into a tree and print its model operations,
## followed by the closing operation for the top node and the padding lines
## for depths 1 through 4.
while ($line = gets)
  tree = Tree.new($line)

  ## One weight for the whole tree (shortcut to weighting every sub-tree):
  ## trees w/o explicit probs are assumed to be whole training examples.
  tree_prob = tree.prob
  $prob = tree_prob.nil? ? 1.0 : tree_prob

  tree.toDat(1, "UTT", "ROOT/UTT|UTT", "UTT|UTT")

  if $error
    $stderr.puts "\nWARNING: This tree generated a word-less Pw: #{tree}\n"
    $error = false
  end

  ## top node was final or IP: compare the part of the first child's head
  ## after "|" with the part between the last "/" and "|"
  top_label = tree.children[0].head
  expected_cat = top_label.gsub( /.*\|(.*)/, '\1' )
  lacking_cat = top_label.gsub( /.*\/(.*)\|.*/, '\1' )
  if expected_cat != lacking_cat
    puts "IP 1 #{top_label} : 1+ = #{$prob}"
  else
    puts "Fr 1 #{top_label} : 1 = #{$prob}"
  end

  ## pad with the empty-state operations for depths 1..4
  (1..4).each do |level|
    puts "Qe #{level} -/-|- : -/-|- = #{$prob}"
    puts "Fr #{level} -/-|- : 1 = #{$prob}"
  end
end
