###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

#!/usr/bin/ruby

require "scripts/umnlp.rb"

##### parse options
require 'optparse'

# Command-line switches, both off unless the matching flag is given.
$options = {}
$options[:ignoreG] = false
$options[:unkasroot] = false
option_parser = OptionParser.new do |opts|
  # The backslashes are doubled on purpose: inside a double-quoted Ruby string
  # "\^" collapses to "^", which would make the printed usage show a different
  # grep/sed command than the one intended.
  opts.banner = "Usage: cat genmodel/<file>hw.crctrees | grep -v '\\^[5-9]' | sed 's/\\^[0-9]//g' | ruby scripts/rctrees2lstar.rb [options]"
  opts.on("-i", "--ignoreG", "ignore the grammatical constituents in L* and L0 output") do |v|
    $options[:ignoreG] = v
  end
  opts.on("-u", "--unkasroot", "make unk the root headword") do |v|
    $options[:unkasroot] = v
  end
end
# parse! strips the recognised switches from ARGV.
option_parser.parse!

######
# $error is tested (and reset) by the main loop after each tree; $line holds
# the input line currently being processed and is quoted in error messages.
$error = false
$line = ""

# Label given to the synthetic node placed above every parsed tree.
$ROOTOFTREE = "h:ROOT/h:REST"
# Headword used for the root context: "unk" with --unkasroot, "-" otherwise.
$ROOTHDWD = $options[:unkasroot] ? "unk" : "-"

# NOTE(review): $stack, $lzero, $lplus and $lstar are not read anywhere in the
# visible part of this file; they are kept in case other code relies on them.
$stack = []
$lzero = {}
$lplus = {}
$lstar = {}

# Tables filled while trees are parsed (Tree#buildStructure) and printed once
# all input has been consumed.
$m_id = {}
$m_one = {}
$lword = {}
#####
# Parsing of bracketed tree lines and emission of the L*/L0 lines for them.
#
# NOTE(review): this class reads and writes head, children, parent and
# num_rules through accessors and calls getNumRules, none of which are defined
# in this file -- presumably they come from the Tree class in
# scripts/umnlp.rb; confirm before loading this file on its own.
class Tree

  #attr_reader :hdwdrules
  attr_accessor :achd
  attr_accessor :awhd
  #attr_accessor :lastrules

  # Creates a node and, when +str+ is non-empty, parses it into a subtree.
  #
  # str    - bracketed tree text. If it contains more "]" than ")" characters
  #          its square brackets are rewritten to parentheses IN PLACE, so the
  #          caller's string object is modified as well.
  # parent - parent of this node (defaults to the $ROOTOFTREE label).
  def initialize(str="", parent=$ROOTOFTREE)
    @str = str
    @children = Array.new
    @parent = parent
    @num_rules = 0
    # Always start with a String head: error reporting concatenates @head, so
    # a nil head (blank or label-less input line) used to raise a TypeError.
    @head = ""
    if str != ""
      ## Accept trees written with square brackets instead of parentheses
      if str.count(")") < str.count("]")
        str.tr!("[]", "()")
      end
      buildStructure(@str)
    end
    @num_rules = getNumRules #+= @children[i].num_rules
  end

  # Parses one "(label child...)" group from the front of +str+ into this node.
  #
  # The label's {...} annotations are stripped to form @head. @achd and @awhd
  # are taken from the annotations: for a label containing "/" the annotation
  # directly before a "/" and the last annotation after a "/"; otherwise the
  # last annotation for both. A label without annotations is used as is.
  # Both headwords are recorded in $m_id, $m_one and $lword.
  #
  # Returns the unparsed remainder of +str+, "" for an empty +str+, or nil
  # when no label follows the opening parenthesis.
  # Raises RuntimeError ("Erroneous part of tree: ...") on malformed input.
  def buildStructure(str)
    if str == ""
      @head = ""
      return ""
    end

    ## Switchboard-style trees start with two open parens: ( (S
    ## -- drop the outer pair before parsing
    if str =~ /^ *\( *\((.*)\) *$/
      str = "(" + $1
    end

    # Pluck off the label; without one there is nothing to build
    return nil unless str =~ /^ *\( *([^ ()]+)/
    str = $'
    match = $1

    @head = match.gsub(/\{[^}]*\}/,"")

    # Set headwords
    if match.include?('/')
      @achd = match.gsub(/.*\{([^}]+)\}\/.*/,'\1')
      @awhd = match.gsub(/.*\/.*\{([^}]+)\}/,'\1')
    else
      @achd = match.gsub(/.*\{([^}]+)\}/,'\1')
      @awhd = @achd.dup
    end
    register_headword(@achd)
    register_headword(@awhd)

    while true
      if str =~ /^ *\(/
        ## a nested group: let a fresh child consume it; errors raised further
        ## down simply propagate to the caller
        child = Tree.new("", self)
        str = child.buildStructure(str)
        @children << child
      elsif str =~ /^ *([^ ()]+) *\)/
        ## we've reached a leaf - i.e. a word and its close paren
        word = $1
        rest = $'
        child = Tree.new("", self)
        child.head = word #.downcase
        child.num_rules = 1
        @children << child
        return rest
      elsif str =~ /^ *\)/
        ## End of a tree
        return $'
      else
        raise "Erroneous part of tree: #{str}."
      end
    end
  end



  ### set the parents for the whole tree
  #
  # Walks down from this node, pointing each visited child's parent at the node
  # above it and reporting malformed nodes on $stderr. Recursion stops at a node
  # whose only child has no children. Always returns nil.
  def set_parents
    if @children.size==0
      $stderr.print "ERROR: EXPECT (PRETERM), POS, WORD NODES IN RCTREE!!! curr:"+@head + "\nLine=#{$line}\n"
      return
    end
    if @children.size==1 && @children[0].children.size==1 && @children[0].children[0].children.size!=0
      $stderr.print "ERROR: EXPECT (PRETERM), POS, WORD NODES IN RCTREE!!! curr:"+@head + " child:"+@children[0].head + " granch:"+@children[0].children[0].head + "\n"
      return
    end

    ## terminal case
    return if @children.size==1 && @children[0].children.size==0

    ## more than two children: report it (this check used to sit inside the
    ## binary branch, where it could never be true)
    ## NOTE(review): the message names calcHdwdCPT.rb, not this script; it is
    ## kept verbatim in case something greps for it.
    if @children.size>2
      $stderr.print "ERROR: calcHdwdCPT.rb requires binary trees. Node "+@head+" has "+@children.size.to_s+" children.\n"
      return
    end

    ## binary node whose head contains '_': nothing is done below it
    ## NOTE(review): the original left a "determine which binary branch has the
    ## head" placeholder here; the no-op is preserved, confirm it is intended.
    return if @children.size==2 && @head.include?('_')

    ## unary or plain binary case: recurse left to right
    @children.each do |child|
      child.set_parents
      child.parent = self
    end
    nil
  end



  # Prints the "L*" and "L0" lines for the subtree rooted at this node.
  #
  # depth   - current depth; a node whose single child has a single childless
  #           child is only printed while depth < 5, otherwise
  #           "ERROR: TREE TOO BIG!!!" goes to $stderr. The right child of a
  #           binary node is visited with depth+1.
  # qParent - label string of the context node (the caller passes the root
  #           label; right children get their left sibling's head).
  # pAc     - only forwarded to recursive calls, never printed.
  # pAw     - headword printed as the context headword.
  # numSibs - 0 suppresses the L*/L0 pair of a binary node without "/" in its
  #           head (used when recursing through a unary node).
  #
  # With $options[:ignoreG] the two label columns are omitted from every line.
  # Malformed nodes are reported on $stderr. Always returns nil.
  def toDat(depth, qParent, pAc=$ROOTHDWD, pAw=$ROOTHDWD, numSibs=1)

    if @children.size==0
      $stderr.print "ERROR: EXPECT (PRETERM), POS, WORD NODES IN RCTREE!!! curr:"+@head + "\nLine=#{$line}\n"
      return
    end

    first = @children[0]

    if @children.size==1 && first.children.size==1
      grandchild = first.children[0]
      if grandchild.children.size!=0
        $stderr.print "ERROR: EXPECT (PRETERM), POS, WORD NODES IN RCTREE!!! curr:"+@head + " child:"+first.head + " granch:"+grandchild.head + "\n"
        return
      end

      ## terminal case, left child post-transform
      if depth<5
        print_l_rules(["L*"], label_after_slash(qParent), first.head.gsub(/.*:(.*)/,'\1'), pAw, first.achd)
      else
        ## beyond max depth...
        $stderr.print "ERROR: TREE TOO BIG!!!\n"
      end
      return
    end

    ## terminal case, right child post-transform
    if @children.size==1 && first.children.size==0
      print_l_rules(["L*", "L0"], label_after_slash(qParent), @head.gsub(/.*:(.*)/,'\1'), pAw, @achd)
      return
    end

    ## unary case
    if @children.size==1
      ## recurse to unary child first, then describe its leftmost child
      first.toDat(depth,qParent,pAc,pAw,0)
      leftmost = first.children[0]
      print_l_rules(["L*"], label_after_slash(qParent), leftmost.head.gsub(/.*:(.*)\/.*/,'\1'), pAw, leftmost.achd)
      return
    end

    ## binary cases
    if @children.size==2
      ## recurse to left child...
      first.toDat(depth,qParent,pAc,pAw)
      ## recurse to right child, one level deeper, in the left child's context
      @children[1].toDat(depth+1,first.head,first.achd,first.awhd)
      ## a head containing "/" is still incomplete and prints nothing itself;
      ## a head without "/" has become complete
      if @head !~ /\// && numSibs > 0
        print_l_rules(["L*", "L0"], label_after_slash(qParent), first.head.gsub(/.*:(.*)\/.*/,'\1'), pAw, first.achd)
      end
      return
    end

    $stderr.print "ERROR: N-ARY BRANCH IN RCTREE!!!\n"
    return

  end

  # Renders the subtree as "( head{achd/awhd} child child )"; a node without
  # children renders as its @achd (an empty string when that is unset).
  def hdwd_to_s
    return "#{@achd}" if @children.length == 0

    "( #{@head}{#{@achd}/#{@awhd}} " + @children.map { |child| child.hdwd_to_s + " " }.join + ")"
  end

  private

  # Records +hdwd+ in the $m_id, $m_one and $lword tables. The assignments are
  # idempotent, so no membership test is needed (the old test looked up a key
  # that is never stored and was therefore always true).
  def register_headword(hdwd)
    $m_id["#{hdwd} : #{hdwd}"] = 1
    $m_one["#{hdwd} : #{$ROOTHDWD}"] = 1
    $lword["#{hdwd} #{hdwd} : #{hdwd}"] = 1
  end

  # Returns what follows the last ":" of +label+ when a "/" precedes that ":";
  # a label that does not have this shape is returned unchanged.
  def label_after_slash(label)
    label.gsub(/.*\/.*:(.*)/,'\1')
  end

  # Prints one line per entry of +names+ ("L*", "L0"):
  #   "<name> <gParent> <gChild> <pAw> : <hdwd>"
  # or, with $options[:ignoreG], "<name> <pAw> : <hdwd>".
  def print_l_rules(names, gParent, gChild, pAw, hdwd)
    names.each do |name|
      if $options[:ignoreG]
        print name + " " + pAw + " : " + hdwd + "\n"
      else
        print name + " " + gParent + " " + gChild + " " + pAw + " : " + hdwd + "\n"
      end
    end
  end

end


#####
# Extensions to the core Hash class used for count tables and model output.
# NOTE: this redefines Hash#to_s for every Hash in the process, so any hash
# interpolated into a string is rendered in the "key = value" line format.
class Hash
  # Returns a new Hash holding the receiver's entries merged with those of +h+.
  # A key present in both gets the sum of the two values (each converted with
  # to_i); a key only in +h+ gets its value converted with to_i; a key only in
  # the receiver keeps its value untouched. Neither operand is modified.
  def +(h)
#    hout = h.merge(self)
    combined = {}
    each { |key, value| combined[key] = value }
    h.each do |key, value|
      combined[key] = key?(key) ? combined[key].to_i + value.to_i : value.to_i
    end
    combined
  end

  # Renders every entry as "<modelname> <key> = <value>\n", in iteration order,
  # and returns the concatenation ("" for an empty hash).
  # The lines are collected and joined once rather than appended to a growing
  # string, which copied the whole output again for every entry.
  def to_s(modelname="")
    map { |key, value| modelname + " #{key} = #{value}\n" }.join
  end
end


#####
# Main driver: reads one bracketed tree per input line, prints its L*/L0 lines,
# then dumps the headword tables gathered while parsing.
$ctr = 0
while ($line = gets)
  tree = Tree.new($line)

  # Hang the parsed tree under a synthetic root node carrying the root label
  # and the root headword.
  tree.parent = Tree.new
  root = tree.parent
  root.head = $ROOTOFTREE
  root.awhd = $ROOTHDWD
  root.children[0] = tree
  tree.set_parents

  tree.toDat(1, "h:ROOT/h:REST", $ROOTHDWD, $ROOTHDWD)

  # $error is never set in the visible part of this file; the warning only
  # fires if other code raises the flag.
  if $error
    $stderr.puts "\nWARNING: This tree generated a word-less Pw: #{tree}\n"
    $error = false
  end

  # Progress report every 1000 trees.
  $ctr += 1
  $stderr.print "  ...finished #{$ctr} trees in L* calculation...\n" if ($ctr % 1000).zero?
end

# Entries for the root headword itself come first...
%w[M_PRIOR M_ID M_1].each do |model|
  print "#{model} #{$ROOTHDWD} : #{$ROOTHDWD}\n"
end
# ...followed by everything registered while the trees were parsed.
# $lword.delete("unk unk : unk")
[["M_ID", $m_id], ["M_1", $m_one], ["L", $lword]].each do |model, table|
  print table.to_s(model)
end
# print out the final L* model
#print $lstar.to_s("L*")
