###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

#!/usr/bin/ruby

######################################################################
# rctrees2lstar.rb
# 
# Reads in headword-annotated trees, and outputs the L* and a^T_0 models.
# TO RUN: cat genmodel/<file>hw.crctrees | grep -v '\^[5-9]' | sed 's/\^[0-9]//g' | ruby scripts/rctrees2lstar.rb
#
# OPTIONS:
#  -d, --debug      prints out extra information for debugging (on stderr)
#  -a, --allmodels  prints out L0 and L+ along with L*
#
######################################################################

require "scripts/umnlp.rb"

##### parse options
require 'optparse'

$options = {}
 $options[:all] = false
# $options[:nohw] = false
OptionParser.new do |opts|
  opts.banner = "Usage: cat genmodel/<file>hw.crctrees | grep -v '\^[5-9]' | sed 's/\^[0-9]//g' | ruby scripts/rctrees2lstar.rb [options]"

  opts.on("-d", "--debug", "print out extra stuff for debugging") do |v|
    $options[:debug] = v
  end
  opts.on("-a", "--allmodels", "print out L0 and L+ along with L*") do |v|
    $options[:all] = v
  end
end.parse!


$ROOTOFTREE = "h:ROOT/h:REST"
$stack = []
$lzero = {}
$lplus = {}
$lstar = {}
$m_id = {}
$m_one = {}
$lword = {}

#####
class Tree

  #attr_reader :hdwdrules
  attr_accessor :achd
  attr_accessor :awhd
  #attr_accessor :lastrules

  def initialize(str="", parent=$ROOTOFTREE)
    @str = str
    @children = Array.new
    @parent = parent
    @num_rules = 0
    if str != ""
      ## Check if the stupid user passed in a stupid string with
      ## stupid brackets instead of parentheses
      if (str.length - str.gsub(/\)/,"").length) < (str.length - str.gsub(/\]/,"").length)
        str.gsub!(/\[/,"(")
        str.gsub!(/\]/,")")
      end
      buildStructure(@str)
    else
      head = ""
    end
    if str == ""
      str = to_s
    end
    @num_rules = getNumRules #+= @children[i].num_rules

  end

  # modify buildStructure to automatically get headwords
  def buildStructure(str)
    if str == ""
      @head = ""
      return ""
    else
      ## First let's check if its one of those weird switchboard
      ## trees that start with 2 open parens: ( (S
      if str =~ /^ *\( *\((.*)\) *$/
        str = "(" + $1
      end
      
      #Pluck off the head if it's there
      if str =~ /^ *\( *([^ ()]+)/
        ## Start of a rule
        str = $' #'
	match = $1

	@head = match.gsub(/\{[^}]*\}/,"")

	# Set headwords
	if match.include?('/')
	  @achd = match.gsub(/.*\{([^}]+)\}\/.*/,'\1')
	  @awhd = match.gsub(/.*\/.*\{([^}]+)\}/,'\1')
	  if !$m_id.key?(@achd)
	    $m_id["#{@achd} : #{@achd}"] = 1
	    $m_one["#{@achd} : -"] = 1
	    $lword["#{@achd} #{@achd} : #{@achd}"] = 1
	  end
	  if !$m_id.key?(@awhd)
	    $m_id["#{@awhd} : #{@awhd}"] = 1
	    $m_one["#{@awhd} : -"] = 1
	    $lword["#{@awhd} #{@awhd} : #{@awhd}"] = 1
	  end
	else 
	  @achd = match.gsub(/.*\{([^}]+)\}/,'\1')
	  @awhd = match.gsub(/.*\{([^}]+)\}/,'\1')
	  if !$m_id.key?(@achd)
	    $m_id["#{@achd} : #{@achd}"] = 1
	    $m_one["#{@achd} : -"] = 1
	    $lword["#{@achd} #{@achd} : #{@achd}"] = 1
	  end
	  if !$m_id.key?(@awhd)
	    $m_id["#{@awhd} : #{@awhd}"] = 1
	    $m_one["#{@awhd} : -"] = 1
	    $lword["#{@awhd} #{@awhd} : #{@awhd}"] = 1
	  end
	end

        while true
          if str =~ /^ *\(/
            child = Tree.new("", self)
            begin
              str = child.buildStructure(str)
            rescue
              ## Catching downstream exception we'll pass it up...
              #$stderr.puts "Error caught and being passed upwards"
              raise $!
            end
            @children << child
          elsif str =~ /^ *([^ ()]+) *\)/
            ## we've reached a leaf - i.e. a word and its close paren
            child = Tree.new("", self)
            child.head = $1 #.downcase
            child.num_rules = 1
            @children << child
            str = $' #'
            return str
          elsif str =~ /^ *\)/
            ## End of a tree
            str = $' #'
            return str
          else
            raise "Erroneous part of tree: #{str}."
          end
        end
      end
    end
  end



  ### set the parents for the whole tree
  def set_parents
    if @children.size==0
      $stderr.print "ERROR: EXPECT (PRETERM), POS, WORD NODES IN RCTREE!!! curr:"+@head + "\nLine=#{$line}\n"
      return
    end
    if @children.size==1 && @children[0].children.size==1 && @children[0].children[0].children.size!=0
      $stderr.print "ERROR: EXPECT (PRETERM), POS, WORD NODES IN RCTREE!!! curr:"+@head + " child:"+@children[0].head + " granch:"+@children[0].children[0].head + "\n"
      return
    end
    
    ## terminal case
    if @children.size==1 && @children[0].children.size==0
      return
    end
    
    ## unary case
    if @children.size==1
      ## recurse to left (or unary) child...
      @children[0].set_parents
      @children[0].parent = self
      return
    end
    
    ## binary case
    if @children.size==2
      if @head.include?('_')
	(preusc,postusc) = @head.split('_',2)
	#determine which binary branch has the head
      else
	
	## recurse to left (or unary) child...
	@children[0].set_parents
	@children[0].parent = self
	## recurse to right child...
	@children[1].set_parents
	@children[1].parent = self

	return
      end
      
      if @children.size>2 || @children.size<0
	$stderr.print "ERROR: calcHdwdCPT.rb requires binary trees. Node "+@head+" has "+@children.size.to_s+" children.\n"
      end
      #    $stderr.print "finishing up" + "\n"
    end

  end




  ### calculate the L* model
  def lstar
    if @children.size==0
      $stderr.print "ERROR: EXPECT (PRETERM), POS, WORD NODES IN RCTREE!!! curr:"+@head + "\nLine=#{$line}\n"
      return
    end
    if @children.size==1 && @children[0].children.size==1 && @children[0].children[0].children.size!=0
      $stderr.print "ERROR: EXPECT (PRETERM), POS, WORD NODES IN RCTREE!!! curr:"+@head + " child:"+@children[0].head + " granch:"+@children[0].children[0].head + "\n"
      return
    end

    ## terminal case, left child post-transform
    if @children.size==1 && @children[0].children.size==1 && @children[0].children[0].children.size==0
      pos = @children[0].children[0].head.gsub(/([^\#]*)\#.*/,'\1')
      ceta1 = $stack.last.gsub(/.*:(.*)\{.*/,'\1')
      eeta1 = $stack.last.gsub(/.*\{(.*)\}/,'\1')
      cChild = @children[0].head.gsub(/.*:(.*)/,'\1')
      eChild = @children[0].achd  #head.gsub(/(.*):.*/,'\1')
      rule = "#{ceta1} #{cChild} #{eeta1} : #{eChild}"

      print "L* "+rule+"\n" #+"      from #{@parent.head} and #{@children[0]}\n"
      if $options[:debug]
        $stderr.print "TL "+rule+"\n"
      end
#      print "L0 "+rule+"\n"  ## SWU: UNSURE IF THIS IS CORRECT?

      return

    ## terminal case, right child post-transform
    elsif @children.size==1 && @children[0].children.size==0
#      $stderr.print "'Terminal: Head = "+@head+", Hdwd = "+@achd+"\n"

      ## 1-word tree case
      if @parent.head==$ROOTOFTREE #&& @children[0].size==1
        cprnt = @parent.head.gsub(/.*:.*\/.*:(.*)/,'\1')
        eprnt = @parent.awhd
        chere = @head.gsub(/.*:(.*)/,'\1')
        rule  = "#{cprnt} #{chere} #{eprnt} : #{@achd}"

        $lzero[rule] = $lzero.key?(rule) ? $lzero[rule]+1 : 1
        $lstar[rule] = $lstar.key?(rule) ? $lstar[rule]+1 : 1
	
        print "L* "+rule+"\n"
        if $options[:debug]
          $stderr.print "TR "+rule+"\n"
        end

        ## regular tree case
      else
	cprnt = @parent.children[0].head.gsub(/.*:.*\/.*:(.*)/,'\1')
	eprnt = @parent.children[0].awhd#.gsub(/.*:(.*)/,'\1')
	chere = @head.gsub(/.*:(.*)/,'\1')
	rule = "#{cprnt} #{chere} #{eprnt} : #{@achd}"

	$lzero[rule] = $lzero.key?(rule) ? $lzero[rule]+1 : 1
	$lstar[rule] = $lstar.key?(rule) ? $lstar[rule]+1 : 1
	
	print "L* "+rule+"\n"
      if $options[:debug]
        $stderr.print "TR "+rule+"\n"
      end
	#if $options[:all]
#        print "L0 "+rule+"\n"
	#end
      end


      return
    end

    ## unary case
    if @children.size==1

      ## recurse to left (or unary) child...
      @children[0].lstar( ) 
      if $options[:debug]
	$stderr.print "'Unary(do L*)   :  Head = "+@head+", Hdwd = "+@achd+"\n"
      end
      cchild = @children[0].head.gsub(/.*:(.*)/,'\1') #.gsub(/(.*)\{.*/,'\1')
      echild = @children[0].achd
      ceta1 = $stack.last.gsub(/.*:(.*)\{.*/,'\1')
      eeta1 = $stack.last.gsub(/.*\{(.*)\}/,'\1')
      rule = "#{ceta1} #{cchild} #{eeta1} : #{echild}"
      $lplus[rule] = $lplus.key?(rule) ? $lplus[rule]+1 : 1
      $lstar[rule] = $lstar.key?(rule) ? $lstar[rule]+1 : 1

      print "L* "+rule + "\n"
      if $options[:all]
	print "L+ "+rule + "\n"
      end
      if $options[:debug]
	$stderr.print "U  "+rule + "\n"
      end
      return
    end

    ## binary case
    if @children.size==2

      if @parent.head != $ROOTOFTREE

	if @parent.children[0] == self
	  if $options[:debug]
	    $stderr.print "'BinaryL        :  Head = "+@head+", achd = "+@achd+", awhd = "+@awhd+"\n"
	  end

	end

      end

      ## recurse to left (or unary) child...
      @children[0].lstar( )

      ## keeps track of an eta.1, to be followed by eta.1.0^k
      if @children[1].children.size==2 
	$stack.push( @children[0].head.gsub(/.*\/(.*)/,'\1')+"{#{@children[0].awhd}}" ) #aw of left child = orig. parent
	if $options[:debug]
	  $stderr.print "          stack :    "+$stack.join(' ')+"\n"
	end
      end

      ## recurse to right child...
      @children[1].lstar( )

      if @parent.head == $ROOTOFTREE && $options[:debug] && $options[:verbose]
	$stderr.print "  should be at the root \n"
      end

      ## done with this constituent, which was a new stack level
      if @parent.children[1] == self || @parent.head == $ROOTOFTREE
	if $options[:debug]
	  $stderr.print "'BinaryR(do L0) :  Head = "+@head+", achd = "+@achd+", awhd = "+@awhd+"\n"
	end
	
	chead = @head.gsub(/.*:(.*)/,'\1')
	ceta1 = $stack.last.gsub(/.*:(.*)\{.*/,'\1')
	eeta1 = $stack.last.gsub(/.*\{(.*)\}/,'\1')
	rule = "#{ceta1} #{chead} #{eeta1} : #{@achd}"
	$lzero[rule] = $lzero.key?(rule) ? $lzero[rule]+1 : 1
	$lstar[rule] = $lstar.key?(rule) ? $lstar[rule]+1 : 1
	
	print "L* "+rule+"\n"
	#if $options[:all]
        print "L0 "+rule+"\n"
	#end
	
	if $options[:debug]
	  $stderr.print "B  "+rule + "\n"
    end
	$stack.pop
	if $options[:debug]
	  $stderr.print "          stack :    "+$stack.join(' ')+"\n"
	end

      end
     



      return
    end
    
    if @children.size>2 || @children.size<0
      $stderr.print "ERROR: calcHdwdCPT.rb requires binary trees. Node "+@head+" has "+@children.size.to_s+" children.\n"
    end
    
  end

  def hdwd_to_s
    if @children.length == 0
      return "#{@achd}"
    else
      
      s = "( #{@head}{#{@achd}/#{@awhd}} "
      @children.each{ |child|
        s += child.hdwd_to_s
        s += " "
      }
      s += ")"
    end
    return s
  end



end


class Hash
  def +(h)
#    hout = h.merge(self)
    hout = {}
    each{ |key, value| 
      hout[key] = value
    }
    h.each{ |key, value| 
      if key?(key)
        hout[key] = hout[key].to_i+value.to_i
      else
        hout[key] = value.to_i
      end
    }
    return hout
  end
  def to_s(modelname="")
    out = ""
    each_pair{ |key, value|
      out = out + modelname + " #{key} = #{value}\n"
    }
    return out
  end
end

$ctr=0;
while (line = STDIN.gets)

  t = Tree.new(line)
  #print "M_PRIOR - : #{t.achd}"+"\n"
  t.parent = Tree.new()
  t.parent.head = $ROOTOFTREE
  t.parent.awhd = '-'
  t.parent.children[0] = t
  t.set_parents
  $stack = [ $ROOTOFTREE.gsub(/.*\/(.*)/,'\1')+"{-}" ] ##{t.achd}}" ]

  t.lstar

  $ctr = $ctr+1
  if $ctr % 1000 == 0
    $stderr.print "  ...finished #{$ctr} trees in L* calculation...\n"
  end

end

print "M_PRIOR - : -"+"\n"
print $m_id.to_s("M_ID")
print $m_one.to_s("M_1")
# $lword.delete("unk unk : unk")
print $lword.to_s("L")
# print out the final L* model
#print $lstar.to_s("L*")
