###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

#!/usr/bin/ruby

######################################################################
# sem2PBRTrain.rb
# 
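# Input is pbrsem_am_trees (one tree per line on stdin). Outputs model
# lines of the form "PBR <pred> <sem role> <a|p> : <PBR label>", e.g.:
# 
# PBR play.01 a0 a : ARG0
# PBR play.01 a0 a : ARG1
# PBR play.01 m a : ARGM-TMP
#
# This output is then piped through relfreq.pl.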
#
# TO RUN: cat genmodel/srl.pbrsem_am_trees | ruby scripts/sem2PBRTrain.rb [-v] 
#
# OPTIONS:
#  -v verbose mode
#
######################################################################

require "scripts/umnlp.rb"

##### parse options
require 'optparse'

# Global option table. :verbose is off unless -v / --verbose is given.
$options = { :verbose => false }

# Build the parser first, then consume the recognised switches from ARGV.
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: ruby sem2PBRTrain.rb [options]"
  opts.on("-v", "--verbose", "turns on extra stderr output") { |flag| $options[:verbose] = flag }
end
option_parser.parse!
  
  # Regexp *source strings*; the Tree methods interpolate them into /.../.
  # The label-extraction regexps in Tree read node heads as
  # "<sem label>!semdelim!<PBR label>!pbrdelim!...".

  # PBR part of a predicate (REL) node.
  $rel = "REL\\-[\\w\\-\\.]+!pbrdelim!"

  # Semantic-label prefixes: modifier, numbered argument, and identity
  # (optionally suffixed with m or a<digit>).
  $m = "m!semdelim!"
  $a = "a\\d!semdelim!"
  $id = "id(m|a\\d)?!semdelim!"

  # REL nodes carrying a specific semantic label.
  $id_rel = "id!semdelim!" + $rel
  $m_rel = $m + $rel
  $a_rel = $a + $rel
  
  #####
  # Additions to Tree used to emit "PBR" training lines for a REL node.
  #
  # This only reopens the class: the constructor, the @head / @children /
  # @parent instance variables and the head / children / parent readers used
  # below are not defined here (presumably they come from scripts/umnlp.rb --
  # confirm).
  class Tree

    # Depth-first, left-to-right search for the first node whose head matches
    # +str+.
    #
    # str::  regexp source String (a Regexp is accepted as well).
    # path:: Array that child indices are appended to; modified in place.
    #
    # Returns [node, path], where path holds the child indices leading from
    # the receiver down to node, or [nil, path] (with path restored to what
    # was passed in) when nothing matches.
    def findFirst(str, path)
      # Compile once and hand the Regexp down, instead of rebuilding it at
      # every node of the tree.
      pattern = str.is_a?(Regexp) ? str : Regexp.new(str)
      return self, path if @head =~ pattern

      @children.each_with_index do |child, branch|
        path << branch
        found, path = child.findFirst(pattern, path)
        return found, path unless found.nil?
        path.pop
      end
      return nil, path
    end

    # Follows +path+ (child indices ordered from the receiver downwards, i.e.
    # exactly what findFirst produces) and returns the node it leads to.
    # An empty path returns the receiver; nil is returned when the path
    # leaves the tree. The caller's array is not modified.
    def walk(path)
      node = self
      path.each do |branch|
        node = node.children[branch]
        return nil if node.nil?
      end
      node
    end

    # Dispatches on the semantic label of this REL node (m, id or a<digit>)
    # and prints the corresponding model lines; anything else is reported on
    # stderr, prefixed with the current input line number.
    def spitSem2PBRModel
      if @head =~ /#{$m_rel}/
        spitM2PBRModel
      elsif @head =~ /#{$id_rel}/
        spitId2PBRModel
      elsif @head =~ /#{$a_rel}/
        spitA2PBRModel
      else
        $stderr.print $lineNum, " Unknown semrole #{@head}", "\n"
      end
    end

    # Starting at this node, prints one
    #   PBR <pred> <sibling sem label> <a|p> : <sibling PBR label>
    # line for the sibling at each level, climbing while the parent's head
    # matches $id. Stops at the root or at a node without a usable sibling.
    def spitId2PBRModel
      pred = getPred
      preterm = getRELPreterm
      idPattern = /#{$id}/ # loop-invariant, so built once
      curNode = self
      until curNode.parent.nil?
        sibNode = curNode.getSibling
        break if sibNode.nil? # getSibling has already reported the problem
        sibNodeSem = sibNode.getSemLabel
        # An "id" sibling of an "m" node is emitted as "a0".
        sibNodeSem = "a0" if curNode.getSemLabel == "m" && sibNodeSem == "id"
        print "PBR " + pred, " ", sibNodeSem, " ", preterm, " : ", sibNode.getPBRLabel, "\n"
        break if curNode.parent.head !~ idPattern
        curNode = curNode.parent
      end
    end

    # Prints a single line that gives this node's sibling the "a0" label.
    # Nothing is printed when there is no sibling.
    def spitM2PBRModel
      pred = getPred
      preterm = getRELPreterm
      sib = getSibling
      return if sib.nil? # getSibling has already reported the problem
      print "PBR " + pred, " a0 ", preterm, " : ", sib.getPBRLabel, "\n"
    end

    # An a<digit>:REL node produces no model line; it is only reported on
    # stderr together with its parent and the parent's children.
    def spitA2PBRModel
      if @parent.nil?
        context = @head.to_s
      else
        context = "#{@parent.head} -> " + @parent.children.map { |c| c.head }.join(" ")
      end
      $stderr.print $lineNum, " a:REL still existed: #{context}", "\n"
    end

    # Returns "p" when the text between !pbrdelim! and "{" in this head, or
    # the head of the first child (if any), contains "-psv"; "a" otherwise.
    def getRELPreterm
      preterm = @head[/REL\-(.*)!pbrdelim!(.*)\{/, 2]
      firstChild = @children[0]
      if preterm =~ /\-psv/ || (firstChild && firstChild.head =~ /\-psv/)
        "p"
      else
        "a"
      end
    end

    # Text between "REL-" and "!pbrdelim!" in the head (e.g. "play.01"),
    # or nil when the head has no such part.
    def getPred
      @head[/REL\-(.*)!pbrdelim!/, 1]
    end

    # Text before "!semdelim!" in the head, returned unchanged (so a label
    # such as "ida0" is not reduced to "a0"); nil when the head has no
    # "<x>!semdelim!<y>!pbrdelim!" shape.
    def getSemLabel
      @head[/^(.*)!semdelim!(.*)!pbrdelim!/, 1]
    end

    # Text between "!semdelim!" and "!pbrdelim!" in the head, or nil.
    def getPBRLabel
      @head[/^(.*)!semdelim!(.*)!pbrdelim!/, 2]
    end

    # Assumes a binary tree: returns the other child of this node's parent.
    # Returns nil, after a stderr diagnostic, when the node has no parent or
    # is neither the first nor the second child.
    def getSibling
      if @parent.nil?
        $stderr.print $lineNum, " No parent, hence no sibling, for #{@head}", "\n"
        return nil
      end

      if self == @parent.children[0]
        @parent.children[1]
      elsif self == @parent.children[1]
        @parent.children[0]
      else
        $stderr.print $lineNum, " Not a binary subtree at #{@parent.to_s}", "\n"
        nil
      end
    end
  end
  
  ##########################################
  
  # Number of input lines read so far (incremented before each tree is
  # processed, so it is 1-based while a line is being handled); printed as
  # the prefix of the stderr diagnostics.
  $lineNum = 0
  # Not referenced anywhere else in the visible part of this script.
  $debugLine = []

#  srlLine = "(ID!ldelim!S (ID!ldelim!PP-LOC (ID!ldelim!IN In) (ID!ldelim!NP (ID!ldelim!NP (ID!ldelim!DT an) (ID!ldelim!NNP Oct.) (ID!ldelim!CD 19) (ID!ldelim!NN review)) (ID!ldelim!PP (ID!ldelim!IN of) (ID!ldelim!NP (ID!ldelim!`` ``) (ID!ldelim!NP-TTL (ID!ldelim!DT The) (ID!ldelim!NN Misanthrope)) (ID!ldelim!'' '') (ID!ldelim!PP-LOC (ID!ldelim!IN at) (ID!ldelim!NP (ID!ldelim!NP (ID!ldelim!NNP Chicago) (ID!ldelim!POS 's)) (ID!ldelim!NNP Goodman) (ID!ldelim!NNP Theatre))))) (ID!ldelim!PRN (ID!ldelim!-LRB- -LRB-) (ID!ldelim!`` ``) (ID!ldelim!S-HLN (ID!ldelim!NP-SBJ (REL!ldelim!VBN Revitalized) (ARG1!ldelim!NNS Classics)) (ID!ldelim!VP (ID!ldelim!VBP Take) (ID!ldelim!NP (ID!ldelim!DT the) (ID!ldelim!NN Stage)) (ID!ldelim!PP-LOC (ID!ldelim!IN in) (ID!ldelim!NP (ID!ldelim!NNP Windy) (ID!ldelim!NNP City))))) (ID!ldelim!, ,) (ID!ldelim!'' '') (ID!ldelim!NP-TMP (ID!ldelim!NN Leisure) (ID!ldelim!CC &) (ID!ldelim!NNS Arts)) (ID!ldelim!-RRB- -RRB-)))) (ID!ldelim!, ,) (ID!ldelim!NP-SBJ-2 (ID!ldelim!NP (ID!ldelim!NP (ID!ldelim!DT the) (ID!ldelim!NN role)) (ID!ldelim!PP (ID!ldelim!IN of) (ID!ldelim!NP (ID!ldelim!NNP Celimene)))) (ID!ldelim!, ,) (ID!ldelim!VP (ID!ldelim!VBN played) (ID!ldelim!NP (ID!ldelim!-NONE- *)) (ID!ldelim!PP (ID!ldelim!IN by) (ID!ldelim!NP-LGS (ID!ldelim!NNP Kim) (ID!ldelim!NNP Cattrall)))) (ID!ldelim!, ,)) (ID!ldelim!VP (ID!ldelim!VBD was) (ID!ldelim!VP (ID!ldelim!ADVP-MNR (ID!ldelim!RB mistakenly)) (ID!ldelim!VBN attributed) (ID!ldelim!NP (ID!ldelim!-NONE- *-2)) (ID!ldelim!PP-CLR (ID!ldelim!TO to) (ID!ldelim!NP (ID!ldelim!NNP Christina) (ID!ldelim!NNP Haag))))) (ID!ldelim!. .))"
#  
#  srlTree = Tree.new(srlLine)
#  path = []
#  t, path = srlTree.findFirst($rel, path)
#  print t.to_s, path, "\n"
  
#  File.open("ttt").each_line do |line| 
  # Main loop: each stdin line is parsed as one tree; the first node whose
  # head matches $rel emits its model lines, and trees without such a node
  # are reported on stderr.
  STDIN.each_line do |line|
    $lineNum += 1
    relNode, _relPath = Tree.new(line).findFirst($rel, [])
    if relNode
      relNode.spitSem2PBRModel
    else
      $stderr.print $lineNum, " tree has no REL ", line, "\n"
    end
  end
