###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

#!/usr/bin/ruby

######################################################################
# sem2PBR.rb
# 
# Inputs are the srl.sem2pbr.model file and the semtrees file. Output is the
# predicted series of PropBank labels.
# 
# Format of the model is:
#
# PBR bundle.02 m : ID = 0.88888889
# PBR bundle.02 m : ARG2-INTO = 0.11111111
# PBR cheat.01 m : ID = 0.80000000
# PBR cheat.01 m : ARG1-OF = 0.06666667
# PBR cheat.01 m : ARGM-ADV = 0.06666667
# PBR cheat.01 m : ARGM-MNR = 0.06666667
#
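# Only the most probable mapping is considered in this version, i.e.
# "bundle.02 m" will map to "ID" and "cheat.01 m" will map to "ID"
# in the example above. The first mapping listed for a key is the one that
# is kept, so the model must list each key's mappings in descending order
# of probability.
#
# NOTE: lookups use a three-field key "<predicate> <sem label> <a|p>", so the
# model lines actually consumed appear to carry an extra a/p field before the
# colon (e.g. "PBR bundle.02 m a : ID = 0.88888889") -- confirm against the
# model generator.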
#
# TO RUN: ruby scripts/sem2PBR.rb -m srl.sem2pbr.model -s semtrees [-v]
#
# OPTIONS:
#  -m sem2pbr.model file to load (required)
#  -s semtrees file to label (required)
#  -v verbose mode
#
######################################################################

require "scripts/umnlp.rb"

##### parse options
require 'optparse'

# Script-wide settings filled in from the command line; verbose output is off
# unless -v is given. :model and :semtrees stay unset until -m / -s are seen.
$options = { :verbose => false }

# Build the parser step by step, then consume the recognised switches from ARGV.
option_parser = OptionParser.new
option_parser.banner = "Usage: ruby sem2PBR.rb [options]"
option_parser.on("-v", "--verbose", "turns on extra stderr output") { |flag| $options[:verbose] = flag }
option_parser.on("-m", "--model sem2pbr.model", String, "The sem2pbr.model file to load") { |model_path| $options[:model] = model_path }
option_parser.on("-s", "--semtrees semtrees", String, "The semtrees file to evaluate the mapping sem -> pbr") { |trees_path| $options[:semtrees] = trees_path }
option_parser.parse!
  
  # Lookup table from "<predicate> <sem label> <a|p>" keys to PropBank labels;
  # starts empty.
  $model = {}

  # Regexp source for a ":REL-<predicate>" marker inside a node head.
  $rel = ':REL\-[\w\-\.]+'

  # Regexp source for the sem labels "id", "idm" or "ida<digit>" directly
  # followed by the sem delimiter.
  $id = 'id(m|a\d)?!semdelim!'

  # Regexp sources for the "m" and "a<digit>" sem labels followed by the
  # sem delimiter.
  $m = 'm!semdelim!'
  $a = 'a\d!semdelim!'

  # Reads the model file named by $options[:model] into the global $model hash.
  #
  # Every line is split on whitespace. Fields 2-4, joined by single spaces,
  # form the lookup key and field 6 is the label stored under it. Only the
  # first line seen for a key is kept, so the file has to list the preferred
  # label for each key first. Lines with fewer than six fields (blank lines,
  # truncated lines) are skipped instead of raising.
  def loadModel
    # File.foreach closes the file once the last line has been read.
    File.foreach($options[:model]) do |line|
      fields = line.split(" ")
      next if fields.size < 6
      key = fields[1..3].join(" ")
      $model[key] ||= fields[5]
    end
  end


  #####
  # Sem-to-PropBank helpers added to Tree. This reopens the class: the
  # constructor, to_s and the head/parent/children accessors used below are not
  # defined in this file (presumably they come from scripts/umnlp.rb -- confirm).
  class Tree

    # True when this node has a parent and is that parent's first child.
    def isLeftChild
      !@parent.nil? && @parent.children[0] == self
    end

    # True when this node has a parent and is that parent's second child.
    def isRightChild
      !@parent.nil? && @parent.children[1] == self
    end

    # Follows first children downwards and returns the childless node reached
    # (the receiver itself when it has no children).
    def leftMostChild
      return self if @children.size == 0
      @children[0].leftMostChild
    end

    # Depth-first, left-to-right search for the first node whose head matches
    # the regular expression source str.
    #
    # path is an Array that is modified in place: on success it holds the child
    # indices leading from the receiver down to the match (outermost first); on
    # failure it is left with the contents it was passed in with.
    # Returns the pair [node, path]; node is nil when nothing matches.
    def findFirst(str, path)
      return self, path if @head =~ /#{str}/

      @children.each_with_index do |child, branch|
        path << branch
        found = child.findFirst(str, path)[0]
        return found, path unless found.nil?
        path.pop # this branch held no match; undo the step
      end
      return nil, path
    end

    # Returns the node reached by repeatedly taking the LAST index of path as a
    # child index; path is emptied in the process.
    # NOTE(review): findFirst records indices outermost-first, so feeding its
    # path straight into walk visits the branches in reverse order. No caller
    # is visible in this file, so the behaviour is left untouched -- confirm
    # what callers expect before changing it.
    def walk(path)
      return self if path.empty?
      @children[path.pop].walk(path)
    end

    # Text following the first ":REL-" in the head (e.g. "say.01"), or nil when
    # the head carries no such marker.
    def getPred
      @head.split(":REL-", 2)[1]
    end

    # Text of the head before "!semdelim!" (the match is greedy), or nil when
    # the head contains no "!semdelim!".
    def getSemLabel
      @head[/^(.*)!semdelim!/, 1]
#      sem = @head[/^(.*)!semdelim!/, 1]
#      if sem =~ /ida\d/
#        sem[/id(.*)/, 1]
#      else
#        sem
#      end
    end

    # Returns "p" when either the head text between "!semdelim!" and the last
    # "{" or the first child's head contains "-psv", otherwise "a"
    # (presumably passive vs. active -- confirm).
    def getRELPreterm
      preterm = @head[/!semdelim!(.*)\{/, 1]
      marked = preterm =~ /\-psv/ || @children[0].head =~ /\-psv/
      marked ? "p" : "a"
    end

    # Called on the node whose head carries ":REL-<pred>". Strips that marker,
    # tags the parent with REL!pbrdelim!, then climbs towards the root and, at
    # each level, labels the sibling with the label $model holds for
    # "<pred> <sibling sem label> <a|p>". Climbing stops at the root or after
    # the first ancestor whose head does not match $id.
    #
    # Levels without a sibling (unary parents) are skipped, and a REL node
    # without a parent only has its marker stripped.
    def hypothesizePBR
      pred = getPred #i.e. say.01

      #remove :REL-say.01 from leaf
      @head = @head.split(":REL-", 2)[0]

      node = @parent
      # Without a parent there is no preterminal to tag and no sibling to label.
      return if node.nil?
      preterm = node.getRELPreterm

      #snap REL!pbrdelim! on pos node. It's wrong on the multi REL, like "pick up", "make over". Should improve on this!!!
      node.head.sub!(/!semdelim!/, "!semdelim!REL!pbrdelim!")

      #not until now that m:REL pattern could emerge. If so, set sibling to a0
#      if p.head =~ /m!semdelim!REL!pbrdelim!/
#        p.getSibling.head.sub!(/^.*!semdelim!/, "a0!semdelim!")
#      end

      while !node.parent.nil?
        sib = node.getSibling
        unless sib.nil?
          sem = sib.getSemLabel
          sem = "a0" if node.getSemLabel == "m" && sem == "id"
          # Interpolation keeps the key printable even when the sibling has no sem label.
          key = "#{pred} #{sem} #{preterm}"
          pbr = $model[key]
          if pbr.nil?
            $stderr.print $lineNum, " No model for ", key, "\n"
          else
            if pbr == "ID"
              # A plain ID may be refined; the first rule that applies wins.
              firstKid = sib.children[0]
              if sib.hypothARGM_MOD
                pbr = "ARGM-MOD"
              elsif sib.hypothARGM_NEG
                pbr = "ARGM-NEG"
              elsif sib.head =~ /\-tmp/ || (sib.head =~ /!semdelim!ADVP/ && !firstKid.nil? && firstKid.head =~ /!semdelim!RB/)
                pbr = "ARGM-TMP"
              elsif sib.head =~ /!semdelim!ADVP/ || sib.head =~ /\-adv/ || sib.head =~ /!semdelim!PP/
                pbr = "ARGM-ADV"
              end
            end
            sib.head.sub!(/!semdelim!/, "!semdelim!" + pbr + "!pbrdelim!")
          end
        end

#        break if p.parent.head =~ /#{$a}/ or (p.parent.head =~ /#{$m}/ and !(p.parent.head =~ /WHSBAR/ && p.parent.isRightChild && ! p.parent.parent.nil? && p.parent.parent.head =~ /delim!NP/ && p.parent.getSibling.head =~ /delim!NPnn/))
        break if node.parent.head !~ /#{$id}/
        node = node.parent
      end
    end


    #Propbank only annotate ARGM_MOD for the rel under consideration. Locate the word right to this MDmd node
    #to see if it's a REL then this MDmd node should be an ARGM_MOD
    #
    # Returns a true value when this is an MDmd node and the word to its right
    # either carries ":REL" or sits under a node already tagged REL!pbrdelim!;
    # returns a false value otherwise, including when there is no word to the
    # right.
    def hypothARGM_MOD
      return false unless @head =~ /!semdelim!MDmd/

      # Climb while we are a right child; the stop node is a left child or the root.
      node = self
      node = node.parent while node.isRightChild
      # At the root nothing follows this node.
      return false if node.parent.nil?

      sib = node.getSibling
      return false if sib.nil?
      nextWord = sib.leftMostChild
      nextWord.head =~ /:REL/ || nextWord.parent.head =~ /REL!pbrdelim!/
    end

    #only care about the ARGM-NEG at the POS level for now. Meaning check for
    # (m!semdelim!RB RB#n't) and guess it is (m!semdelim!ARGM-NEG!pbrdelim!RB RB#n't)
    def hypothARGM_NEG
      negators = ["RB#n't", "RB#not", "RB#no", "RB#never", "RB#neither", "RB#nor"]
      @head =~ /!semdelim!RB/ && @children.size == 1 && negators.include?(@children[0].head)
    end

    #Assume binary tree
    # Returns the other child of this node's parent. Returns nil when this node
    # has no parent or is an only child; when this node is neither of the
    # parent's first two children a message is written to stderr and nil is
    # returned.
    def getSibling
      return nil if @parent.nil?

      if self == @parent.children[0]
        @parent.children[1]
      elsif self == @parent.children[1]
        @parent.children[0]
      else
        $stderr.print $lineNum, "Not a binary subtree at #{@parent.to_s}", "\n"
        nil
      end
    end

    # Adds the default ID!pbrdelim! tag to every node with children, in this
    # subtree, whose head has no "!pbrdelim!" yet. Childless nodes are left alone.
    def setIDForRestOfNonLeaf
      return if @children.size == 0

      @head.sub!(/!semdelim!/, "!semdelim!ID!pbrdelim!") if @head !~ /!pbrdelim!/
      @children.each { |c| c.setIDForRestOfNonLeaf }
    end
  end
  
  ##########################################
  
  # Main driver: load the model, then read the semtrees file one tree per line,
  # hypothesize PropBank labels around the first REL node of each tree, tag the
  # remaining nodes that have children with ID, and print the tree to stdout.

  # Both input files are needed; fail with a usage line instead of a TypeError
  # from opening nil.
  if $options[:model].nil? || $options[:semtrees].nil?
    abort "Usage: ruby sem2PBR.rb -m sem2pbr.model -s semtrees [-v]"
  end

  loadModel

  # 1-based number of the tree being processed; used in stderr diagnostics.
  $lineNum = 0

  # File.foreach closes the semtrees file once every line has been read.
  File.foreach($options[:semtrees]) do |line|
    $lineNum += 1
    t = Tree.new(line)
    tRel = t.findFirst($rel, [])[0]
    if tRel.nil?
      $stderr.print $lineNum, " tree has no REL ", line, "\n"
    else
      tRel.hypothesizePBR
    end
    t.setIDForRestOfNonLeaf
    print t.to_s, "\n"
  end
