#!/usr/bin/ruby
###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

##################################################
# tbtrees2srltrees.rb
# Converts treebank-style trees (in article form) into
# trees with appropriate labels for
# SRL (semantic role labeling) tasks
# 
# -v 1 : the default. srl labels attach to leaf nodes spanned by the labeled node
# -v 2 : srl labels attach to the node specified by propbank
# -v 3 : annotate non-leaf node with shallowed syntax, a: (any numbered arg), 
#        m: (any ARGM), r: (the relation), and id: for the rest
#
# Usage: cat prop.txt | ruby scripts/tbtrees2srltrees.rb -v 2
#
# Author: Tim Miller
# Author: Luan Nguyen
##################################################

require 'scripts/umnlp.rb'
require "fileutils.rb"

##### parse options
require 'optparse'

## Marker inserted between an annotation label and the original node head
## (see Tree#annotateID, which builds "<label>!ldelim!<head>").
$ldelim = "!ldelim!"

## Default settings; each may be overridden from the command line below.
## NOTE(review): :no_main_arg_ext already defaults to true, so passing -a
## cannot change it -- confirm whether that is intended.
$options = {
  :version         => 1,
  :verbose         => false,
  :no_main_arg_ext => true,
  :no_argm_ext     => false
}

## Argument-less switches: [key stored in $options, short flag, long flag, help text].
## They are registered in this order, after -v.
boolean_switches = [
  [:predicate,       "-p", "--predicate",       "output REL-play.01 instead of just REL"],
  [:relation,        "-r", "--relation",        "output REL-play instead of just REL"],
  [:features,        "-f", "--features",        "output the 5 features of the predicate"],
  [:verbose,         "-V", "--Verbose",         "set to true for more stderr messages"],
  [:no_main_arg_ext, "-a", "--no-main-arg-ext", "set to true to trim ARG2-FOR to just ARG2"],
  [:no_argm_ext,     "-m", "--no-argm-ext",     "set to true to trim ARGM-DIS to just ARGM"]
]

OptionParser.new do |cli|
  cli.banner = "Usage: ruby tbtrees2srltrees.rb [options]"

  ## -v takes an Integer selecting the annotation scheme.
  cli.on("-v", "--version=v", Integer, "version 1 annotate srl label on the leaf, version 2 annotate srl label on non-leaf node, version 3 for shallowed syntax") do |chosen|
    $options[:version] = chosen
  end

  ## Every remaining switch simply records the parsed value under its key.
  boolean_switches.each do |key, short_flag, long_flag, help_text|
    cli.on(short_flag, long_flag, help_text) do |value|
      $options[key] = value
    end
  end
end.parse!

## Report the effective configuration on stderr so it shows up in run logs.
$stderr.puts "Running version #{$options[:version]}, no_main_arg_ext=#{$options[:no_main_arg_ext]}, no_argm_ext=#{$options[:no_argm_ext]}"

## SRL-annotation extensions to Tree.  The methods below rely on members that
## are not defined in this file: @head, @children, parent, head/head=,
## getNumLeafs and to_s (presumably provided by scripts/umnlp.rb -- verify).
class Tree

  ## Take the argument pointers of one PropBank line, e.g. the tail of:
  ## 8 gold join.01 vf--a 0:2-ARG0 7:0-ARGM-MOD 8:0-rel 9:1-ARG1 11:1-ARGM-PRD 15:1-ARGM-TMP
  ## and write each role onto the head of the node the pointer designates.
  ##
  ## arglist         : array of "leaf:height[,leaf:height...]-LABEL" strings
  ## pred, verb_feats: passed through to the per-node annotators (unused there)
  ## shallowedSyntax : true  -> labels id / r / m / a   (annotateShallowedSyntax)
  ##                   false -> labels ID / REL / <role> (annotateOneArg)
  ##
  ## Every non-leaf head is first reset to the neutral id label, so a tree can
  ## be re-annotated for another predicate.  Returns arglist.
  def annotateArgs(arglist, pred, verb_feats, shallowedSyntax)
    annotateID(shallowedSyntax)
    eachArgNode(arglist) { |node, rel|
      ## rel.to_s mirrors the old "#{rel}": a pointer without "-LABEL" gives "".
      if shallowedSyntax
        node.annotateShallowedSyntax(rel.to_s, pred, verb_feats)
      else
        node.annotateOneArg(rel.to_s, pred, verb_feats)
      end
    }
  end

  ## Replace this node's neutral "ID" label with the given role.
  ## "rel" becomes "REL"; any other label is used as-is except that its first
  ## "-" is rewritten to "RELDAZ".  Nothing happens if the head does not
  ## contain "ID" followed by $ldelim (e.g. the node was already labeled).
  ## The label argument is not modified.  pred and verb_feats are unused.
  def annotateOneArg(label, pred="", verb_feats="")
    if label == "rel"
      @head.sub!("ID#{$ldelim}", "REL#{$ldelim}")
    else
      @head.sub!("ID#{$ldelim}", "#{label.sub("-", "RELDAZ")}#{$ldelim}")
    end
  end

  ## Shallow variant of annotateOneArg: replace the neutral "id" label with
  ## r (the relation), m (any label starting with ARGM) or a (anything else).
  ## pred and verb_feats are unused.
  def annotateShallowedSyntax(label, pred="", verb_feats="")
    short = if label == "rel"
              "r"
            elsif label =~ /^ARGM/
              "m"
            else
              "a"
            end
    @head.sub!("id#{$ldelim}", "#{short}#{$ldelim}")
  end

  ## Recursively give every node that has children the neutral label
  ## ("id" when shallowedSyntax, otherwise "ID") followed by $ldelim.
  ## An existing label (everything up to the last $ldelim) is replaced;
  ## otherwise the label is prepended.  Childless nodes are left untouched.
  def annotateID(shallowedSyntax)
    id = shallowedSyntax ? "id" : "ID"
    return if @children.size == 0

    if @head.include?($ldelim)
      @head.sub!(/^.*#{Regexp.escape($ldelim)}/, "#{id}#{$ldelim}")
    else
      @head = "#{id}#{$ldelim}" + @head
    end
    @children.each{ |c| c.annotateID(shallowedSyntax) }
  end

  ## Take something like:
  ## 8 gold join.01 vf--a 0:2-ARG0 7:0-ARGM-MOD 8:0-rel 9:1-ARG1 11:1-ARGM-PRD 15:1-ARGM-TMP
  ## and add it as structure to the lexical items in this tree:
  ## (NNP vinken:ARG0) or (VBD joined:REL) or (NNP Nov.:ARGM-TMP) for example.
  ## All leaves are first reset to ":NULL"; then every leaf under a designated
  ## node receives that node's role.  Returns arglist.
  def markupArgStructure(arglist, pred, verb_feats)
    appendToAllLeaves(":NULL")
    eachArgNode(arglist) { |node, rel|
      node.appendToAllLeaves(":#{rel}", pred, verb_feats)
    }
  end

  ## Return the num-th (0-based) leaf below this node, counting left to right.
  ## If the index is out of range an error is printed to stderr and the
  ## process exits with status 1.
  def getLeafNum(num)
    ## Base case: a childless node is its own leaf 0.
    if @children.size == 0 and num == 0
      return self
    end

    ## Error handling: reached a leaf while leaves still remain to be skipped.
    if @children.size <= 0
      $stderr.puts "Error: This tree doesn't have that many leaves: #{to_s}"
      exit(1)
    end

    ## Walk the children, tracking the index of the first leaf each one spans,
    ## and descend into the first child whose span contains num.
    start_leaf = 0
    @children.each{ |c|
      span = c.getNumLeafs
      return c.getLeafNum(num - start_leaf) if num < start_leaf + span
      start_leaf += span
    }

    ## num lies beyond the last leaf of the last child; previously this ran
    ## off the end of @children and raised NoMethodError on nil.
    $stderr.puts "Error: This tree doesn't have that many leaves: #{to_s}"
    exit(1)
  end

  ## Replace the ":suffix" of every leaf under this node with the given label.
  ##   ":rel"        -> ":REL" (":REL-<pred>" when $options[:predicate] is set)
  ##   ":ARG[0-5]-x" -> first five characters only if $options[:no_main_arg_ext]
  ##   ":ARGM-x"     -> first five characters only if $options[:no_argm_ext]
  ##   anything else -> appended unchanged
  ## verb_feats is unused.  Returns the number of leaves visited.
  def appendToAllLeaves(label, pred="", verb_feats="")
    ## The suffix depends only on the arguments and options, so compute it once.
    suffix =
      if label.eql?(":rel")
        $options[:predicate] ? ":REL-#{pred}" : ":REL"
      elsif (label =~ /:ARG[0-5]\-.*/ && $options[:no_main_arg_ext]) ||
            (label =~ /:ARGM\-.*/ && $options[:no_argm_ext])
        label[0..4]
      else
        label
      end

    getNumLeafs.times{ |i|
      leaf = getLeafNum(i)
      ## Drop whatever follows the last ":" (a previous label), if any.
      leaf.head.gsub!(/(.+):.*/,'\1')
      leaf.head += suffix
    }
  end

  private

  ## Parse each "leaf:height[,leaf:height...]-LABEL" entry of arglist and yield
  ## (node, label) for every pointer.  Pointers within an entry are separated
  ## by "," or "*".  The node is found by taking leaf number `leaf` and
  ## climbing height+1 parents (one extra step because the leaves here are the
  ## words themselves).  label is nil when the entry contains no "-".
  def eachArgNode(arglist)
    arglist.each{ |arg|
      pointers, rel = arg.split(/-/, 2)
      pointers.split(/,|\*/).each{ |pointer|
        pieces = pointer.split(":")
        start  = pieces[0].to_i
        height = pieces[1].to_i   # a missing height counts as 0
        node = getLeafNum(start)
        (height + 1).times{
          node = node.parent
          if node.nil?
            $stderr.puts "Error: argument pointer #{pointer} climbs above the root of: #{to_s}"
            exit(1)
          end
        }
        yield node, rel
      }
    }
  end
end


## Main driver: read PropBank annotation lines from stdin (or files named on
## the command line), annotate the corresponding treebank tree, and append the
## result to a file of the same relative name under output_dir.
##
## These 2 paths could be made command-line arguments,
## but raw treebank is necessary because raw propbank contains indices that
## are based on the presence of traces.
file_prefix = "/project/nlp/data/treebank/parsed/mrg/"
output_dir = "genmodel/propbanktrees/parsed/mrg/"
file = ""        # treebank file whose trees are currently loaded in all_trees
all_trees = [];

$lineNum=0       # 1-based number of the input line being processed (for diagnostics)

while line = gets
  $lineNum = $lineNum + 1
  line.strip!
  ## Do the same as tbtrees2tree.pl, although there is only one instance of
  ## PRT| (in wsj/10/wsj_1019.mrg).
  line.sub!(/PRT\|/,"");

  ## Fields are: (0)filename (1)sentence num (2)index of this predicate
  ##             (3)gold (4)pred.sense (5)verb features (6...)arg list
  fields = line.split

  ## Blank lines carry no annotation; short lines cannot be interpreted.
  ## Both used to crash further down (nil filename / nil verb features).
  next if fields.empty?
  if fields.size < 6
    $stderr.puts "Warning: skipping malformed line #{$lineNum} (expected at least 6 fields): #{line}"
    next
  end

  ## Only re-read the treebank file when the annotation moves on to a new one.
  if fields[0] != file
    file = fields[0]
    ts = TreeSlurper.new(file_prefix + file)
    all_trees = ts.getAll
  end

  tree_num = fields[1].to_i
  pred = fields[4]
  verb_feats = fields[5].gsub(/(-)/,'RELDAZ')
  fields.slice!(0,6)   # what remains is the argument list

  tree = all_trees[tree_num]
  if tree.nil?
    $stderr.puts "Error: line #{$lineNum}: #{file} has no tree number #{tree_num}"
    exit(-1)
  end

  if ($options[:version] == 1)
    tree.markupArgStructure(fields, pred, verb_feats)
  elsif ($options[:version] == 2)
    tree.annotateArgs(fields, pred, verb_feats, false)
  elsif ($options[:version] == 3)
    tree.annotateArgs(fields, pred, verb_feats, true)
  else
    $stderr.puts("Unknown version #{$options[:version]}")
    exit(-1)
  end

  ## Create whatever directory the output file lives in (any nesting depth),
  ## then append the annotated tree; the block form closes the file even if
  ## the write raises.
  out_path = output_dir + file
  FileUtils.mkdir_p(File.dirname(out_path))
  File.open(out_path, "a") { |fout| fout.puts tree.to_s }
end
