#!/usr/bin/ruby
###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

##################################################
# mergeSynRoleTrees.rb
# Merge all syntactic role trees for one treebank tree into a single tree,
# where a non-id syntactic role label is given preference over the "id" one.
#
# Usage: cat synrole_trees | ruby scripts/mergeSynRoleTrees.rb
# Author: Luan Nguyen
##################################################

##### parse options
require 'optparse'

# Parses the script's command-line switches.
#
# argv - Array of command-line argument strings. Recognised switches are
#        removed from it in place (OptionParser#parse!), so whatever is left
#        afterwards is no longer mistaken for an option.
#
# Returns a Hash with the key :clearrel (true by default). The switch is
# negatable: "-r" / "--clearrel" turn it on, "--no-clearrel" turns it off.
# Previously the switch took no argument and could only ever yield true,
# so the default could not be overridden.
def parseOptions(argv)
  options = { :clearrel => true }
  OptionParser.new do |opts|
    opts.banner = "Usage: ruby mergeSynRoleTrees.rb [options]"
    opts.on("-r", "--[no-]clearrel",
            "reset r labels to id before merging (default: on; --no-clearrel keeps r)") do |clearrel|
      options[:clearrel] = clearrel
    end
  end.parse!(argv)
  options
end

# Global option table read by doMergeLabels.
$options = parseOptions(ARGV)

# Role-label tokens as they are compared against the labels scanned from the
# input: an opening paren, the role name, and the literal "!ldelim!" marker.
#
# "id" is the neutral label: when merging, any other label replaces it.
$ID_LABEL = "(id!ldelim!"
# "a" labels are the ones counted as arguments when a merged tree is printed.
$A_LABEL = "(a!ldelim!"
# "m" is described in the merge rules as a modificative "a"
# (presumably a PropBank-style modifier role; verify against the input data).
$M_LABEL = "(m!ldelim!"
# "r" marks the "rel"; resetRelToId maps it to the "id" label.
$R_LABEL = "(r!ldelim!"

# Maps the "r" label onto the "id" label; every other value is handed back
# untouched.
#
# label - a label token (String) to normalise.
#
# Returns $ID_LABEL when label equals $R_LABEL, otherwise label itself.
def resetRelToId(label)
  label == $R_LABEL ? $ID_LABEL : label
end

# Merges the labels of the current input line into $prevLabels, position by
# position.
#
# labels - Array of label tokens scanned from the current line, paired by
#          index with $prevLabels.
#
# Per position (x = previous label, y = current label):
#   * when $options[:clearrel] is set, both are first passed through
#     resetRelToId, so "r" is treated as "id";
#   * an "id" label always yields to the other label;
#   * two different non-id labels are a conflict, resolved by a fixed
#     precedence m > a > r that does not depend on which line came first:
#       - a vs r (150/1165 conflicts): pick "a". E.g. "we can HAVE"
#         (sent 3693), the rel HAVE should get "a" for the phrase.
#       - a vs m (538/1165): pick "m". After all, "m" is a modificative "a".
#       - m vs r (477/1165): pick "m". Verbs like "have", "going (to)".
#     The original code picked the current label for m vs r, which returned
#     "r" whenever the "m" line happened to come first.
#     Any conflict involving a label outside these three keeps the current
#     label.
#   Every conflict is reported on $stderr together with $lineNum.
#
# Side effect: replaces $prevLabels with the merged Array.
# Returns the merged Array.
def doMergeLabels(labels)
  # Strongest first; a lower index wins a conflict.
  precedence = [$M_LABEL, $A_LABEL, $R_LABEL]

  mergedLabels = $prevLabels.zip(labels).map do |x, y|
    if $options[:clearrel]
      x = resetRelToId(x)
      y = resetRelToId(y)
    end

    if x == $ID_LABEL
      y
    elsif y == $ID_LABEL || x == y
      x
    else
      xRank = precedence.index(x)
      yRank = precedence.index(y)
      # x wins only when both labels are ranked and x outranks y;
      # otherwise (y outranks x, or either label is unranked) keep y.
      pick = (xRank && yRank && xRank < yRank) ? x : y
      $stderr.puts("Conflict args at line #{$lineNum}, prevArg=#{x}, curArg=#{y}. Use #{pick}")
      pick
    end
  end

  $prevLabels = mergedLabels
end

# Writes the accumulated tree to standard output, or does nothing when no
# tree has been accumulated ($prevTree is empty).
#
# The tree text fragments in $prevTree are re-interleaved with the labels in
# $prevLabels (fragment first, then the label at the same index; a fragment
# without a matching label is emitted on its own). The output is
#   <number of "a" labels> <$fromLine> <$toLine> <re-assembled tree>
# with no newline added beyond what the fragments already contain.
def printTree
  return if $prevTree.empty?

  pairs = $prevTree.zip($prevLabels)
  argCount = pairs.count { |_fragment, label| label == $A_LABEL }
  rebuilt = pairs.map { |fragment, label| "#{fragment}#{label}" }.join
  print argCount, " ", $fromLine, " ", $toLine, " ", rebuilt
end

# Matches one role-label token: "(" + word characters + "!ldelim!".
$pattern = /\([\w]+\!ldelim\!/

# The tree currently being accumulated: its text with the label tokens cut
# out, and the (merged) labels that belong between those fragments.
$prevTree = []
$prevLabels = []

# First and last input line number covered by the accumulated tree.
$fromLine = 0
$toLine = 0

# Number of the input line being processed (first line is 1).
$lineNum = 0

while line = gets
  $lineNum += 1

  if line.match(/(id!ldelim!END eos)|(id!ldelim!EOS eos)/)
    # END/EOS marker lines are passed through with an argument count of 0.
    # Flush the accumulated tree first so the output keeps the input order
    # (previously the marker was written ahead of the tree that preceded it),
    # and start afresh so trees are never merged across a marker.
    printTree
    $prevTree = []
    $prevLabels = []
    print 0, " ", $lineNum, " ", $lineNum, " ", line
    next
  end

  # Two lines describe the same tree when they are identical once the label
  # tokens are removed.
  curTree = line.split($pattern)
  curLabels = line.scan($pattern)

  if curTree == $prevTree
    doMergeLabels(curLabels)
    $toLine = $lineNum
  else
    printTree
    $prevTree = curTree
    # NOTE(review): labels are stored as scanned here; the r -> id reset only
    # happens inside doMergeLabels, so a tree that occurs on a single line
    # keeps its "r" labels even when clearrel is on. Confirm this is intended.
    $prevLabels = curLabels
    $fromLine = $toLine = $lineNum
  end
end

# Emit the tree still pending at end of input.
printTree