###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

#!/usr/bin/ruby

require 'scripts/umnlp.rb'
require 'optparse'
require 'ostruct'

## Takes in 2 text files filled with aligned trees,
## and computes labelled precision/recall

## Command-line options:
##   -g <gold-file>    file of gold-standard trees (required)
##   -h <hypoth-file>  file of hypothesized trees (required)
##   -p                print each pair of trees as it is scored
##   -r                print running totals after every scored sentence
##   -f                additionally accumulate edited-leaf precision/recall
##   -w <max-words>    skip sentences whose gold tree has more leaves than
##                     this (default 40)
options = OpenStruct.new
options.gold = ""
options.hypoth = ""
options.printtrees = false
options.running_totals = false
options.edit = false
options.maxwords = 40
opts = OptionParser.new

opts.on("-g <gold-file>") { |val| options.gold = val }
opts.on("-h <hypoth-file>") { |val| options.hypoth = val }
opts.on("-p") { options.printtrees = true }
opts.on("-r") { options.running_totals = true }
opts.on("-f") { options.edit = true }
opts.on("-w <max-words>") { |val| options.maxwords = val.to_i }
opts.parse(ARGV)

## Both tree files are required; without them just print the usage text
## and stop.
if options.gold == "" || options.hypoth == ""
  puts opts.to_s
  exit
end

puts "Gold trees: #{options.gold}"
puts "Hypoth trees: #{options.hypoth}"
#exit

## One tree reader per input file; getNext yields nil once a file is exhausted.
ts1 = TreeSlurper.new(options.gold)
ts2 = TreeSlurper.new(options.hypoth)

## Cumulative rule counts over all scored sentences and the scores derived
## from them.
gold_count = 0
hypoth_count = 0
correct = 0
precision = 0.0
recall = 0.0

## Same again for edited leaves (only updated when -f is given).
egold = 0
ehypoth = 0
ecorrect = 0
eprec = 0.0
## Must be named `erecall`: that is the name the scoring loop assigns and
## the final report reads.  Initializing it here keeps the report from
## seeing nil when no sentence was scored.
erecall = 0.0

## Prime both readers with their first tree.
t1 = ts1.getNext
t2 = ts2.getNext #nextTree

## 1-based index of the sentence pair currently held in t1/t2.
line = 1
## Only referenced by the disabled summary lines at the end of the script.
num_errors = 0

## Walk both files in lockstep, scoring one (gold, hypothesis) tree pair per
## pass, until either file runs out of trees.
until t1.nil? || t2.nil?

  ## Sentences whose gold tree has too many leaves are left out of the totals.
  if t1.getNumLeafs > options.maxwords
    $stderr.puts "Sentence ##{line} > #{options.maxwords} words: skipped"
    t1 = ts1.getNext
    t2 = ts2.getNext
    line += 1
    next
  end

  ## Check if there is some discrepancy between the two sentences we
  ## are going to be comparing; a mismatched pair is reported and skipped.
  ## NOTE(review): the message talks about advancing the gold standard to
  ## catch up, but BOTH streams are advanced here -- confirm which one is
  ## intended before relying on recovery from misaligned files.
  if t1.getWordString != t2.getWordString
    $stderr.puts "Error in alignment between test and hypoth in sentence ##{line}:"
    $stderr.puts "gold = " + t1.getWordString
    $stderr.puts "hypo = " + t2.getWordString
    $stderr.puts "Attempting to increment gold standard to catch up for"
    $stderr.puts "unparseable test sentences"
    t1 = ts1.getNext
    t2 = ts2.getNext
    line += 1
    next
  end

  if options.printtrees
    puts "T1: " + t1.to_s
    puts "T2: " + t2.to_s
  end

  ## Per-sentence counts, computed once and reused for the running-total
  ## output below (assumes these tree queries have no side effects).
  gold_rules = t1.getNumRules
  hypoth_rules = t2.getNumRules
  sentence_correct = t1.getNumCorrect(t2)

  gold_count += gold_rules
  hypoth_count += hypoth_rules
  correct += sentence_correct

  ## Scores are over the cumulative counts; a zero denominator leaves the
  ## score at its previous value instead of producing NaN.
  precision = correct.to_f / hypoth_count.to_f if hypoth_count != 0
  recall = correct.to_f / gold_count.to_f if gold_count != 0

  if options.edit
    egold += t1.getNumEditedLeafs
    ehypoth += t2.getNumEditedLeafs
    ecorrect += t1.getNumEditedCorrect(t2)
    ## Guard the denominators the same way as precision/recall above so that
    ## input without edited leaves reports 0.0 rather than NaN.
    eprec = ehypoth.zero? ? 0.0 : ecorrect.to_f / ehypoth.to_f
    erecall = egold.zero? ? 0.0 : ecorrect.to_f / egold.to_f
  end

  if options.running_totals
    puts "line #{line}"
    puts "Precis = #{precision} (just this: #{sentence_correct.to_f / hypoth_rules.to_f}"
    puts "Recall = #{recall} (just this: #{sentence_correct.to_f / gold_rules.to_f}"
    if options.edit
      puts "eprec = #{eprec}"
      puts "erecall = #{erecall}"
    end
  end

  t1 = ts1.getNext
  t2 = ts2.getNext
  line += 1
end

## Say so when the two files did not run out of trees at the same time.
if t1.nil? && !t2.nil?
  puts "Quit because t1 was nil"
elsif !t1.nil? && t2.nil?
  puts "Quit because t2 was nil"
end

## Balanced F-measure of a precision/recall pair.  Yields 0.0 when both are
## zero, where the plain formula would print NaN (0.0 / 0.0).
f_measure = lambda do |p_val, r_val|
  denom = p_val + r_val
  denom.zero? ? 0.0 : (2 * p_val * r_val) / denom
end

#puts "\n\n\nFound #{line - num_errors} sentences with accuracy:"
puts "  Precis = #{precision}"
puts "  Recall = #{recall}"
puts "  F-score = #{f_measure.call(precision, recall)}"

if options.edit
  ## .to_f turns a nil (never assigned) edit recall into 0.0 so the report
  ## cannot fail with a TypeError; a Float passes through unchanged.
  edit_recall = erecall.to_f
  puts "  Edit_p = #{eprec}"
  puts "  Edit_r = #{edit_recall}"
  puts "  Edit_f = #{f_measure.call(eprec, edit_recall)}"
end

#puts "\nThere were #{num_errors} unparsed sentences"

