#!/usr/bin/perl

use warnings;
use strict;


# Emit a warning (without aborting) when the running interpreter is older
# than Perl 5.10.0.
if ($] < 5.010000)
{
  # The message is fixed text, not a format string, so it is written with
  # print and a non-interpolating heredoc.
  print STDERR <<'EOM';

 Warning: This script requires PERL 5.10.0. Errors may occur with older versions of PERL.

EOM
}

# Output type/format switches; both start at 0.
my ($verbose, $csv) = (0, 0);

# Starts at 0.
my $tokbase = 0;

# Containers, all initially empty.
my @phenomena;  # annotated instances of phenomena
my %items;      # items/sentences indexed by id
my @itemids;    # item ids
# Enju patterns.
# Layout: phenomenon name => dependency label => list of pattern strings.
# The strings are single-quoted, so backslash sequences such as \t and \d
# are stored literally. {W1} and {W2} look like placeholders that are
# substituted elsewhere before matching -- confirm against the code that
# consumes this table.
my %pattern_enju = (
    barerel => {
        ARG2 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        MOD => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(noun_arg2)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
    },
    rnr => {
        ARG2 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            'and\t[^\t]*\t[^\t]*\t[^\t]*\t(\S+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\2)',
            'and\t[^\t]*\t[^\t]*\t[^\t]*\t(\S+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\2)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG3\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
    },
    argadj => {
        ARG2 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(comp_mod_arg1)\tMOD\t{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG3 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG3\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(aux_arg12)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        MOD => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(noun_arg2)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(adj_arg1)\tARG1\t{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(prep_arg12)\tARG1\t{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
    },
    ned => {
        MOD => [
            '{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(adj_arg1)\tARG1\t{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG1 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG2 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(prep_arg12)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
    },
    absol => {
        ARG1 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        MOD => [
            '{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(verb_mod_arg1)\tMOD\t{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
    },
    vger => {
        ARG1 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG2 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(comp_mod_arg1)\tMOD\t{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(prep_arg12)\tARG1\t{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        MOD => [
            '{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t(prep_arg12)\tARG1\t{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
    },
    control => {
        ARG1 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG2 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG3 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG3\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
    },
    tough => {
        ARG1 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG2 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG3 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG3\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
    },
    itexpl => {
        ARG1 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t[^\t]*\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG2 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t[^\t]*\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG3 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t[^\t]*\tARG3\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t[^\t]*\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
    },
    vpart => {
        ARG1 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG2 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            'and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
            'and\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG1\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+).*{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\tand\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG3 => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG3\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
        ARG => [
            '{W1}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)\t([^\t]*)\tARG2\t{W2}\t[^\t]*\t[^\t]*\t[^\t]*\t(\d+)',
        ],
    },
);

# Stanford patterns.
# Layout: phenomenon name => dependency label => list of pattern strings
# (single-quoted, so backslashes are stored literally). {W1} and {W2} look
# like placeholders filled in elsewhere -- confirm against the consumer.
my %pattern_stanford = (
    barerel => {
        ARG2 => [
            'obj\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        MOD => [
            'rcmod\({W1}-(\d+), {W2}-(\d+)\)',
            'dep\({W1}-(\d+), {W2}-(\d+)\)',
        ],
    },
    rnr => {
        ARG2 => [
            'obj\({W1}-(\d+), {W2}-(\d+)\)',
            'prep_{W1}\(\S*-(\d+), {W2}-(\d+)',
            'dep\({W1}-(\d+), {W2}-(\d+)\)',
            'ccomp\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        ARG => [
            'prep_{W1}\(\S*-(\d+), {W2}-(\d+)',
        ],
    },
    argadj => {
        ARG2 => [
            'obj\({W1}-(\d+), {W2}-(\d+)\)',
            'cop\({W2}-(\d+), {W1}-(\d+)\)',
        ],
        ARG => [
            'aux\({W2}-(\d+), {W1}-(\d+)\)',
            'auxpass\({W2}-(\d+), {W1}-(\d+)\)',
        ],
        ARG3 => [
            'obj\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        MOD => [
            'prep_{W2}\({W1}-(\d+)',
            'cop\((\S+), {W1}-(\d+)\).*advmod\(\2, {W2}-(\d+)\)',
            'advmod\({W1}-(\d+), {W2}-(\d+)\)',
        ],
    },
    ned => {
        ARG1 => [
            'nsubj\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        ARG2 => [
            'obj\({W1}-(\d+), {W2}-(\d+)\)',
            'prep_{W1}\(\S*-(\d+), {W2}-(\d+)',
        ],
        MOD => [
            'amod\({W1}-(\d+), {W2}-(\d+)\)',
            'amod\({W1}-(\d+), \S*{W2}-(\d+)\)',
            'advmod\({W1}-(\d+), {W2}-(\d+)\)',
            'nsubj\({W2}-(\d+), {W1}-(\d+)\)',
            'nsubj\({W2}-(\d+), \S*{W1}-(\d+)\)',
            'nn\({W1}-(\d+), {W2}-(\d+)\)',
            'dep\({W2}-(\d+), {W1}-(\d+)\)',
        ],
    },
    absol => {
        MOD => [
            'prep_{W2}\({W1}-(\d+)',
            'prep\({W1}-(\d+), {W2}-(\d+)\)',
            'dep\({W1}-(\d+), {W2}-(\d+)\)',
            'xcomp\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        ARG => [
            'dep\({W2}-(\d+), {W1}-(\d+)\)',
            'nsubj\({W1}-(\d+), {W2}-(\d+)\)',
            'partmod\({W2}-(\d+), {W1}-(\d+)\)',
            'nsubjpass\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        ARG1 => [
            'nsubj\({W1}-(\d+), {W2}-(\d+)\)',
            'prep_{W1}\({W2}-(\d+)',
            'partmod\({W2}-(\d+), {W1}-(\d+)\)',
            'advmod\({W2}-(\d+), {W1}-(\d+)\)',
            'amod\({W2}-(\d+), {W1}-(\d+)\)',
            'dep\({W2}-(\d+), {W1}-(\d+)\)',
            'nsubj\((\S+), {W2}-(\d+)\).*cop\(\2, {W1}-(\d+)\)',
            'cop\((\S+), {W1}-(\d+)\).*partmod\({W2}-(\d+), \2\)',
        ],
    },
    vger => {
        ARG1 => [
            'nsubj\({W1}-(\d+), {W2}-(\d+)\)',
            'csubj\({W1}-(\d+), {W2}-(\d+)\)',
            'csubj\((\S+), {W2}-(\d+)\).*cop\(\2, {W1}-(\d+)\)',
        ],
        ARG2 => [
            'dobj\({W1}-(\d+), {W2}-(\d+)\)',
            'iobj\({W1}-(\d+), {W2}-(\d+)\)',
            'prep_{W1}\(\S*-(\d+), {W2}-(\d+)',
            'prepc_{W1}\(\S*-(\d+), {W2}-(\d+)',
            'prep_{W2}\({W1}-(\d+)',
            'prepc_{W2}\({W1}-(\d+)',
            'prep\({W1}-(\d+), {W2}-(\d+)\)',
            'pcomp\({W1}-(\d+), {W2}-(\d+)\)',
            'ccomp\({W1}-(\d+), {W2}-(\d+)\)',
            'dep\({W1}-(\d+), {W2}-(\d+)\)',
            'xcomp\({W1}-(\d+), {W2}-(\d+)\)',
            'tmod\({W1}-(\d+), {W2}-(\d+)\)',
            'conj_and\((\S+), {W1}-(\d+)\).*dobj\(\2, {W2}-(\d+)\)',
        ],
        ARG => [
            'xcomp\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        MOD => [
            'prep_{W2}\({W1}-(\d+)',
            'dep\({W1}-(\d+), {W2}-(\d+)\)',
            'rcmod\({W1}-(\d+), {W2}-(\d+)\)',
        ],
    },
    control => {
        ARG1 => [
            'xsubj\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        ARG2 => [
            'xcomp\({W1}-(\d+), {W2}-(\d+)\)',
            'ccomp\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        ARG => [
            'nsubjpass\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        ARG3 => [
            'xcomp\({W1}-(\d+), {W2}-(\d+)\)',
        ],
    },
    tough => {
        ARG2 => [
            'dobj\({W1}-(\d+), {W2}-(\d+)\)',
            'iobj\({W1}-(\d+), {W2}-(\d+)\)',
            'xcomp\({W1}-(\d+), {W2}-(\d+)\)',
        ],
    },
    itexpl => {
        ARG1 => [
            'nsubj\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        ARG => [
            'nsubjpass\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        ARG2 => [
            'dobj\({W1}-(\d+), {W2}-(\d+)\)',
        ],
        ANY => [
            '\w+\({W1}-(\d+), {W2}-(\d+)\)',
        ],
    },
    vpart => {
        ARG2 => [
            'obj\({W1}-(\d+), {W2}-(\d+)\)',
            'tmod\({W1}-(\d+), {W2}-(\d+)\)',
            'xcomp\({W1}-(\d+), {W2}-(\d+)\)',
            'ccomp\({W1}-(\d+), {W2}-(\d+)\)',
            'prep_{W1}\(\S*-(\d+), {W2}-(\d+)',
            'cop\((\S+), \S*{W2}-(\d+)\).*ccomp\({W1}-(\d+), \2\)',
        ],
        ARG => [
            'nsubjpass\({W1}-(\d+), {W2}-(\d+)\)',
        ],
    },
);
# RASP patterns.
# Layout: phenomenon name => dependency label => list of pattern strings
# (single-quoted, so backslashes are stored literally). {W1} and {W2} look
# like placeholders filled in elsewhere -- confirm against the consumer.
my %pattern_rasp = (
    barerel => {
        ARG2 => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        MOD => [
            '\(\|ccomp\| _ \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
            '\(\|cmod\| _ \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
    },
    rnr => {
        ARG2 => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        ARG => [
            '\(\|dobj\| \|{W2}:(\d+)\| \|{W1}:(\d+)\|\)',
        ],
    },
    argadj => {
        ARG2 => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        ARG => [
            '\(\|aux\| \|{W2}:(\d+)\| \|{W1}:(\d+)\|\)',
        ],
        ARG3 => [
            '\(\|obj2\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        MOD => [
            '\(\|ccomp\| _ \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
    },
    ned => {
        MOD => [
            '\(\|ncmod\| _ \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
            '\(\|ncmod\| _ \|{W1}:(\d+)\| \|\S*{W2}:(\d+)\|\)',
        ],
        ARG1 => [
            '\(\|ncsubj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\| _\)',
        ],
        ARG2 => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        ARG => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
    },
    absol => {
        ARG1 => [
            '\(\|ncsubj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\| _\)',
        ],
        ARG => [
            '\(\|xcomp\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        MOD => [
            '\(\|ncmod\| _ \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
            '\(\|xmod\| _ \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
    },
    vger => {
        ARG1 => [
            '\(\|ncsubj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\| _\)',
            '\(\|xsubj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        ARG2 => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
            '\(\|ccomp\| \|that:(\d+)\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
            '\(\|xcomp\| \|to\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        ARG => [
            '\(\|xcomp\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        MOD => [
            '\(\|iobj\| _ \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
    },
    control => {
        ARG1 => [
            '\(\|ncsubj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\| _\)',
            '\(\|xsubj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        ARG2 => [
            '\(\|xcomp\| \|to\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
            '\(\|xcomp\| \|to\| \|{W1}:(\d+)\| \|and:(\d+)\|.*\|conj\| \|and:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        ARG3 => [
            '\(\|iobj\| \|{W2}:(\d+)\| \|{W1}:(\d+)\|\)',
            '\(\|xcomp\| \|to\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        ARG => [
            '\(\|dobj\| \|{W2}:(\d+)\| \|{W1}:(\d+)\|\)',
        ],
    },
    tough => {
        ARG1 => [
            '\(\|ncsubj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\| _\)',
        ],
        ARG2 => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
            '\(\|xcomp\| \|to\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
    },
    itexpl => {
        ARG1 => [
            '\(\|ncsubj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\| _\)',
        ],
        ARG2 => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        ARG => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
    },
    vpart => {
        ARG2 => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
            '\(\|ccomp\| \|that:(\d+)\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
            '\(\|ccomp\| _ \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
            '\(\|xcomp\| \|to\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
        ARG => [
            '\(\|dobj\| \|{W1}:(\d+)\| \|{W2}:(\d+)\|\)',
        ],
    },
);

# C&C patterns.
# Layout: phenomenon name => dependency label => list of pattern strings
# (single-quoted, so backslashes are stored literally). {W1} and {W2} look
# like placeholders filled in elsewhere -- confirm against the consumer.
# Every word placeholder is wrapped as \W*{Wn}\W*_(\d+); the four ccomp
# patterns marked below use \W* after the placeholder like all the others
# (a bare W* there would match literal "W" characters instead of
# non-word characters).
my %pattern_candc = (
    barerel => {
        ARG2 => [
            '\(dobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(obj2 \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
        MOD => [
            '\(cmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(cmod _ \W*{W2}\W*_(\d+) \W*{W1}\W*_(\d+)\)',
        ],
    },
    rnr => {
        ARG2 => [
            '\(dobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
        ARG => [
            '\(obj2 \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
    },
    argadj => {
        ARG2 => [
            '\(dobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(iobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(obj2 \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(ccomp \W*{W2}\W*_(\d+) \W*{W1}\W*_(\d+) \S*\)',    # \W* after {W2}
            '\(xcomp _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(xcomp to_(\d+) \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
        ARG => [
            '\(aux \W*{W2}\W*_(\d+) \W*{W1}\W*_(\d+)\)',
            '\(dobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(ccomp _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',      # \W* after {W1}
        ],
        ARG3 => [
            '\(ncmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
        MOD => [
            '\(ncmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(xmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(iobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(xcomp _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
    },
    ned => {
        MOD => [
            '\(ncmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(ncmod _ \W*{W1}\W*_(\d+) \W*\S*{W2}\W*_(\d+)\)',
        ],
        ARG1 => [
            '\(ncsubj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+) _\)',
        ],
        ARG2 => [
            '\(dobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
    },
    absol => {
        ARG1 => [
            '\(ncsubj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+) _\)',
            '\(ncmod _ \W*{W2}\W*_(\d+) \W*{W1}\W*_(\d+)\)',
        ],
        ARG => [
            '\(ncsubj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+) _\)',
            '\(ncmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
        MOD => [
            '\(xmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(ncmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(cmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
    },
    vger => {
        ARG1 => [
            '\(ncsubj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+) _\)',
        ],
        ARG2 => [
            '\(dobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(iobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(xcomp _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(xcomp to_(\d+) \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(ccomp \W*{W2}\W*_(\d+) \W*{W1}\W*_(\d+) \S*\)',    # \W* after {W2}
        ],
        ARG => [
            '\(xmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
        MOD => [
            '\(ncmod _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
    },
    control => {
        ARG1 => [
            '\(ncsubj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+) _\)',
        ],
        ARG2 => [
            '\(dobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(xcomp to_(\d+) \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
        ARG3 => [
            '\(iobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(xcomp to_(\d+) \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
        ARG => [
            '\(ncsubj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+) obj\)',
        ],
    },
    tough => {
        ARG2 => [
            '\(dobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(xcomp to_(\d+) \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
    },
    itexpl => {
        ARG1 => [
            '\(ncsubj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+) _\)',
        ],
        ARG2 => [
            '\(dobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(xcomp to_(\d+) \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
        ],
        ARG => [
            '\(ncsubj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+) obj\)',
        ],
    },
    vpart => {
        ARG2 => [
            '\(dobj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(xcomp to_(\d+) \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',
            '\(ccomp _ \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+)\)',      # \W* after {W1}
        ],
        ARG => [
            '\(ncsubj \W*{W1}\W*_(\d+) \W*{W2}\W*_(\d+) obj\)',
        ],
    },
);

# MST patterns.
# Layout: phenomenon name => dependency label => list of pattern strings
# (single-quoted, so backslashes are stored literally). {W1} and {W2} look
# like placeholders filled in elsewhere -- confirm against the consumer.
my %pattern_mst = (
    barerel => {
        ARG2 => [
            '{W1}-(\d+)\tOBJ\t{W2}-(\d+)',
        ],
        MOD => [
            '{W1}-(\d+)\tNMOD\t{W2}-(\d+)',
        ],
    },
    rnr => {
        ARG2 => [
            '{W1}-(\d+)\tOBJ\t{W2}-(\d+)',
            '{W1}-(\d+)\tPMOD\t{W2}-(\d+)',
        ],
        ARG => [
            '{W1}-(\d+)\tPMOD\t{W2}-(\d+)',
        ],
    },
    argadj => {
        ARG2 => [
            '{W1}-(\d+)\tOBJ\t{W2}-(\d+)',
            '{W1}-(\d+)\tPRD\t{W2}-(\d+)',
            '{W1}-(\d+)\tADV\t{W2}-(\d+)',
            '{W1}-(\d+)\tOPRD\t{W2}-(\d+)',
        ],
        ARG3 => [
            '{W1}-(\d+)\tOBJ\t{W2}-(\d+)',
        ],
        ARG => [
            '{W1}-(\d+)\tVC\t{W2}-(\d+)',
        ],
        MOD => [
            # NOTE(review): if this string is compiled as a regex, `\t?MOD`
            # means "optional tab, then literal MOD", which cannot match a
            # tab-delimited NMOD/AMOD/PMOD label. Possibly `\t\S?MOD` was
            # intended -- confirm before changing.
            '{W1}-(\d+)\t?MOD\t{W2}-(\d+)',
            '{W1}-(\d+)\tADV\t{W2}-(\d+)',
            '{W1}-(\d+)\tTMP\t{W2}-(\d+)',
            '{W1}-(\d+)\tLOC\t{W2}-(\d+)',
            '{W1}-(\d+)\tPRD\t{W2}-(\d+)',
            '{W1}-(\d+)\tMNR\t{W2}-(\d+)',
        ],
    },
    ned => {
        MOD => [
            '{W1}-(\d+)\tNMOD\t{W2}-(\d+)',
            '{W1}-(\d+)\tNMOD\t\S*{W2}-(\d+)',
            '{W1}-(\d+)\tAMOD\t\S*{W2}-(\d+)',
        ],
        ARG1 => [
            '{W1}-(\d+)\tSBJ\t{W2}-(\d+)',
        ],
        ARG2 => [
            '{W1}-(\d+)\tOBJ\t{W2}-(\d+)',
        ],
    },
    absol => {
        ARG1 => [
            '{W1}-(\d+)\tSBJ\t{W2}-(\d+)',
            '{W2}-(\d+)\tAPPO\t{W1}-(\d+)',
        ],
        MOD => [
            '{W1}-(\d+)\tADV\t{W2}-(\d+)',
            '{W1}-(\d+)\tVC\t{W2}-(\d+)',
        ],
        # A hash key can hold only one value, so 'ARG' is listed once.
        # NOTE(review): this sub-hash previously also listed
        #   'ARG' => [ '{W1}-(\d+)\tAPPO\t{W2}-(\d+)' ]
        # ahead of this entry; that earlier value was always overwritten by
        # this one and never took effect. Confirm whether both directions
        # were meant to be in a single list.
        ARG => [
            '{W2}-(\d+)\tAPPO\t{W1}-(\d+)',
        ],
    },
    vger => {
        ARG1 => [
            '{W1}-(\d+)\tSBJ\t{W2}-(\d+)',
        ],
        ARG2 => [
            '{W1}-(\d+)\tOBJ\t{W2}-(\d+)',
            '{W1}-(\d+)\tOPRD\t{W2}-(\d+)',
            '{W1}-(\d+)\tAMOD\t{W2}-(\d+)',
            '{W1}-(\d+)\tPMOD\t{W2}-(\d+)',
            '{W1}-(\d+)\tADV\t{W2}-(\d+)',
        ],
        ARG => [
            '{W1}-(\d+)\tVC\t{W2}-(\d+)',
        ],
        MOD => [
            '{W1}-(\d+)\tADV\t{W2}-(\d+)',
        ],
    },
    control => {
        ARG1 => [
            '{W1}-(\d+)\tSBJ\t{W2}-(\d+)',
        ],
        ARG => [
            '{W1}-(\d+)\tSBJ\t{W2}-(\d+)',
        ],
        ARG2 => [
            '{W1}-(\d+)\tOPRD\t{W2}-(\d+)',
        ],
        ARG3 => [
            '{W1}-(\d+)\tOPRD\t{W2}-(\d+)',
        ],
    },
    tough => {
        ARG2 => [
            '{W1}-(\d+)\tOBJ\t{W2}-(\d+)',
            '{W1}-(\d+)\tAMOD\t{W2}-(\d+)',
            '{W1}-(\d+)\tEXTR\t{W2}-(\d+)',
        ],
    },
    itexpl => {
        ARG1 => [
            '{W1}-(\d+)\tSBJ\t{W2}-(\d+)',
        ],
        ARG => [
            '{W1}-(\d+)\tSBJ\t{W2}-(\d+)',
        ],
        ARG2 => [
            '{W1}-(\d+)\tOBJ\t{W2}-(\d+)',
        ],
    },
    vpart => {
        ARG1 => [
            '{W1}-(\d+)\tSBJ\t{W2}-(\d+)',
        ],
        ARG => [
            '{W1}-(\d+)\tSBJ\t{W2}-(\d+)',
        ],
        ARG2 => [
            '{W1}-(\d+)\tOBJ\t{W2}-(\d+)',
            '{W1}-(\d+)\tPMOD\t{W2}-(\d+)',
        ],
    },
);

my %pattern_xle = (
                   "barerel" => {
 	                         'ARG2' => [ 'eq\(attr\({W1}-(\d+),\'OBJ\'\),{W2}-(\d+)\)',
					     'eq\(attr\({W1}-(\d+),\'OBJ-TH\'\),{W2}-(\d+)\)',
                                             'eq\(attr\({W1}-(\d+),\'OBJ\'\),var\((\d+)\)\).*eq\(attr\(var\(\3\),\'PRED\'\),semform\(\'null_pro\'',
                                             'eq\(attr\(var\((\d+)\),\'PRED\'\),semform\(\'null_pro\'.*eq\(attr\({W1}-(\d+),\'OBJ\'\),var\(\2\)\)',
				             'eq\(attr\({W1}-(\d+),\'OBJ-TH\'\),var\((\d+)\)\).*eq\(attr\(var\(\3\),\'PRED\'\),semform\(\'null_pro\'' ],
                                 'MOD' =>  [ 'eq\(attr\({W1}-(\d+),\'ADJUNCT\'\),var\((\d+)\)\).*in_set\({W2}-(\d+),var\(\3\)\)',
				             'eq\(attr\({W1}-(\d+),\'COMP\'\),{W2}-(\d+)\)' ],
                                },
                   "rnr" => {
                                 'ARG2' => [ 'eq\(attr\({W1}-(\d+),\'OBJ\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'OBJ\'\),and-(\d+)\).*in_set\({W2}-(\d+),and-\3\)',
				             'eq\(attr\({W1}-(\d+),\'OBJ\'\),or-(\d+)\).*in_set\({W2}-(\d+),or-\3\)',
					     'eq\(attr\({W1}-(\d+),\'OBJ-TH\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'COMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP-PRED\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'OBL\'\),{W2}-(\d+)\)',
                                             'eq\(attr\({W1}-(\d+),\'OBJ\'\),var\((\d+)\)\).*eq\(attr\(var\(\3\),\'PRED\'\),semform\(\'null_pro\'',
                                             'eq\(attr\(var\((\d+)\),\'PRED\'\),semform\(\'null_pro\'.*eq\(attr\({W1}-(\d+),\'OBJ\'\),var\(\2\)\)' ]
                            },
                   "argadj" => {
                                 'ARG2' => [ 'eq\(attr\({W1}-(\d+),\'OBJ\'\),{W2}-(\d+)\)',
					     'eq\(attr\({W1}-(\d+),\'OBJ-TH\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'COMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP-PRED\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'OBL\'\),{W2}-(\d+)\)',
                                             'eq\(attr\({W1}-(\d+),\'OBJ\'\),var\((\d+)\)\).*eq\(attr\(var\(\3\),\'PRED\'\),semform\(\'null_pro\'',
                                             'eq\(attr\(var\((\d+)\),\'PRED\'\),semform\(\'null_pro\'.*eq\(attr\({W1}-(\d+),\'OBJ\'\),var\(\2\)\)' ],
				 'ARG3' => [ 'eq\(attr\({W1}-(\d+),\'XCOMP-PRED\'\),{W2}-(\d+)\)' ],
				 'ARG' =>  [ 'eq\(attr\({W1}-(\d+),\'COMP\'\),{W2}-(\d+)\)' ],
				 'MOD' =>  [ 'eq\(attr\({W1}-(\d+),\'ADJUNCT\'\),var\((\d+)\)\).*in_set\({W2}-(\d+),var\(\3\)\)' ],
                           },
                   "ned" => {    'MOD' =>  [ 'eq\(attr\({W1}-(\d+),\'ADJUNCT\'\),var\((\d+)\)\).*in_set\({W2}-(\d+),var\(\3\)\)',
					     'eq\(attr\({W1}-(\d+),\'ADJUNCT\'\),var\((\d+)\)\).*in_set\(and-(\d+),var\(\3\)\).*in_set\({W2}-(\d+),and-\4\)',
					     'eq\(attr\({W2}-(\d+),\'ADJUNCT\'\),var\((\d+)\)\).*in_set\({W1}-(\d+),var\(\3\)\)',
				             'eq\(attr\({W1}-(\d+),\'MOD\'\),var\((\d+)\)\).*in_set\({W2}-(\d+),var\(\3\)\)' ],
				 'ARG1' => [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\)' ],
				 'ARG2' => [ 'eq\(attr\({W1}-(\d+),\'OBJ\'\),{W2}-(\d+)\)',
					     'eq\(attr\({W1}-(\d+),\'OBJ-TH\'\),{W2}-(\d+)\)' ]
                            },
                   "absol" => {  'ARG1' => [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W2}-(\d+),\'ADJUNCT\'\),var\((\d+)\)\).*in_set\({W1}-(\d+),var\(\3\)\)' ],
				 'ARG' =>  [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\)' ],
                                 'MOD' =>  [ 'eq\(attr\({W1}-(\d+),\'ADJUNCT\'\),var\((\d+)\)\).*in_set\({W2}-(\d+),var\(\3\)\)' ],
                              },
                   "vger" => {   'ARG1' => [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\)' ],
				 'ARG2' => [ 'eq\(attr\({W1}-(\d+),\'OBJ\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'OBJ\'\),and-(\d+)\).*in_set\({W2}-(\d+),and-\3\)',
				             'eq\(attr\({W1}-(\d+),\'OBJ\'\),or-(\d+)\).*in_set\({W2}-(\d+),or-\3\)',
					     'eq\(attr\({W1}-(\d+),\'OBJ-TH\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'COMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP-PRED\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'OBL\'\),{W2}-(\d+)\)',
                                             'eq\(attr\({W1}-(\d+),\'OBJ\'\),var\((\d+)\)\).*eq\(attr\(var\(\3\),\'PRED\'\),semform\(\'null_pro\'',
                                             'eq\(attr\(var\((\d+)\),\'PRED\'\),semform\(\'null_pro\'.*eq\(attr\({W1}-(\d+),\'OBJ\'\),var\(\2\)\)' ],
				 'ARG' =>  [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\)' ],
                                 'MOD' =>  [ 'eq\(attr\({W1}-(\d+),\'ADJUNCT\'\),var\((\d+)\)\).*in_set\({W2}-(\d+),var\(\3\)\)' ]
                             },
                   "control" => {'ARG1' => [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\)' ],
				 'ARG2' => [ 'eq\(attr\({W1}-(\d+),\'OBJ\'\),{W2}-(\d+)\)',
					     'eq\(attr\({W1}-(\d+),\'OBJ-TH\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'COMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP-PRED\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'OBL\'\),{W2}-(\d+)\)' ],
				 'ARG' =>  [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\)' ],
				 'ARG3' => [ 'eq\(attr\({W1}-(\d+),\'XCOMP\'\),{W2}-(\d+)\)' ]
                             },
                   "tough" => {  'ARG2' => [ 'eq\(attr\({W1}-(\d+),\'OBJ\'\),{W2}-(\d+)\)',
					     'eq\(attr\({W1}-(\d+),\'OBJ-TH\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'COMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'COMP\'\),and-(\d+)\).*in_set\({W2}-(\d+),and-\3\)',
				             'eq\(attr\({W1}-(\d+),\'COMP\'\),or-(\d+)\).*in_set\({W2}-(\d+),or-\3\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP-PRED\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'OBL\'\),{W2}-(\d+)\)',
                                             'eq\(attr\({W1}-(\d+),\'OBJ\'\),var\((\d+)\)\).*eq\(attr\(var\(\3\),\'PRED\'\),semform\(\'null_pro\'',
                                             'eq\(attr\(var\((\d+)\),\'PRED\'\),semform\(\'null_pro\'.*eq\(attr\({W1}-(\d+),\'OBJ\'\),var\(\2\)\)' ]
                              },
                   "itexpl" => { 'ARG1' => [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\).*eq\(attr\({W2}-(\d+),\'PRON-TYPE\'\),\'pers\'\)' ],
				 'ARG' =>  [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\).*eq\(attr\({W2}-(\d+),\'PRON-TYPE\'\),\'pers\'\)' ],
				 'ARG2' => [ 'eq\(attr\({W1}-(\d+),\'OBJ\'\),{W2}-(\d+)\).*eq\(attr\({W2}-(\d+),\'PRON-TYPE\'\),\'pers\'\)',
					     'eq\(attr\({W1}-(\d+),\'OBJ-TH\'\),{W2}-(\d+)\)' ]
                              },
                   "vpart" => {  'ARG1' => [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\)' ],
				 'ARG' =>  [ 'eq\(attr\({W1}-(\d+),\'SUBJ\'\),{W2}-(\d+)\)' ],
				 'ARG2' => [ 'eq\(attr\({W1}-(\d+),\'OBJ\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'OBJ\'\),and-(\d+)\).*in_set\({W2}-(\d+),and-\3\)',
				             'eq\(attr\({W1}-(\d+),\'OBJ\'\),or-(\d+)\).*in_set\({W2}-(\d+),or-\3\)',
					     'eq\(attr\({W1}-(\d+),\'OBJ-TH\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'COMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'XCOMP-PRED\'\),{W2}-(\d+)\)',
				             'eq\(attr\({W1}-(\d+),\'OBL\'\),{W2}-(\d+)\)',
                                             'eq\(attr\({W1}-(\d+),\'OBJ\'\),var\((\d+)\)\).*eq\(attr\(var\(\3\),\'PRED\'\),semform\(\'null_pro\'',
                                             'eq\(attr\(var\((\d+)\),\'PRED\'\),semform\(\'null_pro\'.*eq\(attr\({W1}-(\d+),\'OBJ\'\),var\(\2\)\)' ]
                              }
                  ); # hashmap of xle patterns indexed by phenomenon name

# Read the annotation file and collect the annotated phenomenon instances.
#
# Argument: path of the annotation file.
# Effect:   every line consisting of six '@'-separated fields (numeric item
#           id first, fourth and fifth field non-empty) is appended, chomped,
#           to the file-level list @phenomena.  Other lines are ignored.
# Dies if the file cannot be opened.
sub read_annotation {
  my $infname = shift;
  # Three-argument open: the file name is never interpreted as a mode or pipe.
  open (my $inf, '<', $infname) or die "Failed to open file $infname\n";
  while (my $line = <$inf>) {
    chomp $line;
    # ERB 21-mar-11 Extended this to match lines in targets.divided.txt.
    if ($line =~ m/^\d+\@[^@]*\@[^@]*\@[^@]+\@[^@]+\@[^@]*$/) {
      push(@phenomena, $line);
    }
  }
  close $inf;
}

# Read the item (sentence) file.
#
# Argument: path of the item file.
# Effect:   for every line starting with "<digits>@", the line is split on
#           '@'; field 0 is the item id and field 6 the sentence.  Bracketed
#           "[[...]]" spans in the sentence are reduced to their last
#           '|'-free / ']'-free segment.  The sentence is stored in %items
#           under the id and the id is appended to @itemids (file order).
# Dies if the file cannot be opened.
sub read_items {
  my $infname = shift;
  # Three-argument open: the file name is never interpreted as a mode or pipe.
  open (my $inf, '<', $infname) or die "Failed to open file $infname\n";
  while (my $line = <$inf>) {
    chomp $line;
    if ($line =~ m/^\d+@/) {
      my @fields = split(/\@/, $line);
      my $id = $fields[0];
      my $sent = $fields[6];
      # keep only the trailing segment of each [[...]] span
      $sent =~ s/\[\[.*?([^\|\]]+)\]\]/$1/g;
      $items{$id} = $sent;
      push(@itemids, $id);
    }
  }
  close $inf;
}

# Return the list of alternatives for the first word of an annotation.
# The annotation is whitespace-separated; its first field holds one or more
# '|'-separated "form-position" alternatives.  The first "+particle" found
# in that field is removed before splitting.
sub get_annot_w1s {
  my ($annot) = @_;
  my $first_field = (split(/\s+/, $annot))[0];
  $first_field =~ s/\+[A-Za-z]+\-/\-/; # drop particle
  my @alternatives = split(/\|/, $first_field);
  return @alternatives;
}

# Return the list of alternatives for the second word of an annotation.
# The annotation is whitespace-separated; its third field holds one or more
# '|'-separated "form-position" alternatives.  The first "+particle" found
# in that field is removed before splitting.
sub get_annot_w2s {
  my ($annot) = @_;
  my $third_field = (split(/\s+/, $annot))[2];
  $third_field =~ s/\+[A-Za-z]+\-/\-/; # drop particle
  my @alternatives = split(/\|/, $third_field);
  return @alternatives;
}

# Return the dependency label of an annotation, i.e. the second
# whitespace-separated field (undef when there is no such field).
sub get_annot_dep {
  my ($annot) = @_;
  my $label = (split(/\s+/, $annot))[1];
  return $label;
}

# Return the surface form of a "form-position" token, i.e. everything before
# the final "-<digits>" suffix.  Returns undef when the token has no such
# suffix; the match result is checked explicitly so that a failed match can
# never hand back a stale $1 left over from an earlier match in the caller.
sub get_sform {
  my $w = shift;
  return $w =~ m/^(.*)-\d+$/ ? $1 : undef;
}

# Return the numeric position of a "form-position" token, i.e. the digits
# after the final '-'.  Returns undef when the token has no such suffix; the
# match result is checked explicitly so that a failed match can never hand
# back a stale $1 left over from an earlier match in the caller.
sub get_tid {
  my $w = shift;
  return $w =~ m/^.*-(\d+)$/ ? $1 : undef;
}

# Parser output per item id; filled by the load_* routines, read by evaluate.
my %results = ();

# Load an Enju result file.
#
# Argument: path of the result file.
# Effect:   the file is cut into blocks at empty lines; the n-th block is
#           stored in %results under the n-th id of @itemids.  A final block
#           that is not followed by an empty line is not stored.
# Dies if the file cannot be opened.
sub load_enju {
  my $infname = shift;
  # Three-argument open: the file name is never interpreted as a mode or pipe.
  open (my $inf, '<', $infname) or die "Failed to open file $infname\n";
  my $i = 0;
  my $buffer = "";
  while (my $line = <$inf>) {
    # The line is not chomped: /^$/ still matches a line holding only "\n",
    # because '$' may match just before a trailing newline.
    if ($line =~ m/^$/) {
      $results{$itemids[$i]}=$buffer;
      $buffer = "";
      $i ++;
    } else {
      # $line keeps its own newline, so each stored line is followed by an
      # extra "\n" in the buffer (kept as is to leave matching unchanged).
      $buffer .= $line."\n";
    }
  }
  close $inf;
}

# Load a RASP result file.
#
# Argument: path of the result file.
# Effect:   for each item the sentence words are taken from the
#           '("w1" "w2" ...) N ; (...)' line, the tree text is collected from
#           the line starting with "(|T" onwards, and lines starting with "("
#           after the tree has begun are collected as dependency relations.
#           At the following empty line every "|form:N_tag|" token found in
#           the tree is rewritten, inside the collected relations, to
#           "|word:N|" using the sentence word at the same index, and the
#           result is stored in %results under the next id of @itemids.
# Dies if the file cannot be opened, if the number of sentence words differs
# from the number of tree tokens, or if a tree token carries no id.
sub load_rasp {
  my $infname = shift;
  # Three-argument open: the file name is never interpreted as a mode or pipe.
  open (my $inf, '<', $infname) or die "Failed to open file $infname\n";
  my $status = 0;   # 1 once the tree line of the current item has been seen
  my $i = 0;        # index of the current item in @itemids
  my $buffer = "";  # dependency relations of the current item
  my $tree = "";    # tree text of the current item
  my @swords = ();  # sentence words of the current item
  my @toks = ();    # "|form:N_tag|" tokens extracted from the tree
  while (my $line = <$inf>) {
    chomp $line;
    if ($line =~ m/^\(\"(.*)\"\) \d+ ; \(.*\)/) {
      @swords = split(/" "/,$1);
    } elsif ($line =~ m/^\(\|T/) {
      $tree .= $line."\n";
      $status = 1; # the tree, the deprels are coming up next
    } elsif ($status == 1) {
      if ($line =~ m/^\(/) {
        $buffer .= $line."\n";
      } elsif ($line =~ m/^$/) {
        while ($tree =~ m/(\|[^ ]+:\d+_[^ ]+\|)/g) {
          push(@toks, $1);
        }
        if ($#swords != $#toks) {
          die "Problem: token mismatch :-(\n @swords \n @toks\n";
        }
        # A separate index name is used here so the item counter $i is not
        # shadowed inside the loop.
        for my $t (0 .. $#toks) {
          my $old = quotemeta $toks[$t];
          my $id = -1;
          if ($toks[$t] =~ m/\|[^ ]+:(\d+)_[^ ]+\|/) {
            $id = $1;
          } else {
            die "Failed to find token id $toks[$t] \n";
          }
          $buffer =~ s/$old/\|$swords[$t]:$id\|/g;
        }
        $results{$itemids[$i]}=$buffer;
        $buffer = "";
        $tree = "";
        $i ++;
        @swords = ();
        @toks = ();
        $status = 0;
      } else {
        # continuation line of a multi-line tree
        $tree .= $line."\n";
      }
    }
  }
  close $inf;
}

# Load a Stanford-style result file.
#
# Argument: path of the result file.
# Effect:   empty lines are counted per item.  Lines seen after the first
#           empty line and before the second one are collected; lines before
#           the first empty line are ignored.  At the second empty line the
#           collected text is stored in %results under the next id of
#           @itemids and the counter is reset.
# Dies if the file cannot be opened.
sub load_stanford {
  my $infname = shift;
  # Three-argument open: the file name is never interpreted as a mode or pipe.
  open (my $inf, '<', $infname) or die "Failed to open file $infname\n";
  my $i = 0;
  my $buffer = "";
  my $status = 0;   # number of empty lines seen for the current item
  while (my $line = <$inf>) {
    chomp $line;
    if ($line =~ m/^$/) {
      $status ++;
      if ($status == 2) {
        $results{$itemids[$i]}=$buffer;
        $buffer = "";
        $i ++;
        $status = 0;
      }
    } elsif ($status == 1) {
      $buffer .= $line."\n";
    }
  }
  close $inf;
}

# Load a C&C result file.
#
# Argument: path of the result file.
# Effect:   an empty line switches collection on; the lines that follow are
#           collected until a line starting with "<c>" is reached, at which
#           point the collected text is stored in %results under the next id
#           of @itemids and collection is switched off again.  Lines seen
#           while collection is off are ignored.
# Dies if the file cannot be opened.
sub load_candc {
  my $infname = shift;
  # Three-argument open: the file name is never interpreted as a mode or pipe.
  open (my $inf, '<', $infname) or die "Failed to open file $infname\n";
  my $i = 0;
  my $buffer = "";
  my $status = 0;   # 1 while lines are being collected
  while (my $line = <$inf>) {
    chomp $line;
    if ($line =~ m/^$/) {
      $status = 1;
    } elsif ($line =~ m/^<c>/) {
      $results{$itemids[$i]}=$buffer;
      $buffer = "";
      $i ++;
      $status = 0;
    } elsif ($status == 1) {
      $buffer .= $line."\n";
    }
  }
  close $inf;
}

# Load an MST result file.
#
# Argument: path of the result file.
# Effect:   the file is cut into blocks at empty lines; each block is
#           converted with mst2wdep and the result is stored in %results
#           under the next id of @itemids.  A final block that is not
#           followed by an empty line is not stored.
# Dies if the file cannot be opened.
sub load_mst {
  my $infname = shift;
  # Three-argument open: the file name is never interpreted as a mode or pipe.
  open (my $inf, '<', $infname) or die "Failed to open file $infname\n";
  my $i = 0;
  my $buffer = "";
  while (my $line = <$inf>) {
    chomp $line;
    if ($line =~ m/^$/) {
      my $deps = mst2wdep($buffer);
      $results{$itemids[$i]}=$deps;
      $buffer = "";
      $i ++;
    } else {
      $buffer .= $line."\n";
    }
  }
  close $inf;
}

# Convert one block of tab-separated, ten-column token lines into word-level
# dependency lines of the form "headform-head<TAB>label<TAB>form-id".
#
# Column 1 is the token id, column 2 the form, column 7 the head id and
# column 8 the dependency label.  Forms are looked up by position in the
# order the token lines were matched, with "_" standing in at index 0.
sub mst2wdep {
  my $buffer = shift;
  my @forms = ("_");
  my @arcs = ();
  while ($buffer =~ m/(\d+)\t([^\t]+)\t[^\t]+\t[^\t]+\t[^\t]\t[^\t]+\t(\d+)\t([^\t]+)\t[^\t]+\t[^\t]+\n/g) {
    my ($id, $form, $head, $deprel) = ($1, $2, $3, $4);
    push @forms, $form;
    push @arcs, [$head, $deprel, $id];
  }
  my $deps = "";
  for my $arc (@arcs) {
    my ($head, $deprel, $id) = @{$arc};
    $deps .= "$forms[$head]-$head\t$deprel\t$forms[$id]-$id\n";
  }
  return $deps;
}

# Load an XLE result file.
#
# Argument: path of the result file.
# Effect:   the lines between a "=====begin=====" line and the following
#           "=====end=====" line are collected, converted with
#           dependencifyxle and stored in %results under the next id of
#           @itemids.  Every converted block is also written, wrapped in the
#           same begin/end markers, to the file "xle-all.dep" in the current
#           directory (overwritten on each run).
# Dies if the input cannot be opened or the dump file cannot be created.
sub load_xle {
  my $infname = shift;
  # Three-argument opens: neither name is ever interpreted as a mode or pipe.
  open (my $inf, '<', $infname) or die "Failed to open file $infname\n";
  open (my $outf, '>', "xle-all.dep") or die "Failed to write to file xle-all.dep\n";
  my $i = 0;
  my $buffer = "";
  my $status = 0;   # 1 while inside a begin/end section
  while (my $line = <$inf>) {
    chomp $line;
    if ($line =~ m/^=====begin=====$/) {
      $status = 1;
    } elsif ($line =~ m/^=====end=====$/) {
      my $deps = dependencifyxle($buffer);
      $results{$itemids[$i]}=$deps;
      # for debugging, dumping the dependencies here:
      print $outf "=====begin=====\n";
      print $outf $deps."\n";
      print $outf "=====end=====\n";
      $buffer = "";
      $i ++;
      $status = 0;
    } elsif ($status == 1) {
      $buffer .= $line."\n";
    }
  }
  close $inf;
  close $outf;
}

# Turn one XLE analysis (the text between the begin/end markers) into a list
# of "cf(1,...)," facts in which f-structure variables are replaced by
# numbered surface tokens.
#
# Argument: the raw analysis text.
# Returns:  a string with one rewritten fact per line.
#
# Steps: (0) renumber indices with renamevars, (1) collect the surface forms,
# (2) bind variables to surface tokens via terminal(...) and phi(...) facts,
# (3) emit every eq/in_set/scopes/subsume fact with bound variables replaced.
sub dependencifyxle {
  my $buffer = shift;
  my $deps = "";

  $buffer = renamevars($buffer);
  # first, gather surface forms, and corresponding numeric index
  my @surfacetoks = (); # list of surface tokens (already numbered, starting from 0)
  my @surfaceindex = (); # surfaceform index of each token, parallel to @surfacetoks
  my $id = 0;
  while ($buffer =~ m/surfaceform\((\d+),'(.+?)',\d+,\d+\)/g) {
    my $numidx = $1;
    my $sf = $2;
    # strip a leading "^ " from the surface form
    $sf =~ s/^\^ (.*)$/$1/;
    push @surfacetoks, "$sf-$id";
    push @surfaceindex, $numidx;
    $id ++;
  }

  # second, map surface token ids to their corresponding variables
  my %varmap = (); # "var(N)" => "form-position"
  for (my $i = 0; $i < $id; $i ++) {
    my $idx = $surfaceindex[$i];
    # collect terminals
    my @tlist = (); #list of terminals corresponding to $i^th word
    while ($buffer =~ m/terminal\((\d+),'.*?',\[$idx\]\)/g) {
      my $tidx = $1; #terminal index (in c-structure?)
      push @tlist, $tidx;
    }
    # NOTE: @vlist is filled below but never read afterwards.
    my @vlist = (); #list of variables corresponding to $i^th word
    for my $t (@tlist) {
      # only the first phi(...) fact for this terminal is considered
      if ($buffer =~ m/phi\($t,(var\(\d+\))\)/) {
        my $var = $1;
        my $sf = "";
        if ($surfacetoks[$i] =~ m/(\w+)-\d+/) {
          $sf = $1;
        }
        push @vlist, $var;
        # Bind the variable to this token when
        #  - no word form could be extracted, or the buffer holds no
        #    semform('<word>#<form>' entry for it, AND
        #  - the variable is still unbound, or this token is not one of the
        #    listed determiners/demonstratives and does not start with a
        #    non-alphanumeric character (such tokens never overwrite an
        #    existing binding).
        if ( ($sf eq "" || $buffer !~ m/semform\(\'\w+\#$sf\'/) &&
            ( ! exists $varmap{$var} ||
             $surfacetoks[$i] !~ m/^(the|a|an|this|that|these|those|[^a-zA-Z0-9].*)-\d+$/ ) ) { #this is truly ugly
          $varmap{$var} = $surfacetoks[$i];
        }
      }
    }
  }

  # last, replace variables with tokens
#  while ($buffer =~ m/(cf\(1,eq\(attr\((var\(\d+\)),'PRED'\),semform\('[^']+'.*?\)\)\)),/g) {
  while ($buffer =~ m/(cf\(1,(eq|in_set|scopes|subsume)\(.*?\)\),)/g) {
    my $dep = $1;
    # substitute every bound variable that occurs in this fact
    for my $var (sort keys %varmap) {
      $var =~ m/var\((\d+)\)/;
      my $v = $1;
      my $t = $varmap{$var};
      $dep =~ s/var\($v\)/$t/g;
    }
    $deps .= $dep."\n";
  }

  return $deps;
}

# Make indices unique across the several f-structures of one XLE analysis.
#
# Every line starting with "fstructure" opens a new section.  The first
# section is left untouched; in the n-th following section (n = 1, 2, ...)
# n * 1000000 is added to
#   - the terminal index and the surface index of a terminal(...) fact,
#   - the surface index of a surfaceform(...) fact,
#   - the number of every var(...).
# Note: not only var(xx) needs to be renamed, but also terminal indices and
# surface indices.
#
# Argument: the analysis text.
# Returns:  the rewritten text, each line terminated by "\n".
sub renamevars {
  my $buffer = shift;
  my $result = "";
  my $num = -1;   # number of "fstructure" lines seen so far, minus one
  foreach my $line (split /\n/, $buffer) {
    if ($line =~ m/^fstructure/) {
      $num ++;
    }
    if ($num > 0) {
      my $offset = $num * 1000000;
      if ($line =~ m/terminal\((\d+),'.*?',\[(\d+)\]\)/) { # terminal and surface index + offset
        my $newtid = $1 + $offset;
        my $newsid = $2 + $offset;
        $line =~ s/terminal\(\d+,('.*?'),\[\d+\]\)/terminal\($newtid,$1,\[$newsid\]\)/;
      } elsif ($line =~ m/surfaceform\((\d+),'(.+?)',\d+,\d+\)/) { # surface index + offset
        my $newsid = $1 + $offset;
        # The pattern consumes the comma after the index, so the replacement
        # has to put it back; otherwise the fact is no longer well-formed.
        $line =~ s/surfaceform\(\d+,/surfaceform\($newsid,/;
      }
      # Shift every variable in a single pass, so that an already shifted
      # number can never be matched and shifted a second time.
      $line =~ s/var\((\d+)\)/'var('.($1 + $offset).')'/ge;
    }
    $result .= $line."\n";
  }
  return $result;
}

# Score the loaded parser output against the annotated phenomena and print
# the per-instance results plus (when $verbose > 0) a summary.
#
# Arguments:
#   $system   - system name (not used inside this routine)
#   $patterns - hash ref: phenomenon name => dependency label => array ref
#               of regex templates containing {W1} and {W2} placeholders
#
# Reads the file-level @phenomena, %results, $tokbase, $csv and $verbose.
# For each annotated instance $found is
#   -1 when no pattern list exists for its phenomenon/label,
#    0 when a list exists but nothing matched,
#    1 when some template matched.
# An instance counts as correct when its positive flag equals $found.
sub evaluate {
  my $system = shift;
  my $patterns = shift;
  # index 0 = negative instances, index 1 = positive instances
  my @count = (0, 0);
  my @correct = (0, 0);
  # same counters, per "phenomenon-division" name
  my %pcount = ();
  my %pcorrect = ();
  for my $phen (@phenomena) {
    # annotation line layout: id @ phenomenon @ positive @ annotation @ division @ ...
    my @fields = split(/\@/, $phen);
    my $id = $fields[0];
    my $pname = $fields[1];

    # ERB 21-mar-11: Splitting phenomena by A/B division
    my $pnameab = $fields[1] . "-". $fields[4];
    if (!exists $pcount{$pnameab}) {
      $pcount{$pnameab} = [0, 0];
      $pcorrect{$pnameab} = [0, 0];
    }
    # used directly as an array index, so expected to be 0 or 1
    my $positive = $fields[2];
    $count[$positive] ++;
    $pcount{$pnameab}[$positive] ++;

    my $annot = $fields[3];
    my $res = $results{$id};
    my $found = -1;
    my $match = ""; # matched deps in the parser output
    my $matchedc = ""; # first matched customized pattern
    my $matchedp = ""; # first matched (not customized) pattern
    my $unmatched = ""; # unmatched patterns
    if (exists $patterns->{$pname} &&
        exists $patterns->{$pname}{get_annot_dep($annot)}) {
      $found = 0;
      my @plist = @{$patterns->{$pname}{get_annot_dep($annot)}};
      # try every template with every W1/W2 alternative; stop at the first hit
    SEARCH: for my $p (@plist) {
        for my $w1 (get_annot_w1s $annot) {
          for my $w2 (get_annot_w2s $annot) {
            my $customized = $p;
            my $wf1 = get_sform $w1;
            my $wf2 = get_sform $w2;
            my $wp1 = get_tid $w1;
            my $wp2 = get_tid $w2;
            # the word forms are inserted into the regex as they are
            $customized =~ s/\{W1\}/$wf1/g;
            $customized =~ s/\{W2\}/$wf2/g;
            # the template is wrapped in one extra group: $1 is the whole
            # match, $2 and $3 are the template's own first two groups
            if ($res =~ m/($customized)/si) {
              $match = $1;
              $matchedp = $p;
              $matchedc = $customized;
              my $pos1 = $2;
              my $pos2 = $3;
              # When one annotated position is >= 999, the match is rejected
              # if the other word's annotated position and the captured one
              # differ by more than 4 (after subtracting $tokbase).
              # NOTE(review): 999 presumably marks an unspecified position -
              # confirm against the annotation format.
              # A rejected match moves on to the next W2 alternative but
              # leaves $match/$matchedp/$matchedc set.
              if ($wp1 >= 999 &&
                  (abs($wp2 - $pos2) - $tokbase) > 4) {
                next;
              }
              if ($wp2 >= 999 &&
                  (abs($wp1 - $pos1) - $tokbase) > 4) {
                next;
              }
              $found = 1;
              last SEARCH;
            } else {
              $unmatched .= "U: ".$customized."\n";
            }
          }
          }
      }
    }
    if ($positive == $found) {
      $correct[$positive] ++;
      $pcorrect{$pnameab}[$positive] ++;
    }

    if ($csv == 1) {
      # one quoted CSV row per instance; values are written without escaping
      if ($found == 0) {
        # strip the "U: " prefixes and the trailing newline
        $unmatched = "\n".$unmatched;
        $unmatched =~ s/\nU: /\n/g;
        $unmatched =~ s/^\n//;
        chomp $unmatched;
      } else {
        $unmatched = "";
      }
      print "\"$id\",\"$pnameab\",\"$positive\",\"$annot\",\"$match\",\"$matchedc\",\"$matchedp\",\"$unmatched\"\n";

    } else {
      # plain output: the annotation line immediately followed by $found
      print $phen.$found."\n";
      if ($verbose > 3) {
        print $res."\n";
      }
      if ($found == 1) {
        # P: matched parser output, M: customized regex, R: raw template
        if ($verbose > 0) {
          print "P: ".$match."\n";
          if ($verbose > 1) {
            print "M: ".$matchedc."\n";
            if ($verbose > 2) {
              print "R: ".$matchedp."\n";
            }
          }
        }
      } elsif ($found == 0) {
        if ($verbose > 2) {
          print $unmatched;
        }
      }
    }
  }
  if ($verbose > 0) {
    # overall, positive-only and negative-only totals, each broken down by
    # "phenomenon-division" name
    print "\nSummary:\n";
    print "Correct Targets : ".($correct[1]+$correct[0])." / ".($count[1]+$count[0])."\n";
    for my $p (sort keys %pcount) {
      print "  $p : ".($pcorrect{$p}[1]+$pcorrect{$p}[0])." / ".($pcount{$p}[1]+$pcount{$p}[0])."\n";
    }
    print "Correct Total Positive Targets : $correct[1] / $count[1]\n";
    for my $p (sort keys %pcount) {
      print "  $p : $pcorrect{$p}[1] / $pcount{$p}[1]\n";
    }
    print "Correct Total Negative Targets : $correct[0] / $count[0]\n";
    for my $p (sort keys %pcount) {
      print "  $p : $pcorrect{$p}[0] / $pcount{$p}[0]\n";
    }
    print "\n";
  }
}


# ---- command line driver ----
# Expected arguments: [option] system item_file annot_file result_file
unless (@ARGV == 4 || @ARGV == 5) {
  die "Usage: [[-v[0-3]]|-csv] system item_file annot_file result_file\n"
}

# With five arguments the first one is an option; it is consumed here so
# that the remaining four always sit at indices 0..3.
if (@ARGV == 5) {
  my $option = $ARGV[0];
  if ($option eq "-csv") {
    $csv = 1;
    print "\"ITEM-ID\",\"PHENOMENON\",\"POSITIVE\",\"REF-DEP\",\"MATCHED-PARSER-OUTPUT\",\"MATCHED-PATTERN\",\"MATCHED-REGEX\",\"UNMATCHED-PATTERN\"\n";
  } elsif ($option eq "-v") {
    $verbose = 1;
  } elsif ($option =~ m/^\-v(\d+)$/) {
    $verbose = $1;
  }
  shift @ARGV;
}

read_items($ARGV[1]);
read_annotation($ARGV[2]);

# system name => [ token base, loader, name handed to evaluate, patterns ]
my %runner_for_system = (
  "enju"     => [ 0, \&load_enju,     "enju",     \%pattern_enju ],
  "candc"    => [ 0, \&load_candc,    "candc",    \%pattern_candc ],
  "stanford" => [ 1, \&load_stanford, "stanford", \%pattern_stanford ],
  "cj"       => [ 1, \&load_stanford, "stanford", \%pattern_stanford ],
  "rasp"     => [ 1, \&load_rasp,     "rasp",     \%pattern_rasp ],
  "mst"      => [ 1, \&load_mst,      "mst",      \%pattern_mst ],
  "xle"      => [ 0, \&load_xle,      "xle",      \%pattern_xle ],
);

if (exists $runner_for_system{$ARGV[0]}) {
  my ($base, $loader, $label, $patterns) = @{$runner_for_system{$ARGV[0]}};
  $tokbase = $base;
  $loader->($ARGV[3]);
  evaluate($label, $patterns);
} else {
  die "Unknown system $ARGV[0]\n";
}



