###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################


# Usage: cat wsj_0001.trees | perl scripts/treesed.pl [-d]
#   -d  print each rewrite step to stderr
use Getopt::Std;

# Parse command-line switches; Getopt::Std leaves a given -d in $opt_d.
getopts("d");

# $DEBUG is 1 when -d was given and 0 otherwise; debug() consults it.
$DEBUG = $opt_d ? 1 : 0;

# Write one trace line to STDERR when debugging is switched on.
#   $_[0]  label printed first (the caller in this file passes a step counter)
#   $_[1]  message printed after a single separating space
# Does nothing unless the global $DEBUG is true.  Arguments are copied into
# lexicals so no package global is overwritten as a side effect, and the
# handle is spelled STDERR so it resolves the same way in any package.
sub debug {
  return unless $DEBUG;
  my ($label, $text) = @_;
  print STDERR $label, " ", $text, "\n";
}

# Regex source for an optional label prefix: one or more word characters
# followed by the literal text "!ldelim!" (presumably a semantic-role tag,
# going by the variable name -- confirm).  The rewrite rules interpolate it
# as ($SRL)?.  Single quotes are required: inside a double-quoted string
# "\w" collapses to a plain "w", which would restrict the prefix to runs of
# the letter "w".  The pattern must not contain capturing groups, because
# the rules refer to their own groups by number.
$SRL = '\w+!ldelim!';

## Main loop: read the input (stdin or file arguments) one line at a time,
## rewrite each line bottom-up with the rules below, and print the result.
##
## Bracket roles while a line is being processed:
##   ( )  constituents not yet visited
##   { }  the constituent currently being rewritten
##   < >  already-processed children of that constituent
##   [ ]  already-processed material nested inside those children
## for each tree...
while ( <> ) {
  ## translate to parens...  (so any '[' ']' seen later were produced by this loop)
  s/\[/\(/g;
  s/\]/\)/g;
  ## for each constituent...  (the test finds a paren pair with no parens inside it)
  while ( $_ =~ /\([^\(\)]*\)/ ) {
    ## convert outer parens to braces...  (no /g: one constituent is opened per pass)
    $_ =~ s/\(([^\(\)]*)\)/{\1}/;
    #################### ADD SED RULES HERE: apply rules to angles (children) within braces (constituent)...
    ## trace the current state; $step is never reset, so steps are numbered across all input lines
    debug($step++, "   $_");

    # fold child trace into parent nonterm as extraction
    # case 1: the only child is a *NONE* trace (optionally preceded by the $SRL prefix)
    #         -> drop the child and append -empty to the parent label
    #s/{([^ ]*) +<\*NONE\*[^>]*> *}/{\1-empty}/;
    s/{([^ ]*) +<($SRL)?\*NONE\*[^>]*> *}/{\1-empty}/;
    # fold child trace into parent nonterm as extraction
    # case 2: the trace is the first child and at least one more child follows
    #         -> the whole constituent (label and braces included) is replaced by what follows the trace
    # NOTE(review): cases 2-4 leave no braces behind, so the brace-to-angle step at the end of this
    #               pass does not apply and the surviving children stay as '<...>' items; presumably
    #               this splices them into the enclosing constituent -- confirm this is intended
    #s/{([^ ]*) +( *)<\*NONE\*[^>]*>(.*<.*)}/\3/;
    s/{([^ ]*) +( *)<($SRL)?\*NONE\*[^>]*>(.*<.*)}/\4/;
    # fold child trace into parent nonterm as extraction
    # case 3: the trace is the last child and at least one child precedes it
    #         -> the whole constituent is replaced by what precedes the trace
    #s/{([^ ]*) +(.*<.*)<\*NONE\*[^>]*>( *)}/\2/;
    s/{([^ ]*) +(.*<.*)<($SRL)?\*NONE\*[^>]*>( *)}/\2/;
    # fold child trace into parent nonterm as extraction
    # case 4: any remaining position -> the whole constituent is replaced by the text on either side of the trace
    #s/{([^ ]*) +(.*)<\*NONE\*[^>]*>(.*)}/\2\3/;
    s/{([^ ]*) +(.*)<($SRL)?\*NONE\*[^>]*>(.*)}/\2\4/;

    # fold child empty category into parent nonterm as extraction (trace chain)
    # a child '<CAT...-N-empty-M 0>' with a later sibling whose label contains '-g...-N':
    # drop that child and append '-gCAT-M' to the parent label
    #s/{([^ ]*) +(.*)<([^ \^]*)[^ ]*-([0-9]+)-empty-([0-9]+) *0 *>(.*<[^ ]*-g[^ ]*-\4.*)}/{\1-g\3-\5 \2\6}/;
    s/{([^ ]*) +(.*)<($SRL)?([^ \^]*)[^ ]*-([0-9]+)-empty-([0-9]+) *0 *>(.*<[^ ]*-g[^ ]*-\5.*)}/{\1-g\4-\6 \2\7}/;
#    # fold child empty category into parent nonterm as extraction (trace chain)
#    s/{([^ ]*) +(.*)<([^ \^]*)[^ ]*-([0-9]+)-empty-([0-9]+) *0 *>(.*)}/{\1-g\3-\5 \2\6}/;
    # fold child empty category into parent nonterm as extraction
    # a child '<CAT...-empty-N 0>' (CAT stops at the first '-' or '^'):
    # drop that child and append '-gCAT-N' to the parent label
    #s/{([^ ]*) +(.*)<([^ \^\-]*)[^ ]*-empty-([0-9]+) *0 *>(.*)}/{\1-g\4-\5 \2\6}/;
    s/{([^ ]*) +(.*)<($SRL)?([^ \^\-]*)[^ ]*-empty-([0-9]+) *0 *>(.*)}/{\1-g\4-\5 \2\6}/;

    ## # project extraction up unless child is RC or S
    ## s/{([^ ]*) +(.*<[^RS][^ ]*)-g([^ ]+) +(.*>.*)}/{\1-g\3 \2-g\3 \4}/;

    # project extraction up
    # a child label ending in '-gX' -> append the same '-gX' to the parent label (the child keeps it)
    s/{([^ ]*) +(.*<[^ ]*)-g([^ ]+) +(.*>.*)}/{\1-g\3 \2-g\3 \4}/;

    # undo extr projection if '-[0-9]' numbers of siblings match + fold child empty category into parent (trace chain ending in pro)
    # parent '...-gX-N' with a child '<...-N-empty 0>' and a later sibling containing '-g...-N':
    # strip the '-gX-N' from the parent and drop the empty child
    s/{([^ ]*)-g([^ ]*)-([0-9]+) +(.*)<[^ \^]*[^ ]*-\3-empty *0 *>(.*<[^ ]*-g[^ ]*-\3.*)}/{\1 \4\5}/;
    # undo extr projection if '-[0-9]' numbers of siblings match (parent is extr)
    # parent label of the form 'CAT-N-gX-N' -> reduced to 'CAT'
    s/{([^ ]*)-([0-9]+)-g[^ ]*-\2 +(.*<.*)}/{\1 \3}/;
    # undo extr projection if '-[0-9]' numbers of siblings match (left child is extr)
    # parent '...-gX-N', a child carrying index '-N', and a later child carrying '-gX-N' -> strip '-gX-N' from the parent
    s/{([^ ]*)-g([^ ]*)-([0-9]+) +(.*<[^ ]*-\3[^0-9].*) +(<[^ ]*-g\2-\3[^0-9].*)}/{\1 \4 \5}/;
    # undo extr projection if '-[0-9]' numbers of siblings match (right child is extr)
    # same as the previous rule with the two children in the opposite order
    s/{([^ ]*)-g([^ ]*)-([0-9]+) +(.*<[^ ]*-g\2-\3[^0-9].*) +(<[^ ]*-\3[^0-9].*)}/{\1 \4 \5}/;

    # last resort: nuke all remaining empty constituents w/o trace
    # two children, the first one '-empty': the parent keeps only the content (text after the label) of the second child
    s/{([^ ]*) +<[^ >]*-empty[^>]*> *<[^ >]* ([^>]*)> *}/{\1 \2}/;
    # two children, the second one '-empty': the parent keeps only the content of the first child
    s/{([^ ]*) +<[^ >]* ([^>]*)> +<[^ >]*-empty[^>]*> *}/{\1 \2}/;
    # otherwise: delete one '-empty' child that follows other material and keep everything around it
    s/{([^ ]*) (.*) <[^ >]*-empty[^>]*>(.*)}/{\1 \2\3}/;

#     # undo extr projection if WHSBAR -> WHX S-gX
#     s/{(WHSBAR[^ ]*)-g([^ ]*) +(<WH\2.*) +(<[^ ]*-g\2 .*)}/{\1 \3 \4}/;
#     # undo extr projection if NP -> NP SBAR-gNP
#     s/{(NP[^ ]*)-g(NP[^ ]*) +(<NP.*) +(<S[^ ]*-g\2 .*)}/{\1 \3 \4}/;
#     # undo extr projection if NP -> NP SBAR-gWHNP
#     s/{(NP[^ ]*)-g(WH[^ ]*) +(<NP.*) +(<S[^ ]*-g\2 .*)}/{\1 \3 \4}/;
#     # undo extr projection if S -> X S-gX (basically, move to spec of CP)
#     s/{(S[^ ]*)-g([^ ]*) +(.*<\2.*<S[^ ]*-g\2 .*)}/{\1 \3}/;
#     # undo extr projection if S -> S-gX X
#     s/{(S[^ ]*)-g([^ ]*) +(.*<S[^ ]*-g\2 .*<\2.*)}/{\1 \3}/;
#     # undo extr projection if SINV -> X VP-gX
#     s/{(SINV[^ ]*)-g([^ ]*) +(.*<\2.*<VP[^ ]*-g\2 .*)}/{\1 \3}/;
# #    # undo extr projection if SINV -> VP VP-gS (obsolete; was for imperative -- now Simp instead of VP)
# #    s/{(SINV[^ ]*)-g(S) +(<VPvb[^zgdn].*) +(<VP[^ ]*-gS .*)}/{\1 \3 \4}/;
#     # undo extr projection if VP -> VP-gX X (conjunction of verb followed by complement ?!)
#     s/{(VP[^ ]*)-g([^ ]*) +(.*<VP[^ ]*-g\2 .*) +(.*<\2.*)}/{\1 \3 \4}/;
#     # undo extr projection if NP -> NP-gX X (conjunction of noun followed by complement ?!)
#     s/{(NP[^ ]*)-g([^ ]*) +(.*<NP[^ ]*-g\2 .*) +(.*<\2.*)}/{\1 \3 \4}/;
#     # undo extr projection for "easy to take t"
#     s/{(ADJP[^ ]*)-g([^ ]*) +(.*<JJ.*) +(.*<Sproto[^ ]*-g\2 .*)}/{\1 \3 \4}/;
#     # undo extr projection for "reasons to do X (t)"
#     s/{(NP[^ ]*)-g([^ ]*) +(.*<NP.*) +(.*<Sproto[^ ]*-g\2 .*)}/{\1 \3 \4}/;
# #    # undo extr projection for "NP-gPP|SBAR did VP [PP|SBAR such as ...]"
# #    s/{(S[^ ]*)-g([^ ]*) +(.*<NP[^ ]*-g\2 .*) +(.*<VP.*)}/{\1 \3 \4}/;
#     # undo extr projection for PRN-gS
#     s/{(PRN[^ ]*)-gS(.*)}/{\1\2}/;

#    print stderr ":::  $_\n";

    ####################
    ## convert inner angles (if any) to bracks...  (one '<...>' pair per iteration, until none is left inside the braces)
    while ( s/({[^{}]*)<([^<>]*)>/\1\[\2\]/ ){}
    ## convert outer braces to angles...  (the finished constituent becomes a child for the next pass)
    $_ =~ s/{(.*)}/<\1>/;
  }

  #################### FINALLY: delete numbers...
  ## Each pass removes one '-N' / '=N' index from every space-terminated token (the match
  ## runs through the space that ends the token), so three passes cover up to three indices
  ## per token.
  s/[-=][0-9]+([^ \)\]]*) /\1 /g;
  s/[-=][0-9]+([^ \)\]]*) /\1 /g;
  s/[-=][0-9]+([^ \)\]]*) /\1 /g;

  ## finish up...  (no /g: only the first '<' and the first '>' on the line are converted)
  ## NOTE(review): presumably every line holds exactly one top-level tree -- confirm
  $_ =~ s/</[/;
  $_ =~ s/>/]/;
  ## translate to parens again...
  $_ =~ s/\[/\(/g;
  $_ =~ s/\]/\)/g;
  ## output...
  print $_;
}
