###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################


# cat wsj_0001.trees | perl scripts/treesed.pl
use Getopt::Std;

# Command-line switches (Getopt::Std stores them in $opt_p / $opt_d):
#   -p  enable the punctuation-removal rules applied to each tree
#   -d  enable debug() trace output
getopts("pd");

# Normalise each switch to a plain 0/1 flag used by the rest of the script.
$remove_punct = $opt_p ? 1 : 0;
$DEBUG        = $opt_d ? 1 : 0;

# Print one debug trace line to STDERR when debugging is enabled.
#
# Arguments:
#   $_[0]  label / prefix, printed first
#   $_[1]  message, printed after a single space
#
# Does nothing unless the global $DEBUG flag is true.  The emitted line is
# "<label> <message>\n".
sub debug {
  return unless $DEBUG;

  # Lexical copies keep this helper from clobbering a package-global $msg.
  my ($label, $msg) = @_;

  # Use the canonical STDERR handle; the lowercase alias only resolves to
  # the standard error stream inside package main.
  print STDERR $label, " ", $msg, "\n";
}

# Regex fragment for an annotation suffix on a terminal: the shifted colon
# marker (":" is rewritten to "!colon!" by the global symbol shift below)
# followed by everything up to the child's closing ">".  The rules below
# interpolate it, usually as an optional group "($SRL)?".
$SRL = '!colon![^>]*';

# 1-based counter of input lines read; only reported through debug().
$lineNum = 1;

## for each tree...
while ( <> ) {
  debug("***TB*** line ", $lineNum++);

  ## Make parentheses look standard...
  s/\( */(/g;
  s/ *\)/)/g;

  ## Remove repair annotation
  s/\(RM \(\-DFL\- \\\[\) \)//g;
  s/\(IP \(\-DFL\- \\\+\) \)//g;
  s/\(-DFL- E_S\)//g;
  s/\(-DFL- N_S\)//g;
  s/\(RS \(\-DFL\- \\\]\) \)//g;
  ## Sometimes that creates empty categories (especially in mis-annotated data)
  ## Remove those empty categories
  s/\([^ ]+[ ]+\)//;

  ## Collapse out everything between cat. and UNF
  s/\(([^ ]+)-[^ ]+-UNF/(\1-UNF/g;
  s/-UNF/UNF/g;
  # We will undo this shift later, but for right now it's done so INTJ is not
  # confused with IN (I think 4 underscores should be enough)
  s/INTJ/____INTJ/g;

  ## translate to parens...
  s/\[/\(/g;
  s/\]/\)/g;

  if($remove_punct == 1){
      # Remove/change punctuation...
      s/\([^ ]+ \.\.\.\)//g;
      s/\([^ ]+ \.\)//g;
      s/\([^ ]+ \!\)//g;
      s/\([^ ]+ \?\)//g;
      s/\([^ ]+ \,\)//g;
      s/\([^ ]+ *\([^ ]+ *\-\-\) *\)//g;  ## dash is nested... don't know why
      s/\([^ ]+ \-\-\)//g;
      s/\([^ ]+ \-\)//g;
      s/\([^ ]+ \;\)//g;
      s/\([^ ]+ \:\)//g;
      s/\([^ ]+ \`\)//g;
      s/\([^ ]+ \'\)//g;
      s/\([^ ]+ \`\`\)//g;
      s/\([^ ]+ \'\'\)//g;
      s/\([^ ]+ -L.B-\)//g;
      s/\([^ ]+ -R.B-\)//g;
  }

  # Get rid of typos (categories starting with ^)
  s/\(\^/\(/g;

  # Distinguish punctuation...
  s/\([^ ]+ \.\.\.(:[^\)]*)?\)/\(\.\.\. \.\.\.\1\)/g;
  s/\([^ ]+ \?(:[^\)]*)?\)/\(\? \?\1\)/g;
  s/\([^ ]+ *\([^ ]+ *\-\-(:[^\)]*)?\) *\)/\(\-\- \-\-\1\)/g;  ## dash is nested... don't know why
  s/\([^ ]+ \-\-(:[^\)]*)?\)/\(\-\- \-\-\1\)/g;
  s/\([^ ]+ \-(:[^\)]*)?\)/\(\-\- \-\-\1\)/g;
  s/\([^ ]+ \:(:[^\)]*)?\)/\(\: \:\1\)/g;
  s/\([^ ]+ \;(:[^\)]*)?\)/\(\; \;\1\)/g;
  s/\([^ ]+ \`(:[^\)]*)?\)/\(\` \`\1\)/g;
  s/\([^ ]+ \'(:[^\)]*)?\)/\(\' \'\1\)/g;

  ########## GLOBAL SYMBOL SHIFT
  #s/\!/\!exclamation\!/g;
  s/\~/\!tilde\!/g;
  #s/\`\`/\!openquote\!/g;
  #s/\`/\!openscare\!/g;
  s/\@/\!at\!/g;
  s/\#/\!pound\!/g;
  #s/\$/\!dollar\!/g;
  #s/\%/\!percent\!/g;
  #s/\^/\!carat\!/g;
  s/\&/\!ampersand\!/g;
  #s/\*/\!star\!/g;
  s/\-\-/\!dash\!/g;
  s/\+/\!plus\!/g;
  #s/\=/\!equals\!/g;
  s/\:/\!colon\!/g;
  s/\;/\!semi\!/g;
  #s/\"/\!dblquote\!/g;
  #s/\'\'/\!closequote\!/g;
  #s/\'/\!closescare\!/g;
  #s/\./\!period\!/g;
  #s/\,/\!comma\!/g;
  #s/([^\*])\?/\1\!question\!/g;
  s/\//\!slash\!/g;
  ##########

  # substitute -LRB- to \!LRB\!
  s/-L.B-/\!LRB\!/g;
  # substitute -RRB- to \!RRB\!
  s/-R.B-/\!RRB\!/g;
  # substitute -NONE- to \*NONE\*
  s/-NONE-/\*NONE\*/g;
#  # substitute *T* to \!t\! ("word" trace will get lowercased anyway)
#  s/\*T\*/\!t\!/g;
#  # substitute *RNR* to \!rnr\! ("word" trace will get lowercased anyway)
#  s/\*RNR\*/\!rnr\!/g;
  # substitute NP-TMP to NP-tmp
  s/NP[^ ]*\-TMP/NP-tmp/g;
  # substitute S*-ADV to S*-adv
  s/(S[^ ]*)\-ADV/\1-adv/g;
  # remove all other dash specifications beginning with capital letter
  s/\-[A-Z][A-Z]*([^ \)\]]*) /\1 /g;
  s/\-[A-Z][A-Z]*([^ \)\]]*) /\1 /g;
  s/\-[A-Z][A-Z]*([^ \)\]]*) /\1 /g;


  ## for each constituent...
  $step = 0;
  while ( $_ =~ /\([^\(\)]*\)/ ) {
    ## convert outer parens to braces...
    $_ =~ s/\(([^\(\)]*)\)/{\1}/;
    #################### ADD SED RULES HERE: apply rules to angles (children) within braces (constituent)...
    #print stderr "   $_\n";
    debug(++$step, "   $_");

    #### EOS PUNCT
    # kick eos punct up out of constit (should have been annotated this way!)
    s/{(?![^ ]*-mp)([^ ]*) +(.*) +(<[\.\!\?] [\.\!\?]>)}(?=.*\))/{\1 \2} \3/;
    # kick eos punct + right quote up out of constit (should have been annotated this way -- quote delimits can be obtained from left, but punct is needed at root)
    s/{(?![^ ]*-mp)(?![^ ]*-rq)([^ ]*) +(.*)(<``? ``?>)(.*) +(<[\.\!\?] [\.\!\?]> +<''? ''?>)}(?=.*\))/{\1-rq-mq \2\3\4} \5/;
    # kick eos punct + right quote up out of constit (should have been annotated this way -- quote delimits can be obtained from left, but punct is needed at root)
    s/{(?![^ ]*-mp)(?![^ ]*-rq)([^ ]*) +(.*) +(<[\.\!\?] [\.\!\?]> +<''? ''?>)}(?=.*\))/{\1-rq \2} \3/;
#    # isolate period + quote after S
#    s/{(?![^ ]*-mp)([^ ]*) +(.*<.*) +(<[\.\!\?] [^>]*>) +(<''? ''?>) *}/\(\1-mp \(\1-rq-mp \{\1 \2\} \3\4\)/;
    # isolate quote + ... + period + quote (allows other junk after, like rest of line in case of embedded quote)
    s/{(?![^ ]*-mp)([^ ]*) +(<``? ``?>) +(<.*) +(<[\.\!\?] [^>]*>) +(<''? ''?>.*)}/\(\1-mp-mq \(\1-rq-mp-mq \2 \{\1 \3\} \4\) \5\)/;
    # isolate ... + period + quote (allows other junk after, like rest of line in case of embedded quote)
    s/{(?![^ ]*-mp)([^ ]*) +(<.*) +(<[\.\!\?] [^>]*>) +(<''? ''?>.*)}/\(\1-mp \(\1-rq-mp \{\1 \2\} \3\) \4\)/;
    # isolate period after S (allows other junk after period, like quotes or rest of line in case of embedded quote)
    s/{(?![^ ]*-mp)([^ ]*) +(<.*) +(<[\.\!\?] [^>]*>)(.*)}/\(\1-mp \{\1 \2\} \3\4\)/;
#    # isolate period after S
#    s/{(?![^ ]*-mp)([^ ]*) +(.*<.*) +(<[\.\!\?] [^>]*>) *}/\(\1-mp \{\1 \2\} \3\)/;
#    # isolate period quote after S
#    s/{([^ ]*) +(.*<.*<.*) +<(\. [^>]*)> +<('' [^>]*)> *}/\(\1 \(\1-rq \{\1-rq-rp \2\} <\3>\) <\4>\)/;
#    # isolate period after S
#    s/{([^ ]*) +(.*<.*<.*) +<(\. [^>]*)> *}/\(\1 \{\1-rp \2\} <\3>\)/;
    ## # grab punct after S
    ## s/{([^ ]*) +(.*)<(S[^ ]*) ([^>]*)> *<(, [^>]*)>(.*)}/\(\1 \2\{\3 <\3 \4> <\5>\}\6\)/;

# #    # grab paren/brack around single category not at right of parent
# #    s/{([^ ]*) +(.*)<(-L.B-) ([^>]*)> *<([^ ]*) ([^>]*)> *<(-R.B-) ([^>]*)> +(<.*)}/\(\1 \2\{\5 <\3 \4> <\5 \6> <\7 \8>\} \9\)/;
# #    # grab paren/brack around single category at right of parent
# #    s/{([^ ]*) +(<.*)<(-L.B-) ([^>]*)> *<([^ ]*) ([^>]*)> *<(-R.B-) ([^>]*)>( *)}/\(\1 \2\{\5 <\3 \4> <\5 \6> <\7 \8>\}\9\)/;
#     # grab paren/brack at edges of multiple categories
#     s/{([^ ]*) +( *)<(-L.B-) ([^>]*)> *(.*>.*<.*) *<(-R.B-) ([^>]*)>( *)}/\(\1 <\3 \4> \{\1 \5\} <\6 \7>\)/;
# ##    # remove unary branch resulting from paren grab
# ##    s/\(([^ ]*) {[^ ]* (<-L.B- -L.B-> .* <-R.B- -R.B->)} *\)/{\1 \2}/;
# #    # grab quotes around single category with comma not at right of parent (new category label matching category inside quotes)
# #    s/{([^ ]*) +(.*)<(`` [^>]*)> *<([^ ]*) ([^>]*)> *<(, [^>]*)> *<('' [^>]*)> +(<.*)}/\(\1 \2\{\4 <\3> <\4 \5> <\6> <\7>\} \8\)/;
# #    # grab quotes around single category with comma at right of parent (new category label matching category inside quotes)
# #    s/{([^ ]*) +(<.*)<(`` [^>]*)> *<([^ ]*) ([^>]*)> *<(, [^>]*)> *<('' [^>]*)>( *)}/\(\1 \2\{\4 <\3> <\4 \5> <\6> <\7>\}\8\)/;
# #    # grab quotes around single category not at right of parent (new category label matching category inside quotes)
# #    s/{([^ ]*) +(.*)<(``) ([^>]*)> *<([^ ]*) ([^>]*)> *<('') ([^>]*)> +(<.*)}/\(\1 \2\{\5 <\3 \4> <\5 \6> <\7 \8>\} \9\)/;
# #    # grab quotes around single category at right of parent (new category label matching category inside quotes)
# #    s/{([^ ]*) +(<.*)<(``) ([^>]*)> *<([^ ]*) ([^>]*)> *<('') ([^>]*)>( *)}/\(\1 \2\{\5 <\3 \4> <\5 \6> <\7 \8>\}\9\)/;
#     # grab quotes at edges of multiple categories (new category label matching category outside quotes)
#     s/{([^ ]*) +( *)<(``) ([^>]*)> *(.*>.*<.*) *<('') ([^>]*)>( *)}/\(\1 <\3 \4> \{\1 \5\} <\6 \7>\)/;
# ##    # remove unary branch resulting from grab quotes
# ##    s/\(([^ ]*) {[^ ]* (<`` [^>]*> .* <'' [^>]*>)} *\)/{\1 \2}/;

    #### EMPTY CATEGORIES
    # delete ICH (moved -- not just raised -- modifier) and U (transposed currency symbol) traces; too hard to reconstruct
    s/{\*NONE\* .*ICH.*}//;
    s/{\*NONE\* *\*U\*(!colon!.*)? *}//;
    # eliminate expletive it cleft (a shame, but sent is too deeply attached)
    s/{NP[^ ]* +<(NP[^>]*)> +<S[^ ]*-empty 0($SRL)?> *}/{\1}/;
    # delete unary projections of deleted empty constituents
    s/{[^ ]* *}//;
    # fold empty category for currency into nonterm
    s/{QP([^ ]*) +(.*<\!pound\!.*)}/{NP-currunit\1 \2}/;
    s/{QP([^ ]*) +(.*<\$.*)}/{NP-currunit\1 \2}/;
    s/{QP([^ ]*) +(.*<\%.*)}/{NP-currunit\1 \2}/;
    s/{(NP[^ ]*|ADJP[^ ]*) (.*)<NP-currunit[^ ]* ([^>]*)> *<\*NONE\* [^>]*>([^}]*)}/{\1 \2 \3\4}/;   ## probably redundant now with *U* removed
    # fold empty X categories into X-empty nonterm
    s/{([^ ]*) +<\*NONE\* +\*t\*\-([0-9]+)($SRL)?> *}/{\1-empty-\2 0\3}/;
    s/{([^ ]*) +<\*NONE\* +\*rnr\*\-([0-9]+)($SRL)?> *}/{\1-empty-\2 0\3}/;
    s/{([^ ]*) +<\*NONE\* +\*($SRL)?> *}/{\1-empty-ctrl 0\2}/;
    if ( $_ =~ /\!colon\!(REL|rel)/ ) { #use this to check if input is srl tree
    	s/{([^ ]*) +<\*NONE\* +[^>]*($SRL)> *}/{\1-empty 0\2}/;	
    } else {
    	s/{([^ ]*) +<\*NONE\* +[^>]*> *}/{\1-empty 0}/;
    }

#    # rename VP with TO to infinitive VP-inf nonterm
#    s/{VP +(<TO[^>]*> *<VP[^>]*>[^ ]*)}/{VPto-inf \1}/;
#    # fold NP trace in S trace into infinitive VP-inf nonterm
#    s/{S +<NP-empty 0> *<VPto ([^>]*)> *}/{VPto \1}/;
    # fold NP trace in VP into passive VP nonterm
    s/{(VP[^ ]*|VBN[^ ]*|VBD[^ ]*) +(.*)<(VBN[^ ]*|VBD[^ ]*) +([^>]*)> *<NP[^ ]*-empty(-ctrl)? 0($SRL)?>(.*)}/{\1 \2<\3-v-argNP \4> \7}/;
    # fold empty NP in S with VPvb|VPvbp into S imperative
    s/{(S[^ ]*|PRN[^ ]*) +(.*)<[^ ]*NP[^ ]*-empty-ctrl 0($SRL)?> *<(VPvbp|VPvb)([^a-z][^>]*)>(.*)}/{\1 \2<Simp\5>\6}/;
    # fold empty NP in S with VPvbg into S pro progressive / passive / infinitive
    s/{(S[^ ]*) +(.*)<[^ ]*NP[^ ]*-empty(-ctrl)? 0($SRL)?> *<VP(vbg|vbn|prd|vbd|vb|to)([^>]*)>(.*)}/{\1 \2<Spro\5\6>\7}/;
    # fold empty NP in S with ADJP|NP|PP predicate into S pro stative (Sproprd)
    s/{(S[^ ]*) +(.*)<[^ ]*NP[^ ]*-empty(-ctrl)? 0($SRL)?> *(.*)<(ADJP|NP|PP)([^>]*)>(.*)}/{\1 \2\5<Sproprd\7>\8}/;
#    # fold NP trace in S with VP into VP nonterm
#    s/{S[^ ]* +<[^ ]*-empty 0> *<VP([^>]*)> *}/{VP\1}/;
#    # fold NP trace in S with VP into VP nonterm, subsuming args
#    s/{S[^ ]* +(.*)<[^ ]*-empty 0> *<(VP[^ ]*) ([^>]*)>(.*)}/{VP\2 \1<VP\2 \3>\4}/;
#    # fold NP trace in S with XP into XP nonterm
#    s/{S[^ ]* +<[^ ]*-empty 0> *<(ADJP[^>]*|NP[^>]*|PP[^>]*)> *}/{\1}/;
    # fold NP trace in SBAR into S nonterm
    s/{SBAR[^ \^]*([^ ]*) +<\*NONE\* [^>]*> *<S([^ \^]*)[^ \-]*([^ ]*) ([^>]*)> *}/{S\2\1\3 \4}/;
    s/{SBAR[^ \^]*([^ ]*) +<[^ ]*-empty-([0-9]*) 0($SRL)?> *<S([^ \^]*)[^ \-]*([^ ]*) ([^>]*)> *}/{S\2\1\5-\4 \6}/;
#    # redo some mislabeled rel clauses with subject extr
#    s/{(SBAR[^ ]*) +(<WH.*) +<(Simp|Spro[a-z]*)(.*)}/{WH\1 \2 <S-extrNP\4}/;
    # redo SBAR as WHSBAR
    s/{(SBAR[^ ]*) +(<WH.*) +(<S.*)}/{WH\1 \2 \3}/;
#    # last resort: nuke all remaining empty constituents w/o trace
#    s/{[^ ]* +<[A-Z]*-empty(-ctrl)? +[^>]*> *<([^>]*)> *}/{\2}/;
#    s/{[^ ]* +<([^>]*)> +<[A-Z]*-empty(-ctrl)? +[^>]*> *}/{\1}/;

    # turn NN in VP into VBG -- weird way to annotate gerunds
    s/{(VP[^ ]*) <NN[^ \^]*([^ ]*) ([^>]*)>(.*)}/{\1 <VBGvbg\2 \3>\4}/;

    #### TERMINALS
#    s/{(NN(?!P)S?) +([^ ]*)}/{\1nn \2}/;
    # give terminal symbols lower case and add lowercase pos to category
    if ($_ =~ /{(VB[A-Z]*|MD|TO|BES|HVS)([^ ]*) *([^ \/]*) *}/) { $c=$1; $u=$2; $p=lc($1); $w=lc($3); s/{.*}/{$c$p$u $w}/; }
#    if ($_ =~ /{(VB[A-Z]*|MD|TO|BES|HVS)([^ ]*) *([^ \/]*) *}/) { $c=$1; $u=$2; $p=lc($1); $w=$3; s/{.*}/{$c$p$u $w}/; } ## To use case in POS decisions
    # give terminal symbols lower case
    if ($_ =~ /{([A-Z\$]*)([^ ]*) *([^ \/]*) *}/) { $c="$1$2"; $w=lc($3); s/{.*}/{$c $w}/; }
#    if ($_ =~ /{([A-Z\$]*)([^ ]*) *([^ \/]*) *}/) { $c="$1$2"; $w=$3; s/{.*}/{$c $w}/; } ## To use case in POS decisions (with above)

    #### CONJUNCTION
    # grab XP ; conj XP as LISTXP-semi
    s/{([^ ]*) +(.*)<(S[^- ]*|NP[^- ]*|VP[^- ]*|PP[^- ]*|ADJP[^- ]*|ADVP[^- ]*|CD[^- ]*)([^>]*)> *<(\!semi\![^>]*)> *<(CC[^>]*)> *<\3([^>]*)>(.*)}/\(\1 \2\{LIST\3-semi <\3\4> <\5> <\6> <\3\7>\}\8\)/;
    # grab XP ; XP ; XP as LISTXP-semi at end of constit
    s/{([^ ]*) +(.*)<(NP[^- ]*)([^>]*)> *<(\!semi\![^>]*)> *<\3([^>]*)> *<(\!semi\![^>]*)> *<\3([^>]*)>( *)}/\(\1 \2\{LIST\3-semi <\3\4> <\5> <LIST\3-semi \[\3\6\] \[\7\] \[\3\8\]>\}\9\)/;
    # grab XP ; LISTXP-semi as LISTXP-semi
    s/{(?!LIST)([^ ]*) +(.*)<(S[^- ]*|NP[^- ]*|VP[^- ]*|PP[^- ]*|ADJP[^- ]*|ADVP[^- ]*|CD[^- ]*)([^>]*)> *<(\!semi\![^>]*)> *<LIST\3-semi([^>]*)>(.*)}/\(\1 \2\{LIST\3-semi <\3\4> <\5> <LIST\3-semi\6>\}\7\)/;
    # grab XP , conj XP as LISTXP
    s/{([^ ]*) +(.*)<(S[^- ]*|NP[^- ]*|VP[^- ]*|PP[^- ]*|ADJP[^- ]*|ADVP[^- ]*|CD[^- ]*)([^>]*)> *<(,[^>]*)> *<(CC[^>]*)> *<\3([^>]*)>(.*)}/\(\1 \2\{LIST\3 <\3\4> <\5> <\6> <\3\7>\}\8\)/;
    # grab XP , XP conj XP as LISTXP
    s/{([^ ]*) +(.*)<(S[^- ]*|NP[^- ]*|VP[^- ]*|PP[^- ]*|ADJP[^- ]*|ADVP[^- ]*|CD[^- ]*)([^>]*)> *<(,[^>]*)> *<\3([^>]*)> *<(CC[^>]*)> *<\3([^>]*)>(.*)}/\(\1 \2<\3\4> <\5> \{LIST\3 <\3\6> <\7> <\3\8>\}\9\)/;
    # grab XP , XP , XP as LISTXP at end of constit
    s/{([^ ]*) +(.*)<(NP[^- ]*)([^>]*)> *<(,[^>]*)> *<\3([^>]*)> *<(,[^>]*)> *<\3([^>]*)>( *)}/\(\1 \2\{LIST\3 <\3\4> <\5> <LIST\3 \[\3\6\] \[\7\] \[\3\8\]>\}\9\)/;
    # grab XP , LISTXP as LISTXP
#    while ( s/{(?!LIST)([^ ]*) +(.*)<(S[^- ]*|NP[^- ]*|VP[^- ]*|PP[^- ]*|ADJP[^- ]*|ADVP[^- ]*|CD[^- ]*)([^>]*)> *<(,[^>]*)> *<LIST\3([^>]*)>(.*)}/\{\1 \2<LIST\3 \[\3\4\] \[\5\] \[LIST\3\6\]>\7\}/ ){}
    s/{(?!LIST)([^ ]*) +(.*)<(S[^- ]*|NP[^- ]*|VP[^- ]*|PP[^- ]*|ADJP[^- ]*|ADVP[^- ]*|CD[^- ]*)([^>]*)> *<(,[^>]*)> *<LIST\3([^>]*)>(.*)}/\(\1 \2\{LIST\3 <\3\4> <\5> <LIST\3\6>\}\7\)/;
    # grab XP XP conj XP as XP LISTXP
    s/{(SBAR|SQ|SINV|S|NP|VP|PP|ADJP|ADVP|CD)([^ ]*) +(.*)<\1([^ ]*) ([^>]*)> *<\1\4 ([^>]*)> *<(CC[^>]*)> *<\1\4 ([^>]*)>(.*)}/\(\1\4 \3<\1\4 \5> \{LIST\1\4 <\1\4 \6> <\7> <\1\4 \8>\}\9\)/;
    ##s/{([^ ]*) +(.*)<(S|NP|VP|PP|ADJP|ADVP|CD)([^ ]*) ([^>]*)> *<\3\4 ([^>]*)> *<(CC[^>]*)> *<\3\4 ([^>]*)>(.*)}/\(\1\4 \2<\3\4 \5> \{LIST\3\4 <\3\4 \6> <\7> <\3\4 \8>\}\9\)/;
    ##s/{([^ ]*) +(.*)<(S[^ ]*|NP[^ ]*|VP[^ ]*|PP[^ ]*|ADJP[^ ]*|ADVP[^ ]*|CD[^ ]*) ([^>]*)> *<\3 ([^>]*)> *<(CC[^>]*)> *<\3 ([^>]*)>(.*)}/\(\1 \2<\3 \4> \{LIST\3 <\3 \5> <\6> <\3 \7>\}\8\)/;
    # grab XP XP XP as LISTXP at end of constit
    s/{([^ ]*) +(.*)<(NP[^ ]*) ([^>]*)> *<\3 ([^>]*)> *<\3 ([^>]*)>( *)}/\(\1 \2<\3 \4> \{LIST\3 <\3 \5> <\3 \6>\}\7\)/;
    # grab XP XP LISTXP as XP LISTXP
    s/{(?!LIST)([^ ]*) +(.*)<(S[^ ]*|NP[^ ]*|VP[^ ]*|PP[^ ]*|ADJP[^ ]*|ADVP[^ ]*|CD[^ ]*)([^>]*)> *<\3([^>]*)> *<LIST\3([^>]*)>(.*)}/\(\1 \2<\3\4> \{LIST\3 <\3\5> <LIST\3\6>\}\7\)/;
    # grab X conj X as X  ####(do generic conj first, as more specific one will appear beside it and ultimately replace it via "undo unary identity projection")
    s/{([^ ]*) +(.*)<([^ ]*) ([^>]*)> *<(CC)([^>]*)> *<\3 ([^>]*)>(.*)}/\(\1 \2\{\3 <\3 \4> <\5\6> <\3 \7>\}\8\)/;
    # don't use proj in any new nonterminal resulting from conj
    #s/{([^ ]*)-proj[^\- ]*([^ ]* .*<.*)}/{\1\2}/;


    $LEXTPU="[,`'\\!\\.\\?^][ \\)>\\]]*[^\\)>\\]]*|^[^\\)>\\]]*";
    #$LEXTP="^[^\\)>\\]]*";
    $REXTPU="[ \\)>\\]]*[^\\)>\\]]*[,`'\\!\\.\\?]";

debug($step, " / $_");
    #### NOUN PHRASES
    # right-binarize (basal) NPs as much as possible
    s/{(NP|WHNP)[a-z]*([^ ]*) +(.*)<(?![-,`'\!])(?!`)([A-Z]*)([^ ]* [^>]*)> *<(NN[A-Z]*)([a-z]*)([^ ]* [^>]*)>(.*)}/\(\1\2 \3\{\6 <\4\5> <\6\7\8>\}\9\)/;
    # left-binarize NP genitive marker if left and right context reduced to nil
    s/{(NP|WHNP)[a-z]*([^ ]*) +( *)<(NN[A-Z]*|NP)([a-z]*)([^ ]* [^>]*)> *<(POS)([^ ]* [^>]*)>( *)}/\{\1pos <\4\5\6> <\7\8>\}/;
    # grab NN|NP + comma + mod + internal comma as NP
    s/{(NP)[a-z]*([^ ]*) +<(NN[A-Z]*|NP)n?n?([a-z]*)([^ ]* [^>]*)> *<, ,> *<((?=ADVP|PP|S|VP|WHSBAR|ADJP|NP)(?![^ ]*-rc)[^ ]*)([^>]*)>( *<, ,>.*)}/\(\1\2 \{\3 <\3nn\4\5> <\6-rc-mc [, ,] [\6\7]>\}\8\)/;
    # grab NN|NP + comma + mod + external punct as NP
    s/{(NP)[a-z]*([^ ]*) +<(NN[A-Z]*|NP)n?n?([a-z]*)([^ ]* [^>]*)> *<, ,> *<((?=ADVP|PP|S|VP|WHSBAR|ADJP|NP)(?![^ ]*-rc)[^ ]*)([^>]*)> *}($REXTPU)/\(\1\2 \{\3 <\3nn\4\5> <\6-rc-mc [, ,] [\6\7]>\}\)\8/;
#    # grab NN|NP at left + comma + mod + comma
#    s/{(NP)[a-z]*([^ ]*) +( *)<(NN[A-Z]*|NP)n?n?([a-z]*)([^ ]* [^>]*)> *<(ADVP|PP|S|VP|WHSBAR|ADJP|NP)([^ ]* [^>]*)>(.*)}/\(\1\2 \3\{\4 <\4nn\5\6> <\7\8>\}\9\)/;  ##>( *<.*)}
#    # grab NN|NP at left + comma + mod + external comma
#    s/{(NP)[a-z]*([^ ]*) +( *)<(NN[A-Z]*|NP)n?n?([a-z]*)([^ ]* [^>]*)> *<(ADVP|PP|S|VP|WHSBAR|ADJP|NP)([^ ]* [^>]*)>(.*)}/\(\1\2 \3\{\4 <\4nn\5\6> <\7\8>\}\9\)/;  ##>( *<.*)}
    # grab mod to right of NN or NP, if left context reduced to nil
    s/{(NP)[a-z]*([^ ]*) +( *)<(NN[A-Z]*|NP)n?n?([a-z]*)([^ ]* [^>]*)> *<(ADVP|PP|S|VP|WHSBAR|ADJP|NP)([^ ]* [^>]*)>(.*)}/\(\1\2 \3\{\4 <\4nn\5\6> <\7\8>\}\9\)/;  ##>( *<.*)}
#    # NN projections ignore pos info
#    s/{(NN)[A-Z]*([^ ]*) +(<.*)}/{\1\2 \3}/;
#    # left-binarize NPs after left context reduced to nil
#    s/{(NP)[a-z]*([^ ]*) +( *)<(NN[A-Z]*|NP)([a-z]*)([^ ]* [^>]*)> *<\, *\,> *<(PP|S|VP|WHSBAR)([^ ]* [^>]*)>(.*)}/\(\1\2 \3\{\4 <\4\5\6> <\, \,> <\7\8>\}\9\)/;  ##>( *<.*)}
    # undo last unary N bar projection
    s/\((NP)[a-z]*([^ ]*) +\{(NP|NN[A-Z]*)([a-z]*)([^ ]*) +(.*)\} *\)/{\1\2 \6}/;  ## +(\[

    #### TIME NPS
    # substitute NP-tmp
    s/{NP-[^ ]*tmp[^ \-\=]*[^ ]* (.*)}/\(PP-tmp \{NP \1\}\)/g;
#    # remove all other dash specifications beginning with capital letter or number, not ending in dash
#    s/{([^ \-\=]*)[\-\=][\-\=A-Z0-9]*[^\-] /{\1 /g;

    #### VERB PHRASES
    # grab internal comma + S-adv|ADVP|RB|PP + comma + VB|VP as VP
    s/{(VP|VB[A-Z]*)[a-z]*([^ ]*)(.*<, ,>) *<((?=S-adv|ADVP|RB|PP)[^ ]*)([^>]*)> *<, ,> *<(VB[A-Z]*|VP)([a-z]*)([^ ]*)([^>]*)>( *| *<[^A-Z].*)}/\(\1\7\2\3 \{\6\7\8 <\4-lc-mc [\4\5] [, ,]> <\6\7\8\9>\}$10\)/;
    # grab external punct + S-adv|ADVP|RB|PP + comma + VB|VP as VP
    s/($LEXTPU){(VP|VB[A-Z]*)[a-z]*([^ ]*) *<((?=S-adv|ADVP|RB|PP)[^ ]*)([^>]*)> *<, ,> *<(VB[A-Z]*|VP)([a-z]*)([^ ]*)([^>]*)>( *| *<[^A-Z].*)}/\1\(\2\7\3 \{\6\7\8 <\4-lc-mc [\4\5] [, ,]> <\6\7\8\9>\}$10\)/;
    # right-binarize VPs after right context reduced to nil or punct constituent
    s/{(VP|VB[A-Z]*)[a-z]*([^ ]*) +(.*)<(S-adv|ADVP|RB|PP)([^ ]* [^>]*)> *<(VB[A-Z]*|VP)([a-z]*)([^ ]* [^>]*)>( *| *<[^A-Z].*)}/\(\1\7\2 \3\{\6\7 <\4\5> <\6\7\8>\}\9\)/; ##(<.*)<
#    # right-binarize VPs after right context does not contain normal constituents (i.e. contains punct like quote)
#    s/{(VP|VB[A-Z]*)[a-z]*([^ ]*) +(.*)<(S-adv|ADVP|RB|PP)([^ ]* [^>]*)> *<(VB[A-Z]*|VP)([a-z]*)([^ ]* [^>]*)>( *<[^A-Z].*)}/   \(\1\7\2 \3\{\6\7 <\4\5> <\6\7\8>\}\9\)/; ##(<.*)<
    # left-binarize VPs headed by VB or BES as much as possible
    s/{(VP|VB[A-Z]*|SQ)[a-z]*([^ ]*) +(.*)<(VB[A-Z]*|BES)([a-z]*)([^ ]* [^>]*)> *<(?!CC)([A-Z]+)([^ ]* [^>]*)>(.*)}/\(\1\5\2 \3\{\4\5 <\4\5\6> <\7\8>\}\9\)/;
    # left-binarize VPs headed by TO or MD as much as possible
    s/{(VP|VB[A-Z]*|SQ)[a-z]*([^ ]*) +(.*)<(TO|MD)([a-z]*)([^ ]* [^>]*)> *<(VP|VB)([^ ]* [^>]*)>(.*)}/\(\1\5\2 \3\{VP\5 <\4\5\6> <\7\8>\}\9\)/;
    # grab VP|VB + comma + mod + internal comma as VP
    s/{(VP|VB[A-Z]*|SQ)[a-z]*([^ ]*) +(.*)<(VP[a-z]*)([^ ]*)([^>]*)> *<, ,> *<((?=PP|VP)[^ ]*)([^>]*)>( *<, ,>.*)}/\(\1\5\2 \3\{\4 <\4\5\6> <\7-rc-mc [, ,] [\7\8]>\}\9\)/;
    # grab VP|VB + comma + mod + external punct as VP
    s/{(VP|VB[A-Z]*|SQ)[a-z]*([^ ]*) +(.*)<(VP[a-z]*)([^ ]*)([^>]*)> *<, ,> *<((?=PP|VP)[^ ]*)([^>]*)> *}($REXTPU)/\(\1\5\2 \3\{\4 <\4\5\6> <\7-rc-mc [, ,] [\7\8]>\}\)\9/;
debug($step, " \\ $_");
    # left-binarize VPs headed by VP as much as possible
    s/{(VP|VB[A-Z]*|SQ)[a-z]*([^ ]*) +(.*)<(VP)([a-z]*)([^ ]* [^>]*)> *<(PP|VP)([^ ]* [^>]*)>(.*)}/\(\1\5\2 \3\{\4\5 <\4\5\6> <\7\8>\}\9\)/;
#    # VB projections clump pos info
#    s/{(VBG|VBN)(vbg|vbn)([^ ]*) +(<.*)}/{VBprd\3 \4}/;
#    s/{(VBZ|VBD)(vbz|vbd)([^ ]*) +(<.*)}/{VBtns\3 \4}/;
    # undo last unary V bar projection
    s/\((VP|VB[A-Z]*)[a-z]*([^ ]*) +\{(VP|VB[A-Z]*)([a-z]*)([^ ]*) +(.*)\} *\)/{\1\4\2 \6}/;  ## +(\[

    #### SENTENTIAL PROJECTIONS
    # right-binarize NP-SBJ VP as S
    s/{(S[A-Z]*)[a-z]*([^ ]*) +(.*)<(NP)([^ ]* [^>]*)> *<(VP)([a-z]*)([^ ]* [^>]*)>(.*)}/\(\1\2 \3\{S <\4\5> <\6\7\8>\}\9\)/;
    # under SINV: right-binarize MOD VP as VP
    s/{(SINV)[a-z]*([^ ]*) +(.*)<(S-adv|ADVP|RB[A-Z]*|PP)([^ ]* [^>]*)> *<(VP|VB[A-Z]*)([a-z]*)([^>]*)>(.*)}/\(\1\2 \3\{\6\7 <\4\5> <\6\7\8>\}\9\)/;
    # under S: grab internal comma + S-adv|ADVP|RB|PP + comma + VB|VP as VP
    s/{(S[A-Z]*)[a-z]*([^ ]*)(.*<, ,>) *<((?=S-adv|ADVP|RB|PP)[^ ]*)([^>]*)> *<, ,> *<(VB[A-Z]*|VP)([a-z]*)([^ ]*)([^>]*)>(.*)}/\(\1\7\2\3 \{\6\7\8 <\4-lc-mc [\4\5] [, ,]> <\6\7\8\9>\}$10\)/;
    # under S: grab external punct + S-adv|ADVP|RB|PP + comma + VB|VP as VP
    s/($LEXTPU){(S[A-Z]*)[a-z]*([^ ]*) *<((?=S-adv|ADVP|RB|PP)[^ ]*)([^>]*)> *<, ,> *<(VB[A-Z]*|VP)([a-z]*)([^ ]*)([^>]*)>(.*)}/\1\(\2\7\3 \{\6\7\8 <\4-lc-mc [\4\5] [, ,]> <\6\7\8\9>\}$10\)/;
    # under S: right-binarize MOD VP as VP
    s/{(S[A-Z]*)[a-z]*([^ ]*) +(<.*)<(S-adv|ADVP|RB[A-Z]*|PP)([^ ]* [^>]*)> *<(VP|VB[A-Z]*)([a-z]*)([^>]*)>(.*)}/\(\1\2 \3\{\6\7 <\4\5> <\6\7\8>\}\9\)/;
    # under S: grab internal comma + S-adv|ADVP|RB|PP + comma + S as S
    s/{(S[A-Z]*)[a-z]*([^ ]*)(.*<, ,>) *<((?=S-adv|ADVP|RB|PP)[^ ]*)([^>]*)> *<, ,> *<(\1)([a-z]*)([^ ]*)([^>]*)>(.*)}/\(\1\7\2\3 \{\6\7\8 <\4-lc-mc [\4\5] [, ,]> <\6\7\8\9>\}$10\)/;
    # under S: grab external punct + S-adv|ADVP|RB|PP + comma + S as S
    s/($LEXTPU){(S[A-Z]*)[a-z]*([^ ]*) *<((?=S-adv|ADVP|RB|PP)[^ ]*)([^>]*)> *<, ,> *<(\2)([a-z]*)([^ ]*)([^>]*)>(.*)}/\1\(\2\7\3 \{\6\7\8 <\4-lc-mc [\4\5] [, ,]> <\6\7\8\9>\}$10\)/;
    # under S: right-binarize MOD + S as S
    s/{(S[A-Z]*)[a-z]*([^ ]*) +(.*)<(S-adv|ADVP|RB[A-Z]*|PP)([^ ]* [^>]*)> *<(\1)([a-z]*)( [^>]*)>(.*)}/\(\1\7\2 \3\{\6\7 <\4\5> <\6\7\8>\}\9\)/;
    # left-binarize S
    s/{(S[A-Z]*)[a-z]*([^ ]*) +(.*)<(\1)([a-z]*)( [^>]*)> *<(ADVP|RB[A-Z]*|PP)([^ ]* [^>]*)>(.*)}/\(\1\5\2 \3\{\4\5 <\4\5\6> <\7\8>\}\9\)/;
##    # right-binarize S from above
##    s/{(S[^ ]*) +<(ADVP[^ ]*|PP[^ ]*) (.*>.*<.*)}/\(\1 \2 \{\1 \3\}\)/;
##    # left-binarize S from above
##    s/{(S[^ ]*) (.*>.*<.*) +<(ADVP[^ ]*|PP[^ ]*)}/\(\1 \{\1 \2\} \3\)/;
#    # left-binarize VPs after left context reduced to nil
#    s/{(VP[^ ]*|SQ[^ ]*) +( *)<(VB[^ ]*) ([^>]*)> *<([^ ]*) ([^>]*)> *(<.*)}/\(\1 \2\{\3 <\3 \4> <\5 \6>\}\7\)/;
    # redo Sto
    s/{(S) +(<NP.*) +(<VPto.*)}/{\1to \2 \3}/;
    # undo last unary S bar projection
    s/\((S)[a-z]*([^ ]*) +\{(S)([a-z]*)([^ ]*) +(.*)\} *\)/{\1\4\2 \6}/;  ## +(\[

    #### ADJECTIVAL / ADVERBIAL PHRASES
    # right-binarize ADJPs as much as possible
    s/{(ADJP)[a-z]*([^ ]*) +(.*)<(RB[A-Z]*)([^ ]* [^>]*)> *<(JJ[A-Z]*)([a-z]*)([^ ]* [^>]*)>(.*)}/\(\1\2 \3\{\6 <\4\5> <\6\7\8>\}\9\)/;
    # left-binarize ADJPs after left context reduced to nil
    s/{(ADJP)[a-z]*([^ ]*) +( *)<(JJ[A-Z]*|ADJP)([a-z]*)([^ ]* [^>]*)> *<(PP|S)([^ ]* [^>]*)>(.*)}/\(\1\2 \3\{\4 <\4\5\6> <\7\8>\}\9\)/;  ##>( *<.*)}
    # undo last unary A bar projection
    s/\((ADJP)[a-z]*([^ ]*) +\{(JJ[A-Z]*)([a-z]*)([^ ]*) +(.*)\} *\)/{\1\2 \6}/;  ## +(\[
    # right-binarize ADVPs as much as possible
    s/{(ADVP)[a-z]*([^ ]*) +(.*)<(RB[A-Z]*)([^ ]* [^>]*)> *<(RB[A-Z]*)([a-z]*)([^ ]* [^>]*)>(.*)}/\(\1\2 \3\{\6 <\4\5> <\6\7\8>\}\9\)/;
    # left-binarize ADVPs after left context reduced to nil
    s/{(ADVP)[a-z]*([^ ]*) +( *)<(RB[A-Z]*|ADVP)([a-z]*)([^ ]* [^>]*)> *<(PP|S)([^ ]* [^>]*)>(.*)}/\(\1\2 \3\{\4 <\4\5\6> <\7\8>\}\9\)/;  ##>( *<.*)}
    # undo last unary Ad bar projection
    s/\((ADVP)[a-z]*([^ ]*) +\{(RB[A-Z]*)([a-z]*)([^ ]*) +(.*)\} *\)/{\1\2 \6}/;  ## +(\[
    # annotate unary rb
    s/{(ADVP[^ ]*) *<(RB[^ ]*) ([^ >]*)> *}/{\1 <\2-unary \3>}/;

    #### PREPOSITIONAL PHRASES
#    # annotate particles with word instead of pos
#    s/{(PRT)[^ ]* *<(RP)[^ ]* ([a-z]*) *> *}/{\1\3 <\2\3 \3>}/;
    # annotate prepositions with word instead of pos
    s/{(IN)[a-z]*([^ ]*) *(of|that)(\!colon\!.*)?}/{\1\3\2 \3\4}/;
    #s/{(PP|SBAR)[a-z]*([^ ]*) *<(IN|TO)(of|that|to)([^ ]* .*)}/{\1\4 <\3\4\5}/;
    # left-binarize PPs/SBARs headed by IN or TO as much as possible
    s/{(PP|SBAR)[a-z]*([^ ]*) +(.*)<(IN|TO)([a-z]*)([^ ]* [^>]*)> *<([A-Z]+)([^ ]* [^>]*)>(.*)}/\(\1\5\2 \3\{\1\5 <\4\5\6> <\7\8>\}\9\)/;
    # right-binarize PPs after right context reduced to nil
    s/{(PP)[a-z]*([^ ]*) +(.*)<(ADVP|RB|PP)([^ ]* [^>]*)> *<(PP)([a-z]*)([^ ]* [^>]*)>( *)}/\(\1\7\2 \3\{\6\7 <\4\5> <\6\7\8>\}\9\)/; ##(<.*)<
    # undo last unary P bar projection
    s/\((PP)[a-z]*([^ ]*) +\{(PP)([a-z]*)([^ ]*) +(.*)\} *\)/{\1\4\2 \6}/;  ## +(\[
#    # right-binarize PPs after right context reduced to nil
#    s/{(PP)[a-z]*([^ ]*) +(.*)<(ADVP|RB)([^ ]* [^>]*)> *<(IN)([a-z]*)([^ ]* [^>]*)>(.*)}/\(\1\7\2 \3\{\6\7 <\4\5> <\6\7\8>\}\9\)/; ##(<.*)<

    #### TERMINAL SYMBOLS
    # propagate unary head pos at terminal
    s/{(NP)[a-z]*([^ ]*) +<(NN[A-Z]*)([a-z]*)([^ ]*) +([^<>]*)> *}/{\1\2 <\3\5 \6>}/;
    # propagate unary head pos at terminal
    s/{(VP)[a-z]*([^ ]*) +<(VB[A-Z]*)([a-z]*)([^ ]*) +([^<>]*)> *}/{\1\4\2 <\3\4\5 \6>}/;
    # propagate unary head pos at terminal
    s/{(ADJP)[a-z]*([^ ]*) +<(JJ[A-Z]*)([a-z]*)([^ ]*) +([^<>]*)> *}/{\1\2 <\3\5 \6>}/;
    # propagate unary head pos at terminal
    s/{(ADVP)[a-z]*([^ ]*) +<(RB[A-Z]*)([a-z]*)([^ ]*) +([^<>]*)> *}/{\1\2 <\3\5 \6>}/;
    # undo unary identity projection
    s/{([^ ]*) +<\1([^ ]*) ([^>]*)> *}/{\1\2 \3}/;

#    # remove unary children
#    s/{([^ ]*) +<[^ ]* +([^<>]*)> *}/{\1 \2}/;

#    # grab punct at end of any constit
#    s/{([^ ]*) +(.*<.*<.*) <(\,|\.|\!|!question!)([^>]*)> *}/\(\1 \{\1 \2\} <\3\4>\)/;

    s/{(.*)<(CD [^>]*)> *<(CD [^>]*)>(.*)}/\(\1\{CD <\2> <\3>\4\}\)/;
    s/{(.*)<(RB [^>]*)> *<(QP [^>]*)>(.*)}/\(\1\{QP <\2> <\3>\4\}\)/;

    #### BRACKETS / PARENS
    # 1a. introduce, from matched brackets / parens at edges of constituent, delimited tag
    s/{(?![^ ]*-mb)([^ ]*) +(<\!LRB\! \!lrb\!> .* <\!RRB\! \!rrb\!>) *}/{\1-mb \2}/;
    # 1b. introduce, from matched brackets / parens inside constituent, delimited tag on new constituent
    s/{(?![^ ]*-mb)(.*) (<\!LRB\! \!lrb\!>) +<(?!')([^ ]*?)(-[lr]b)?([^ ]*) ([^>]*)> +(<\!RRB\! \!rrb\!>)(.*)}/\(\1 \{\3\5-mb \2 <\3\4\5 \6> \7\}\8\)/;
    # 2a. introduce, from beginning of sentence to bracket / paren at end of constituent, delimited tag
    s/^([^\)>\]]*){(?![^ ]*-mb)([^ ]*) +(<(?![^ ]*-rb)(?!')[^>]*> +<\!RRB\! \!rrb\!>) *}/\1\{\2-lb-mb \3\}/;
    # 2b. introduce, from beginning of sentence to bracket / paren at end of constituent, delimited tag on new constituent
    s/^([^\)>\]]*){(?![^ ]*-mb)([^ ]*) +<(?![^ ]*-rb)(?!')(?!NP)([^ ]*?)(-lb)?([^ ]*) ([^>]*)> +(<\!RRB\! \!rrb\!>)(.*)}/\1\(\2 \{\3\5-lb-mb <\3\4\5 \6> \7\}\8\)/;
    # 3a. introduce, from external punct to bracket / paren at end of constituent, delimited tag
    s/([,`'\!\.\?][ \)>\]]+[^\)>\]]*){(?![^ ]*-mb)([^ ]*) +(<(?![^ ]*-rb)(?!')[^>]*> +<\!RRB\! \!rrb\!>) *}/\1\{\2-lb-mb \3\}/;
    # 3b. introduce, from external punct to bracket / paren at end of constituent, delimited tag on new constituent
    s/([,`'\!\.\?][ \)>\]]+[^\)>\]]*){(?![^ ]*-mb)([^ ]*) +<(?![^ ]*-rb)(?!')(?!NP)([^ ]*?)(-lb)?([^ ]*) ([^>]*)> +(<\!RRB\! \!rrb\!>)(.*)}/\1\(\2 \{\3\5-lb-mb <\3\4\5 \6> \7\}\8\)/;
    # 4a. introduce, from bracket / paren at edge of constituent to external punct, delimited tag
    s/{(?![^ ]*-mb)([^ ]*) +(<\!LRB\! \!lrb\!> +<(?![^ ]*-lb)(?!')[^>]*>) *}([ \)>\]]*[^\)>\]]*[,`'\!\.\?])/\{\1-rb-mb \2\}\3/;
    # 4b. introduce, from bracket / paren within the constituent to external punct, delimited tag on new constituent
    s/{(?![^ ]*-mb)(.*) +(<\!LRB\! \!lrb\!>) +<(?![^ ]*-lb)(?!')([^ ]*?)(-rb)?([^ ]*) ([^>]*)> *}([ \)>\]]*[^\)>\]]*[,`'\!\.\?])/\(\1 \{\3\5-rb-mb \2 <\3\4\5 \6>\}\)\7/;
#     # 5. propagate lack tag up left child
#     s/{(?![^ ]*-lb)([^ ]*) +<([^ ]*-lb) +([^>]*)>(.*) *}/{\1-lb <\2 \3>\4}/;
#     # 6. propagate lack tag up right child
#     s/{(?![^ ]*-rb)([^ ]*) +(.*)<([^ ]*-rb) +([^>]*)> *}/{\1-rb \2<\3 \4>}/;
#     # 7. propagate lack tag and bracket / paren up left child
#     s/{(?![^ ]*-lb)([^ ]*) +<(?![^ ]*-rb)([^ ]*) +\[\!LRB\! \!lrb\!\] +([^>]*) *>(.*)}/\(\1 <\!LRB\! \!lrb\!> \{\2-lb \3\}\4\)/;
#     # 8. propagate lack tag and bracket / paren up right child
#     s/{(?![^ ]*-rb)(.*) +<(?![^ ]*-lb)([^ ]*) +([^>]*) +\[\!RRB\! \!rrb\!\] *>( *)}/\(\1 \{\2-rb \3\} <\!RRB\! \!rrb\!>\4\)/;
#     # 5. propagate lack tag up from left child
#     s/{(?![^ ]*-lb)([^ ]*) +<([^ ]*-lb[^ ]*) +([^>]*)>(.*) *}/{\1-lb <\2 \3>\4}/;
#     # 6. propagate lack tag up from right child
#     s/{(?![^ ]*-rb)([^ ]*) +(.*)<([^ ]*-rb[^ ]*) +([^>]*)> *}/{\1-rb \2<\3 \4>}/;
#     # 7a. propagate lack tag and left comma up out of left child (w. at least one children remaining)
#     s/\((?![^ ]*-lb)([^ ]*) +( *)\{(?![^ ]*-lb)([^ ]*) +<(\!LRB\! \!lrb\!)> +(<.*<.*)\} *(.*)\)/\(\1 <\4> \2\{\3-lb \5\}\6\)/;
#     # 7b. propagate lack tag and left comma up out of non-left child (w. at least two children remaining)
#     s/{(?![^ ]*-lb)([^ ]*) +<(\!LRB\! \!lrb\!)> +(.*>.*>) *}/ \(\1 <\2> \{\1-lb \3\}\)/;
#     # 8a. propagate lack tag and right comma up out of right child (w. at least one children remaining)
#     s/\((?![^ ]*-rb)([^ ]*) +(.*)\{(?![^ ]*-rb)([^ ]*) +(<.*<.*) +<(\!RRB\! \!rrb\!)> *\}( *)\)/\(\1 \2\{\3-rb \4\} <\5>\6\)/;
#     # 8b. propagate lack tag and right comma up out of non-right child (w. at least two children remaining)
#     s/{(?![^ ]*-rb)([^ ]*) +(<.*<.*) +<(\!RRB\! \!rrb\!)> *}/\(\1 \{\1-rb \2\} <\3>\) /;

#     #### BRACKETS / PARENS
#     # right bracket/paren raising: introduce from matched parens
#     s/{(.*) +<\!LRB\! \!lrb\!($SRL)?> +<(?![^ ]*-rb)([^ ]*) +([^>]*)> +<\!RRB\! \!rrb\!($SRL)?>(.*)}/\(\1 {\3-rb <\!LRB\! \!lrb\!\2> <\3 \4>} <\!RRB\! \!rrb\!\5>\6\)/;
#     # right bracket/paren raising: introduce from right paren
#     s/{(.*<.*) +<(?![^ ]*-rb)([^ ]*) +([^>]*)> +<\!RRB\! \!rrb\!($SRL)?>(.*)}/\(\1 {\2-rb <\2 \3>} <\!RRB\! \!rrb\!\4>\5\)/;
#     # right bracket/paren raising: propagate
#     s/{(?![^ ]*-rb)([^ ]*) +(<.*) +<\!RRB\! \!rrb\!($SRL)?> *}(?! +<\.)(?=.*\))/{\1-rb \2} <\!RRB\! \!rrb\!\3>/;
#     s/{([^ ]*-rb) +(<.*) +<\!RRB\! \!rrb\!($SRL)?> *}(?! +<\.)(?=.*\))/{\1-rb \2} <\!RRB\! \!rrb\!\3>/;
#     # right bracket/paren raising: terminate
#     s/{(.*) +<([^ ]*)-rb +([^>]*)> +<\!RRB\! \!rrb\!($SRL)?> +(<.*)}/\(\1 {\2 <\2-rb \3> <\!RRB\! \!rrb\!\4>} \5\)/;

    #### QUOTES
#    # 1a. introduce, from matched quotes at edges of constituent, delimited tag
#    s/{(?![^ ]*-mq)([^ ]*) +(<``? ``?> *<(?![^ ]*-mq).* (<, ,> +)?<''? ''?>) *}/{\1-mq \2}/;
#    # 1b. introduce, from matched quotes inside constituent, delimited tag on new constituent
#    s/{(?![^ ]*-mq)(.*) (<``? ``?>) +<(?![^ ]*-mq)([^ ]*?)(-[lr]q)?([^ ]*) ([^>]*)> +((<, ,> +)?<''? ''?>)(.*)}/\(\1 \{\3\5-mq \2 <\3\4\5 \6> \7\}\9\)/;
#    # 2a. introduce incompletely quoted constituent at left
#    s/{(?![^ ]*-mq)(?![^ ]*-[lr]q)([^ ]*) ((?!.*``? ``?.*''? ''?).* *<(?![^ ]*-rq)[^>]*> *<''? ''?>.*)}/{\1-lq \2}/;
#    # 2b. introduce incompletely quoted constituent at right
#    s/{(?![^ ]*-mq)([^ ]*) (.*<``? ``?> *<(?![^ ]*-lq)[^>]*>(?!.*''? ''?.*\}).*)}/{\1-rq \2}/;
    # Quote rules.  All of them operate on the span currently wrapped in {...};
    # items inside it are written <label content>.  The rules run in this order
    # on $_, so each one sees the output of the previous one.
    #
    # grab complete quote: inside {...}, an open-quote item (<` `> or <`` ``>),
    # one <X ...> item, an optional <, ,> and a close-quote item (<' '> or
    # <'' ''>) are wrapped in a new {X-mq ...} span, and the old outer braces
    # are rewritten as (...).  Skipped when the token right after "{" or X's
    # label already contains -mq.
    # NOTE(review): ([^ ]*?) is lazy and is tried empty first, so (-[lr]q)? can
    # only capture when X's label itself starts with -lq/-rq; otherwise \5 holds
    # the whole label and any -lq/-rq stays in the new label.  Confirm whether
    # stripping that tag was intended.
    s/{(?![^ ]*-mq)(.*) (<``? ``?>) +<(?![^ ]*-mq)([^ ]*?)(-[lr]q)?([^ ]*) ([^>]*)> +((<, ,> +)?<''? ''?>)(.*)}/\(\1 \{\3\5-mq \2 <\3\4\5 \6> \7\}\9\)/;
    # grab incomplete quote: `` + X as X-rq-rc-mq
    # (the <`` ``> item and the single <X ...> item after it must be the last
    # things before "}"; they are wrapped in {X-rq-rc-mq ...} and the old outer
    # braces become parens)
    s/{([^ ]*) (.*)(<`` ``>) +<([^ ]*) ([^>]*)> *}/\(\1 \2\{\4-rq-rc-mq \3 <\4 \5\>\}\)/;
    # tag incomplete quote: ... + `` + ... as -rq-rc
    # (label must not already contain -mq; no '' '' may occur between the
    # <`` ``> item and the closing brace; only the label is changed)
    s/{(?![^ ]*-mq)([^ ]*) (.*<`` ``>((?!'' '').)*)}/{\1-rq-rc \2}/;
    # grab incomplete quote: X + '' as X-lq-mq
    # (the <X ...> item must be the first item after the label and be followed
    # by exactly one space and <'' ''>; both are wrapped in {X-lq-mq ...} and
    # the old outer braces become parens)
    s/{([^ ]*) +<([^ ]*) ([^>]*)> (<'' ''>)(.*) *}/\(\1 \{\2-lq-mq <\2 \3> \4\}\5\)/;
    # tag incomplete quote: ... + '' + ... as -rq
    # (label must not already contain -mq; no `` `` may occur before the
    # <'' ''> item; only the label is changed)
    # NOTE(review): the sibling "X + ''" rule above uses -lq; confirm that -rq
    # (rather than -lq) is the intended tag here.
    s/{(?![^ ]*-mq)([^ ]*) (((?!`` ``).)*<'' ''>.*)}/{\1-rq \2}/;
#    # 2a. introduce, from beginning of sentence to quote at end of constituent, delimited tag
#    s/^([^\)>\]]*){(?![^ ]*-mq)([^ ]*) +(<(?![^ ]*-rq)[^>]*> +(<, ,> +)?<''? ''?>) *}/\1\{\2-lq-mq \3\}/;
#    # 2b. introduce, from beginning of sentence to quote at end of constituent, delimited tag on new constituent
#    s/^([^\)>\]]*){(?![^ ]*-mq)([^ ]*) +<(?![^ ]*-rq)([^ ]*?)(-lq)?([^ ]*) ([^>]*)> +((<, ,> +)?<''? ''?>)(.*)}/\1\(\2 \{\3\5-lq-mq <\3\4\5 \6> \7\}\9\)/;
#    # 3a. introduce, from external punct to quote at end of constituent, delimited tag
#    s/([,`'\!\.\?][ \)>\]]+[^\)>\]]*){(?![^ ]*-mq)([^ ]*) +(<(?![^ ]*-rq)[^>]*> +(<, ,> +)?<''? ''?>) *}/\1\{\2-lq-mq \3\}/;
#    # 3b. introduce, from external punct to quote at end of constituent, delimited tag on new constituent
#    s/([,`'\!\.\?][ \)>\]]+[^\)>\]]*){(?![^ ]*-mq)([^ ]*) +<(?![^ ]*-rq)([^ ]*?)(-lq)?([^ ]*) ([^>]*)> +((<, ,> +)?<''? ''?>)(.*)}/\1\(\2 \{\3\5-lq-mq <\3\4\5 \6> \7\}\9\)/;
#    # 4a. introduce, from quote at edge of constituent to external punct, delimited tag
#    s/{(?![^ ]*-mq)([^ ]*) +(<``? ``?> +<(?![^ ]*-lq)[^>]*>) *}([ \)>\]]*[^\)>\]]*[,`'\!\.\?])/\{\1-rq-mq \2\}\3/;
#    # 4b. introduce, from quote within the constituent to external punct, delimited tag on new constituent
#    s/{(?![^ ]*-mq)(.*) +(<``? ``?>) +<(?![^ ]*-lq)([^ ]*?)(-rq)?([^ ]*) ([^>]*)> *}([ \)>\]]*[^\)>\]]*[,`'\!\.\?])/\(\1 \{\3\5-rq-mq \2 <\3\4\5 \6>\}\)\7/;
#     # 5. propagate lack tag up left child
#     s/{(?![^ ]*-lq)([^ ]*) +<([^ ]*-lq) +([^>]*)>(.*) *}/{\1-lq <\2 \3>\4}/;
#     # 6. propagate lack tag up right child
#     s/{(?![^ ]*-rq)([^ ]*) +(.*)<([^ ]*-rq) +([^>]*)> *}/{\1-rq \2<\3 \4>}/;
#     # 7. propagate lack tag and quote up left child
#     s/{(?![^ ]*-lq)([^ ]*) +<(?![^ ]*-rq)([^ ]*) +(\[``? ``?\]) +([^>]*) *>(.*)}/\(\1 \3 \{\2-lq \4\}\5\)/;
#     # 8. propagate lack tag and quote up right child
#     s/{(?![^ ]*-rq)(.*) +<(?![^ ]*-lq)([^ ]*) +([^>]*) +((\[, ,\] +)?\[''? ''?\]) *>( *)}/\(\1 \{\2-rq \3\} \4\5\)/;
#     # 5. propagate lack tag up from left child
#     s/{(?![^ ]*-lq)([^ ]*) +<([^ ]*-lq[^ ]*) +([^>]*)>(.*) *}/{\1-lq <\2 \3>\4}/;
#     # 6. propagate lack tag up from right child
#     s/{(?![^ ]*-rq)([^ ]*) +(.*)<([^ ]*-rq[^ ]*) +([^>]*)> *}/{\1-rq \2<\3 \4>}/;
#     # 7a. propagate lack tag and left comma up out of left child (w. at least one children remaining)
#     s/\((?![^ ]*-lq)([^ ]*) +( *)\{(?![^ ]*-lq)([^ ]*) +<(``? ``?)> +(<.*<.*)\}(.*) *\)/\(\1 <\4> \2\{\3-lq \5\}\6\)/;
#     # 7b. propagate lack tag and left comma up out of non-left child (w. at least two children remaining)
#     s/{(?![^ ]*-lq)([^ ]*) +<(``? ``?)> +(.*>.*>) *}/ \(\1 <\2> \{\1-lq \3\}\)/;
#     # 8a. propagate lack tag and right comma up out of right child (w. at least one children remaining)
#     s/\((?![^ ]*-rq)([^ ]*) +(.*)\{(?![^ ]*-rq)([^ ]*) +(<.*<.*) +<(''? ''?)> *\}( *)\)/\(\1 \2\{\3-rq \4\} <\5>\6\)/;
#     # 8b. propagate lack tag and right comma up out of non-right child (w. at least two children remaining)
#     s/{(?![^ ]*-rq)([^ ]*) +(<.*<.*) +<(''? ''?)> *}/\(\1 \{\1-rq \2\} <\3>\) /;

#     #### QUOTES
#     # right quote raising: introduce from matched comma quote ... comma quote
#     s/{(.*) +<, ,($SRL)?> +<`` ``($SRL)?> +<(?![^ ]*-rq)([^ ]*) +([^>]*)> +<, ,($SRL)?> +<'' ''($SRL)?>(.*)}/\(\1 \(\4-rq-rc <, ,\2> {\4-rq <`` ``\3> <\4 \5>}\) <, ,\6> <'' ''\7>\8\)/;
#     # right quote raising: introduce from matched quotes with comma
#     s/{(.*) +<`` ``($SRL)?> +<(?![^ ]*-rq)([^ ]*) +([^>]*)> +<, ,($SRL)?> +<'' ''($SRL)?>(.*)}/\(\1 {\3-rq <`` ``\2> <\3 \4> <, ,\5>} <'' ''\6>\7\)/;
#     # right quote raising: introduce from matched quotes
#     s/{(.*) +<`` ``($SRL)?> +<(?![^ ]*-rq)([^ ]*) +([^>]*)> +<'' ''($SRL)?>(.*)}/\(\1 {\3-rq <`` ``\2> <\3 \4>} <'' ''\5>\6\)/;
#     # right quote raising: introduce from right quote
#     s/{(.*<.*) +<(?!,)(?![^ ]*-rq)([^ ]*) +([^>]*)> +<'' ''($SRL)?>(.*)}/\(\1 {\2-rq <\2 \3>} <'' ''\4>\5\)/;
#     # right quote raising: propagate
#     s/{(?![^ ]*-rq)([^ ]*) +(<.*) +<'' ''($SRL)?> *}(?! +<\.)(?=.*\))/{\1-rq \2} <'' ''\3>/;
#     s/{([^ ]*-rq) +(<.*) +<'' ''($SRL)?> *}(?! +<\.)(?=.*\))/{\1 \2} <'' ''\3>/;
#     # right quote raising: terminate
#     s/{(.*) +<([^ ]*)-rq +([^>]*)> +<'' ''($SRL)?> +(<.*)}/\(\1 {\2 <\2-rq \3> <'' ''\4>} \5\)/;

    #### DASHES
    # Dash rules.  A dash item is written <!dash! !dash!>.  As elsewhere in
    # this loop body, {...} is the span being rewritten and <label content>
    # are the items inside it.  Tags added here: -md, -ld-md, -rd-md.
    # NOTE(review): in rules 1b/2b/3b/4b the lazy ([^ ]*?) is tried empty
    # first, so the optional (-[lr]d)/(-ld)/(-rd) group can only capture when
    # the item label itself starts with that tag; otherwise the last label
    # group holds the whole label.  Confirm whether stripping was intended.
    #
    # 1a. introduce, from matched dashes at edges of constituent, delimited tag
    #     (first and last items are dashes: append -md to the label, unless
    #     the label already contains -md)
    s/{(?![^ ]*-md)([^ ]*) +(<\!dash\! \!dash\!> .* <\!dash\! \!dash\!>) *}/{\1-md \2}/;
    # 1b. introduce, from matched dashes inside constituent, delimited tag on new constituent
    #     (dash + one <X ...> item + dash are wrapped in {X-md ...}; the old
    #     outer braces become parens; X's label must not start with ')
    s/{(?![^ ]*-md)(.*) (<\!dash\! \!dash\!>) +<(?!')([^ ]*?)(-[lr]d)?([^ ]*) ([^>]*)> +(<\!dash\! \!dash\!>)(.*)}/\(\1 \{\3\5-md \2 <\3\4\5 \6> \7\}\8\)/;
    # 2a. introduce, from beginning of sentence to dash at end of constituent, delimited tag
    #     (everything from the start of $_ up to "{" must be free of ")", ">"
    #     and "]"; the span holds exactly one item plus a dash; the label
    #     gets -ld-md)
    s/^([^\)>\]]*){(?![^ ]*-md)([^ ]*) +(<(?![^ ]*-rd)(?!')[^>]*> +<\!dash\! \!dash\!>) *}/\1\{\2-ld-md \3\}/;
    # 2b. introduce, from beginning of sentence to dash at end of constituent, delimited tag on new constituent
    #     (same prefix condition as 2a; the first item X and the dash after
    #     it are wrapped in {X-ld-md ...}, the old outer braces become
    #     parens; X's label must not start with NP or ')
    s/^([^\)>\]]*){(?![^ ]*-md)([^ ]*) +<(?![^ ]*-rd)(?!')(?!NP)([^ ]*?)(-ld)?([^ ]*) ([^>]*)> +(<\!dash\! \!dash\!>)(.*)}/\1\(\2 \{\3\5-ld-md <\3\4\5 \6> \7\}\8\)/;
    # 3a. introduce, from external punct to dash at end of constituent, delimited tag
    #     (like 2a, but instead of the start of $_ the prefix begins at one
    #     of , ` ' ! . ? followed by at least one space or closing delimiter)
    s/([,`'\!\.\?][ \)>\]]+[^\)>\]]*){(?![^ ]*-md)([^ ]*) +(<(?![^ ]*-rd)(?!')[^>]*> +<\!dash\! \!dash\!>) *}/\1\{\2-ld-md \3\}/;
    # 3b. introduce, from external punct to dash at end of constituent, delimited tag on new constituent
    #     (like 2b, with the same punctuation-anchored prefix as 3a)
    s/([,`'\!\.\?][ \)>\]]+[^\)>\]]*){(?![^ ]*-md)([^ ]*) +<(?![^ ]*-rd)(?!')(?!NP)([^ ]*?)(-ld)?([^ ]*) ([^>]*)> +(<\!dash\! \!dash\!>)(.*)}/\1\(\2 \{\3\5-ld-md <\3\4\5 \6> \7\}\8\)/;
    # 4a. introduce, from dash at edge of constituent to external punct, delimited tag
    #     (span holds exactly a dash plus one item, and what follows "}" runs
    #     to one of , ` ' ! . ? without opening past a ")", ">" or "]" after
    #     the initial closers; the label gets -rd-md)
    s/{(?![^ ]*-md)([^ ]*) +(<\!dash\! \!dash\!> +<(?![^ ]*-ld)(?!')[^>]*>) *}([ \)>\]]*[^\)>\]]*[,`'\!\.\?])/\{\1-rd-md \2\}\3/;
    # 4b. introduce, from dash within the constituent to external punct, delimited tag on new constituent
    #     (the last two items are a dash and <X ...>; they are wrapped in
    #     {X-rd-md ...}, the old outer braces become parens; same trailing
    #     punctuation condition as 4a)
    s/{(?![^ ]*-md)(.*) +(<\!dash\! \!dash\!>) +<(?![^ ]*-ld)(?!')([^ ]*?)(-rd)?([^ ]*) ([^>]*)> *}([ \)>\]]*[^\)>\]]*[,`'\!\.\?])/\(\1 \{\3\5-rd-md \2 <\3\4\5 \6>\}\)\7/;
#     # 5. propagate lack tag up left child
#     s/{(?![^ ]*-ld)([^ ]*) +<([^ ]*-ld) +([^>]*)>(.*) *}/{\1-ld <\2 \3>\4}/;
#     # 6. propagate lack tag up right child
#     s/{(?![^ ]*-rd)([^ ]*) +(.*)<([^ ]*-rd) +([^>]*)> *}/{\1-rd \2<\3 \4>}/;
#     # 7. propagate lack tag and dash up left child
#     s/{(?![^ ]*-ld)([^ ]*) +<(?![^ ]*-rd)([^ ]*) +\[\!dash\! \!dash\!\] +([^>]*) *>(.*)}/\(\1 <\!dash\! \!dash\!> \{\2-ld \3\}\4\)/;
#     # 8. propagate lack tag and dash up right child
#     s/{(?![^ ]*-rd)(.*) +<(?![^ ]*-ld)([^ ]*) +([^>]*) +\[\!dash\! \!dash\!\] *>( *)}/\(\1 \{\2-rd \3\} <\!dash\! \!dash\!>\4\)/;
#     # 5. propagate lack tag up from left child
#     s/{(?![^ ]*-ld)([^ ]*) +<([^ ]*-ld[^ ]*) +([^>]*)>(.*) *}/{\1-ld <\2 \3>\4}/;
#     # 6. propagate lack tag up from right child
#     s/{(?![^ ]*-rd)([^ ]*) +(.*)<([^ ]*-rd[^ ]*) +([^>]*)> *}/{\1-rd \2<\3 \4>}/;
#     # 7a. propagate lack tag and left comma up out of left child (w. at least one children remaining)
#     s/\((?![^ ]*-ld)([^ ]*) +( *)\{(?![^ ]*-ld)([^ ]*) +<(\!dash\! \!dash\!)> +(<.*<.*)\} *(.*)\)/\(\1 <\4> \2\{\3-ld \5\}\6\)/;
#     # 7b. propagate lack tag and left comma up out of non-left child (w. at least two children remaining)
#     s/{(?![^ ]*-ld)([^ ]*) +<(\!dash\! \!dash\!)> +(.*>.*>) *}/ \(\1 <\2> \{\1-ld \3\}\)/;
#     # 8a. propagate lack tag and right comma up out of right child (w. at least one children remaining)
#     s/\((?![^ ]*-rd)([^ ]*) +(.*)\{(?![^ ]*-rd)([^ ]*) +(<.*<.*) +<(\!dash\! \!dash\!)> *\}( *)\)/\(\1 \2\{\3-rd \4\} <\5>\6\)/;
#     # 8b. propagate lack tag and right comma up out of non-right child (w. at least two children remaining)
#     s/{(?![^ ]*-rd)([^ ]*) +(<.*<.*) +<(\!dash\! \!dash\!)> *}/\(\1 \{\1-rd \2\} <\3>\) /;

#     #### DASHES
#     # right dash raising: introduce from matched dashes
#     s/{(.*) +<\!dash\! !dash!($SRL)?> +<(?![^ ]*-rd)([^ ]*) +([^>]*)> +<\!dash\! !dash!($SRL)?>(.*)}/\(\1 {\3-rd <\!dash\! !dash!\2> <\3 \4>} <\!dash\! !dash!\5>\6\)/;
#     # right dash raising: introduce from right dash
#     s/{(.*<.*) +<(?![^ ]*-rd)([^ ]*) +([^>]*)> +<\!dash\! !dash!($SRL)?>(.*)}/\(\1 {\2-rd <\2 \3>} <\!dash\! !dash!\4>\5\)/;
#     # right dash raising: propagate
#     s/{(?![^ ]*-rd)([^ ]*) +(<.*) +<\!dash\! !dash!($SRL)?> *}(?! +<\.)(?=.*\))/{\1-rd \2} <\!dash\! !dash!\3>/;
#     s/{([^ ]*-rd) +(<.*) +<\!dash\! !dash!($SRL)?> *}(?! +<\.)(?=.*\))/{\1 \2} <\!dash\! !dash!\3>/;
#     # right dash raising: terminate
#     s/{(.*) +<([^ ]*)-rd +([^>]*)> +<\!dash\! !dash!($SRL)?> +(<.*)}/\(\1 {\2 <\2-rd \3> <\!dash\! !dash!\4>} \5\)/;

    #### COMMAS
    # 1a. introduce, from matched commas at edges of constituent, delimited tag
    #     (first and last items inside {...} are <, ,>: append -mc to the
    #     label, unless the label already contains -mc)
    s/{(?![^ ]*-mc)([^ ]*) +(<, ,> .* <, ,>) *}/{\1-mc \2}/;
# #     # 1. introduce delimited tag from matched commas at edges of constituent
# #     s/{(.*)(?!<[^ ]*-rc[^<]*)(<, ,> +)<([^- ]*[^ ]*?)(-[lr]c)*(?![^ ]*-[lr]c)([^>]*?)>( +<, ,>.*)}/{\1\2<\3-lc-rc\5>\6}/;
# #     # 2. introduce delimited tag from beginning of sentence to comma at edge of constituent (don't stamp -rc node b/c comma did not originate there!)
# #     s/^([^\)>\]]*){(?![^ ]*-mc)(?!LIST)([^ ]* +)<(?![^ ]*-[cq]r)(?!')(?!NP)(?!LIST)([^- ]*[^ ]*?)(-lc)?(?![^ ]*[lr]c)([^>]*?)>( +<, ,>.*)}/\1\{\2<\3-lc-rc\5>\6\}/;
# #     # 3. introduce delimited tag from external punct to comma at edge of constituent (don't stamp -rc node b/c comma did not originate there!)
# #     s/([,`'\!\.\?][ \)>\]]*[^\)>\]]*[\(<\[][^\)>\]]*){(?![^ ]*-mc)(?!LIST)([^ ]* +)<(?![^ ]*-[cq]r)(?!')(?!LIST)([^- ]*[^ ]*?)(-lc)?(?![^ ]*[lr]c)([^>]*?)>( +<, ,>.*)}/\1\{\2<\3-lc-rc\5>\6\}/;
# #     # 4. introduce delimited tag from comma at edge of constituent to external punct (don't stamp -lc node b/c comma did not originate there!)
# #     s/{(?![^ ]*-mc)(?!LIST)([^ ]* +)(.*<, ,> +)<(?![^ ]*-lc)(?!')(?!LIST)([^- ]*[^ ]*?)(-rc)?(?![^ ]*[lr]c)([^>]*?)> *}([ \)>\]]*[^\)>\]]*[,`'\!\.\?])/\{\1\2<\3-lc-rc\5>\}\6/;
# # #    # 3. introduce delimited tag from external punct to comma at edge of constituent (don't stamp -rc node b/c comma did not originate there!)
# # #    s/([,`'\!\.\?] *[\)>\]][^\)>\]]*[\(<\[][^\)>\]]*){(?![^ ]*-mc)(?!LIST)([^ ]* +)<(?![^ ]*-[cq]r)(?!')(?!LIST)([^- ]*[^ ]*?)(-lc)?(?![^ ]*[lr]c)([^>]*?)>( +<, ,>.*)}/\1\{\2<\3-lc-mc-rc\5>\6\}/;
# # #    # 4. introduce delimited tag from comma at edge of constituent to external punct (don't stamp -lc node b/c comma did not originate there!)
# # #    s/{(?![^ ]*-mc)(?!LIST)([^ ]* +)(.*<, ,> +)<(?![^ ]*-lc)(?!')(?!LIST)([^- ]*[^ ]*?)(-rc)?(?![^ ]*[lr]c)([^>]*?)> *}([ \)>\]]*[^\)>\]][\(<\[] *[,`'\!\.\?])/\{\1\2<\3-lc-mc-rc\5>\}\6/;
#     # 1a. introduce, from matched commas at edges of constituent, delimited tag
#     s/{(?![^ ]*-mc)(?!LIST)([^ ]*) +(<, ,> .* <, ,>) *}/{\1-mc \2}/;
#     # 1b. introduce, from matched commas inside constituent, delimited tag on new constituent
#     s/{(?![^ ]*-mc)(?!LIST)(.*?) (<, ,>) +<(?!')(?!LIST)([^ ]*?)(-[lr]c)?([^ ]*) ([^>]*)> +(<, ,>)(.*)}/\(\1 \{\3\5-mc \2 <\3\4\5 \6> \7\}\8\)/;
#     # 2a. introduce, from beginning of sentence to comma at end of constituent, delimited tag
#     s/^([^\)>\]]*){(?![^ ]*-mc)(?!LIST)([^ ]*) +(<(?![^ ]*-[cq]r)(?!')(?!LIST)[^>]*> +<, ,>) *}/\1\{\2-lc-mc \3\}/;
#     # 2b. introduce, from beginning of sentence to comma at end of constituent, delimited tag on new constituent
#     s/^([^\)>\]]*){(?![^ ]*-mc)(?!LIST)([^ ]*) +<(?![^ ]*-[cq]r)(?!')(?!NP)(?!LIST)([^ ]*?)(-lc)?([^ ]*) ([^>]*)> +(<, ,>)(.*)}/\1\(\2 \{\3\5-lc-mc <\3\4\5 \6> \7\}\8\)/;
#     # 3a. introduce, from external punct to comma at end of constituent, delimited tag
#     s/([,`'\!\.\?][ \)>\]]+[^\)>\]]*){(?![^ ]*-mc)(?!LIST)([^ ]*) +(<(?![^ ]*-[cq]r)(?!')(?!LIST)[^>]*> +<, ,>) *}/\1\{\2-lc-mc \3\}/;
#     # 3b. introduce, from external punct to comma at end of constituent, delimited tag on new constituent
#     s/([,`'\!\.\?][ \)>\]]+[^\)>\]]*){(?![^ ]*-mc)(?!LIST)([^ ]*) +<(?![^ ]*-[cq]r)(?!')(?!NP)(?!LIST)([^ ]*?)(-lc)?([^ ]*) ([^>]*)> +(<, ,>)(.*)}/\1\(\2 \{\3\5-lc-mc <\3\4\5 \6> \7\}\8\)/;
# #    # 3c. introduce, from sub-cons punct to comma within the constituent, delimited tag on new constituent
# #    s/{(?![^ ]*-mc)(?!LIST)(.*,[ \)>\]]+) +<(?![^ ]*-[cq]r)(?!')(?!LIST)([^ ]*?)(-lc)?([^ ]*) ([^>]*)> +(<, ,>)(.*)}/\1\(\2 \{\3\5-lc-mc <\3\4\5 \6> \7\}\8\)/;
#     # 4a. introduce, from comma at edge of constituent to external punct, delimited tag
#     s/{(?![^ ]*-mc)(?!LIST)([^ ]*) +(<, ,> +<(?![^ ]*-lc)(?!')(?!LIST)[^>]*>) *}([ \)>\]]*[^\)>\]]*[,`'\!\.\?])/\{\1-rc-mc \2\}\3/;
#     # 4b. introduce, from comma within the constituent to external punct, delimited tag on new constituent
#     s/{(?![^ ]*-mc)(?!LIST)(.*) +(<, ,>) +<(?![^ ]*-lc)(?!')(?!LIST)([^ ]*?)(-rc)?([^ ]*) ([^>]*)> *}([ \)>\]]*[^\)>\]]*[,`'\!\.\?])/\(\1 \{\3\5-rc-mc \2 <\3\4\5 \6>\}\)\7/;
# #    # 4c. introduce, from comma within the constituent to sub-cons punct, delimited tag on new constituent
# #    s/{(?![^ ]*-mc)(?!LIST)(.*) +(<, ,>) +<(?![^ ]*-lc)(?!')(?!LIST)([^ ]*?)(-rc)?([^ ]*) ([^>]*)> +([ \)>\]]*[^\)>\]]*[,`'\!\.\?].*)}/\(\1 \{\3\5-rc-mc \2 <\3\4\5 \6>\} \7\)/;


    # make sure punct tags precede everything else
    # Within the token that follows "{", swap a tag "-xxx" with an immediately
    # following three-character tag matching -[rl][a-z], so the -l?/-r? tag
    # moves to the left.  The negative lookahead stops the left-hand tag from
    # itself starting with -[rl][a-z].  The empty-bodied while repeats the
    # global substitution until nothing changes.
    while ( s/{([^ ]*)(?!-[rl][a-z])(-[^- ]+)(-[rl][a-z])/{\1\3\2/g ){}
    #while ( s/<([^ ]*)(?!-[rl][a-z])(-[^- ]+)(-[rl][a-z])/<\1\3\2/g ){}
    # delete redundant tags
    #while ( s/{([^ ]*)(?!-[rl][a-z])(-[^- ]+)(-[rl][a-z])/{\1\3\2/g ){}
    # Matches "<" + prefix (\1) + one tag (\2) + more label text (\3) + a
    # repeat of the prefix, and drops that repeat; loops until no match.
    # NOTE(review): the trailing \1 in the pattern backreferences the label
    # prefix, not the tag (\2) -- confirm this is what "redundant tags" means.
    # NOTE(review): \1 may match the empty string (e.g. a label starting with
    # "-"), in which case the replacement equals the match and this loop
    # would not terminate; presumably such labels never reach this point.
    while ( s/<([^ ]*)(-[^- ]+)([^ ]*)\1/<\1\2\3/g ){}

#    # 7a. propagate lack tag and left comma up out of left child (w. at least two children remaining)
#    s/\((?![^ ]*-lc)([^ ]*) +( *)\{(?![^ ]*-lc)([^ ]*) +<(, ,)> +(<.*<.*)\} *(.*)\)/\(\1 <\4> \2\{\3-lc \5\}\6\)/;
#    # 8a. propagate lack tag and right comma up out of right child (w. at least two children remaining)
#    s/\((?![^ ]*-rc)([^ ]*) +(.*)\{(?![^ ]*-rc)([^ ]*) +(<.*<.*) +<(, ,)> *\}( *)\)/\(\1 \2\{\3-rc \4\} <\5>\6\)/;
#    # 7b. propagate lack tag and left comma up out of non-left child (w. at least two children remaining)
#    s/{(?![^ ]*-lc)([^ ]*) +<(, ,)> +(.*>.*>) *}/ \(\1 <\2> \{\1-lc \3\}\)/;
#    # 8b. propagate lack tag and right comma up out of non-right child (w. at least two children remaining)
#    s/{(?![^ ]*-rc)([^ ]*) +(<.*<.*) +<(, ,)> *}/\(\1 \{\1-rc \2\} <\3>\) /;
#    # 5. propagate lack tag up from left child
#    s/{(?![^ ]*-lc)([^ ]*) +<([^ ]*-lc[^ ]*) +([^>]*)>(.*) *}/{\1-lc <\2 \3>\4}/;
#    # 6. propagate lack tag up from right child
#    s/{(?![^ ]*-rc)([^ ]*) +(.*)<([^ ]*-rc[^ ]*) +([^>]*)> *}/{\1-rc \2<\3 \4>}/;
#    # 7a. propagate lack tag and left comma up out of left child
#    s/{(?![^ ]*-lc)([^ ]*) + *<(?![^ ]*-lc)([^ ]*) +\[, ,\] +([^>]*) *>(.*)}/\(\1 <, ,> \{\2-lc \3\}\4\)/;
#    # 7b. propagate lack tag and left comma up out of non-left child (w. at least two children remaining)
#    s/{(?![^ ]*-lc)([^ ]*) +<, ,> +(<.*) *}/ {\1 <, ,> <\1-lc \2>}/;
#    # 8a. propagate lack tag and right comma up out of right child
#    s/{(?![^ ]*-rc)([^ ]*) +(.*)<(?![^ ]*-rc)([^ ]*) +([^>]*) +\[, ,\] *> *}/\(\1 \2\{\3-rc \4\} <, ,>\)/;
#    # 8b. propagate lack tag and right comma up out of non-right child (w. at least two children remaining)
#    s/{(?![^ ]*-rc)([^ ]*) +(<.*) +<, ,> *}/{\1 <\1-rc \2> <, ,>} /;
#    # 7. propagate lack tag and comma up from left child
#    s/{(?![^ ]*-rc)([^ ]*) +<, ,> +(<.*)}/<, ,> {\1-lc \2}/;
#    # 8. propagate lack tag and comma up from right child
#    s/{(?![^ ]*-lc)([^ ]*) +(<.*) +<, ,>}/{\1-rc \2} <, ,>/;
#    # 7b. propagate lack tag and left comma up out of non-left child (w. at least two children remaining)
#    s/(?=[\)>\]]) +{(?![^ ]*-lc)([^ ]*) +<, ,> +(<.*) *}/ {BBB\1 <, ,> <\1-lc \2>}/;
#    # 8b. propagate lack tag and right comma up out of non-right child (w. at least two children remaining)
#    s/<(?![^ ]*-rc)([^ ]*) +(<.*) +<, ,> *} +(?=[\(<\[])/{AAA\1 <\1-rc \2> <, ,>} /;
#     # 7a. propagate lack tag and left comma up out of left child
#     s/{(?![^ ]*-lc)([^ ]*) + *<(?![^ ]*-lc)([^ ]*) +\[, ,\] +([^>]*) *>(.*)}/\(\1 <, ,> \{\2-lc \3\}\4\)/;
#     # 7b. propagate lack tag and left comma up out of non-left child (w. at least two children remaining)
#     s/{(?![^ ]*-lc)([^ ]*) +(<.*)<(?![^ ]*-lc)([^ ]*) +\[, ,\] +(\[[^>]*\[[^>]*) *> *(.*)}/\(\1 \2\{\3 <, ,> <\3-lc \4>\}\5\)/;
#     # 8a. propagate lack tag and right comma up out of right child
#     s/{(?![^ ]*-rc)([^ ]*) +(.*)<(?![^ ]*-rc)([^ ]*) +([^>]*) +\[, ,\] *> *}/\(\1 \2\{\3-rc \4\} <, ,>\)/;
#     # 8b. propagate lack tag and right comma up out of non-right child (w. at least two children remaining)
#     s/{(?![^ ]*-rc)([^ ]*) +(.*)<(?![^ ]*-rc)([^ ]*) +(\[[^>]*\[[^>]*) +\[, ,\] *> *(<.*)}/\(\1 \2\{\3 <\3-rc \4> <, ,>\}\5\)/;
# #    # 7. propagate lack tag and comma up from left child
# #    s/{(?![^ ]*-lc)(.*) +<(?![^ ]*-rc)([^ ]*) +\[, ,\] +([^>]*) *>(.*)}/{\1 <, ,> <\2-lc \3>\4}/;
# #    # 8. propagate lack tag and comma up from right child
# #    s/{(?![^ ]*-rc)(.*) +<(?![^ ]*-lc)([^ ]*) +([^>]*) +\[, ,\] *>(.*)}/{\1 <\2-rc \3> <, ,>\4}/;
# #    # 9. grab right punctuation
# #    s/{(.*)<([^ ]*)-rc([^ ]*) ([^>]*)> +<, ,>( +<.*)}/{\1 <\2\3 \[\2-rc\3 \4\] \[, ,\]> \5}/;
# #    # 10. grab left punctuation
# #    s/{(.*> +)<, ,> +<([^ ]*)-lc([^ ]*) ([^>]*)>(.*)}/{\1 <\2\3 \[, ,\] \[\2-lc\3 \4\]> \5}/;

#    #### COMMAS
#    # right comma raising: introduce from matched commas
#    s/{(.*) +<, ,($SRL)?> +<(?![^ ]*-rc)([^ ]*) +([^>]*)> +<, ,($SRL)?>(.*)}/\(\1 {\3-rc <, ,\2> <\3 \4>} <, ,\5>\6\)/;
#    # right comma raising: introduce from right comma
#    s/{(.*<.*) +<(?![^ ]*-rc)([^ ]*) +([^>]*)> +<, ,($SRL)?>(.*)}/\(\1 {\2-rc <\2 \3>} <, ,\4>\5\)/;
#    # right comma raising: propagate
#    s/{(?![^ ]*-rc)([^ ]*) +(<.*) +<, ,($SRL)?> *}(?! +<\.)(?=.*\))/{\1-rc \2} <, ,\3>/;
#    s/{([^ ]*-rc) +(<.*) +<, ,($SRL)?> *}(?! +<\.)(?=.*\))/{\1 \2} <, ,\3>/;
#    # right comma raising: terminate
#    s/{(.*) +<([^ ]*)-rc +([^>]*)> +<, ,($SRL)?> +(<.*)}/\(\1 {\2 <\2-rc \3> <, ,\4>} \5\)/;


#     #### VERB PHRASES (EXPERIMENTAL FLAT VPS)
#     # right-binarize VPs after right context reduced to nil
#     s/{(VP)[a-z]*([^ ]*) +(.*)<(ADVP|RB|PP)([^ ]* [^>]*)> *<(VB[A-Z]*|VP)([a-z]*)([^ ]* [^>]*)>( *)}/\(\1\7\2 \3\{\6\7 <\4\5> <\6\7\8>\}\9\)/; ##(<.*)<
#     # right-binarize VPs after right context does not contain normal constituents (i.e. contains punct like quote)
#     s/{(VP)[a-z]*([^ ]*) +(.*)<(ADVP|RB|PP)([^ ]* [^>]*)> *<(VB[A-Z]*|VP)([a-z]*)([^ ]* [^>]*)>( *<[^A-Z].*)}/\(\1\7\2 \3\{\6\7 <\4\5> <\6\7\8>\}\9\)/; ##(<.*)<
#     # NEW!
#     s/{(VP) +<((?=VB|BES|TO|VP)[A-Z]*)([a-z]*)([^>]*)>(.*)}/{\1\3 <\2\3\4>\5}/;
# #print "A: $_";
#     s/{(VP[^ ]*) +<((?=VB|BES)[^- ]*)([^>]*)> +<((?=S(?!BAR)|SBARthat|NP|ADJP|VP|PRT)[^- ]*)([^>]*)> *}/{\1 <\2-arg\4\3> <\4\5>}/;
#     s/{(VP[^ ]*) +<((?=VB|BES)[^- ]*)([^>]*)> +(.*)<((?=S(?!BAR)|SBARthat|NP|ADJP|VP|PRT)[^- ]*)([^ ]*)([^>]*)> +<((?=S(?!BAR)|SBARthat|NP|ADJP|VP|PRT)[^- ]*)([^ ]*)([^>]*)>}/\($1 <$2-arg$5-arg$8$3> $4\{AdvP\*_$5$6_AdvP\*_$8$9_AdvP\* <$5$6$7> <$8$9${10}>\}\)/;
#     s/{(VP[^ ]*) +<((?=VB|BES)[^- ]*)([^>]*)> +(.*)<((?=S(?!BAR)|SBARthat|NP|ADJP|VP|PRT)[^- ]*)([^ ]*)([^>]*)> +<(AdvP\*[^ ]*)([^>]*)> *}/\(\1 <\2-arg\5\3> \4\{AdvP\*_\5\6_\8 <\5\6\7> <\8\9>\}\)/;
#     s/{(VP[^ ]*) +<((?=VB|BES)[^- ]*)([^>]*)> +(.*)<((?=S(?!BAR)|SBARthat|NP|ADJP|VP|PRT)[^- ]*)([^ ]*)([^>]*)> +<([^ ]*)([^>]*)> *}/\(\1 <\2-arg\5\3> \4\{AdvP\*_\5\6_AdvP\* <\5\6\7> <\8\9>\}\)/;
#     s/{(VP[^ ]*) +<((?=VB|BES)[^- ]*)([^>]*)> +(.*)<([^ ]*)([^>]*)> +<((?=S(?!BAR)|SBARthat|NP|ADJP|VP|PRT)[^- ]*)([^ ]*)([^>]*)>}/\(\1 <\2-arg\7\3> \4\{AdvP\*_\7\8_AdvP\* <\5\6> <\7\8\9>\}\)/;
#     s/{(VP[^ ]*) +<((?=VB|BES)[^- ]*)([^>]*)> +(.*)<([^ ]*)([^>]*)> +<(AdvP\*[^ ]*)([^>]*)> *}/\(\1 <\2\3> \4\{\7 <\5\6> <\7\8>\}\)/;
#     s/{(VP[^ ]*) +<((?=VB|BES)[^- ]*)([^>]*)> +(.*)<([^ ]*)([^>]*)> +<([^ ]*)([^>]*)> *}/\(\1 <\2\3> \4\{AdvP\* <\5\6> <\7\8>\}\)/;
# #print "B: $_\n";

    #print stderr ":::$_\n";
    ####################

    ## convert inner angles (if any) to bracks...
    ## Each pass rewrites one <...> pair lying between "{" and "}" as [...];
    ## the empty-bodied while repeats until no such pair is left.
    while ( s/{(.*)<([^>]*)>(.*)}/{\1\[\2\]\3}/ ){}
    ## convert outer braces to angles...
    ## No /g: a single substitution, from the first "{" to the last "}" on
    ## the line (greedy .*).
    $_ =~ s/{(.*)}/<\1>/;
  }
  ## finish up...
  ## Turn the first "<" and the first ">" on the line into square brackets.
  ## There is no /g here, so any further angle brackets are left alone
  ## (presumably only the outermost pair remains at this point -- confirm).
  $_ =~ s/</[/;
  $_ =~ s/>/]/;
  ## translate to parens again...
  ## (every square bracket on the line becomes the matching round paren)
  $_ =~ s/\[/\(/g;
  $_ =~ s/\]/\)/g;

  ## strip the four-underscore prefix from every ____INTJ
  $_ =~ s/____INTJ/INTJ/g;

#  ## remove `punctuation-delimited constituent' tags (??)
#  s/-.dlt//g;

# WS: COMMENTED OUT B/C WRECKING COLON PRE_PROC (COMMENT BACK IN AND DO REGRESSION TEST)
#  ## unshift colons (for SRL) and dashes
#  $_ =~ s/!colon!!colon!/:!colon!/g;
#  $_ =~ s/!colon!([^ !]+)/:\U\1/g;
#  #$_ =~ s/:([^ ]*)\!dash!(.*)/:\1--\2/g;
#  #$_ =~ s/:([^ ]*)\!dash!(.*)/:\1--\2/g;
#  $_ =~ s/RELDASH/-/g;
#  $_ =~ s/:REL-([^\.]+)\./:REL-\L\1./g;

#  s/\!semi\! \!semi\!/, \!semi\!/g;

  ## emit the rewritten line on standard output
  print $_;
}
