###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

use Getopt::Std;

getopts("d");

$DEBUG = 0;
if ($opt_d) {
  $DEBUG = 1;
}

sub debug {
  if ($DEBUG) {
    $msg = $_[1];
    print stderr $_[0] , " " , $msg, "\n";
  }
}

## for each tree...
while ( <> ) {

  ## translate to parens...
  s/\[/\(/g;
  s/\]/\)/g;
  ## for each constituent...
  while ( $_ =~ /\([^\(\)]*\)/ ) {
    ## convert outer parens to braces...
    $_ =~ s/\(([^\(\)]*)\)/{\1}/;
    #################### ADD SED RULES HERE: apply rules to angles (children) within braces (consituent)...
    debug(++$step, "   $_");


debug($step, " / $_");
    # propagate right punct
    s/{([^- ]*)([^ ]*)(.*)<([^ ]*?)((-r.)+)([^>]*)> *}/{\1\5\2\3<\4\5\7>}/;
    # propagate left punct
    s/{([^- ]*)([^ ]* *)<([^ ]*?)((-l.)+)([^>]*)>(.*)}/{\1\4\2<\3\4\6>\7}/;
    # undo duplicates
    while ( s/{([^ ]*)(-[rl].)([^ ]*)\2/{\1\3\2/ ) {}
debug($step," \\ $_");


    #### PUNCT
    # 4. kick right eos punct out of constit
    s/{(?![^ ]*-rp)([^ ]*) +(.*) +<([\.\!\?] [\.\!\?])>}(?=.*\))/{\1-rp \2} <\3>/;

    #### BRACKETS / PARENS
#    # 1. propagate left  tag up from constit
#    s/{(?![^ ]*-lb)([^ ]*) +<([^ ]*-lb[^ ]*) +([^>]*)>(.*) *}/{\1-lb <\2 \3>\4}/;
#    # 2. propagate right tag up from constit
#    s/{(?![^ ]*-rb)([^ ]*) +(.*)<([^ ]*-rb[^ ]*) +([^>]*)> *}/{\1-rb \2<\3 \4>}/;
    # 3. kick left  brack/paren up out of constit
    s/{(?![^ ]*-lb)([^ ]*) +<(\!LRB\! \!lrb\!)> +(.*)}(?=.*\))/<\2> {\1-lb \3}/;
    # 4. kick right brack/paren up out of constit
    s/{(?![^ ]*-rb)([^ ]*) +(.*) +<(\!RRB\! \!rrb\!)>}(?=.*\))/{\1-rb \2} <\3>/;
#    # 3. kick left  brack/paren up out of constit (w. at least two children remaining)
#    s/{(?![^ ]*-lb)([^ ]*) +<(\!LRB\! \!lrb\!)> +(<.*<.*)}(?=.*\))/\(\1 <\2> {\1-lb \3}\)/;
#    # 4. kick right brack/paren up out of constit (w. at least two children remaining)
#    s/{(?![^ ]*-rb)([^ ]*) +(<.*<.*) +<(\!RRB\! \!rrb\!)>}(?=.*\))/\(\1 {\1-rb \2} <\3>\)/;
#    # 5. kick left  brack/paren up out of constit (and re-structure child remaining)
#    s/{(?![^ ]*-lb)([^ ]*) +<(?![^ ]*-lb)([^ ]*) +\[(\!LRB\! \!lrb\!)\] +\[([^>]*)\] *> +<([^ ]*) ([^>]*)> *}/\(\1 <\3> {\1-lb <\2-lb \5> <\6>}\)/;
#    # 6. kick right brack/paren up out of constit (and re-structure child remaining)
#    s/{(?![^ ]*-rb)([^ ]*) +<([^>]*)> +<(?![^ ]*-rb)([^ ]*) +\[([^ ]*) ([^>]*)\] +\[(\!RRB\! \!rrb\!)\] *> *}/\(\1 {\1-rb <\2> <\3-rb \5>} <\6>\)/;

    #### QUOTES
#    # 1. propagate left  tag up from constit
#    s/{(?![^ ]*-lq)([^ ]*) +<([^ ]*-lq[^ ]*) +([^>]*)>(.*) *}/{\1-lq <\2 \3>\4}/;
#    # 2. propagate right tag up from constit
#    s/{(?![^ ]*-rq)([^ ]*) +(.*)<([^ ]*-rq[^ ]*) +([^>]*)> *}/{\1-rq \2<\3 \4>}/;
    # 3. kick left  quote up out of constit
    s/{(?![^ ]*-lq)([^ ]*) +<(``? ``?)> +(.*)}(?=.*\))/<\2> {\1-lq \3}/;
    # 4. kick right quote up out of constit
    s/{(?![^ ]*-rq)([^ ]*) +(.*) +<(''? ''?)>}(?=.*\))/{\1-rq \2} <\3>/;
#    # 3. kick left  quote up out of constit (w. at least two children remaining)
#    s/{(?![^ ]*-lq)([^ ]*) +<(``? ``?)> +(<.*<.*)}(?=.*\))/\(\1 <\2> {\1-lq \3}\)/;
#    # 4. kick right quote up out of constit (w. at least two children remaining)
#    s/{(?![^ ]*-rq)([^ ]*) +(<.*<.*) +<(''? ''?)>}(?=.*\))/\(\1 {\1-rq \2} <\3>\)/;
#    # 5. kick left  quote up out of constit (and re-structure child remaining)
#    s/{(?![^ ]*-lq)([^ ]*) +<(?![^ ]*-lq)([^ ]*) +\[(``? ``?)\] +\[([^ ]*) ([^>]*)\] *> +<([^>]*)> *}/\(\1 <\3> {\1-lq <\2-lq \5> <\6>}\)/;
#    # 6. kick right quote up out of constit (and re-structure child remaining)
#    s/{(?![^ ]*-rq)([^ ]*) +<([^>]*)> +<(?![^ ]*-rq)([^ ]*) +\[([^ ]*) ([^>]*)\] +\[(''? ''?)\] *> *}/\(\1 {\1-rq <\2> <\3-rq \5>} <\6>\)/;

    #### DASHES
#    # 1. propagate left  tag up from constit
#    s/{(?![^ ]*-ld)([^ ]*) +<([^ ]*-ld[^ ]*) +([^>]*)>(.*) *}/{\1-ld <\2 \3>\4}/;
#    # 2. propagate right tag up from constit
#    s/{(?![^ ]*-rd)([^ ]*) +(.*)<([^ ]*-rd[^ ]*) +([^>]*)> *}/{\1-rd \2<\3 \4>}/;
    # 3. kick left  dash up out of constit
    s/{(?![^ ]*-ld)([^ ]*) +<(\!dash\! \!dash\!)> +(.*)}(?=.*\))/<\2> {\1-ld \3}/;
    # 4. kick right dash up out of constit
    s/{(?![^ ]*-rd)([^ ]*) +(.*) +<(\!dash\! \!dash\!)>}(?=.*\))/{\1-rd \2} <\3>/;
#    # 3. kick left  dash up out of constit (w. at least two children remaining)
#    s/{(?![^ ]*-ld)([^ ]*) +<(\!dash\! \!dash\!)> +(<.*<.*)}(?=.*\))/\(\1 <\2> {\1-ld \3}\)/;
#    # 4. kick right dash up out of constit (w. at least two children remaining)
#    s/{(?![^ ]*-rd)([^ ]*) +(<.*<.*) +<(\!dash\! \!dash\!)>}(?=.*\))/\(\1 {\1-rd \2} <\3>\)/;
#    # 5. kick left  dash up out of constit (and re-structure child remaining)
#    s/{(?![^ ]*-ld)([^ ]*) +<(?![^ ]*-ld)([^ ]*) +\[(\!dash\! \!dash\!)\] +\[([^ ]*) ([^>]*)\] *> +<([^>]*)> *}/\(\1 <\3> {\1-ld <\2-ld \5> <\6>}\)/;
#    # 6. kick right dash up out of constit (and re-structure child remaining)
#    s/{(?![^ ]*-rd)([^ ]*) +<([^>]*)> +<(?![^ ]*-rd)([^ ]*) +\[([^ ]*) ([^>]*)\] +\[(\!dash\! \!dash\!)\] *> *}/\(\1 {\1-rd <\2> <\3-rd \5>} <\6>\)/;

    #### COMMAS
#    # 1. propagate left  tag up from constit
#    s/{(?![^ ]*-lc)([^ ]*) +<([^ ]*-lc[^ ]*) +([^>]*)>(.*) *}/{\1-lc <\2 \3>\4}/;
#    # 2. propagate right tag up from constit
#    s/{(?![^ ]*-rc)([^ ]*) +(.*)<([^ ]*-rc[^ ]*) +([^>]*)> *}/{\1-rc \2<\3 \4>}/;
    # 3. kick left  comma out of constit
    s/{(?![^ ]*-lc)([^ ]*) +<(, ,)> +(.*)}(?=.*\))/<\2> {\1-lc \3}/;
    # 4. kick right comma out of constit
    s/{(?![^ ]*-rc)([^ ]*) +(.*) +<(, ,)>}(?=.*\))/{\1-rc \2} <\3>/;
#    # 3. kick left  comma out of constit (w. at least two children remaining)
#    s/{(?![^ ]*-lc)([^ ]*) +<(, ,)> +(<.*<.*)}(?=.*\))/\(\1 <\2> {\1-lc \3}\)/;
#    # 4. kick right comma out of constit (w. at least two children remaining)
#    s/{(?![^ ]*-rc)([^ ]*) +(<.*<.*) +<(, ,)>}(?=.*\))/\(\1 {\1-rc \2} <\3>\)/;
#    # 5. kick left  comma out of constit (and re-structure child remaining)
#    s/{(?![^ ]*-lc)([^ ]*) +<(?![^ ]*-lc)([^ ]*) +\[(, ,)\] +\[([^ ]*) ([^>]*)\] *> +<([^>]*)> *}/\(\1 <\3> {\1-lc <\2-lc \5> <\6>}\)/;
#    # 6. kick right comma out of constit (and re-structure child remaining)
#    s/{(?![^ ]*-rc)([^ ]*) +<([^>]*)> +<(?![^ ]*-rc)([^ ]*) +\[([^ ]*) ([^>]*)\] +\[(, ,)\] *> *}/\(\1 {\1-rc <\2> <\3-rc \5>} <\6>\)/;


    ####################
    ## convert inner angles (if any) to bracks...
    while ( s/({[^{}]*)<([^<>]*)>/\1\[\2\]/ ){}
    ## convert outer braces to angles...
    $_ =~ s/{(.*)}/<\1>/;
  }
  ## finish up...
  $_ =~ s/</[/;
  $_ =~ s/>/]/;
  ## translate to parens again...
  $_ =~ s/\[/\(/g;
  $_ =~ s/\]/\)/g;

  ## for each constituent...
  while ( $_ =~ /\([^\(\)]*\)/ ) {
    ## convert outer parens to braces...
    $_ =~ s/\(([^\(\)]*)\)/{\1}/;
    #################### ADD SED RULES HERE: apply rules to angles (children) within braces (consituent)...
    debug(++$step, "   $_");

    # put back any remaining left  comma
    s/{(.*)(<, ,>) +<([^ ]*)-lc([^ ]*) ([^>]*)>(.*)}/\(\1\{\3\4 \2 <\3-lc\4 \5>\}\6\)/;
    # put back any remaining right comma
    s/{(.*)<([^ ]*)-rc([^ ]*) ([^>]*)> +(<, ,>)(.*)}/\(\1\{\2\3 <\2-rc\3 \4> \5\}\6\)/;
    # put back any remaining left  dash
    s/{(.*)(<\!dash\! \!dash\!>) +<([^ ]*)-ld([^ ]*) ([^>]*)>(.*)}/\(\1\{\3\4 \2 <\3-ld\4 \5>\}\6\)/;
    # put back any remaining right dash
    s/{(.*)<([^ ]*)-rd([^ ]*) ([^>]*)> +(<\!dash\! \!dash\!>)(.*)}/\(\1\{\2\3 <\2-rd\3 \4> \5\}\6\)/;
    # put back any remaining left  quote
    s/{(.*)(<``? ``?>) +<([^ ]*)-lq([^ ]*) ([^>]*)>(.*)}/\(\1\{\3\4 \2 <\3-lq\4 \5>\}\6\)/;
    # put back any remaining right quote
    s/{(.*)<([^ ]*)-rq([^ ]*) ([^>]*)> +(<''? ''?>)(.*)}/\(\1\{\2\3 <\2-rq\3 \4> \5\}\6\)/;
    # put back any remaining left  brack/paren
    s/{(.*)(<\!LRB\! \!lrb\!>) +<([^ ]*)-lb([^ ]*) ([^>]*)>(.*)}/\(\1\{\3\4 \2 <\3-lb\4 \5>\}\6\)/;
    # put back any remaining right brack/paren
    s/{(.*)<([^ ]*)-rb([^ ]*) ([^>]*)> +(<\!RRB\! \!rrb\!>)(.*)}/\(\1\{\2\3 <\2-rb\3 \4> \5\}\6\)/;
    # put back any remaining right punct
    s/{(.*)<([^ ]*)-rp([^ ]*) ([^>]*)> +(<[\.\!\?] [\.\!\?]>)(.*)}/\(\1\{\2\3 <\2-rp\3 \4> \5\}\6\)/;

    ####################
    ## convert inner angles (if any) to bracks...
    while ( s/({[^{}]*)<([^<>]*)>/\1\[\2\]/ ){}
    ## convert outer braces to angles...
    $_ =~ s/{(.*)}/<\1>/;
  }
  ## finish up...
  $_ =~ s/</[/;
  $_ =~ s/>/]/;
  ## translate to parens again...
  $_ =~ s/\[/\(/g;
  $_ =~ s/\]/\)/g;

  ## for each constituent...
  while ( $_ =~ /\([^\(\)]*\)/ ) {
    ## convert outer parens to braces...
    $_ =~ s/\(([^\(\)]*)\)/{\1}/;
    #################### ADD SED RULES HERE: apply rules to angles (children) within braces (consituent)...
    debug(++$step, "   $_");

    # remove parent of unary constituents that remain
    s/{[^ ]* +<([^ ]* [^>]*)> *}/{\1}/;
#    s/{([^ ]*)(-.dlt|-l.|-r.)+ +<\1 ([^>]*)> *}/{\1 \3}/;

    ####################
    ## convert inner angles (if any) to bracks...
    while ( s/({[^{}]*)<([^<>]*)>/\1\[\2\]/ ){}
    ## convert outer braces to angles...
    $_ =~ s/{(.*)}/<\1>/;
  }
  ## finish up...
  $_ =~ s/</[/;
  $_ =~ s/>/]/;
  ## translate to parens again...
  $_ =~ s/\[/\(/g;
  $_ =~ s/\]/\)/g;

  print $_;
}
