###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

## trees2svsrules.pl
##   same as trees2rules.pl, but prints out L model (and M model, optionally)
## OPTIONS:
##   -g will print out the GG model instead of the M model
##   -d will print out MdL and MdR instead of M
##   -x will add the lines G : u d l c e
use Getopt::Std;

getopts('gdx');

# cat wsj_0001.trees | perl scripts/rctrees2flattrees.pl

########## FOR TREES IN GENERAL...
if (!$opt_g) {
  if ($opt_d) {
    print "MdL 0 h:ROOT{-} : mS:S mEND:END\n";
  } else {
    print "M ROOT{-} : mS:S mEND:END\n";
  }
} else {
  print "GG -:ROOT{-} : mS:S\{eos\} mEND:END\{eos\}\n";
}
##########

my %estore;

## for each tree...
while ( <> ) {
  $line = $_;
  $line=~/^[ \(]?([^ :]*):([^ ]*).*$/;
  @gr = split('\^',$2);
  if ($opt_d) {
    @cr = split('{',$gr[0]);
    print "Gr : $1$cr[0]:$gr[0]\n";
  } else {
    print "Gr : $gr[0]\n";
  }

  ########## FOR TREES IN GENERAL...
  if (!$opt_g) {
      print "LC : h:ROOT\n";
  }
  ##########

  ## translate to parens...
  s/\[/\(/g;
  s/\]/\)/g;

  ## for each constituent...
  while ( $_ =~ /\([^\(\)]*\)/ ) {

    ## convert outer parens to braces...
    $_ =~ s/\(([^\(\)]*)\)/\+\1~/;

    ########## ADD SED RULES HERE: apply rules to angles (children) within braces (consituent)...
    #print stderr "A== $_";
    ## if terminal branch...
    if ( ($g,$p,$w) = ($_ =~ /\+([^ ]*) +([^ <]*)\#([^ <~]*)/) ) {
      $g=lc($g);
      $sd = "";
      s/\+([^ ]*) +([^ <]*\#[^ <~]*)~/+$g $p\#$w~/;
      ($l,$c,$e,$sd) = ($g =~ /([^:]*):(.*)\{(.*)\}(\^.*)?/);
      $estore{$e}++;
	  print "HW : $e\n";
      if ( $sd ne "" ) {
	$s = uc(substr($sd,1,1));
	$d = uc(substr($sd,3,1));
	#print "S $s\nD $d\n";
      }
      if ($opt_x) {
	print "G : $s $d $l$c:$c\{$e\}\n";
      }

      if (!$opt_g) {
	print "LC : $l$c:$c\n";
	if ($opt_d && $sd ne "") {
	  print "Md$s $d $l$c:$c\{$e\} : -:- -:-\n";
	} else {
	  print "M $c\{$e\} : -:- -:-\n";
	}
        #print "GG $l$c:$c\{$e\} : -{-} -{-}\n";
        print "Cp $p : $c\n";
	print "Pc $c : $p\n";
	print "Pw $w : $p\n";
	print "P : $p\n";
      } else {
	#print "LC : $g\n";
	print "GG $g : -:-{-} -:-{-}\n";
        print "Cp $p : $c\n";
        print "Pc $c : $p\n";
	print "Pw $w : $p\n";
	print "P : $p\n";
      }
      #print "W : $w\n";
      #print "PW $p : $c\#$w\n";
    }
    ## if nonterminal branch...
    if ( ($g,$gg) = ($_ =~ /\+([^ ]*)(.*<[^ ]*) [^<]*~/) ) {
      #print "$p$gg\n";
      @A = split ( / [^<]*</, $gg );
      ( $l, $c, $e, $sd) = (   $g =~ /(.*):(.*){(.*)}(\^.*)?/);
      ($l0,$c0,$e0,$sd0) = ($A[1] =~ /(.*):(.*){(.*)}(\^.*)?/);
      ($l1,$c1,$e1,$sd1) = ($A[2] =~ /(.*):(.*){(.*)}(\^.*)?/);
      $estore{$e}++; # = 1;
	  print "HW : $e\n";
      #$estore{$e0} = 1;
      #$estore{$e1} = 1;
      if ( $sd ne "" ) {
	$s = uc(substr($sd,1,1));
	$d = uc(substr($sd,3,1));
      }
      if ( $sd0 ne "" ) {
	$s0 = uc(substr($sd0,1,1));
	$d0 = uc(substr($sd0,3,1));
      }
      if ( $sd1 ne "" ) {
	$s1 = uc(substr($sd1,1,1));
	$d1 = uc(substr($sd1,3,1));
      }
      if ($opt_x) {
	print "G : $s $d $l$c:$c\{$e\}\n";
      }
      if (!$opt_g) {
	print "LC : $l$c:$c\n";
	if ($opt_d && $sd ne "") {
	  print "Md$s $d $l$c:$c\{$e\} : $l0$c0:$c0 $l1$c1:$c1\n";
	} else {
	  print "M $c\{$e\} : $l0$c0:$c0 $l1$c1:$c1\n";
	}
        #print "GG $c\{$e\} : $c0\{$e0\} $c1\{$e1\}\n";
      } else {
	#print "LC : $g\n";
	print "GG $g :@A\n";
      }
      print "L $l0$c0 $e : $e0\n";
      print "L $l1$c1 $e : $e1\n";
      print "L $l0 $e : $e0\n";
      print "L $l1 $e : $e1\n";
    }
    ##########

    ## convert inner angles (if any) to bracks...
    while ($_ =~ /\+[^\+~]*</) {
      $_ =~ s/(\+[^\+~]*)<([^<>]*)>/\1\[\2\]/;
    }

    ## convert outer braces to angles...
    $_ =~ s/\+(.*)~/<\1>/;
  }

  ## finish up...
  $_ =~ s/</[/;
  $_ =~ s/>/]/;

  ## translate to parens again...
  #$_ =~ s/\[/\(/g;
  #$_ =~ s/\]/\)/g;

#  print $_;
}

#foreach $key (sort keys %estore) {
#  print "HW : $key\n";
#}
print "HW : eos\n";
print "HW : -\n";
