# INPUT: probabilities from rlnclust-EM 
#         and probabilities (post-relfreq) from trees2svsrules

use strict;
use Getopt::Std;

getopts('gvlz');                # will print out M instead of GG
my $VERBOSE = 0;
our($opt_g,$opt_v,$opt_l,$opt_z);

########### OPTIONS
if ($opt_v) {
  $VERBOSE = 1;
}

# input models (should be valid probabilities)
my %Ppi_udlch;                  # original Gr model (root distribution)
my %P_udlch;                    # helper G model
my %P_lch;                      # helper G model
my %P_i;                        # E model from rlnclust-EM.pl
my %P_h;                        # HW model from trees2svsrules.pl
my %P_lclcgivlch;               # original M model from trees2svsrules.pl
my %P_hgivi;                    # H model from rlnclust-EM.pl
my %P_cgivp;
my %P_pgivw;

# intermediate model
my %P_igivh;                    # formed by bayes' rule
my %P_udlcih;                   # a joint to help get P(lch|lci)
my %P_lcih;                     # a joint to help get P(lch|lci)

# output models
my %Ppi_udlci;                  # result: clustered Gr model
my %P_lclcgivudlci;             # result: clustered M model
my %P_udlci;                    # option: a joint to help get P(lch|lci)
my %P_hgivudlci;                # option: for obs model instead of P(h|e)
my %P_lci;                      # option: a joint to help get P(lch|lci)
my %P_hgivlci;                  # option: for obs model instead of P(h|e)

my %P_chgivi;
my %P_cgivi;


########### MODEL READ: NEED Gr+H+E+HW; M+H
while ( <> ) {
  chomp;

  if ( m/^\#/ ) { 
    next;

  }
  # read in P(h|e)
  elsif ( m/^H +(.*) +: +([^=]*)( = (.*))?$/ && $4>0) {
    #my $model = $1; my $cond = $2; my $targ = $3; my $prob = $5;
    $P_hgivi{$1}{$2} = $4;
    #if (!$opt_l) {
    #  print $_."\n";
    #}                           # for opt_l, use P(h|lci) instead
  }
  # read in P(lclc|udlci)
  elsif ( m/^M +(.*)\{(.*)\} +: +([^=]*)( = (.*))?$/ ) {
    $P_lclcgivlch{$1}{$3}{$2} = $5;
    # no printout
  }
  # read in P(e)
  elsif ( m/^E +: +([^=]*)( = (.*))?$/ ) {
    $P_i{$1} = $3;
    #print $_."\n";
  } 
  # read in P(h)
  elsif ( m/^HW +: +([^=]*)( = (.*))?$/ ) {
    $P_h{$1} = $3;
    print $_."\n";
  }
  # read in P(udlci) at root
  elsif ( m/^Gr +: +(.*)\{(.*)\}( = (.*))?$/ ) {
    $Ppi_udlch{$1}{$2} = $4;
    # no printout
  }
  # read in P(udlci) everywhere
  elsif ( m/^G +: +([a-z]) +([0-9]) +(.*)\{(.*)\}( = (.*))?$/ ) {
    $P_lch  {       $3 }{$4} = $6;
    $P_udlch{"$1 $2 $3"}{$4} = $6;
    # no printout -- will never be used again
  }
  # read in P(c|p)
  elsif ( m/^Cp +([^ ]+) +: +([^ ]+) += +(.*)/  && $3>0) {
    $P_cgivp{$1}{$2}=$3;
  }
  # read in P(p|w)
  elsif ( m/^Pw +([^ ]+) +: +([^ ]+) += +(.*)/  && $3>0) {
    $P_pgivw{$1}{$2}=$3;
    print $_."\n";
  }
  # any other rule
  else {
    print $_."\n";
  }

}

## verify model reads
if ($VERBOSE==1) {
  foreach my $i (sort keys %P_hgivi) { 
    foreach my $h (sort keys %{$P_hgivi{$i}}) { 
      print stderr "read H $i : $h = $P_hgivi{$i}{$h}\n";
    }
  }
  foreach my $l (sort keys %P_lclcgivlch) { 
    foreach my $r (sort keys %{$P_lclcgivlch{$l}}) { 
      foreach my $h (sort keys %{$P_lclcgivlch{$l}{$r}}) { 
        print stderr "read M $l : $r {$h} = $P_lclcgivlch{$l}{$r}{$h}\n";
        if ($P_lch{$l}{$h}==0) {
          print stderr "WARNING: lacking G $l\{$h\}\n";
        }
      }
    }
  }
  foreach my $udlc (sort keys %P_udlch) { 
    foreach my $h (sort keys %{$P_udlch{$udlc}}) { 
      print stderr "read G : $udlc {$h} = $P_udlch{$udlc}{$h}\n";
    }
  }
  foreach my $lc (sort keys %P_lch) { 
    foreach my $h (sort keys %{$P_lch{$lc}}) { 
      print stderr "read G : $lc {$h} = $P_lch{$lc}{$h}\n";
    }
  }
  foreach my $key (sort keys %P_i) {
    print stderr "read E : $key = $P_i{$key}\n";
  }
  foreach my $key (sort keys %P_h) {
    print stderr "read HW : $key = $P_h{$key}\n";
  }
  foreach my $udlc (sort keys %Ppi_udlch) { 
    foreach my $h (sort keys %{$Ppi_udlch{$udlc}}) { 
      print stderr "read Gr : $udlc {$h} = $Ppi_udlch{$udlc}{$h}\n";
    }
  }
  #foreach my $key (sort keys %Ppi_lch) { print "read Gr : $key = $Ppi_lch{$key}\n"; }
}

# build intermediate model (could do this together in one step if using P_h)
my %DP_igivh;
foreach my $i (keys %P_i) {
  foreach my $h (keys %{$P_hgivi{$i}}) {
    $P_igivh{$h}{$i} = $P_hgivi{$i}{$h}*$P_i{$i}; #/$P_h{$h};
    $DP_igivh{$h}   += $P_igivh{$h}{$i};
  }
}
foreach my $h (keys %P_igivh) {
  foreach my $i (keys %{$P_igivh{$h}}) {
    $P_igivh{$h}{$i} /= $DP_igivh{$h}; #= $DP_igivh{$h}==0 ? 0 : $P_igivh{$h}{$i}/$DP_igivh{$h};
  }
}
if ($VERBOSE==1) {
  foreach my $h (keys %P_igivh) {
    foreach my $i (keys %{$P_igivh{$h}}) {
      print "Intermed: P($i|$h) = $P_igivh{$h}{$i}\n";
    }
  }
}


# build Gr model P(lci) = Sum_h P(lch) * P(e|h)
foreach my $udlc (keys %Ppi_udlch) {
  foreach my $h (keys %{$Ppi_udlch{$udlc}}) {
    foreach my $i (keys %{$P_igivh{$h}}) {
      #if ( $Ppi_udlch{$udlc}{$h}>0 && $P_igivh{$h}{$i}>0 && $Ppi_udlch{$udlc}{$h}*$P_igivh{$h}{$i}==0 ) { print stderr "WARNING: multiplying P($udlc\{$h\})*P($i|$h) zeros out\n"; }
      $Ppi_udlci{"$udlc\{$i\}"} += $Ppi_udlch{$udlc}{$h} * $P_igivh{$h}{$i};
    }
  }
}

# FOR NONTERMINALS IN M MODEL: build intermediate G model P(lci) = Sum_h P(lch) * P(e|h)
foreach my $udlc (keys %P_udlch) {
  foreach my $h (keys %{$P_udlch{$udlc}}) {
    foreach my $i (keys %{$P_igivh{$h}}) {
      $P_udlcih{"$udlc\{$i,$h\}"} = $P_udlch{$udlc}{$h} * $P_igivh{$h}{$i};
      $P_udlci{"$udlc\{$i\}"} += $P_udlch{$udlc}{$h} * $P_igivh{$h}{$i};
      #print stderr "Intermed: P($udlc\{$i\}) = ".$P_udlci{"$udlc\{$i\}"}."\n";
    }
  }
}
# FOR NONTERMINALS IN M MODEL: build optional other H model, P(h|lci)
foreach my $udlc (keys %P_udlch) {
  foreach my $h (keys %{$P_udlch{$udlc}}) {
    foreach my $i (keys %{$P_igivh{$h}}) {
      if ($P_udlcih{"$udlc\{$i,$h\}"}>0) {
        $P_hgivudlci{"$udlc\{$i\} : $h"} =	$P_udlcih{"$udlc\{$i,$h\}"} / $P_udlci{"$udlc\{$i\}"};
      }
    }
  }
}

#my %DP_lclcgivlci;
# build M model P(lclc|lci) Sum_h P(lclc|lch)*P(h|e)
foreach my $udlc (keys %P_lclcgivlch) {
  foreach my $lclc (keys %{$P_lclcgivlch{$udlc}}) {
    foreach my $h (keys %{$P_lclcgivlch{$udlc}{$lclc}}) {
      foreach my $i (keys %P_hgivi) {
        if ($P_udlci{"$udlc\{$i\}"}==0) {
          if ($P_udlcih{"$udlc\{$i,$h\}"}==0) {
            #print stderr "WARNING: would have divided $udlc\{$i,$h\} by zero. Do you have P(lci) for non-roots?\n";
            next;               # $P_lclcgivlci{"$udlc\{$i\} : $lclc"} = 0;
          } else {
            print stderr "ERROR: $udlc\{$i\} empty but $udlc\{$i,$h\} not!!\n";
            next;
          }
				#if ( $P_hgivi{$i}{$h}>0 ) {
        } else {
          $P_lclcgivudlci{"$udlc\{$i\} : $lclc"} += $P_lclcgivlch{$udlc}{$lclc}{$h} * $P_hgivudlci{"$udlc\{$i\} : $h"}; # $P_lcih{"$udlc\{$i,$h\}"} / $P_lci{"$udlc\{$i\}"};  # $P_hgivi{$i}{$h}; # 
          #$DP_lclcgivlci{"$udlc\{$i\}"}        += $P_lclcgivlch{$udlc}{$lclc}{$h} * $P_hgivi{$i}{$h};
        }
      }
    }
  }
}
#foreach my $rule (keys %P_lclcgivlci) {
#    our($cond,$targ) = split(/ +: +/,$rule);
#    #print $cond."   ".$targ."\n";
#    $P_lclcgivlci{$rule} /= $DP_lclcgivlci{$cond};
#}


# FOR TERMINALS IN H MODEL: build intermediate G model P(lci) = Sum_h P(lch) * P(e|h)
foreach my $lc (keys %P_lch) {
  foreach my $h (keys %{$P_lch{$lc}}) {
    foreach my $i (keys %{$P_igivh{$h}}) {
      $P_lcih{"$lc\{$i,$h\}"} = $P_lch{$lc}{$h} * $P_igivh{$h}{$i};
      $P_lci{"$lc\{$i\}"} += $P_lch{$lc}{$h} * $P_igivh{$h}{$i};
      #print stderr "Intermed: P($lc\{$i\}) = ".$P_lci{"$lc\{$i\}"}."\n";
    }
  }
}
# FOR TERMINALS IN H MODEL: build optional other H model, P(h|lci)
foreach my $i (keys %P_hgivi) {
#print stderr "$i\n";
  foreach my $h (keys %{$P_hgivi{$i}}) {
#print stderr ". $h\n";
    foreach my $p (keys %{$P_pgivw{$h}}) {
#print stderr ". . $p\n";
      foreach my $c (keys %{$P_cgivp{$p}}) {
#print stderr ". . . $c ($i $h $p) !!!!!!\n";
        $P_chgivi{$i}{"$c $h"} += $P_hgivi{$i}{$h} * $P_pgivw{$h}{$p} * $P_cgivp{$p}{$c};
        $P_cgivi{$i}{$c}       += $P_hgivi{$i}{$h} * $P_pgivw{$h}{$p} * $P_cgivp{$p}{$c};
      }
    }
  }
}
# foreach my $lc (keys %P_lch) {
#   foreach my $h (keys %{$P_lch{$lc}}) {
#     foreach my $i (keys %{$P_igivh{$h}}) {
#       if ($P_lcih{"$lc\{$i,$h\}"}>0) {
#         $P_hgivlci{"$lc\{$i\} : $h"} =	$P_lcih{"$lc\{$i,$h\}"} / $P_lci{"$lc\{$i\}"};
#       }
#     }
#   }
# }

# print out resulting models
foreach my $key (sort keys %Ppi_udlci) {
  if ($Ppi_udlci{$key}>0 || $opt_z) {
    print "Gr : $key = $Ppi_udlci{$key}\n";
  }
}
foreach my $key (sort keys %P_lclcgivudlci) {
  if ($P_lclcgivudlci{$key}>0 || $opt_z) {
    print "M $key = $P_lclcgivudlci{$key}\n";
  }
}

if ($opt_g) {
  foreach my $key (sort keys %P_udlci) {
    if ($P_udlci{$key}>0 || $opt_z) {
      print "G : $key = $P_udlci{$key}\n";
    }
  }
}
if ($opt_l) {
  foreach my $i (sort keys %P_chgivi) {
    foreach my $ch (sort keys %{$P_chgivi{$i}}) {
      my ($c,$h)=split(/ /,$ch);
      my $pr = $P_chgivi{$i}{$ch} / $P_cgivi{$i}{$c};
      print "H $c $i : $h = $pr\n";
    }
  }
}
#foreach my $key (sort keys %P_hgivci) {
#  if ($P_hgivci{$key}>0 || $opt_z) {
#    print "H $key = $P_hgivci{$key}\n";
#  }
#}
