#!/usr/bin/perl

use Getopt::Std;
#my $modelformat=0;
#my $SMOOTHING=0;
#getopts('s');
#if (defined($opt_s)) {
#	$SMOOTHING = $opt_s;
#}
#if (defined($opt_b)) {
#	$bigrams = $opt_b;
#}
use strict;

# Lookup tables filled from the model files named on the command line.
my %word_counts;    # not used in this part of the script; kept in case other code references it
my %unigram;        # "word"        => value taken from a " : word = value" line
my %bigram;         # "word1 word2" => value taken from a " : word1 word2 = value" line


# Exactly two model files must be supplied as arguments.
if (@ARGV != 2) {
	die "Need input ngram models.\n  USAGE: cat *.words | perl scripts/words2ngrampred.pl *.unigram *.bigram\n\n";
}

# Every line of every file is tried against both patterns, so the order in
# which the unigram and bigram files are given does not matter.
foreach my $model_file (@ARGV) {
	# Three-argument open with a lexical handle: the file name is never
	# interpreted as a mode, and an unreadable model aborts the run with a
	# clear message instead of silently yielding all-zero scores.
	open(my $model_fh, '<', $model_file)
		or die "Cannot open ngram model '$model_file': $!\n";

	while (my $entry = <$model_fh>) {
		if ($entry =~ / :\s+([^ ]+)\s+=\s+(\S+)/) {
			# One token before the "=": unigram entry.
			$unigram{$1} = $2;
		}
		elsif ($entry =~ / :\s+([^ ]+)\s+([^ ]+)\s+=\s+(\S+)/) {
			# Two tokens before the "=": bigram entry, keyed by both
			# tokens joined with a single space.
			$bigram{"$1 $2"} = $3;
		}
		# Lines matching neither pattern are ignored.
	}

	close $model_fh;
}

# Write a two-column CSV (header first) with one row per input word read
# from STDIN: the word's unigram value and the value of the bigram formed
# with the token immediately before it. Missing entries are reported as 0.
print "unigram,bigram\n";
while (my $line = <STDIN>) {
    chomp $line;

    # Collapse every run of non-space characters containing a digit into
    # the placeholder token "__num__".
    $line =~ s/[^ ]*\d+[^ ]*/__num__/g;

    # Each line starts with a synthetic "eos" token so the first real
    # word has a predecessor for its bigram lookup.
    my @tokens = ("eos", split(' ', $line));

    # Position 0 is always the prepended "eos", which is never scored.
    for my $pos (1 .. $#tokens) {
        my $current = $tokens[$pos];

        # Literal "eos" tokens in the input are skipped as well.
        next if $current eq "eos";

        my $pair = "$tokens[$pos-1] $current";

        my $unigram_score = exists $unigram{$current} ? $unigram{$current} : "0";
        my $bigram_score  = exists $bigram{$pair}     ? $bigram{$pair}     : "0";

        print $unigram_score . "," . $bigram_score . "\n";
    }
}

