#!/usr/bin/perl

use Getopt::Std;
# Command-line switches, parsed by Getopt::Std into the package globals
# $opt_m and $opt_b (both are plain on/off switches):
#   -m  emit one " : word" line per token instead of space-separated words
#   -b  accepted and stored in $bigrams; nothing in the visible part of this
#       script reads it
our ($opt_m, $opt_b);
my $modelformat = 0;
my $bigrams     = 0;

# getopts() reports an unknown switch on STDERR and returns false. Stop here
# instead of silently running with defaults the user did not ask for.
getopts('mb')
    or die "Usage: $0 [-m] [-b] [min_count] < input > output\n";

$modelformat = $opt_m if defined $opt_m;
$bigrams     = $opt_b if defined $opt_b;
use strict;

# Token => number of occurrences over the whole input.
my %word_counts;
# Normalised input lines, kept in input order so they can be printed again
# once all counts are known.
my @sentences;

# Optional single positional argument: the minimum number of occurrences a
# token needs in order to be printed as itself. Defaults to 0.
my $min_count = 0;
if (@ARGV == 1) {
    $min_count = $ARGV[0];

    # A non-numeric value would compare as 0 later on and quietly disable
    # rare-word replacement, so reject it up front.
    die "min_count must be a non-negative number, got '$min_count'\n"
        unless $min_count =~ /\A\d+(?:\.\d+)?\z/;
}
elsif (@ARGV > 1) {
    # Historical behaviour: anything other than exactly one argument leaves
    # the threshold at its default. Keep that, but say so.
    warn "Ignoring " . scalar(@ARGV)
        . " positional arguments: expected at most one (min_count);"
        . " using min_count $min_count\n";
}

print STDERR "Min count is $min_count\n";

# First pass over STDIN: normalise every line, remember it, and tally how
# often each space-separated token occurs.
while (my $line = <STDIN>) {
    chomp $line;

    # Square brackets become parentheses.
    $line =~ s/\[/(/g;
    $line =~ s/\]/)/g;
    # Within a token, drop everything up to and including the last '#'.
    $line =~ s/[^ ]*#//g;
    # Drop each '(' together with the non-space run that follows it,
    # then any remaining ')'.
    $line =~ s/(\([^ ]*)//g;
    $line =~ s/\)//g;
    # Strip leading spaces and squeeze runs of spaces down to one.
    $line =~ s/^ *//g;
    $line =~ s/  */ /g;
    # Any token containing a digit is replaced by the __num__ placeholder.
    $line =~ s/[^ ]*\d+[^ ]*/__num__/g;

    push @sentences, $line;

    for my $token (split / /, $line) {
        $word_counts{$token}++;
    }
}

# Second pass: re-emit every stored sentence, swapping each token whose count
# is below $min_count for the __rare__ placeholder. With $modelformat set,
# every token goes on its own " : token" line; otherwise tokens are written
# on one line, each followed by a space. A newline ends every sentence.
for my $stored (@sentences) {
    for my $token (split / /, $stored) {
        my $shown = $word_counts{$token} >= $min_count ? $token : '__rare__';
        my $chunk = $modelformat ? " : $shown\n" : "$shown ";
        print $chunk;
    }
    print "\n";
}

#print "__rare__ => ".$word_counts{"__rare__"}."\n";
