#!/usr/bin/python

import sys
import re

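## To reduce search time for consult_transtable.py, this script
## takes a translation model output by a Moses script and reduces it
## to an output file listing, one per line, the unique unigrams from
## the model, i.e. the single-token entries found in the first
## "|||"-delimited field of each line.
##
## Arguments: <translation model file> <output file>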

# Matches a line whose first "|||"-delimited field is exactly one
# whitespace-free token; lines starting with a multi-word phrase do not match.
UNIGRAM_PATTERN = re.compile(r"(\S+) \|\|\|")


def extract_unigrams(lines):
	"""Return the unique single-token first fields found in *lines*.

	lines: any iterable of strings (for example an open file), one
	translation-model entry per item.

	Returns a list of the distinct tokens in the order in which each one
	was first seen, so the result is deterministic for a given input.
	Lines that do not start with "<token> |||" are ignored.
	"""
	seen = set()
	ordered = []
	for line in lines:
		match = UNIGRAM_PATTERN.match(line)
		if match is None:
			continue
		token = match.group(1)
		if token not in seen:
			seen.add(token)
			ordered.append(token)
	return ordered


def main(argv=None):
	"""Read the model named by argv[1] and write its unigrams to argv[2].

	argv: argument list in the same layout as sys.argv (program name
	first); defaults to sys.argv when omitted.

	Exits with a usage message and a non-zero status when fewer than two
	file arguments are supplied, instead of continuing with undefined
	names. Errors from opening or reading the files propagate unchanged.
	"""
	if argv is None:
		argv = sys.argv
	if len(argv) < 3:
		# sys.exit with a string prints it to stderr and exits with status 1.
		sys.exit("Usage: give a translation model and an output file as arguments")
	modelname = argv[1]
	outputfilename = argv[2]

	# Stream the model line by line rather than loading it all into memory.
	with open(modelname) as model:
		unigrams = extract_unigrams(model)

	# The output file is only created once the model has been read successfully.
	with open(outputfilename, "w") as outputfile:
		outputfile.writelines(key + "\n" for key in unigrams)


if __name__ == "__main__":
	main()
