
import re
import sys
import tempfile
from model import Model

# Define OOV token
unk = "#unk)"

# Initialize helper variables
# A word token is '#', then one or more non-whitespace characters, matched
# lazily up to the first ')' that can close it. Compiled once and shared by
# both passes so the two loops cannot drift apart.
word_pattern = re.compile(r'\#\S+?\)')
# NOTE(review): `whitespace` is not used anywhere in this section; it is kept
# only so the module-level name stays available.
whitespace = re.compile(r'\s+')
count = Model()


def _unk_if_singleton(match):
    """Return the OOV token for a word counted exactly once, else the word."""
    word = match.group(0)
    return unk if count[word] == 1.0 else word


# Text mode ('w+') so the str lines read from sys.stdin can be written
# directly; the default binary mode rejects str on Python 3. The `with` block
# guarantees the temporary file is closed even if a pass raises.
with tempfile.TemporaryFile(mode='w+') as trees:
    # Count word types in trees, spooling every input line to the temporary
    # file so it can be replayed once the counts are complete.
    for tree in sys.stdin:
        trees.write(tree)
        for word in word_pattern.findall(tree):
            count[word] += 1.0

    # Substitute OOV token for words that occur only once.
    # re.sub rewrites each match at its own position, so a singleton whose
    # text also appears as a substring of an earlier, longer token cannot be
    # replaced in the wrong place (which str.replace(..., 1) could do).
    trees.seek(0)
    for tree in trees:
        sys.stdout.write(word_pattern.sub(_unk_if_singleton, tree))
