#!/bin/bash
#
# Escape parentheses in a corpus file and run the distributed parser on it.
#
# Usage:
#   <script> grammar input-dir input-file results-dir temp-dir num-batches
#
# Steps:
#   1. Create results-dir/tokenized, results-dir/parsed and temp-dir.
#   2. Write input-dir/input-file to results-dir/tokenized/input-file with
#      every "(" replaced by "*lrb*" and every ")" replaced by "*rrb*".
#      (Despite the directory name, no other tokenization happens here.)
#   3. Run the parser with: grammar, the escaped file,
#      results-dir/parsed/input-file, temp-dir, num-batches.
#
# Exit status: 1 on a usage or setup error; otherwise the parser's status.

# Print an error message to stderr and abort.
die() {
  printf '%s: %s\n' "$0" "$*" >&2
  exit 1
}

if [[ $# -ne 6 ]]; then
  {
    printf '%s\n' "Usage is $0 grammar input-dir input-file results-dir temp-dir num-batches"
    printf '%s\n' "e.g. $0 german_lc.gr ~/corpora/wmt10/training europarl-v5.de-en.de ~/results ~/temp 20"
    printf '%s\n' "Results will be stored in results-dir/tokenized and results-dir/parsed"
  } >&2
  # A wrong invocation is an error: report it with a non-zero status.
  exit 1
fi

grammar=$1
input_dir=$2
filename=$3
output_dir=$4
temp_dir=$5
batches=$6

tokenized_dir=$output_dir/tokenized
parsed_dir=$output_dir/parsed

input=$input_dir/$filename
tokenized=$tokenized_dir/$filename
parsed=$parsed_dir/$filename

# NOTE(review): moses_scripts is not referenced anywhere in the visible part
# of this script; kept in case something outside this view relies on it.
moses_scripts=/usr/local/bin/moses-scripts/scripts-20110118-1208

parser_dir=/home/showlett/phd/code/parsing
parser=$parser_dir/distributed_parser

# Fail before creating anything if the corpus file cannot be read.
[[ -r "$input" ]] || die "cannot read input file: $input"

mkdir -p -- "$tokenized_dir" "$parsed_dir" "$temp_dir" \
  || die "cannot create output/temp directories"

# '*' is literal on the replacement side of s///, so no escaping is needed.
# The two substitutions cannot interfere: "*lrb*" contains no ")".
sed -e 's/(/*lrb*/g' -e 's/)/*rrb*/g' < "$input" > "$tokenized" \
  || die "failed to write escaped corpus: $tokenized"

# Last command: its exit status becomes the script's exit status.
"$parser" "$grammar" "$tokenized" "$parsed" "$temp_dir" "$batches"
