#!/bin/bash

# Require exactly four positional arguments; otherwise print the usage text
# to stderr and abort with a non-zero status so that calling scripts can
# detect the bad invocation (a bare `exit` here would report success).
if [[ $# -ne 4 ]]; then
  {
    echo "Usage is $0 input-dir fileset results-dir temp-dir"
    echo "e.g. $0 ~/corpora/wmt10/training europarl-v5 ~/results ~/temp"
    echo "e.g. $0 ~/corpora/wmt10/dev news-test2008 ~/results ~/temp"
    echo "Previous step's results will be taken from results-dir/tokenized and results-dir/parsed"
    echo "Results will be stored in results-dir/reordered"
  } >&2
  exit 1
fi

# Positional arguments:
#   $1 input-dir   - directory holding the original <fileset>.en file
#   $2 fileset     - base name of the corpus files (without .de/.en suffix)
#   $3 results-dir - root of the pipeline results (tokenized/, parsed/, reordered/)
#   $4 temp-dir    - scratch directory for the intermediate reorderer output
input_dir=$1
fileset=$2
output_dir=$3
temp_dir=$4

# Inputs come from the previous steps' sub-directories; output goes to reordered/.
tokenized_dir=$output_dir/tokenized
parsed_dir=$output_dir/parsed
reordered_dir=$output_dir/reordered

# Stop early if the output or scratch directories cannot be created.
mkdir -p -- "$reordered_dir" "$temp_dir" || exit 1

# Reordering script. Defaults to the original hard-coded location; set the
# REORDERER environment variable to point at a different copy.
reorderer=${REORDERER:-/home/showlett/phd/code/reordering/Collins_rules.py}

de_filename=$fileset.de
en_filename=$fileset.en

# Always remove the intermediate file, including when a step below fails.
trap 'rm -f -- "$temp_dir/$de_filename"' EXIT

# Run the reorderer on the tokenized and parsed German files. It is given the
# temp path as its third argument (presumably its output file, since that is
# what gets post-processed below). Abort on failure so that a missing or
# partial result is never published to reordered/.
if ! "$reorderer" \
  "$tokenized_dir/$de_filename" \
  "$parsed_dir/$de_filename" \
  "$temp_dir/$de_filename"; then
  echo "$0: reorderer failed for $de_filename" >&2
  exit 1
fi

# Turn the *lrb* / *rrb* placeholders back into literal parentheses.
# Input is redirected rather than passed as an operand so that a path
# starting with '-' cannot be mistaken for a sed option.
if ! sed -e 's/\*lrb\*/(/g' -e 's/\*rrb\*/)/g' \
  < "$temp_dir/$de_filename" > "$reordered_dir/$de_filename"; then
  echo "$0: could not write $reordered_dir/$de_filename" >&2
  exit 1
fi

# The English side is not reordered; copy it unchanged next to the German
# output. As the last command, its status becomes the script's exit status.
cp -- "$input_dir/$en_filename" "$reordered_dir/$en_filename"

