#! /bin/csh

# runBitextBoth.sh
# Shane Bergsma
# December 5, 2010

if (1) then

# Stage 1: build the combined ("both") feature file for each labeled split.
# Each line of the b1 feature file is pasted (tab-joined) with the matching
# line of the original bitext example; awk then keeps tab-field 1 and
# tab-field 5 of the joined line, separated by a single space.  Per the
# original author, these are the mono view plus the bitext feature field.
#
# The unlabeled split (unlab) is deliberately left out: there is no need to
# evaluate unlabeled data.  Add "unlab" to the list below to generate it.
foreach part (train dev test)
    paste Data/FVs/bitext.b1.${part} Data/BitextExamples/bitext.${part} | awk -F '\t' '{print $1 " " $5}' > Data/FVs/bitext.both.${part}
end

endif

############## 2) CREATING FEATLIST ##############

if (1) then

# Stage 2: derive the feature list from the combined training file.
#   1. sed  - drop a leading "0 " / "1 " prefix and anything from '#' onward
#   2. tr   - put every space-separated token on its own line
#   3. grep - discard empty lines
#   4. sed  - strip each ":<text>" suffix, leaving only what precedes it
#   5. sort | uniq -c | sort -nr - count occurrences, most frequent first
#   6. awk  - keep entries whose count is > 0 and print only the name
echo "Generating the feature list"
set trainFV = Data/FVs/bitext.both.train
sed -e 's/^[01] //' -e 's/#.*//' ${trainFV} \
    | tr ' ' '\n' \
    | grep . \
    | sed 's/:[^:][^:]*//g' \
    | sort | uniq -c | sort -nr \
    | awk '$1 > 0 {print $2}' > ${trainFV}.featList

endif

if (1) then

############## 3) VECTORIZING ##############

# Stage 3: run every labeled split through Tools/vectorize.pl, always using
# the feature list built from the training data, and write the result to
# Data/FVs/bitext.<split>.both.fvs.
echo "Vectorizing the FVs:"

# The unlabeled split (unlab) is not vectorized; add it to the list to enable.
foreach part (train dev test)
    cat Data/FVs/bitext.both.${part} | Tools/vectorize.pl -f Data/FVs/bitext.both.train.featList > Data/FVs/bitext.${part}.both.fvs
end

# Original author's note: use the non-'+' ones; it does make a difference.
# (What the '+' variants are is not evident from this script.)

endif

############## 4) RUNNING SVMs ##############

if (1) then

# Stage 4: hand the vectorized train/dev/test files and the feature list to
# Tools/runTrainTestLR.pl, writing its output under Data/ML.
# NOTE(review): -c and -t take underscore-separated value lists; their exact
# meaning is defined by runTrainTestLR.pl -- confirm there before changing.
echo "Running the SVM:"

set fvDir = Data/FVs
set cGrid = 1e-4_1e-3_1e-2_1e-1_1e0_1e1_1e2_1e3_1e4
# Train, dev and test vector files joined with '_' into one -s argument.
set fvSets = ${fvDir}/bitext.train.both.fvs_${fvDir}/bitext.dev.both.fvs_${fvDir}/bitext.test.both.fvs

Tools/runTrainTestLR.pl -c $cGrid -s $fvSets -f ${fvDir}/bitext.both.train.featList -t 2_10_100 -o Data/ML

# A bare exit ends the script with the status of the command just above.
exit
endif
