#! /bin/csh

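# runBitext1.sh
# Shane Bergsma
# December 5, 2010
#
# NOTE: despite the ".sh" name above, the shebang runs this file under csh,
# so it uses csh syntax (if (...) then / endif), not Bourne-shell syntax.
#
# Stages, each wrapped in its own "if (1) then ... endif" block:
#   - generate the feature list from Data/FVs/bitext.b1.train
#   - vectorize the bitext and WSJ feature-vector files with Tools/vectorize.pl
#   - run Tools/runTrainTestLR.pl on the vectorized files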

# Stage toggle: the condition is the constant 1, so this stage always runs.
# (Presumably edited to 0 by hand to skip the stage -- confirm with the author.)
if (1) then

# Build Data/FVs/bitext.b1.train.featList from the training file.
# Pipeline stages, in order:
#   sed   : strip a leading "0 " or "1 " token, then everything from the
#           first '#' to the end of the line
#   tr    : put each space-separated token on its own line
#   grep  : discard empty lines
#   sed   : delete every ":<one or more non-colon chars>" run
#           (presumably the ":value" part of "name:value" tokens)
#   sort | uniq -c | sort -nr : count each remaining string, highest count first
#   awk   : keep entries whose count is > 0 and print only the string itself
#           (uniq -c counts are always >= 1, so a threshold of 0 drops nothing)
echo "Generating the feature list"
cat Data/FVs/bitext.b1.train \
    | sed -e 's/^[01] //' -e 's/#.*//' \
    | tr ' ' '\n' \
    | grep . \
    | sed 's/:[^:][^:]*//g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | awk '$1 > 0 {print $2}' \
    > Data/FVs/bitext.b1.train.featList

endif

# Stage toggle: the condition is the constant 1, so this stage always runs.
if (1) then

############## 3) VECTORIZING ##############

echo "Vectorizing the FVs:"

# Every file below is vectorized against the same feature list, the one
# generated from the training data.
set featList = Data/FVs/bitext.b1.train.featList

# Bitext splits: Data/FVs/bitext.b1.<part> -> Data/FVs/bitext.b1.<part>.fvs
foreach part (train dev test unlab)
    cat Data/FVs/bitext.b1.${part} | Tools/vectorize.pl -f ${featList} > Data/FVs/bitext.b1.${part}.fvs
end

# Might as well do the WSJ stuff too.
# Note the naming difference: input is Data/FVs/wsj.<part>, while the output
# gains a "b1" component: Data/FVs/wsj.b1.<part>.fvs
foreach part (dev test)
    cat Data/FVs/wsj.${part} | Tools/vectorize.pl -f ${featList} > Data/FVs/wsj.b1.${part}.fvs
end

endif

############## 4) RUNNING SVMs ##############

# Stage toggle: the condition is the constant 1, so this stage always runs.
if (1) then

echo "Running the SVM:"

# Tools/runTrainTestLR.pl receives each list as a single '_'-joined word.
# NOTE(review): the meaning of each option is defined by that tool, not by
# this script; the variable names below are only descriptive guesses.

# Value passed to -c.
set cValues = 1e-6_1e-5_1e-4_1e-3_1e-2_1e-1_1e0_1e1_1e2

# Value passed to -s: the six vectorized files, built up one path at a time
# because a single csh word cannot be split across continuation lines.
set fvSets = Data/FVs/bitext.b1.train.fvs
set fvSets = ${fvSets}_Data/FVs/bitext.b1.dev.fvs
set fvSets = ${fvSets}_Data/FVs/bitext.b1.test.fvs
set fvSets = ${fvSets}_Data/FVs/bitext.b1.unlab.fvs
set fvSets = ${fvSets}_Data/FVs/wsj.b1.dev.fvs
set fvSets = ${fvSets}_Data/FVs/wsj.b1.test.fvs

# Value passed to -t.
set tValues = 2_10_100

Tools/runTrainTestLR.pl \
    -c ${cValues} \
    -s ${fvSets} \
    -f Data/FVs/bitext.b1.train.featList \
    -t ${tValues} \
    -o Data/ML

# A bare "exit" ends the script with the status of the command just above,
# so the tool invocation must stay the last command before it.
exit
endif
