#! /bin/csh

# runMono1.sh
# Shane Bergsma
# December 1, 2010

############## 1) CREATING COORD FEATURES ##############

# Switch for the data-preparation stages guarded by "if ($recreate)":
# 1 rebuilds their output files, 0 skips those stages entirely.
set recreate = 1

if ($recreate) then

    echo "Creating coord features"

    # Every example file is piped through createCoordFeats.pl, which is given
    # this counts file as its only argument.
    set lookups = Data/FeatureData/lookups.cnts

    # Monolingual WSJ splits, processed in the order train, dev, test.
    foreach part (train dev test)
        cat Data/WSJExamples/wsj.${part} | Scripts/createCoordFeats.pl $lookups > Data/FVs/wsj.${part}
    end

    # Bitext splits, including the unlabeled set. The outputs carry an extra
    # "b1" component in their names (bitext.b1.<split>).
    foreach part (train dev test unlab)
        cat Data/BitextExamples/bitext.${part} | Scripts/createCoordFeats.pl $lookups > Data/FVs/bitext.b1.${part}
    end

endif

############## 2) CREATING FEATLIST ##############

if ($recreate) then

    echo "Generating the feature list"

    # Build the feature list from the WSJ training features only:
    #   1. strip a leading "0 " or "1 " (presumably the class label -- confirm)
    #      and everything from the first '#' to the end of the line;
    #   2. put one space-separated token per line and drop empty lines;
    #   3. delete every ":<text>" suffix, leaving just the feature names;
    #   4. count each name and order by descending count;
    #   5. keep names whose count is > 0 (that is every name as written; the
    #      0 is the place to raise a minimum-count cutoff) and print the name.
    cat Data/FVs/wsj.train \
        | sed -e 's/^[01] //' -e 's/#.*//' \
        | tr ' ' '\n' \
        | grep . \
        | sed 's/:[^:][^:]*//g' \
        | sort \
        | uniq -c \
        | sort -nr \
        | awk '$1 > 0 {print $2}' > Data/FVs/wsj.train.featList

endif

############## 3) VECTORIZING ##############

if ($recreate) then

    echo "Vectorizing the FVs:"

    # Every data set, bitext included, is vectorized against the feature list
    # built from the WSJ training data.
    set featList = Data/FVs/wsj.train.featList

    # WSJ: Data/FVs/wsj.<split> -> Data/FVs/wsj.<split>.m1.fvs
    foreach part (train dev test)
        cat Data/FVs/wsj.${part} | Tools/vectorize.pl -f $featList > Data/FVs/wsj.${part}.m1.fvs
    end

    # Bitext: Data/FVs/bitext.b1.<split> -> Data/FVs/bitext.<split>.m1.fvs
    # (the "b1" component of the input name is not carried into the output).
    foreach part (train dev test unlab)
        cat Data/FVs/bitext.b1.${part} | Tools/vectorize.pl -f $featList > Data/FVs/bitext.${part}.m1.fvs
    end

endif

############## 4) RUNNING SVMs ##############

# This stage is always enabled; change the 1 to 0 to skip it by hand.
if (1) then

    echo "Running the SVM:"
    # NOTE(review): the message says "SVM" but the driver is named
    # runTrainTestLR.pl -- confirm which learner is actually trained.

    set fvDir = Data/FVs

    # Underscore-joined values handed to -c (presumably the regularisation
    # constants to try -- confirm against runTrainTestLR.pl).
    set costs = 1e-6_1e-5_1e-4_1e-3_1e-2_1e-1_1e0_1e1_1e2_1e3_1e4

    # Underscore-joined list of the seven vectorized files handed to -s:
    # WSJ train/dev/test followed by bitext train/dev/test/unlab.
    set wsjSets = ${fvDir}/wsj.train.m1.fvs_${fvDir}/wsj.dev.m1.fvs_${fvDir}/wsj.test.m1.fvs
    set bitextSets = ${fvDir}/bitext.train.m1.fvs_${fvDir}/bitext.dev.m1.fvs_${fvDir}/bitext.test.m1.fvs_${fvDir}/bitext.unlab.m1.fvs

    Tools/runTrainTestLR.pl \
        -c $costs \
        -s ${wsjSets}_${bitextSets} \
        -f ${fvDir}/wsj.train.featList \
        -t 0 \
        -o Data/ML

    # Stop here: anything added below this block will not run while the
    # guard above is 1.
    exit
endif
