#!/usr/bin/perl -w

# runTrainTestLR.pl

# Shane Bergsma
# July 5, 2009

# Script to handle training and classifying with liblinear (logistic regression):

# Command line: five flag/value pairs, in this fixed order:
#   -c <c-values> -s <data-sets> -f <feature-list> -t <training-sizes> -o <output-dir>
# The c-values, data sets and training sizes are each split on '_' later on.
my ($cflag, $CValues, $sflag, $DataSets, $fflag, $featureList, $tflag, $TrainingSizes, $oflag, $outputDIR) = @ARGV;

# Exactly ten arguments are required (five flags plus their five values).
if (scalar(@ARGV) != 10) {
  die "Must pass c-values, data-set names, feature-list, training-sizes, and output directory!";
}

# Every flag has to be the expected one, at the expected position.
if ($cflag ne "-c" || $sflag ne "-s" || $fflag ne "-f" || $tflag ne "-t" || $oflag ne "-o") {
  die "Bad command-line arguments";
}

use File::Basename;
use File::Spec::Functions qw(rel2abs);

# Check that the output directory exists (and can be opened) before doing
# any work.  A directory handle must be released with closedir(), not
# close(): close() on a dirhandle fails, warns under -w, and leaves the
# handle open.  A lexical handle also avoids the global bareword DIR.
opendir(my $outputDirHandle, $outputDIR)
  or die "Must provide an existing output directory for generated SVM data";
closedir($outputDirHandle);

# Helper scripts (liblinear2weight.pl) are looked up in the directory that
# holds this script, regardless of where it is invoked from.
my $scriptDIR = dirname(rel2abs($0));
print "Using scripts in $scriptDIR\n";

###### #HERE#: THIS IS WHAT YOU'LL NEED TO CHANGE IF YOU HAVE IT INSTALLED ELSEWHERE ######
# Provide the path to your local copy of liblinear.
# NOTE(review): the leading "~" is not expanded by Perl; this relies on the
# commands below being run through a shell that expands it.
my $liblinearPath = "~/Tools/LibLinear/liblinear-1.6";

# Extract the sets we use: the first '_'-separated name is the training set,
# every following name is a test (dev) set.
my ($trainSet, @testSets) = split(/_/,$DataSets);

# Guard with defined() first: when $DataSets is empty or names only a
# training set, the variables are undef and a bare 'eq ""' would emit
# "uninitialized value" warnings under -w before dying.
die "Must pass at least train/dev"
  if (!defined($trainSet) || $trainSet eq ""
      || !defined($testSets[0]) || $testSets[0] eq "");

# Progress labels below are printed without a trailing newline and are then
# followed by output from child processes on the same stream.  Turn on
# autoflush so the label reliably appears before the child's output
# (perldoc -f system recommends this "to be safe").
$| = 1;

# For every training size, and within that for every c value: train a model,
# optionally dump its weights, classify each test set, then clean up.
my @allTrainSizes = split(/_/,$TrainingSizes);
foreach my $trainingSize (@allTrainSizes) {
  # Give some feedback if we're actually running different sizes:
  print "SVM on size: $trainingSize\n" if (scalar(@allTrainSizes) > 1);

  # Set up the training set for this training size.  A size of "0" means
  # "use the training file as is"; any other value trains on the first
  # $trainingSize lines, copied into a temporary file next to the original.
  my $trainSet2Use = $trainSet;
  if ($trainingSize ne "0") {
    $trainSet2Use = "$trainSet.first$trainingSize.tmp";
    Execute("head -n $trainingSize $trainSet > $trainSet2Use");
  }

  foreach my $cValue (split(/_/,$CValues)) {
    # Now, set the model name used in the resolution:
    my $model = basename($trainSet) . ".c$cValue.model";

    # Run the training on the labeled examples.
    # (Per the liblinear README, "-s 0" selects L2-regularized logistic
    # regression and "-B 1" adds a bias term -- confirm against your version.)
    my $trainStatus = Execute("$liblinearPath/train -q -s 0 -B 1 -c $cValue $trainSet2Use $outputDIR/$model");
    if ($trainStatus != 0) {
      # Without a usable model, the weight dump and predictions below would
      # only produce cascading errors, so skip this c value.
      warn "Training failed for C-Value:$cValue, TrainSize:$trainingSize; skipping its test sets\n";
      Execute("rm $outputDIR/$model") if (-e "$outputDIR/$model");
      next;
    }

    # Only when training with ALL the examples, get the weights from
    # the model (zero-valued weights are filtered out, the rest sorted by
    # weight):
    printAndExecute("$scriptDIR/liblinear2weight.pl $outputDIR/$model $featureList | tr ' ' '\n' | grep -v \":0.000\" | grep -v \":-0.000\" | sort -gk 2 -t : > $outputDIR/$model.weights") if ($trainingSize eq "0");

    # Then classify the test sets.  The label is left without a newline so
    # that whatever predict prints follows it on the same line.
    # NOTE(review): the prediction file name does not include the training
    # size, so with several sizes later runs overwrite earlier .preds files.
    foreach my $testSet (@testSets) {
      print "TEST-SET: $testSet, C-Value:$cValue, TrainSize:$trainingSize: ";
      my $predFile = "$outputDIR/" . basename($testSet) . ".$model.preds";
      Execute("$liblinearPath/predict -b 1 $testSet $outputDIR/$model $predFile");
    }
    # Then, ditch the model -- you don't need it anymore:
    Execute("rm $outputDIR/$model");
  }    # End loop through c values

  # If we built it just for this time:
  Execute("rm $trainSet2Use") if ($trainingSize ne "0");
}    # End loop through training sizes

# Print the given command string (prefixed with "> ") and then run it.
#
# The command is passed to system() as a single string.  A failure is no
# longer silent: a warning is issued when the command cannot be started or
# ends with a non-zero wait status.
#
# Returns the raw system() status: 0 on success, -1 if the command could not
# be run, otherwise the wait status (exit code is status >> 8).
sub printAndExecute {
  my ($command) = @_;
  print "> $command\n";
  my $status = system($command);
  if ($status == -1) {
    warn "Could not run command ($!): $command\n";
  } elsif ($status != 0) {
    warn sprintf("Command failed (exit code %d, signal %d): %s\n",
                 $status >> 8, $status & 127, $command);
  }
  return $status;
}

# Run the given command string quietly (unlike printAndExecute, the command
# itself is not echoed).
#
# The command is passed to system() as a single string.  A failure is no
# longer silent: a warning is issued when the command cannot be started or
# ends with a non-zero wait status.
#
# Returns the raw system() status: 0 on success, -1 if the command could not
# be run, otherwise the wait status (exit code is status >> 8).
sub Execute {
  my ($command) = @_;
  my $status = system($command);
  if ($status == -1) {
    warn "Could not run command ($!): $command\n";
  } elsif ($status != 0) {
    warn sprintf("Command failed (exit code %d, signal %d): %s\n",
                 $status >> 8, $status & 127, $command);
  }
  return $status;
}
