#!/usr/bin/perl -w
#
# Extract the weight vector from a liblinear model output.  Pass the
# model and the feature list.

use strict;
use warnings;

# Usage: <script> MODEL_FILE FEATURE_LIST_FILE
#
# MODEL_FILE is a liblinear text model; FEATURE_LIST_FILE holds one feature
# name per line, in feature-index order.  For every weight row of the model
# one line is written to STDOUT, made of space-separated entries of the form
#   <feature>*<label>:<weight with 5 decimals>
# with one entry per weight column.  Size mismatches are reported on STDERR.
my ($modelFN, $featureListFN) = @ARGV;
die "Usage: $0 MODEL_FILE FEATURE_LIST_FILE\n"
  unless defined $modelFN && defined $featureListFN;

# Three-argument open with lexical handles: the file names are taken
# literally, so a name starting with '>' or ending in '|' cannot change the
# open mode.
open(my $modelFH, '<', $modelFN)
  or die "Could not open $modelFN: $!";
open(my $featureListFH, '<', $featureListFN)
  or die "Could not open $featureListFN: $!";

# Read the model header by keyword rather than by line position, so that a
# header with a missing "label" line or with additional lines does not shift
# the start of the weight section.  The header ends at the line "w".
my @labels;
my $bias;
my $sawWeightMarker = 0;
while (defined(my $headerLine = <$modelFH>)) {
  my ($key, @values) = split(' ', $headerLine);
  next unless defined $key;          # blank line
  if ($key eq 'w') {
    $sawWeightMarker = 1;
    last;
  }
  if ($key eq 'label') {
    @labels = @values;
  }
  elsif ($key eq 'bias') {
    $bias = $values[0];
  }
}
die "Error: no \"w\" section found in $modelFN\n" unless $sawWeightMarker;

# liblinear stores a trailing bias weight row only when the bias value is
# non-negative.  If the header carried no bias line at all, keep the
# historical assumption that a bias row is present.
my $hasBias = (!defined($bias) || $bias >= 0) ? 1 : 0;

# Read the corresponding weights and feature names:
my @modelWeights = <$modelFH>;
close($modelFH);
s/\r?\n\z// for @modelWeights;

my @featWeights = <$featureListFH>;
close($featureListFH);
s/\r?\n\z// for @featWeights;      # also drops the CR of CRLF files
push(@featWeights, "bias") if $hasBias;

my $sizeModelWeights = scalar(@modelWeights);
my $sizeFeatWeights = scalar(@featWeights);
print STDERR "Warning: feature weight list size $sizeFeatWeights and model weight size $sizeModelWeights are different\n"
  if ($sizeModelWeights != $sizeFeatWeights);
die "Error: less features $sizeFeatWeights than model parameters $sizeModelWeights.\n"
  if ($sizeModelWeights > $sizeFeatWeights);

# The feature list may name more features than occurred in the training
# set, e.g. 100 model rows against 110 names.  When the model has a bias
# row it is always the LAST model row (index 99 in that example), so that
# slot is relabelled and the names after it stay unused.  Without a bias
# row nothing is relabelled, so the last real feature keeps its name.
$featWeights[$sizeModelWeights - 1] = "bias"
  if ($hasBias && $sizeModelWeights > 0);

# Go through both lists in parallel:
for my $row (0 .. $#modelWeights) {
  my $feat = $featWeights[$row];
  # All weights of this feature, one per weight column.  split(' ')
  # tolerates leading, trailing and repeated whitespace.
  my @weights = split(' ', $modelWeights[$row]);
  my @output;
  for my $col (0 .. $#weights) {
    # A model can have fewer labels than weight columns (no label line);
    # fall back to an empty label instead of interpolating undef.
    my $label = defined($labels[$col]) ? $labels[$col] : '';
    push(@output, sprintf("%s*%s:%.5f", $feat, $label, $weights[$col]));
  }
  print "@output\n";
}
