package szte.csd;

import szte.datamining.DataHandler;
import szte.datamining.mallet.MalletDataHandler;

/**
 * 
 * This is a utility class that exports the Vector Space Model representations of the corpora in the SVMlight format (used as a baseline).
 *
 */
public class DataSetSaver {

  /**
   * Builds the training and evaluation datasets of one task and writes them out,
   * one train/eval pair per label.
   *
   * <p>The training VSM is built once. Then, for every key of
   * {@code getTerms()}, the training data is relabeled for that label, passed
   * through feature selection, and the evaluation VSM is built against the
   * feature-selected training data and relabeled the same way. The two datasets
   * are saved as {@code svm/<task><pl>/train_<label>} and
   * {@code svm/<task><pl>/eval_<label>}.
   *
   * <p>Note that this assigns the static {@code CSDModel.PHRASELENGTH}, so the
   * value stays in effect after the call returns.
   *
   * @param task name of the task; stored on the detector and used in the output path
   * @param pl   phrase length, written to {@code CSDModel.PHRASELENGTH} and used in the output path
   * @throws Exception propagated from the detector and the dataset handlers
   */
  public static void save(String task, int pl) throws Exception {
    ContentShiftDetector detector = new ContentShiftDetector();
    detector.task = task;
    CSDModel.PHRASELENGTH = pl;

    detector.readCorpora();
    detector.initClassifiers(detector.train);
    detector.traindata = detector.buildVSM(detector.train, null);

    // Re-enable growth of the target alphabet before the per-label relabeling;
    // presumably relabel() has to register new target labels -- TODO confirm.
    MalletDataHandler malletTrain = (MalletDataHandler) detector.traindata;
    malletTrain.data.getTargetAlphabet().startGrowth();

    // Common prefix of both output files of every label.
    // NOTE(review): whether saveDataset() creates a missing directory is not visible here.
    String outputPrefix = "svm/" + task + pl;

    for (String label : detector.getTerms().keySet()) {
      System.out.println(label);

      // Training side: relabel in place, then run feature selection on the result.
      detector.relabel(detector.traindata, detector.train, label);
      DataHandler selectedTrain = detector.featuresel(detector.traindata);

      // Evaluation side: built against the feature-selected training data.
      DataHandler evalSet = detector.buildVSM(detector.eval, selectedTrain);
      detector.relabel(evalSet, detector.eval, label);

      selectedTrain.saveDataset(outputPrefix + "/train_" + label);
      evalSet.saveDataset(outputPrefix + "/eval_" + label);
    }
  }

  /**
   * Exports the datasets of the four tasks (cmc, wiki, obes, reuters) for
   * phrase lengths 1 and 2; all tasks are processed for length 1 before length 2.
   *
   * @param args ignored
   * @throws Exception propagated from {@link #save(String, int)}
   */
  public static void main(String[] args) throws Exception {
    String[] tasks = {"cmc", "wiki", "obes", "reuters"};
    for (int phraseLength = 1; phraseLength <= 2; phraseLength++) {
      for (String task : tasks) {
        save(task, phraseLength);
      }
    }
  }
}
