/*
 * Decompiled with CFR 0.152.
 */
package tsg.parser.petrov;

import java.io.File;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Map;
import java.util.Scanner;
import java.util.Vector;
import settings.Parameters;
import tsg.TSNodeLabel;
import tsg.metrics.MetricOptimizerArray;
import tsg.parseEval.EvalB;
import tsg.parseEval.EvalC;
import tsg.parser.petrov.BitParParserPetrov;
import util.FileUtil;
import util.Utility;
import wordModel.UkWordMapping;
import wordModel.UkWordMappingPetrov;

/**
 * Driver that converts a Petrov-style grammar and lexicon into BitPar input
 * files, parses a test treebank through {@link BitParParserPetrov} and
 * evaluates every parsed output file with EvalB and EvalC.
 *
 * <p>All configuration lives in static fields that {@link #main(String[])}
 * fills in before invoking {@link #run()}. Every artefact of a run (log,
 * cleaned test treebank, transformed treebanks, converted grammar and lexicon)
 * is written below a freshly created {@code Parsing_<timestamp>/} directory
 * inside {@code outputDir}.
 */
public class RunPetrovGrammarWithBitpar
extends Thread {
    /** Unknown-word model applied to both treebanks before parsing. */
    static UkWordMapping ukModel;
    /** Forwarded as-is to the {@link BitParParserPetrov} constructor. */
    static int nBest;
    /** Forwarded as-is to the {@link BitParParserPetrov} constructor. */
    static String topSymbol;
    /** Probability threshold used to filter grammar rules and lexicon entries. */
    static double minProbRule;
    static File petrovGrammarFile;
    static File petrovLexiconFile;
    /** Parent directory in which the per-run output directory is created. */
    static File outputDir;
    static int threads;
    static File trainTreebankFile;
    static File testTreebankFile;
    /** Passed to {@code TSNodeLabel.getTreebank} when reading the test treebank. */
    static int sentenceLengthLimitTest;
    /** Per-run output directory path, always ending with "/". Set by {@link #run()}. */
    String outputPath;
    File outputBitparGrammar;
    File outputBitparLexicon;
    /** Copy of the (length-limited) test treebank written into the run directory. */
    File testTreebankFileClean;
    ArrayList<TSNodeLabel> originalTrainingTreebank;
    ArrayList<TSNodeLabel> trainingTreebank;
    ArrayList<TSNodeLabel> originalTestTreebank;
    ArrayList<TSNodeLabel> testTreebank;
    int testSize;
    BitParParserPetrov parser;
    /** When true, unary rules whose converted LHS equals their converted child are dropped. */
    static boolean removeCyclicRules;
    /** Regex whose first match is removed from every converted grammar label. */
    static String regexConvertLabel;
    /** Regex of the characters stripped from lexicon probability tokens. */
    static String regexCleanProb;

    static {
        regexConvertLabel = "\\^g";
        regexCleanProb = "[\\[\\]\\,]";
    }

    /**
     * Executes the whole pipeline: create the run directory, open the log,
     * read the treebanks, apply the unknown-word model, convert grammar and
     * lexicon, parse, and evaluate.
     *
     * @throws IllegalStateException if the run directory cannot be created or
     *         the treebanks cannot be read
     */
    @Override
    public void run() {
        this.outputPath = outputDir + "/" + "Parsing_" + FileUtil.dateTimeString() + "/";
        File runDirectory = new File(this.outputPath);
        // mkdirs() also creates a missing outputDir; fail loudly instead of
        // letting every later file operation fail with an unrelated error.
        if (!runDirectory.mkdirs() && !runDirectory.isDirectory()) {
            throw new IllegalStateException("Cannot create output directory: " + runDirectory);
        }
        Parameters.openLogFile(new File(this.outputPath + "log.txt"));
        this.outputParametersToLogFile();
        this.testTreebankFileClean = new File(this.outputPath + testTreebankFile.getName());
        this.getTrainingAndTestTreebanks();
        this.preprocessUnknownWords();
        this.convertGrammar();
        this.convertLexicon();
        try {
            this.parse();
        }
        catch (Exception e) {
            e.printStackTrace();
            // The parser may be null or incomplete here; evaluating would
            // only hide the real failure behind a secondary exception.
            Parameters.reportLineFlush("Parsing failed (" + e + "); skipping evaluation");
            return;
        }
        this.parseEval();
    }

    /**
     * Builds the BitPar parser over the transformed and original test
     * treebanks and runs it.
     *
     * @throws Exception whatever the parser constructor or {@code runParser()} throws
     */
    private void parse() throws Exception {
        Parameters.reportLineFlush("Parsing with BitPar using " + threads + " threads");
        Parameters.reportLineFlush("Parsing " + this.testSize + " sentences:");
        this.parser = new BitParParserPetrov(this.testTreebank, this.originalTestTreebank, threads, this.outputPath, this.outputBitparGrammar, this.outputBitparLexicon, nBest, topSymbol);
        System.gc();
        this.parser.runParser();
    }

    /**
     * Runs EvalB and EvalC on every file reported by the parser, comparing it
     * against the cleaned test treebank, and logs the three EvalC figures
     * (reported as recall, precision and F-score) per file.
     */
    private void parseEval() {
        Parameters.reportLineFlush("Running EvalB and EvalC");
        DecimalFormat df = new DecimalFormat("0.00");
        File[] parsedOutputFiles = this.parser.getParsedFiles();
        String[] parsedOutputFilesIdentifiers = this.parser.getParsedFilesIdentifiers();
        for (int i = 0; i < parsedOutputFiles.length; ++i) {
            File parsedFile = parsedOutputFiles[i];
            String id = parsedOutputFilesIdentifiers[i];
            File evalBfile = FileUtil.changeExtention(parsedFile, "evalB");
            File evalCfile = FileUtil.changeExtention(parsedFile, "evalC");
            // EvalB is instantiated only for the work its constructor performs.
            new EvalB(this.testTreebankFileClean, parsedFile, evalBfile);
            EvalC eval = new EvalC(this.testTreebankFileClean, parsedFile, evalCfile, null, true);
            float[] results = eval.makeEval();
            Parameters.reportLineFlush(String.valueOf(Utility.fse(15, id)) + ":\tRecall, Precision, Fscore (<=" + EvalC.CUTOFF_LENGTH + "):  [" + df.format(results[0]) + ", " + df.format(results[1]) + ", " + df.format(results[2]) + "]");
        }
    }

    /**
     * Reads the training treebank and the (length-limited) test treebank,
     * writes the test treebank to {@code testTreebankFileClean}, and keeps
     * references to both lists as the "original" treebanks.
     *
     * @throws IllegalStateException if reading or writing a treebank fails
     */
    private void getTrainingAndTestTreebanks() {
        try {
            Parameters.reportLineFlush("Reading Traininig Treebank");
            this.trainingTreebank = TSNodeLabel.getTreebank(trainTreebankFile);
            Parameters.reportLineFlush("Traininig Treebank Size: " + this.trainingTreebank.size());
            Parameters.reportLineFlush("Reading Test Treebank");
            this.testTreebank = TSNodeLabel.getTreebank(testTreebankFile, sentenceLengthLimitTest);
            TSNodeLabel.printTreebankToFile(this.testTreebankFileClean, this.testTreebank, false, false);
            this.testSize = this.testTreebank.size();
            Parameters.reportLineFlush("Test Treebank Size: " + this.testSize);
            this.originalTrainingTreebank = this.trainingTreebank;
            this.originalTestTreebank = this.testTreebank;
        }
        catch (Exception e) {
            // Continuing with null treebanks would only fail later with a
            // NullPointerException that hides the real cause.
            throw new IllegalStateException("Could not read the training/test treebanks", e);
        }
    }

    /**
     * Initialises the unknown-word model with both treebanks, replaces the
     * working treebanks with their transformed versions and writes the
     * transformed treebanks into the run directory.
     */
    private void preprocessUnknownWords() {
        Parameters.reportLineFlush("Processing Unknown Words with model: " + ukModel.getClass());
        Parameters.reportLineFlush("UK threshold: " + UkWordMapping.ukThreashold);
        ukModel.init(this.trainingTreebank, this.testTreebank);
        this.trainingTreebank = ukModel.transformTrainingTreebank();
        this.testTreebank = ukModel.transformTestTreebank();
        File transformedTrainingTreBankFile = new File(this.outputPath + "trainingTreebank_UK.mrg");
        File transformedTestTreBankFile = new File(this.outputPath + "testTreebank_UK.mrg");
        TSNodeLabel.printTreebankToFile(transformedTrainingTreBankFile, this.trainingTreebank, false, false);
        TSNodeLabel.printTreebankToFile(transformedTestTreBankFile, this.testTreebank, false, false);
        Parameters.reportLineFlush("Printed training treebank after unknonw word process to: " + transformedTrainingTreBankFile);
        Parameters.reportLineFlush("Printed test treebank after unknonw word process to: " + transformedTestTreBankFile);
    }

    /**
     * Rewrites {@code petrovGrammarFile} as {@code bitpar_grammar.txt}.
     *
     * <p>For every non-blank input line the first token is taken as the LHS,
     * the second token is ignored, the last token is parsed as the rule
     * probability and the tokens in between are the RHS children. Output lines
     * have the form {@code prob LHS child...} with all labels passed through
     * {@link #convertLable(String)}. Rules with {@code prob < minProbRule} are
     * dropped, as are (when {@code removeCyclicRules} is set) four-token rules
     * whose converted LHS equals the converted child.
     */
    private void convertGrammar() {
        this.outputBitparGrammar = new File(this.outputPath + "bitpar_grammar.txt");
        Parameters.reportLineFlush("Writing grammar to: " + this.outputBitparGrammar);
        Scanner grammarScan = FileUtil.getScanner(petrovGrammarFile);
        PrintWriter pw = FileUtil.getPrintWriter(this.outputBitparGrammar);
        int cyclicRulesSkipped = 0;
        int smallProbRuleSkipped = 0;
        try {
            while (grammarScan.hasNextLine()) {
                String line = grammarScan.nextLine().trim();
                if (line.isEmpty()) {
                    // Blank lines carry no rule; parsing them would throw.
                    continue;
                }
                // "\\s+" keeps the token count stable when fields are
                // separated by more than one whitespace character.
                String[] lineSplit = line.split("\\s+");
                int length = lineSplit.length;
                double prob = Double.parseDouble(lineSplit[length - 1]);
                if (prob < minProbRule) {
                    ++smallProbRuleSkipped;
                    continue;
                }
                String lhs = RunPetrovGrammarWithBitpar.convertLable(lineSplit[0]);
                // Four tokens (LHS, separator, child, prob) mean a unary rule.
                if (length == 4) {
                    String rhsChild = RunPetrovGrammarWithBitpar.convertLable(lineSplit[2]);
                    if (removeCyclicRules && lhs.equals(rhsChild)) {
                        ++cyclicRulesSkipped;
                        continue;
                    }
                    pw.println(prob + " " + lhs + " " + rhsChild);
                    continue;
                }
                pw.print(prob + " " + lhs);
                for (int i = 2; i < length - 1; ++i) {
                    pw.print(" " + RunPetrovGrammarWithBitpar.convertLable(lineSplit[i]));
                }
                pw.println();
            }
        }
        finally {
            pw.close();
            grammarScan.close();
        }
        Parameters.reportLine("Skipped small prob rules: " + smallProbRuleSkipped);
        Parameters.reportLineFlush("Skipped cyclic rules: " + cyclicRulesSkipped);
    }

    /**
     * Converts a grammar label: every {@code '_'} becomes {@code '-'} and the
     * first match of {@code regexConvertLabel} is removed.
     *
     * @param label label as it appears in the input grammar
     * @return the converted label
     */
    private static String convertLable(String label) {
        return label.replace('_', '-').replaceFirst(regexConvertLabel, "");
    }

    /**
     * Rewrites {@code petrovLexiconFile} as {@code bitpar_lexicon.txt}.
     *
     * <p>Backslashes are removed from every input line. The first token is the
     * POS tag, the second the word, and each following token is a probability
     * (cleaned by {@link #cleanProb(String)}) whose position {@code k}, counted
     * from 0, yields the entry {@code POS-k prob}. Only probabilities strictly
     * greater than {@code minProbRule} are kept. The output has one line per
     * word with at least one surviving entry: the word followed by its
     * tab-separated entries.
     */
    private void convertLexicon() {
        this.outputBitparLexicon = new File(this.outputPath + "bitpar_lexicon.txt");
        Parameters.reportLineFlush("Writing lexicon to: " + this.outputBitparLexicon);
        Scanner lexiconScan = FileUtil.getScanner(petrovLexiconFile);
        // Hashtable is retained so the order of the emitted words is unchanged.
        Hashtable<String, Vector<String>> lexPosProbTable = new Hashtable<String, Vector<String>>();
        try {
            while (lexiconScan.hasNextLine()) {
                String line = lexiconScan.nextLine().replaceAll("\\\\", "").trim();
                if (line.isEmpty()) {
                    continue;
                }
                String[] lineSplit = line.split("\\s+");
                String pos = lineSplit[0];
                String lex = lineSplit[1];
                Vector<String> entriesForWord = lexPosProbTable.get(lex);
                if (entriesForWord == null) {
                    entriesForWord = new Vector<String>();
                    lexPosProbTable.put(lex, entriesForWord);
                }
                for (int i = 2; i < lineSplit.length; ++i) {
                    double prob = RunPetrovGrammarWithBitpar.cleanProb(lineSplit[i]);
                    // NOTE(review): strict '>' here, whereas convertGrammar keeps
                    // rules with prob == minProbRule - confirm this is intended.
                    if (prob > minProbRule) {
                        entriesForWord.add(pos + "-" + (i - 2) + " " + prob);
                    }
                }
            }
        }
        finally {
            lexiconScan.close();
        }
        PrintWriter pw = FileUtil.getPrintWriter(this.outputBitparLexicon);
        try {
            for (Map.Entry<String, Vector<String>> entry : lexPosProbTable.entrySet()) {
                Vector<String> entriesForWord = entry.getValue();
                if (entriesForWord.isEmpty()) {
                    continue;
                }
                pw.print(entry.getKey());
                for (String posProb : entriesForWord) {
                    pw.print("\t" + posProb);
                }
                pw.println();
            }
        }
        finally {
            pw.close();
        }
    }

    /**
     * Strips every character matched by {@code regexCleanProb} from the token
     * and parses the remainder as a double.
     *
     * @param p raw probability token from the lexicon file
     * @return the parsed probability
     * @throws NumberFormatException if the cleaned token is not a valid double
     */
    private static double cleanProb(String p) {
        return Double.parseDouble(p.replaceAll(regexCleanProb, ""));
    }

    /** Writes the effective configuration of this run to the log. */
    private void outputParametersToLogFile() {
        Parameters.reportLine("RunPetrovGrammarWithBitpar\n");
        Parameters.reportLine("outputPath: " + this.outputPath);
        Parameters.reportLine("nBest: " + nBest);
        Parameters.reportLine("topSymbol: " + topSymbol);
        Parameters.reportLine("sentenceLengthLimitTest: " + sentenceLengthLimitTest);
        Parameters.reportLine("minProbRule: " + minProbRule);
        Parameters.reportLine("petrovGrammarFile: " + petrovGrammarFile);
        Parameters.reportLine("petrovLexiconFile: " + petrovLexiconFile);
        Parameters.reportLine("trainTreebankFile: " + trainTreebankFile);
        Parameters.reportLine("testTreebankFile: " + testTreebankFile);
        Parameters.reportLine("removeCyclicRules: " + removeCyclicRules);
        Parameters.reportLineFlush("threads: " + threads);
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code petrovGrammarFile petrovLexiconFile outputDir
     *             trainTreebankFile testTreebankFile threads minProbRule}
     * @throws IllegalArgumentException if fewer than seven arguments are given
     * @throws Exception declared for compatibility with existing callers
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 7) {
            throw new IllegalArgumentException("Expected 7 arguments: <petrovGrammarFile> <petrovLexiconFile> <outputDir> <trainTreebankFile> <testTreebankFile> <threads> <minProbRule>, but got " + args.length);
        }
        UkWordMapping.ukThreashold = 0;
        ukModel = new UkWordMappingPetrov();
        nBest = 1000;
        topSymbol = "ROOT-0";
        sentenceLengthLimitTest = 40;
        MetricOptimizerArray.setLambdaValues(0.0, 2.0, 0.5);
        removeCyclicRules = true;
        petrovGrammarFile = new File(args[0]);
        petrovLexiconFile = new File(args[1]);
        outputDir = new File(args[2]);
        trainTreebankFile = new File(args[3]);
        testTreebankFile = new File(args[4]);
        threads = Integer.parseInt(args[5]);
        minProbRule = Double.parseDouble(args[6]);
        // run() is called directly (not start()), so the pipeline executes
        // on the calling thread and main returns only once it has finished.
        new RunPetrovGrammarWithBitpar().run();
    }
}

