/*
 * Decompiled with CFR 0.152.
 */
package tsg.parsingExp;

import java.io.File;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeSet;
import java.util.Vector;
import settings.Parameters;
import tsg.Label;
import tsg.TSNodeLabel;
import tsg.mb.TreeMarkoBinarization;
import tsg.mb.TreeMarkoBinarizationLeft_Petrov;
import tsg.metrics.MetricOptimizerArray;
import tsg.metrics.ParseMetricOptimizer;
import tsg.parseEval.EvalB;
import tsg.parseEval.EvalC;
import tsg.parsingExp.ConvertFragmentsToCFGRulesDouble;
import tsg.parsingExp.ConvertFragmentsToCFGRulesInt;
import tsg.parsingExp.ExtractPetrovPCFG;
import tsg.parsingExp.TSGparsingBitParDirect;
import tsg.parsingExp.TSGparsingBitParDirectSeparate;
import tsg.parsingExp.TSGparsingBitParFile;
import tsg.parsingExp.TSGparsingBitParFileSeparate;
import tsg.parsingExp.TSGparsingBitParNoThreads;
import tsg.parsingExp.TSGparsingBitParNoThreadsSeparate;
import tsg.utils.CleanPetrov;
import util.ArgumentReader;
import util.FileUtil;
import util.PrintProgress;
import util.Utility;
import wordModel.UkWordMapping;
import wordModel.UkWordMappingPetrov;
import wordModel.WordFeatures;

public abstract class TSGparsingBitPar
extends Thread {
    // ---- Static configuration shared by all parser instances ----

    // NOTE(review): the methods below use the literal "^" (constant-inlined by the decompiler);
    // it is written in front of each word in the BitPar lexicon and in smoothed lexical rules.
    static final String lexPosSeparationString = "^";
    static final char lexPosSeparationChar = "^".charAt(0);
    // Unknown-word model; only consulted when UkWordMapping.ukThreashold > 0 (see preprocessUnknownWords).
    static UkWordMapping ukModel;
    // Passed to BitPar through its "-b" option (see run()).
    static int nBest;
    // Minimum fragment frequency kept by filterFragmentsFreq(); values <= 1 keep every fragment.
    static int fragmentFreqThreashold;
    // When true, CFG rules of the training treebank missing from the fragment file are added to the grammar.
    static boolean extractUnseenCFGrules;
    // Only reported in the part of the class visible here; presumably used by subclasses.
    static int threads;
    // Passed to TSNodeLabel.getTreebank when reading the test file.
    static int sentenceLengthLimitTest;
    // When > 0, only the first N test trees are kept.
    static int restrictTestToFirst;
    // When true, the generated BitPar grammar file is sorted with FileUtil.sortFile.
    static boolean sortGrammar;
    // When true, parsed trees are passed through CleanPetrov.cleanPetrovTree during post-processing.
    static boolean usingPetrov;
    // When false, run() prepares grammar and lexicon but skips parsing and evaluation.
    static boolean parse;
    // Only reported in the part of the class visible here.
    static boolean removeTmpFiles;
    // BitPar executable; defaults to Parameters.bitparApp.
    static String bitparApp;
    // Full BitPar command line assembled in run().
    static String bitparCommandAndArgs;
    // Label prefix of the artificial node marking an ambiguous fragment; the digits that follow
    // are the index into ambiguousCFGmapping (see replaceRulesWithFragments).
    static final String internalFakeNodeLabel = "NODE@";
    static final int internalFakeNodeLabelLength = 5;
    static final String fakePrelexPrefix = "^";
    // When true, the training treebank is Markov-binarized and parsed trees are un-binarized.
    static boolean markoBinarize;
    static TreeMarkoBinarization treeMarkovBinarizer;
    // Selects ConvertFragmentsToCFGRulesInt (true) or ConvertFragmentsToCFGRulesDouble (false).
    static boolean freqAreInt;
    // When > 0, run() uses extractCFGfreqPetrovSmooth() and switches freqAreInt off.
    static double petrovSmoothingFactor;
    // When true, smoothLexicon() also adds open-class tags for known open-class words.
    static boolean smoothLexicon;
    // Weight written in front of every rule added by smoothLexicon().
    static double smoothLexFactor;
    // A POS tag seen with more than this many distinct words is treated as open class.
    static int openClassThreshold;

    // ---- Per-run state ----

    // Output directory path, always ending with "/".
    String outputPath;
    File trainingFile;
    File testFile;
    // Copy of the (possibly restricted) test treebank written into the output directory; used as gold by parseEval().
    File testFileClean;
    ArrayList<TSNodeLabel> trainingTreebank;
    ArrayList<TSNodeLabel> testTreebank;
    // References to the treebanks as read from disk, taken before unknown-word processing and binarization.
    ArrayList<TSNodeLabel> originalTrainingTreebank;
    ArrayList<TSNodeLabel> originalTestTreebank;
    int trainingSize;
    int testSize;
    // Input fragment file (may be null); replaced by the filtered file in filterFragmentsFreq().
    File fragmentsFile;
    // Fragment file actually converted into the BitPar grammar.
    File finalFragmentsFile;
    File bibtpar_grammarFile;
    File bitpar_lexiconFile;
    // Read by parseEval(); not assigned in the part of the class visible here
    // (presumably filled in by parseWithBitPar() implementations).
    File[] parsedOutputFiles;
    String[] parsedOutputFilesIdentifiers;
    TreeSet<String> trainingLexicon;
    // Words of the test treebank that do not occur in the training lexicon.
    TreeSet<String> testLexiconUnwnown;
    // Fragments reached through a NODE@<index> node, addressed by index.
    Vector<TSNodeLabel> ambiguousCFGmapping;
    // Fragments identified directly by their CFG rule string.
    Hashtable<String, TSNodeLabel> unambiguousCFGmapping;
    PrintProgress progress;
    // Root label of the first training tree; passed to BitPar through "-s".
    String topSymbol;
    // NOTE(review): not used in the visible part of the class; presumably markers found in BitPar output.
    static final String viterbProbPrefix = "vitprob=";
    static final String noParseMessage = "No parse for: ";
    static final int viterbProbPrefixLength;

    // Default values of the static configuration.
    static {
        // External tool location.
        bitparApp = Parameters.bitparApp;

        // What to run.
        parse = true;
        removeTmpFiles = true;
        nBest = 1000;

        // Grammar construction.
        fragmentFreqThreashold = -1;
        extractUnseenCFGrules = true;
        freqAreInt = true;
        sortGrammar = false;
        usingPetrov = false;

        // Lexicon smoothing.
        smoothLexFactor = 0.01;
        openClassThreshold = 50;

        // Derived constant.
        viterbProbPrefixLength = viterbProbPrefix.length();
    }

    public TSGparsingBitPar(File trainingFile, File testFile, File fragmentsFile, File outputDir) {
        this.trainingFile = trainingFile;
        this.testFile = testFile;
        this.fragmentsFile = fragmentsFile;
        this.outputPath = outputDir.exists() ? outputDir + "/" + "Parsing_" + FileUtil.dateTimeString() + "/" : outputDir + "/";
        this.testFileClean = new File(String.valueOf(this.outputPath) + testFile.getName());
        new File(this.outputPath).mkdirs();
    }

    /**
     * Runs the whole experiment: reads the treebanks, builds the BitPar grammar
     * and lexicon, and (when {@code parse} is set) parses and evaluates the test set.
     *
     * Changes with respect to the previous version:
     * <ul>
     * <li>The log file is closed in a {@code finally} block, so it is also closed
     *     when a step fails or the method returns early.</li>
     * <li>{@link #filterFragmentsFreq()} now runs <em>before</em> the final fragment
     *     file is produced. Previously the filter ran after {@code finalFragmentsFile}
     *     had been written, so the filtered file was never the one converted into
     *     the grammar.</li>
     * </ul>
     */
    @Override
    public void run() {
        Parameters.openLogFile(new File(this.outputPath + "log.txt"));
        try {
            this.printStartingParameters();
            this.getTrainingAndTestTreebanks();
            this.preprocessUnknownWords();
            this.checkUnknownWords();
            this.markoBinarizeTraining();
            this.getTopSymbol();
            // Filter first, so that the threshold applies to the fragments that end up in the grammar.
            this.filterFragmentsFreq();
            if (petrovSmoothingFactor > 0.0) {
                // Smoothed rule frequencies are not integers.
                freqAreInt = false;
                this.extractCFGfreqPetrovSmooth();
            } else {
                this.extractUnseenCFGrules();
            }
            this.makeMappingFragmentsToCFRules();
            this.makeLexiconBitPar();
            try {
                this.smoothLexicon();
            }
            catch (Exception e1) {
                e1.printStackTrace();
                return;
            }
            if (parse) {
                System.gc();
                bitparCommandAndArgs = bitparApp + " -vp -b " + nBest + " -s " + this.topSymbol + " " + this.bibtpar_grammarFile + " " + this.bitpar_lexiconFile;
                Parameters.reportLineFlush("Parsing with BitPar using " + threads + " threads");
                Parameters.reportLineFlush("Parsing " + this.testSize + " sentences:");
                this.progress = new PrintProgress("Sentence #:");
                try {
                    this.parseWithBitPar();
                }
                catch (Exception e) {
                    e.printStackTrace();
                    Parameters.reportError(e.getMessage());
                    return;
                }
                this.progress.end();
                this.parseEval();
            }
        }
        finally {
            // Runs on normal completion, on the early returns above and on unchecked exceptions.
            Parameters.closeLogFile();
        }
    }

    /**
     * Reads the training and test treebanks, optionally restricts the test set to
     * its first {@code restrictTestToFirst} trees, writes the resulting test set to
     * {@code testFileClean} and records the treebank sizes.
     *
     * The restriction is clamped to the number of available test trees: previously
     * a limit larger than the treebank made {@code subList} throw, the exception
     * was swallowed below, and the run continued with {@code testSize == 0} and no
     * clean test file.
     *
     * Any failure is printed and reported to the log; the method itself does not throw.
     */
    private void getTrainingAndTestTreebanks() {
        try {
            Parameters.reportLineFlush("Reading Traininig Treebank");
            this.trainingTreebank = TSNodeLabel.getTreebank(this.trainingFile);
            this.trainingSize = this.trainingTreebank.size();
            Parameters.reportLineFlush("Traininig Treebank Size: " + this.trainingSize);
            Parameters.reportLineFlush("Reading Test Treebank");
            this.testTreebank = TSNodeLabel.getTreebank(this.testFile, sentenceLengthLimitTest);
            if (restrictTestToFirst > 0) {
                // Never ask for more trees than the treebank contains.
                int keep = Math.min(restrictTestToFirst, this.testTreebank.size());
                this.testTreebank = new ArrayList<TSNodeLabel>(this.testTreebank.subList(0, keep));
            }
            TSNodeLabel.printTreebankToFile(this.testFileClean, this.testTreebank, false, false);
            this.testSize = this.testTreebank.size();
            Parameters.reportLineFlush("Test Treebank Size: " + this.testSize);
            this.originalTrainingTreebank = this.trainingTreebank;
            this.originalTestTreebank = this.testTreebank;
        }
        catch (Exception e) {
            e.printStackTrace();
            // Leave a trace in the log file as well, not only on stderr.
            Parameters.reportLineFlush("Error while reading the treebanks: " + e);
        }
    }

    /**
     * Applies the unknown-word model to both treebanks and dumps the transformed
     * treebanks into the output directory. Does nothing unless
     * {@code UkWordMapping.ukThreashold} is positive.
     */
    private void preprocessUnknownWords() {
        if (UkWordMapping.ukThreashold > 0) {
            Parameters.reportLineFlush("Processing Unknown Words");

            ukModel.init(this.trainingTreebank, this.testTreebank);
            this.trainingTreebank = ukModel.transformTrainingTreebank();
            this.testTreebank = ukModel.transformTestTreebank();

            File trainingDump = new File(this.outputPath + "trainingTreebank_UK.mrg");
            File testDump = new File(this.outputPath + "testTreebank_UK.mrg");
            TSNodeLabel.printTreebankToFile(trainingDump, this.trainingTreebank, false, false);
            TSNodeLabel.printTreebankToFile(testDump, this.testTreebank, false, false);

            Parameters.reportLineFlush("Printed training treebank after unknonw word process to: " + trainingDump);
            Parameters.reportLineFlush("Printed test treebank after unknonw word process to: " + testDump);
        }
    }

    /**
     * Markov-binarizes the training treebank (when {@code markoBinarize} is set)
     * and writes the binarized treebank into the output directory.
     */
    private void markoBinarizeTraining() {
        if (markoBinarize) {
            Parameters.reportLineFlush("MarkoBinarize Training Treebank");
            this.trainingTreebank = treeMarkovBinarizer.markoBinarizeTreebank(this.trainingTreebank);

            File binarizedDump = new File(this.outputPath + "trainingTreebank_UK_MB.mrg");
            TSNodeLabel.printTreebankToFile(binarizedDump, this.trainingTreebank, false, false);
            Parameters.reportLineFlush("Printed training treebank after MarkoBinarization to: " + binarizedDump);
        }
    }

    /**
     * Takes the root label of the first training tree as the grammar start symbol
     * and publishes it to {@code ParseMetricOptimizer.topLabel}.
     */
    private void getTopSymbol() {
        TSNodeLabel firstTree = this.trainingTreebank.get(0);
        String rootLabel = firstTree.label();
        this.topSymbol = rootLabel;
        ParseMetricOptimizer.topLabel = Label.getLabel(rootLabel);
        Parameters.reportLineFlush("Grammar Starting Symbol:" + rootLabel);
    }

    /**
     * Collects the training lexicon and the set of test words that do not occur
     * in it, and reports the latter when it is not empty.
     */
    private void checkUnknownWords() {
        this.trainingLexicon = getLexiconFromTreebank(this.trainingTreebank);

        TreeSet<String> unseenWords = getLexiconFromTreebank(this.testTreebank);
        this.testLexiconUnwnown = unseenWords;
        unseenWords.removeAll(this.trainingLexicon);

        if (unseenWords.isEmpty()) {
            return;
        }
        Parameters.reportLineFlush("Test treebank contains following unknown words:\n" + unseenWords);
        Parameters.reportLineFlush("Will Smooth them");
    }

    /**
     * Writes the fragments whose frequency is at least
     * {@code fragmentFreqThreashold} to a new file in the output directory and
     * makes {@code fragmentsFile} point to it.
     *
     * Nothing happens when there is no fragment file or the threshold is {@code <= 1}.
     * Each input line is expected to be {@code fragment<TAB>frequency}; blank lines
     * are skipped (they used to abort the run with an
     * {@code ArrayIndexOutOfBoundsException}).
     *
     * The scanner and the writer are now closed in a {@code finally} block; the
     * scanner was previously never closed.
     */
    private void filterFragmentsFreq() {
        if (this.fragmentsFile == null) {
            Parameters.reportLineFlush("No fragment File");
            return;
        }
        if (fragmentFreqThreashold <= 1) {
            Parameters.reportLineFlush("Using all fragments (threashold<=1)");
            return;
        }
        Parameters.reportLineFlush("Filtering Fragments: keeping only those with freq >= " + fragmentFreqThreashold);
        File filteredFile = new File(this.outputPath + "filteredFragments_ge" + fragmentFreqThreashold + ".txt");
        int originalTotalFragments = 0;
        int filteredFragments = 0;
        PrintWriter pw = FileUtil.getPrintWriter(filteredFile);
        Scanner fragmentScan = null;
        try {
            fragmentScan = FileUtil.getScanner(this.fragmentsFile);
            while (fragmentScan.hasNextLine()) {
                String line = fragmentScan.nextLine();
                if (line.trim().length() == 0) {
                    // e.g. a trailing empty line: not a fragment
                    continue;
                }
                ++originalTotalFragments;
                String[] treeFreq = line.split("\t");
                long freq = Long.parseLong(treeFreq[1]);
                if (freq < (long)fragmentFreqThreashold) continue;
                ++filteredFragments;
                pw.println(line);
            }
        }
        finally {
            if (fragmentScan != null) {
                fragmentScan.close();
            }
            pw.close();
        }
        this.fragmentsFile = filteredFile;
        Parameters.reportLineFlush("Printing filtered fragments to " + filteredFile);
        Parameters.reportLineFlush("Original Fragments # : " + originalTotalFragments);
        Parameters.reportLineFlush("Kept Fragments # : " + filteredFragments);
    }

    /**
     * Builds {@code finalFragmentsFile}: a copy of the fragment file followed by
     * every CFG rule of the training treebank that is not already present as a
     * fragment (with its treebank frequency). The added rules are also written to
     * {@code unseenCFG.txt}.
     *
     * When {@code extractUnseenCFGrules} is off, the fragment file is used as is.
     *
     * The scanner and both writers are now closed in {@code finally} blocks; the
     * scanner was previously never closed and the writers leaked on failure.
     */
    private void extractUnseenCFGrules() {
        if (!extractUnseenCFGrules) {
            this.finalFragmentsFile = this.fragmentsFile;
            return;
        }
        File unseenCFGrulesFile = new File(this.outputPath + "unseenCFG.txt");
        this.finalFragmentsFile = new File(this.outputPath + "fragmentsAndCfgRules.txt");
        Parameters.reportLine("Collecting Unseen CFG rules in " + unseenCFGrulesFile);
        Parameters.reportLineFlush("Printing all fragments to " + this.finalFragmentsFile);
        PrintWriter allFragmPW = FileUtil.getPrintWriter(this.finalFragmentsFile);
        PrintWriter unseenCFRulesPW = null;
        try {
            HashSet<String> fragmentSet = new HashSet<String>();
            int totalFragments = 0;
            if (this.fragmentsFile != null) {
                Scanner fragmentScan = FileUtil.getScanner(this.fragmentsFile);
                try {
                    while (fragmentScan.hasNextLine()) {
                        String line = fragmentScan.nextLine();
                        // the fragment is the first tab-separated field
                        fragmentSet.add(line.split("\t")[0]);
                        allFragmPW.println(line);
                        ++totalFragments;
                    }
                }
                finally {
                    fragmentScan.close();
                }
            }
            Hashtable<String, int[]> cfgRulesFreq = this.extractCFGfreq();
            int added = 0;
            unseenCFRulesPW = FileUtil.getPrintWriter(unseenCFGrulesFile);
            for (Map.Entry<String, int[]> e : cfgRulesFreq.entrySet()) {
                // fragments are stored bracketed, CFG rule keys are not
                String cfgRule = "(" + e.getKey() + ")";
                if (fragmentSet.contains(cfgRule)) continue;
                int freq = e.getValue()[0];
                String newLine = cfgRule + "\t" + freq;
                unseenCFRulesPW.println(newLine);
                allFragmPW.println(newLine);
                ++added;
            }
            Parameters.reportLine("Total fragments: " + totalFragments);
            Parameters.reportLine("Added CFGrules: " + added);
            Parameters.reportLineFlush("Total fragments + CFG rules: " + (totalFragments + added));
        }
        finally {
            if (unseenCFRulesPW != null) {
                unseenCFRulesPW.close();
            }
            allFragmPW.close();
        }
    }

    /**
     * Counts the CFG rules rooted at every non-lexical node of the training treebank.
     *
     * @return table from CFG rule string to its counter (as maintained by
     *         {@code Utility.increaseStringIntArray})
     */
    private Hashtable<String, int[]> extractCFGfreq() {
        Hashtable<String, int[]> ruleCounts = new Hashtable<String, int[]>();
        for (TSNodeLabel tree : this.trainingTreebank) {
            for (TSNodeLabel node : tree.collectAllNodes()) {
                if (!node.isLexical) {
                    Utility.increaseStringIntArray(ruleCounts, node.cfgRule());
                }
            }
        }
        Parameters.reportLine("Total CFG rules: " + ruleCounts.size());
        return ruleCounts;
    }

    /**
     * Builds {@code finalFragmentsFile}: a copy of the fragment file followed by
     * every CFG rule returned by {@code ExtractPetrovPCFG} (constructed with
     * {@code petrovSmoothingFactor}) that is not already present as a fragment,
     * with its (double) frequency.
     *
     * The scanner and the writer are now closed in {@code finally} blocks; the
     * scanner was previously never closed and the writer leaked on failure.
     */
    private void extractCFGfreqPetrovSmooth() {
        ExtractPetrovPCFG petrovPCFG = new ExtractPetrovPCFG(this.trainingTreebank, petrovSmoothingFactor);
        Hashtable<String, double[]> cfgFreqSmooth = petrovPCFG.getCFGfreq();
        this.finalFragmentsFile = new File(this.outputPath + "fragmentsAndCfgRules.txt");
        Parameters.reportLineFlush("Printing all fragments to " + this.finalFragmentsFile);
        PrintWriter allFragmPW = FileUtil.getPrintWriter(this.finalFragmentsFile);
        try {
            HashSet<String> fragmentSet = new HashSet<String>();
            int totalFragments = 0;
            if (this.fragmentsFile != null) {
                Scanner fragmentScan = FileUtil.getScanner(this.fragmentsFile);
                try {
                    while (fragmentScan.hasNextLine()) {
                        String line = fragmentScan.nextLine();
                        // the fragment is the first tab-separated field
                        fragmentSet.add(line.split("\t")[0]);
                        allFragmPW.println(line);
                        ++totalFragments;
                    }
                }
                finally {
                    fragmentScan.close();
                }
            }
            int added = 0;
            for (Map.Entry<String, double[]> e : cfgFreqSmooth.entrySet()) {
                // fragments are stored bracketed, CFG rule keys are not
                String cfgRule = "(" + e.getKey() + ")";
                if (fragmentSet.contains(cfgRule)) continue;
                double freq = e.getValue()[0];
                allFragmPW.println(cfgRule + "\t" + freq);
                ++added;
            }
            Parameters.reportLine("Total fragments: " + totalFragments);
            Parameters.reportLine("Added CFGrules: " + added);
            Parameters.reportLineFlush("Total fragments + CFG rules: " + (totalFragments + added));
        }
        finally {
            allFragmPW.close();
        }
    }

    /**
     * Converts {@code finalFragmentsFile} into the BitPar grammar file and keeps
     * the two fragment mappings produced by the converter (integer or double
     * frequencies depending on {@code freqAreInt}). Optionally sorts the grammar
     * file, then reports some statistics.
     */
    private void makeMappingFragmentsToCFRules() {
        Parameters.reportLineFlush("Making mapping Fragments to CFG rules and preparing grammar for BitPar");
        this.bibtpar_grammarFile = new File(this.outputPath + "bitpar_grammar.txt");
        File ambiguousFragmentsFile = new File(this.outputPath + "ambiguousFragmetnsCFG.txt");

        int cfgTypes;
        if (freqAreInt) {
            ConvertFragmentsToCFGRulesInt intConverter = new ConvertFragmentsToCFGRulesInt(this.finalFragmentsFile, ambiguousFragmentsFile, this.bibtpar_grammarFile, internalFakeNodeLabel, "^");
            this.ambiguousCFGmapping = intConverter.getAmbiguousCFGmapping();
            this.unambiguousCFGmapping = intConverter.getunambiguousCFGmapping();
            cfgTypes = intConverter.getCFGtypes();
        } else {
            ConvertFragmentsToCFGRulesDouble doubleConverter = new ConvertFragmentsToCFGRulesDouble(this.finalFragmentsFile, ambiguousFragmentsFile, this.bibtpar_grammarFile, internalFakeNodeLabel, "^");
            this.ambiguousCFGmapping = doubleConverter.getAmbiguousCFGmapping();
            this.unambiguousCFGmapping = doubleConverter.getunambiguousCFGmapping();
            cfgTypes = doubleConverter.getCFGtypes();
        }

        if (sortGrammar) {
            FileUtil.sortFile(this.bibtpar_grammarFile);
        }

        int ambiguousCount = this.ambiguousCFGmapping.size();
        int unambiguousCount = this.unambiguousCFGmapping.size();
        // float division: a zero denominator yields Infinity/NaN rather than an exception
        float ambiguityFactor = (float)ambiguousCount / (float)(cfgTypes - unambiguousCount);

        Parameters.reportLineFlush("Total CFG types: " + cfgTypes);
        Parameters.reportLineFlush("Total unambiguous fragments: " + unambiguousCount);
        Parameters.reportLineFlush("Total ambiguous fragments: " + ambiguousCount);
        Parameters.reportLineFlush("Ambiguity Factor: " + ambiguityFactor);
    }

    /**
     * Writes the BitPar lexicon: one line {@code word<TAB>^word 1} for every
     * training word, followed by the same for every unknown test word.
     */
    private void makeLexiconBitPar() {
        Parameters.reportLineFlush("Preparing lexicon for BitPar");
        this.bitpar_lexiconFile = new File(this.outputPath + "bitpar_lexicon.txt");
        PrintWriter out = FileUtil.getPrintWriter(this.bitpar_lexiconFile);
        for (String knownWord : this.trainingLexicon) {
            out.println(knownWord + "\t^" + knownWord + " 1");
        }
        for (String unknownWord : this.testLexiconUnwnown) {
            out.println(unknownWord + "\t^" + unknownWord + " 1");
        }
        out.close();
    }

    /**
     * Appends smoothed lexical rules ({@code POS ^word}, weight
     * {@code smoothLexFactor}) to the BitPar grammar file and registers the
     * corresponding one-level fragments in {@code unambiguousCFGmapping}.
     *
     * A POS tag is "open class" when it occurs with more than
     * {@code openClassThreshold} distinct (non-number) words in the training
     * treebank. Rules are added
     * <ul>
     * <li>when {@code smoothLexicon} is set: for every training word all of whose
     *     observed tags are open class, with every open-class tag it was not seen with;</li>
     * <li>always: for every unknown test word, with every open-class tag.</li>
     * </ul>
     * Nothing is done when {@code smoothLexicon} is off and there are no unknown words.
     *
     * The grammar writer is now closed in a {@code finally} block (it leaked on
     * failure), raw collection types were replaced by generic ones and the
     * duplicated rule-emission code was moved to a helper.
     *
     * @throws Exception if writing to the grammar file or building a fragment fails
     */
    private void smoothLexicon() throws Exception {
        if (!smoothLexicon && this.testLexiconUnwnown.isEmpty()) {
            return;
        }
        Parameters.reportLineFlush("Smoothing lexicon");
        Hashtable<Label, HashSet<Label>> wordPosMapping = new Hashtable<Label, HashSet<Label>>();
        Hashtable<Label, HashSet<Label>> posWordMapping = new Hashtable<Label, HashSet<Label>>();
        for (TSNodeLabel t : this.trainingTreebank) {
            for (TSNodeLabel lexNode : t.collectLexicalItems()) {
                Label pos = lexNode.parent.label;
                Label word = lexNode.label;
                // numbers are ignored when deciding which tags are open class
                if (WordFeatures.isNumber(word.toString())) continue;
                TSGparsingBitPar.addInWordPostable(wordPosMapping, word, pos);
                TSGparsingBitPar.addInWordPostable(posWordMapping, pos, word);
            }
        }
        HashSet<Label> openClassPosSet = new HashSet<Label>();
        int closeClassPos = 0;
        for (Map.Entry<Label, HashSet<Label>> entry : posWordMapping.entrySet()) {
            if (entry.getValue().size() > openClassThreshold) {
                openClassPosSet.add(entry.getKey());
            } else {
                ++closeClassPos;
            }
        }
        Parameters.reportLineFlush("Close class postags: " + closeClassPos);
        Parameters.reportLineFlush("Open class postags (" + openClassPosSet.size() + "): " + openClassPosSet);
        int added = 0;
        // append mode: the grammar file has already been written by the converter
        FileWriter fileWriter = new FileWriter(this.bibtpar_grammarFile, true);
        try {
            if (smoothLexicon) {
                for (Map.Entry<Label, HashSet<Label>> entry : wordPosMapping.entrySet()) {
                    Label word = entry.getKey();
                    HashSet<Label> existingPosForWord = entry.getValue();
                    // a word is open class only if every tag it was seen with is open class
                    if (!openClassPosSet.containsAll(existingPosForWord)) continue;
                    for (Label pos : openClassPosSet) {
                        if (existingPosForWord.contains(pos)) continue;
                        this.appendSmoothedLexRule(fileWriter, pos, word);
                        ++added;
                    }
                }
            }
            for (String unknownWord : this.testLexiconUnwnown) {
                Label word = Label.getLabel(unknownWord);
                for (Label pos : openClassPosSet) {
                    this.appendSmoothedLexRule(fileWriter, pos, word);
                    ++added;
                }
            }
        }
        finally {
            fileWriter.close();
        }
        Parameters.reportLineFlush("Added unseen word pos: " + added);
    }

    /**
     * Registers the fragment {@code (pos word)} under the rule {@code "pos ^word"}
     * and appends that rule, weighted by {@code smoothLexFactor}, to the grammar.
     */
    private void appendSmoothedLexRule(FileWriter grammarWriter, Label pos, Label word) throws Exception {
        String treeCFGrule = pos + " " + "^" + word;
        TSNodeLabel fragment = new TSNodeLabel(pos, false);
        fragment.assignUniqueDaughter(new TSNodeLabel(word, true));
        this.unambiguousCFGmapping.put(treeCFGrule, fragment);
        grammarWriter.append(smoothLexFactor + "\t" + treeCFGrule + "\n");
    }

    /**
     * Adds {@code pos} to the set stored under {@code word}, creating the set on
     * first use. Despite the parameter names the table is generic: the caller
     * also uses it with the roles of word and POS tag swapped.
     *
     * @param wordPosMapping table to update
     * @param word           key
     * @param pos            value added to the key's set
     */
    private static void addInWordPostable(Hashtable<Label, HashSet<Label>> wordPosMapping, Label word, Label pos) {
        HashSet<Label> posSet = wordPosMapping.get(word);
        if (posSet == null) {
            // typed instead of the raw HashSet used before (unchecked conversion)
            posSet = new HashSet<Label>();
            wordPosMapping.put(word, posSet);
        }
        posSet.add(pos);
    }

    /**
     * Collects the labels of all lexical items of a treebank.
     *
     * @param treebank trees to scan
     * @return sorted set of distinct word labels
     */
    private static TreeSet<String> getLexiconFromTreebank(ArrayList<TSNodeLabel> treebank) {
        TreeSet<String> words = new TreeSet<String>();
        for (TSNodeLabel tree : treebank) {
            for (TSNodeLabel leaf : tree.collectLexicalItems()) {
                words.add(leaf.label());
            }
        }
        return words;
    }

    /**
     * Collects the labels of all lexical items occurring in the fragments of a file
     * (each line is handed to the {@code TSNodeLabel} string constructor).
     *
     * The scanner is now closed in a {@code finally} block; it was previously
     * never closed, including on the early {@code return null}.
     *
     * @param fragmentsFile file with one fragment per line
     * @return sorted set of distinct word labels, or {@code null} if a line could
     *         not be parsed (the stack trace is printed)
     */
    private static TreeSet<String> getLexiconFromFragments(File fragmentsFile) {
        TreeSet<String> result = new TreeSet<String>();
        Scanner scan = FileUtil.getScanner(fragmentsFile);
        try {
            while (scan.hasNextLine()) {
                String line = scan.nextLine();
                try {
                    TSNodeLabel fragment = new TSNodeLabel(line, false);
                    for (TSNodeLabel l : fragment.collectLexicalItems()) {
                        result.add(l.label());
                    }
                }
                catch (Exception e) {
                    e.printStackTrace();
                    return null;
                }
            }
        }
        finally {
            scan.close();
        }
        return result;
    }

    /**
     * Parses the test set with BitPar. Called by {@link #run()} (only when
     * {@code parse} is set) after the grammar file, the lexicon file and
     * {@code bitparCommandAndArgs} have been prepared.
     *
     * NOTE(review): {@code parseEval()} reads {@code parsedOutputFiles} and
     * {@code parsedOutputFilesIdentifiers} right after this call and nothing in
     * this class assigns them, so implementations presumably have to fill them in.
     *
     * @throws Exception on any failure; {@code run()} prints and reports it and aborts the run
     */
    protected abstract void parseWithBitPar() throws Exception;

    /**
     * Writes sentences to a file, one word per line. Each sentence is followed by
     * an empty line.
     *
     * @param testSentencesWords sentences, each an array of words
     * @param outputFile         file to write
     */
    public static void printBitParFlatSentence(ArrayList<String[]> testSentencesWords, File outputFile) {
        PrintWriter out = FileUtil.getPrintWriter(outputFile);
        for (String[] words : testSentencesWords) {
            // println adds the line break after the last word; the extra "\n" gives the empty line
            out.println(Utility.joinStringArrayToString(words, "\n") + "\n");
        }
        out.close();
    }

    /**
     * Writes a single sentence to a file, one word per line, followed by an
     * empty line.
     *
     * @param testSentenceWords words of the sentence
     * @param outputFile        file to write
     */
    public static void printBitParFlatSentence(String[] testSentenceWords, File outputFile) {
        String oneWordPerLine = Utility.joinStringArrayToString(testSentenceWords, "\n");
        PrintWriter out = FileUtil.getPrintWriter(outputFile);
        // println adds the line break after the last word; the extra "\n" gives the empty line
        out.println(oneWordPerLine + "\n");
        out.close();
    }

    /**
     * Advances the progress display by one sentence. Synchronized on this
     * instance.
     */
    protected synchronized void doneWithOneSentence() {
        PrintProgress sentenceProgress = this.progress;
        sentenceProgress.next();
    }

    /**
     * Turns a tree built from BitPar grammar rules back into a treebank-style tree:
     * rules are replaced by their fragments, then Markov binarization is undone
     * (if {@code markoBinarize}) and the tree is passed through
     * {@code CleanPetrov.cleanPetrovTree} (if {@code usingPetrov}).
     *
     * @param tree parsed tree
     * @return the post-processed tree
     */
    public TSNodeLabel postProcessParseTree(TSNodeLabel tree) {
        TSNodeLabel processed = this.replaceRulesWithFragments(tree);
        if (markoBinarize) {
            processed = treeMarkovBinarizer.undoMarkovBinarization(processed);
        }
        if (usingPetrov) {
            processed = CleanPetrov.cleanPetrovTree(processed);
        }
        return processed;
    }

    /**
     * Recursively rebuilds the derivation of {@code tree} out of fragments.
     *
     * The fragment for the top rule is looked up either by index (when the first
     * daughter is an artificial {@code NODE@<index>} node, in which case that node
     * is spliced out of {@code tree}) or by the CFG rule string of {@code tree}.
     * A clone of the fragment is returned whose non-lexical terminals have been
     * expanded with the fragments of the corresponding daughters of {@code tree}.
     *
     * Fixes with respect to the previous version:
     * <ul>
     * <li>a non-numeric index after {@code NODE@} is reported like any other
     *     malformed label instead of escaping as {@code NumberFormatException};</li>
     * <li>an index outside {@code ambiguousCFGmapping} is reported instead of
     *     throwing from {@code Vector.get};</li>
     * <li>a CFG rule without a registered fragment is reported instead of causing
     *     a {@code NullPointerException};</li>
     * <li>a failed sub-derivation propagates {@code null} instead of causing a
     *     {@code NullPointerException};</li>
     * <li>grafted daughters get the node that now owns them ({@code term}) as
     *     parent; they were previously re-pointed at the discarded root of the
     *     sub-derivation.</li>
     * </ul>
     *
     * @param tree node of the BitPar parse (modified when it carries a NODE@ daughter)
     * @return the reconstructed tree, or {@code null} if a fragment could not be found
     */
    private synchronized TSNodeLabel replaceRulesWithFragments(TSNodeLabel tree) {
        String firstDaughterLabel = tree.firstDaughter().label();
        TSNodeLabel fragment = null;
        if (firstDaughterLabel.startsWith(internalFakeNodeLabel)) {
            int index;
            try {
                index = Integer.parseInt(firstDaughterLabel.substring(internalFakeNodeLabel.length()));
            }
            catch (StringIndexOutOfBoundsException e) {
                Parameters.reportLineFlush("Error302: " + firstDaughterLabel + " " + tree);
                return null;
            }
            catch (NumberFormatException e) {
                Parameters.reportLineFlush("Error302: " + firstDaughterLabel + " " + tree);
                return null;
            }
            if (index >= 0 && index < this.ambiguousCFGmapping.size()) {
                fragment = this.ambiguousCFGmapping.get(index);
            }
            if (fragment == null) {
                Parameters.reportError("Couldn't find the fragment uniquely associated with unique index:" + index);
                return null;
            }
            // splice out the artificial node: its daughters are the real daughters of the rule
            tree.daughters = tree.firstDaughter().daughters;
        } else {
            String cfgRule = tree.cfgRule();
            fragment = this.unambiguousCFGmapping.get(cfgRule);
            if (fragment == null) {
                Parameters.reportError("Couldn't find the fragment associated with CFG rule: " + cfgRule);
                return null;
            }
        }
        // work on a copy: the mapped fragment is shared between derivations
        TSNodeLabel result = fragment.clone();
        // the terminals of the fragment are consumed in step with the daughters of the rule
        Iterator<TSNodeLabel> termIter = result.collectTerminalItems().iterator();
        for (TSNodeLabel d : tree.daughters) {
            TSNodeLabel term = termIter.next();
            if (term.isLexical) continue;
            TSNodeLabel subFragment = this.replaceRulesWithFragments(d);
            if (subFragment == null) {
                return null;
            }
            // graft the sub-derivation below the substitution site
            term.daughters = subFragment.daughters;
            for (TSNodeLabel grafted : subFragment.daughters) {
                grafted.parent = term;
            }
        }
        return result;
    }

    /**
     * Evaluates every parsed output file against the clean test file with EvalB
     * and EvalC and reports the three scores returned by EvalC.
     */
    private void parseEval() {
        Parameters.reportLineFlush("Running EvalB and EvalC");
        DecimalFormat twoDecimals = new DecimalFormat("0.00");
        int total = this.parsedOutputFiles.length;
        for (int fileIndex = 0; fileIndex < total; fileIndex++) {
            File parsedFile = this.parsedOutputFiles[fileIndex];
            String identifier = this.parsedOutputFilesIdentifiers[fileIndex];
            File evalBfile = FileUtil.changeExtention(parsedFile, "evalB");
            File evalCfile = FileUtil.changeExtention(parsedFile, "evalC");
            // the EvalB object is created only for the work done by its constructor
            new EvalB(this.testFileClean, parsedFile, evalBfile);
            EvalC evalC = new EvalC(this.testFileClean, parsedFile, evalCfile, null, true);
            float[] scores = evalC.makeEval();
            String scoreList = "[" + twoDecimals.format(scores[0]) + ", " + twoDecimals.format(scores[1]) + ", " + twoDecimals.format(scores[2]) + "]";
            Parameters.reportLineFlush(Utility.fse(15, identifier) + ":\tRecall, Precision, Fscore (<=" + EvalC.CUTOFF_LENGTH + "):  " + scoreList);
        }
    }

    /**
     * Extracts the yield of every tree.
     *
     * @param treebank trees to read
     * @return one array of word labels per tree, in treebank order
     */
    protected static ArrayList<String[]> getSentencesWords(ArrayList<TSNodeLabel> treebank) {
        ArrayList<String[]> sentences = new ArrayList<String[]>();
        for (TSNodeLabel tree : treebank) {
            ArrayList<TSNodeLabel> leaves = tree.collectLexicalItems();
            String[] words = new String[leaves.size()];
            for (int position = 0; position < words.length; position++) {
                words[position] = leaves.get(position).label();
            }
            sentences.add(words);
        }
        return sentences;
    }

    /**
     * Name identifying the concrete parser implementation; printed in the header
     * of the log by {@code printStartingParameters()}.
     *
     * @return the name to report
     */
    public abstract String getClassName();

    /**
     * Writes the configuration of this run to the log.
     */
    private void printStartingParameters() {
        // general settings
        Parameters.reportLine("\nTSGparsingBitPar (" + getClassName() + ")\n");
        Parameters.reportLine("Threads: " + threads);
        Parameters.reportLine("Extract Unseen CFGrules: " + extractUnseenCFGrules);
        Parameters.reportLine("Fragment frequency threashold (>=): " + fragmentFreqThreashold);

        // unknown-word handling
        boolean ukEnabled = UkWordMapping.ukThreashold > 0;
        Parameters.reportLine("Preprocess Unknown Words: " + ukEnabled);
        if (ukEnabled) {
            Parameters.reportLine("Threashold Uk: " + UkWordMapping.ukThreashold);
            Parameters.reportLine("UkModel: " + ukModel.getClass());
        }
        Parameters.reportLine("Remove Tmp Files: " + removeTmpFiles);

        // binarization
        Parameters.reportLine("Marko Binarize: " + markoBinarize);
        if (markoBinarize) {
            Parameters.reportLine("Binarization Model: " + treeMarkovBinarizer.getDescription());
            Parameters.reportLine("Markov Horizontal: " + TreeMarkoBinarization.markH);
            Parameters.reportLine("Markov Vertical: " + TreeMarkoBinarization.markV);
        }

        // parsing and input files
        Parameters.reportLine("Frequencies are integers: " + freqAreInt);
        Parameters.reportLine("nBest: " + nBest);
        Parameters.reportLine("Sentence Length Limit Test: " + sentenceLengthLimitTest);
        Parameters.reportLine("Training Treebank File: " + trainingFile);
        Parameters.reportLine("Test Treebank File: " + testFile);
        Parameters.reportLine("Test Treebank Cleaned File: " + testFileClean);

        // smoothing
        Parameters.reportLine("Using Petrov: " + usingPetrov);
        if (usingPetrov) {
            Parameters.reportLine("Petrov smoothing factor: " + petrovSmoothingFactor);
        }
        Parameters.reportLine("Smoothing lexicon: " + smoothLexicon);
        if (smoothLexicon) {
            Parameters.reportLine("Smoothing lexicon factor: " + smoothLexFactor);
            Parameters.reportLine("Open class threshold: " + openClassThreshold);
        }

        // locations
        Parameters.reportLine("Fragments File: " + fragmentsFile);
        Parameters.reportLineFlush("Output Directory: " + outputPath);
    }

    /**
     * Produces the fallback tree for a sentence that could not be parsed, using
     * {@code TSNodeLabel.defaultWSJparse} with the grammar start symbol, and logs it.
     *
     * @param originalTestSentenceWords words of the unparsed sentence
     * @return the default parse
     */
    protected TSNodeLabel dealWithNOParsedSentences(String[] originalTestSentenceWords) {
        TSNodeLabel fallback = TSNodeLabel.defaultWSJparse(originalTestSentenceWords, this.topSymbol);
        String sentenceText = Arrays.toString(originalTestSentenceWords);
        Parameters.reportLineFlush("No parse for sentence: " + sentenceText + "\n\tDefault parse: " + fallback);
        return fallback;
    }

    /**
     * Debugging helper: compares two {@code fragmentsAndCfgRules.txt} files line by
     * line (first tab-separated field only) and prints the first pair of lines
     * that differ. The comparison stops at the end of the shorter file. The file
     * locations are hard-coded.
     *
     * Both scanners are now closed in {@code finally} blocks; they were previously
     * never closed.
     *
     * @param args ignored
     * @throws Exception if one of the files cannot be opened
     */
    public static void main1(String[] args) throws Exception {
        String outputPath = "/Users/fedja/Work/Code/TreeGrammars/tmp/WsjUk4/";
        File fragmentsFile1 = new File(outputPath + "Parsing_Tue_Nov_16_00_18_57/fragmentsAndCfgRules.txt");
        File fragmentsFile2 = new File(outputPath + "Parsing_Tue_Nov_16_00_20_50/fragmentsAndCfgRules.txt");
        Scanner scan1 = new Scanner(fragmentsFile1);
        try {
            Scanner scan2 = new Scanner(fragmentsFile2);
            try {
                while (scan1.hasNextLine() && scan2.hasNextLine()) {
                    String line1 = scan1.nextLine().split("\t")[0];
                    String line2 = scan2.nextLine().split("\t")[0];
                    if (line1.equals(line2)) continue;
                    System.out.println(line1);
                    System.out.println(line2);
                    return;
                }
            }
            finally {
                scan2.close();
            }
        }
        finally {
            scan1.close();
        }
    }

    /**
     * Command-line entry point. Resets the static configuration to its
     * defaults, applies any {@code -name:value} options, reads the four
     * trailing positional arguments (training treebank, test treebank,
     * fragment file, output directory), instantiates the matching
     * {@code TSGparsingBitPar} subclass and runs it.
     *
     * <p>Every argument before the last four is treated as an option; an
     * unrecognised option, or an argument count outside {@code 4..28},
     * prints the usage string to {@code System.err} and exits with status
     * {@code -1}.
     *
     * @param args optional {@code -name:value} options followed by the four
     *             positional file arguments
     * @throws Exception propagated from option parsing, parser construction
     *                   or the parsing run
     */
    public static void main(String[] args) throws Exception {
        String nBestOption = "-nBest:";
        String threadsOption = "-threads:";
        String fragmentFreqThreasholdOption = "-fragmentFreqThreashold:";
        String extractUnseenCFGrulesOption = "-extractUnseenCFGrules:";
        String unknownThreasholdOption = "-ukThreshold:";
        String ukModelOption = "-ukModel:";
        String removeTmpFilesOption = "-removeTmpFiles:";
        String directParsingOption = "-directParsing:";
        String sentenceLengthLimitTestOption = "-sentenceLengthLimitTest:";
        String runSeparateBitParPerSentenceOption = "-runSeparateBitParPerSentence:";
        String restrictTestToFirstOption = "-restrictTestToFirst:";
        String markoBinarizeOption = "-markoBinarize:";
        String markovH_option = "-markovH:";
        String markovV_option = "-markovV:";
        String runOnlyMPD_option = "-runOnlyMPD:";
        String freqAreIntOption = "-freqAreInt:";
        String sortGrammarFileOption = "-sortGrammarFile:";
        String usingPetrovOption = "-usingPetrov:";
        String petrovSmoothingFactorOption = "-petrovSmoothingFactor:";
        String smoothLexiconOption = "-smoothLexicon:";
        String smoothLexiconFactorOption = "-smoothLexiconFactor:";
        String bitparPathOption = "-bitparPath:";

        // Defaults, applied before the command line is inspected.
        nBest = 1000;
        threads = 1;
        sentenceLengthLimitTest = 1000;
        restrictTestToFirst = -1;
        fragmentFreqThreashold = -1;
        extractUnseenCFGrules = true;
        removeTmpFiles = true;
        parse = true;
        boolean directParsing = true;
        boolean runSeparateBitParPerSentence = false;
        UkWordMapping.ukThreashold = -1;
        ukModel = new UkWordMappingPetrov();
        TreeMarkoBinarization.markH = 1;
        TreeMarkoBinarization.markV = 2;
        treeMarkovBinarizer = new TreeMarkoBinarizationLeft_Petrov();
        MetricOptimizerArray.setLambdaValues(0.0, 2.0, 0.05);

        String usage = "USAGE: java [-Xmx10G] tsg.parsingExp.TSGparsingBitPar [-threads:2] [-nBest:1000] [-ukModel:petrov] [-ukThreshold:-1] [-fragmentFreqThreashold:-1] [-extractUnseenCFGrules:true] [-removeTmpFiles:true] [-directParsing:true] [-runSeparateBitParPerSentence:false] [-sentenceLengthLimitTest:1000] [-restrictTestToFirst:-1] [-markoBinarize:false] [-markovH:1] [-markovV:2] [-runOnlyMPD:false] [-freqAreInt:true] [-usingPetrov:false] [-petrovSmoothingFactor:0] [-smoothLexicon:false][-bitparPath:] trainingTreebankFile testTreebankFile fragmentFile outputDir";

        // The four positional arguments are mandatory. Rejecting anything
        // shorter here avoids an ArrayIndexOutOfBoundsException further down
        // when only 1-3 arguments are supplied.
        if (args.length < 4 || args.length > 28) {
            System.err.println("Incorrect number of arguments: " + args.length);
            System.err.println(usage);
            System.exit(-1);
        }

        // Everything before the last four arguments is an option.
        int i = 0;
        while (i < args.length - 4) {
            String option = args[i];
            if (option.startsWith(nBestOption)) {
                nBest = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(threadsOption)) {
                threads = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(fragmentFreqThreasholdOption)) {
                fragmentFreqThreashold = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(extractUnseenCFGrulesOption)) {
                extractUnseenCFGrules = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(ukModelOption)) {
                ukModel = UkWordMapping.getModel(ArgumentReader.readStringOption(option));
            } else if (option.startsWith(unknownThreasholdOption)) {
                UkWordMapping.ukThreashold = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(removeTmpFilesOption)) {
                removeTmpFiles = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(directParsingOption)) {
                directParsing = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(runSeparateBitParPerSentenceOption)) {
                runSeparateBitParPerSentence = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(sentenceLengthLimitTestOption)) {
                sentenceLengthLimitTest = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(markoBinarizeOption)) {
                markoBinarize = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(markovH_option)) {
                TreeMarkoBinarization.markH = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(markovV_option)) {
                TreeMarkoBinarization.markV = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(restrictTestToFirstOption)) {
                restrictTestToFirst = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(freqAreIntOption)) {
                freqAreInt = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(runOnlyMPD_option)) {
                MetricOptimizerArray.runOnlyMPD = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(sortGrammarFileOption)) {
                sortGrammar = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(usingPetrovOption)) {
                usingPetrov = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(petrovSmoothingFactorOption)) {
                petrovSmoothingFactor = ArgumentReader.readDoubleOption(option);
            } else if (option.startsWith(smoothLexiconOption)) {
                smoothLexicon = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(smoothLexiconFactorOption)) {
                smoothLexFactor = ArgumentReader.readDoubleOption(option);
            } else if (option.startsWith(bitparPathOption)) {
                bitparApp = ArgumentReader.readStringOption(option);
            } else {
                System.err.println("Not a valid option: " + option);
                System.err.println(usage);
                System.exit(-1);
            }
            ++i;
        }

        File trainingFile = ArgumentReader.readFileOptionNoSeparation(args[i++]);
        File testFile = ArgumentReader.readFileOptionNoSeparation(args[i++]);
        File fragmentsFile = ArgumentReader.readFileOptionNoSeparation(args[i++]);
        File outputDir = ArgumentReader.readFileOptionNoSeparation(args[i++]);

        // Pick the implementation: with exactly one thread the NoThreads
        // variants are used regardless of -directParsing; otherwise
        // -directParsing chooses between the Direct and File variants. In each
        // case -runSeparateBitParPerSentence selects the *Separate flavour.
        TSGparsingBitPar T;
        if (threads == 1) {
            if (runSeparateBitParPerSentence) {
                T = new TSGparsingBitParNoThreadsSeparate(trainingFile, testFile, fragmentsFile, outputDir);
            } else {
                T = new TSGparsingBitParNoThreads(trainingFile, testFile, fragmentsFile, outputDir);
            }
        } else if (directParsing) {
            if (runSeparateBitParPerSentence) {
                T = new TSGparsingBitParDirectSeparate(trainingFile, testFile, fragmentsFile, outputDir);
            } else {
                T = new TSGparsingBitParDirect(trainingFile, testFile, fragmentsFile, outputDir);
            }
        } else {
            if (runSeparateBitParPerSentence) {
                T = new TSGparsingBitParFileSeparate(trainingFile, testFile, fragmentsFile, outputDir);
            } else {
                T = new TSGparsingBitParFile(trainingFile, testFile, fragmentsFile, outputDir);
            }
        }
        // run() is invoked directly (not start()), so the work executes on the
        // calling thread.
        T.run();
    }
}

