/*
 * Decompiled with CFR 0.152.
 */
package tsg.kernels;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import settings.Parameters;
import tsg.TSNodeLabel;
import tsg.TSNodeLabelStructure;
import tsg.corpora.Wsj;
import tsg.kernels.AllOrderedNodeSubSet;
import tsg.kernels.CommonStructures;
import tsg.kernels.CommonSubBranch;
import tsg.kernels.CommonSubBranchMUBFreqThreads;
import tsg.kernels.CommonSubBranchMUBThreads;
import tsg.kernels.CommonSubtreesMUBFreqThreads;
import tsg.kernels.CommonSubtreesMUBThreads;
import tsg.kernels.RetrieveCorrectFreqQueue;
import tsg.kernels.SortAndMergeFragmentFiles;
import tsg.kernels.SortAndMergeFragmentFilesBz;
import tsg.kernels.UncoveredFragmentsExtractor;
import tsg.mb.TreeMarkoBinarization;
import tsg.mb.TreeMarkoBinarizationLeft_Petrov;
import util.ArgumentReader;
import util.FileUtil;
import wordModel.UkWordMapping;
import wordModel.UkWordMappingPetrov;

/**
 * Command-line driver that extracts fragments from a treebank file.
 *
 * <p>The run proceeds in stages, each of which is skipped when its output file
 * is already present in the output directory:
 * <ol>
 *   <li>read the treebank (optionally binarizing it and/or replacing unknown words);</li>
 *   <li>extract fragments into numbered files in a {@code tmp} sub-directory and
 *       merge them into a file of approximate frequencies;</li>
 *   <li>optionally compute exact frequencies with {@link RetrieveCorrectFreqQueue};</li>
 *   <li>optionally delete the temporary files;</li>
 *   <li>optionally collect the "cover" fragments and concatenate them with the
 *       extracted fragments into an {@code ALL} file.</li>
 * </ol>
 *
 * <p>All state is kept in static fields, so the class supports a single run per JVM.
 */
public class FragmentSeeker {
    /* Command-line option prefixes; the value follows the colon. */
    static String exactFrequenciesOption = "-exactFrequencies:";
    static String partialFragmentsOption = "-partialFragments:";
    static String maxCombinationOption = "-maxCombination:";
    static String maxMappingsOption = "-maxMappings:";
    static String flushToDiskEveryOption = "-flushToDiskEvery:";
    static String resumeOption = "-resumeDir:";
    static String threadsOption = "-threads:";
    static String compressTmpFilesOption = "-compressTmpFiles:";
    static String removeTmpFilesOption = "-removeTmpFiles:";
    static String extractUncoveredFragmentsOption = "-extractUncoveredFragments:";
    static String uknownThresholdOption = "-ukThreshold:";
    static String markoBinarizeOption = "-markoBinarize:";
    static String outputPathOption = "-outputPath:";

    /** Output directory path, always ending with "/" once resolved by {@link #main}. */
    static String outputPath = null;
    /** Directory of a previous run to resume, or {@code null} for a fresh run. */
    static String resumePathDir = null;
    /** Whether exact frequencies are computed after the extraction. */
    static boolean exactFrequencies = true;
    /** Whether partial fragments (sub-branches) are extracted instead of subtrees. */
    static boolean partialFragments = false;
    /** Whether the temporary fragment files are compressed. */
    static boolean compressTmpFiles = false;
    /** Whether the temporary directory is deleted at the end of the run. */
    static boolean removeTmpFiles = true;
    /** Whether cover fragments are collected at the end of the run. */
    static boolean extractCoverFragments = false;
    /** Whether the treebank is binarized before the extraction. */
    static boolean markoBinarize = false;
    /** Number of trees per temporary fragment file; must be positive. */
    static int flushToDiskEvery = 100;
    /** Number of worker threads handed to the extraction classes. */
    static int threads = 1;

    static File treebankFile;
    static ArrayList<TSNodeLabelStructure> treebankStructure;
    static ArrayList<TSNodeLabel> treebank;
    static int treebankSize;
    /** Index handed to the extraction classes; advanced by {@link #resumeTmpDir()}. */
    static int startIndex = 0;
    /** Number of fragment files already present when resuming. */
    static int fileCounterStart = 0;
    static String prefixFragmentsFiles = "fragments_";
    static String prefixTmpFragmentsFiles = "tmp_";
    static String prefixSortedFragmentsFiles = "sorted_";
    static File tmpDir = null;

    private static final String USAGE = "USAGE: java [-Xmx1G] -jar FragmentSeeker.jar [-partialFragments:false][-maxCombination:1000] [-maxMappings:1000] [-flushToDiskEvery:100] [-markoBinarize:false][-resumeDir:previousDirPath] [-exactFrequencies:true] [-removeTmpFiles:true][-compressTmpFiles:false] [-extractUncoveredFragments:false] [-threads:1] [-outputPath:null][-markoBinarize:false] [-ukThreshold:-1] treebankFile";

    /**
     * Runs the whole extraction pipeline.
     *
     * @param args up to 13 options followed by the treebank file path (see the usage string)
     * @throws Exception if any stage of the pipeline fails
     */
    public static void main(String[] args) throws Exception {
        long time = System.currentTimeMillis();

        // Defaults of the collaborating classes; options may override them below.
        CommonStructures.maxCombDaughters = 1000;
        AllOrderedNodeSubSet.maxComb = 1000L;
        UkWordMapping.ukThreashold = -1;
        UkWordMappingPetrov ukModel = new UkWordMappingPetrov();
        TreeMarkoBinarizationLeft_Petrov treeMarkovBinarizer = new TreeMarkoBinarizationLeft_Petrov();
        TreeMarkoBinarization.markH = 2;
        TreeMarkoBinarization.markV = 1;

        if (args.length == 0 || args.length > 14) {
            System.err.println("Incorrect number of arguments: " + args.length);
            System.err.println(USAGE);
            System.exit(-1);
        }
        parseOptions(args);
        if (flushToDiskEvery <= 0) {
            // The value is used as a divisor when counting the temporary files.
            System.err.println("The value of " + flushToDiskEveryOption + " must be greater than zero: " + flushToDiskEvery);
            System.err.println(USAGE);
            System.exit(-1);
        }

        String treebankArg = args[args.length - 1];
        treebankFile = new File(treebankArg);
        if (!treebankFile.exists() || !treebankFile.canRead()) {
            System.err.println("Inpur file doesn't exist or not accessible: " + treebankArg);
            System.exit(-1);
        }

        String inputFileAbsolutePath = null;
        try {
            inputFileAbsolutePath = treebankFile.getCanonicalPath();
            if (resumePathDir != null) {
                File resumeDir = new File(resumePathDir);
                if (!resumeDir.isDirectory()) {
                    System.err.println("Resume directory doesn't exist or is not a directory: " + resumePathDir);
                    System.exit(-1);
                }
                // When resuming, the previous directory overrides any -outputPath value.
                outputPath = resumeDir.getCanonicalPath() + "/";
            } else {
                String dataFolder = "/FragmentSeeker_" + FileUtil.dateTimeStringUnique() + "/";
                if (outputPath != null) {
                    // An existing directory gets a fresh sub-folder; a new one is used directly.
                    File dir = new File(outputPath);
                    outputPath = dir.exists() ? dir + dataFolder : dir + "/";
                } else {
                    outputPath = treebankFile.getCanonicalFile().getParentFile().getCanonicalPath() + dataFolder;
                }
                new File(outputPath).mkdirs();
            }
        }
        catch (IOException e) {
            System.err.println("Inpur directory doesn't exist or not accessible: " + treebankArg);
            System.exit(-1);
        }

        try {
            treebank = Wsj.getTreebank(treebankFile);
        }
        catch (Exception e) {
            e.printStackTrace();
            System.err.println("Something wrong happened while reading the treebank in file: " + inputFileAbsolutePath);
            System.exit(-1);
        }

        tmpDir = new File(outputPath + "tmp");
        int treebankMaxDepth = TSNodeLabel.maxDepthTreebank(treebank);
        treebankSize = treebank.size();
        if (treebankSize < 2) {
            System.err.println("The treebank should have at least 2 structures.");
            System.exit(-1);
        }
        File approxFreqFile = new File(outputPath
                + (partialFragments ? "partialfragments_approxFreq.txt" : "fragments_approxFreq.txt"));

        // Number of temporary files needed: one per flushToDiskEvery trees, rounded up.
        int maxFileCounter = treebank.size() / flushToDiskEvery;
        if (treebank.size() % flushToDiskEvery > 0) {
            ++maxFileCounter;
        }
        CommonStructures.maxDepth = treebankMaxDepth;
        CommonStructures.flushToDiskEvery = flushToDiskEvery;
        CommonStructures.flushToDiskFileCounter = fileCounterStart;
        CommonStructures.flushToDiskPath = tmpDir.getAbsolutePath() + "/";
        CommonStructures.diskFileCounterSize = Integer.toString(maxFileCounter).length();
        CommonStructures.prefixFragmentsFiles = prefixFragmentsFiles;
        CommonStructures.prefixTmpFragmentsFiles = prefixTmpFragmentsFiles;
        CommonStructures.compressTmpFiles = compressTmpFiles;

        openLog();
        reportSettings(treebankMaxDepth);

        if (markoBinarize) {
            Parameters.reportLine("MarkoBinarize Training Treebank");
            Parameters.reportLine("Type: " + ((TreeMarkoBinarization)treeMarkovBinarizer).getDescription());
            Parameters.reportLine("MarkH: " + TreeMarkoBinarization.markH);
            Parameters.reportLine("MarkV: " + TreeMarkoBinarization.markV);
            treebank = treeMarkovBinarizer.markoBinarizeTreebank(treebank);
            File transformedTrainingTreeBankFile = new File(outputPath + "trainingTreebank_MB.mrg");
            // Later stages re-read the treebank from the transformed file.
            treebankFile = transformedTrainingTreeBankFile;
            TSNodeLabel.printTreebankToFile(transformedTrainingTreeBankFile, treebank, false, false);
            Parameters.reportLineFlush("Printed training treebank after MarkoBinarization to: " + transformedTrainingTreeBankFile);
        }
        if (UkWordMapping.ukThreashold > 0) {
            Parameters.reportLineFlush("Processing Unknown Word.");
            ukModel.init(treebank, null);
            treebank = ukModel.transformTrainingTreebank();
            File transformedTrainingTreeBankFile = new File(outputPath + "trainingTreebank_UK.mrg");
            treebankFile = transformedTrainingTreeBankFile;
            TSNodeLabel.printTreebankToFile(transformedTrainingTreeBankFile, treebank, false, false);
            Parameters.reportLineFlush("Written treebank with unknown words to: " + transformedTrainingTreeBankFile);
        }
        treebankStructure = Wsj.getTreebankStructure(treebankFile);

        if (approxFreqFile.exists()) {
            Parameters.reportLineFlush("File with approx freq found. Not extracting Fragments again!");
        } else {
            extractFragments(approxFreqFile);
        }

        File fragmentExactFreqFile = new File(outputPath
                + (partialFragments ? "partialfragments_exactFreq.txt" : "fragments_exactFreq.txt"));
        if (exactFrequencies) {
            if (fragmentExactFreqFile.exists()) {
                Parameters.reportLineFlush("File with exact freq found.");
            } else {
                RetrieveCorrectFreqQueue RCF = new RetrieveCorrectFreqQueue(treebank, approxFreqFile, fragmentExactFreqFile, partialFragments, threads);
                RCF.run();
                if (RCF.isInterrupted()) {
                    Parameters.reportLineFlush("Interruprion!");
                    return;
                }
                Parameters.reportLineFlush("Fragments with their exact frequencies written in:\n\t" + fragmentExactFreqFile);
            }
        }

        if (removeTmpFiles) {
            deleteTmpDir();
        }
        if (extractCoverFragments) {
            collectCoverFragments(exactFrequencies ? fragmentExactFreqFile : approxFreqFile);
        }
        Parameters.reportLineFlush("Took: " + (System.currentTimeMillis() - time) / 1000L + " seconds.");
        Parameters.closeLogFile();
    }

    /**
     * Applies every option in {@code args} except the last element, which is the
     * treebank file. An unrecognised option prints the usage and exits.
     */
    private static void parseOptions(String[] args) throws Exception {
        for (int i = 0; i < args.length - 1; ++i) {
            String option = args[i];
            if (option.startsWith(exactFrequenciesOption)) {
                exactFrequencies = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(partialFragmentsOption)) {
                partialFragments = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(maxCombinationOption)) {
                CommonSubBranch.maxCombDaughters = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(maxMappingsOption)) {
                AllOrderedNodeSubSet.maxComb = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(flushToDiskEveryOption)) {
                flushToDiskEvery = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(resumeOption)) {
                resumePathDir = ArgumentReader.readStringOption(option);
            } else if (option.startsWith(removeTmpFilesOption)) {
                removeTmpFiles = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(compressTmpFilesOption)) {
                compressTmpFiles = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(extractUncoveredFragmentsOption)) {
                extractCoverFragments = ArgumentReader.readBooleanOption(option);
            } else if (option.startsWith(threadsOption)) {
                threads = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(uknownThresholdOption)) {
                UkWordMapping.ukThreashold = ArgumentReader.readIntOption(option);
            } else if (option.startsWith(outputPathOption)) {
                outputPath = ArgumentReader.readStringOption(option);
            } else if (option.startsWith(markoBinarizeOption)) {
                markoBinarize = ArgumentReader.readBooleanOption(option);
            } else {
                System.err.println("Not a valid option: " + option);
                System.err.println(USAGE);
                System.exit(-1);
            }
        }
    }

    /**
     * Opens {@code FS_1.log} for a fresh run, or {@code FS_<k>.log} when resuming,
     * where {@code k} is one more than the number of FS log files already present.
     */
    private static void openLog() throws Exception {
        if (resumePathDir == null) {
            new File(outputPath).mkdir();
            Parameters.openLogFile(new File(outputPath + "FS_1.log"));
            return;
        }
        int countFS = 1;
        // listFiles() returns null when the path cannot be listed.
        File[] outputFiles = new File(outputPath).listFiles();
        if (outputFiles != null) {
            for (File f : outputFiles) {
                String fName = f.getName();
                if (fName.startsWith("FS") && fName.endsWith(".log")) {
                    ++countFS;
                }
            }
        }
        Parameters.openLogFile(new File(outputPath + "FS_" + countFS + ".log"));
        Parameters.reportLineFlush("\n\nResuming FS");
    }

    /** Writes the effective settings of this run to the log. */
    private static void reportSettings(int treebankMaxDepth) {
        Parameters.reportLineFlush("TreeBank file:\t" + treebankFile);
        Parameters.reportLineFlush("Treebank size:\t" + treebankSize);
        Parameters.reportLineFlush("Max depth:\t" + treebankMaxDepth);
        Parameters.reportLineFlush("Using partial fragments:\t" + partialFragments);
        Parameters.reportLineFlush("AllOrderedNodeSubSet.maxComb:\t" + AllOrderedNodeSubSet.maxComb);
        Parameters.reportLineFlush("maxCombDaughters:\t" + CommonStructures.maxCombDaughters);
        Parameters.reportLineFlush("Number of threads:\t" + threads);
        Parameters.reportLineFlush("Flushing fragments every:\t" + flushToDiskEvery);
        Parameters.reportLineFlush("Output exact frequencies:\t" + exactFrequencies);
        Parameters.reportLineFlush("Extract Cover Fragments:\t" + extractCoverFragments);
        Parameters.reportLineFlush("Uknown Word Threshold:\t" + UkWordMapping.ukThreashold);
    }

    /**
     * Extracts the fragments into the temporary directory (resuming from its
     * current content if it already exists) and merges them into {@code approxFreqFile}.
     */
    private static void extractFragments(File approxFreqFile) throws Exception {
        if (!tmpDir.exists()) {
            tmpDir.mkdir();
        } else {
            FragmentSeeker.resumeTmpDir();
            // resumeTmpDir() may have advanced fileCounterStart after main()
            // copied its initial value, so the counter is synchronised again.
            CommonStructures.flushToDiskFileCounter = fileCounterStart;
        }
        CommonStructures cs;
        if (partialFragments) {
            cs = exactFrequencies ? new CommonSubBranchMUBThreads(treebankStructure, threads, startIndex) : new CommonSubBranchMUBFreqThreads(treebankStructure, threads, startIndex);
            cs.run();
            Parameters.reportLineFlush("Finished Extracting Partial Fragmetns.");
        } else {
            cs = exactFrequencies ? new CommonSubtreesMUBThreads(treebankStructure, threads, startIndex) : new CommonSubtreesMUBFreqThreads(treebankStructure, threads, startIndex);
            cs.run();
            Parameters.reportLineFlush("Finished Extracting Fragmetns.");
        }
        // The merge is started by constructing the object; nothing else is invoked on it.
        if (compressTmpFiles) {
            new SortAndMergeFragmentFilesBz(tmpDir, approxFreqFile, prefixFragmentsFiles, prefixSortedFragmentsFiles, prefixTmpFragmentsFiles);
        } else {
            new SortAndMergeFragmentFiles(tmpDir, approxFreqFile, prefixFragmentsFiles, prefixSortedFragmentsFiles, prefixTmpFragmentsFiles);
        }
        Parameters.reportLineFlush("Fragments with their approximate frequencies written in:\n\t" + approxFreqFile);
    }

    /** Deletes the files directly inside the temporary directory, then the directory itself. */
    private static void deleteTmpDir() {
        Parameters.reportLineFlush("Removing temporal files.");
        File[] tmpFiles = tmpDir.listFiles();
        if (tmpFiles != null) {
            for (File f : tmpFiles) {
                f.delete();
            }
        }
        tmpDir.delete();
    }

    /**
     * Collects the cover fragments (full fragments only) unless the ALL file
     * already exists, then (re)writes the ALL file as {@code fragmentFile}
     * followed by the cover fragments.
     *
     * @param fragmentFile the exact- or approximate-frequency fragment file of this run
     */
    private static void collectCoverFragments(File fragmentFile) throws Exception {
        String prefix = partialFragments ? "partialfragments_" : "fragments_";
        String suffix = exactFrequencies ? "_exactFreq.txt" : "_approxFreq.txt";
        File coverFragmentsFile = new File(outputPath + prefix + "COVER" + suffix);
        File allFragmentsFile = new File(outputPath + prefix + "ALL" + suffix);

        if (allFragmentsFile.exists()) {
            Parameters.reportLineFlush("File with COVER fragments already found.");
        } else if (partialFragments) {
            Parameters.reportLineFlush("Cannot collect cover fragments for partial fragments yet!");
        } else {
            Parameters.reportLineFlush("Collecting COVER Fragments in " + coverFragmentsFile);
            new UncoveredFragmentsExtractor(treebank, fragmentFile, coverFragmentsFile, threads).run();
        }

        PrintWriter allFragmentFilePW = FileUtil.getPrintWriter(allFragmentsFile);
        try {
            Parameters.reportLineFlush("Collecting ALL Fragments in " + allFragmentsFile);
            FileUtil.append(fragmentFile, allFragmentFilePW);
            if (!partialFragments) {
                FileUtil.append(coverFragmentsFile, allFragmentFilePW);
            }
        } finally {
            // Close the writer even when one of the appends fails.
            allFragmentFilePW.close();
        }
    }

    /**
     * Inspects an existing temporary directory and decides where the extraction
     * must restart, storing the result in {@link #startIndex} and
     * {@link #fileCounterStart}.
     *
     * <p>Files with the temporary prefix are always deleted. If a file with the
     * sorted prefix exists, {@code startIndex} is set to the treebank size.
     * Otherwise the fragment files are scanned in sorted order: the consecutively
     * numbered files starting at 1 are kept, and the first file breaking the
     * sequence, together with every later fragment file, is deleted.
     *
     * @throws IllegalStateException if the temporary directory cannot be listed
     */
    private static void resumeTmpDir() {
        File[] tmpFiles = tmpDir.listFiles();
        if (tmpFiles == null) {
            // listFiles() returns null for a non-directory or on an I/O error.
            throw new IllegalStateException("Cannot list the temporary directory: " + tmpDir);
        }
        Arrays.sort(tmpFiles);

        boolean foundSortedFile = false;
        for (File f : tmpFiles) {
            if (f.getName().startsWith(prefixSortedFragmentsFiles)) {
                foundSortedFile = true;
                break;
            }
        }

        for (File f : tmpFiles) {
            String fileName = f.getName();
            if (fileName.startsWith(prefixTmpFragmentsFiles)) {
                Parameters.reportLineFlush("Deleting " + fileName);
                f.delete();
            }
        }

        int expectedIndex = 1;
        if (!foundSortedFile) {
            boolean foundHole = false;
            for (File f : tmpFiles) {
                String fileName = f.getName();
                if (!fileName.startsWith(prefixFragmentsFiles)) {
                    continue;
                }
                if (!foundHole) {
                    // The file index sits between the last "_" and the last ".".
                    String fileIndexString = fileName.substring(fileName.lastIndexOf("_") + 1, fileName.lastIndexOf("."));
                    int fileIndex = Integer.parseInt(fileIndexString);
                    if (fileIndex == expectedIndex) {
                        ++expectedIndex;
                        continue;
                    }
                    foundHole = true;
                    Parameters.reportLineFlush("Found hole: didnt find file index " + expectedIndex);
                }
                // This file is at or after the first gap in the sequence.
                Parameters.reportLineFlush("Deleting " + fileName);
                f.delete();
            }
        }

        if (foundSortedFile) {
            startIndex = treebankSize;
        } else {
            fileCounterStart = expectedIndex - 1;
            startIndex = fileCounterStart * flushToDiskEvery;
        }
        if (startIndex >= treebankSize) {
            Parameters.reportLineFlush("All trees have been compared");
        } else {
            Parameters.reportLineFlush("Starting to compare tree from index " + startIndex);
        }
        // The log stays open: main() keeps reporting after this method returns
        // and closes the log itself at the end of the run.
    }
}

