/*
 * Decompiled with CFR 0.152.
 */
package tsg.dop;

import java.io.File;
import java.io.PrintWriter;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeSet;
import settings.Parameters;
import tsg.CFSG;
import tsg.TSNode;
import tsg.corpora.ConstCorpus;
import tsg.corpora.Wsj;
import util.FileUtil;
import util.PrintProgressStatic;
import util.Utility;

/**
 * Grammar built from the rule strings returned by {@code TSNode.goodman(true)}.
 *
 * <p>Every rule string is expected to end with a space followed by an integer
 * count. Rules containing a double quote are stored as lexical rules (with the
 * quotes stripped by {@code Utility.removeDoubleQuotes}); all other rules are
 * stored as internal rules.
 *
 * <p>Besides the {@link #readCFGFromCorpus()} override, the class offers static
 * utilities to dump the rule tables to files, to regroup a lexicon file by
 * word, and to post-process a file of parsed trees.
 */
public class DOP_Goodman
extends CFSG<Long> {
    /**
     * Fills {@code internalRules} and {@code lexRules} from every tree in
     * {@code Parameters.trainingCorpus.treeBank}, then appends a short summary
     * to {@code Parameters.logFile}.
     *
     * <p>Each tree is modified in place ({@code toNormalForm},
     * {@code toUniqueInternalLabels}) before its rules are extracted. The label
     * index returned for one tree is fed into the call for the next tree.
     */
    @Override
    public void readCFGFromCorpus() {
        int uniqueLabelIndex = 1;
        for (TSNode tree : Parameters.trainingCorpus.treeBank) {
            tree.toNormalForm();
            uniqueLabelIndex = tree.toUniqueInternalLabels(false, uniqueLabelIndex, false);
            LinkedList<String> goodmanPCFG = tree.goodman(true);
            for (String rule : goodmanPCFG) {
                // The count is the last space-separated token of the rule string.
                int lastSpace = rule.lastIndexOf(" ");
                long count = Long.parseLong(rule.substring(lastSpace + 1));
                String ruleBody = rule.substring(0, lastSpace);
                if (ruleBody.indexOf('"') == -1) {
                    Utility.increaseStringLong(this.internalRules, ruleBody, count);
                } else {
                    // A double quote marks a lexical rule; it is stored without the quotes.
                    Utility.increaseStringLong(this.lexRules, Utility.removeDoubleQuotes(ruleBody), count);
                }
            }
        }
        String log = "Read rules from corups. \n # Internal Rules: " + this.internalRules.size() + "\n # Lex Rules: " + this.lexRules.size();
        FileUtil.appendReturn(log, Parameters.logFile);
    }

    /**
     * Reads the trees of {@code corpusFile}, accumulates the counts of their
     * rules and writes the internal rules to {@code grammarFile} and the lexical
     * rules to {@code lexiconFile}.
     *
     * <p>Both output files contain one {@code count<TAB>rule} line per rule,
     * sorted by the natural ordering of the rule strings. Progress and a summary
     * are printed to standard output.
     *
     * @param corpusFile  file handed to the {@code ConstCorpus} constructor
     * @param grammarFile destination of the internal rules
     * @param lexiconFile destination of the lexical rules
     */
    public static void printGoodmanCFG(File corpusFile, File grammarFile, File lexiconFile) {
        int uniqueLabelIndex = 1;
        ConstCorpus corpus = new ConstCorpus(corpusFile);
        Hashtable<String, Long> internalRules = new Hashtable<String, Long>();
        Hashtable<String, Long> lexRules = new Hashtable<String, Long>();
        PrintProgressStatic.start("Reading sentence:");
        for (TSNode tree : corpus.treeBank) {
            PrintProgressStatic.next();
            tree.toNormalForm();
            uniqueLabelIndex = tree.toUniqueInternalLabels(false, uniqueLabelIndex, false);
            LinkedList<String> goodmanPCFG = tree.goodman(true);
            for (String rule : goodmanPCFG) {
                // The count is the last space-separated token of the rule string.
                int lastSpace = rule.lastIndexOf(" ");
                long count = Long.parseLong(rule.substring(lastSpace + 1));
                String ruleBody = rule.substring(0, lastSpace);
                if (ruleBody.indexOf('"') == -1) {
                    Utility.increaseStringLong(internalRules, ruleBody, count);
                } else {
                    // A double quote marks a lexical rule; it is stored without the quotes.
                    Utility.increaseStringLong(lexRules, Utility.removeDoubleQuotes(ruleBody), count);
                }
            }
        }
        PrintProgressStatic.end();
        String log = "Read rules from corups. \n # Internal Rules: " + internalRules.size() + "\n # Lex Rules: " + lexRules.size();
        System.out.println(log);
        writeSortedRules(internalRules, grammarFile);
        writeSortedRules(lexRules, lexiconFile);
        System.out.println("Printed `lexicon` and `grammar` files");
    }

    /**
     * Writes one {@code count<TAB>rule} line (terminated by {@code "\n"}) per
     * entry of {@code rules} to {@code outputFile}, in the natural order of the
     * rule strings. The writer is closed even if writing fails.
     */
    private static void writeSortedRules(Hashtable<String, Long> rules, File outputFile) {
        PrintWriter writer = FileUtil.getPrintWriter(outputFile);
        try {
            for (String rule : new TreeSet<String>(rules.keySet())) {
                writer.write(rules.get(rule) + "\t" + rule + "\n");
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Regroups a lexicon file by word.
     *
     * <p>Each non-empty input line must look like {@code count<TAB>pos word}
     * (the layout written for the lexicon by
     * {@link #printGoodmanCFG(File, File, File)}). Only the first two
     * whitespace-separated tokens after the tab are used. The output has one
     * line per distinct word: the word followed by a tab-separated list of
     * {@code pos count} pairs, in the order they were encountered. Output lines
     * follow the iteration order of the underlying {@code Hashtable}.
     *
     * @param inputFile  lexicon file to read
     * @param outputFile file to write the regrouped lexicon to
     * @throws NumberFormatException if a count is not a valid {@code long}
     */
    public static void convertLexiconInBitParFormat(File inputFile, File outputFile) {
        Hashtable<String, StringBuilder> posCountsByWord = new Hashtable<String, StringBuilder>();
        Scanner scan = FileUtil.getScanner(inputFile);
        try {
            while (scan.hasNextLine()) {
                String line = scan.nextLine();
                if (line.length() == 0) {
                    continue;
                }
                String[] fields = line.split("\t");
                // Counts are written as long values, so they must be parsed as long too.
                long count = Long.parseLong(fields[0]);
                String[] posLex = fields[1].split("\\s+");
                String pos = posLex[0];
                String lex = posLex[1];
                String posCount = pos + " " + count;
                StringBuilder collected = posCountsByWord.get(lex);
                if (collected == null) {
                    posCountsByWord.put(lex, new StringBuilder(posCount));
                } else {
                    collected.append('\t').append(posCount);
                }
            }
        } finally {
            scan.close();
        }
        PrintWriter pw = FileUtil.getPrintWriter(outputFile);
        try {
            for (Map.Entry<String, StringBuilder> e : posCountsByWord.entrySet()) {
                pw.println(e.getKey() + "\t" + e.getValue());
            }
        } finally {
            pw.close();
        }
    }

    /**
     * Legacy entry point: sets a fixed configuration in {@code Parameters} and
     * {@code Wsj}, then reads the grammar from the training corpus and prints
     * the lexicon and grammar files.
     *
     * @param args ignored
     */
    public static void mainOld(String[] args) {
        Parameters.corpusName = "Wsj";
        Parameters.lengthLimitTraining = 20;
        Parameters.lengthLimitTest = 20;
        Wsj.testSet = "22";
        Wsj.skip120TrainingSentences = false;
        Wsj.transformNPbasal = true;
        Wsj.transformSG = false;
        Parameters.semanticTags = true;
        Parameters.replaceNumbers = true;
        Parameters.ukLimit = 1;
        Parameters.outputPath = "/home/fsangati/PROJECTS/TSG/RESULTS/DOP_Goodman/";
        Parameters.parserName = "fedePar";
        Parameters.nBest = 1;
        Parameters.cachingActive = false;
        DOP_Goodman grammar = new DOP_Goodman();
        Parameters.trainingCorpus.checkRedundentRules();
        grammar.readCFGFromCorpus();
        grammar.printLexiconAndGrammarFiles();
    }

    /**
     * Copies {@code inputFile} to {@code outputFile} line by line, transforming
     * every tree line on the way.
     *
     * <p>Empty lines are dropped and lines starting with {@code "vitprob"} are
     * copied unchanged. Every other line is parsed as a {@code TSNode}, passed
     * through {@code removeUniqueInternalLabels(false)} and
     * {@code fromNormalForm()}, and written back via {@code toString()}.
     * Both files are closed even if a line fails to parse.
     *
     * @param inputFile  file to read
     * @param outputFile file to write
     */
    public static void postProcessFile(File inputFile, File outputFile) {
        Scanner scan = FileUtil.getScanner(inputFile);
        try {
            PrintWriter pw = FileUtil.getPrintWriter(outputFile);
            try {
                while (scan.hasNextLine()) {
                    String line = scan.nextLine();
                    if (line.equals("")) {
                        continue;
                    }
                    if (line.startsWith("vitprob")) {
                        pw.println(line);
                        continue;
                    }
                    TSNode tree = new TSNode(line);
                    tree.removeUniqueInternalLabels(false);
                    tree.fromNormalForm();
                    pw.println(tree.toString());
                }
            } finally {
                pw.close();
            }
        } finally {
            scan.close();
        }
    }

    /**
     * Command-line entry point; delegates to
     * {@link #printGoodmanCFG(File, File, File)}.
     *
     * @param args {@code corpusFile grammarFile lexiconFile}; additional
     *             arguments are ignored
     * @throws IllegalArgumentException if fewer than three arguments are given
     */
    public static void main(String[] args) {
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: DOP_Goodman <corpusFile> <grammarFile> <lexiconFile>");
        }
        File corpusFile = new File(args[0]);
        File grammarFile = new File(args[1]);
        File lexiconFile = new File(args[2]);
        DOP_Goodman.printGoodmanCFG(corpusFile, grammarFile, lexiconFile);
    }
}

