/*
 * Decompiled with CFR 0.152.
 */
package jigsaw.grammar;

import java.io.FileOutputStream;
import java.io.ObjectOutputStream;
import java.util.Collection;
import java.util.List;
import jigsaw.syntax.Grammar;
import jigsaw.syntax.Rule;
import jigsaw.syntax.Tree;
import jigsaw.treebank.PennTreebankReader;
import jigsaw.treebank.Trees;
import jigsaw.util.StringUtils;

/**
 * Command-line tool that reads Penn Treebank trees, counts the rules and
 * tagged words observed in them into a {@link Grammar}, and writes the
 * grammar out both before and after normalization.
 */
public class SimplePtbPcfgExtractor {
    /**
     * Builds a grammar of raw counts from the trees returned by
     * {@link PennTreebankReader#readTrees}.
     *
     * <p>Every tree is first passed through a
     * {@code Trees.StandardTreeNormalizer}. Each non-leaf node of the
     * normalized tree then contributes one count: a preterminal node counts
     * its (word, tag) pair, any other internal node counts the rule formed by
     * its label and the labels of its children. The number of trees processed
     * is printed to standard output.
     *
     * @param ptbroot treebank location, passed unchanged to the reader
     * @param begin   lower bound passed unchanged to the reader (presumably a
     *                treebank file number; confirm against PennTreebankReader)
     * @param end     upper bound passed unchanged to the reader (same caveat)
     * @return the grammar holding the accumulated counts, with "ROOT"
     *         registered through {@code registerS}
     */
    public static Grammar extractPcfg(String ptbroot, int begin, int end) {
        Grammar g = new Grammar();
        Collection<Tree<String>> trees = PennTreebankReader.readTrees(ptbroot, begin, end);
        g.registerS("ROOT");
        int tcount = 0;
        for (Tree<String> tree : trees) {
            ++tcount;
            Tree<String> normalized = new Trees.StandardTreeNormalizer().transformTree(tree);
            // Iterating a Tree visits its nodes; leaves carry no rule of their own.
            for (Tree<String> node : normalized) {
                if (node.isLeaf()) {
                    continue;
                }
                if (node.isPreTerminal()) {
                    // Lexical entry: count the word under this tag.
                    int t = g.registerT(node.getLabel());
                    int tag = g.T2i(t);
                    List<String> yield = node.getYield();
                    String word = StringUtils.join(yield, " ");
                    g.incrSeenCount(word, tag, 1);
                    continue;
                }
                // Phrasal node: count the rule label -> child labels.
                int lhs = g.registerNT(node.getLabel());
                List<Tree<String>> children = node.getChildren();
                int[] rhs = new int[children.size()];
                int i = 0;
                for (Tree<String> c : children) {
                    // Preterminal children are registered as tags, all others as non-terminals.
                    rhs[i] = c.isPreTerminal() ? g.registerT(c.getLabel()) : g.registerNT(c.getLabel());
                    ++i;
                }
                g.incrSeenCount(new Rule(lhs, rhs));
            }
        }
        System.out.println(tcount + " trees extracted.");
        return g;
    }

    /**
     * Entry point. Accepts either
     * {@code <ptb directory> <output grammar name>} or
     * {@code <begin> <end> <ptb directory> <output grammar name>}; when the
     * range is omitted, 200 and 2172 are used.
     *
     * <p>Writes three outputs: a dump of the unnormalized grammar under the
     * given name, a dump of the normalized grammar under
     * {@code <name>-normalized}, and the Java-serialized normalized grammar
     * in {@code <name>.gr}. Size statistics are printed before and after
     * normalization.
     *
     * <p>Exits with status 1 on a wrong argument count or when any step
     * fails; failures also print their stack trace to standard error.
     *
     * @param args command-line arguments as described above
     */
    public static void main(String[] args) {
        if (args.length != 2 && args.length != 4) {
            System.err.println("Usage: <ptb directory> <output grammar name>");
            System.err.println("       <begin> <end> <ptb directory> <output grammar name>");
            System.exit(1);
        }
        try {
            String ptbroot;
            String gname;
            int begin = 200;
            int end = 2172;
            // The length check above leaves only the 2- and 4-argument forms.
            if (args.length == 4) {
                begin = Integer.parseInt(args[0]);
                end = Integer.parseInt(args[1]);
                ptbroot = args[2];
                gname = args[3];
            } else {
                ptbroot = args[0];
                gname = args[1];
            }
            Grammar g = SimplePtbPcfgExtractor.extractPcfg(ptbroot, begin, end);
            g.dumpGrammar(gname);
            System.out.println("Dumped");
            System.out.println();
            System.out.println("Unnormalaized grammar:");
            System.out.println("Rules : " + g.rules().size());
            System.out.println("Non-Terminals : " + g.nts().size());
            System.out.println("Preterminal Tags : " + g.ts().size());
            g.normalize();
            g.buildProb();
            g.lexicon().buildUWModel();
            System.out.println("Normalized");
            g.dumpGrammar(gname + "-normalized");
            System.out.println("Dumped normalized grammmar");
            // Flush and close explicitly so the serialized file is complete and
            // the handle is released even if writeObject fails.
            FileOutputStream fileOut = new FileOutputStream(gname + ".gr");
            try {
                ObjectOutputStream oos = new ObjectOutputStream(fileOut);
                oos.writeObject(g);
                oos.flush();
            }
            finally {
                fileOut.close();
            }
            System.out.println("Serialized");
            System.out.println();
            System.out.println("Normalaized grammar:");
            System.out.println("Rules : " + g.rules().size());
            System.out.println("Non-Terminals : " + g.nts().size());
            System.out.println("Preterminal Tags : " + g.ts().size());
        }
        catch (Exception ex) {
            ex.printStackTrace();
            // Report the failure to the calling shell instead of exiting with 0.
            System.exit(1);
        }
    }
}

