/*
 * Decompiled with CFR 0.152.
 */
package tsg.corpora;

import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Locale;
import java.util.Scanner;
import java.util.regex.Pattern;
import tsg.Label;
import tsg.TSNodeLabel;
import util.FileUtil;

/**
 * Command-line helpers that derive cleaned-up treebank files from the raw ATIS2 corpus.
 *
 * <p>All file locations are resolved against {@link #AtisBase} when the class is loaded. Only
 * {@link #removeTraces()} is wired into {@link #main(String[])}; the remaining private steps are
 * kept so they can be switched in by editing {@code main}.
 */
public class Atis2 {
    /** Root directory of the corpus; the paths below are built from its value at class-load time. */
    public static String AtisBase = "/scratch/fsangati/CORPUS/ATIS2/";
    /** Raw corpus file read by {@link #makeAtisClean()}. */
    public static File AtisOriginal = new File(AtisBase + "atis2");
    /** Path of the sentence file; not read or written by any method in this class. */
    public static File AtisOriginalSentences = new File(AtisBase + "atis2_sentences");
    /** Output of {@link #makeAtisClean()} and input of every other step. */
    public static File AtisClean = new File(AtisBase + "atis2_clean.mrg");
    /** Output of {@link #removeTraces()}. */
    public static File AtisCleanNoTraces = new File(AtisBase + "atis2_clean_noTraces.mrg");
    /** Output of {@link #makeAtisRightBranchingKlein()}. */
    public static File AtisCleanRightBranchingKlein = new File(AtisBase + "atis2_clean_RB_Klein.mrg");

    /** Matches one or more whitespace characters; compiled once instead of once per input line. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Converts every non-blank line of {@link #AtisOriginal} into a tree and writes its
     * {@code toString()} form, one per line, to {@link #AtisClean}.
     *
     * <p>Each line is stripped of its list notation, has its parentheses adjusted, is parsed by the
     * {@code TSNodeLabel(String)} constructor and finally has its lexical and pre-lexical labels
     * normalised.
     *
     * @throws Exception if a file cannot be opened or a line cannot be turned into a tree
     */
    private static void makeAtisClean() throws Exception {
        // try-with-resources: both handles are released even when a malformed line aborts the run.
        try (Scanner reader = FileUtil.getScanner(AtisOriginal);
                PrintWriter writer = FileUtil.getPrintWriter(AtisClean)) {
            while (reader.hasNextLine()) {
                String line = reader.nextLine().trim();
                if (line.isEmpty()) {
                    continue;
                }
                line = Atis2.stripListNotation(line);
                line = Atis2.adjustParenthesisation(line);
                TSNodeLabel tree = new TSNodeLabel(line);
                Atis2.adjustLexiconAndPrelex(tree);
                writer.println(tree.toString());
            }
        }
    }

    /**
     * Removes the {@code tree} keyword and square brackets from a line, turns commas into spaces and
     * collapses every whitespace run into a single space.
     *
     * <p>The substitutions are applied strictly in this order because later ones operate on the
     * output of earlier ones.
     *
     * @param line one trimmed, non-empty corpus line
     * @return the line with the notation removed
     */
    private static String stripListNotation(String line) {
        String result = line.replace("[tree", "");
        result = result.replace("tree", "");
        result = result.replace(',', ' ');
        result = result.replace("[", "");
        result = result.replace("]", "");
        return WHITESPACE_RUN.matcher(result).replaceAll(" ");
    }

    /**
     * Deletes two kinds of parentheses from {@code line} and returns the remaining characters in
     * their original order:
     * <ul>
     *   <li>an opening parenthesis that immediately follows another opening parenthesis (no
     *       character in between), together with the closing parenthesis found at the same nesting
     *       depth;</li>
     *   <li>the most recently seen opening parenthesis and the first closing parenthesis that
     *       follows it, i.e. the pair around an innermost group.</li>
     * </ul>
     *
     * <p>NOTE(review): the input is expected to be balanced; a closing parenthesis with no matching
     * opener drives the depth counter negative, and {@link BitSet#get(int)} then throws
     * {@link IndexOutOfBoundsException}.
     *
     * @param line text containing round parentheses
     * @return {@code line} without the parentheses selected above
     */
    private static String adjustParenthesisation(String line) {
        char[] chars = line.toCharArray();
        int length = chars.length;
        // Positions in `chars` that are dropped when the result is assembled.
        BitSet removedPositions = new BitSet();
        // Nesting depths whose opening parenthesis was dropped as a doubled '('.
        BitSet fakeDepths = new BitSet();
        boolean previousWasOpenPar = false;
        // True from an opening parenthesis until the next closing one has been handled.
        boolean awaitingFirstClose = false;
        int depth = 0;
        int lastOpenIndex = -1;
        for (int i = 0; i < length; i++) {
            char c = chars[i];
            if (c == '(') {
                ++depth;
                lastOpenIndex = i;
                awaitingFirstClose = true;
                if (previousWasOpenPar) {
                    fakeDepths.set(depth);
                    removedPositions.set(i);
                }
                previousWasOpenPar = true;
                continue;
            }
            // Any other character, whitespace included, breaks a run of opening parentheses.
            previousWasOpenPar = false;
            if (c != ')') {
                continue;
            }
            if (fakeDepths.get(depth)) {
                fakeDepths.clear(depth);
                removedPositions.set(i);
            }
            if (awaitingFirstClose) {
                // Innermost group: drop both of its parentheses.
                removedPositions.set(lastOpenIndex);
                removedPositions.set(i);
            }
            awaitingFirstClose = false;
            --depth;
        }
        StringBuilder sb = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            if (!removedPositions.get(i)) {
                sb.append(chars[i]);
            }
        }
        return sb.toString();
    }

    /**
     * Drops the last character of every lexical label in {@code tree} and upper-cases the label of
     * each lexical node's parent.
     *
     * <p>An empty lexical label is left untouched and a lexical node without a parent is not
     * followed, so neither case aborts the conversion. Upper-casing uses {@link Locale#ROOT} so the
     * produced labels do not depend on the default locale of the JVM.
     *
     * @param tree tree whose nodes are relabelled in place
     */
    private static void adjustLexiconAndPrelex(TSNodeLabel tree) {
        for (TSNodeLabel lexicalNode : tree.collectLexicalItems()) {
            String label = lexicalNode.label();
            if (!label.isEmpty()) {
                lexicalNode.relabel(label.substring(0, label.length() - 1));
            }
            TSNodeLabel preLexical = lexicalNode.parent;
            if (preLexical != null) {
                preLexical.relabel(preLexical.label().toUpperCase(Locale.ROOT));
            }
        }
    }

    /**
     * Reads the treebank in {@link #AtisClean}, calls {@code pruneSubTrees("XXX")} on every tree and
     * writes the results, one tree per line, to {@link #AtisCleanNoTraces}.
     *
     * <p>NOTE(review): presumably {@code XXX} is the label that marks traces in this corpus; confirm
     * against the data.
     *
     * @throws Exception if the treebank cannot be read or the output file cannot be created
     */
    private static void removeTraces() throws Exception {
        ArrayList<TSNodeLabel> treebank = TSNodeLabel.getTreebank(AtisClean);
        // try-with-resources: the output file is closed even if pruning a tree fails.
        try (PrintWriter pw = new PrintWriter(AtisCleanNoTraces)) {
            for (TSNodeLabel tree : treebank) {
                tree.pruneSubTrees("XXX");
                pw.println(tree.toString());
            }
        }
    }

    /**
     * Reads the treebank in {@link #AtisClean} and, for every tree, writes to
     * {@link #AtisCleanRightBranchingKlein} the tree returned by
     * {@code TSNodeLabel.makeRightBranchingKlein} for its pre-lexical nodes, passing the label
     * {@code X} for both label arguments.
     *
     * @throws Exception if the treebank cannot be read or the output file cannot be created
     */
    private static void makeAtisRightBranchingKlein() throws Exception {
        Label xLabel = Label.getLabel("X");
        ArrayList<TSNodeLabel> treebank = TSNodeLabel.getTreebank(AtisClean);
        // try-with-resources: the output file is closed even if building a tree fails.
        try (PrintWriter pw = new PrintWriter(AtisCleanRightBranchingKlein)) {
            for (TSNodeLabel tree : treebank) {
                ArrayList<TSNodeLabel> terminals = tree.collectPreLexicalItems();
                TSNodeLabel rightBranching = TSNodeLabel.makeRightBranchingKlein(terminals, xLabel, xLabel);
                pw.println(rightBranching.toString());
            }
        }
    }

    /**
     * Prints to standard output the set of distinct labels carried by the pre-lexical nodes of the
     * treebank in {@link #AtisClean}.
     *
     * @throws Exception if the treebank cannot be read
     */
    private static void printPosTags() throws Exception {
        HashSet<String> posSet = new HashSet<String>();
        for (TSNodeLabel tree : TSNodeLabel.getTreebank(AtisClean)) {
            for (TSNodeLabel preLexical : tree.collectPreLexicalItems()) {
                posSet.add(preLexical.label());
            }
        }
        System.out.println(posSet);
    }

    /**
     * Entry point; currently runs only {@link #removeTraces()}.
     *
     * @param args ignored
     * @throws Exception propagated from the executed step
     */
    public static void main(String[] args) throws Exception {
        Atis2.removeTraces();
    }
}

