/*
 * Decompiled with CFR 0.152.
 */
package tsg.corpora;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Scanner;
import tsg.TSNode;
import tsg.corpora.ConstCorpus;
import util.DepConstConverter;
import util.FileUtil;

/**
 * Helpers for working with the TIGER treebank: reading per-sentence dependency
 * files, converting them into index tables, and using those tables to annotate
 * heads on the constituency trees of a {@link ConstCorpus}.
 *
 * <p>All methods are static; input and output locations are hard-coded paths.
 */
public class Tiger
extends ConstCorpus {
    private static final long serialVersionUID = 0L;
    private static final String traceTag = "-NONE-";
    private static final File fdscDir = new File("/scratch/fsangati/CORPUS/Tiger/fdsc-Aug07");

    /**
     * Word indexes at or above this bound are skipped when building the
     * dependency table. NOTE(review): presumably such indexes denote nodes that
     * are not terminals of the sentence -- confirm against the data format.
     */
    private static final int MAX_WORD_INDEX = 500;

    /**
     * Minimum length of the array returned by {@link #scanXmlFile(File)}. The
     * method historically returned a fixed array of this size, so the length is
     * kept as a lower bound for existing callers.
     */
    private static final int MIN_XML_SENTENCE_SLOTS = 2000;

    /**
     * Builds a corpus containing one tree per dependency file found in the
     * dependency directory, with head annotations re-assigned from the
     * dependency information.
     *
     * <p>For every file the sentence number is parsed from characters 9..13 of
     * the file name; the tree at position {@code number - 3} of
     * {@code tiger.treeBank} is selected, its head annotations are removed and
     * new heads are assigned through
     * {@code DepConstConverter.assignHeadsFromDependencyTable}. The terminals of
     * each tree and the converter's textual output are written to a report
     * file. The trees are modified in place, so {@code tiger} is affected too.
     *
     * @param tiger corpus holding the constituency trees
     * @return a new corpus with the processed trees, in file-name order
     * @throws IllegalStateException if the dependency directory cannot be listed
     *         or a dependency file has no terminating {@code ")"} line
     */
    public static ConstCorpus annotateHeads(ConstCorpus tiger) {
        File[] fileList = fdscDir.listFiles();
        if (fileList == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            throw new IllegalStateException("Cannot list dependency files in " + fdscDir);
        }
        Arrays.sort(fileList);
        ConstCorpus result = new ConstCorpus();
        int wrongAssignment = 0;
        int[] indexes = new int[fileList.length];
        File outputFileDependency = new File("/home/fsangati/CORPUS/Tiger/tiger_DB_readable");
        PrintWriter dependencyWriter = FileUtil.getPrintWriter(outputFileDependency);
        try {
            for (int i = 0; i < fileList.length; ++i) {
                File depFile = fileList[i];
                String fileName = depFile.getName();
                int number = Integer.parseInt(fileName.substring(9, 14));
                TSNode tree = tiger.treeBank.get(number - 3);
                result.treeBank.add(tree);
                indexes[i] = number - 3;
                tree.removeHeadAnnotations();
                String[] dependecyStrings = Tiger.scanDependencyFile(depFile);
                if (dependecyStrings == null) {
                    throw new IllegalStateException("No complete dependency structure found in " + depFile);
                }
                int[][] dependency = Tiger.readDependency(dependecyStrings, tree, fileName);
                // Single-element array used as an out-parameter for the converter's report text.
                String[] output = new String[]{""};
                boolean right = DepConstConverter.assignHeadsFromDependencyTable(tree, dependency, output);
                if (!right) {
                    ++wrongAssignment;
                }
                dependencyWriter.println(tree.collectTerminals().toString());
                dependencyWriter.println(output[0]);
            }
        } finally {
            // Always close the report so buffered lines are flushed and the handle is released.
            if (dependencyWriter != null) {
                dependencyWriter.close();
            }
        }
        System.out.println("Wrong assignments: " + wrongAssignment + "/" + fileList.length);
        System.out.println(Arrays.toString(indexes));
        return result;
    }

    /**
     * Loads the TIGER corpus from its Penn-format file, removes traces tagged
     * {@code -NONE-} and writes the result to the "complete" output file
     * (without quotations).
     */
    public static void printTiger() {
        boolean quotations = false;
        File inputFile = new File("/home/fsangati/CORPUS/Tiger/Complete/tiger_release_july03.penn");
        String outputComplete = "/home/fsangati/CORPUS/Tiger/Complete/tiger_Complete";
        ConstCorpus corpora = new ConstCorpus(inputFile, "TIGER");
        corpora.removeTraces(traceTag);
        corpora.toFile_Complete(new File(outputComplete), quotations);
    }

    /**
     * Converts the textual dependency lines of one sentence into a table of
     * zero-based index pairs.
     *
     * <p>Element 0 of {@code dependecyStrings} is skipped. The remaining lines
     * are processed in two passes:
     * <ol>
     *   <li>Every {@code "))"} is collapsed to {@code ")"}. For each
     *       {@code tiger_id}/{@code coord_form} line with a single {@code ~}
     *       whose index is 0, the real index is taken from the line (for
     *       {@code coord_form}, via {@link #getConjunctionIndex} plus one) and
     *       every occurrence of {@code <root>~0,} in all lines is rewritten to
     *       {@code <root>~<realIndex>,}.</li>
     *   <li>Every line containing two {@code ~} contributes the pair
     *       {@code (firstIndex - 1, secondIndex - 1)}, provided the first index
     *       is non-zero and both are below 500.</li>
     * </ol>
     *
     * <p>The input array is modified in place by the first pass.
     *
     * @param dependecyStrings dependency lines as returned by
     *        {@link #scanDependencyFile(File)}
     * @param tree tree of the sentence, used to locate conjunction terminals
     * @param fileName name of the source file (currently unused)
     * @return one row {@code {firstIndex - 1, secondIndex - 1}} per accepted line
     * @throws NumberFormatException if an index in a line is not an integer
     */
    public static int[][] readDependency(String[] dependecyStrings, TSNode tree, String fileName) {
        // Pass 1: normalise the lines and resolve placeholder index 0 to the real root index.
        for (int i = 1; i < dependecyStrings.length; ++i) {
            dependecyStrings[i] = dependecyStrings[i].replace("))", ")");
            String dep = dependecyStrings[i];
            int firstTilde = dep.indexOf('~');
            int secondTilde = dep.indexOf('~', firstTilde + 1);
            boolean rootCandidate = dep.startsWith("tiger_id") || dep.startsWith("coord_form");
            if (secondTilde != -1 || !rootCandidate) {
                continue;
            }
            int comaIndex = dep.lastIndexOf(", ");
            int openIndex = dep.indexOf('(');
            int closeIndex = dep.lastIndexOf(')');
            int firstIndex = Integer.parseInt(dep.substring(firstTilde + 1, comaIndex).trim());
            if (firstIndex != 0) {
                continue;
            }
            String realIndex = dep.substring(comaIndex + 1, closeIndex).trim();
            int indexRoot = dep.startsWith("coord_form")
                    ? Tiger.getConjunctionIndex(tree, realIndex) + 1
                    : Integer.parseInt(realIndex);
            String root = dep.substring(openIndex + 1, firstTilde).trim();
            String replaceFrom = root + "~0,";
            String replaceTo = root + "~" + indexRoot + ",";
            for (int j = 1; j < dependecyStrings.length; ++j) {
                // Literal replacement: the root is a word form and may contain regex metacharacters.
                dependecyStrings[j] = dependecyStrings[j].replace(replaceFrom, replaceTo);
            }
        }
        // Pass 2: collect the index pairs. A list is used so the result holds exactly the
        // accepted rows, with no unused all-zero rows for entries whose index stayed 0.
        List<int[]> rows = new LinkedList<int[]>();
        for (int i = 1; i < dependecyStrings.length; ++i) {
            String dep = dependecyStrings[i];
            int firstTilde = dep.indexOf('~');
            int secondTilde = dep.indexOf('~', firstTilde + 1);
            if (secondTilde == -1) {
                continue;
            }
            int comaIndex = dep.lastIndexOf(", ");
            int closeIndex = dep.lastIndexOf(')');
            int firstIndex = Integer.parseInt(dep.substring(firstTilde + 1, comaIndex).trim());
            int secondIndex = Integer.parseInt(dep.substring(secondTilde + 1, closeIndex).trim());
            if (firstIndex != 0 && firstIndex < MAX_WORD_INDEX && secondIndex < MAX_WORD_INDEX) {
                rows.add(new int[]{firstIndex - 1, secondIndex - 1});
            }
        }
        return rows.toArray(new int[rows.size()][]);
    }

    /**
     * Finds the terminal of {@code tree} whose lower-cased string form equals
     * {@code conj} and whose {@code hight()} is smallest.
     *
     * <p>When several matching terminals share the smallest value, the last one
     * in terminal order is chosen.
     *
     * @param tree tree whose terminals are searched
     * @param conj word form to look for; compared against the lower-cased terminal
     * @return zero-based position of the chosen terminal among the terminals of
     *         {@code tree}, or {@code -1} if no terminal matches
     */
    public static int getConjunctionIndex(TSNode tree, String conj) {
        List<TSNode> terminals = tree.collectTerminals();
        int minHight = Integer.MAX_VALUE;
        int bestIndex = -1;
        int position = -1;
        for (TSNode term : terminals) {
            ++position;
            if (!term.toString(false, false).toLowerCase().equals(conj)) {
                continue;
            }
            int termHight = term.hight();
            if (termHight > minHight) {
                continue;
            }
            // "<=" on purpose: a later terminal with equal height replaces an earlier one.
            bestIndex = position;
            minHight = termHight;
        }
        return bestIndex;
    }

    /**
     * Extracts the words of every sentence from an XML file read as ISO-8859-1.
     *
     * <p>Each trimmed line starting with {@code <t id=} contributes one word:
     * the text between 6 characters after the start of {@code word=} and 2
     * characters before the start of {@code lemma=}. A line starting with
     * {@code </terminals>} closes the current sentence; sentences without words
     * are not recorded.
     *
     * @param xmlFile file to read
     * @return array whose leading entries hold the words of each sentence in
     *         file order; the array has at least 2000 slots and unused trailing
     *         slots are {@code null}
     */
    public static String[][] scanXmlFile(File xmlFile) {
        LinkedList<String> sentenceWords = new LinkedList<String>();
        LinkedList<String[]> sentences = new LinkedList<String[]>();
        Scanner scanfdsc = null;
        try {
            scanfdsc = new Scanner(xmlFile, "ISO-8859-1");
            while (scanfdsc.hasNextLine()) {
                String line = scanfdsc.nextLine().trim();
                if (line.startsWith("<t id=")) {
                    int startIndex = line.indexOf("word=") + 6;
                    int endIndex = line.indexOf("lemma=") - 2;
                    sentenceWords.add(line.substring(startIndex, endIndex));
                    continue;
                }
                if (!line.startsWith("</terminals>") || sentenceWords.isEmpty()) {
                    continue;
                }
                sentences.add(sentenceWords.toArray(new String[sentenceWords.size()]));
                sentenceWords.clear();
            }
        }
        catch (IOException e) {
            FileUtil.handleExceptions(e);
        }
        finally {
            if (scanfdsc != null) {
                scanfdsc.close();
            }
        }
        // Keep the historical minimum of 2000 slots, but grow for larger files instead of overflowing.
        String[][] result = new String[Math.max(MIN_XML_SENTENCE_SLOTS, sentences.size())][];
        return sentences.toArray(result);
    }

    /**
     * Reads the dependency lines of one sentence from a file read as ISO-8859-1.
     *
     * <p>Lines are trimmed. Empty lines and lines starting with
     * {@code sentence(}, {@code id(TiGerDB} or {@code structure(} are skipped.
     * A line starting with {@code sentence_form(} is reduced to the text after
     * its first {@code (} up to, but excluding, the character immediately before
     * its last {@code )}. Reading stops at the first line that is exactly
     * {@code )}.
     *
     * @param depFile dependency file to read
     * @return the collected lines in file order, or {@code null} if the file
     *         ends without a {@code )} line or cannot be opened (after
     *         {@code FileUtil.handleExceptions} has been called)
     */
    public static String[] scanDependencyFile(File depFile) {
        LinkedList<String> sentenceDependency = new LinkedList<String>();
        Scanner scanfdsc = null;
        try {
            scanfdsc = new Scanner(depFile, "ISO-8859-1");
            while (scanfdsc.hasNextLine()) {
                String line = scanfdsc.nextLine().trim();
                if (line.length() == 0 || line.startsWith("sentence(") || line.startsWith("id(TiGerDB") || line.startsWith("structure(")) {
                    continue;
                }
                if (line.equals(")")) {
                    return sentenceDependency.toArray(new String[sentenceDependency.size()]);
                }
                if (line.startsWith("sentence_form(")) {
                    int openParenthesis = line.indexOf('(');
                    int closeParenthesis = line.lastIndexOf(')');
                    line = line.substring(openParenthesis + 1, closeParenthesis - 1);
                }
                sentenceDependency.add(line);
            }
        }
        catch (IOException e) {
            FileUtil.handleExceptions(e);
        }
        finally {
            // Runs on the early return as well, so the file handle is never leaked.
            if (scanfdsc != null) {
                scanfdsc.close();
            }
        }
        return null;
    }

    /**
     * Reads the compressed Penn-format TIGER corpus and stores it as a binary file.
     */
    public static void buildBinaryTiger() {
        File tigerPennComplete = new File("/home/fsangati/CORPUS/Tiger/Complete/tiger_release_july03_compressed.penn");
        ConstCorpus tiger = new ConstCorpus(tigerPennComplete, "TIGER");
        tiger.toBinaryFile(new File("/home/fsangati/CORPUS/Tiger/Complete/tiger_binary_complete"));
    }

    /**
     * Entry point: builds the binary version of the TIGER corpus.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Tiger.buildBinaryTiger();
    }
}

