/*
 * Decompiled with CFR 0.152.
 */
package wordModel;

import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Map;
import java.util.Random;
import java.util.Vector;
import tsg.TSNodeLabel;
import tsg.corpora.Wsj;
import util.FileUtil;
import wordModel.UkWordMappingPetrov;
import wordModel.UkWordMappingStd;

/**
 * Base class for unknown-word models that replace infrequent words in a
 * treebank with a feature string.
 *
 * <p>A word is replaced when its frequency in the training treebank is less
 * than or equal to a threshold (see {@link #transformTreebank}). The feature
 * string itself is produced by the subclass through
 * {@link #getFeatureOfWord(String, boolean, int)}.
 *
 * <p>The class can also print diagnostic statistics comparing the feature
 * bins observed in the training treebank with those observed in the test
 * (develop) treebank; see {@link #compareTrainTest()}.
 */
public abstract class UkWordMapping {
    /** Words whose training frequency is {@code <=} this value are mapped to feature strings. */
    public static int ukThreashold;
    /** When true, {@link #init} runs {@link #compareTrainTest()}. */
    public static boolean compareTrainTest = false;
    /** When true, {@link #init} runs {@link #printParametersInfo()}. */
    public static boolean printParamInfo = false;

    ArrayList<TSNodeLabel> trainingTreebank;
    ArrayList<TSNodeLabel> testTreebank;
    /** Word -> single-element array holding its token count in the training treebank. */
    Hashtable<String, int[]> lexFrequency;
    /** POS -> (word feature string -> words), collected from the training treebank. */
    Hashtable<String, Hashtable<String, ArrayList<String>>> trainingPosWordFeatureStats;
    /** POS -> (word feature string -> words), collected from the test treebank. */
    Hashtable<String, Hashtable<String, ArrayList<String>>> developPosWordFeaturesStats;

    int wordTokensTraining;
    int wordTypesTraining;
    int wordsBelowThresholdTraining;
    int wordsDevelop;
    int totalDevelopBins;
    int awfullyBadMarkedBins;
    int badMarkedBins;
    int lessMarkedBins;
    int waveMarkedBins;
    int nonMarkedBins;

    /** Below this training frequency for the real POS, a develop bin is marked with "~". */
    static int minFreqRealPos = 3;
    /** Maximum number of distinct words listed per bin by {@link #selectSample}. */
    static int selectSampleNumber = 10;
    static Random rand = new Random();

    /**
     * Stores the two treebanks, builds the training lexicon frequencies and
     * loads the model parameters. Depending on {@link #printParamInfo} and
     * {@link #compareTrainTest} it also prints parameter information and the
     * training/develop comparison.
     *
     * @param trainingTreebank trees used to compute word frequencies
     * @param testTreebank     trees used as the develop set in the comparison
     */
    public void init(ArrayList<TSNodeLabel> trainingTreebank, ArrayList<TSNodeLabel> testTreebank) {
        this.trainingTreebank = trainingTreebank;
        this.testTreebank = testTreebank;
        this.buildLexFrequency();
        this.loadDefaultParameters();
        if (printParamInfo) {
            this.printParametersInfo();
        }
        if (compareTrainTest) {
            this.compareTrainTest();
        }
    }

    /** @return the current value of the static {@link #ukThreashold} */
    public int getUkThreashold() {
        return ukThreashold;
    }

    /** Loads the model's default parameters; called from {@link #init} after the lexicon is built. */
    protected abstract void loadDefaultParameters();

    /** Prints information about the model parameters; called from {@link #init} when {@link #printParamInfo} is set. */
    protected abstract void printParametersInfo();

    /**
     * Builds the POS/word-feature statistics for the training and the test
     * treebank and prints them to standard output, followed by the model
     * statistics and the summary counters.
     */
    protected void compareTrainTest() {
        this.trainingPosWordFeatureStats = this.buildPosWordFeaturesStats(true);
        this.developPosWordFeaturesStats = this.buildPosWordFeaturesStats(false);
        String separator = "\n\n_______________________________________________\n\n";
        System.out.println("TRAINING STATISTICS");
        this.printPosWordFeatureStatsTraining();
        System.out.println(separator);
        System.out.println("DEVELOP STATISTICS");
        this.printPosWordFeatureStatsDevelop();
        System.out.println(separator);
        this.printModelStats();
        System.out.println(separator);
        System.out.println("Words token in training: " + this.wordTokensTraining);
        System.out.println("Words type in training: " + this.wordTypesTraining);
        System.out.println("Words token below threshold training: " + this.wordsBelowThresholdTraining);
        System.out.println("Words token unknown in develop: " + this.wordsDevelop);
        System.out.println(separator);
        System.out.println("Total Developed Bins: " + this.totalDevelopBins);
        System.out.println("Total Bins !! : " + this.awfullyBadMarkedBins);
        System.out.println("Total Bins ~  : " + this.waveMarkedBins);
        System.out.println("Total Bins !  : " + this.badMarkedBins);
        System.out.println("Total Bins <  : " + this.lessMarkedBins);
        System.out.println("Total Bins    : " + this.nonMarkedBins);
    }

    /** Prints model-specific statistics; called from {@link #compareTrainTest()}. */
    protected abstract void printModelStats();

    /**
     * Rebuilds {@link #lexFrequency} from the training treebank and updates
     * the token and type counters accordingly.
     */
    public void buildLexFrequency() {
        this.lexFrequency = new Hashtable<String, int[]>();
        // The counters describe the table built here, so restart them with it;
        // otherwise a second call would double-count.
        this.wordTokensTraining = 0;
        this.wordTypesTraining = 0;
        for (TSNodeLabel tree : this.trainingTreebank) {
            ArrayList<TSNodeLabel> lexicalItems = tree.collectLexicalItems();
            this.wordTokensTraining += lexicalItems.size();
            for (TSNodeLabel lexNode : lexicalItems) {
                String word = lexNode.label();
                int[] count = this.lexFrequency.get(word);
                if (count == null) {
                    this.lexFrequency.put(word, new int[]{1});
                    ++this.wordTypesTraining;
                } else {
                    ++count[0];
                }
            }
        }
    }

    /** @return a transformed copy of the training treebank using {@link #ukThreashold} */
    public ArrayList<TSNodeLabel> transformTrainingTreebank() {
        return this.transformTreebank(this.trainingTreebank, ukThreashold);
    }

    /** @return a transformed copy of the test treebank using {@link #ukThreashold} */
    public ArrayList<TSNodeLabel> transformTestTreebank() {
        return this.transformTreebank(this.testTreebank, ukThreashold);
    }

    /**
     * Writes the transformed training treebank to a file, one tree per line.
     *
     * @param ouputFile destination file
     */
    public void ouputTrainigTreebankWithWordFeatures(File ouputFile) {
        this.ouputTreebankWithWordFeatures(this.trainingTreebank, ouputFile, ukThreashold);
    }

    /**
     * Writes the transformed test treebank to a file, one tree per line.
     *
     * @param ouputFile destination file
     */
    public void ouputTestTreebankWithWordFeatures(File ouputFile) {
        this.ouputTreebankWithWordFeatures(this.testTreebank, ouputFile, ukThreashold);
    }

    /**
     * Transforms the given treebank and writes the string form of each tree
     * on its own line.
     *
     * @param treebank   trees to transform and print
     * @param outputFile destination file
     * @param threashold words with training frequency {@code <=} this value are replaced
     */
    public void ouputTreebankWithWordFeatures(ArrayList<TSNodeLabel> treebank, File outputFile, int threashold) {
        ArrayList<TSNodeLabel> treebankTransformed = this.transformTreebank(treebank, threashold);
        PrintWriter pw = FileUtil.getPrintWriter(outputFile);
        try {
            for (TSNodeLabel tree : treebankTransformed) {
                pw.println(tree.toString());
            }
        } finally {
            // Release the file handle even if printing a tree fails.
            pw.close();
        }
    }

    /**
     * Returns a copy of the treebank in which every lexical item whose
     * training frequency is at most {@code threashold} is relabelled with its
     * feature string. The input trees are cloned and left untouched.
     *
     * @param treebank   trees to transform
     * @param threashold words with training frequency {@code <=} this value are replaced
     * @return the list of transformed clones, in the input order
     */
    public ArrayList<TSNodeLabel> transformTreebank(ArrayList<TSNodeLabel> treebank, int threashold) {
        ArrayList<TSNodeLabel> result = new ArrayList<TSNodeLabel>(treebank.size());
        for (TSNodeLabel tree : treebank) {
            TSNodeLabel treeCopy = tree.clone();
            boolean first = true;
            for (TSNodeLabel lexNode : treeCopy.collectLexicalItems()) {
                String word = lexNode.label();
                int[] freqArray = this.lexFrequency.get(word);
                // Words never seen in training count as frequency 0.
                int freq = freqArray == null ? 0 : freqArray[0];
                if (freq <= threashold) {
                    lexNode.relabel(this.getFeatureOfWord(word, first, -1));
                }
                first = false;
            }
            result.add(treeCopy);
        }
        return result;
    }

    /**
     * Collects, for every word whose training frequency is at most
     * {@link #ukThreashold}, its feature string grouped by the label of the
     * parent node (the POS). Also counts how many such tokens were seen, in
     * {@link #wordsBelowThresholdTraining} or {@link #wordsDevelop}.
     *
     * @param training true to scan the training treebank, false for the test treebank
     * @return POS -> (feature string -> list of word tokens)
     */
    public Hashtable<String, Hashtable<String, ArrayList<String>>> buildPosWordFeaturesStats(boolean training) {
        Hashtable<String, Hashtable<String, ArrayList<String>>> result =
                new Hashtable<String, Hashtable<String, ArrayList<String>>>();
        ArrayList<TSNodeLabel> treebank = training ? this.trainingTreebank : this.testTreebank;
        int trainingDevelop = training ? 0 : 1;
        // Restart the counter that belongs to the table built by this call so
        // repeated calls do not accumulate.
        if (training) {
            this.wordsBelowThresholdTraining = 0;
        } else {
            this.wordsDevelop = 0;
        }
        for (TSNodeLabel tree : treebank) {
            boolean first = true;
            for (TSNodeLabel lexNode : tree.collectLexicalItems()) {
                String word = lexNode.label();
                String pos = lexNode.parent.label();
                int[] freqArray = this.lexFrequency.get(word);
                int freq = freqArray == null ? 0 : freqArray[0];
                if (freq <= ukThreashold) {
                    if (training) {
                        ++this.wordsBelowThresholdTraining;
                    } else {
                        ++this.wordsDevelop;
                    }
                    String wordFeatures = this.getFeatureOfWord(word, first, trainingDevelop);
                    UkWordMapping.addInPosWordFeaturesStats(result, pos, wordFeatures, word);
                }
                first = false;
            }
        }
        return result;
    }

    /**
     * Appends {@code word} to the list stored under {@code pos} and
     * {@code wordFeatures}, creating the intermediate table and list when
     * they do not exist yet.
     *
     * @param table        POS -> (feature string -> words) table to update
     * @param pos          outer key
     * @param wordFeatures inner key
     * @param word         word token to record
     */
    public static void addInPosWordFeaturesStats(Hashtable<String, Hashtable<String, ArrayList<String>>> table, String pos, String wordFeatures, String word) {
        Hashtable<String, ArrayList<String>> posTable = table.get(pos);
        if (posTable == null) {
            posTable = new Hashtable<String, ArrayList<String>>();
            table.put(pos, posTable);
        }
        ArrayList<String> wordList = posTable.get(wordFeatures);
        if (wordList == null) {
            wordList = new ArrayList<String>();
            posTable.put(wordFeatures, wordList);
        }
        wordList.add(word);
    }

    /**
     * Returns the feature string that replaces {@code word}.
     *
     * @param word            the word to map
     * @param firstInTree     true when the word is the first lexical item of its tree
     * @param trainingDevelop callers in this class pass 0 when collecting training
     *                        statistics, 1 when collecting test statistics and -1 when
     *                        transforming a treebank; how the value is used is up to
     *                        the subclass
     * @return the feature string for the word
     */
    public abstract String getFeatureOfWord(String word, boolean firstInTree, int trainingDevelop);

    /** Prints one tab-separated line (POS, feature string, words) per bin of the training statistics. */
    public void printPosWordFeatureStatsTraining() {
        for (Map.Entry<String, Hashtable<String, ArrayList<String>>> posEntry : this.trainingPosWordFeatureStats.entrySet()) {
            String pos = posEntry.getKey();
            for (Map.Entry<String, ArrayList<String>> binEntry : posEntry.getValue().entrySet()) {
                System.out.println(pos + "\t" + binEntry.getKey() + "\t" + binEntry.getValue());
            }
        }
    }

    /**
     * Prints one tab-separated line per bin of the develop statistics,
     * followed by how the same feature string is distributed over POS tags in
     * the training statistics, and updates the bin counters.
     */
    public void printPosWordFeatureStatsDevelop() {
        // The bin counters summarise exactly one pass over the develop bins.
        this.totalDevelopBins = 0;
        this.awfullyBadMarkedBins = 0;
        this.badMarkedBins = 0;
        this.lessMarkedBins = 0;
        this.waveMarkedBins = 0;
        this.nonMarkedBins = 0;
        for (Map.Entry<String, Hashtable<String, ArrayList<String>>> posEntry : this.developPosWordFeaturesStats.entrySet()) {
            String pos = posEntry.getKey();
            for (Map.Entry<String, ArrayList<String>> binEntry : posEntry.getValue().entrySet()) {
                String wordFeatures = binEntry.getKey();
                String freqTraining = this.getFreqTraining(wordFeatures, pos);
                System.out.println(pos + "\t" + wordFeatures + "\t" + binEntry.getValue() + "\t" + freqTraining);
            }
        }
    }

    /**
     * Describes how often {@code wordFeatures} occurs with each POS in the
     * training statistics and classifies the develop bin
     * ({@code realPos}, {@code wordFeatures}) with a leading marker:
     * <ul>
     *   <li>{@code "!!"} - the feature string never occurs in training;</li>
     *   <li>{@code "!"}  - it occurs in training, but never with {@code realPos};</li>
     *   <li>{@code "<"}  - {@code realPos} is not the unique most frequent POS;</li>
     *   <li>{@code "~"}  - {@code realPos} is the unique most frequent POS but its
     *       frequency is below {@link #minFreqRealPos};</li>
     *   <li>no marker otherwise.</li>
     * </ul>
     * The matching bin counter is incremented as a side effect.
     *
     * @param wordFeatures feature string of the develop bin
     * @param realPos      POS of the develop bin
     * @return the marker followed by tab-separated "POS frequency sample" entries
     */
    private String getFreqTraining(String wordFeatures, String realPos) {
        StringBuilder result = new StringBuilder();
        int freqRealPos = -1;
        int maxFreq = -1;
        int numPosWithMax = 0;
        for (Map.Entry<String, Hashtable<String, ArrayList<String>>> posEntry : this.trainingPosWordFeatureStats.entrySet()) {
            String pos = posEntry.getKey();
            ArrayList<String> wordList = posEntry.getValue().get(wordFeatures);
            if (wordList == null) {
                continue;
            }
            int freq = wordList.size();
            result.append("\t").append(pos).append(" ").append(freq).append(" ").append(this.selectSample(wordList));
            if (pos.equals(realPos)) {
                freqRealPos = freq;
            }
            if (freq > maxFreq) {
                maxFreq = freq;
                // This POS is the first one holding the new maximum.
                numPosWithMax = 1;
            } else if (freq == maxFreq) {
                ++numPosWithMax;
            }
        }
        ++this.totalDevelopBins;
        if (result.length() == 0) {
            ++this.awfullyBadMarkedBins;
            return "!!";
        }
        if (freqRealPos <= 0) {
            ++this.badMarkedBins;
            result.insert(0, "!");
        } else if (freqRealPos != maxFreq || numPosWithMax > 1) {
            ++this.lessMarkedBins;
            result.insert(0, "<");
        } else if (freqRealPos < minFreqRealPos) {
            ++this.waveMarkedBins;
            result.insert(0, "~");
        } else {
            ++this.nonMarkedBins;
        }
        return result.toString();
    }

    /**
     * Formats the distinct words of {@code wordList} as a bracketed list.
     * When there are more than {@link #selectSampleNumber} distinct words, a
     * random selection of exactly {@link #selectSampleNumber} of them is
     * returned, followed by {@code "..."}.
     *
     * @param wordList word tokens, possibly with repetitions
     * @return string form of the (possibly sampled) distinct words
     */
    private String selectSample(ArrayList<String> wordList) {
        HashSet<String> distinctWords = new HashSet<String>(wordList);
        int size = distinctWords.size();
        if (size <= selectSampleNumber) {
            return distinctWords.toString();
        }
        // Partial Fisher-Yates shuffle: draws without replacement, so the
        // sample always contains selectSampleNumber different words.
        ArrayList<String> candidates = new ArrayList<String>(distinctWords);
        ArrayList<String> sample = new ArrayList<String>(Math.max(selectSampleNumber, 0) + 1);
        for (int i = 0; i < selectSampleNumber; i++) {
            int j = i + rand.nextInt(size - i);
            String picked = candidates.get(j);
            candidates.set(j, candidates.get(i));
            candidates.set(i, picked);
            sample.add(picked);
        }
        sample.add("...");
        return sample.toString();
    }

    /**
     * Creates a model from an option string.
     *
     * @param readStringOption {@code "std"} selects {@link UkWordMappingStd}; any
     *                         other value (including {@code null}) selects
     *                         {@link UkWordMappingPetrov}
     * @return a new model instance
     */
    public static UkWordMapping getModel(String readStringOption) {
        if ("std".equals(readStringOption)) {
            return new UkWordMappingStd();
        }
        return new UkWordMappingPetrov();
    }

    /**
     * Loads the WSJ sections 02-21 as training set and section 24 as develop
     * set, then initialises a Petrov model with threshold 1, printing the
     * parameter information and the training/develop comparison.
     *
     * @param args ignored
     * @throws Exception propagated from treebank loading
     */
    public static void main(String[] args) throws Exception {
        File trainingSet = new File(Wsj.WsjOriginalCleanedTopSemTagsOff + "wsj-02-21.mrg");
        File developSet = new File(Wsj.WsjOriginalCleanedTopSemTagsOff + "wsj-24.mrg");
        ArrayList<TSNodeLabel> trainingTreebank = TSNodeLabel.getTreebank(trainingSet);
        ArrayList<TSNodeLabel> developTreebank = new ArrayList<TSNodeLabel>();
        developTreebank.addAll(TSNodeLabel.getTreebank(developSet));
        printParamInfo = true;
        compareTrainTest = true;
        ukThreashold = 1;
        UkWordMappingPetrov ukModel = new UkWordMappingPetrov();
        ukModel.init(trainingTreebank, developTreebank);
    }
}

