/*
 * Decompiled with CFR 0.152.
 */
package jigsaw.syntax;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jigsaw.syntax.SymbolTable;
import jigsaw.util.StringUtils;

/**
 * Word/tag lexicon backed by raw co-occurrence counts.
 *
 * <p>The lexicon keeps counts of (word, tag) pairs, per-word totals and per-tag totals
 * ("seen" counts), plus a second set of counts keyed by a word <em>signature</em>
 * (see {@link #getSignature(String, int)}) that is built from rare words by
 * {@link #buildUWModel()} and used to score words that were never observed
 * ("unseen" counts).
 *
 * <p>Instances are mutable and perform no synchronization.
 */
public class Lexicon implements Serializable {
    private static final long serialVersionUID = -537673587380271408L;

    /** One tab-separated field of a lexicon line: a tag, one space, and a decimal count. */
    private static final Pattern TAG_COUNT_FIELD = Pattern.compile("([^ ]+) ([0-9]+)");

    /** Log scores that are not strictly greater than this value are reported as negative infinity. */
    private static final double LOG_SCORE_FLOOR = -100.0;

    /** Suffix markers tried in this order by {@link #getSignature(String, int)}; the first match wins. */
    private static final String[] SUFFIX_MARKERS = {"ed", "ing", "ion", "er", "est", "ly", "ity", "y", "al"};

    // NOTE: the names and declared types of the instance fields below are kept exactly as they
    // were so that previously serialized instances remain readable.
    private SymbolTable _words = new SymbolTable();
    private SymbolTable _tags = null;
    /** Word id -> ids of every tag the word has been counted with. */
    private HashMap<Integer, HashSet<Integer>> _wordTags = new HashMap<>();
    /** Minimum {@link #scoreD} value for a tag to be proposed for an unknown word. */
    public double pruneThreshold = 0.001;
    /** Words whose total count is below this value feed the signature model in {@link #buildUWModel()}. */
    public int uwThreshold = 2;
    /** Known words whose count is not above this value have their tag estimate smoothed. */
    public int smoothInUnknownsThreshold = 10;
    /** In {@link #score}: smooth known words with the signature estimate instead of the plain unseen-tag ratio. */
    public boolean useSignatureForKnownSmoothing = true;
    /**
     * Two smoothing weights. {@link #scoreD} and {@link #scorePetrov} use index 0 for known words and
     * index 1 for signatures; {@link #score} uses index 0 for the signature estimate and index 1 for
     * the known-word interpolation.
     */
    public double[] smooth = new double[]{1.0, 1.0};
    /** "wordId+tagId" -> count. */
    private HashMap<String, Integer> _wtc = new HashMap<>();
    /** Word id -> count. */
    private HashMap<Integer, Integer> _wc = new HashMap<>();
    /** Tag id -> count. */
    private HashMap<Integer, Integer> _tc = new HashMap<>();
    /** Tag id -> count accumulated from rare words. */
    private HashMap<Integer, Integer> _unseentc = new HashMap<>();
    /** "signature+tagId" -> count accumulated from rare words. */
    private HashMap<String, Integer> _unseenstc = new HashMap<>();
    /** Signature -> count accumulated from rare words. */
    private HashMap<String, Integer> _unseensc = new HashMap<>();
    private int _totalSeen = 0;
    private int _totalUnseen = 0;

    /**
     * Creates an empty lexicon that registers tags in the given table.
     *
     * @param tags symbol table used for tag names; it is stored, not copied
     */
    public Lexicon(SymbolTable tags) {
        this._tags = tags;
    }

    /** Creates an empty lexicon with its own, initially empty, tag table. */
    public Lexicon() {
        this(new SymbolTable());
    }

    /** @return the tag symbol table used by this lexicon (the live instance) */
    public SymbolTable tags() {
        return this._tags;
    }

    /** @return the word string registered under id {@code w}, as reported by the word table */
    public String word(int w) {
        return this._words.lookup(w);
    }

    /** @return the id of {@code word} as reported by the word table (other code here treats -1 as "absent") */
    public int word(String word) {
        return this._words.lookup(word);
    }

    /** @return the tag string registered under id {@code t}, as reported by the tag table */
    public String tag(int t) {
        return this._tags.lookup(t);
    }

    /**
     * Loads counts from the named file and then rebuilds the signature model.
     *
     * <p>The file is closed even when reading or parsing fails. Parsing rules are those of
     * {@link #loadLexicon(BufferedReader)}.
     *
     * @param filename path of the lexicon file to read
     * @throws IOException if the file cannot be opened or read
     * @throws NumberFormatException if a count does not fit in an {@code int}
     */
    public void loadLexicon(String filename) throws IOException {
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            this.readEntries(br);
            this.buildUWModel();
        }
    }

    /**
     * Loads counts from {@code br} and then rebuilds the signature model.
     *
     * <p>Each line is {@code word<TAB>tag count<TAB>tag count...}. Reading stops at end of input or
     * at the first empty or whitespace-only line. Fields that do not look like {@code tag count}
     * are skipped. The reader is not closed.
     *
     * @param br source of lexicon lines
     * @throws IOException if reading fails
     * @throws NumberFormatException if a count does not fit in an {@code int}
     */
    public void loadLexicon(BufferedReader br) throws IOException {
        this.readEntries(br);
        this.buildUWModel();
    }

    /** Parses lexicon lines from {@code br} into the seen counts, up to the first blank line. */
    private void readEntries(BufferedReader br) throws IOException {
        String line;
        while ((line = br.readLine()) != null && line.trim().length() > 0) {
            String[] fields = line.split("\\t");
            int word = this._words.register(fields[0]);
            for (int i = 1; i < fields.length; ++i) {
                Matcher m = TAG_COUNT_FIELD.matcher(fields[i]);
                if (!m.matches()) {
                    continue;
                }
                int count = Integer.parseInt(m.group(2));
                int tag = this._tags.register(m.group(1));
                this.incrSeenCount(word, tag, count);
            }
        }
    }

    /**
     * Writes every word with its tag counts to {@code filename + ".lexicon"}.
     *
     * @param filename output path without the {@code .lexicon} extension
     * @throws IOException if the file cannot be created or written
     */
    public void dumpLexicon(String filename) throws IOException {
        this.writeLexiconFile(filename, 0);
    }

    /**
     * Writes words with their tag counts to {@code filename + ".lexicon"}, skipping rare words.
     *
     * @param filename output path without the {@code .lexicon} extension
     * @param thres    when positive, words whose total count is below it are omitted
     * @throws IOException if the file cannot be created or written
     */
    public void dumpLexicon(String filename, int thres) throws IOException {
        this.writeLexiconFile(filename, thres);
    }

    /**
     * Writes words with their tag counts to {@code pw}, followed by one empty line.
     *
     * <p>The writer is neither flushed nor closed.
     *
     * @param pw    destination
     * @param thres when positive, words whose total count is below it are omitted
     * @throws IOException if writing fails
     */
    public void dumpLexicon(Writer pw, int thres) throws IOException {
        this.writeEntries(pw, thres);
        // The trailing blank line is what loadLexicon treats as end of the lexicon section.
        pw.write("\n");
    }

    /** Opens {@code filename + ".lexicon"}, writes the entries, and always closes the file. */
    private void writeLexiconFile(String filename, int thres) throws IOException {
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(filename + ".lexicon"))) {
            this.writeEntries(bw, thres);
        }
    }

    /** Writes one {@code word<TAB>tag count...} line per word that passes the threshold. */
    private void writeEntries(Writer out, int thres) throws IOException {
        for (Map.Entry<Integer, HashSet<Integer>> entry : this._wordTags.entrySet()) {
            int word = entry.getKey();
            if (thres > 0 && this.getWordCount(word) < thres) {
                continue;
            }
            out.write(this._words.lookup(word));
            for (int tag : entry.getValue()) {
                out.write("\t" + this.tag(tag) + " " + this.getWordTagCount(word, tag));
            }
            out.write("\n");
        }
    }

    /**
     * Adds the counts of every rare word (total count below {@link #uwThreshold}) to the
     * signature-based "unseen" counts, using the word's signature computed with position -1.
     *
     * <p>Counts are added on top of whatever is already there, so calling this twice on the same
     * data doubles the unseen counts.
     */
    public void buildUWModel() {
        for (int w : this._wc.keySet()) {
            if (this.getWordCount(w) >= this.uwThreshold) {
                continue;
            }
            HashSet<Integer> tags = this._wordTags.get(w);
            if (tags == null) {
                continue;
            }
            // The signature depends only on the word, so compute it once per word.
            String sig = this.getSignature(this._words.lookup(w), -1);
            for (int t : tags) {
                this.incrUnseenCount(sig, t, this.getWordTagCount(w, t));
            }
        }
    }

    /** @return {@code true} if {@code word} has a positive seen count */
    public boolean isKnown(String word) {
        return this.getWordCount(word) > 0;
    }

    /** @return {@code true} if the word with id {@code word} has a positive seen count */
    public boolean isKnown(int word) {
        return this.getWordCount(word) > 0;
    }

    /**
     * Iterates over candidate tag ids for {@code word}.
     *
     * <p>For a known word these are the tags it was counted with. For an unknown word the result
     * is empty unless {@code smoothUnknown} is set, in which case it contains every tag id whose
     * {@link #scoreD} value reaches {@link #pruneThreshold}.
     *
     * @param word          surface form
     * @param loc           position passed on to the signature computation
     * @param smoothUnknown whether to propose tags for unknown words
     * @return an iterator over tag ids, never {@code null}
     */
    public Iterator<Integer> tagIteratorByWord(String word, int loc, boolean smoothUnknown) {
        if (this.isKnown(word)) {
            return this.tagIteratorByWord(this._words.lookup(word), loc);
        }
        if (!smoothUnknown) {
            return Collections.<Integer>emptyIterator();
        }
        List<Integer> tags = new ArrayList<>();
        int tagCount = this._tags.size();
        for (int t = 0; t < tagCount; ++t) {
            if (this.scoreD(word, t, loc) >= this.pruneThreshold) {
                tags.add(t);
            }
        }
        return tags.iterator();
    }

    /**
     * Returns the tag ids recorded for {@code word}.
     *
     * @param word surface form
     * @return the internal (live, mutable) tag set for the word, or an empty set if the word has none
     */
    public Set<Integer> lookup(String word) {
        int wordInt = this._words.lookup(word);
        if (wordInt == -1) {
            return Collections.emptySet();
        }
        HashSet<Integer> tags = this._wordTags.get(wordInt);
        if (tags == null) {
            return Collections.emptySet();
        }
        return tags;
    }

    /**
     * Iterates over the tag ids recorded for a word id.
     *
     * @param word word id
     * @param loc  unused
     * @return an iterator over the word's tag ids; empty if the id has no recorded tags
     */
    public Iterator<Integer> tagIteratorByWord(int word, int loc) {
        HashSet<Integer> tags = this._wordTags.get(word);
        if (tags == null) {
            return Collections.<Integer>emptyIterator();
        }
        return tags.iterator();
    }

    /**
     * Scores {@code word} under {@code tag} as a plain (non-log) value.
     *
     * <p>For a known word the result is {@code P(tag|word) * P(word) / P(tag)} computed from the
     * seen counts, with {@code P(tag|word)} smoothed as in {@link #scoreD}. If the word is unknown
     * and {@code loc == 0}, the form returned by {@code StringUtils.uncapitalize} is tried before
     * falling back to the signature model. For an unknown word the result is the signature
     * estimate divided by the raw seen count of the tag.
     *
     * <p>Diagnostic lines starting with {@code "break lex"} are printed to standard output when an
     * intermediate value is exactly zero.
     *
     * @param word surface form
     * @param tag  tag id
     * @param loc  position of the word; 0 enables the uncapitalized fallback
     * @return the score; may be infinite or NaN when the tag has no seen count
     */
    public double scorePetrov(String word, int tag, int loc) {
        int w = this._words.lookup(word);
        double wc = this.getWordCount(w);
        if (wc == 0.0 && loc == 0) {
            int uncapitalized = this._words.lookup(StringUtils.uncapitalize(word));
            wc = this.getWordCount(uncapitalized);
            if (wc != 0.0) {
                w = uncapitalized;
            }
        }
        if (wc > 0.0) {
            double probTagGivenWord = this.probTagGivenKnownWord(w, wc, tag);
            double probTag = this.getTagCount(tag) / (double) this._totalSeen;
            double probWord = wc / (double) this._totalSeen;
            if (probTag == 0.0) {
                System.out.println("break lex3 0.0");
                return 0.0;
            }
            double s = probTagGivenWord * probWord / probTag;
            if (s == 0.0) {
                System.out.println("break lex4 0.0");
            }
            return s;
        }
        double probTagGivenSig = this.probTagGivenSignature(word, tag, loc);
        if (probTagGivenSig == 0.0) {
            System.out.println("break lex 0.0");
        }
        return probTagGivenSig / this.getTagCount(tag);
    }

    /**
     * Estimates {@code P(tag|word)}.
     *
     * <p>For a known word with count above {@link #smoothInUnknownsThreshold} this is the relative
     * frequency of the tag for the word; for other known words it is interpolated with the share of
     * the tag among unseen counts using {@code smooth[0]}. For an unknown word it is the relative
     * frequency of the tag for the word's signature, interpolated the same way using
     * {@code smooth[1]}.
     *
     * @param word surface form
     * @param tag  tag id
     * @param loc  position passed on to the signature computation
     * @return the estimate
     */
    public double scoreD(String word, int tag, int loc) {
        int w = this._words.lookup(word);
        double wc = this.getWordCount(w);
        if (wc > 0.0) {
            return this.probTagGivenKnownWord(w, wc, tag);
        }
        return this.probTagGivenSignature(word, tag, loc);
    }

    /**
     * Share of {@code tag} among all unseen counts, as a floating-point ratio.
     *
     * <p>Returns 0 when no unseen counts have been collected, so callers never divide by zero.
     */
    private double unseenTagProbability(int tag) {
        if (this._totalUnseen == 0) {
            return 0.0;
        }
        return this.getUnseenTagCount(tag) / (double) this._totalUnseen;
    }

    /** Tag estimate for a known word id with seen count {@code wc}, smoothed for low counts. */
    private double probTagGivenKnownWord(int w, double wc, int tag) {
        double wtc = this.getWordTagCount(w, tag);
        if (wc > (double) this.smoothInUnknownsThreshold) {
            return wtc / wc;
        }
        return (wtc + this.smooth[0] * this.unseenTagProbability(tag)) / (wc + this.smooth[0]);
    }

    /** Tag estimate for the signature of {@code word} at position {@code loc}. */
    private double probTagGivenSignature(String word, int tag, int loc) {
        String sig = this.getSignature(word, loc);
        double sc = this.getUnseenSigCount(sig);
        double stc = this.getUnseenSigTagCount(sig, tag);
        return (stc + this.smooth[1] * this.unseenTagProbability(tag)) / (sc + this.smooth[1]);
    }

    /**
     * Log score of {@code word} under {@code tag}.
     *
     * <p>Known words: {@code log(P(tag|word) * P(word) / P(tag))}, where {@code P(tag|word)} is the
     * relative frequency when the word count exceeds {@link #smoothInUnknownsThreshold} and is
     * otherwise interpolated (weight {@code smooth[1]}) with either the signature estimate or the
     * unseen-tag ratio, depending on {@link #useSignatureForKnownSmoothing}.
     *
     * <p>Unknown words: {@link #scoreUnknown} at {@code loc} when {@code loc >= 0}; for negative
     * {@code loc} the log of a weighted average of the position-0 and position-1 scores with
     * weights 1 and 2.
     *
     * @param word surface form
     * @param tag  tag id
     * @param loc  position of the word, or a negative value if unknown
     * @return the log score, or negative infinity when it is not strictly greater than -100
     */
    public double score(String word, int tag, int loc) {
        int w = this._words.lookup(word);
        double wordTagCount = this.getWordTagCount(w, tag);
        double wordCount = this.getWordCount(w);
        double total = this._totalSeen;
        double tagCount = this.getTagCount(tag);
        double logScore;
        if (wordCount > 0.0) {
            double probTagGivenUnseen;
            if (this.useSignatureForKnownSmoothing) {
                probTagGivenUnseen = this.scoreProbTagGivenWordSignature(word, tag, loc, this.smooth[0]);
            } else {
                probTagGivenUnseen = (double) this.getUnseenTagCount(tag) / (double) this._totalUnseen;
            }
            double probTagGivenWord;
            if (wordCount > (double) this.smoothInUnknownsThreshold) {
                probTagGivenWord = wordTagCount / wordCount;
            } else {
                probTagGivenWord = (wordTagCount + this.smooth[1] * probTagGivenUnseen) / (wordCount + this.smooth[1]);
            }
            double probTag = tagCount / total;
            double probWord = wordCount / total;
            logScore = Math.log(probTagGivenWord * probWord / probTag);
        } else if (loc >= 0) {
            logScore = this.scoreUnknown(word, tag, loc, tagCount, total, this.smooth[0]);
        } else {
            double atStart = this.scoreUnknown(word, tag, 0, tagCount, total, this.smooth[0]);
            double elsewhere = this.scoreUnknown(word, tag, 1, tagCount, total, this.smooth[0]);
            logScore = Math.log((Math.exp(atStart) + 2.0 * Math.exp(elsewhere)) / 3.0);
        }
        return floorLogScore(logScore);
    }

    /**
     * Log score of an unknown word under {@code tag}, based on its signature.
     *
     * <p>Computes {@code log(P(tag|signature) * (1/total) / (c_Tseen/total))}.
     *
     * @param word    surface form
     * @param tag     tag id
     * @param loc     position passed on to the signature computation
     * @param c_Tseen seen count of the tag
     * @param total   total seen count
     * @param smooth  smoothing weight for the signature estimate
     * @return the log score, or negative infinity when it is not strictly greater than -100
     */
    public double scoreUnknown(String word, int tag, int loc, double c_Tseen, double total, double smooth) {
        double probTagGivenSig = this.scoreProbTagGivenWordSignature(word, tag, loc, smooth);
        double probWord = 1.0 / total;
        double probTag = c_Tseen / total;
        return floorLogScore(Math.log(probTagGivenSig * probWord / probTag));
    }

    /** Maps log scores that are NaN or not above {@link #LOG_SCORE_FLOOR} to negative infinity. */
    private static double floorLogScore(double logScore) {
        if (logScore > LOG_SCORE_FLOOR) {
            return logScore;
        }
        return Double.NEGATIVE_INFINITY;
    }

    /**
     * Smoothed estimate of {@code P(tag|signature of word)} from the unseen counts.
     *
     * <p>Computes {@code (c(sig,tag) + smooth * p) / (c(sig) + smooth)} with
     * {@code p = (c_unseen(tag) + smooth) / (totalUnseen + smooth)}.
     *
     * @param word   surface form
     * @param tag    tag id
     * @param loc    position passed on to the signature computation
     * @param smooth smoothing weight
     * @return the estimate
     */
    public double scoreProbTagGivenWordSignature(String word, int tag, int loc, double smooth) {
        String sig = this.getSignature(word, loc);
        double sigTagCount = this.getUnseenSigTagCount(sig, tag);
        double sigCount = this.getUnseenSigCount(sig);
        double unseenTotal = this._totalUnseen;
        double unseenTagCount = this.getUnseenTagCount(tag);
        double probTagGivenUnseen = (unseenTagCount + smooth) / (unseenTotal + smooth);
        return (sigTagCount + smooth * probTagGivenUnseen) / (sigCount + smooth);
    }

    /**
     * Builds a coarse orthographic signature for {@code word}.
     *
     * <p>The signature is a concatenation of markers, in this order:
     * <ul>
     *   <li>capitalization: {@code -INITC} (first character upper/title case, {@code loc == 0}, and
     *       exactly one capital in the word; followed by {@code -KNOWNLC} if the lower-cased form is
     *       known), {@code -CAPS}, or {@code -LC};</li>
     *   <li>{@code -NUM} if the word contains a digit, {@code -DASH} if it contains {@code '-'};</li>
     *   <li>a suffix marker: {@code -s} for words of at least three characters ending in "s" whose
     *       previous character is not s, i or u; otherwise, for words of at least five characters
     *       without a dash (and not mixing digits and capitals), one of
     *       ed/ing/ion/er/est/ly/ity/y/al.</li>
     * </ul>
     * Letters that are neither lower case nor title case are counted as capitals.
     *
     * @param word surface form; an empty string yields an empty signature
     * @param loc  position of the word; only {@code loc == 0} is treated specially
     * @return the signature, possibly empty
     */
    public String getSignature(String word, int loc) {
        int wlen = word.length();
        if (wlen == 0) {
            // Nothing to inspect; avoids charAt(0) failing on an empty word.
            return "";
        }
        int numCaps = 0;
        boolean hasDigit = false;
        boolean hasDash = false;
        boolean hasLower = false;
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
            } else if (ch == '-') {
                hasDash = true;
            } else if (Character.isLetter(ch)) {
                if (Character.isLowerCase(ch)) {
                    hasLower = true;
                } else if (Character.isTitleCase(ch)) {
                    hasLower = true;
                    ++numCaps;
                } else {
                    ++numCaps;
                }
            }
        }

        StringBuilder sb = new StringBuilder();
        char first = word.charAt(0);
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        String lowered = word.toLowerCase(Locale.ROOT);
        if (Character.isUpperCase(first) || Character.isTitleCase(first)) {
            if (loc == 0 && numCaps == 1) {
                sb.append("-INITC");
                if (this.isKnown(lowered)) {
                    sb.append("-KNOWNLC");
                }
            } else {
                sb.append("-CAPS");
            }
        } else if (!Character.isLetter(first) && numCaps > 0) {
            sb.append("-CAPS");
        } else if (hasLower) {
            sb.append("-LC");
        }
        if (hasDigit) {
            sb.append("-NUM");
        }
        if (hasDash) {
            sb.append("-DASH");
        }
        if (lowered.endsWith("s") && wlen >= 3) {
            char beforeLast = lowered.charAt(wlen - 2);
            if (beforeLast != 's' && beforeLast != 'i' && beforeLast != 'u') {
                sb.append("-s");
            }
        } else if (wlen >= 5 && !hasDash && !(hasDigit && numCaps > 0)) {
            appendSuffixMarker(sb, lowered);
        }
        return sb.toString();
    }

    /** Appends {@code "-" + suffix} for the first entry of {@link #SUFFIX_MARKERS} that {@code lowered} ends with. */
    private static void appendSuffixMarker(StringBuilder sb, String lowered) {
        for (String suffix : SUFFIX_MARKERS) {
            if (lowered.endsWith(suffix)) {
                sb.append('-').append(suffix);
                return;
            }
        }
    }

    /** Does nothing; kept for callers that invoke it. */
    public void train() {
    }

    /**
     * Adds {@code count} observations of word id {@code word} with tag id {@code tag}.
     *
     * <p>Updates the word/tag, word, tag and total seen counts and records the tag for the word.
     */
    public void incrSeenCount(int word, int tag, int count) {
        addCount(this._wtc, pairKey(String.valueOf(word), tag), count);
        addCount(this._wc, word, count);
        addCount(this._tc, tag, count);
        this._totalSeen += count;
        HashSet<Integer> seenTags = this._wordTags.get(word);
        if (seenTags == null) {
            seenTags = new HashSet<>();
            this._wordTags.put(word, seenTags);
        }
        seenTags.add(tag);
    }

    /**
     * Adds {@code count} observations of signature {@code sig} with tag id {@code tag} to the
     * unseen counts (signature/tag, tag, signature and total).
     */
    public void incrUnseenCount(String sig, int tag, int count) {
        addCount(this._unseenstc, pairKey(sig, tag), count);
        addCount(this._unseentc, tag, count);
        addCount(this._unseensc, sig, count);
        this._totalUnseen += count;
    }

    /** Builds the {@code "left+tag"} key used by the pair-count maps. */
    private static String pairKey(String left, int tag) {
        return left + "+" + tag;
    }

    /** Adds {@code delta} to the count stored under {@code key}, treating a missing entry as 0. */
    private static <K> void addCount(HashMap<K, Integer> counts, K key, int delta) {
        counts.put(key, countOf(counts, key) + delta);
    }

    /** Returns the count stored under {@code key}, or 0 if there is none. */
    private static <K> int countOf(HashMap<K, Integer> counts, K key) {
        Integer value = counts.get(key);
        return value == null ? 0 : value;
    }

    /** @return the seen count of the word id, or 0 if it has none */
    public int getWordCount(int word) {
        return countOf(this._wc, word);
    }

    /** @return the seen count of {@code word}, or 0 if the word table reports it as absent (-1) */
    public int getWordCount(String word) {
        int w = this._words.lookup(word);
        if (w == -1) {
            return 0;
        }
        return this.getWordCount(w);
    }

    /** @return the seen count of the tag id, or 0 if it has none */
    public int getTagCount(int tag) {
        return countOf(this._tc, tag);
    }

    /** @return how often word id {@code word} was counted with tag id {@code tag}, or 0 */
    public int getWordTagCount(int word, int tag) {
        return countOf(this._wtc, pairKey(String.valueOf(word), tag));
    }

    /** @return how often {@code word} was counted with tag id {@code tag}, or 0 if the word is absent */
    public int getWordTagCount(String word, int tag) {
        int w = this._words.lookup(word);
        if (w == -1) {
            return 0;
        }
        return this.getWordTagCount(w, tag);
    }

    /** @return the unseen count of the tag id, or 0 if it has none */
    public int getUnseenTagCount(int tag) {
        return countOf(this._unseentc, tag);
    }

    /** @return the unseen count of signature {@code sig} with tag id {@code tag}, or 0 */
    public int getUnseenSigTagCount(String sig, int tag) {
        return countOf(this._unseenstc, pairKey(sig, tag));
    }

    /** @return the unseen count of signature {@code sig}, or 0 if it has none */
    public int getUnseenSigCount(String sig) {
        return countOf(this._unseensc, sig);
    }

    /** Registers {@code wordS} in the word table and adds {@code count} observations with tag id {@code tag}. */
    public void incrSeenCount(String wordS, int tag, int count) {
        int word = this._words.register(wordS);
        this.incrSeenCount(word, tag, count);
    }

    /** Registers both {@code wordS} and {@code tag} and adds {@code count} observations of the pair. */
    public void incrSeenCount(String wordS, String tag, int count) {
        int word = this._words.register(wordS);
        int stag = this._tags.register(tag);
        this.incrSeenCount(word, stag, count);
    }

    /**
     * Interactive check: loads the lexicon named by {@code args[0]}, prints summary counts, then
     * reads words from standard input (one per line, until an empty line or end of input) and
     * prints each candidate tag with its {@link #scoreD} and {@link #score} values.
     *
     * @param args {@code args[0]} is the lexicon file to load
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Usage: java jigsaw.syntax.Lexicon <lexicon-file>");
            return;
        }
        try {
            Lexicon lexicon = new Lexicon();
            lexicon.loadLexicon(args[0]);
            System.out.println("Number of tags: " + lexicon._tags.size());
            System.out.println("Total seen words (form/instance): " + lexicon._words.size() + "/" + lexicon._totalSeen);
            System.out.println("Total unseen words (instance): " + lexicon._totalUnseen);
            BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
            String word;
            while ((word = br.readLine()) != null) {
                if (word.equals("")) {
                    break;
                }
                System.out.println(word + ": ");
                Iterator<Integer> iter = lexicon.tagIteratorByWord(word, 1, true);
                while (iter.hasNext()) {
                    int tag = iter.next();
                    double score = lexicon.score(word, tag, 0);
                    double scoreD = lexicon.scoreD(word, tag, 0);
                    System.out.println(lexicon.tag(tag) + "\t" + scoreD + "\t" + score);
                }
            }
        }
        catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}

