/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Tag;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.AbstractUnknownWordModelTrainer;
import edu.stanford.nlp.parser.lexparser.BaseUnknownWordModel;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.UnknownGTTrainer;
import edu.stanford.nlp.parser.lexparser.UnknownWordModel;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import java.util.Map;
import java.util.Set;

/**
 * Trainer that accumulates the statistics handed to a {@link BaseUnknownWordModel}.
 *
 * <p>Usage as visible in this class: {@link #initializeTraining} allocates the
 * counters and builds the model object, {@link #train} is called once per tagged
 * word to update the counters, and {@link #finishTraining} converts the raw
 * per-tag signature counts into log probabilities and returns the model.
 *
 * <p>The fields {@code op}, {@code lex}, {@code wordIndex}, {@code tagIndex},
 * {@code treesRead} and {@code NULL_ITW} used below are not declared in this
 * class; presumably they are inherited from
 * {@link AbstractUnknownWordModelTrainer} -- confirm there.
 */
public class BaseUnknownWordModelTrainer
extends AbstractUnknownWordModelTrainer {
    /** Accumulated weight per word, keyed by an IntTaggedWord built from the word and the ".*." tag string. */
    ClassicCounter<IntTaggedWord> seenCounter;
    /** Accumulated weight per tag; {@link #finishTraining} adds one more to every tag it processes. */
    ClassicCounter<Label> tc;
    /** tag -> (signature -> accumulated weight). */
    Map<Label, ClassicCounter<String>> c;
    /**
     * Accumulated weight per tag (and under {@code NULL_ITW}) for words whose
     * seen count is still below 2 once counting has started; see {@link #train}.
     */
    ClassicCounter<IntTaggedWord> unSeenCounter;
    /** tag -> (signature -> log(count / tag total)); filled in by {@link #finishTraining}. */
    Map<Label, ClassicCounter<String>> tagHash;
    /** Every signature returned by the model during training. */
    Set<String> seenEnd;
    /** {@link #train} only updates {@link #unSeenCounter} while {@code treesRead} exceeds this value. */
    double indexToStartUnkCounting = 0.0;
    /** Good-Turing trainer; {@code null} unless {@link #useGT} is set. */
    UnknownGTTrainer unknownGTTrainer;
    /** Within this class, only selects which configuration message is printed. */
    boolean useEnd;
    /** Always set to {@code false} by {@link #initializeTraining}. */
    boolean useFirst;
    /** Within this class, only selects which configuration message is printed. */
    boolean useFirstCap;
    /** Whether the Good-Turing trainer is created and fed during training. */
    boolean useGT;
    /** Model created by {@link #buildUWM()} during initialization and returned by {@link #finishTraining}. */
    UnknownWordModel model;

    /**
     * Resets all counters, derives the feature flags from {@code op}, reports the
     * configuration on {@code System.err}, and builds the model.
     *
     * @param op         parser options; {@code lexOptions.unknownSuffixSize},
     *                   {@code lexOptions.useUnknownWordSignatures} and
     *                   {@code trainOptions.fractionBeforeUnseenCounting} are read here
     * @param lex        lexicon, forwarded to the superclass
     * @param wordIndex  word index, forwarded to the superclass
     * @param tagIndex   tag index, forwarded to the superclass
     * @param totalTrees multiplied by {@code fractionBeforeUnseenCounting} to get
     *                   {@link #indexToStartUnkCounting}
     */
    @Override
    public void initializeTraining(Options op, Lexicon lex, Index<String> wordIndex, Index<String> tagIndex, double totalTrees) {
        super.initializeTraining(op, lex, wordIndex, tagIndex, totalTrees);

        this.seenCounter = new ClassicCounter<IntTaggedWord>();
        this.unSeenCounter = new ClassicCounter<IntTaggedWord>();
        this.tagHash = Generics.newHashMap();
        this.tc = new ClassicCounter<Label>();
        this.c = Generics.newHashMap();
        this.seenEnd = Generics.newHashSet();

        this.useEnd = op.lexOptions.unknownSuffixSize > 0 && op.lexOptions.useUnknownWordSignatures > 0;
        this.useFirstCap = op.lexOptions.useUnknownWordSignatures > 0;
        this.useGT = op.lexOptions.useUnknownWordSignatures == 0;
        // Hard-wired off; the message below is kept so the switch can be flipped in one place.
        this.useFirst = false;

        if (this.useFirst) {
            System.err.println("Including first letter for unknown words.");
        }
        if (this.useFirstCap) {
            System.err.println("Including whether first letter is capitalized for unknown words");
        }
        if (this.useEnd) {
            System.err.println("Classing unknown word as the average of their equivalents by identity of last " + op.lexOptions.unknownSuffixSize + " letters.");
        }
        if (this.useGT) {
            System.err.println("Using Good-Turing smoothing for unknown words.");
        }

        this.indexToStartUnkCounting = totalTrees * op.trainOptions.fractionBeforeUnseenCounting;
        this.unknownGTTrainer = this.useGT ? new UnknownGTTrainer() : null;
        // Must come last: buildUWM() reads the counters and the GT trainer created above.
        this.model = this.buildUWM();
    }

    /**
     * Adds one tagged word occurrence to the training statistics.
     *
     * @param tw     the tagged word
     * @param loc    position argument passed through to the model's {@code getSignature}
     * @param weight amount added to every counter touched by this call
     */
    @Override
    public void train(TaggedWord tw, int loc, double weight) {
        if (this.useGT) {
            this.unknownGTTrainer.train(tw, weight);
        }

        String word = tw.word();
        String signature = this.model.getSignature(word, loc);
        String tagStr = tw.tag();
        Label tag = new Tag(tagStr);

        // Single lookup; the map only ever holds non-null counters created here.
        ClassicCounter<String> signatureCounts = this.c.get(tag);
        if (signatureCounts == null) {
            signatureCounts = new ClassicCounter<String>();
            this.c.put(tag, signatureCounts);
        }
        signatureCounts.incrementCount(signature, weight);
        this.tc.incrementCount(tag, weight);
        this.seenEnd.add(signature);

        // ".*." stands in for the tag so that the key identifies the word alone
        // (presumably the IntTaggedWord wildcard marker -- confirm).
        IntTaggedWord wordKey = new IntTaggedWord(word, ".*.", this.wordIndex, this.tagIndex);
        this.seenCounter.incrementCount(wordKey, weight);

        // Past the configured starting point, a word whose accumulated count is
        // still below 2 also contributes to the per-tag "unseen" statistics.
        if (this.treesRead > this.indexToStartUnkCounting && this.seenCounter.getCount(wordKey) < 2.0) {
            IntTaggedWord tagKey = new IntTaggedWord(".*.", tagStr, this.wordIndex, this.tagIndex);
            this.unSeenCounter.incrementCount(tagKey, weight);
            this.unSeenCounter.incrementCount(NULL_ITW, weight);
        }
    }

    /**
     * Converts the per-tag signature counts into log relative frequencies stored
     * in {@link #tagHash} and returns the model built during initialization.
     *
     * <p>For every tag, the "UNK" signature is given a count of 1.0 and the tag's
     * total in {@link #tc} is incremented by one before the ratios are taken.
     * Because that increment happens on each call, calling this method more than
     * once changes the resulting values.
     *
     * @return the model created in {@link #initializeTraining}
     */
    @Override
    public UnknownWordModel finishTraining() {
        if (this.useGT) {
            this.unknownGTTrainer.finishTraining();
        }

        for (Map.Entry<Label, ClassicCounter<String>> entry : this.c.entrySet()) {
            Label tag = entry.getKey();
            ClassicCounter<String> signatureCounts = entry.getValue();

            ClassicCounter<String> logProbs = this.tagHash.get(tag);
            if (logProbs == null) {
                logProbs = new ClassicCounter<String>();
                this.tagHash.put(tag, logProbs);
            }

            // Count "UNK" once for this tag: one extra unit in the tag total and
            // a fixed count of 1.0 for the signature itself.
            this.tc.incrementCount(tag);
            signatureCounts.setCount("UNK", 1.0);

            // The tag total does not change inside the inner loop.
            double tagTotal = this.tc.getCount(tag);
            for (String signature : signatureCounts.keySet()) {
                logProbs.setCount(signature, Math.log(signatureCounts.getCount(signature) / tagTotal));
            }
        }
        return this.model;
    }

    /**
     * Creates the model, handing it this trainer's {@link #unSeenCounter},
     * {@link #tagHash} and {@link #seenEnd} objects plus, when {@link #useGT} is
     * set, the Good-Turing trainer's {@code unknownGT} map ({@code null} otherwise).
     *
     * <p>NOTE(review): the collections are passed by reference before any training
     * has happened; presumably the model reads them live rather than copying --
     * confirm in BaseUnknownWordModel.
     *
     * @return a new {@link BaseUnknownWordModel}
     */
    protected UnknownWordModel buildUWM() {
        Map<String, Float> unknownGT = this.useGT ? this.unknownGTTrainer.unknownGT : null;
        return new BaseUnknownWordModel(this.op, this.lex, this.wordIndex, this.tagIndex, this.unSeenCounter, this.tagHash, unknownGT, this.seenEnd);
    }
}

