/*
 * Decompiled with CFR 0.152.
 */
package main.phrases;

import babel.content.corpora.accessors.CorpusAccessor;
import babel.content.corpora.accessors.CrawlCorpusAccessor;
import babel.content.corpora.accessors.EuroParlCorpusAccessor;
import babel.content.corpora.accessors.LexCorpusAccessor;
import babel.content.corpora.accessors.WikiTempCorpusAccessor;
import babel.content.eqclasses.EquivalenceClass;
import babel.content.eqclasses.SimpleEquivalenceClass;
import babel.content.eqclasses.collectors.EquivalenceClassCollector;
import babel.content.eqclasses.collectors.SimpleEquivalenceClassCollector;
import babel.content.eqclasses.comparators.LexComparator;
import babel.content.eqclasses.filters.DictionaryFilter;
import babel.content.eqclasses.filters.EquivalenceClassFilter;
import babel.content.eqclasses.filters.GarbageFilter;
import babel.content.eqclasses.filters.LengthFilter;
import babel.content.eqclasses.filters.NumOccurencesFilter;
import babel.content.eqclasses.filters.RomanizationFilter;
import babel.content.eqclasses.phrases.Phrase;
import babel.content.eqclasses.phrases.PhraseTable;
import babel.content.eqclasses.properties.context.Context;
import babel.content.eqclasses.properties.context.PhraseContextCollector;
import babel.content.eqclasses.properties.lshcontext.LSHContextCollector;
import babel.content.eqclasses.properties.lshorder.LSHPhraseContextCollector;
import babel.content.eqclasses.properties.lshtime.LSHTimeDistributionCollector;
import babel.content.eqclasses.properties.number.Number;
import babel.content.eqclasses.properties.number.NumberCollector;
import babel.content.eqclasses.properties.number.PhraseNumberCollector;
import babel.content.eqclasses.properties.order.PhraseContext;
import babel.content.eqclasses.properties.order.PhraseOrderCollector;
import babel.content.eqclasses.properties.time.PhraseTimeDistributionCollector;
import babel.content.eqclasses.properties.time.TimeDistribution;
import babel.content.eqclasses.properties.type.Type;
import babel.ranking.scorers.Scorer;
import babel.util.config.Configurator;
import babel.util.dict.Dictionary;
import babel.util.dict.SimpleDictionary;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class PhrasePreparer {
    /** Logger shared by all instances of this class. */
    protected static final Log LOG = LogFactory.getLog(PhrasePreparer.class);
    /** Phrase table currently loaded; assigned by readPhrases / readPhraseTableChunk. */
    protected PhraseTable m_phraseTable;
    /** Seed dictionary; assigned by prepareSeedDictionary. */
    protected Dictionary m_seedDict = null;
    /** Transliteration dictionary; stays null when no translit path is configured. */
    protected SimpleDictionary m_translitDict = null;
    /** Source-side contextual equivalence classes; assigned by collectContextEqs / filterContextEqs. */
    protected Set<EquivalenceClass> m_contextSrcEqs = null;
    /** Target-side contextual equivalence classes; assigned by collectContextEqs / filterContextEqs. */
    protected Set<EquivalenceClass> m_contextTrgEqs = null;
    /** Source phrases obtained from the phrase table via getAllSrcPhrases(). */
    protected Set<Phrase> m_srcPhrs = null;
    /** Target phrases obtained from the phrase table via getAllTrgPhrases(). */
    protected Set<Phrase> m_trgPhrs = null;
    /** Sorted source phrases not yet handed out by getNextChunk. */
    protected List<Phrase> m_srcPhrasesToProcess = null;
    /** Largest occurrence count among the source contextual equivalence classes. */
    protected long m_maxTokCountInSrc = 0L;
    /** Largest occurrence count among the target contextual equivalence classes. */
    protected long m_maxTokCountInTrg = 0L;
    /** Largest occurrence count among the source phrases. */
    protected long m_maxPhrCountInSrc = 0L;
    /** Largest occurrence count among the target phrases. */
    protected long m_maxPhrCountInTrg = 0L;

    /**
     * @return the phrase table currently held by this preparer (may be {@code null} before any read)
     */
    public PhraseTable getPhraseTable() {
        return m_phraseTable;
    }

    /**
     * @return the seed dictionary, or {@code null} if it has not been prepared yet
     */
    public Dictionary getSeedDict() {
        return m_seedDict;
    }

    /**
     * @return the transliteration dictionary, or {@code null} if none has been loaded
     */
    public SimpleDictionary getTranslitDict() {
        return m_translitDict;
    }

    /**
     * @return the largest occurrence count recorded for source contextual equivalence classes
     */
    public long getMaxSrcTokCount() {
        return m_maxTokCountInSrc;
    }

    /**
     * @return the largest occurrence count recorded for target contextual equivalence classes
     */
    public long getMaxTrgTokCount() {
        return m_maxTokCountInTrg;
    }

    /**
     * @return the largest occurrence count recorded for source phrases
     */
    public long getMaxSrcPhrCount() {
        return m_maxPhrCountInSrc;
    }

    /**
     * @return the largest occurrence count recorded for target phrases
     */
    public long getMaxTrgPhrCount() {
        return m_maxPhrCountInTrg;
    }

    /**
     * Removes the {@link TimeDistribution} and {@link Context} properties from each given phrase.
     *
     * @param phrases phrases to strip; {@code null} is accepted and treated as nothing to do
     */
    public void clearPhraseTableFeatures(Set<Phrase> phrases) {
        // Check for null before touching the set so that building the log message cannot throw.
        if (phrases == null) {
            return;
        }
        LOG.info((Object)(" - Removing context and time phrase properties for " + phrases.size() + " phrases ..."));
        String timeKey = TimeDistribution.class.getName();
        String contextKey = Context.class.getName();
        for (Phrase phrase : phrases) {
            phrase.removeProperty(timeKey);
            phrase.removeProperty(contextKey);
        }
    }

    /**
     * Removes the {@link PhraseContext} (ordering) property from each given phrase.
     *
     * @param phrases phrases to strip; {@code null} is accepted and treated as nothing to do
     */
    public void clearReorderingFeatures(Set<Phrase> phrases) {
        // Check for null before touching the set so that building the log message cannot throw.
        if (phrases == null) {
            return;
        }
        LOG.info((Object)(" - Removing ordering phrase properties for " + phrases.size() + " phrases ..."));
        String orderKey = PhraseContext.class.getName();
        for (Phrase phrase : phrases) {
            phrase.removeProperty(orderKey);
        }
    }

    /**
     * Reads the next chunk of the configured phrase table file into the phrase table,
     * creating the table first if none exists, and refreshes the source/target phrase sets.
     * The phrase table file is closed when a chunk comes back empty.
     *
     * @param chunkSize chunk size handed to the phrase table reader
     * @param verbose   whether to log progress
     * @return the number reported by the phrase table reader for this chunk (0 when done)
     * @throws Exception propagated from configuration access or phrase table reading
     */
    protected int readPhraseTableChunk(int chunkSize, boolean verbose) throws Exception {
        if (m_phraseTable == null) {
            m_phraseTable = new PhraseTable(Configurator.CONFIG.getBoolean("preprocessing.phrases.CaseSensitive"));
        }
        if (verbose) {
            LOG.info(" - Reading candidate phrases from the phrase table...");
        }

        String tablePath = Configurator.CONFIG.getString("resources.phrases.PhraseTable");
        int readCount = m_phraseTable.processPhraseTableFile(tablePath, chunkSize);
        m_srcPhrs = m_phraseTable.getAllSrcPhrases();
        m_trgPhrs = m_phraseTable.getAllTrgPhrases();

        if (readCount != 0) {
            if (verbose) {
                LOG.info(" - Source phrases: " + m_srcPhrs.size());
                LOG.info(" - Target phrases: " + m_trgPhrs.size());
            }
            return readCount;
        }

        // Empty chunk: the table has been fully consumed.
        m_phraseTable.closePhraseTableFile();
        if (verbose) {
            LOG.info(" - Read an empty chunk - done processing phrase table.");
        }
        return readCount;
    }

    /**
     * Loads a whole phrase table and caches its source and target phrase sets.
     *
     * @param readFromMono when {@code true} the table is read from the configured output
     *                     directory ("output.Path" + "/" + "output.PhraseTablePL"); otherwise
     *                     from "resources.phrases.PhraseTable"
     * @throws Exception propagated from configuration access or phrase table construction
     */
    protected void readPhrases(boolean readFromMono) throws Exception {
        boolean caseSensitive = Configurator.CONFIG.getBoolean("preprocessing.phrases.CaseSensitive");

        String tablePath;
        String kindLabel;
        if (readFromMono) {
            tablePath = Configurator.CONFIG.getString("output.Path") + "/" + Configurator.CONFIG.getString("output.PhraseTablePL");
            kindLabel = " mono ";
        } else {
            tablePath = Configurator.CONFIG.getString("resources.phrases.PhraseTable");
            kindLabel = " ";
        }

        LOG.info(" - Reading candidate phrases from the" + kindLabel + "phrase table (" + tablePath + ") ...");
        m_phraseTable = new PhraseTable(tablePath, -1, caseSensitive);
        m_srcPhrs = m_phraseTable.getAllSrcPhrases();
        m_trgPhrs = m_phraseTable.getAllTrgPhrases();
        LOG.info(" - Source phrases: " + m_srcPhrs.size());
        LOG.info(" - Target phrases: " + m_trgPhrs.size());
    }

    /**
     * Collects occurrence counts for the source and target phrases and, on request,
     * records the maximum count seen on each side.
     *
     * @param srcPhrs             source phrases to count
     * @param trgPhrs             target phrases to count
     * @param computePhraseCounts whether to update the per-side maximum phrase counts
     * @param verbose             whether to log the maxima (only used when they are computed)
     * @throws Exception propagated from configuration access or property collection
     */
    protected void collectNumberProps(Set<Phrase> srcPhrs, Set<Phrase> trgPhrs, boolean computePhraseCounts, boolean verbose) throws Exception {
        LOG.info(" - Collecting phrase counts...");
        int maxLen = Configurator.CONFIG.getInt("preprocessing.phrases.MaxPhraseLength");
        boolean keepCase = Configurator.CONFIG.getBoolean("preprocessing.phrases.CaseSensitive");

        collectNumberProps(true, srcPhrs, maxLen, keepCase);
        collectNumberProps(false, trgPhrs, maxLen, keepCase);

        if (!computePhraseCounts) {
            return;
        }
        m_maxPhrCountInSrc = collectMaxOccurrenceCount(srcPhrs);
        m_maxPhrCountInTrg = collectMaxOccurrenceCount(trgPhrs);
        if (verbose) {
            LOG.info(" - Source phrases max occurrences = " + m_maxPhrCountInSrc);
            LOG.info(" - Target phrases max occurrences = " + m_maxPhrCountInTrg);
        }
    }

    /**
     * Collects occurrence counts for one side's phrases from the corpus configured
     * under "preprocessing.input.Context".
     *
     * @param src             {@code true} for the source side, {@code false} for the target side
     * @param phrases         phrases to count
     * @param maxPhraseLength maximum phrase length passed to the collector
     * @param caseSensitive   case-sensitivity flag passed to the collector
     * @throws Exception propagated from accessor creation or property collection
     */
    protected void collectNumberProps(boolean src, Set<Phrase> phrases, int maxPhraseLength, boolean caseSensitive) throws Exception {
        String corpusKind = Configurator.CONFIG.getString("preprocessing.input.Context");
        CorpusAccessor corpus = getAccessor(corpusKind, src);
        PhraseNumberCollector counter = new PhraseNumberCollector(maxPhraseLength, caseSensitive);
        counter.collectProperty(corpus, phrases);
    }

    /**
     * Tags source phrases with the SOURCE type and target phrases with the TARGET type.
     *
     * @param srcPhrs phrases to mark as source
     * @param trgPhrs phrases to mark as target
     */
    protected void collectTypeProp(Set<Phrase> srcPhrs, Set<Phrase> trgPhrs) {
        LOG.info(" - Assigning type phrase properties...");
        assignTypeProp(srcPhrs, Type.EqType.SOURCE);
        assignTypeProp(trgPhrs, Type.EqType.TARGET);
    }

    /**
     * Collects ordering properties for the given source and target phrases.
     * The keep-probability for contextual phrases comes from
     * "preprocessing.phrases.reordering.ContPhraseKeepProb" and defaults to 1.0 when the key is absent.
     *
     * @param srcPhrases source phrases to collect ordering properties for
     * @param trgPhrases target phrases to collect ordering properties for
     * @throws Exception propagated from configuration access or property collection
     */
    protected void collectOrderProps(Set<Phrase> srcPhrases, Set<Phrase> trgPhrases) throws Exception {
        LOG.info(" - Collecting phrase ordering properties for " + srcPhrases.size() + " source and " + trgPhrases.size() + " target phrases " + " ...");
        int maxLen = Configurator.CONFIG.getInt("preprocessing.phrases.MaxPhraseLength");
        boolean keepCase = Configurator.CONFIG.getBoolean("preprocessing.phrases.CaseSensitive");

        double keepProb = 1.0;
        if (Configurator.CONFIG.containsKey("preprocessing.phrases.reordering.ContPhraseKeepProb")) {
            keepProb = Configurator.CONFIG.getDouble("preprocessing.phrases.reordering.ContPhraseKeepProb");
        }
        if (keepProb == 1.0) {
            LOG.warn(" - Keeping ALL contextual phrases at collection");
        }

        collectOrderProps(true, srcPhrases, maxLen, m_maxPhrCountInSrc, keepCase, m_srcPhrs, keepProb);
        collectOrderProps(false, trgPhrases, maxLen, m_maxPhrCountInTrg, keepCase, m_trgPhrs, keepProb);
    }

    /**
     * Collects ordering properties for one side's phrases from the corpus configured
     * under "preprocessing.input.Context".
     *
     * @param src                    {@code true} for the source side, {@code false} for the target side
     * @param phrases                phrases to collect ordering properties for
     * @param maxPhraseLength        maximum phrase length passed to the collector
     * @param maxPhraseCountInCorpus maximum phrase occurrence count passed to the collector
     * @param caseSensitive          case-sensitivity flag passed to the collector
     * @param allPhrases             full phrase set passed to the collector
     * @param keepContPhraseProb     keep-probability for contextual phrases passed to the collector
     * @throws Exception propagated from accessor creation or property collection
     */
    protected void collectOrderProps(boolean src, Set<Phrase> phrases, int maxPhraseLength, long maxPhraseCountInCorpus, boolean caseSensitive, Set<Phrase> allPhrases, double keepContPhraseProb) throws Exception {
        String corpusKind = Configurator.CONFIG.getString("preprocessing.input.Context");
        CorpusAccessor corpus = getAccessor(corpusKind, src);
        PhraseOrderCollector orderCollector = new PhraseOrderCollector(src, maxPhraseLength, caseSensitive, maxPhraseCountInCorpus, allPhrases, keepContPhraseProb);
        orderCollector.collectProperty(corpus, phrases);
    }

    /**
     * Collects context and time-distribution properties for both sides and, when
     * "preprocessing.time.Align" is set, aligns the temporal distributions afterwards.
     *
     * @param srcPhrases source phrases to collect properties for
     * @param trgPhrases target phrases to collect properties for
     * @throws Exception propagated from configuration access or property collection
     */
    protected void collectContextAndTimeProps(Set<Phrase> srcPhrases, Set<Phrase> trgPhrases) throws Exception {
        LOG.info(" - Collecting context and time phrase properties for " + srcPhrases.size() + " source and " + trgPhrases.size() + " target phrases " + " ...");
        int maxLen = Configurator.CONFIG.getInt("preprocessing.phrases.MaxPhraseLength");
        boolean keepCase = Configurator.CONFIG.getBoolean("preprocessing.phrases.CaseSensitive");
        int window = Configurator.CONFIG.getInt("preprocessing.context.Window");
        boolean align = Configurator.CONFIG.getBoolean("preprocessing.time.Align");

        Set<Integer> srcBins = collectContextAndTimeProps(true, srcPhrases, maxLen, m_contextSrcEqs, window, keepCase);
        Set<Integer> trgBins = collectContextAndTimeProps(false, trgPhrases, maxLen, m_contextTrgEqs, window, keepCase);

        if (!align) {
            return;
        }
        LOG.info(" - Aligning temporal distributions...");
        alignDistributions(srcBins, trgBins, srcPhrases, trgPhrases);
    }

    /**
     * Checks that the source and target WikiTemp corpora list the same files in the same order,
     * comparing file names with their last three characters removed.
     * On the first mismatching pair both names are printed to standard output.
     *
     * @return {@code true} if both lists have equal length and match position by position
     * @throws Exception propagated from accessor creation or file listing
     */
    public boolean checkWikiTemp() throws Exception {
        WikiTempCorpusAccessor sourceSide = getWikiTempAccessor(true);
        WikiTempCorpusAccessor targetSide = getWikiTempAccessor(false);
        String[] srcNames = sourceSide.getFileList().getFileNames();
        String[] trgNames = targetSide.getFileList().getFileNames();

        // Drop the trailing three characters of every name before comparing.
        String[] srcStems = new String[srcNames.length];
        for (int k = 0; k < srcNames.length; k++) {
            String name = srcNames[k];
            srcStems[k] = name.substring(0, name.length() - 3);
        }
        String[] trgStems = new String[trgNames.length];
        for (int k = 0; k < trgNames.length; k++) {
            String name = trgNames[k];
            trgStems[k] = name.substring(0, name.length() - 3);
        }

        if (srcStems.length != trgStems.length) {
            return false;
        }
        for (int k = 0; k < srcStems.length; k++) {
            if (srcStems[k].equals(trgStems[k])) {
                continue;
            }
            System.out.println("SOURCE FILES:" + srcStems[k]);
            System.out.println("TARGET FILES:" + trgStems[k]);
            return false;
        }
        return true;
    }

    /**
     * Collects context properties (from the "preprocessing.input.Context" corpus) and then
     * time distributions (from the "preprocessing.input.Time" corpus) for one side's phrases.
     *
     * @param src               {@code true} for the source side, {@code false} for the target side
     * @param phrases           phrases to collect properties for
     * @param maxPhraseLength   maximum phrase length passed to both collectors
     * @param contextEqs        contextual equivalence classes passed to the context collector
     * @param contextWindowSize window size, used for both window arguments of the context collector
     * @param caseSensitive     case-sensitivity flag passed to both collectors
     * @return the bins reported by the time-distribution collector
     * @throws Exception propagated from accessor creation or property collection
     */
    protected Set<Integer> collectContextAndTimeProps(boolean src, Set<Phrase> phrases, int maxPhraseLength, Set<EquivalenceClass> contextEqs, int contextWindowSize, boolean caseSensitive) throws Exception {
        CorpusAccessor contextCorpus = getAccessor(Configurator.CONFIG.getString("preprocessing.input.Context"), src);
        PhraseContextCollector contextCollector = new PhraseContextCollector(maxPhraseLength, caseSensitive, contextWindowSize, contextWindowSize, contextEqs);
        contextCollector.collectProperty(contextCorpus, phrases);

        CorpusAccessor timeCorpus = getAccessor(Configurator.CONFIG.getString("preprocessing.input.Time"), src);
        PhraseTimeDistributionCollector timeCollector = new PhraseTimeDistributionCollector(maxPhraseLength, caseSensitive);
        timeCollector.collectProperty(timeCorpus, phrases);
        return timeCollector.binsCollected();
    }

    /**
     * Builds the source and target contextual equivalence classes and records the maximum
     * occurrence count on each side.
     * <p>
     * Romanization filtering is enabled per side only when the corresponding
     * "preprocessing.FilterRomanSrc" / "preprocessing.FilterRomanTrg" key is present and true.
     * The equivalence class implementations are loaded by name from
     * "preprocessing.context.SrcEqClass" and "preprocessing.context.TrgEqClass".
     * Collection is always invoked with the case-sensitive flag set to {@code true}.
     *
     * @throws Exception propagated from configuration access, class loading or collection
     */
    protected void collectContextEqs() throws Exception {
        LOG.info((Object)" - Constructing contextual equivalence classes...");
        boolean filterRomanSrc = Configurator.CONFIG.containsKey("preprocessing.FilterRomanSrc") && Configurator.CONFIG.getBoolean("preprocessing.FilterRomanSrc");
        boolean filterRomanTrg = Configurator.CONFIG.containsKey("preprocessing.FilterRomanTrg") && Configurator.CONFIG.getBoolean("preprocessing.FilterRomanTrg");

        // Class.forName returns Class<?>, so an explicit (unchecked) cast is required here.
        // The configured names are expected to denote EquivalenceClass implementations.
        @SuppressWarnings("unchecked")
        Class<EquivalenceClass> srcContClassClass = (Class<EquivalenceClass>)Class.forName(Configurator.CONFIG.getString("preprocessing.context.SrcEqClass"));
        @SuppressWarnings("unchecked")
        Class<EquivalenceClass> trgContClassClass = (Class<EquivalenceClass>)Class.forName(Configurator.CONFIG.getString("preprocessing.context.TrgEqClass"));

        this.m_contextSrcEqs = this.collectContextEqs(true, true, filterRomanSrc, srcContClassClass);
        this.m_contextTrgEqs = this.collectContextEqs(false, true, filterRomanTrg, trgContClassClass);
        this.m_maxTokCountInSrc = this.collectMaxOccurrenceCount(this.m_contextSrcEqs);
        this.m_maxTokCountInTrg = this.collectMaxOccurrenceCount(this.m_contextTrgEqs);
        LOG.info((Object)(" - Source context classes = " + this.m_contextSrcEqs.size() + ", max occurrences = " + this.m_maxTokCountInSrc));
        LOG.info((Object)(" - Target context classes = " + this.m_contextTrgEqs.size() + ", max occurrences = " + this.m_maxTokCountInTrg));
    }

    /**
     * Collects contextual equivalence classes for one side from the corpus configured under
     * "preprocessing.input.Context": gathers simple classes through garbage, length and
     * (optionally) romanization filters, counts them, rebuilds them as instances of the
     * requested class, and tags them with the side's type.
     *
     * @param src               {@code true} for the source side, {@code false} for the target side
     * @param caseSensitive     case-sensitivity flag passed to the collectors
     * @param filterRoman       whether to add a {@link RomanizationFilter}
     * @param contextClassClass equivalence class implementation to instantiate
     * @return the resulting equivalence classes
     * @throws Exception propagated from accessor creation, collection or class instantiation
     */
    protected Set<EquivalenceClass> collectContextEqs(boolean src, boolean caseSensitive, boolean filterRoman, Class<EquivalenceClass> contextClassClass) throws Exception {
        ArrayList<EquivalenceClassFilter> eqFilters = new ArrayList<EquivalenceClassFilter>(3);
        eqFilters.add(new GarbageFilter());
        eqFilters.add(new LengthFilter(2));
        if (filterRoman) {
            eqFilters.add(new RomanizationFilter());
        }

        String corpusKind = Configurator.CONFIG.getString("preprocessing.input.Context");
        CorpusAccessor corpus = getAccessor(corpusKind, src);
        SimpleEquivalenceClassCollector eqCollector = new SimpleEquivalenceClassCollector(eqFilters, caseSensitive);
        Set<EquivalenceClass> simpleEqs = eqCollector.collect(corpus.getCorpusReader(), -1);
        new NumberCollector(caseSensitive).collectProperty(corpus, simpleEqs);

        Set<EquivalenceClass> builtEqs = constructEqClasses(src, simpleEqs, contextClassClass);
        Type.EqType side = src ? Type.EqType.SOURCE : Type.EqType.TARGET;
        assignTypeProp(builtEqs, side);
        return builtEqs;
    }

    /**
     * Filters both sides' contextual equivalence classes using the occurrence bounds from
     * "preprocessing.context.PruneEqIfOccursFewerThan" and
     * "preprocessing.context.PruneEqIfOccursMoreThan", replacing the stored sets.
     *
     * @throws Exception propagated from configuration access or filtering
     */
    protected void filterContextEqs() throws Exception {
        int lowerBound = Configurator.CONFIG.getInt("preprocessing.context.PruneEqIfOccursFewerThan");
        int upperBound = Configurator.CONFIG.getInt("preprocessing.context.PruneEqIfOccursMoreThan");
        m_contextSrcEqs = filterContextEqs(true, m_contextSrcEqs, lowerBound, upperBound);
        m_contextTrgEqs = filterContextEqs(false, m_contextTrgEqs, lowerBound, upperBound);
    }

    /**
     * Filters one side's contextual equivalence classes through a seed-dictionary filter and
     * two occurrence-count filters built from the given bounds.
     *
     * @param src                          {@code true} for the source side, {@code false} for the target side
     * @param eqs                          equivalence classes to filter
     * @param pruneContEqIfOccursFewerThan bound passed to the first occurrence filter
     * @param pruneContEqIfOccursMoreThan  bound passed to the second occurrence filter
     * @return the equivalence classes that passed all filters
     * @throws Exception propagated from filtering
     */
    protected Set<EquivalenceClass> filterContextEqs(boolean src, Set<EquivalenceClass> eqs, int pruneContEqIfOccursFewerThan, int pruneContEqIfOccursMoreThan) throws Exception {
        String side = src ? "source" : "target";
        LOG.info(" - Filtering " + side + " contextual words: keeping those in dict [" + m_seedDict.toString() + "] and occuring (" + pruneContEqIfOccursFewerThan + "," + pruneContEqIfOccursMoreThan + ") times...");

        LinkedList<EquivalenceClassFilter> eqFilters = new LinkedList<EquivalenceClassFilter>();
        eqFilters.add(new DictionaryFilter(m_seedDict, true, src));
        eqFilters.add(new NumOccurencesFilter(pruneContEqIfOccursFewerThan, true));
        eqFilters.add(new NumOccurencesFilter(pruneContEqIfOccursMoreThan, false));

        Set<EquivalenceClass> kept = EquivalenceClassCollector.filter(eqs, eqFilters);
        LOG.info(" - Filtered context " + side + " classes: " + kept.size());
        return kept;
    }

    /**
     * Runs the full preparation pipeline: reads the phrase table, collects counts and types,
     * builds and filters contextual equivalence classes (loading the seed and transliteration
     * dictionaries in between), then collects context/time and ordering properties.
     *
     * @throws Exception propagated from any preparation step
     */
    public void prepareForFeaturesAndOrderCollection() throws Exception {
        LOG.info(" - Preparing phrases...");

        // Phrases and their basic properties.
        readPhrases(false);
        collectNumberProps(m_srcPhrs, m_trgPhrs, true, true);
        collectTypeProp(m_srcPhrs, m_trgPhrs);

        // Contextual classes and dictionaries.
        collectContextEqs();
        prepareSeedDictionary(m_contextSrcEqs, m_contextTrgEqs);
        prepareTranslitDictionary(m_contextSrcEqs);
        filterContextEqs();

        // Feature properties.
        collectContextAndTimeProps(m_srcPhrs, m_trgPhrs);
        collectOrderProps(m_srcPhrs, m_trgPhrs);
    }

    /**
     * Prepares for chunked collection of monolingual features: reads the phrase table,
     * collects counts and types, builds and filters contextual equivalence classes, and
     * queues the source phrases in sorted order for {@code getNextChunk}.
     *
     * @throws Exception propagated from any preparation step
     */
    public void prepareForChunkFeaturesCollection() throws Exception {
        LOG.info(" - Preparing phrases for estimating monolingual features only ...");

        readPhrases(false);
        collectNumberProps(m_srcPhrs, m_trgPhrs, true, true);
        collectTypeProp(m_srcPhrs, m_trgPhrs);

        collectContextEqs();
        prepareSeedDictionary(m_contextSrcEqs, m_contextTrgEqs);
        prepareTranslitDictionary(m_contextSrcEqs);
        filterContextEqs();

        List<Phrase> pending = new ArrayList<Phrase>(m_srcPhrs);
        m_srcPhrasesToProcess = pending;
        Collections.sort(pending, new LexComparator(true));
    }

    /**
     * Collects context and time properties for the given phrases.
     *
     * @param srcPhrases source phrases to collect properties for
     * @param trgPhrases target phrases to collect properties for
     * @throws Exception propagated from property collection
     */
    public void collectPropsForFeaturesOnly(Set<Phrase> srcPhrases, Set<Phrase> trgPhrases) throws Exception {
        collectContextAndTimeProps(srcPhrases, trgPhrases);
    }

    /**
     * Prepares for chunked monolingual feature collection without keeping the phrase table:
     * reads the table chunk by chunk to determine the maximum phrase occurrence count on each
     * side, discards the table and phrase sets, then builds and filters the contextual
     * equivalence classes and loads the dictionaries.
     *
     * @param chunkSize chunk size used while scanning the phrase table
     * @throws Exception propagated from any preparation step
     */
    public void prepareForChunkFeaturesCollectionForAnni(int chunkSize) throws Exception {
        LOG.info(" - Preparing phrases for estimating monolingual features only ...");
        long bestSrc = 0L;
        long bestTrg = 0L;
        m_maxPhrCountInTrg = 0L;
        m_maxPhrCountInSrc = 0L;

        // Each pass refreshes the per-side maxima; keep the largest seen across passes.
        while (readPhraseTableChunk(chunkSize, false) > 0) {
            collectNumberProps(m_srcPhrs, m_trgPhrs, true, false);
            bestSrc = Math.max(bestSrc, m_maxPhrCountInSrc);
            bestTrg = Math.max(bestTrg, m_maxPhrCountInTrg);
        }
        m_maxPhrCountInSrc = bestSrc;
        m_maxPhrCountInTrg = bestTrg;

        // Release the table and phrase sets.
        m_phraseTable = null;
        m_srcPhrs = null;
        m_trgPhrs = null;

        LOG.info(" - Source phrases max occurrences = " + m_maxPhrCountInSrc);
        LOG.info(" - Target phrases max occurrences = " + m_maxPhrCountInTrg);

        collectContextEqs();
        prepareSeedDictionary(m_contextSrcEqs, m_contextTrgEqs);
        prepareTranslitDictionary(m_contextSrcEqs);
        filterContextEqs();
    }

    /**
     * Reads the next phrase table chunk and, if it is non-empty, collects counts and types
     * for its phrases and queues the source phrases in sorted order. An empty chunk leaves an
     * empty queue.
     *
     * @param chunkSize chunk size passed to the phrase table reader
     * @param chunkNum  chunk number, used only in the log message
     * @return the number reported by the phrase table reader for this chunk
     * @throws Exception propagated from reading or property collection
     */
    public int readNextChunkForAnni(int chunkSize, int chunkNum) throws Exception {
        LOG.info(" - Reading chunk " + chunkNum + " of phrase table ...");
        int readCount = readPhraseTableChunk(chunkSize, true);

        if (readCount <= 0) {
            m_srcPhrasesToProcess = new ArrayList<Phrase>();
            return readCount;
        }

        collectNumberProps(m_srcPhrs, m_trgPhrs, false, false);
        collectTypeProp(m_srcPhrs, m_trgPhrs);
        m_srcPhrasesToProcess = new ArrayList<Phrase>(m_srcPhrs);
        Collections.sort(m_srcPhrasesToProcess, new LexComparator(true));
        return readCount;
    }

    /**
     * Prepares for chunked collection of ordering features: reads the mono phrase table,
     * collects counts and types, and queues the source phrases in sorted order.
     *
     * @throws Exception propagated from any preparation step
     */
    public void prepareForChunkOrderCollection() throws Exception {
        LOG.info(" - Preparing phrases for estimating ordering features only ...");

        readPhrases(true);
        collectNumberProps(m_srcPhrs, m_trgPhrs, true, true);
        collectTypeProp(m_srcPhrs, m_trgPhrs);

        List<Phrase> pending = new ArrayList<Phrase>(m_srcPhrs);
        m_srcPhrasesToProcess = pending;
        Collections.sort(pending, new LexComparator(true));
    }

    /**
     * Collects ordering properties for the given phrases.
     *
     * @param srcPhrases source phrases to collect properties for
     * @param trgPhrases target phrases to collect properties for
     * @throws Exception propagated from property collection
     */
    public void collectPropsForOrderOnly(Set<Phrase> srcPhrases, Set<Phrase> trgPhrases) throws Exception {
        collectOrderProps(srcPhrases, trgPhrases);
    }

    /**
     * Removes up to {@code chunkSize} phrases from the front of the pending source phrase
     * queue and returns them as a set.
     *
     * @param chunkSize maximum number of phrases to return; must be positive to get a chunk
     * @return the next chunk, or {@code null} when {@code chunkSize} is not positive or no
     *         phrases are pending
     */
    public Set<Phrase> getNextChunk(int chunkSize) {
        if (chunkSize <= 0 || this.m_srcPhrasesToProcess == null || this.m_srcPhrasesToProcess.isEmpty()) {
            return null;
        }
        int maxIdx = Math.min(chunkSize, this.m_srcPhrasesToProcess.size());
        // Copy the head of the queue; the sub-list itself is only a view.
        Set<Phrase> chunk = new HashSet<Phrase>(this.m_srcPhrasesToProcess.subList(0, maxIdx));
        if (maxIdx < chunkSize) {
            // Fewer phrases than requested were left, so the queue is now exhausted.
            this.m_srcPhrasesToProcess.clear();
        } else {
            this.m_srcPhrasesToProcess = this.m_srcPhrasesToProcess.subList(maxIdx, this.m_srcPhrasesToProcess.size());
        }
        return chunk;
    }

    /**
     * Loads the seed dictionary from the directory configured under
     * "resources.dictionary.Path" — from a single file when "resources.dictionary.Dictionary"
     * is set, otherwise from separate source/target halves — prunes it, and wraps it together
     * with the given contextual equivalence classes.
     * The pruning argument comes from "experiments.DictionaryPruneNumTranslations" (-1 when absent).
     *
     * @param srcContEqs source contextual equivalence classes
     * @param trgContEqs target contextual equivalence classes
     * @throws Exception propagated from configuration access or dictionary loading
     */
    protected void prepareSeedDictionary(Set<EquivalenceClass> srcContEqs, Set<EquivalenceClass> trgContEqs) throws Exception {
        String dictDir = Configurator.CONFIG.getString("resources.dictionary.Path");
        int pruneTo = -1;
        if (Configurator.CONFIG.containsKey("experiments.DictionaryPruneNumTranslations")) {
            pruneTo = Configurator.CONFIG.getInt("experiments.DictionaryPruneNumTranslations");
        }
        LOG.info(" - Reading/preparing seed dictionary ...");

        SimpleDictionary rawDict;
        if (Configurator.CONFIG.containsKey("resources.dictionary.Dictionary")) {
            String fileName = Configurator.CONFIG.getString("resources.dictionary.Dictionary");
            rawDict = new SimpleDictionary(dictDir + fileName, "SeedDictionary");
        } else {
            String srcFileName = Configurator.CONFIG.getString("resources.dictionary.SrcName");
            String trgFileName = Configurator.CONFIG.getString("resources.dictionary.TrgName");
            SimpleDictionary.DictHalves halves = new SimpleDictionary.DictHalves(dictDir + srcFileName, dictDir + trgFileName);
            rawDict = new SimpleDictionary(halves, "SeedDictionary");
        }
        rawDict.pruneCounts(pruneTo);

        m_seedDict = new Dictionary(srcContEqs, trgContEqs, rawDict, "SeedDictionary");
        LOG.info(" - Seed dictionary: " + m_seedDict.toString());
    }

    /**
     * Loads the transliteration dictionary when "resources.translit.Path" is configured and
     * non-blank — from a single file when "resources.translit.Dictionary" is set, otherwise
     * from separate source/target halves. When no path is configured nothing is loaded.
     *
     * @param srcContEqs source contextual equivalence classes (not used by this method)
     * @throws Exception propagated from configuration access or dictionary loading
     */
    protected void prepareTranslitDictionary(Set<EquivalenceClass> srcContEqs) throws Exception {
        LOG.info(" - Reading/preparing transliteration dictionary ...");

        String dictDir = null;
        if (Configurator.CONFIG.containsKey("resources.translit.Path")) {
            dictDir = Configurator.CONFIG.getString("resources.translit.Path");
        }
        if (dictDir == null || dictDir.trim().length() == 0) {
            LOG.info(" - No transliteration dictionary specified");
            return;
        }

        if (Configurator.CONFIG.containsKey("resources.translit.Dictionary")) {
            String fileName = Configurator.CONFIG.getString("resources.translit.Dictionary");
            m_translitDict = new SimpleDictionary(dictDir + fileName, "Translit");
        } else {
            String srcFileName = Configurator.CONFIG.getString("resources.translit.SrcName");
            String trgFileName = Configurator.CONFIG.getString("resources.translit.TrgName");
            SimpleDictionary.DictHalves halves = new SimpleDictionary.DictHalves(dictDir + srcFileName, dictDir + trgFileName);
            m_translitDict = new SimpleDictionary(halves, "TranslitDictionary");
        }
        LOG.info(" - Transliteration dictionary: " + m_translitDict.toString());
    }

    /**
     * Aligns the temporal distributions of both sides by removing, from each side's
     * distributions, the bins that do not occur on the other side. Phrases without a
     * {@link TimeDistribution} property are skipped. Bin counts are logged afterwards.
     *
     * @param srcBins bins collected on the source side
     * @param trgBins bins collected on the target side
     * @param srcEqs  source phrases whose distributions are trimmed
     * @param trgEqs  target phrases whose distributions are trimmed
     */
    protected void alignDistributions(Set<Integer> srcBins, Set<Integer> trgBins, Set<Phrase> srcEqs, Set<Phrase> trgEqs) {
        String timeKey = TimeDistribution.class.getName();

        // Bins present only on the source side.
        HashSet<Integer> srcOnly = new HashSet<Integer>(srcBins);
        srcOnly.removeAll(trgBins);
        for (EquivalenceClass srcEq : srcEqs) {
            TimeDistribution dist = (TimeDistribution)srcEq.getProperty(timeKey);
            if (dist != null) {
                dist.removeBins(srcOnly);
            }
        }

        // Bins present only on the target side.
        HashSet<Integer> trgOnly = new HashSet<Integer>(trgBins);
        trgOnly.removeAll(srcBins);
        for (EquivalenceClass trgEq : trgEqs) {
            TimeDistribution dist = (TimeDistribution)trgEq.getProperty(timeKey);
            if (dist != null) {
                dist.removeBins(trgOnly);
            }
        }

        // Bins shared by both sides, computed for reporting only.
        HashSet<Integer> shared = new HashSet<Integer>(srcBins);
        shared.retainAll(trgBins);
        LOG.info("There are " + srcBins.size() + " days in src distributions.");
        LOG.info("There are " + trgBins.size() + " days in trg distributions.");
        LOG.info("There are " + shared.size() + " common days between src and trg distributions.");
    }

    /**
     * Rebuilds the given simple equivalence classes as instances of {@code eqClassClass},
     * keyed by stem. The first class seen for a stem gets an id, its count and its side type;
     * later classes with the same stem are merged into it and their counts added.
     *
     * @param src          {@code true} to tag new classes as SOURCE, {@code false} as TARGET
     * @param allEqs       simple equivalence classes carrying a {@link Number} property
     * @param eqClassClass equivalence class implementation to instantiate
     * @return one equivalence class per distinct stem
     * @throws Exception propagated from instantiation or initialization
     */
    protected Set<EquivalenceClass> constructEqClasses(boolean src, Set<EquivalenceClass> allEqs, Class<? extends EquivalenceClass> eqClassClass) throws Exception {
        String countKey = Number.class.getName();
        HashMap<String, EquivalenceClass> byStem = new HashMap<String, EquivalenceClass>();

        for (EquivalenceClass simpleEq : allEqs) {
            String word = ((SimpleEquivalenceClass)simpleEq).getWord();
            long count = ((Number)simpleEq.getProperty(countKey)).getNumber();

            EquivalenceClass candidate = eqClassClass.newInstance();
            candidate.init(word, true);

            EquivalenceClass existing = byStem.get(candidate.getStem());
            if (existing != null) {
                // Stem already known: fold the candidate and its count into the existing class.
                existing.merge(candidate);
                ((Number)existing.getProperty(countKey)).increment(count);
            } else {
                candidate.assignId();
                candidate.setProperty(new Number(count));
                candidate.setProperty(new Type(src ? Type.EqType.SOURCE : Type.EqType.TARGET));
                byStem.put(candidate.getStem(), candidate);
            }
        }
        return new HashSet<EquivalenceClass>(byStem.values());
    }

    /**
     * Finds the largest occurrence count among the given equivalence classes.
     * Classes without a {@link Number} property are ignored.
     *
     * @param eqs equivalence classes to scan
     * @return the largest count found, or 0 if none exceeds 0
     */
    protected long collectMaxOccurrenceCount(Set<? extends EquivalenceClass> eqs) {
        String countKey = Number.class.getName();
        long best = 0L;
        for (EquivalenceClass eq : eqs) {
            Number countProp = (Number)eq.getProperty(countKey);
            if (countProp != null) {
                best = Math.max(best, countProp.getNumber());
            }
        }
        return best;
    }

    /**
     * Sets the same {@link Type} property instance on every given equivalence class.
     *
     * @param eqClasses equivalence classes to tag
     * @param type      type to assign
     */
    protected void assignTypeProp(Set<? extends EquivalenceClass> eqClasses, Type.EqType type) {
        // A single Type object is shared by all classes in the set.
        Type shared = new Type(type);
        for (EquivalenceClass eq : eqClasses) {
            eq.setProperty(shared);
        }
    }

    /**
     * Lets the context and time scorers prepare every given equivalence class and, when
     * requested, maps context and temporal properties into LSH space.
     *
     * @param src           {@code true} for the source side; only affects log messages here
     * @param eqs           equivalence classes to prepare
     * @param contextScorer scorer whose {@code prepare} is called on each class
     * @param timeScorer    scorer whose {@code prepare} is called on each class
     * @param mapToLSH      whether to run the LSH context and time collectors
     * @throws Exception propagated from the LSH collectors
     */
    public void prepareContextAndTimeProps(boolean src, Set<? extends EquivalenceClass> eqs, Scorer contextScorer, Scorer timeScorer, boolean mapToLSH) throws Exception {
        String action = src ? "Scoring source" : "Projecting and scoring target";
        LOG.info(" - " + action + " contextual items with " + contextScorer.toString() + " and time distributions with " + timeScorer.toString() + "...");

        for (EquivalenceClass eq : eqs) {
            contextScorer.prepare(eq);
            timeScorer.prepare(eq);
        }

        if (!mapToLSH) {
            return;
        }
        LOG.info(" - Mapping " + (src ? "source" : "target") + " context into LSH space...");
        new LSHContextCollector(true).collectProperty(eqs);
        LOG.info(" - Mapping " + (src ? "source" : "target") + " temporal into LSH space...");
        new LSHTimeDistributionCollector(true).collectProperty(eqs);
    }

    /**
     * Maps ordering vectors of the given equivalence classes into LSH space when requested;
     * does nothing otherwise.
     *
     * @param src      {@code true} for the source side; only affects the log message
     * @param eqs      equivalence classes to process
     * @param mapToLSH whether to run the LSH phrase-context collector
     * @throws Exception propagated from the LSH collector
     */
    public void prepareOrderProps(boolean src, Set<? extends EquivalenceClass> eqs, boolean mapToLSH) throws Exception {
        if (!mapToLSH) {
            return;
        }
        String side = src ? "source" : "target";
        LOG.info(" - Mapping " + side + " ordering vectors into LSH space...");
        new LSHPhraseContextCollector(true, m_phraseTable).collectProperty(eqs);
    }

    /**
     * Prunes the ordering context of every given phrase via {@code PhraseContext.pruneMostFreq},
     * using the per-side keep limits from the "preprocessing.phrases.reordering.*ContPhraseKeep*"
     * keys (-1 for any key that is absent). Total context sizes before and after pruning are logged.
     * Phrases without a {@link PhraseContext} property are skipped.
     *
     * @param src     {@code true} to use the Src* keys, {@code false} to use the Trg* keys
     * @param phrases phrases whose ordering context is pruned
     */
    public void pruneMostFrequentContext(boolean src, Set<? extends EquivalenceClass> phrases) {
        String beforeKey = src ? "preprocessing.phrases.reordering.SrcContPhraseKeepBefore" : "preprocessing.phrases.reordering.TrgContPhraseKeepBefore";
        String afterKey = src ? "preprocessing.phrases.reordering.SrcContPhraseKeepAfter" : "preprocessing.phrases.reordering.TrgContPhraseKeepAfter";
        String discKey = src ? "preprocessing.phrases.reordering.SrcContPhraseKeepDisc" : "preprocessing.phrases.reordering.TrgContPhraseKeepDisc";

        int keepBefore = Configurator.CONFIG.containsKey(beforeKey) ? Configurator.CONFIG.getInt(beforeKey) : -1;
        int keepAfter = Configurator.CONFIG.containsKey(afterKey) ? Configurator.CONFIG.getInt(afterKey) : -1;
        int keepDisc = Configurator.CONFIG.containsKey(discKey) ? Configurator.CONFIG.getInt(discKey) : -1;

        LOG.info(" - Pruning context for " + (src ? "source" : "target") + " phrases. Keeping most frequent " + keepBefore + " before, " + keepAfter + " after, and " + keepDisc + " discontinous phrases...");

        String orderKey = PhraseContext.class.getName();
        long beforeTotalPre = 0L;
        long afterTotalPre = 0L;
        long discTotalPre = 0L;
        long beforeTotalPost = 0L;
        long afterTotalPost = 0L;
        long discTotalPost = 0L;

        for (EquivalenceClass phrase : phrases) {
            PhraseContext orderCtx = (PhraseContext)phrase.getProperty(orderKey);
            if (orderCtx == null) {
                continue;
            }
            // Sizes before pruning.
            beforeTotalPre += orderCtx.getBefore().size();
            afterTotalPre += orderCtx.getAfter().size();
            discTotalPre += orderCtx.getDiscontinuous().size();

            orderCtx.pruneMostFreq(keepBefore, keepAfter, keepDisc);

            // Sizes after pruning.
            beforeTotalPost += orderCtx.getBefore().size();
            afterTotalPost += orderCtx.getAfter().size();
            discTotalPost += orderCtx.getDiscontinuous().size();
        }

        LOG.info(" - Pruned context: before " + beforeTotalPre + "->" + beforeTotalPost + ", after " + afterTotalPre + "->" + afterTotalPost + ", discontinous " + discTotalPre + "->" + discTotalPost);
    }

    /**
     * Resolves a corpus kind name to the matching corpus accessor.
     *
     * <p>Recognised kinds are {@code europarl}, {@code wiki}, {@code wikitemp},
     * {@code crawls}, {@code dev} and {@code test}. Any other value, including
     * {@code null}, is logged as an error and produces a {@code null} result.
     *
     * @param kind name of the corpus kind to open
     * @param src  forwarded unchanged to the kind-specific factory method
     * @return the accessor for the requested kind, or {@code null} when the kind is not recognised
     * @throws Exception if the kind-specific factory method fails
     */
    protected CorpusAccessor getAccessor(String kind, boolean src) throws Exception {
        // Literal-first equals keeps a null kind from throwing; it simply falls through to the error path.
        if ("europarl".equals(kind)) {
            return this.getEuroParlAccessor(src);
        }
        if ("wiki".equals(kind)) {
            return this.getWikiAccessor(src);
        }
        if ("wikitemp".equals(kind)) {
            return this.getWikiTempAccessor(src);
        }
        if ("crawls".equals(kind)) {
            return this.getCrawlsAccessor(src);
        }
        if ("dev".equals(kind)) {
            return this.getDevAccessor(src);
        }
        if ("test".equals(kind)) {
            return this.getTestAccessor(src);
        }

        String message = "Could not find corpus accessor for " + kind;
        LOG.error((Object)message);
        return null;
    }

    /**
     * Builds the accessor for the dev corpus from the {@code corpora.dev.*} configuration keys.
     *
     * @param src {@code true} to read {@code corpora.dev.SrcName}, {@code false} to read
     *            {@code corpora.dev.TrgName}
     * @return a {@link LexCorpusAccessor} created from the configured name, the configured path
     *         passed through {@code appendSep}, and the one-sentence-per-line flag
     * @throws Exception if reading the configuration or constructing the accessor fails
     */
    protected LexCorpusAccessor getDevAccessor(boolean src) throws Exception {
        String corpusDir = Configurator.CONFIG.getString("corpora.dev.Path");
        boolean sentencePerLine = Configurator.CONFIG.getBoolean("corpora.dev.OneSentPerLine");

        // Pick the key first so only one lookup is written out.
        String nameKey = src ? "corpora.dev.SrcName" : "corpora.dev.TrgName";
        String corpusName = Configurator.CONFIG.getString(nameKey);

        return new LexCorpusAccessor(corpusName, this.appendSep(corpusDir), sentencePerLine);
    }

    /**
     * Builds the accessor for the test corpus from the {@code corpora.test.*} configuration keys.
     *
     * @param src {@code true} to read {@code corpora.test.SrcName}, {@code false} to read
     *            {@code corpora.test.TrgName}
     * @return a {@link LexCorpusAccessor} created from the configured name, the configured path
     *         passed through {@code appendSep}, and the one-sentence-per-line flag
     * @throws Exception if reading the configuration or constructing the accessor fails
     */
    protected LexCorpusAccessor getTestAccessor(boolean src) throws Exception {
        String corpusDir = Configurator.CONFIG.getString("corpora.test.Path");
        boolean sentencePerLine = Configurator.CONFIG.getBoolean("corpora.test.OneSentPerLine");

        // Pick the key first so only one lookup is written out.
        String nameKey = src ? "corpora.test.SrcName" : "corpora.test.TrgName";
        String corpusName = Configurator.CONFIG.getString(nameKey);

        return new LexCorpusAccessor(corpusName, this.appendSep(corpusDir), sentencePerLine);
    }

    /**
     * Builds the accessor for the EuroParl corpus from the {@code corpora.europarl.*}
     * configuration keys.
     *
     * <p>The corpus directory is the configured path (passed through {@code appendSep})
     * concatenated with the source or target sub-directory. The {@code DateFrom} and
     * {@code DateTo} settings are parsed with the pattern {@code yy-MM-dd}.
     *
     * @param src {@code true} to read {@code corpora.europarl.SrcSubDir}, {@code false} to read
     *            {@code corpora.europarl.TrgSubDir}
     * @return a {@link EuroParlCorpusAccessor} for the resolved directory and date range
     * @throws Exception if a configured date cannot be parsed, or if reading the configuration
     *                   or constructing the accessor fails
     */
    protected EuroParlCorpusAccessor getEuroParlAccessor(boolean src) throws Exception {
        String rootDir = Configurator.CONFIG.getString("corpora.europarl.Path");
        boolean sentencePerLine = Configurator.CONFIG.getBoolean("corpora.europarl.OneSentPerLine");

        String subDirKey = src ? "corpora.europarl.SrcSubDir" : "corpora.europarl.TrgSubDir";
        String sideDir = Configurator.CONFIG.getString(subDirKey);

        SimpleDateFormat dateFormat = new SimpleDateFormat("yy-MM-dd");
        String fromText = Configurator.CONFIG.getString("corpora.europarl.DateFrom");
        Date rangeStart = dateFormat.parse(fromText);
        String toText = Configurator.CONFIG.getString("corpora.europarl.DateTo");
        Date rangeEnd = dateFormat.parse(toText);

        String corpusDir = this.appendSep(rootDir) + sideDir;
        return new EuroParlCorpusAccessor(corpusDir, rangeStart, rangeEnd, sentencePerLine);
    }

    /**
     * Builds the accessor for the crawls corpus from the {@code corpora.crawls.*}
     * configuration keys.
     *
     * <p>The corpus directory is the configured path (passed through {@code appendSep})
     * concatenated with the source or target sub-directory. The {@code DateFrom} and
     * {@code DateTo} settings are parsed with the pattern {@code yy-MM-dd}.
     *
     * @param src {@code true} to read {@code corpora.crawls.SrcSubDir}, {@code false} to read
     *            {@code corpora.crawls.TrgSubDir}
     * @return a {@link CrawlCorpusAccessor} for the resolved directory and date range
     * @throws Exception if a configured date cannot be parsed, or if reading the configuration
     *                   or constructing the accessor fails
     */
    protected CrawlCorpusAccessor getCrawlsAccessor(boolean src) throws Exception {
        String rootDir = Configurator.CONFIG.getString("corpora.crawls.Path");
        boolean sentencePerLine = Configurator.CONFIG.getBoolean("corpora.crawls.OneSentPerLine");

        String subDirKey = src ? "corpora.crawls.SrcSubDir" : "corpora.crawls.TrgSubDir";
        String sideDir = Configurator.CONFIG.getString(subDirKey);

        SimpleDateFormat dateFormat = new SimpleDateFormat("yy-MM-dd");
        String fromText = Configurator.CONFIG.getString("corpora.crawls.DateFrom");
        Date rangeStart = dateFormat.parse(fromText);
        String toText = Configurator.CONFIG.getString("corpora.crawls.DateTo");
        Date rangeEnd = dateFormat.parse(toText);

        String corpusDir = this.appendSep(rootDir) + sideDir;
        return new CrawlCorpusAccessor(corpusDir, rangeStart, rangeEnd, sentencePerLine);
    }

    /**
     * Builds the accessor for the wiki corpus from the {@code corpora.wiki.*} configuration keys.
     *
     * @param src {@code true} to read {@code corpora.wiki.SrcRegExp}, {@code false} to read
     *            {@code corpora.wiki.TrgRegExp}
     * @return a {@link LexCorpusAccessor} created from the configured file regular expression,
     *         the configured path passed through {@code appendSep}, and the
     *         one-sentence-per-line flag
     */
    protected LexCorpusAccessor getWikiAccessor(boolean src) {
        String corpusDir = Configurator.CONFIG.getString("corpora.wiki.Path");
        boolean sentencePerLine = Configurator.CONFIG.getBoolean("corpora.wiki.OneSentPerLine");

        // Pick the key first so only one lookup is written out.
        String regExpKey = src ? "corpora.wiki.SrcRegExp" : "corpora.wiki.TrgRegExp";
        String filePattern = Configurator.CONFIG.getString(regExpKey);

        return new LexCorpusAccessor(filePattern, this.appendSep(corpusDir), sentencePerLine);
    }

    /**
     * Builds the accessor for the wikitemp corpus from the {@code corpora.wikitemp.*}
     * configuration keys.
     *
     * @param src {@code true} to read {@code corpora.wikitemp.SrcRegExp}, {@code false} to read
     *            {@code corpora.wikitemp.TrgRegExp}
     * @return a {@link WikiTempCorpusAccessor} created from the configured file regular
     *         expression, the configured path passed through {@code appendSep}, and the
     *         one-sentence-per-line flag
     */
    protected WikiTempCorpusAccessor getWikiTempAccessor(boolean src) {
        String corpusDir = Configurator.CONFIG.getString("corpora.wikitemp.Path");
        boolean sentencePerLine = Configurator.CONFIG.getBoolean("corpora.wikitemp.OneSentPerLine");

        // Pick the key first so only one lookup is written out.
        String regExpKey = src ? "corpora.wikitemp.SrcRegExp" : "corpora.wikitemp.TrgRegExp";
        String filePattern = Configurator.CONFIG.getString(regExpKey);

        return new WikiTempCorpusAccessor(filePattern, this.appendSep(corpusDir), sentencePerLine);
    }

    /**
     * Trims the given string and makes sure a non-empty result ends with
     * {@link File#separator}.
     *
     * @param str the string to normalise; may be {@code null}
     * @return {@code null} when {@code str} is {@code null}; the trimmed string unchanged when it
     *         is empty or already ends with the separator; otherwise the trimmed string with the
     *         separator appended
     */
    protected String appendSep(String str) {
        if (str == null) {
            return null;
        }

        String trimmed = str.trim();
        // An empty string is returned as-is: no separator is added to it.
        if (trimmed.isEmpty() || trimmed.endsWith(File.separator)) {
            return trimmed;
        }
        return trimmed + File.separator;
    }
}

