/*
 * Decompiled with CFR 0.152.
 */
package main.lexinduct;

import babel.content.corpora.accessors.CorpusAccessor;
import babel.content.corpora.accessors.CrawlCorpusAccessor;
import babel.content.corpora.accessors.EuroParlCorpusAccessor;
import babel.content.corpora.accessors.LexCorpusAccessor;
import babel.content.eqclasses.EquivalenceClass;
import babel.content.eqclasses.SimpleEquivalenceClass;
import babel.content.eqclasses.collectors.EquivalenceClassCollector;
import babel.content.eqclasses.collectors.SimpleEquivalenceClassCollector;
import babel.content.eqclasses.comparators.NumberComparator;
import babel.content.eqclasses.filters.DictionaryFilter;
import babel.content.eqclasses.filters.EquivalenceClassFilter;
import babel.content.eqclasses.filters.GarbageFilter;
import babel.content.eqclasses.filters.LengthFilter;
import babel.content.eqclasses.filters.NoContextFilter;
import babel.content.eqclasses.filters.NoTimeDistributionFilter;
import babel.content.eqclasses.filters.NumOccurencesFilter;
import babel.content.eqclasses.filters.RomanizationFilter;
import babel.content.eqclasses.filters.StopWordsFilter;
import babel.content.eqclasses.properties.context.Context;
import babel.content.eqclasses.properties.context.ContextCollector;
import babel.content.eqclasses.properties.number.Number;
import babel.content.eqclasses.properties.number.NumberCollector;
import babel.content.eqclasses.properties.time.TimeDistribution;
import babel.content.eqclasses.properties.time.TimeDistributionCollector;
import babel.content.eqclasses.properties.type.Type;
import babel.ranking.scorers.Scorer;
import babel.util.config.Configurator;
import babel.util.dict.Dictionary;
import babel.util.dict.SimpleDictionary;
import babel.util.persistence.EqClassPersister;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Random;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Prepares the data used by the lexicon induction experiments: context and
 * candidate equivalence classes for the source and target side, their
 * {@link Number}, {@link Context} and {@link TimeDistribution} properties, and
 * the seed and test dictionaries.
 *
 * <p>{@link #prepare()} first tries to read previously persisted classes and
 * properties from the directory configured under {@code preprocessing.Path};
 * when that fails for any reason it collects everything from the configured
 * corpora and persists the result.
 */
public class DataPreparer {
    protected static final Log LOG = LogFactory.getLog(DataPreparer.class);

    // File names (and property file extensions) of the persisted context classes.
    protected static final String CONTEXT_SRC_MAP_FILE = "cont.src.map";
    protected static final String CONTEXT_TRG_MAP_FILE = "cont.trg.map";
    protected static final String CONTEXT_SRC_PROP_EXT = ".cont.src.map";
    protected static final String CONTEXT_TRG_PROP_EXT = ".cont.trg.map";

    // File names (and property file extensions) of the persisted candidate classes.
    protected static final String SRC_MAP_FILE = "src.map";
    protected static final String TRG_MAP_FILE = "trg.map";
    protected static final String SRC_PROP_EXT = ".src.map";
    protected static final String TRG_PROP_EXT = ".trg.map";

    // Name of the file (under output.Path) listing the source classes selected for induction.
    protected static final String SRC_TO_INDUCT = "srcinduct.list";

    protected Dictionary m_seedDict;
    protected Dictionary m_testDict;
    protected Set<EquivalenceClass> m_contextSrcEqs;
    protected Set<EquivalenceClass> m_contextTrgEqs;
    protected Set<EquivalenceClass> m_srcEqs;
    protected Set<EquivalenceClass> m_trgEqs;
    protected Set<EquivalenceClass> m_srcEqsToInduct;
    protected double m_numToksInSrc;
    protected double m_numToksInTrg;
    protected double m_maxTokCountInSrc;
    protected double m_maxTokCountInTrg;

    // Fixed seed, so the random selection in selectSrcTokensToInduct is repeatable.
    protected Random m_rand = new Random(1L);

    /**
     * Loads (or, failing that, collects and persists) the context classes, the
     * candidate classes with their properties and the seed/test dictionaries,
     * then logs the seed dictionary coverage and gathers token counts over the
     * context classes.
     *
     * @throws Exception if a mandatory configuration value is missing, a
     *         configured equivalence class cannot be loaded or is not an
     *         {@link EquivalenceClass}, or collecting from scratch fails
     */
    public void prepare() throws Exception {
        // Everything is read from the configuration up front, outside the try
        // below, so a configuration problem is reported as such instead of
        // being mistaken for "nothing persisted yet".
        boolean filterRomanSrc = optionalFlag("preprocessing.FilterRomanSrc");
        boolean filterRomanTrg = optionalFlag("preprocessing.FilterRomanTrg");
        String srcEqClassName = Configurator.CONFIG.getString("preprocessing.candidates.SrcEqClass");
        String trgEqClassName = Configurator.CONFIG.getString("preprocessing.candidates.TrgEqClass");
        String srcContEqClassName = Configurator.CONFIG.getString("preprocessing.context.SrcEqClass");
        String trgContEqClassName = Configurator.CONFIG.getString("preprocessing.context.TrgEqClass");
        boolean alignDistros = Configurator.CONFIG.getBoolean("preprocessing.time.Align");
        String srcStopFileName = optionalString("resources.stopwords.SrcStopWords");
        String trgStopFileName = optionalString("resources.stopwords.TrgStopWords");

        Class<? extends EquivalenceClass> srcEqClassClass = loadEqClass(srcEqClassName);
        Class<? extends EquivalenceClass> trgEqClassClass = loadEqClass(trgEqClassName);
        Class<? extends EquivalenceClass> srcContClassClass = loadEqClass(srcContEqClassName);
        Class<? extends EquivalenceClass> trgContClassClass = loadEqClass(trgContEqClassName);

        try {
            loadPreprocessed(srcContClassClass, trgContClassClass, srcEqClassClass, trgEqClassClass);
        } catch (Exception e) {
            // Deliberate best effort: any failure to read the persisted data
            // triggers a full re-collection, which overwrites whatever the
            // failed attempt managed to assign.
            LOG.info(" - Failed to read previously collected stuff (" + e.toString() + "), collecting from scratch ...");

            Set<EquivalenceClass> allSrcEqs = collectInitEqClasses(true, filterRomanSrc);
            Set<EquivalenceClass> allTrgEqs = collectInitEqClasses(false, filterRomanTrg);
            LOG.info(" - All source types: " + allSrcEqs.size() + (filterRomanSrc ? " (without romanization) " : ""));
            LOG.info(" - All target types: " + allTrgEqs.size() + (filterRomanTrg ? " (without romanization) " : ""));

            LOG.info(" - Constructing context classes...");
            m_contextSrcEqs = constructEqClasses(true, allSrcEqs, srcContClassClass);
            m_contextTrgEqs = constructEqClasses(false, allTrgEqs, trgContClassClass);
            logClassCounts("Context", m_contextSrcEqs, m_contextTrgEqs);

            LOG.info(" - Writing context classes...");
            writeEqs(m_contextSrcEqs, true, CONTEXT_SRC_MAP_FILE, CONTEXT_SRC_PROP_EXT);
            writeEqs(m_contextTrgEqs, false, CONTEXT_TRG_MAP_FILE, CONTEXT_TRG_PROP_EXT);

            LOG.info(" - Constructing candidate classes...");
            m_srcEqs = constructEqClasses(true, allSrcEqs, srcEqClassClass);
            m_trgEqs = constructEqClasses(false, allTrgEqs, trgEqClassClass);
            logClassCounts("Candidate", m_srcEqs, m_trgEqs);

            LOG.info(" - Pruning candidate classes...");
            m_srcEqs = pruneEqClasses(m_srcEqs, true, srcStopFileName, filterRomanSrc);
            m_trgEqs = pruneEqClasses(m_trgEqs, false, trgStopFileName, filterRomanTrg);
            logClassCounts("Pruned candidate", m_srcEqs, m_trgEqs);

            prepareDictsAndSrcEqsToInduct(m_contextSrcEqs, m_contextTrgEqs, m_srcEqs, m_trgEqs);

            LOG.info(" - Collecting candidate properties...");
            Set<Integer> srcBins = collectProps(true, m_srcEqs, m_contextSrcEqs, m_seedDict);
            Set<Integer> trgBins = collectProps(false, m_trgEqs, m_contextTrgEqs, m_seedDict);
            if (alignDistros) {
                LOG.info(" - Aligning temporal distributions...");
                alignDistributions(srcBins, trgBins, m_srcEqs, m_trgEqs);
            }

            LOG.info(" - Cleaning up candidate classes...");
            m_srcEqs = cleanUpEqClasses(m_srcEqs, true);
            m_trgEqs = cleanUpEqClasses(m_trgEqs, false);
            logClassCounts("Candidate", m_srcEqs, m_trgEqs);

            LOG.info(" - Writing candidate classes and properties...");
            writeEqs(m_srcEqs, true, SRC_MAP_FILE, SRC_PROP_EXT);
            writeProps(m_srcEqs, true, SRC_PROP_EXT);
            writeEqs(m_trgEqs, false, TRG_MAP_FILE, TRG_PROP_EXT);
            writeProps(m_trgEqs, false, TRG_PROP_EXT);
        }

        dictCoverage(m_seedDict, m_contextSrcEqs, true);
        dictCoverage(m_seedDict, m_contextTrgEqs, false);
        collectTokenCounts(m_contextSrcEqs, m_contextTrgEqs);
    }

    /**
     * Lets both scorers prepare every given equivalence class.
     *
     * @param src whether {@code eqs} are source classes (only affects the log message)
     * @param eqs classes to prepare
     * @param contextScorer scorer whose {@code prepare} is called first for each class
     * @param timeScorer scorer whose {@code prepare} is called second for each class
     */
    public void prepareProperties(boolean src, Set<? extends EquivalenceClass> eqs, Scorer contextScorer, Scorer timeScorer) {
        LOG.info("Projecting and scoring " + sideName(src) + " contextual items with " + contextScorer.toString() + " and time distributions with " + timeScorer.toString() + "...");
        for (EquivalenceClass eq : eqs) {
            contextScorer.prepare(eq);
            timeScorer.prepare(eq);
        }
    }

    /** @return the seed dictionary, or {@code null} before {@link #prepare()} has built it */
    public Dictionary getSeedDict() {
        return m_seedDict;
    }

    /** @return the test dictionary, or {@code null} before {@link #prepare()} has built it */
    public Dictionary getTestDict() {
        return m_testDict;
    }

    /** @return the candidate source classes */
    public Set<EquivalenceClass> getSrcEqs() {
        return m_srcEqs;
    }

    /** @return the source classes selected for induction */
    public Set<EquivalenceClass> getSrcEqsToInduct() {
        return m_srcEqsToInduct;
    }

    /** @return the candidate target classes */
    public Set<EquivalenceClass> getTrgEqs() {
        return m_trgEqs;
    }

    /** @return the sum of the {@link Number} counts over the source context classes */
    public double getNumSrcToks() {
        return m_numToksInSrc;
    }

    /** @return the sum of the {@link Number} counts over the target context classes */
    public double getNumTrgToks() {
        return m_numToksInTrg;
    }

    /** @return the largest {@link Number} count among the source context classes */
    public double getMaxSrcTokCount() {
        return m_maxTokCountInSrc;
    }

    /** @return the largest {@link Number} count among the target context classes */
    public double getMaxTrgTokCount() {
        return m_maxTokCountInTrg;
    }

    /**
     * Reads persisted equivalence classes together with their {@link Number}
     * property and tags them with the source/target {@link Type}.
     *
     * @param src whether the classes belong to the source side
     * @param eqClsssClass concrete equivalence class type to unpersist
     * @param eqfileName file name of the persisted classes, appended to {@code preprocessing.Path}
     * @param propFileExtension extension appended to the property's simple class name
     * @return the classes read
     * @throws Exception if unpersisting fails
     */
    protected Set<EquivalenceClass> readEqClasses(boolean src, Class<? extends EquivalenceClass> eqClsssClass, String eqfileName, String propFileExtension) throws Exception {
        String preProcDir = Configurator.CONFIG.getString("preprocessing.Path");
        Set<EquivalenceClass> eqClasses = EqClassPersister.unpersistEqClasses(eqClsssClass, preProcDir + eqfileName);
        EqClassPersister.unpersistProperty(eqClasses, Number.class.getName(), propertyFile(preProcDir, Number.class, propFileExtension));
        assignTypeProp(eqClasses, sideType(src));
        return eqClasses;
    }

    /**
     * Collects the initial (simple) equivalence classes and their counts from
     * the corpus configured under {@code preprocessing.input.Context}.
     *
     * @param src whether to read the source side of the corpus
     * @param filterRoman whether to additionally apply a {@link RomanizationFilter}
     * @return the collected classes, tagged with the source/target {@link Type}
     * @throws Exception if the corpus kind is unknown or collection fails
     */
    protected Set<EquivalenceClass> collectInitEqClasses(boolean src, boolean filterRoman) throws Exception {
        ArrayList<EquivalenceClassFilter> filters = new ArrayList<EquivalenceClassFilter>(3);
        filters.add(new GarbageFilter());
        filters.add(new LengthFilter(2));
        if (filterRoman) {
            filters.add(new RomanizationFilter());
        }

        CorpusAccessor accessor = requireAccessor(Configurator.CONFIG.getString("preprocessing.input.Context"), src);
        SimpleEquivalenceClassCollector collector = new SimpleEquivalenceClassCollector(filters, false);
        Set<EquivalenceClass> eqClasses = collector.collect(accessor.getCorpusReader(), -1);
        new NumberCollector(false).collectProperty(accessor, eqClasses);
        assignTypeProp(eqClasses, sideType(src));
        return eqClasses;
    }

    /**
     * Builds equivalence classes of the given type from simple ones, merging
     * all words that share a stem and summing their counts.
     *
     * @param src whether the classes belong to the source side
     * @param allEqs simple classes to convert; each must be a
     *        {@link SimpleEquivalenceClass} carrying a {@link Number} property
     * @param eqClassClass concrete type to instantiate through its no-argument constructor
     * @return one class per distinct stem
     * @throws Exception if a class cannot be instantiated or initialised
     */
    protected Set<EquivalenceClass> constructEqClasses(boolean src, Set<EquivalenceClass> allEqs, Class<? extends EquivalenceClass> eqClassClass) throws Exception {
        HashMap<String, EquivalenceClass> eqsByStem = new HashMap<String, EquivalenceClass>();
        for (EquivalenceClass eq : allEqs) {
            String word = ((SimpleEquivalenceClass)eq).getWord();
            long count = ((Number)eq.getProperty(Number.class.getName())).getNumber();

            // Class.newInstance() is deprecated because it rethrows checked
            // constructor exceptions undeclared; go through the constructor.
            EquivalenceClass newEq = eqClassClass.getDeclaredConstructor().newInstance();
            newEq.init(word, false);

            String stem = newEq.getStem();
            EquivalenceClass existing = eqsByStem.get(stem);
            if (existing == null) {
                newEq.assignId();
                newEq.setProperty(new Number(count));
                newEq.setProperty(new Type(sideType(src)));
                eqsByStem.put(stem, newEq);
            } else {
                existing.merge(newEq);
                ((Number)existing.getProperty(Number.class.getName())).increment(count);
            }
        }
        return new HashSet<EquivalenceClass>(eqsByStem.values());
    }

    /**
     * Reads the persisted {@link Number}, {@link Context} and
     * {@link TimeDistribution} properties into the given classes and tags them
     * with the source/target {@link Type}.
     *
     * @param src whether the classes belong to the source side
     * @param eqClasses classes to attach the properties to
     * @param propFileExtension extension appended to each property's simple class name
     * @throws Exception if unpersisting fails
     */
    protected void readProps(boolean src, Set<EquivalenceClass> eqClasses, String propFileExtension) throws Exception {
        String preProcDir = Configurator.CONFIG.getString("preprocessing.Path");
        EqClassPersister.unpersistProperty(eqClasses, Number.class.getName(), propertyFile(preProcDir, Number.class, propFileExtension));
        EqClassPersister.unpersistProperty(eqClasses, Context.class.getName(), propertyFile(preProcDir, Context.class, propFileExtension));
        EqClassPersister.unpersistProperty(eqClasses, TimeDistribution.class.getName(), propertyFile(preProcDir, TimeDistribution.class, propFileExtension));
        assignTypeProp(eqClasses, sideType(src));
    }

    /**
     * Collects context and time distribution properties for the candidates.
     * Context words are restricted to the context classes that are in the
     * given dictionary and pass the two configured occurrence filters.
     *
     * @param src whether the classes belong to the source side
     * @param eqClasses candidate classes to collect properties for
     * @param contextEqs all context classes of that side (not modified)
     * @param contextDict dictionary used to filter the context classes
     * @return the bins reported by the time distribution collector
     * @throws Exception if a corpus kind is unknown or collection fails
     */
    protected Set<Integer> collectProps(boolean src, Set<EquivalenceClass> eqClasses, Set<EquivalenceClass> contextEqs, Dictionary contextDict) throws Exception {
        int pruneContEqIfOccursFewerThan = Configurator.CONFIG.getInt("preprocessing.context.PruneEqIfOccursFewerThan");
        int pruneContEqIfOccursMoreThan = Configurator.CONFIG.getInt("preprocessing.context.PruneEqIfOccursMoreThan");
        int contextWindowSize = Configurator.CONFIG.getInt("preprocessing.context.Window");

        LOG.info("Preparing contextual words for " + sideName(src) + ": keeping those in dict [" + contextDict.toString() + "] and occuring (" + pruneContEqIfOccursFewerThan + "," + pruneContEqIfOccursMoreThan + ") times...");
        LinkedList<EquivalenceClassFilter> filters = new LinkedList<EquivalenceClassFilter>();
        filters.add(new DictionaryFilter(contextDict, true, src));
        filters.add(new NumOccurencesFilter(pruneContEqIfOccursFewerThan, true));
        filters.add(new NumOccurencesFilter(pruneContEqIfOccursMoreThan, false));

        // Filter a copy so the caller's context set stays intact.
        Set<EquivalenceClass> filtContextEqs = EquivalenceClassCollector.filter(new HashSet<EquivalenceClass>(contextEqs), filters);
        LOG.info("Context " + sideName(src) + " classes: " + filtContextEqs.size());

        CorpusAccessor contextAccessor = requireAccessor(Configurator.CONFIG.getString("preprocessing.input.Context"), src);
        new ContextCollector(false, contextWindowSize, contextWindowSize, filtContextEqs).collectProperty(contextAccessor, eqClasses);

        CorpusAccessor timeAccessor = requireAccessor(Configurator.CONFIG.getString("preprocessing.input.Time"), src);
        TimeDistributionCollector distCollector = new TimeDistributionCollector(false);
        distCollector.collectProperty(timeAccessor, eqClasses);

        assignTypeProp(eqClasses, sideType(src));
        return distCollector.binsCollected();
    }

    /**
     * Removes from every time distribution the bins that were collected for
     * only one of the two sides, and logs how many bins each side had and how
     * many they share.
     *
     * @param srcBins bins collected for the source side (not modified)
     * @param trgBins bins collected for the target side (not modified)
     * @param srcEqs source classes whose distributions are trimmed
     * @param trgEqs target classes whose distributions are trimmed
     */
    protected void alignDistributions(Set<Integer> srcBins, Set<Integer> trgBins, Set<EquivalenceClass> srcEqs, Set<EquivalenceClass> trgEqs) {
        HashSet<Integer> srcOnly = new HashSet<Integer>(srcBins);
        srcOnly.removeAll(trgBins);
        removeBins(srcEqs, srcOnly);

        HashSet<Integer> trgOnly = new HashSet<Integer>(trgBins);
        trgOnly.removeAll(srcBins);
        removeBins(trgEqs, trgOnly);

        HashSet<Integer> common = new HashSet<Integer>(srcBins);
        common.retainAll(trgBins);

        LOG.info("There are " + srcBins.size() + " days in src distributions.");
        LOG.info("There are " + trgBins.size() + " days in trg distributions.");
        LOG.info("There are " + common.size() + " common days between src and trg distributions.");
    }

    /**
     * Filters candidate classes (garbage, optional romanization, optional stop
     * words, occurrence bounds) and then optionally drops the classes at the
     * head of the {@code NumberComparator(false)} ordering.
     *
     * @param eqClasses candidate classes to prune
     * @param src whether the classes belong to the source side
     * @param stopWordsFileName stop word file name, or {@code null}/blank for none
     * @param filterRoman whether to apply a {@link RomanizationFilter}
     * @return the classes that survived pruning
     * @throws Exception if reading the stop words or filtering fails
     */
    protected Set<EquivalenceClass> pruneEqClasses(Set<EquivalenceClass> eqClasses, boolean src, String stopWordsFileName, boolean filterRoman) throws Exception {
        // Normalised like the corpus paths so a directory configured without a
        // trailing separator still resolves to the stop word file.
        String stopWordsDir = appendSep(Configurator.CONFIG.getString("resources.stopwords.Path"));
        int pruneCandIfOccursFewerThan = Configurator.CONFIG.getInt("preprocessing.candidates.PruneIfOccursFewerThan");
        int pruneCandIfOccursMoreThan = Configurator.CONFIG.getInt("preprocessing.candidates.PruneIfOccursMoreThan");
        int pruneMostFreq = Configurator.CONFIG.getInt(src ? "preprocessing.candidates.PruneMostFrequentSrc" : "preprocessing.candidates.PruneMostFrequentTrg");

        LOG.info("Pruning " + sideName(src) + " candidates...");
        LinkedList<EquivalenceClassFilter> filters = new LinkedList<EquivalenceClassFilter>();
        filters.add(new GarbageFilter());
        if (filterRoman) {
            filters.add(new RomanizationFilter());
        }

        if (stopWordsFileName != null && stopWordsFileName.trim().length() > 0) {
            // The stop words themselves go through the filters added so far.
            SimpleEquivalenceClassCollector collector = new SimpleEquivalenceClassCollector(filters, false);
            Set<EquivalenceClass> stopEqs;
            if (new File(stopWordsDir + stopWordsFileName).exists()) {
                stopEqs = collector.collect(new LexCorpusAccessor(stopWordsFileName, stopWordsDir, true).getCorpusReader(), -1);
            } else {
                LOG.warn("Stop words file " + stopWordsDir + stopWordsFileName + " does not exist, no stop words will be filtered.");
                stopEqs = new HashSet<EquivalenceClass>();
            }
            filters.add(new StopWordsFilter(stopEqs));
        }

        filters.add(new NumOccurencesFilter(pruneCandIfOccursFewerThan, true));
        filters.add(new NumOccurencesFilter(pruneCandIfOccursMoreThan, false));
        Set<EquivalenceClass> filteredEqs = EquivalenceClassCollector.filter(eqClasses, filters);

        if (pruneMostFreq > 0) {
            LOG.info("Removing  " + pruneMostFreq + " most frequent " + sideName(src) + " candidates...");
            // Array-backed copy: indexed access below stays O(1) per element.
            // NOTE(review): NumberComparator(false) presumably orders most frequent first - confirm.
            ArrayList<EquivalenceClass> ordered = new ArrayList<EquivalenceClass>(filteredEqs);
            Collections.sort(ordered, new NumberComparator(false));
            int numToDrop = Math.min(pruneMostFreq, ordered.size());
            for (int i = 0; i < numToDrop; i++) {
                filteredEqs.remove(ordered.get(i));
            }
        }
        return filteredEqs;
    }

    /**
     * Filters the candidates through {@link NoContextFilter} and
     * {@link NoTimeDistributionFilter}.
     *
     * @param eqClasses candidate classes to clean up
     * @param src whether the classes belong to the source side (only affects the log message)
     * @return the filtered classes
     * @throws Exception if filtering fails
     */
    protected Set<EquivalenceClass> cleanUpEqClasses(Set<EquivalenceClass> eqClasses, boolean src) throws Exception {
        LOG.info("Throwing out " + sideName(src) + " candidate classes without context or time properties...");
        LinkedList<EquivalenceClassFilter> filters = new LinkedList<EquivalenceClassFilter>();
        filters.add(new NoContextFilter());
        filters.add(new NoTimeDistributionFilter());
        return EquivalenceClassCollector.filter(eqClasses, filters);
    }

    /**
     * Persists the classes and their {@link Number} property under
     * {@code preprocessing.Path}.
     *
     * @param eqClasses classes to persist
     * @param src unused
     * @param eqfileName file name for the classes
     * @param propFileExtension extension appended to the property's simple class name
     * @throws Exception if persisting fails
     */
    protected void writeEqs(Set<EquivalenceClass> eqClasses, boolean src, String eqfileName, String propFileExtension) throws Exception {
        String preProcDir = Configurator.CONFIG.getString("preprocessing.Path");
        EqClassPersister.persistEqClasses(eqClasses, preProcDir + eqfileName);
        EqClassPersister.persistProperty(eqClasses, Number.class.getName(), propertyFile(preProcDir, Number.class, propFileExtension));
    }

    /**
     * Persists the {@link Context} and {@link TimeDistribution} properties of
     * the classes under {@code preprocessing.Path}.
     *
     * @param eqClasses classes whose properties are persisted
     * @param src unused
     * @param propFileExtension extension appended to each property's simple class name
     * @throws Exception if persisting fails
     */
    protected void writeProps(Set<EquivalenceClass> eqClasses, boolean src, String propFileExtension) throws Exception {
        String preProcDir = Configurator.CONFIG.getString("preprocessing.Path");
        EqClassPersister.persistProperty(eqClasses, Context.class.getName(), propertyFile(preProcDir, Context.class, propFileExtension));
        EqClassPersister.persistProperty(eqClasses, TimeDistribution.class.getName(), propertyFile(preProcDir, TimeDistribution.class, propFileExtension));
    }

    /**
     * Logs which percentage of tokens and of types in {@code eqs} is covered
     * by the dictionary. Classes without a {@link Number} property contribute
     * no tokens and are never counted as covered, but do count towards the
     * number of types. An empty denominator is reported as 0%.
     *
     * @param dict dictionary to check against
     * @param eqs classes to measure coverage over
     * @param src whether to look the classes up on the dictionary's source side
     */
    protected void dictCoverage(Dictionary dict, Set<EquivalenceClass> eqs, boolean src) {
        DecimalFormat df = new DecimalFormat("0.00");
        double tokTotal = 0.0;
        double tokCovered = 0.0;
        double typCovered = 0.0;

        for (EquivalenceClass eq : eqs) {
            Number numProp = (Number)eq.getProperty(Number.class.getName());
            if (numProp == null) {
                continue;
            }
            double num = numProp.getNumber();
            boolean covered = src ? dict.containsSrc(eq) : dict.containsTrg(eq);
            if (covered) {
                tokCovered += num;
                typCovered += 1.0;
            }
            tokTotal += num;
        }

        LOG.info("[" + dict.getName() + "]: " + sideName(src) + " dictionary coverage " + df.format(percent(tokCovered, tokTotal)) + "% tokens and " + df.format(percent(typCovered, (double)eqs.size())) + "% types.");
    }

    /**
     * Recomputes the total and the maximum {@link Number} count for each side
     * and logs them. Classes without a {@link Number} property are skipped.
     *
     * @param srcEqs source classes to count over
     * @param trgEqs target classes to count over
     */
    protected void collectTokenCounts(Set<? extends EquivalenceClass> srcEqs, Set<? extends EquivalenceClass> trgEqs) {
        m_maxTokCountInSrc = 0.0;
        m_maxTokCountInTrg = 0.0;
        m_numToksInSrc = 0.0;
        m_numToksInTrg = 0.0;

        for (EquivalenceClass eq : srcEqs) {
            Number numProp = (Number)eq.getProperty(Number.class.getName());
            if (numProp != null) {
                double count = numProp.getNumber();
                if (count > m_maxTokCountInSrc) {
                    m_maxTokCountInSrc = count;
                }
                m_numToksInSrc += count;
            }
        }

        for (EquivalenceClass eq : trgEqs) {
            Number numProp = (Number)eq.getProperty(Number.class.getName());
            if (numProp != null) {
                double count = numProp.getNumber();
                if (count > m_maxTokCountInTrg) {
                    m_maxTokCountInTrg = count;
                }
                m_numToksInTrg += count;
            }
        }

        LOG.info("Maximum occurrences: src = " + m_maxTokCountInSrc + ", trg = " + m_maxTokCountInTrg + ".");
        LOG.info("Total Counts: src = " + m_numToksInSrc + ", trg = " + m_numToksInTrg + ".");
    }

    /**
     * Creates the corpus accessor for the given corpus kind.
     *
     * @param kind one of {@code europarl}, {@code wiki}, {@code crawls},
     *        {@code dev} or {@code test}
     * @param src whether to access the source side of the corpus
     * @return the accessor, or {@code null} (after logging an error) when the
     *         kind is not recognised
     * @throws Exception if the accessor cannot be created
     */
    protected CorpusAccessor getAccessor(String kind, boolean src) throws Exception {
        if ("europarl".equals(kind)) {
            return getEuroParlAccessor(src);
        }
        if ("wiki".equals(kind)) {
            return getWikiAccessor(src);
        }
        if ("crawls".equals(kind)) {
            return getCrawlsAccessor(src);
        }
        if ("dev".equals(kind)) {
            return getDevAccessor(src);
        }
        if ("test".equals(kind)) {
            return getTestAccessor(src);
        }
        LOG.error("Could not find corpus accessor for " + kind);
        return null;
    }

    /**
     * Creates the accessor for the corpus configured under {@code corpora.dev}.
     *
     * @param src whether to use {@code SrcName} rather than {@code TrgName}
     * @return the accessor
     * @throws Exception if the accessor cannot be created
     */
    protected LexCorpusAccessor getDevAccessor(boolean src) throws Exception {
        String path = Configurator.CONFIG.getString("corpora.dev.Path");
        boolean oneSentPerLine = Configurator.CONFIG.getBoolean("corpora.dev.OneSentPerLine");
        String name = Configurator.CONFIG.getString(src ? "corpora.dev.SrcName" : "corpora.dev.TrgName");
        return new LexCorpusAccessor(name, appendSep(path), oneSentPerLine);
    }

    /**
     * Creates the accessor for the corpus configured under {@code corpora.test}.
     *
     * @param src whether to use {@code SrcName} rather than {@code TrgName}
     * @return the accessor
     * @throws Exception if the accessor cannot be created
     */
    protected LexCorpusAccessor getTestAccessor(boolean src) throws Exception {
        String path = Configurator.CONFIG.getString("corpora.test.Path");
        boolean oneSentPerLine = Configurator.CONFIG.getBoolean("corpora.test.OneSentPerLine");
        String name = Configurator.CONFIG.getString(src ? "corpora.test.SrcName" : "corpora.test.TrgName");
        return new LexCorpusAccessor(name, appendSep(path), oneSentPerLine);
    }

    /**
     * Creates the accessor for the corpus configured under
     * {@code corpora.europarl}; the date range is parsed as {@code yy-MM-dd}.
     *
     * @param src whether to use {@code SrcSubDir} rather than {@code TrgSubDir}
     * @return the accessor
     * @throws Exception if a date cannot be parsed or the accessor cannot be created
     */
    protected EuroParlCorpusAccessor getEuroParlAccessor(boolean src) throws Exception {
        String path = Configurator.CONFIG.getString("corpora.europarl.Path");
        boolean oneSentPerLine = Configurator.CONFIG.getBoolean("corpora.europarl.OneSentPerLine");
        String subDir = Configurator.CONFIG.getString(src ? "corpora.europarl.SrcSubDir" : "corpora.europarl.TrgSubDir");
        Date fromDate = parseConfigDate("corpora.europarl.DateFrom");
        Date toDate = parseConfigDate("corpora.europarl.DateTo");
        return new EuroParlCorpusAccessor(appendSep(path) + subDir, fromDate, toDate, oneSentPerLine);
    }

    /**
     * Creates the accessor for the corpus configured under
     * {@code corpora.crawls}; the date range is parsed as {@code yy-MM-dd}.
     *
     * @param src whether to use {@code SrcSubDir} rather than {@code TrgSubDir}
     * @return the accessor
     * @throws Exception if a date cannot be parsed or the accessor cannot be created
     */
    protected CrawlCorpusAccessor getCrawlsAccessor(boolean src) throws Exception {
        String path = Configurator.CONFIG.getString("corpora.crawls.Path");
        boolean oneSentPerLine = Configurator.CONFIG.getBoolean("corpora.crawls.OneSentPerLine");
        String subDir = Configurator.CONFIG.getString(src ? "corpora.crawls.SrcSubDir" : "corpora.crawls.TrgSubDir");
        Date fromDate = parseConfigDate("corpora.crawls.DateFrom");
        Date toDate = parseConfigDate("corpora.crawls.DateTo");
        return new CrawlCorpusAccessor(appendSep(path) + subDir, fromDate, toDate, oneSentPerLine);
    }

    /**
     * Creates the accessor for the corpus configured under {@code corpora.wiki}.
     *
     * @param src whether to use {@code SrcRegExp} rather than {@code TrgRegExp}
     * @return the accessor
     */
    protected LexCorpusAccessor getWikiAccessor(boolean src) {
        String path = Configurator.CONFIG.getString("corpora.wiki.Path");
        boolean oneSentPerLine = Configurator.CONFIG.getBoolean("corpora.wiki.OneSentPerLine");
        String fileRegExp = Configurator.CONFIG.getString(src ? "corpora.wiki.SrcRegExp" : "corpora.wiki.TrgRegExp");
        return new LexCorpusAccessor(fileRegExp, appendSep(path), oneSentPerLine);
    }

    /**
     * Reads the configured dictionary, derives the seed dictionary (over the
     * context classes) and the test dictionary (over the candidate classes),
     * selects the source classes to induct, removes those from the seed
     * dictionary and restricts the test dictionary to them.
     *
     * @param srcContEqs source context classes
     * @param trgContEqs target context classes
     * @param srcEqs source candidate classes
     * @param trgEqs target candidate classes
     * @throws Exception if a dictionary cannot be read or the selection cannot be written
     */
    protected void prepareDictsAndSrcEqsToInduct(Set<EquivalenceClass> srcContEqs, Set<EquivalenceClass> trgContEqs, Set<EquivalenceClass> srcEqs, Set<EquivalenceClass> trgEqs) throws Exception {
        String dictDir = Configurator.CONFIG.getString("resources.dictionary.Path");
        int ridDictNumTrans = optionalInt("experiments.DictionaryPruneNumTranslations", -1);

        LOG.info("Reading/preparing dictionaries ...");
        SimpleDictionary entireDict;
        if (Configurator.CONFIG.containsKey("resources.dictionary.Dictionary")) {
            // Whole dictionary in a single file.
            String dictFileName = Configurator.CONFIG.getString("resources.dictionary.Dictionary");
            entireDict = new SimpleDictionary(dictDir + dictFileName, "EntireDictionary");
        } else {
            // Dictionary split into a source file and a target file.
            String srcDictFileName = Configurator.CONFIG.getString("resources.dictionary.SrcName");
            String trgDictFileName = Configurator.CONFIG.getString("resources.dictionary.TrgName");
            entireDict = new SimpleDictionary(new SimpleDictionary.DictHalves(dictDir + srcDictFileName, dictDir + trgDictFileName), "EntireDictionary");
        }
        entireDict.pruneCounts(ridDictNumTrans);

        m_seedDict = new Dictionary(srcContEqs, trgContEqs, entireDict, "Seed dictionary");
        m_testDict = new Dictionary(srcEqs, trgEqs, entireDict, "Test dictionary");
        LOG.info("Initial seed dictionary: " + m_seedDict.toString());
        LOG.info("Initial test dictionary: " + m_testDict.toString());

        m_srcEqsToInduct = selectSrcTokensToInduct(m_testDict, srcEqs);

        // Entries for the classes under test must not remain in the seed dictionary.
        m_seedDict.removeAllSrc(map1To2(m_seedDict.getAllSrc(), m_srcEqsToInduct));
        m_testDict.retainAllSrc(m_srcEqsToInduct);
        LOG.info("Seed dictionary: " + m_seedDict.toString());
        LOG.info("Test dictionary: " + m_testDict.toString());
    }

    /**
     * Returns the members of {@code all2} for which at least one member of
     * {@code some1} reports {@code sameEqClass}.
     *
     * @param all2 classes to select from
     * @param some1 classes to match against
     * @return a new set holding the matching members of {@code all2}
     */
    protected Set<EquivalenceClass> map1To2(Set<EquivalenceClass> all2, Set<EquivalenceClass> some1) {
        HashSet<EquivalenceClass> some2 = new HashSet<EquivalenceClass>();
        for (EquivalenceClass two : all2) {
            for (EquivalenceClass one : some1) {
                if (two.sameEqClass(one)) {
                    some2.add(two);
                    // One match is enough; further matches would re-add the same element.
                    break;
                }
            }
        }
        return some2;
    }

    /**
     * Sets the same {@link Type} property instance on every given class.
     *
     * @param eqClasses classes to tag
     * @param type source/target type to assign
     */
    protected void assignTypeProp(Set<? extends EquivalenceClass> eqClasses, Type.EqType type) {
        Type commonType = new Type(type);
        for (EquivalenceClass eq : eqClasses) {
            eq.setProperty(commonType);
        }
    }

    /**
     * Selects the source classes to induct translations for: those of
     * {@code srcEqs} that are on the dictionary's source side, cut down to
     * {@code experiments.NumSource} entries when that key is set (non-negative)
     * and exceeded. The cut keeps either a random sample or the head of the
     * {@code NumberComparator(false)} ordering, depending on
     * {@code experiments.RandomSource}. The selection is also written, one
     * {@code count<TAB>class} line each, to {@code srcinduct.list} under
     * {@code output.Path}.
     *
     * @param dict dictionary whose source side restricts the selection
     * @param srcEqs candidate source classes
     * @return the selected classes
     * @throws IOException if the selection file cannot be written
     */
    protected Set<EquivalenceClass> selectSrcTokensToInduct(Dictionary dict, Set<EquivalenceClass> srcEqs) throws IOException {
        boolean randomSrc = Configurator.CONFIG.getBoolean("experiments.RandomSource");
        int numToKeep = optionalInt("experiments.NumSource", -1);
        String outDir = Configurator.CONFIG.getString("output.Path");

        HashSet<EquivalenceClass> srcSubset = new HashSet<EquivalenceClass>(srcEqs);
        srcSubset.retainAll(dict.getAllSrc());

        if (numToKeep >= 0 && srcSubset.size() > numToKeep) {
            // Array-backed so indexed access in the loops below is O(1).
            ArrayList<EquivalenceClass> pool = new ArrayList<EquivalenceClass>(srcSubset);
            if (randomSrc) {
                srcSubset.clear();
                for (int i = 0; i < numToKeep; i++) {
                    srcSubset.add(pool.remove(m_rand.nextInt(pool.size())));
                }
            } else {
                Collections.sort(pool, new NumberComparator(false));
                for (int i = numToKeep; i < pool.size(); i++) {
                    srcSubset.remove(pool.get(i));
                }
            }
        }

        ArrayList<EquivalenceClass> ordered = new ArrayList<EquivalenceClass>(srcSubset);
        Collections.sort(ordered, new NumberComparator(false));

        // NOTE(review): FileWriter uses the platform default charset - confirm
        // that is what consumers of this list expect for non-ASCII words.
        BufferedWriter writer = new BufferedWriter(new FileWriter(outDir + SRC_TO_INDUCT));
        try {
            for (EquivalenceClass eq : ordered) {
                long count = ((Number)eq.getProperty(Number.class.getName())).getNumber();
                writer.write(count + "\t" + eq.toString() + "\n");
            }
        } finally {
            // Close even when a write fails so the file handle is not leaked.
            writer.close();
        }

        LOG.info("Selected " + srcSubset.size() + (randomSrc ? " random " : " most frequent ") + "test dictionary source classes (see " + outDir + SRC_TO_INDUCT + ").");
        return srcSubset;
    }

    /**
     * Trims the string and appends {@link File#separator} unless the trimmed
     * string is empty or already ends with it.
     *
     * @param str path to normalise, may be {@code null}
     * @return the normalised path, or {@code null} if {@code str} is {@code null}
     */
    protected String appendSep(String str) {
        if (str == null) {
            return null;
        }
        String trimmed = str.trim();
        if (trimmed.length() > 0 && !trimmed.endsWith(File.separator)) {
            return trimmed + File.separator;
        }
        return trimmed;
    }

    /** Reads the persisted context and candidate classes, rebuilds the dictionaries and reads the candidate properties. */
    private void loadPreprocessed(Class<? extends EquivalenceClass> srcContClassClass, Class<? extends EquivalenceClass> trgContClassClass, Class<? extends EquivalenceClass> srcEqClassClass, Class<? extends EquivalenceClass> trgEqClassClass) throws Exception {
        LOG.info(" - Reading context source classes from " + CONTEXT_SRC_MAP_FILE + " and target from " + CONTEXT_TRG_MAP_FILE + "...");
        m_contextSrcEqs = readEqClasses(true, srcContClassClass, CONTEXT_SRC_MAP_FILE, CONTEXT_SRC_PROP_EXT);
        m_contextTrgEqs = readEqClasses(false, trgContClassClass, CONTEXT_TRG_MAP_FILE, CONTEXT_TRG_PROP_EXT);
        logClassCounts("Context", m_contextSrcEqs, m_contextTrgEqs);

        LOG.info(" - Reading candidate source classes from " + SRC_MAP_FILE + " and target from " + TRG_MAP_FILE + "...");
        m_srcEqs = readEqClasses(true, srcEqClassClass, SRC_MAP_FILE, SRC_PROP_EXT);
        m_trgEqs = readEqClasses(false, trgEqClassClass, TRG_MAP_FILE, TRG_PROP_EXT);
        logClassCounts("Candidate", m_srcEqs, m_trgEqs);

        prepareDictsAndSrcEqsToInduct(m_contextSrcEqs, m_contextTrgEqs, m_srcEqs, m_trgEqs);

        LOG.info(" - Reading source and target properties...");
        readProps(true, m_srcEqs, SRC_PROP_EXT);
        readProps(false, m_trgEqs, TRG_PROP_EXT);
    }

    /** Like {@link #getAccessor(String, boolean)}, but fails with a descriptive exception instead of returning {@code null}. */
    private CorpusAccessor requireAccessor(String kind, boolean src) throws Exception {
        CorpusAccessor accessor = getAccessor(kind, src);
        if (accessor == null) {
            throw new IllegalStateException("No corpus accessor available for corpus kind '" + kind + "'");
        }
        return accessor;
    }

    /** Calls {@code removeBins} on the time distribution of every class that has one. */
    private static void removeBins(Set<EquivalenceClass> eqs, HashSet<Integer> bins) {
        for (EquivalenceClass eq : eqs) {
            TimeDistribution timeProp = (TimeDistribution)eq.getProperty(TimeDistribution.class.getName());
            if (timeProp != null) {
                timeProp.removeBins(bins);
            }
        }
    }

    /** Logs the sizes of a source/target pair of class sets under the given label. */
    private static void logClassCounts(String label, Set<EquivalenceClass> srcEqs, Set<EquivalenceClass> trgEqs) {
        LOG.info(" - " + label + " source classes: " + srcEqs.size());
        LOG.info(" - " + label + " target classes: " + trgEqs.size());
    }

    /** Loads the named class and checks that it is an {@link EquivalenceClass} type. */
    private static Class<? extends EquivalenceClass> loadEqClass(String className) throws ClassNotFoundException {
        return Class.forName(className).asSubclass(EquivalenceClass.class);
    }

    /** Parses the configured value of {@code key} as a {@code yy-MM-dd} date. */
    private static Date parseConfigDate(String key) throws Exception {
        return new SimpleDateFormat("yy-MM-dd").parse(Configurator.CONFIG.getString(key));
    }

    /** Builds the file name of a persisted property: directory + property simple class name + extension. */
    private static String propertyFile(String dir, Class<?> propertyClass, String extension) {
        return dir + propertyClass.getSimpleName() + extension;
    }

    /** Returns the configured boolean, or {@code false} when the key is absent. */
    private static boolean optionalFlag(String key) {
        return Configurator.CONFIG.containsKey(key) && Configurator.CONFIG.getBoolean(key);
    }

    /** Returns the configured string, or {@code null} when the key is absent. */
    private static String optionalString(String key) {
        return Configurator.CONFIG.containsKey(key) ? Configurator.CONFIG.getString(key) : null;
    }

    /** Returns the configured int, or {@code defaultValue} when the key is absent. */
    private static int optionalInt(String key, int defaultValue) {
        return Configurator.CONFIG.containsKey(key) ? Configurator.CONFIG.getInt(key) : defaultValue;
    }

    /** Returns the word used for the side in log messages. */
    private static String sideName(boolean src) {
        return src ? "source" : "target";
    }

    /** Returns the {@link Type.EqType} for the side. */
    private static Type.EqType sideType(boolean src) {
        return src ? Type.EqType.SOURCE : Type.EqType.TARGET;
    }

    /** Returns {@code 100 * part / total}, or 0 when {@code total} is not positive. */
    private static double percent(double part, double total) {
        return total > 0.0 ? 100.0 * part / total : 0.0;
    }
}

