/*
 * Decompiled with CFR 0.152.
 */
package babel.content.eqclasses.properties.order;

import babel.content.corpora.accessors.CorpusAccessor;
import babel.content.eqclasses.EquivalenceClass;
import babel.content.eqclasses.phrases.Phrase;
import babel.content.eqclasses.properties.PhrasePropertyCollector;
import babel.content.eqclasses.properties.order.PhraseContext;
import babel.util.misc.GettableHashSet;
import babel.util.misc.InvertibleHashMap;
import java.io.BufferedReader;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class PhraseOrderCollector
extends PhrasePropertyCollector {
    public static final Log LOG = LogFactory.getLog(PhrasePropertyCollector.class);
    protected boolean m_src;
    protected long m_maxPhrCount;
    protected GettableHashSet<EquivalenceClass> m_allPhrases;
    protected double m_keepContPhraseProb;

    public PhraseOrderCollector(boolean src, int maxPhraseLength, boolean caseSensitive, long maxPhrCount, Set<? extends EquivalenceClass> allPhrases, double keepContPhraseProb) {
        super(maxPhraseLength, caseSensitive);
        this.m_src = src;
        this.m_maxPhrCount = maxPhrCount;
        this.m_allPhrases = new GettableHashSet<EquivalenceClass>(allPhrases);
        this.m_keepContPhraseProb = keepContPhraseProb;
    }

    @Override
    public void collectProperty(CorpusAccessor corpusAccess, Set<? extends EquivalenceClass> phrases) throws Exception {
        String curLine;
        BufferedReader reader = new BufferedReader(corpusAccess.getCorpusReader());
        int logCount = 0;
        int sentCount = 0;
        long beforeCount = 0L;
        long afterCount = 0L;
        long discCount = 0L;
        long allDiscCount = 0L;
        while ((curLine = reader.readLine()) != null) {
            curLine = curLine.trim();
            List<String> curSents = PhraseOrderCollector.getSentences(curLine, corpusAccess.isOneSentencePerLine());
            for (String sent : curSents) {
                if (sentCount++ % 100000 == 0 && sentCount != 1) {
                    LOG.info((Object)(String.valueOf(sentCount - 1) + (this.m_src ? " source" : " target") + " sents processed for reordering."));
                }
                List<IndexedPhrase> sentPhrases = this.getAllIndexedPhrases(sent, this.m_allPhrases);
                for (IndexedPhrase idxPhrase : sentPhrases) {
                    PhraseContext phraseContext = (PhraseContext)idxPhrase.phrase.getProperty(PhraseContext.class.getName());
                    if (phraseContext == null) {
                        phraseContext = new PhraseContext(this.m_keepContPhraseProb);
                        idxPhrase.phrase.setProperty(phraseContext);
                    }
                    IndexedPhrase discontPhrase = null;
                    IndexedPhrase afterPhrase = null;
                    IndexedPhrase beforePhrase = null;
                    for (IndexedPhrase contextIdxPhrase : sentPhrases) {
                        if (idxPhrase.isAfter(contextIdxPhrase)) {
                            if (beforePhrase != null && beforePhrase.phrase.numTokens() >= contextIdxPhrase.phrase.numTokens()) continue;
                            beforePhrase = contextIdxPhrase;
                            continue;
                        }
                        if (idxPhrase.isBefore(contextIdxPhrase)) {
                            if (afterPhrase != null && afterPhrase.phrase.numTokens() >= contextIdxPhrase.phrase.numTokens()) continue;
                            afterPhrase = contextIdxPhrase;
                            continue;
                        }
                        if (this.m_src || !idxPhrase.isOutOfOrderButCloseEnough(contextIdxPhrase, this.m_maxPhraseLength) || discontPhrase != null && discontPhrase.phrase.numTokens() >= contextIdxPhrase.phrase.numTokens()) continue;
                        discontPhrase = contextIdxPhrase;
                    }
                    if (beforePhrase != null) {
                        phraseContext.addBefore(beforePhrase.phrase);
                        if (logCount > 0) {
                            LOG.info((Object)("Phrase " + beforePhrase.toString() + " precedes " + idxPhrase.toString() + " in sentence [" + sent + "]"));
                            --logCount;
                        }
                        ++beforeCount;
                    }
                    if (afterPhrase != null) {
                        phraseContext.addAfter(afterPhrase.phrase);
                        if (logCount > 0) {
                            LOG.info((Object)("Phrase " + afterPhrase.toString() + " follows " + idxPhrase.toString() + " in sentence [" + sent + "]"));
                            --logCount;
                        }
                        ++afterCount;
                    }
                    if (discontPhrase == null) continue;
                    phraseContext.addOutOfOrder(discontPhrase.phrase);
                    if (logCount > 0) {
                        LOG.info((Object)("Phrase " + discontPhrase.toString() + " is discontinuous with " + idxPhrase.toString() + " in sentence [" + sent + "]"));
                        --logCount;
                    }
                    ++discCount;
                    ++allDiscCount;
                }
            }
        }
        LOG.info((Object)("Total collected for " + (this.m_src ? "source" : "target") + ": before = " + beforeCount + ", after = " + afterCount + " and discontinuouos = " + discCount + " (out of " + allDiscCount + ")"));
        reader.close();
    }

    protected List<IndexedPhrase> getAllIndexedPhrases(String sent, GettableHashSet<EquivalenceClass> allPhrases) {
        InvertibleHashMap<PhrasePropertyCollector.IdxPair, Integer> delimIdxs = PhraseOrderCollector.getAllDelims(sent, PHRASE_DELIMS);
        List<PhrasePropertyCollector.IdxPair> sentPhraseIdxs = PhraseOrderCollector.getAllPhraseIdxs(delimIdxs, this.m_maxPhraseLength);
        LinkedList<IndexedPhrase> idxPhrases = new LinkedList<IndexedPhrase>();
        HashMap<Integer, PhrasePropertyCollector.IdxPair> firstCharDelim = new HashMap<Integer, PhrasePropertyCollector.IdxPair>();
        HashMap<Integer, PhrasePropertyCollector.IdxPair> lastCharDelim = new HashMap<Integer, PhrasePropertyCollector.IdxPair>();
        int j = 0;
        while (j < delimIdxs.size()) {
            PhrasePropertyCollector.IdxPair delimIdx = delimIdxs.getKey(j);
            firstCharDelim.put(delimIdx.from, delimIdx);
            lastCharDelim.put(delimIdx.to, delimIdx);
            ++j;
        }
        int i = 0;
        while (i < sentPhraseIdxs.size()) {
            PhrasePropertyCollector.IdxPair phraseIdx = sentPhraseIdxs.get(i);
            Phrase phrase = new Phrase();
            phrase.init(sent.substring(phraseIdx.from, phraseIdx.to), this.m_caseSensitive);
            phrase = (Phrase)allPhrases.get(phrase);
            if (phrase != null) {
                idxPhrases.add(new IndexedPhrase(phrase, phraseIdx, delimIdxs.getValue(lastCharDelim.get(phraseIdx.from)), delimIdxs.getValue(firstCharDelim.get(phraseIdx.to))));
            }
            ++i;
        }
        return idxPhrases;
    }

    class IndexedPhrase {
        public Phrase phrase;
        public PhrasePropertyCollector.IdxPair idxPair;
        public int ordDelimBefore;
        public int ordDelimAfter;

        public IndexedPhrase(Phrase phrase, PhrasePropertyCollector.IdxPair idxPair, int ordDelimBefore, int ordDelimAfter) {
            this.idxPair = idxPair;
            this.phrase = phrase;
            this.ordDelimBefore = ordDelimBefore;
            this.ordDelimAfter = ordDelimAfter;
        }

        public boolean isBefore(IndexedPhrase other) {
            return this.idxPair.to + 1 == other.idxPair.from;
        }

        public boolean isAfter(IndexedPhrase other) {
            return other.idxPair.to + 1 == this.idxPair.from;
        }

        public boolean isOutOfOrder(IndexedPhrase other) {
            return other.idxPair.to + 1 < this.idxPair.from || this.idxPair.to + 1 < other.idxPair.from;
        }

        public boolean contains(IndexedPhrase other) {
            return this.idxPair.from <= other.idxPair.from && this.idxPair.to >= other.idxPair.to;
        }

        public boolean overlaps(IndexedPhrase other) {
            boolean noOverlap = other.idxPair.to <= this.idxPair.from || other.idxPair.from >= this.idxPair.to;
            return !noOverlap;
        }

        public boolean isOutOfOrderButCloseEnough(IndexedPhrase other, int toksBetween) {
            int numToks = -1;
            if (other.ordDelimAfter <= this.ordDelimBefore) {
                numToks = this.ordDelimBefore - other.ordDelimAfter;
            } else if (this.ordDelimAfter <= other.ordDelimBefore) {
                numToks = other.ordDelimBefore - this.ordDelimAfter;
            }
            return numToks >= 0 && numToks <= toksBetween;
        }

        public String toString() {
            return "[" + this.phrase.toString() + "|" + this.idxPair.toString() + "]";
        }
    }
}

