/*
 * Decompiled with CFR 0.152.
 */
package similarity;

import com.aliasi.tokenizer.TokenizerFactory;
import java.util.Set;
import java.util.TreeMap;
import segmenter.IDataSource;
import similarity.TokenDictionary;

/**
 * Document-frequency (DF) table for a corpus of {@link IDataSource}s.
 *
 * <p>The constructor feeds every chunk of every data source into one shared
 * {@link TokenDictionary} and creates a DF entry of {@code 0.0} for each token id
 * that dictionary reports. {@link #ProcessCorpus()} then increments a token's entry
 * once for every document (or document segment) whose own token dictionary
 * contains that token.
 *
 * <p>Calling {@link #ProcessCorpus()} more than once accumulates counts on top of
 * the existing ones; nothing in this class resets them.
 */
public class DfDictionary {
    /** {@link #PrintDf()} stops after printing the first token id greater than this value. */
    private static final int PRINT_DF_MAX_TOKEN_ID = 400;

    /** Data sources to count over; set to {@code null} by {@link #ForgetCorpus()}. */
    IDataSource[] corpus = null;
    /** Dictionary built from all chunks of all data sources; maps token strings to ids and back. */
    TokenDictionary tokenDict = null;
    /** Token id to document-frequency count. */
    TreeMap<Integer, Double> dfDictionary = null;
    /** Tokenizer factory handed to every {@link TokenDictionary} created by this class. */
    TokenizerFactory tFactory = null;
    /** Value passed to {@link #ProcessDocSegments} when the corpus holds exactly one data source. */
    int numDocuments = -1;

    /**
     * Builds the shared token dictionary and zero-initialises the DF table.
     *
     * @param iDataSourceArray data sources forming the corpus; must not be {@code null}
     * @param tokenizerFactory tokenizer factory used for every token dictionary
     * @param n                value forwarded to {@link #ProcessDocSegments} when the
     *                         corpus contains exactly one data source; unused otherwise
     */
    public DfDictionary(IDataSource[] iDataSourceArray, TokenizerFactory tokenizerFactory, int n) {
        this.corpus = iDataSourceArray;
        this.dfDictionary = new TreeMap<Integer, Double>();
        this.tFactory = tokenizerFactory;
        this.numDocuments = n;

        // Start from an empty text, then add every chunk of every data source.
        this.tokenDict = new TokenDictionary("", this.tFactory);
        this.tokenDict.ProcessText();
        for (IDataSource source : this.corpus) {
            int chunkCount = source.GetNumChunks();
            for (int chunk = 0; chunk < chunkCount; ++chunk) {
                this.tokenDict.AddText(source.GetChunk(chunk));
            }
        }

        for (Integer tokenId : this.tokenDict.GetAllTokenIds()) {
            this.dfDictionary.put(tokenId, Double.valueOf(0.0));
        }
    }

    /**
     * Drops the reference to the corpus. {@link #ProcessCorpus()} cannot be called
     * afterwards; the token dictionary and DF table remain usable.
     */
    public void ForgetCorpus() {
        this.corpus = null;
    }

    /**
     * Counts document frequencies over the corpus.
     *
     * <p>If the corpus holds exactly one data source, it is split into segments via
     * {@link #ProcessDocSegments} using the {@code n} given to the constructor.
     * Otherwise each data source is treated as one document.
     *
     * @throws IllegalStateException if the corpus was released via {@link #ForgetCorpus()}
     * @throws Exception declared for backward compatibility with existing callers
     */
    public void ProcessCorpus() throws Exception {
        if (this.corpus == null) {
            throw new IllegalStateException(
                    "DfDictionary.ProcessCorpus called after the corpus was released by ForgetCorpus()");
        }
        if (this.corpus.length == 1) {
            this.ProcessDocSegments(this.corpus[0], this.numDocuments);
            return;
        }
        for (IDataSource document : this.corpus) {
            TokenDictionary documentTokens = new TokenDictionary(document, this.tFactory);
            documentTokens.ProcessText();
            this.countTokens(documentTokens.GetAllTokenStrings(), "ProcessCorpus");
        }
    }

    /**
     * Splits one data source into runs of consecutive chunks and counts each run as
     * one document.
     *
     * <p>With {@code span = GetNumChunks() / n} (integer division), each run covers
     * chunk indices {@code start..start + span} inclusive, i.e. {@code span + 1}
     * chunks; the last run is clipped at the final chunk.
     *
     * <p>NOTE(review): because the bounds are inclusive, the number of runs produced
     * is not always {@code n}. Left as is so existing DF values do not change;
     * confirm whether this sizing is intended.
     *
     * @param iDataSource data source to segment
     * @param n           divisor used to derive the run length; must be positive
     * @throws IllegalArgumentException if {@code n} is zero or negative (zero would
     *         divide by zero, a negative value would keep the loop from advancing)
     */
    public void ProcessDocSegments(IDataSource iDataSource, int n) {
        if (n <= 0) {
            throw new IllegalArgumentException(
                    "DfDictionary.ProcessDocSegments requires a positive segment count, got " + n);
        }
        // Read once; assumes the chunk count does not change while this method runs.
        int chunkCount = iDataSource.GetNumChunks();
        int span = chunkCount / n;

        int start = 0;
        while (start < chunkCount) {
            int end = Math.min(start + span, chunkCount - 1);

            // Concatenate the run, padding each chunk with a space on both sides.
            StringBuilder segmentText = new StringBuilder();
            for (int chunk = start; chunk <= end; ++chunk) {
                segmentText.append(' ').append(iDataSource.GetChunk(chunk)).append(' ');
            }

            TokenDictionary segmentTokens = new TokenDictionary(segmentText.toString(), this.tFactory);
            segmentTokens.ProcessText();
            this.countTokens(segmentTokens.GetAllTokenStrings(), "ProcessSegments");

            start = end + 1;
        }
    }

    /**
     * Adds one to the DF entry of every token in {@code tokens}. Tokens unknown to the
     * shared dictionary are reported on standard output and skipped.
     *
     * @param tokens      distinct token strings of one document or segment
     * @param originLabel method label inserted into the warning message
     */
    private void countTokens(Set<String> tokens, String originLabel) {
        for (String token : tokens) {
            Integer tokenId = this.tokenDict.GetTokenId(token);
            if (tokenId == null) {
                System.out.println("Warning in DfDictionary." + originLabel
                        + ". Token in doc not found in this.tokenDict:\t" + token);
                continue;
            }
            // A missing entry counts as zero instead of failing on null unboxing.
            Double current = this.dfDictionary.get(tokenId);
            this.dfDictionary.put(tokenId, current == null ? 1.0 : current + 1.0);
        }
    }

    /**
     * Prints {@code id<TAB>token:<TAB>df} lines to standard output, in the iteration
     * order of the dictionary's token-id set, stopping after the first id greater
     * than {@value #PRINT_DF_MAX_TOKEN_ID} has been printed.
     */
    public void PrintDf() {
        for (Integer tokenId : this.tokenDict.GetAllTokenIds()) {
            String token = this.tokenDict.GetTokenString(tokenId);
            System.out.println(tokenId.toString() + "\t" + token + ":\t"
                    + String.valueOf(this.dfDictionary.get(tokenId)));
            if (tokenId > PRINT_DF_MAX_TOKEN_ID) {
                break;
            }
        }
    }

    /**
     * Returns the live DF table (not a copy); changes made by the caller are visible
     * to this object.
     *
     * @return map from token id to document-frequency count
     */
    public TreeMap<Integer, Double> getDfDictionary() {
        return this.dfDictionary;
    }

    /**
     * Looks up a token's id in the shared token dictionary.
     *
     * @param string token string
     * @return whatever {@link TokenDictionary#GetTokenId} returns for the token;
     *         this class treats {@code null} as "unknown token"
     */
    public Integer GetTokenId(String string) {
        return this.tokenDict.GetTokenId(string);
    }

    /**
     * Looks up the token string for an id in the shared token dictionary.
     *
     * @param n token id
     * @return whatever {@link TokenDictionary#GetTokenString} returns for the id
     */
    public String GetTokenString(Integer n) {
        return this.tokenDict.GetTokenString(n);
    }
}

