/*
 * Decompiled with CFR 0.152.
 */
package babel.prep.langid;

import babel.content.pages.Page;
import babel.content.pages.PageVersion;
import babel.prep.langid.LangIdentifier;
import babel.util.language.GoogleLangDetector;
import babel.util.language.LangDetectionResult;
import babel.util.language.LangDetector;
import babel.util.language.Language;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class LangIdMapper
extends MapReduceBase
implements Mapper<Text, Page, Text, Page> {
    static final Log LOG = LogFactory.getLog(LangIdMapper.class);
    private LangDetector m_detector;

    public void configure(JobConf job) {
        String referrer = job.get("langidentifier.referrer");
        this.m_detector = new GoogleLangDetector(referrer);
    }

    public void map(Text url, Page page, OutputCollector<Text, Page> output, Reporter reporter) throws IOException {
        if (page.getLanguage() == null) {
            String lang = this.detectLang(page);
            if (lang != null) {
                LangIdentifier.Stats.incLangPageCount(lang);
            } else {
                LangIdentifier.Stats.incFailedCount();
            }
        } else {
            LangIdentifier.Stats.incOldLangPageCount();
        }
        output.collect((Object)url, (Object)page);
    }

    public String detectLang(Page page) {
        Language lang;
        block5: {
            lang = null;
            try {
                for (PageVersion ver : page.pageVersions()) {
                    String content = ver.getContent();
                    if (content.length() <= 0) continue;
                    LangDetectionResult langResult = this.m_detector.detect(content);
                    if (LOG.isInfoEnabled()) {
                        LOG.info((Object)("Language " + langResult.language().toString() + " for page " + page.pageURL()));
                    }
                    if (langResult.language() != null && langResult.isReliable().booleanValue()) {
                        lang = langResult.language();
                        page.setLanguage(lang);
                    }
                    break;
                }
            }
            catch (Exception e) {
                if (!LangIdentifier.LOG.isErrorEnabled()) break block5;
                LangIdentifier.LOG.error((Object)("Failed to detect language for page " + page.pageURL() + ": " + e.toString()));
            }
        }
        return lang == null ? null : lang.toString();
    }
}

