/*
 * Decompiled with CFR 0.152.
 */
package babel.prep.langidtime;

import babel.content.pages.Page;
import babel.content.pages.PageVersion;
import babel.prep.langidtime.LangAndTimeExtractor;
import babel.prep.langidtime.URLAndContentsLangTimeExtractor;
import babel.util.language.Language;
import java.io.IOException;
import java.util.Date;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class LangAndTimeMapper
extends MapReduceBase
implements Mapper<Text, Page, Text, Page> {
    static final Log LOG = LogFactory.getLog(LangAndTimeMapper.class);
    private URLAndContentsLangTimeExtractor m_extractor;

    public void configure(JobConf job) {
        String referrer = job.get("langidentifier.referrer");
        this.m_extractor = new URLAndContentsLangTimeExtractor(referrer);
    }

    public void map(Text url, Page page, OutputCollector<Text, Page> output, Reporter reporter) throws IOException {
        this.detectAndSetLangTime(page);
        output.collect((Object)url, (Object)page);
    }

    public void detectAndSetLangTime(Page page) {
        URLAndContentsLangTimeExtractor.DetectionResult result = this.m_extractor.detect(page);
        Language pageLang = null;
        Language newLang = result == null ? null : result.m_langDet.language();
        Language oldLang = page.getLanguage();
        if (oldLang != null) {
            if (newLang != null && !oldLang.equals((Object)newLang)) {
                LOG.warn((Object)("Detected language " + (Object)((Object)newLang) + " conflicts with old language " + (Object)((Object)oldLang) + " for page " + page.pageURL() + ", not changing."));
            }
            pageLang = oldLang;
            LangAndTimeExtractor.Stats.incLangPageCount(oldLang.toString(), page);
        } else if (newLang != null) {
            page.setLanguage(newLang);
            pageLang = newLang;
            LangAndTimeExtractor.Stats.incLangPageCount(newLang.toString(), page);
            LangAndTimeExtractor.Stats.incNewLangPageCount(newLang.toString(), page);
        } else {
            LangAndTimeExtractor.Stats.incFailedLangPageCount(page);
        }
        for (PageVersion ver : page.pageVersions()) {
            Long newTime;
            Long pageTime = null;
            Long oldTime = ver.getModificationTime();
            if (oldTime == 0L) {
                oldTime = null;
            }
            Long l = newTime = result != null && result.m_modTimes.containsKey(ver) ? Long.valueOf(result.m_modTimes.get(ver).getTime()) : null;
            if (oldTime != null) {
                if (newTime != null && !oldTime.equals(newTime)) {
                    LOG.warn((Object)("Detected mod time " + new Date(newTime) + " conflicts with old time " + new Date(oldTime) + " for page " + page.pageURL() + ", not changing."));
                }
                pageTime = oldTime;
                if (pageLang != null) {
                    LangAndTimeExtractor.Stats.incTimeVerCount(pageLang.toString());
                }
            } else if (newTime != null) {
                ver.setModificationTime(newTime);
                pageTime = newTime;
                if (pageLang != null) {
                    LangAndTimeExtractor.Stats.incTimeVerCount(pageLang.toString());
                    LangAndTimeExtractor.Stats.incNewTimeVerCount(pageLang.toString());
                }
            } else if (pageLang != null) {
                LangAndTimeExtractor.Stats.incFailedTimeVerCount();
            }
            LOG.info((Object)("PageVersion " + page.pageURL() + (pageLang != null ? " Language = " + (Object)((Object)pageLang) : "") + (pageTime != null ? " Time = " + new Date(pageTime) : "")));
        }
    }
}

