/*
 * Decompiled with CFR 0.152.
 */
package babel.prep.extract;

import babel.content.pages.Page;
import babel.prep.extract.NutchChunk;
import babel.prep.extract.NutchPageExtractor;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

class PageExtReducer
extends MapReduceBase
implements Reducer<Text, NutchChunk, Text, Page> {
    PageExtReducer() {
    }

    public void reduce(Text key, Iterator<NutchChunk> values, OutputCollector<Text, Page> output, Reporter reporter) throws IOException {
        Page page = new Page(key.toString(), values);
        int numVersions = page.numVersions();
        if (numVersions > 0 && page.pageURL().length() > 0) {
            NutchPageExtractor.Stats.incPages();
            NutchPageExtractor.Stats.incVersions(numVersions);
            output.collect((Object)key, (Object)page);
        } else {
            NutchPageExtractor.Stats.incIgnoredPages();
        }
    }

    protected boolean isBBCEnglish(Page page) {
        String url = this.removeProtocolAndPrefix(page.pageURL());
        return url.matches("^bbc.co.uk/(hi/|low/)?english/.*") || url.matches("^bbc.co.uk/local/.*") || url.matches("^bbc.co.uk/[12]/.*");
    }

    protected String removeProtocolAndPrefix(String url) {
        return url.substring(url.indexOf(".") + 1).toLowerCase();
    }
}

