/*
 * Decompiled with CFR 0.152.
 */
package babel.prep.merge;

import babel.content.pages.Page;
import babel.prep.PrepStep;
import babel.prep.merge.PageMergeReducer;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

/**
 * Command-line prep step that configures and runs a Hadoop job merging two
 * page sub-directories of a crawl directory into a new, time-stamped
 * sub-directory under {@code <crawl_dir>/pages}.
 *
 * <p>The merge logic itself lives in {@link PageMergeReducer}; this class only
 * wires up the job configuration and reports the result.
 */
public class PageMerger extends PrepStep {
    static final Log LOG = LogFactory.getLog(PageMerger.class);

    /** Name of the crawl sub-directory under which the merged output is written. */
    protected static final String PAGES_SUBDIR = "pages";

    /**
     * Builds the job configuration for merging two page sub-directories.
     *
     * <p>Both inputs are read as sequence files and the output is written as a
     * sequence file of {@link Text} keys and {@link Page} values. No mapper
     * class is set here, so whatever default the Hadoop configuration supplies
     * is used. The output directory is
     * {@code <crawlDir>/pages/pages.merge.<timestamp>}; anything already at
     * that location is deleted recursively before the job is configured.
     *
     * @param crawlDir       root crawl directory containing both inputs
     * @param pagesSubDirOne first input sub-directory, relative to {@code crawlDir}
     * @param pagesSubDirTwo second input sub-directory, relative to {@code crawlDir}
     * @return the configured job, ready to be passed to {@code runPrepStep}
     * @throws IOException if a file system operation fails while preparing the job
     */
    protected JobConf createJobConf(String crawlDir, String pagesSubDirOne, String pagesSubDirTwo) throws IOException {
        JobConf job = new JobConf(this.getConf());
        job.setJobName("merge pages in " + pagesSubDirOne + " and " + pagesSubDirTwo);

        job.setInputFormat(SequenceFileInputFormat.class);
        job.setReducerClass(PageMergeReducer.class);
        job.setOutputFormat(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Page.class);

        FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDirOne));
        FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDirTwo));

        Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.merge." + this.getCurTimeStamp());
        // Best-effort removal of a pre-existing directory with the same name;
        // the boolean result is deliberately ignored.
        this.m_fs.delete(outDir, true);
        FileOutputFormat.setOutputPath(job, outDir);

        // NOTE(review): presumably isolates this job's temp files from other
        // prep steps - confirm against PrepStep.setUniqueTempDir.
        this.setUniqueTempDir(job);
        return job;
    }

    /**
     * Entry point. Expects exactly three arguments: the crawl directory and the
     * two page sub-directories to merge. Prints usage and returns when the
     * argument count is wrong.
     *
     * @param args {@code crawl_dir pages_subdir_1 pages_subdir_2}
     * @throws Exception if job configuration or execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 3) {
            PageMerger.usage();
            return;
        }

        PageMerger merger = new PageMerger();
        JobConf job = merger.createJobConf(args[0], args[1], args[2]);
        if (LOG.isInfoEnabled()) {
            LOG.info("PageMerger: " + job.getJobName());
        }

        merger.runPrepStep(job);

        if (LOG.isInfoEnabled()) {
            LOG.info(Stats.dumpStats() + "\n");
            LOG.info("Output: " + FileOutputFormat.getOutputPath(job));
            LOG.info("PageMerger: done");
        }
    }

    /** Prints the command-line usage string to standard error. */
    protected static void usage() {
        System.err.println("Usage: PageMerger crawl_dir pages_subdir_1 pages_subdir_2\n");
    }

    /**
     * Process-wide counters for the merge job. All access goes through
     * {@code static synchronized} methods so that increments and the report
     * share the same lock.
     *
     * <p>NOTE(review): the increment methods are presumably called from
     * {@link PageMergeReducer}; the counters only reflect work done in this
     * JVM - confirm how the job is deployed.
     */
    static class Stats {
        private static int pageCount = 0;
        private static int mergedPageCount = 0;

        Stats() {
        }

        /** Records one more generated page. */
        public static synchronized void incPageCount() {
            ++pageCount;
        }

        /** Records one more page that was produced by merging. */
        public static synchronized void incMergedPageCount() {
            ++mergedPageCount;
        }

        /**
         * Formats the current counter values as a one-line, newline-terminated
         * summary.
         *
         * <p>Synchronized on the same lock as the increment methods so the
         * values read here are the latest written ones and are mutually
         * consistent.
         *
         * @return the summary text
         */
        public static synchronized String dumpStats() {
            StringBuilder strBld = new StringBuilder();
            strBld.append(pageCount).append(" pages generated of which ");
            strBld.append(mergedPageCount).append(" were merged\n");
            return strBld.toString();
        }
    }
}

