/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.meteor.util;

import edu.cmu.meteor.util.Constants;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Line-level text normalizer and tokenizer.
 *
 * <p>{@link #normalizeLine(String, int, boolean)} separates punctuation from
 * words, normalizes quotes, dashes and whitespace, handles sentence-final
 * periods with the help of a per-language "nonbreaking prefix" list, and can
 * optionally strip punctuation altogether. {@link #unescapeSGML(String)}
 * replaces the five predefined SGML/XML entities with their characters.
 *
 * <p>The nonbreaking prefix list is cached in unsynchronized static fields, so
 * concurrent calls for different languages may reload the list repeatedly.
 */
public class Normalizer {
    private static final String s_space = " ";

    // Character-class bodies (to be embedded inside [...]) for letters and for letters plus digits.
    private static final String alpha = "A-Za-z\u0160\u017d\u0161\u017e\u0178\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u017e\u0400-\u04ff\u0500-\u0527\ua640-\ua66e\ua67e-\ua697\u1d00-\u1d7f";
    private static final String alnum = "0-9" + alpha;

    // Put spaces around every character that is not alphanumeric, whitespace, or one of . ' ` , - and the curly single quotes.
    private static final Pattern r_sep_other = Pattern.compile("([^" + alnum + "\\s\\.\\'\\`\\,\\-\\\u2018\\\u2019])");
    private static final String s_sep_other = " $1 ";

    // Runs of two or more dots are temporarily rewritten to letter-only placeholders
    // so later steps do not treat them as periods; they are restored at the end.
    private static final Pattern r_multi_dot = Pattern.compile("\\.([\\.]+)");
    private static final String s_multi_dot = " DOTMULTI$1";
    private static final String s_multi_dot2 = "DOTMULTI.";
    private static final Pattern r_multi_dot2 = Pattern.compile("DOTMULTI\\.([^\\.])");
    private static final String s_multi_dot3 = "DOTDOTMULTI $1";
    private static final String s_multi_dot4 = "DOTDOTMULTI";
    private static final String s_multi_dot5 = "DOTMULTI";
    private static final String s_multi_dot6 = ".";

    // Commas are separated unless they sit between two digits.
    private static final Pattern r_comma = Pattern.compile("([^\\p{Digit}])[,]([^\\p{Digit}])");
    private static final String s_comma = "$1 , $2";
    private static final Pattern r_comma2 = Pattern.compile("([\\p{Digit}])[,]([^\\p{Digit}])");
    private static final Pattern r_comma3 = Pattern.compile("([^\\p{Digit}])[,]([\\p{Digit}])");

    // Quote normalization: backtick and curly single quotes become ', curly double quotes and '' become a spaced ".
    private static final Pattern r_quote_norm = Pattern.compile("([`\u2018\u2019])");
    private static final String s_quote_norm = "'";
    private static final Pattern r_quote_norm2 = Pattern.compile("([\u201c\u201d]|'')");
    private static final String s_quote_norm2 = " \" ";

    // Dash normalization: en dash and double hyphen both become a single hyphen.
    private static final String s_dash_norm = "\u2013";
    private static final String s_dash_norm2 = "-";
    private static final String s_dash_norm3 = "--";

    // Apostrophe handling used when the language id is 0 (the "en" patterns).
    private static final Pattern r_cont_en = Pattern.compile("([^" + alpha + "])[']([^" + alpha + "])");
    private static final String s_cont_en = "$1 ' $2";
    private static final Pattern r_cont_en2 = Pattern.compile("([^" + alpha + "\\p{Digit}])[']([" + alpha + "])");
    private static final Pattern r_cont_en3 = Pattern.compile("([" + alpha + "])[']([^" + alpha + "])");
    private static final Pattern r_cont_en4 = Pattern.compile("([" + alpha + "])[']([" + alpha + "])");
    private static final String s_cont_en2 = "$1 '$2";
    private static final Pattern r_cont_en5 = Pattern.compile("([\\p{Digit}])[']([s])");

    // Apostrophe handling used when the language id is 2 (the "fr" patterns).
    private static final Pattern r_cont_fr = Pattern.compile("([^" + alpha + "])[']([^" + alpha + "])");
    private static final String s_cont_fr = "$1 ' $2";
    private static final Pattern r_cont_fr2 = Pattern.compile("([^" + alpha + "])[']([" + alpha + "])");
    private static final Pattern r_cont_fr3 = Pattern.compile("([" + alpha + "])[']([^" + alpha + "])");
    private static final Pattern r_cont_fr4 = Pattern.compile("([" + alpha + "])[']([" + alpha + "])");
    private static final String s_cont_fr2 = "$1' $2";

    // Apostrophe handling for every other language: always separate.
    private static final String s_cont_other1 = "'";
    private static final String s_cont_other2 = " ' ";

    // Punctuation stripping: every non-alphanumeric character becomes a space.
    private static final Pattern r_punct_strip = Pattern.compile("[^" + alnum + "]");
    private static final String s_punct_strip = " ";

    // A hyphen between an alphanumeric (or dot) and an alphanumeric becomes a space.
    private static final Pattern r_rm_dash = Pattern.compile("([" + alnum + "\\.])[\\-]([" + alnum + "])");
    private static final String s_rm_dash = "$1 $2";

    // Runs of assorted space characters collapse to a single space.
    private static final Pattern r_white = Pattern.compile("[  \u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000\u00a0]+");
    private static final String s_white = " ";

    // Values stored in the nonbreaking prefix table.
    /** Entry listed without a qualifier. */
    private static final int NBP_ALWAYS = 1;
    /** Entry whose second field is {@code #NUMERIC_ONLY#}. */
    private static final int NBP_NUMERIC_ONLY = 2;
    /**
     * Value of {@link #nbpLangID} while no list is successfully cached.
     * NOTE(review): 99 is also accepted as a language id by normalizeLine; that
     * id never reaches nbpList from there, but the overlap is worth confirming.
     */
    private static final int NBP_LANG_NONE = 99;

    // Cache of the most recently loaded nonbreaking prefix list and the language it belongs to.
    private static Hashtable<String, Integer> nbpDict = null;
    private static int nbpLangID = NBP_LANG_NONE;

    private static final String s_nbp = ".";
    private static final String s_nbp2 = "";
    private static final Pattern r_nbp1 = Pattern.compile("[" + alpha + "]");
    private static final Pattern r_nbp2 = Pattern.compile("^[\\p{Lower}]");
    private static final Pattern r_nbp3 = Pattern.compile("^[0-9]+");
    private static final String s_nbp3 = " .";
    private static final String s_nbp4 = " ";

    // Punctuation and period handling for the non-Western path.
    private static final Pattern r_punct_nonwest = Pattern.compile("([\\!-\\+\\-\\/\\:-\\@\\[-\\`\\{-\u00bf\u060c])");
    private static final String s_punct_nonwest = " $1 ";
    private static final String s_punct_nonwest2 = " ";
    private static final String s_punct_nonwest3 = ".";
    private static final Pattern r_dot_nonwest = Pattern.compile("([^\\p{Digit}])[\\.]([^\\p{Digit}])");
    private static final String s_dot_nonwest = "$1 . $2";
    private static final Pattern r_dot_nonwest2 = Pattern.compile("([\\p{Digit}])[\\.]([^\\p{Digit}])");
    private static final Pattern r_dot_nonwest3 = Pattern.compile("([^\\p{Digit}])[\\.]([\\p{Digit}])");

    // SGML entity patterns (matched case-insensitively) and their replacement characters.
    private static final Pattern r_quot = Pattern.compile("&quot;", Pattern.CASE_INSENSITIVE);
    private static final Pattern r_apos = Pattern.compile("&apos;", Pattern.CASE_INSENSITIVE);
    private static final Pattern r_lt = Pattern.compile("&lt;", Pattern.CASE_INSENSITIVE);
    private static final Pattern r_gt = Pattern.compile("&gt;", Pattern.CASE_INSENSITIVE);
    private static final Pattern r_amp = Pattern.compile("&amp;", Pattern.CASE_INSENSITIVE);
    private static final String quot = "\"";
    private static final String apos = "'";
    private static final String lt = "<";
    private static final String gt = ">";
    private static final String amp = "&";

    /**
     * Returns the nonbreaking prefix table for language {@code n}, loading it
     * from {@code <DEFAULT_NBP_DIR_URL>/<language name>.prefixes} (read as
     * UTF-8) unless the cached table already belongs to that language.
     *
     * <p>Each non-empty line whose first whitespace-separated field does not
     * start with {@code #} contributes one entry: the first field is the key,
     * and the value is {@link #NBP_NUMERIC_ONLY} when the second field equals
     * {@code #NUMERIC_ONLY#}, otherwise {@link #NBP_ALWAYS}.
     *
     * <p>On any failure an error is printed to standard error and the entries
     * read so far (possibly none) are returned; the table is then not marked
     * as cached, so the next call tries again.
     *
     * @param n language id
     * @return the prefix table, never {@code null}
     */
    private static Hashtable<String, Integer> nbpList(int n) {
        if (nbpDict != null && nbpLangID == n) {
            return nbpDict;
        }
        Hashtable<String, Integer> prefixes = new Hashtable<String, Integer>();
        nbpDict = prefixes;
        nbpLangID = NBP_LANG_NONE;
        BufferedReader reader = null;
        try {
            URL listUrl = new URL(Constants.DEFAULT_NBP_DIR_URL.toString() + "/" + Constants.getLanguageName(n) + ".prefixes");
            reader = new BufferedReader(new InputStreamReader(listUrl.openStream(), "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                StringTokenizer fields = new StringTokenizer(line);
                if (!fields.hasMoreTokens()) {
                    continue;
                }
                String prefix = fields.nextToken();
                if (prefix.startsWith("#")) {
                    continue;
                }
                int kind = NBP_ALWAYS;
                if (fields.hasMoreTokens() && fields.nextToken().equals("#NUMERIC_ONLY#")) {
                    kind = NBP_NUMERIC_ONLY;
                }
                prefixes.put(prefix, kind);
            }
            // Only a complete read marks the cache as valid for this language.
            nbpLangID = n;
        }
        catch (Exception exception) {
            System.err.println("Error: Nonbreaking prefix list could not be loaded:");
            exception.printStackTrace();
        }
        finally {
            // Close on every path so a failed read does not leak the stream.
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return prefixes;
    }

    /**
     * Normalizes and tokenizes one line of text for the given language.
     *
     * <p>Language ids 5 and 99 are routed to the non-Western pipeline. For all
     * other supported ids the line goes through punctuation separation,
     * comma/quote/dash normalization, language-dependent apostrophe handling
     * (id 0 uses the "en" rules, id 2 the "fr" rules, anything else separates
     * every apostrophe), and nonbreaking-prefix based period handling.
     *
     * @param string the input line
     * @param n language id as understood by {@code Constants}
     * @param bl {@code true} to keep punctuation tokens, {@code false} to
     *           replace every non-alphanumeric character with a space
     * @return the normalized line with single spaces between tokens and no
     *         leading or trailing whitespace
     * @throws RuntimeException if {@code Constants.isSupported(n)} is false
     */
    public static String normalizeLine(String string, int n, boolean bl) {
        if (!Constants.isSupported(n)) {
            System.err.println("Error: Pre-process the input files and run Meteor without the -norm option.");
            String languageName = "";
            try {
                languageName = Constants.getLanguageName(n);
            }
            catch (Exception ignored) {
                // Best effort only: the name just decorates the error message below,
                // so an unknown id falls back to an empty name.
            }
            throw new RuntimeException("No normalizer for language (" + languageName + ")");
        }
        if (n == 5 || n == 99) {
            return Normalizer.normalizeNonWestern(string, bl);
        }
        // Pad with spaces so the context-sensitive patterns also match at the line edges.
        String text = s_space + string + s_space;
        text = r_sep_other.matcher(text).replaceAll(s_sep_other);
        text = Normalizer.normalizeShared(text);
        text = r_rm_dash.matcher(text).replaceAll(s_rm_dash);
        text = Normalizer.splitApostrophes(text, n);
        text = Normalizer.splitFinalPeriods(text, n);
        text = Normalizer.restoreMultiDots(text);
        if (!bl) {
            text = r_punct_strip.matcher(text).replaceAll(s_punct_strip);
        }
        return r_white.matcher(text).replaceAll(s_white).trim();
    }

    /**
     * Steps common to both pipelines: protect runs of dots with placeholders,
     * then normalize commas, quotes and dashes.
     */
    private static String normalizeShared(String text) {
        String result = r_multi_dot.matcher(text).replaceAll(s_multi_dot);
        // Each pass consumes one dot per placeholder until none is followed by a dot.
        while (result.contains(s_multi_dot2)) {
            result = r_multi_dot2.matcher(result).replaceAll(s_multi_dot3);
            result = result.replace(s_multi_dot2, s_multi_dot4);
        }
        result = r_comma.matcher(result).replaceAll(s_comma);
        result = r_comma2.matcher(result).replaceAll(s_comma);
        result = r_comma3.matcher(result).replaceAll(s_comma);
        result = r_quote_norm.matcher(result).replaceAll(s_quote_norm);
        result = r_quote_norm2.matcher(result).replaceAll(s_quote_norm2);
        result = result.replace(s_dash_norm, s_dash_norm2);
        result = result.replace(s_dash_norm3, s_dash_norm2);
        return result;
    }

    /** Turns the multi-dot placeholders written by {@link #normalizeShared(String)} back into dots. */
    private static String restoreMultiDots(String text) {
        String result = text;
        // A single replace can expose a new placeholder (e.g. DOTDOTDOTMULTI), hence the loop.
        while (result.contains(s_multi_dot4)) {
            result = result.replace(s_multi_dot4, s_multi_dot2);
        }
        return result.replace(s_multi_dot5, s_multi_dot6);
    }

    /** Applies the apostrophe rules selected by language id {@code n}. */
    private static String splitApostrophes(String text, int n) {
        String result = text;
        if (n == 0) {
            result = r_cont_en.matcher(result).replaceAll(s_cont_en);
            result = r_cont_en2.matcher(result).replaceAll(s_cont_en);
            result = r_cont_en3.matcher(result).replaceAll(s_cont_en);
            result = r_cont_en4.matcher(result).replaceAll(s_cont_en2);
            result = r_cont_en5.matcher(result).replaceAll(s_cont_en2);
        } else if (n == 2) {
            result = r_cont_fr.matcher(result).replaceAll(s_cont_fr);
            result = r_cont_fr2.matcher(result).replaceAll(s_cont_fr);
            result = r_cont_fr3.matcher(result).replaceAll(s_cont_fr);
            result = r_cont_fr4.matcher(result).replaceAll(s_cont_fr2);
        } else {
            result = result.replace(s_cont_other1, s_cont_other2);
        }
        return result;
    }

    /**
     * Re-joins the whitespace-separated tokens of {@code text}, deciding for
     * every token that ends in a period whether the period stays attached.
     *
     * <p>For a token of length greater than one ending in ".":
     * <ul>
     * <li>if the part before the final period contains another period and a
     *     letter, all periods are removed from the token;</li>
     * <li>otherwise the token is kept unchanged when it is an unqualified
     *     nonbreaking prefix, when the next token starts with a lowercase
     *     letter, or when it is a numeric-only prefix followed by a token
     *     starting with a digit;</li>
     * <li>otherwise the final period is split off as its own token.</li>
     * </ul>
     * Every token is followed by a single space in the result.
     */
    private static String splitFinalPeriods(String text, int n) {
        StringTokenizer tokenizer = new StringTokenizer(text);
        String[] tokens = new String[tokenizer.countTokens()];
        for (int t = 0; t < tokens.length; ++t) {
            tokens[t] = tokenizer.nextToken();
        }
        Hashtable<String, Integer> prefixes = Normalizer.nbpList(n);
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < tokens.length; ++i) {
            String token = tokens[i];
            if (token.length() > 1 && token.endsWith(s_nbp)) {
                String stem = token.substring(0, token.length() - 1);
                Integer kind = prefixes.get(stem);
                boolean hasNext = i < tokens.length - 1;
                if (stem.contains(s_nbp) && r_nbp1.matcher(stem).find()) {
                    joined.append(token.replace(s_nbp, s_nbp2));
                } else if ((kind != null && kind.intValue() == NBP_ALWAYS)
                        || (hasNext && r_nbp2.matcher(tokens[i + 1]).find())) {
                    joined.append(token);
                } else if (kind != null && kind.intValue() == NBP_NUMERIC_ONLY
                        && hasNext && r_nbp3.matcher(tokens[i + 1]).find()) {
                    joined.append(token);
                } else {
                    joined.append(stem);
                    joined.append(s_nbp3);
                }
            } else {
                joined.append(token);
            }
            joined.append(s_nbp4);
        }
        return joined.toString();
    }

    /**
     * Pipeline used for language ids 5 and 99: no nonbreaking prefix list and
     * no apostrophe rules, only generic punctuation separation.
     *
     * @param string the input line
     * @param bl {@code true} to keep punctuation, {@code false} to replace the
     *           separated punctuation characters and periods with spaces
     * @return the normalized, trimmed line
     */
    private static String normalizeNonWestern(String string, boolean bl) {
        String text = s_space + string + s_space;
        text = Normalizer.normalizeShared(text);
        text = r_dot_nonwest.matcher(text).replaceAll(s_dot_nonwest);
        text = r_dot_nonwest2.matcher(text).replaceAll(s_dot_nonwest);
        text = r_dot_nonwest3.matcher(text).replaceAll(s_dot_nonwest);
        text = r_punct_nonwest.matcher(text).replaceAll(s_punct_nonwest);
        if (!bl) {
            text = r_punct_nonwest.matcher(text).replaceAll(s_punct_nonwest2);
            text = text.replace(s_punct_nonwest3, s_punct_nonwest2);
        }
        // NOTE(review): the multi-dot placeholders are restored after the stripping
        // step above, so runs of dots survive even when punctuation is removed
        // (unlike normalizeLine, which strips after restoring) - confirm this is intended.
        text = Normalizer.restoreMultiDots(text);
        text = r_white.matcher(text).replaceAll(s_white);
        return text.trim();
    }

    /**
     * Replaces the entities quot, apos, lt, gt and amp (matched
     * case-insensitively) with the characters they stand for. The ampersand
     * entity is handled last so that an escaped entity is unescaped only once.
     *
     * @param string text possibly containing SGML entities
     * @return the text with those entities replaced
     */
    public static String unescapeSGML(String string) {
        String text = r_quot.matcher(string).replaceAll(quot);
        text = r_apos.matcher(text).replaceAll(apos);
        text = r_lt.matcher(text).replaceAll(lt);
        text = r_gt.matcher(text).replaceAll(gt);
        return r_amp.matcher(text).replaceAll(amp);
    }

    /**
     * Command-line filter: normalizes every line of standard input and prints
     * the result to standard output.
     *
     * @param stringArray {@code [0]} language name, {@code [1]} "true" to keep
     *                    punctuation (anything else strips it)
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] stringArray) throws IOException {
        if (stringArray.length < 2) {
            System.out.println("Usage: Normalizer lang punct");
            System.out.println("where punct is true/false");
            return;
        }
        int languageId = Constants.getLanguageID(Constants.normLanguageName(stringArray[0]));
        boolean keepPunctuation = Boolean.parseBoolean(stringArray[1]);
        // NOTE(review): stdin/stdout use the platform default charset here while the
        // prefix list is read as UTF-8 - confirm whether UTF-8 should be forced.
        BufferedReader input = new BufferedReader(new InputStreamReader(System.in));
        String line;
        while ((line = input.readLine()) != null) {
            System.out.println(Normalizer.normalizeLine(line, languageId, keepPunctuation));
        }
    }
}

