/*
 * Decompiled with CFR 0.152.
 */
package wordModel;

import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import wordModel.UkWordMapping;

/**
 * Word mapping that replaces a word by an "unknown word" signature: the
 * literal prefix {@code UNK} followed by surface-form features (capitalisation,
 * digits, dashes, suffixes, ...). The exact feature set is selected by
 * {@link #unknownLevel}.
 */
public class UkWordMappingPetrov
extends UkWordMapping {
    /**
     * Selects the signature scheme used by {@link #getNewSignature}: 5, 4, 3
     * and 2 each pick a dedicated scheme, any other value picks the fallback
     * scheme (last two characters plus a coarse case class).
     */
    public static int unknownLevel = 5;

    /**
     * Lower-cased forms of the lexicon entries whose first frequency counter
     * is above {@code ukThreashold}. Populated by
     * {@link #loadDefaultParameters()}; it is {@code null} before that call.
     */
    HashSet<String> knownWordsLower;

    /**
     * Suffixes tested (in this order, first match wins) by the level-5 scheme
     * for words of at least five characters.
     */
    private static final String[] LEVEL5_SUFFIXES = {
        "ed", "ing", "ion", "er", "est", "ly", "ity", "y", "al"
    };

    /**
     * Rebuilds {@link #knownWordsLower} from {@code lexFrequency}, keeping only
     * the entries whose first counter is strictly greater than
     * {@code ukThreashold}.
     */
    @Override
    protected void loadDefaultParameters() {
        this.knownWordsLower = new HashSet<String>();
        // NOTE(review): lexFrequency is declared in the superclass; the casts
        // below assume String keys and int[] values - confirm against it.
        for (Map.Entry e : this.lexFrequency.entrySet()) {
            int freq = ((int[])e.getValue())[0];
            if (freq <= ukThreashold) {
                continue;
            }
            // Locale.ROOT keeps the set independent of the JVM default locale
            // (e.g. Turkish dotless i) and consistent with the lookup in
            // getNewSignature.
            this.knownWordsLower.add(((String)e.getKey()).toLowerCase(Locale.ROOT));
        }
    }

    /** Prints the number of entries in {@link #knownWordsLower} to standard output. */
    @Override
    protected void printParametersInfo() {
        System.out.println("Known word lowercase: " + this.knownWordsLower.size());
    }

    /**
     * Returns the signature of {@code word}; delegates to
     * {@link #getNewSignature(String, boolean)}.
     *
     * @param word            the surface form to map
     * @param firstWord       whether the word is the first one of its sentence
     * @param trainingDevelop not used by this implementation
     * @return the signature string
     */
    @Override
    public String getFeatureOfWord(String word, boolean firstWord, int trainingDevelop) {
        return this.getNewSignature(word, firstWord);
    }

    /** Intentionally prints nothing. */
    @Override
    protected void printModelStats() {
    }

    /**
     * Builds the signature of {@code word} according to the scheme selected by
     * {@link #unknownLevel}. The result always starts with {@code "UNK"}.
     *
     * <p>An empty word yields the signature of a word without any first-letter
     * feature instead of throwing.
     *
     * @param word      the surface form to map; must not be {@code null}
     * @param firstWord whether the word is the first one of its sentence
     * @return the signature string
     */
    public String getNewSignature(String word, boolean firstWord) {
        StringBuilder sb = new StringBuilder("UNK");
        switch (unknownLevel) {
            case 5: {
                this.appendLevel5(sb, word, firstWord);
                break;
            }
            case 4: {
                appendLevel4(sb, word, firstWord);
                break;
            }
            case 3: {
                appendLevel3(sb, word, firstWord);
                break;
            }
            case 2: {
                appendLevel2(sb, word, firstWord);
                break;
            }
            default: {
                appendFallback(sb, word, firstWord);
            }
        }
        return sb.toString();
    }

    /** Returns the first character of {@code word}, or NUL (no letter, no case) when it is empty. */
    private static char firstCharOrNul(String word) {
        return word.isEmpty() ? '\u0000' : word.charAt(0);
    }

    /** Level 5: capitalisation class, known-lowercase flag, digit/dash flags and one suffix. */
    private void appendLevel5(StringBuilder sb, String word, boolean firstWord) {
        int wlen = word.length();
        int numCaps = 0;
        boolean hasDigit = false;
        boolean hasDash = false;
        boolean hasLower = false;
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
            } else if (ch == '-') {
                hasDash = true;
            } else if (Character.isLetter(ch)) {
                if (Character.isLowerCase(ch)) {
                    hasLower = true;
                } else if (Character.isTitleCase(ch)) {
                    // Title-case letters count both as lower case and as a capital.
                    hasLower = true;
                    ++numCaps;
                } else {
                    ++numCaps;
                }
            }
        }

        char ch0 = firstCharOrNul(word);
        String lowered = word.toLowerCase(Locale.ROOT);
        if (Character.isUpperCase(ch0) || Character.isTitleCase(ch0)) {
            if (firstWord && numCaps == 1) {
                sb.append("-INITC");
                if (this.knownWordsLower.contains(lowered)) {
                    sb.append("-KNOWNLC");
                }
            } else {
                sb.append("-CAPS");
            }
        } else if (!Character.isLetter(ch0) && numCaps > 0) {
            sb.append("-CAPS");
        } else if (hasLower) {
            sb.append("-LC");
        }

        if (hasDigit) {
            sb.append("-NUM");
        }
        if (hasDash) {
            sb.append("-DASH");
        }

        if (lowered.endsWith("s") && wlen >= 3) {
            // Index relative to the lowered string: lower-casing may change the length.
            char beforeLast = lowered.charAt(lowered.length() - 2);
            // Endings "ss", "is" and "us" get no "-s" feature.
            if (beforeLast != 's' && beforeLast != 'i' && beforeLast != 'u') {
                sb.append("-s");
            }
            return;
        }
        if (wlen < 5 || hasDash || (hasDigit && numCaps > 0)) {
            return;
        }
        for (String suffix : LEVEL5_SUFFIXES) {
            if (lowered.endsWith(suffix)) {
                sb.append('-').append(suffix);
                return;
            }
        }
    }

    /** Level 4: case class, digit/dash/period/comma flags and the lower-cased final letter. */
    private static void appendLevel4(StringBuilder sb, String word, boolean firstWord) {
        boolean hasDigit = false;
        boolean hasNonDigit = false;
        boolean hasLetter = false;
        boolean hasLower = false;
        boolean hasDash = false;
        boolean hasPeriod = false;
        boolean hasComma = false;
        for (int i = 0; i < word.length(); ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
                continue;
            }
            hasNonDigit = true;
            if (Character.isLetter(ch)) {
                hasLetter = true;
                if (Character.isLowerCase(ch) || Character.isTitleCase(ch)) {
                    hasLower = true;
                }
            } else if (ch == '-') {
                hasDash = true;
            } else if (ch == '.') {
                hasPeriod = true;
            } else if (ch == ',') {
                hasComma = true;
            }
        }

        char first = firstCharOrNul(word);
        if (Character.isUpperCase(first) || Character.isTitleCase(first)) {
            if (!hasLower) {
                sb.append("-AC");
            } else if (firstWord) {
                sb.append("-SC");
            } else {
                sb.append("-C");
            }
        } else if (hasLower) {
            sb.append("-L");
        } else if (hasLetter) {
            sb.append("-U");
        } else {
            sb.append("-S");
        }

        if (hasDigit && !hasNonDigit) {
            sb.append("-N");
        } else if (hasDigit) {
            sb.append("-n");
        }
        if (hasDash) {
            sb.append("-H");
        }
        if (hasPeriod) {
            sb.append("-P");
        }
        if (hasComma) {
            // Same marker text as the capitalised, non-initial case above.
            sb.append("-C");
        }

        if (word.length() > 3) {
            char last = word.charAt(word.length() - 1);
            if (Character.isLetter(last)) {
                sb.append("-");
                sb.append(Character.toLowerCase(last));
            }
        }
    }

    /** Returns the level-3 class letter of a single character. */
    private static char level3Class(char ch, boolean firstWord) {
        if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
            return firstWord ? 'S' : 'L';
        }
        if (Character.isLetter(ch)) {
            return 'l';
        }
        if (Character.isDigit(ch)) {
            return 'd';
        }
        if (ch == '-') {
            return 'h';
        }
        if (ch == '.') {
            return 'p';
        }
        return 's';
    }

    /**
     * Level 3: run-length-collapsed sequence of character classes (a run of two
     * or more equal classes is written as the class letter followed by one
     * {@code '+'}), then the lower-cased final character for words longer than
     * three characters.
     */
    private static void appendLevel3(StringBuilder sb, String word, boolean firstWord) {
        sb.append("-");
        // '-' is never produced by level3Class, so the first character always starts a run.
        char lastClass = '-';
        int runLength = 0;
        for (int i = 0; i < word.length(); ++i) {
            char newClass = level3Class(word.charAt(i), firstWord);
            if (newClass != lastClass) {
                lastClass = newClass;
                sb.append(lastClass);
                runLength = 1;
            } else {
                if (runLength < 2) {
                    sb.append('+');
                }
                ++runLength;
            }
        }
        if (word.length() > 3) {
            sb.append('-');
            sb.append(Character.toLowerCase(word.charAt(word.length() - 1)));
        }
    }

    /** Level 2: case class, dash flag, then either a digit flag or the lower-cased final character. */
    private static void appendLevel2(StringBuilder sb, String word, boolean firstWord) {
        boolean hasDigit = false;
        boolean hasNonDigit = false;
        boolean hasLower = false;
        for (int i = 0; i < word.length(); ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
            } else {
                hasNonDigit = true;
                if (Character.isLetter(ch) && (Character.isLowerCase(ch) || Character.isTitleCase(ch))) {
                    hasLower = true;
                }
            }
        }

        char first = firstCharOrNul(word);
        if (Character.isUpperCase(first) || Character.isTitleCase(first)) {
            if (!hasLower) {
                sb.append("-ALLC");
            } else if (firstWord) {
                sb.append("-INIT");
            } else {
                sb.append("-UC");
            }
        } else if (hasLower) {
            sb.append("-LC");
        }

        if (word.indexOf('-') >= 0) {
            sb.append("-DASH");
        }

        if (hasDigit) {
            sb.append(hasNonDigit ? "-DIG" : "-NUM");
        } else if (word.length() > 3) {
            // The final character is appended without a separating dash.
            sb.append(Character.toLowerCase(word.charAt(word.length() - 1)));
        }
    }

    /** Fallback: last two characters of the word plus LOWER / INIT / UPPER / OTHER. */
    private static void appendFallback(StringBuilder sb, String word, boolean firstWord) {
        sb.append("-");
        sb.append(word.substring(Math.max(word.length() - 2, 0), word.length()));
        sb.append("-");
        char first = firstCharOrNul(word);
        if (Character.isLowerCase(first)) {
            sb.append("LOWER");
        } else if (Character.isUpperCase(first)) {
            sb.append(firstWord ? "INIT" : "UPPER");
        } else {
            sb.append("OTHER");
        }
    }
}

