/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.CoreMapNodePattern;
import edu.stanford.nlp.ling.tokensregex.Env;
import edu.stanford.nlp.ling.tokensregex.MultiPatternMatcher;
import edu.stanford.nlp.ling.tokensregex.SequenceMatchResult;
import edu.stanford.nlp.ling.tokensregex.SequencePattern;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.ling.tokensregex.matcher.TrieMap;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;

public class TokensRegexNERAnnotator
implements Annotator {
    /** Redwood logging channel ("TokenRegexNER") used for this annotator's log and warning messages. */
    protected static final Redwood.RedwoodChannels logger = Redwood.channels("TokenRegexNER");
    /** Value of the {@code ignorecase} property; selects the pattern flags and lower-cases duplicate-detection keys. */
    private final boolean ignoreCase;
    /** Unmodifiable list of all entries read from the mapping files. */
    private final List<Entry> entries;
    /** Unmodifiable view of an identity-keyed map from each compiled pattern to the entry it was built from. */
    private final Map<SequencePattern<CoreMap>, Entry> patternToEntry;
    /** Matcher built over the compiled patterns of all entries. */
    private final MultiPatternMatcher<CoreMap> multiPatternMatcher;
    /** Background symbols, {@code null}, and every entry type; consulted when deciding whether an existing NER tag may be replaced. */
    private final Set<String> myLabels;
    /** Compiled {@code validpospattern}, or {@code null} when the property is absent or empty. */
    private final Pattern validPosPattern;
    /** Value of the {@code verbose} property; enables progress and "not annotating" output on System.err. */
    private final boolean verbose;
    /** Unmodifiable set of entry types parsed from the {@code noDefaultOverwriteLabels} property (empty when unset). */
    private final Set<String> noDefaultOverwriteLabels;
    /** How {@link #validPosPattern} is applied to matched tokens; parsed from the {@code posmatchtype} property. */
    private final PosMatchType posMatchType;
    /** POS match type used when the {@code posmatchtype} property is not given. */
    public static final PosMatchType DEFAULT_POS_MATCH_TYPE = PosMatchType.MATCH_AT_LEAST_ONE_TOKEN;
    /** Comma-separated background symbols used when the {@code backgroundSymbol} property is not given. */
    public static final String DEFAULT_BACKGROUND_SYMBOL = "O,MISC";
    /** Descriptors (name, default value, description) of the properties this annotator reads. */
    public static PropertiesUtils.Property[] SUPPORTED_PROPERTIES = new PropertiesUtils.Property[]{new PropertiesUtils.Property("mapping", "edu/stanford/nlp/models/regexner/type_map_clean", "Comma separated list of mapping files to use."), new PropertiesUtils.Property("ignorecase", "false", "Whether to ignore case or not when matching patterns."), new PropertiesUtils.Property("validpospattern", "", "Regular expression pattern for matching POS tags."), new PropertiesUtils.Property("posmatchtype", DEFAULT_POS_MATCH_TYPE.name(), "How should 'validpospattern' be used to match the POS of the tokens."), new PropertiesUtils.Property("noDefaultOverwriteLabels", "", "Comma separated list of output types for which default NER labels are not overwritten.\n For these types, only if the matched expression has NER type matching the\n specified overwriteableType for the regex will the NER type be overwritten."), new PropertiesUtils.Property("backgroundSymbol", "O,MISC", "Comma separated list of NER labels to always replace."), new PropertiesUtils.Property("verbose", "false", "")};

    /**
     * Creates a case-sensitive annotator from the given mapping specification.
     *
     * @param mapping mapping file specification, stored under the {@code mapping} property
     */
    public TokensRegexNERAnnotator(String mapping) {
        this(mapping, false);
    }

    /**
     * Creates an annotator from the given mapping specification with no POS restriction.
     *
     * @param mapping mapping file specification, stored under the {@code mapping} property
     * @param ignoreCase value for the {@code ignorecase} property
     */
    public TokensRegexNERAnnotator(String mapping, boolean ignoreCase) {
        this(mapping, ignoreCase, null);
    }

    /**
     * Creates an annotator named "tokenregexner" by translating the arguments into
     * properties and delegating to the properties-based constructor.
     *
     * @param mapping mapping file specification, stored under the {@code mapping} property
     * @param ignoreCase value for the {@code ignorecase} property
     * @param validPosRegex value for the {@code validpospattern} property; not set when {@code null}
     */
    public TokensRegexNERAnnotator(String mapping, boolean ignoreCase, String validPosRegex) {
        this("tokenregexner", TokensRegexNERAnnotator.getProperties("tokenregexner", mapping, ignoreCase, validPosRegex));
    }

    /**
     * Translates convenience-constructor arguments into the property set read by
     * the properties-based constructor.
     *
     * @param name annotator name; when non-empty, every key is prefixed with {@code name + "."}
     * @param mapping value for the {@code mapping} key
     * @param ignoreCase value for the {@code ignorecase} key, stored as "true"/"false"
     * @param validPosRegex value for the {@code validpospattern} key; the key is omitted when {@code null}
     * @return a new {@link Properties} holding the keys described above
     */
    private static Properties getProperties(String name, String mapping, boolean ignoreCase, String validPosRegex) {
        final String keyPrefix;
        if (name == null || name.isEmpty()) {
            keyPrefix = "";
        } else {
            keyPrefix = name + ".";
        }

        Properties result = new Properties();
        result.setProperty(keyPrefix + "mapping", mapping);
        result.setProperty(keyPrefix + "ignorecase", Boolean.toString(ignoreCase));
        // The POS restriction is optional: leave the key unset rather than storing null.
        if (validPosRegex != null) {
            result.setProperty(keyPrefix + "validpospattern", validPosRegex);
        }
        return result;
    }

    /**
     * Creates an annotator configured from {@code properties}.
     *
     * <p>Keys are looked up as {@code name + "." + key} when {@code name} is non-empty,
     * otherwise as the bare key. Keys read: {@code backgroundSymbol}, {@code mapping},
     * {@code validpospattern}, {@code posmatchtype}, {@code noDefaultOverwriteLabels},
     * {@code ignorecase} and {@code verbose}.
     *
     * @param name annotator name used as the property prefix and in log messages
     * @param properties configuration source
     * @throws IllegalArgumentException if {@code posmatchtype} is not the name of a {@link PosMatchType} constant
     */
    public TokensRegexNERAnnotator(String name, Properties properties) {
        final String keyPrefix = (name == null || name.isEmpty()) ? "" : name + ".";

        String posRegex = properties.getProperty(keyPrefix + "validpospattern");
        String posMatchTypeName = properties.getProperty(keyPrefix + "posmatchtype", DEFAULT_POS_MATCH_TYPE.name());
        this.posMatchType = PosMatchType.valueOf(posMatchTypeName);

        String noOverwriteProp = properties.getProperty(keyPrefix + "noDefaultOverwriteLabels");
        Set<String> noOverwrite;
        if (noOverwriteProp == null) {
            noOverwrite = new HashSet<String>();
        } else {
            noOverwrite = CollectionUtils.asSet(noOverwriteProp.split("\\s*,\\s*"));
        }
        this.noDefaultOverwriteLabels = Collections.unmodifiableSet(noOverwrite);

        this.ignoreCase = PropertiesUtils.getBool(properties, keyPrefix + "ignorecase", false);
        this.verbose = PropertiesUtils.getBool(properties, keyPrefix + "verbose", false);

        // An absent or empty pattern means "no POS restriction".
        if (posRegex == null || posRegex.isEmpty()) {
            this.validPosPattern = null;
        } else {
            this.validPosPattern = Pattern.compile(posRegex);
        }

        // Mapping files may be separated by commas or semicolons.
        String mappingSpec = properties.getProperty(keyPrefix + "mapping", "edu/stanford/nlp/models/regexner/type_map_clean");
        String[] mappingFiles = mappingSpec.split("\\s*[,;]\\s*");
        this.entries = Collections.unmodifiableList(
                readEntries(name, this.noDefaultOverwriteLabels, this.ignoreCase, this.verbose, mappingFiles));

        // The matcher needs entries, ignoreCase, validPosPattern and posMatchType, all set above.
        Map<SequencePattern<CoreMap>, Entry> patternIndex = new IdentityHashMap<SequencePattern<CoreMap>, Entry>();
        this.multiPatternMatcher = this.createPatternMatcher(patternIndex);
        this.patternToEntry = Collections.unmodifiableMap(patternIndex);

        // Labels that may be replaced by default: background symbols, "no label" (null), and our own types.
        String backgroundSpec = properties.getProperty(keyPrefix + "backgroundSymbol", DEFAULT_BACKGROUND_SYMBOL);
        Set<String> labels = Generics.newHashSet();
        for (String symbol : backgroundSpec.split("\\s*,\\s*")) {
            labels.add(symbol);
        }
        labels.add(null);
        for (Entry e : this.entries) {
            labels.add(e.type);
        }
        this.myLabels = Collections.unmodifiableSet(labels);
    }

    /**
     * Applies the configured patterns to the annotation and sets the NER tag of
     * matched tokens.
     *
     * <p>If the annotation has sentences, each sentence's token list is processed
     * separately; otherwise the annotation's own token list is processed.
     *
     * @param annotation the annotation whose tokens are to be tagged
     * @throws RuntimeException if the annotation has neither sentences nor tokens
     */
    @Override
    public void annotate(Annotation annotation) {
        if (this.verbose) {
            System.err.print("Adding TokensRegexNER annotations ... ");
        }

        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        if (sentences != null) {
            for (CoreMap sentence : sentences) {
                List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
                this.annotateMatched(tokens);
            }
        } else {
            // No sentence split available: fall back to the document-level token list.
            List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
            if (tokens == null) {
                throw new RuntimeException("Unable to find sentences or tokens in " + annotation);
            }
            this.annotateMatched(tokens);
        }

        if (this.verbose) {
            System.err.println("done.");
        }
    }

    /**
     * Compiles every entry into a {@link TokenSequencePattern} and builds one
     * matcher over all of them.
     *
     * <p>Entries with a TokensRegex expression are compiled in an environment
     * carrying the case-sensitivity flags. Other entries are compiled as a sequence
     * of per-token patterns, each additionally constrained by the valid-POS pattern
     * when the POS match type is {@code MATCH_ALL_TOKENS}.
     *
     * @param patternToEntry output map; receives one pattern-to-entry mapping per entry
     * @return a matcher over all compiled patterns
     * @throws RuntimeException if an entry's annotate group is negative or greater than the pattern's total groups
     */
    private MultiPatternMatcher<CoreMap> createPatternMatcher(Map<SequencePattern<CoreMap>, Entry> patternToEntry) {
        final int regexFlags = this.ignoreCase ? Pattern.CASE_INSENSITIVE : 0;
        // NOTE(review): 2 is presumably the node-pattern case-insensitive flag - confirm against NodePattern.
        final int stringMatchFlags = this.ignoreCase ? 2 : 0;

        Env env = TokenSequencePattern.getNewEnv();
        env.setDefaultStringPatternFlags(regexFlags);
        env.setDefaultStringMatchFlags(stringMatchFlags);

        CoreMapNodePattern.StringAnnotationRegexPattern posConstraint = null;
        if (this.validPosPattern != null && this.posMatchType == PosMatchType.MATCH_ALL_TOKENS) {
            posConstraint = new CoreMapNodePattern.StringAnnotationRegexPattern(this.validPosPattern);
        }

        List<TokenSequencePattern> compiled = new ArrayList<TokenSequencePattern>(this.entries.size());
        for (Entry entry : this.entries) {
            TokenSequencePattern pattern;
            if (entry.tokensRegex == null) {
                // Plain entry: one node pattern per whitespace-separated token regex.
                List<SequencePattern.PatternExpr> nodeExprs = new ArrayList<SequencePattern.PatternExpr>();
                for (String tokenRegex : entry.regex) {
                    CoreMapNodePattern node = CoreMapNodePattern.valueOf(tokenRegex, regexFlags);
                    if (posConstraint != null) {
                        node.add(CoreAnnotations.PartOfSpeechAnnotation.class, posConstraint);
                    }
                    nodeExprs.add(new SequencePattern.NodePatternExpr(node));
                }
                pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(nodeExprs));
            } else {
                pattern = TokenSequencePattern.compile(env, entry.tokensRegex);
            }

            // NOTE(review): upper bound is inclusive here; confirm whether getTotalGroups() counts group 0.
            boolean groupInRange = entry.annotateGroup >= 0 && entry.annotateGroup <= pattern.getTotalGroups();
            if (!groupInRange) {
                throw new RuntimeException("Invalid match group for entry " + entry);
            }

            pattern.setPriority(entry.priority);
            compiled.add(pattern);
            patternToEntry.put(pattern, entry);
        }
        return TokenSequencePattern.getMultiPatternMatcher(compiled);
    }

    /**
     * Finds non-overlapping pattern matches in {@code tokens} and, for each match
     * that passes the POS and existing-NER checks, sets the NER tag of the tokens
     * in the entry's annotate group to the entry's type.
     *
     * <p>When {@code verbose} is set, rejected matches are reported on System.err.
     *
     * @param tokens the token list to match against and to tag in place
     */
    private void annotateMatched(List<CoreLabel> tokens) {
        // The matcher is parameterized on CoreMap, so its results are too.
        List<SequenceMatchResult<CoreMap>> matched = this.multiPatternMatcher.findNonOverlapping(tokens);
        for (SequenceMatchResult<CoreMap> m : matched) {
            Entry entry = this.patternToEntry.get(m.pattern());
            int g = entry.annotateGroup;
            int start = m.start(g);
            int end = m.end(g);

            // Both checks must pass; the NER check is skipped when the POS check already failed.
            boolean overwriteOriginalNer = this.checkPosTags(tokens, start, end)
                    && this.checkOrigNerTags(entry, tokens, start, end);

            if (overwriteOriginalNer) {
                for (int i = start; i < end; ++i) {
                    tokens.get(i).set(CoreAnnotations.NamedEntityTagAnnotation.class, entry.type);
                }
            } else if (this.verbose) {
                System.err.println("Not annotating  '" + m.group(g) + "': " + StringUtils.joinFields(m.groupNodes(g), CoreAnnotations.NamedEntityTagAnnotation.class) + " with " + entry.type + ", sentence is '" + StringUtils.joinWords(tokens, " ") + "'");
            }
        }
    }

    /**
     * Decides whether the matched span {@code [start, end)} satisfies the
     * valid-POS restriction.
     *
     * <ul>
     *   <li>No valid-POS pattern configured: always accepted.</li>
     *   <li>{@code MATCH_ONE_TOKEN_PHRASE_ONLY}: phrases of more than one token are
     *       accepted outright; a one-token phrase must have a matching POS tag.</li>
     *   <li>{@code MATCH_AT_LEAST_ONE_TOKEN}: at least one token in the span must
     *       have a matching POS tag.</li>
     *   <li>{@code MATCH_ALL_TOKENS}: accepted here; that restriction is built into
     *       the compiled patterns instead.</li>
     * </ul>
     *
     * @param tokens the full token list that was matched against
     * @param start index of the first token of the matched span
     * @param end index one past the last token of the matched span
     * @return {@code true} if the span passes the POS restriction
     */
    private boolean checkPosTags(List<CoreLabel> tokens, int start, int end) {
        if (this.validPosPattern == null) {
            return true;
        }
        switch (this.posMatchType) {
            case MATCH_ONE_TOKEN_PHRASE_ONLY:
                // The phrase is the matched span, not the whole token list: measure end - start.
                if (end - start > 1) {
                    return true;
                }
                return this.anyTokenHasValidPos(tokens, start, end);
            case MATCH_AT_LEAST_ONE_TOKEN:
                return this.anyTokenHasValidPos(tokens, start, end);
            case MATCH_ALL_TOKENS:
            default:
                return true;
        }
    }

    /** Returns true if any token in {@code [start, end)} has a POS tag fully matching the valid-POS pattern. */
    private boolean anyTokenHasValidPos(List<CoreLabel> tokens, int start, int end) {
        for (int i = start; i < end; ++i) {
            String pos = tokens.get(i).get(CoreAnnotations.PartOfSpeechAnnotation.class);
            if (pos != null && this.validPosPattern.matcher(pos).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decides whether the existing NER tags on the matched span {@code [start, end)}
     * may be overwritten with the entry's type.
     *
     * <p>The span is rejected if it cuts across an existing entity, i.e. the tag at
     * either edge is not one of {@code myLabels} and continues onto the neighbouring
     * token outside the span. Otherwise it is accepted when the first token has no
     * tag, when the span's tags are not all equal to the first token's tag, when
     * that tag is listed in the entry's overwritable types, or when the entry's
     * type is not in {@code noDefaultOverwriteLabels} and the tag is one of
     * {@code myLabels}.
     *
     * @param entry the entry whose pattern matched
     * @param tokens the full token list that was matched against
     * @param start index of the first token of the matched span
     * @param end index one past the last token of the matched span
     * @return {@code true} if the span's NER tags should be replaced
     */
    private boolean checkOrigNerTags(Entry entry, List<CoreLabel> tokens, int start, int end) {
        String startNer = tokens.get(start).ner();
        String endNer = tokens.get(end - 1).ner();

        // Walk left while the preceding tokens carry the same foreign tag as the span's first token.
        // Must start at start - 1 so that "no extension" is detectable when the walk is skipped.
        int prevNerEndIndex = start - 1;
        if (startNer != null && !this.myLabels.contains(startNer)) {
            while (prevNerEndIndex >= 0 && startNer.equals(tokens.get(prevNerEndIndex).ner())) {
                --prevNerEndIndex;
            }
        }

        // Walk right while the following tokens carry the same foreign tag as the span's last token.
        int nextNerStartIndex = end;
        if (endNer != null && !this.myLabels.contains(endNer)) {
            while (nextNerStartIndex < tokens.size() && endNer.equals(tokens.get(nextNerStartIndex).ner())) {
                ++nextNerStartIndex;
            }
        }

        // Either walk moved: the match cuts across an existing entity, leave it alone.
        if (prevNerEndIndex != start - 1 || nextNerStartIndex != end) {
            return false;
        }
        // No previous tag on the first token: nothing to protect.
        if (startNer == null) {
            return true;
        }
        // Mixed tags inside the span: overwrite.
        for (int i = start + 1; i < end; ++i) {
            if (!startNer.equals(tokens.get(i).ner())) {
                return true;
            }
        }
        // One consistent tag: overwrite only if the entry or the defaults allow it.
        if (entry.overwritableTypes.contains(startNer)) {
            return true;
        }
        if (this.noDefaultOverwriteLabels.contains(entry.type)) {
            return false;
        }
        return this.myLabels.contains(startNer);
    }

    /**
     * Reads entries from all given mapping sources, in sorted order of their names,
     * sharing one duplicate-detection trie across sources.
     *
     * <p>The {@code mappings} array is sorted in place.
     *
     * @param annotatorName annotator name used in log messages
     * @param noDefaultOverwriteLabels entry types for which a warning is logged when an entry lists no overwritable types
     * @param ignoreCase whether duplicate detection lower-cases the regex key
     * @param verbose whether ignored duplicates with differing types are logged
     * @param mappings mapping sources, each opened via {@code IOUtils.readerFromString}
     * @return the entries read from all sources
     * @throws RuntimeIOException if a source cannot be read
     */
    private static List<Entry> readEntries(String annotatorName, Set<String> noDefaultOverwriteLabels, boolean ignoreCase, boolean verbose, String ... mappings) {
        List<Entry> collected = new ArrayList<Entry>();
        TrieMap<String, Entry> seen = new TrieMap<String, Entry>();

        Arrays.sort(mappings);
        for (int i = 0; i < mappings.length; ++i) {
            String source = mappings[i];
            BufferedReader reader = null;
            try {
                reader = IOUtils.readerFromString(source);
                readEntries(annotatorName, collected, seen, source, reader, noDefaultOverwriteLabels, ignoreCase, verbose);
            } catch (IOException ioe) {
                throw new RuntimeIOException("Couldn't read TokensRegexNER from " + source, ioe);
            } finally {
                // Close failures are deliberately ignored.
                IOUtils.closeIgnoringExceptions(reader);
            }
        }

        // The per-file reader already logs a summary for each source; add a total unless there was exactly one.
        boolean singleSource = mappings.length == 1;
        if (!singleSource) {
            logger.log("TokensRegexNERAnnotator " + annotatorName + ": Read " + collected.size() + " unique entries from " + mappings.length + " files");
        }
        return collected;
    }

    /**
     * Reads tab-separated entries from one mapping source and appends the
     * non-duplicate ones to {@code entries}.
     *
     * <p>Each line has 2 to 5 tab-separated fields: the regex, the type, an optional
     * comma-separated list of overwritable types, an optional priority (double), and
     * an optional group number (int). A regex of the form {@code "( ... )"} is kept
     * as a single TokensRegex expression; any other regex is split on whitespace
     * into per-token regexes.
     *
     * <p>An entry whose regex key was already seen is skipped unless its priority is
     * strictly higher than the earlier entry's. In that case it is added and takes
     * over the key in {@code seenRegexes}; the earlier entry is not removed from
     * {@code entries}.
     *
     * @param annotatorName annotator name used in log messages
     * @param entries list to append to
     * @param seenRegexes trie of regex keys seen so far, updated in place
     * @param mappingFilename name of the source, used in messages
     * @param mapping reader positioned at the start of the source
     * @param noDefaultOverwriteLabels entry types for which a warning is logged when an entry lists no overwritable types
     * @param ignoreCase whether the duplicate-detection key is lower-cased
     * @param verbose whether ignored duplicates with differing types are logged
     * @return {@code entries}
     * @throws IOException if reading from {@code mapping} fails
     * @throws IllegalArgumentException if a line has the wrong number of fields or an unparsable priority or group
     */
    private static List<Entry> readEntries(String annotatorName, List<Entry> entries, TrieMap<String, Entry> seenRegexes, String mappingFilename, BufferedReader mapping, Set<String> noDefaultOverwriteLabels, boolean ignoreCase, boolean verbose) throws IOException {
        int origEntriesSize = entries.size();
        int isTokensRegex = 0;
        int lineCount = 0;
        String line;
        while ((line = mapping.readLine()) != null) {
            ++lineCount;
            String[] split = line.split("\t");
            if (split.length < 2 || split.length > 5) {
                throw new IllegalArgumentException("Provided mapping file is in wrong format. This line is bad: " + line);
            }

            // Field 1: either a parenthesized TokensRegex expression or whitespace-separated token regexes.
            String regex = split[0].trim();
            String tokensRegex = null;
            String[] regexes = null;
            if (regex.startsWith("( ") && regex.endsWith(" )")) {
                tokensRegex = regex.substring(1, regex.length() - 1).trim();
            } else {
                regexes = regex.split("\\s+");
            }

            // Key used only for duplicate detection.
            String[] key = (regexes != null) ? regexes : new String[]{tokensRegex};
            if (ignoreCase) {
                String[] norm = new String[key.length];
                for (int i = 0; i < key.length; ++i) {
                    norm[i] = key[i].toLowerCase();
                }
                key = norm;
            }

            // Field 2: type. Field 3 (optional): overwritable types.
            String type = split[1].trim();
            Set<String> overwritableTypes = Generics.newHashSet();
            if (split.length >= 3) {
                overwritableTypes.addAll(Arrays.asList(split[2].trim().split("\\s*,\\s*")));
            }

            // Field 4 (optional): priority.
            double priority = 0.0;
            if (split.length >= 4) {
                try {
                    priority = Double.parseDouble(split[3].trim());
                } catch (NumberFormatException e) {
                    throw new IllegalArgumentException("ERROR: Invalid priority in line " + lineCount + " in regexner file " + mappingFilename + ": \"" + line + "\"!", e);
                }
            }

            // Field 5 (optional): group to annotate.
            int annotateGroup = 0;
            if (split.length >= 5) {
                String context = split[4].trim();
                try {
                    annotateGroup = Integer.parseInt(context);
                } catch (NumberFormatException e) {
                    throw new IllegalArgumentException("ERROR: Invalid group in line " + lineCount + " in regexner file " + mappingFilename + ": \"" + line + "\"!", e);
                }
            }

            // Only the first of several comma-separated types is used.
            int commaPos = type.indexOf(',');
            if (commaPos > 0) {
                String newType = type.substring(0, commaPos).trim();
                logger.warn("TokensRegexNERAnnotator " + annotatorName + ": Entry has multiple types: " + line + ".  Taking type to be " + newType);
                type = newType;
            }

            Entry entry = new Entry(tokensRegex, regexes, type, overwritableTypes, priority, annotateGroup);
            if (seenRegexes.containsKey(key)) {
                Entry oldEntry = seenRegexes.get(key);
                if (priority > oldEntry.priority) {
                    logger.warn("TokensRegexNERAnnotator " + annotatorName + ": Replace duplicate entry (higher priority): old=" + oldEntry + ", new=" + entry);
                } else {
                    if (!oldEntry.type.equals(type) && verbose) {
                        logger.warn("TokensRegexNERAnnotator " + annotatorName + ": Ignoring duplicate entry: " + split[0] + ", old type = " + oldEntry.type + ", new type = " + type);
                    }
                    continue;
                }
            }

            if (entry.overwritableTypes.isEmpty() && noDefaultOverwriteLabels.contains(entry.type)) {
                logger.warn("TokensRegexNERAnnotator " + annotatorName + ": Entry doesn't have overwriteable types " + entry + ", but entry type is in noDefaultOverwriteLabels");
            }
            entries.add(entry);
            seenRegexes.put(key, entry);
            if (entry.tokensRegex != null) {
                ++isTokensRegex;
            }
        }
        logger.log("TokensRegexNERAnnotator " + annotatorName + ": Read " + (entries.size() - origEntriesSize) + " unique entries out of " + lineCount + " from " + mappingFilename + ", " + isTokensRegex + " TokensRegex patterns.");
        return entries;
    }

    /**
     * Returns the requirements that must be satisfied before this annotator runs.
     *
     * @return {@code StanfordCoreNLP.TOKENIZE_AND_SSPLIT} (presumably tokenization and sentence splitting; defined in StanfordCoreNLP)
     */
    @Override
    public Set<Annotator.Requirement> requires() {
        return StanfordCoreNLP.TOKENIZE_AND_SSPLIT;
    }

    /**
     * Returns the requirements this annotator declares as satisfied for later annotators.
     *
     * @return an empty set; this annotator declares none
     */
    @Override
    public Set<Annotator.Requirement> requirementsSatisfied() {
        return Collections.emptySet();
    }

    /**
     * One line of a mapping file: a pattern, the NER type to assign, and the
     * options controlling when and where it is applied.
     */
    private static class Entry {
        /** TokensRegex expression, or {@code null} when the entry is given as per-token regexes. */
        public final String tokensRegex;
        /** Per-token regexes, or {@code null} when the entry is a TokensRegex expression. */
        public final String[] regex;
        /** NER type to assign (interned). */
        public final String type;
        /** Existing NER types this entry is explicitly allowed to overwrite. */
        public final Set<String> overwritableTypes;
        /** Priority assigned to the compiled pattern. */
        public final double priority;
        /** Index of the match group whose tokens are annotated. */
        public final int annotateGroup;

        public Entry(String tokensRegex, String[] regex, String type, Set<String> overwritableTypes, double priority, int annotateGroup) {
            this.tokensRegex = tokensRegex;
            this.regex = regex;
            this.type = type.intern();
            this.overwritableTypes = overwritableTypes;
            this.priority = priority;
            this.annotateGroup = annotateGroup;
        }

        /** Renders the pattern, type, overwritable types and priority; the annotate group is not included. */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder("Entry{");
            if (this.tokensRegex != null) {
                sb.append(this.tokensRegex);
            } else {
                sb.append(StringUtils.join(this.regex));
            }
            sb.append(' ').append(this.type);
            sb.append(' ').append(this.overwritableTypes);
            sb.append(' ').append(this.priority);
            sb.append('}');
            return sb.toString();
        }
    }

    /**
     * Ways in which the valid-POS pattern can be applied to the tokens of a match.
     * Selected by the {@code posmatchtype} property.
     */
    static enum PosMatchType {
        /** The POS pattern is attached to every per-token pattern when the matcher is built (see createPatternMatcher). */
        MATCH_ALL_TOKENS,
        /** A match is accepted if at least one token in the matched span has a POS tag matching the pattern (see checkPosTags). */
        MATCH_AT_LEAST_ONE_TOKEN,
        /** By name, the POS check is meant to apply only to one-token phrases; see checkPosTags for the exact condition. */
        MATCH_ONE_TOKEN_PHRASE_ONLY;

    }
}

