/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.pipeline;

import edu.stanford.nlp.trees.treebank.Mapper;
import edu.stanford.nlp.util.Generics;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Maps long-form POS tags to short tags using mappings read from a
 * Lisp-style mapping file.
 *
 * <p>The mapping file is expected to contain a section opened by a line
 * consisting of {@code (tag-map}, followed by lines ending in
 * {@code (LONG_TAG SHORT_TAG)}, and closed by a line containing only
 * {@code )}. Lines starting with {@code ;} are skipped.
 *
 * <p>Mappings from several files may be accumulated by calling
 * {@link #setup(File, String...)} repeatedly; the first mapping seen for a
 * long tag wins and conflicting later mappings are reported on stderr.
 */
public class LDCPosMapper
implements Mapper {
    /** Matches the line that opens the tag-map section. */
    protected Pattern startOfTagMap = Pattern.compile("\\(tag-map");
    /** Matches the line that closes the tag-map section. */
    protected Pattern endOfTagMap = Pattern.compile("^\\s*\\)\\s*$");
    /** Matches a single mapping; group 1 is the long tag, group 2 the short tag. */
    protected Pattern mapping = Pattern.compile("\\((\\S+)\\s+(\\S+)\\)\\s*$");
    /** Number of capture groups a valid {@link #mapping} line must provide. */
    protected int numExpectedTokens = 2;
    /** When true, "DT" is prefixed to noun/adjective short tags whose long tag contains "DET". */
    private final boolean addDT;
    private static final Pattern determiner = Pattern.compile("DET");
    private static final Pattern nounBaseTag = Pattern.compile("NN");
    private static final Pattern adjBaseTag = Pattern.compile("JJ");
    private static final Pattern LDCdeterminer = Pattern.compile("DT\\+");
    /** Long tag to short tag mappings accumulated by {@link #setup(File, String...)}. */
    protected final Map<String, String> tagMap;
    /** Tags that are returned unchanged by {@link #map(String, String)} without a warning. */
    protected final Set<String> tagsToEscape;

    /** Creates a mapper that does not add determiner prefixes. */
    public LDCPosMapper() {
        this(false);
    }

    /**
     * Creates a mapper.
     *
     * @param addDeterminer if true, short tags containing "NN" or "JJ" whose
     *     long tag contains "DET" are prefixed with "DT"
     */
    public LDCPosMapper(boolean addDeterminer) {
        this.addDT = addDeterminer;
        this.tagMap = Generics.newHashMap();
        this.tagsToEscape = Generics.newHashSet();
        this.tagsToEscape.add("-NONE-");
        this.tagsToEscape.add("PUNC");
    }

    /**
     * Looks up the short tag for a long tag.
     *
     * @param posTag the long tag; surrounding whitespace is trimmed before lookup
     *     (must not be null, since it is dereferenced)
     * @param terminal not used by this implementation
     * @return the mapped short tag, or the trimmed input tag when no mapping
     *     exists (a warning is printed to stderr unless the tag is in
     *     {@link #tagsToEscape})
     */
    @Override
    public String map(String posTag, String terminal) {
        String rawTag = posTag.trim();
        if (this.tagMap.containsKey(rawTag)) {
            return this.tagMap.get(rawTag);
        }
        if (!this.tagsToEscape.contains(rawTag)) {
            System.err.printf("%s: No mapping for %s%n", this.getClass().getName(), rawTag);
        }
        return rawTag;
    }

    /**
     * Normalizes the short tag read from a mapping line and reconciles it with
     * any mapping already stored for the same long tag.
     *
     * @param longTag the long tag from the mapping line; may be null
     * @param shortTag the short tag from the mapping line; may be null
     * @return null if {@code shortTag} is null; the previously stored short tag
     *     if {@code longTag} is already mapped; otherwise the normalized short tag
     */
    private String processShortTag(String longTag, String shortTag) {
        if (shortTag == null) {
            return null;
        }
        // Strip every "DT+" from short tags that begin with it.
        if (shortTag.startsWith("DT+")) {
            shortTag = LDCdeterminer.matcher(shortTag).replaceAll("");
        }
        // Constant-first comparison: a null longTag must not throw here.
        if ("NUMERIC_COMMA".equals(longTag)) {
            shortTag = "PUNC";
        }
        if (this.addDT && longTag != null) {
            boolean hasDeterminer = determiner.matcher(longTag).find();
            boolean isNounOrAdj = nounBaseTag.matcher(shortTag).find()
                    || adjBaseTag.matcher(shortTag).find();
            if (hasDeterminer && isNounOrAdj) {
                shortTag = "DT" + shortTag.trim();
            }
        }
        // The first mapping seen for a long tag wins; report conflicts only.
        if (this.tagMap.containsKey(longTag)) {
            String existingShortTag = this.tagMap.get(longTag);
            if (!existingShortTag.equals(shortTag)) {
                System.err.printf("%s: Union of mapping files will cause overlap for %s (current: %s new: %s)%n", this.getClass().getName(), longTag, existingShortTag, shortTag);
            }
            return existingShortTag;
        }
        return shortTag;
    }

    /**
     * Reads tag mappings from a mapping file and adds them to {@link #tagMap}.
     *
     * <p>Does nothing when {@code path} is null or does not exist. I/O
     * failures are reported on stderr and are not propagated. The reader is
     * closed on every exit path.
     *
     * @param path the mapping file
     * @param options not used by this implementation
     */
    @Override
    public void setup(File path, String ... options) {
        if (path == null || !path.exists()) {
            return;
        }
        // Tracks the reader's line number so it can be reported after the
        // reader has gone out of scope; stays -1 if the file cannot be opened.
        int lineNum = -1;
        try (LineNumberReader reader = new LineNumberReader(new FileReader(path))) {
            lineNum = reader.getLineNumber();
            boolean insideTagMap = false;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNum = reader.getLineNumber();
                line = line.trim();
                insideTagMap = this.startOfTagMap.matcher(line).matches() || insideTagMap;
                // Ignore everything before the tag-map section, and comment lines.
                if (!insideTagMap || line.startsWith(";")) {
                    continue;
                }
                Matcher mappingLine = this.mapping.matcher(line);
                if (mappingLine.find()) {
                    if (mappingLine.groupCount() == this.numExpectedTokens) {
                        String longTag = mappingLine.group(1);
                        String finalShortTag = this.processShortTag(longTag, mappingLine.group(2));
                        this.tagMap.put(longTag, finalShortTag);
                    } else {
                        System.err.printf("%s: Skipping bad mapping in %s (line %d)%n", this.getClass().getName(), path.getPath(), lineNum);
                    }
                }
                // Stop at the closing parenthesis of the tag-map section.
                if (this.endOfTagMap.matcher(line).matches()) {
                    break;
                }
            }
        }
        catch (FileNotFoundException e) {
            System.err.printf("%s: Could not open mapping file %s%n", this.getClass().getName(), path.getPath());
        }
        catch (IOException e) {
            System.err.printf("%s: Error reading %s (line %d)%n", this.getClass().getName(), path.getPath(), lineNum);
        }
    }

    /**
     * Always returns true.
     *
     * @param parent not used by this implementation
     * @param element not used by this implementation
     * @return true
     */
    @Override
    public boolean canChangeEncoding(String parent, String element) {
        return true;
    }

    /**
     * Returns the loaded mappings, one per line, as
     * {@code longTag<TAB>shortTag}.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, String> entry : this.tagMap.entrySet()) {
            sb.append(entry.getKey()).append('\t').append(entry.getValue()).append('\n');
        }
        return sb.toString();
    }

    /**
     * Loads a hard-coded mapping file and prints the mapping of a few sample tags.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        LDCPosMapper mapper = new LDCPosMapper(true);
        File mapFile = new File("/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp");
        mapper.setup(mapFile, new String[0]);
        String test1 = "DET+NOUN+NSUFF_FEM_SG+CASE_DEF_ACC";
        String test2 = "ADJXXXXX";
        String test3 = "REL_ADV";
        String test4 = "NUMERIC_COMMA";
        System.out.printf("%s --> %s\n", test1, mapper.map(test1, null));
        System.out.printf("%s --> %s\n", test2, mapper.map(test2, null));
        System.out.printf("%s --> %s\n", test3, mapper.map(test3, null));
        System.out.printf("%s --> %s\n", test4, mapper.map(test4, null));
    }
}

