/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.tokenizer;

import com.aliasi.tokenizer.Tokenizer;

/**
 * Tokenizer over a slice of a character array that splits text into
 * alphanumeric words, numbers (with embedded {@code '.'} / {@code ','}
 * separators), runs of {@code '.'}, {@code '-'} and {@code '='}, doubled
 * quote marks ({@code ''} and {@code ``}), and single characters for
 * everything else. Whitespace separates tokens and is never part of one.
 *
 * <p>Positions reported by {@link #lastTokenStartPosition()} and
 * {@link #lastTokenEndPosition()} are relative to the start of the slice,
 * not to the start of the underlying array.
 *
 * <p>Instances carry a mutable cursor; nothing in this class synchronizes
 * access to it.
 */
class IndoEuropeanTokenizer
extends Tokenizer {
    /** Underlying buffer; used directly, not copied. */
    private final char[] mChars;
    /** Exclusive end index of the slice within {@link #mChars}. */
    private final int mLastPosition;
    /** Inclusive start index of the slice within {@link #mChars}. */
    private final int mStartPosition;
    /** Index of the next character to be examined. */
    private int mPosition;
    /** Index in {@link #mChars} where the token being scanned began. */
    private int mTokenStart;
    /** Zero-based count of tokens started so far; written here but not read in this class. */
    private int mLastTokenIndex;
    /** Slice-relative start of the last returned token, or -1 if none yet. */
    private int mLastTokenStartPosition = -1;
    /** Slice-relative end (exclusive) of the last returned token, or -1 if none yet. */
    private int mLastTokenEndPosition = -1;

    /**
     * Creates a tokenizer over {@code ch[offset .. offset + length)}.
     *
     * @param ch     buffer holding the text; it is referenced, not copied
     * @param offset index of the first character of the slice
     * @param length number of characters in the slice
     * @throws IllegalArgumentException if {@code offset} or {@code length}
     *         is negative, or the slice extends past the end of {@code ch}
     */
    public IndoEuropeanTokenizer(char[] ch, int offset, int length) {
        // Written as "offset > ch.length - length" so that a huge length
        // cannot overflow offset + length and slip past the bounds check;
        // negative lengths are rejected instead of yielding an empty slice.
        if (offset < 0 || length < 0 || offset > ch.length - length) {
            String msg = "Illegal slice. cs.length=" + ch.length + " offset=" + offset + " length=" + length;
            throw new IllegalArgumentException(msg);
        }
        this.mChars = ch;
        this.mPosition = offset;
        this.mLastPosition = offset + length;
        this.mTokenStart = -1;
        this.mLastTokenIndex = -1;
        this.mStartPosition = offset;
    }

    /**
     * Creates a tokenizer over all characters of the given string.
     *
     * @param chars text to tokenize
     */
    public IndoEuropeanTokenizer(String chars) {
        this(chars.toCharArray(), 0, chars.length());
    }

    /**
     * Creates a tokenizer over a snapshot of the builder's current contents.
     *
     * @param chars text to tokenize
     */
    public IndoEuropeanTokenizer(StringBuilder chars) {
        this(chars.toString());
    }

    /**
     * Returns the slice-relative index of the first character of the token
     * most recently returned by {@link #nextToken()}.
     *
     * @return start offset of the last token, or -1 if no token has been returned
     */
    public int lastTokenStartPosition() {
        return this.mLastTokenStartPosition;
    }

    /**
     * Returns the slice-relative index one past the last character of the
     * token most recently returned by {@link #nextToken()}.
     *
     * @return end offset (exclusive) of the last token, or -1 if no token has been returned
     */
    public int lastTokenEndPosition() {
        return this.mLastTokenEndPosition;
    }

    /**
     * Consumes and returns the run of whitespace at the current position.
     *
     * @return the whitespace characters consumed; the empty string if the
     *         cursor is at a non-whitespace character or at the end of the slice
     */
    public String nextWhitespace() {
        int start = this.mPosition;
        this.skipWhitespace();
        return new String(this.mChars, start, this.mPosition - start);
    }

    /**
     * Tests whether a character counts as a letter for tokenization:
     * anything {@link Character#isLetter(char)} accepts, plus every code
     * unit in the Devanagari range (which includes combining signs that
     * are not letters by the Unicode definition).
     */
    private static boolean isLetter(char c) {
        return Character.isLetter(c) || IndoEuropeanTokenizer.devanagari(c);
    }

    /** Tests whether the character lies in U+0900..U+097F inclusive. */
    private static boolean devanagari(char unicode) {
        return unicode >= '\u0900' && unicode <= '\u097f';
    }

    /**
     * Skips leading whitespace and returns the next token.
     *
     * <p>Token shapes, decided by the first character:
     * <ul>
     *   <li>{@code '.'}, {@code '-'}, {@code '='}: the whole run of that same character;</li>
     *   <li>{@code '\''}, {@code '`'}: that character, plus one more if it is immediately repeated;</li>
     *   <li>a letter: letters and digits up to the first other character;</li>
     *   <li>a digit: a number, see {@link #numToken()};</li>
     *   <li>anything else: that single character.</li>
     * </ul>
     *
     * @return the next token, or {@code null} when only whitespace (or
     *         nothing) remains; in that case the last-token positions are
     *         left unchanged
     */
    public String nextToken() {
        this.skipWhitespace();
        if (!this.hasMoreCharacters()) {
            return null;
        }
        this.mTokenStart = this.mPosition;
        ++this.mLastTokenIndex;
        char startChar = this.mChars[this.mPosition++];
        switch (startChar) {
            case '.':
            case '-':
            case '=':
                // Unbounded run of the same punctuation mark, e.g. "..." or "--".
                while (this.currentCharEquals(startChar)) {
                    ++this.mPosition;
                }
                return this.currentToken();
            case '\'':
            case '`':
                // At most a pair, so "'''" splits into "''" and "'".
                if (this.currentCharEquals(startChar)) {
                    ++this.mPosition;
                }
                return this.currentToken();
            default:
                break;
        }
        if (IndoEuropeanTokenizer.isLetter(startChar)) {
            return this.alphaNumToken();
        }
        if (Character.isDigit(startChar)) {
            return this.numToken();
        }
        return this.currentToken();
    }

    /** Tests whether the cursor is still inside the slice. */
    private boolean hasMoreCharacters() {
        return this.mPosition < this.mLastPosition;
    }

    /** Returns the character under the cursor; callers must check bounds first. */
    private char currentChar() {
        return this.mChars[this.mPosition];
    }

    /** Tests whether a character remains and equals {@code c}. */
    private boolean currentCharEquals(char c) {
        return this.hasMoreCharacters() && this.currentChar() == c;
    }

    /** Advances the cursor past any whitespace at the current position. */
    private void skipWhitespace() {
        while (this.hasMoreCharacters() && Character.isWhitespace(this.currentChar())) {
            ++this.mPosition;
        }
    }

    /**
     * Finishes the token spanning {@code [mTokenStart, mPosition)}: records
     * its slice-relative start and end positions and returns its text.
     */
    private String currentToken() {
        int length = this.mPosition - this.mTokenStart;
        this.mLastTokenStartPosition = this.mTokenStart - this.mStartPosition;
        this.mLastTokenEndPosition = this.mLastTokenStartPosition + length;
        return new String(this.mChars, this.mTokenStart, length);
    }

    /** Extends the current token over following letters and digits, then returns it. */
    private String alphaNumToken() {
        while (this.hasMoreCharacters() && (IndoEuropeanTokenizer.isLetter(this.currentChar()) || Character.isDigit(this.currentChar()))) {
            ++this.mPosition;
        }
        return this.currentToken();
    }

    /**
     * Scans a token that began with a digit. Digits are consumed; a letter
     * turns the token into an alphanumeric one (e.g. "3rd"); a
     * {@code '.'} or {@code ','} hands over to {@link #numPunctToken()};
     * any other character ends the token.
     */
    private String numToken() {
        while (this.hasMoreCharacters()) {
            if (IndoEuropeanTokenizer.isLetter(this.currentChar())) {
                ++this.mPosition;
                return this.alphaNumToken();
            }
            if (Character.isDigit(this.currentChar())) {
                ++this.mPosition;
                continue;
            }
            if (this.currentChar() == '.' || this.currentChar() == ',') {
                return this.numPunctToken();
            }
            return this.currentToken();
        }
        return this.currentToken();
    }

    /**
     * Continues a numeric token that may contain {@code '.'} or {@code ','}
     * separators. A separator is kept only when a digit follows it
     * immediately; otherwise the token ends before the separator. Unlike
     * {@link #numToken()}, a letter here ends the token rather than being
     * absorbed into it.
     */
    private String numPunctToken() {
        while (this.hasMoreCharacters()) {
            if (Character.isDigit(this.currentChar())) {
                ++this.mPosition;
                continue;
            }
            if (this.currentChar() == '.' || this.currentChar() == ',') {
                // One-character look-ahead: tentatively take the separator,
                // and give it back if it is not followed by a digit.
                ++this.mPosition;
                if (this.hasMoreCharacters() && Character.isDigit(this.currentChar())) continue;
                --this.mPosition;
                return this.currentToken();
            }
            return this.currentToken();
        }
        return this.currentToken();
    }

    /**
     * Convenience method: tokenizes the whole phrase by delegating to the
     * inherited no-argument {@code tokenize()}.
     *
     * @param phrase text to tokenize
     * @return the result of the inherited {@code tokenize()} on a fresh tokenizer
     */
    public static String[] tokenize(String phrase) {
        return new IndoEuropeanTokenizer(phrase).tokenize();
    }
}

