/*
 * Decompiled with CFR 0.152.
 */
package matetools.is2.lemmatizer;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
import matetools.is2.data.Cluster;
import matetools.is2.data.F2SF;
import matetools.is2.data.FV;
import matetools.is2.data.Instances;
import matetools.is2.data.InstancesTagger;
import matetools.is2.data.Long2Int;
import matetools.is2.data.ParametersFloat;
import matetools.is2.data.PipeGen;
import matetools.is2.data.SentenceData09;
import matetools.is2.io.CONLLReader09;
import matetools.is2.io.CONLLWriter09;
import matetools.is2.lemmatizer.Evaluator;
import matetools.is2.lemmatizer.MFO;
import matetools.is2.lemmatizer.Options;
import matetools.is2.lemmatizer.Pipe;
import matetools.is2.lemmatizer.StringEdit;
import matetools.is2.tools.IPipe;
import matetools.is2.tools.Tool;
import matetools.is2.tools.Train;
import matetools.is2.util.DB;
import matetools.is2.util.OptionsSuper;

/**
 * Lemmatizer that can be trained on annotated data, stored to and loaded from
 * a zipped model file, and applied to sentences.
 *
 * <p>For each token the lemma is either taken from the lookup table
 * {@code pipe.opse} (keyed by the lower-cased form) or produced by applying
 * the best scoring edit operation from {@code pipe.types} to the form.
 * When {@link #doUppercase} is set, an additional scored decision determines
 * whether the first character of the lemma is upper- or lower-cased.
 */
public class Lemmatizer
implements Tool,
Train {
    /** Feature pipe; supplies feature extraction, the operation table ({@code types}) and the lookup table ({@code opse}). */
    public Pipe pipe;
    /** Model weights; assigned by {@link #readModel(OptionsSuper)}. */
    public ParametersFloat params;
    /** Maps long feature codes to int indices via {@code l2i}. */
    private Long2Int li;
    /** Whether the separate upper/lower-case decision for the lemma's first character is trained and applied. */
    private boolean doUppercase = false;
    /** Reusable buffer filled by {@code pipe.addCoreFeatures}; only entries greater than zero are used. */
    private long[] vs = new long[40];

    public Lemmatizer(String modelFileName) {
        try {
            Options m_options = new Options(new String[]{"-model", modelFileName});
            this.li = new Long2Int(m_options.hsize);
            this.readModel(m_options);
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a lemmatizer without loading a model; {@code pipe}, {@code params}
     * and {@code li} stay unset until training is set up or
     * {@link #readModel(OptionsSuper)} is called.
     *
     * @param doUppercase whether the upper/lower-case decision for the first
     *                    character of a lemma is trained and applied
     */
    public Lemmatizer(boolean doUppercase) {
        this.doUppercase = doUppercase;
    }

    /**
     * Command-line entry point. Depending on the parsed options it trains and
     * writes a model, applies a stored model to the test file, and evaluates
     * the output against the gold file, in that order. The elapsed wall-clock
     * time in seconds (one decimal place) is printed at the end.
     *
     * @param args command-line arguments, parsed by {@link Options}
     * @throws FileNotFoundException declared for callers; raised by the invoked components
     * @throws Exception             any other failure raised by the invoked components
     */
    public static void main(String[] args) throws FileNotFoundException, Exception {
        Options options = new Options(args);
        Lemmatizer tool = new Lemmatizer(options.upper);
        long startMillis = System.currentTimeMillis();

        // Training: build the feature pipe, read the instances, train and store the model.
        if (options.train) {
            tool.li = new Long2Int(options.hsize);
            tool.pipe = new Pipe(options, tool.li);
            InstancesTagger trainingData = tool.pipe.createInstances(options.trainfile);
            DB.println("Features: " + tool.pipe.mf.size() + " Operations " + tool.pipe.mf.getFeatureCounter().get("OP"));
            ParametersFloat weights = new ParametersFloat(tool.li.size());
            tool.train(options, tool.pipe, weights, trainingData);
            tool.writeModel(options, tool.pipe, weights);
        }

        // Testing: always reload the model from disk before lemmatizing the test file.
        if (options.test) {
            tool.readModel(options);
            tool.out(options, tool.pipe, tool.params);
        }

        System.out.println();

        if (options.eval) {
            System.out.println("\nEVALUATION PERFORMANCE:");
            Evaluator.evaluate(options.goldfile, options.outfile, options.format);
        }

        // Integer division by 100 first, so the result is truncated to tenths of a second.
        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println("used time " + (float)(elapsedMillis / 100L) / 10.0f);
    }

    @Override
    public void writeModel(OptionsSuper options, IPipe pipe, ParametersFloat params) {
        try {
            ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(options.modelName)));
            zos.putNextEntry(new ZipEntry("data"));
            DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos));
            MFO cfr_ignored_0 = this.pipe.mf;
            MFO.writeData(dos);
            dos.flush();
            params.write(dos);
            pipe.write(dos);
            dos.writeBoolean(this.doUppercase);
            dos.flush();
            dos.close();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void readModel(OptionsSuper options) {
        try {
            ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName)));
            zis.getNextEntry();
            DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
            MFO mf = new MFO();
            mf.read(dis);
            this.params = new ParametersFloat(0);
            this.params.read(dis);
            this.li = new Long2Int(this.params.size());
            this.pipe = new Pipe(options, this.li);
            this.pipe.mf = mf;
            this.pipe.initFeatures();
            this.pipe.initValues();
            this.pipe.readMap(dis);
            for (Map.Entry<String, Integer> e : MFO.getFeatureSet().get("OP").entrySet()) {
                this.pipe.types[e.getValue().intValue()] = e.getKey();
            }
            this.pipe.cl = new Cluster(dis);
            if (dis.available() > 0) {
                this.doUppercase = dis.readBoolean();
            }
            dis.close();
            DB.println("Loading data finished. ");
            DB.println("number of params  " + this.params.parameters.length);
            DB.println("number of classes " + this.pipe.types.length);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void train(OptionsSuper options, IPipe p, ParametersFloat params, Instances ist) {
        InstancesTagger is = (InstancesTagger)ist;
        int i = 0;
        int del = 0;
        FV g = new FV();
        FV f = new FV();
        int LC = this.pipe.types.length + 1;
        int UC = LC + 1;
        Pipe pipe = this.pipe;
        String[] wds = MFO.reverse(pipe.mf.getFeatureSet().get("WORD"));
        F2SF fs = params.getFV();
        double upd = 0.0;
        i = 0;
        while (i < options.numIters) {
            System.out.print("Iteration " + i + ": ");
            long start = System.currentTimeMillis();
            int numInstances = is.size();
            int correct = 0;
            int count = 0;
            long last = System.currentTimeMillis();
            int wrongOp = 0;
            int correctOp = 0;
            int correctUC = 0;
            int wrongUC = 0;
            HashMap map = new HashMap();
            int n = 0;
            while (n < numInstances) {
                if ((n + 1) % 500 == 0) {
                    del = Pipe.outValueErr(n + 1, count - correct, (float)correct / (float)count, del, last, upd);
                }
                upd = options.numIters * numInstances - (numInstances * i + (n + 1)) + 1;
                int k = 0;
                while (k < is.length(n)) {
                    double best = -1000.0;
                    String bestOp = "";
                    ++count;
                    this.pipe.addCoreFeatures(is, n, k, 0, wds[is.forms[n][k]], this.vs);
                    String lemma = this.pipe.opse.get(wds[is.forms[n][k]].toLowerCase());
                    if (lemma == null) {
                        int t = 0;
                        while (t < this.pipe.types.length) {
                            fs.clear();
                            int l = this.vs.length - 1;
                            while (l >= 0) {
                                if (this.vs[l] > 0L) {
                                    fs.add(this.li.l2i(this.vs[l] + (long)(t * Pipe.s_type)));
                                }
                                --l;
                            }
                            float score = (float)fs.getScore();
                            if ((double)score > best) {
                                bestOp = this.pipe.types[t];
                                best = score;
                            }
                            ++t;
                        }
                    }
                    if (this.doUppercase) {
                        fs.clear();
                        int l = this.vs.length - 1;
                        while (l >= 0) {
                            if (this.vs[l] > 0L) {
                                fs.add(this.li.l2i(this.vs[l] + (long)(LC * Pipe.s_type)));
                            }
                            --l;
                        }
                        int correctOP = -1;
                        int selectedOP = -1;
                        if (wds[is.glemmas[n][k]].length() > 0 && Character.isUpperCase(wds[is.glemmas[n][k]].charAt(0)) && fs.score > 0.0f) {
                            correctOP = UC;
                            selectedOP = LC;
                        } else if (wds[is.glemmas[n][k]].length() > 0 && Character.isLowerCase(wds[is.glemmas[n][k]].charAt(0)) && fs.score <= 0.0f) {
                            correctOP = LC;
                            selectedOP = UC;
                        }
                        if (correctOP != -1 && wds[is.glemmas[n][k]].length() > 0) {
                            ++wrongUC;
                            f.clear();
                            int l2 = this.vs.length - 1;
                            while (l2 >= 0) {
                                if (this.vs[l2] > 0L) {
                                    f.add(this.li.l2i(this.vs[l2] + (long)(selectedOP * Pipe.s_type)));
                                }
                                --l2;
                            }
                            g.clear();
                            l2 = this.vs.length - 1;
                            while (l2 >= 0) {
                                if (this.vs[l2] > 0L) {
                                    g.add(this.li.l2i(this.vs[l2] + (long)(correctOP * Pipe.s_type)));
                                }
                                --l2;
                            }
                            double lam_dist = params.getScore(g) - params.getScore(f);
                            double loss = 1.0 - lam_dist;
                            FV dist = g.getDistVector(f);
                            dist.update(params.parameters, params.total, params.update(dist, loss), upd, false);
                        } else {
                            ++correctUC;
                        }
                    }
                    if (lemma != null) {
                        ++correct;
                        ++correctOp;
                    } else {
                        String op = Pipe.getOperation(is, n, k, wds);
                        if (op.equals(bestOp)) {
                            ++correct;
                            ++correctOp;
                        } else {
                            ++wrongOp;
                            f.clear();
                            int bop = this.pipe.mf.getValue("OP", bestOp);
                            int r = this.vs.length - 1;
                            while (r >= 0) {
                                if (this.vs[r] > 0L) {
                                    f.add(this.li.l2i(this.vs[r] + (long)(bop * Pipe.s_type)));
                                }
                                --r;
                            }
                            g.clear();
                            int gop = this.pipe.mf.getValue("OP", op);
                            int r2 = this.vs.length - 1;
                            while (r2 >= 0) {
                                if (this.vs[r2] > 0L) {
                                    g.add(this.li.l2i(this.vs[r2] + (long)(gop * Pipe.s_type)));
                                }
                                --r2;
                            }
                            double lam_dist = params.getScore(g) - params.getScore(f);
                            double loss = 1.0 - lam_dist;
                            FV dist = g.getDistVector(f);
                            dist.update(params.parameters, params.total, params.update(dist, loss), upd, false);
                        }
                    }
                    ++k;
                }
                ++n;
            }
            ArrayList opsl = new ArrayList();
            for (Map.Entry e : map.entrySet()) {
                if ((Integer)e.getValue() <= 1) continue;
                opsl.add(e);
            }
            Collections.sort(opsl, new Comparator<Map.Entry<String, Integer>>(){

                @Override
                public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                    return o1.getValue() == o2.getValue() ? 0 : (o1.getValue() > o2.getValue() ? 1 : -1);
                }
            });
            if (opsl.size() > 0) {
                System.out.println();
            }
            for (Map.Entry e : opsl) {
                System.out.println(String.valueOf((String)e.getKey()) + "  " + e.getValue());
            }
            map.clear();
            del = Pipe.outValueErr(numInstances, count - correct, (float)correct / (float)count, del, last, upd, "time " + (System.currentTimeMillis() - start) + " corr/wrong " + correctOp + " " + wrongOp + " uppercase corr/wrong  " + correctUC + " " + wrongUC);
            del = 0;
            System.out.println();
            ++i;
        }
        params.average(i * is.size());
    }

    @Override
    public void out(OptionsSuper options, IPipe pipe, ParametersFloat params) {
        long start = System.currentTimeMillis();
        CONLLReader09 depReader = new CONLLReader09(options.testfile, false);
        depReader.setInputFormat(options.formatTask);
        CONLLWriter09 depWriter = new CONLLWriter09(options.outfile);
        depWriter.setOutputFormat(options.formatTask);
        System.out.print("Processing Sentence: ");
        int cnt = 0;
        int del = 0;
        try {
            while (true) {
                InstancesTagger is = new InstancesTagger();
                is.init(1, new MFO());
                SentenceData09 instance = depReader.getNext(is);
                if (instance == null) break;
                is.fillChars(instance, 0, Pipe._CEND);
                ++cnt;
                SentenceData09 i09 = this.lemmatize(is, instance, this.li);
                if (options.normalize) {
                    int k = 0;
                    while (k < i09.length()) {
                        boolean save = depReader.normalizeOn;
                        depReader.normalizeOn = true;
                        i09.plemmas[k] = depReader.normalize(i09.plemmas[k]);
                        depReader.normalizeOn = save;
                        ++k;
                    }
                }
                if (options.overwritegold) {
                    i09.lemmas = i09.plemmas;
                }
                depWriter.write(i09);
                if (cnt % 100 != 0) continue;
                del = Pipe.outValue(cnt, del);
            }
            depWriter.finishWriting();
            del = Pipe.outValue(cnt, del);
            long end = System.currentTimeMillis();
            System.out.println(PipeGen.getSecondsPerInstnace(cnt, end - start));
            System.out.println(PipeGen.getUsedTime(end - start));
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Predicts a lemma for every token of {@code instance} and stores it in
     * {@code instance.plemmas}.
     *
     * <p>A lemma found in {@code pipe.opse} for the lower-cased form is used
     * directly (only looked up when the form index in {@code is} is not -1).
     * Otherwise the highest scoring operation of {@code pipe.types} (the last
     * one on ties) is applied to the form via {@code StringEdit.change}; the
     * form is lower-cased first unless {@code doUppercase} is set. Empty
     * results become {@code "_"}. With {@code doUppercase} set, the score of
     * the LC features then decides the case: a score of at most zero
     * upper-cases the first character, a positive score lower-cases the
     * whole lemma.
     *
     * @param is       single-sentence instance container holding the sentence at index 0
     * @param instance the sentence; its {@code plemmas} are overwritten
     * @param li       mapping from long feature codes to parameter indices
     * @return a new {@link SentenceData09} built from {@code instance}
     */
    private SentenceData09 lemmatize(InstancesTagger is, SentenceData09 instance, Long2Int li) {
        int LC = this.pipe.types.length + 1;
        is.feats[0] = new short[instance.length()][11];
        is.fillChars(instance, 0, Pipe._CEND);
        int tokenCount = instance.length();
        F2SF fs = new F2SF(this.params.parameters);
        for (int w = 0; w < tokenCount; ++w) {
            instance.plemmas[w] = "_";
            this.pipe.addCoreFeatures(is, 0, w, 0, instance.forms[w], this.vs);

            // Lookup of the lower-cased form in the table of known lemmas.
            String known = null;
            if (is.forms[0][w] != -1) {
                known = this.pipe.opse.get(instance.forms[w].toLowerCase());
                if (known != null) {
                    instance.plemmas[w] = known;
                }
            }

            // Score all operations; ">=" lets a later operation win a tie.
            double best = -1000.0;
            int bestOp = 0;
            for (int t = 0; t < this.pipe.types.length; ++t) {
                fs.clear();
                for (int l = this.vs.length - 1; l >= 0; --l) {
                    if (this.vs[l] > 0L) {
                        fs.add(li.l2i(this.vs[l] + (long)(t * Pipe.s_type)));
                    }
                }
                if ((double)fs.score >= best) {
                    best = fs.score;
                    bestOp = t;
                }
            }

            if (known == null) {
                String base = this.doUppercase ? instance.forms[w] : instance.forms[w].toLowerCase();
                instance.plemmas[w] = StringEdit.change(base, this.pipe.types[bestOp]);
            }
            if (instance.plemmas[w].length() == 0) {
                instance.plemmas[w] = "_";
            }

            if (this.doUppercase) {
                // Score of the LC features: > 0 means lower case, <= 0 means upper case.
                fs.clear();
                for (int l = this.vs.length - 1; l >= 0; --l) {
                    if (this.vs[l] > 0L) {
                        fs.add(li.l2i(this.vs[l] + (long)(LC * Pipe.s_type)));
                    }
                }
                try {
                    if (fs.score <= 0.0f) {
                        String current = instance.plemmas[w];
                        if (current.length() > 1) {
                            instance.plemmas[w] = String.valueOf(Character.toUpperCase(current.charAt(0))) + current.substring(1);
                        } else if (current.length() > 0) {
                            instance.plemmas[w] = String.valueOf(Character.toUpperCase(current.charAt(0)));
                        }
                    } else if (fs.score > 0.0f) {
                        instance.plemmas[w] = instance.plemmas[w].toLowerCase();
                    }
                }
                catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        SentenceData09 result = new SentenceData09(instance);
        result.createSemantic(instance);
        return result;
    }

    @Override
    public SentenceData09 apply(SentenceData09 snt) {
        InstancesTagger is = new InstancesTagger();
        if (snt.length() == 0) {
            return snt;
        }
        SentenceData09 it = new SentenceData09();
        it.createWithRoot(snt);
        is.init(1, new MFO());
        is.createInstance09(it.length());
        is.fillChars(it, 0, Pipe._CEND);
        int j = 0;
        while (j < it.length()) {
            is.setForm(0, j, it.forms[j]);
            ++j;
        }
        return this.lemmatize(is, it, this.li);
    }
}

