#!/bin/bash

# Abort on the first failing command (errexit) and trace every command
# to stderr as it runs (xtrace).
set -o errexit
set -o xtrace

# Root of the Marian checkout; the decoder is invoked from its build/ subdirectory.
MARIAN_HOME="${HOME}/marian"

# Print a short usage summary to stdout.
usage() {
    cat <<EOF
Usage: $0 -m MODEL_DIR

Evaluate a trained Marian model directory.

Options:
  -m, --model MODEL_DIR  model directory to evaluate
  -h, --help             show this help and exit
EOF
}

# Parse the command-line options given as arguments.
#   Globals: MODEL (written by -m/--model)
#   Exits:   0 after printing the usage for -h/--help,
#            1 on an unknown option or when -m/--model has no value.
parse_args() {
    while [ "$#" -gt 0 ]; do
        case "$1" in
            -m | --model)
                # Fail loudly instead of dying on a bare `shift` later on.
                if [ "$#" -lt 2 ]; then
                    echo "Option $1 requires an argument." >&2
                    usage >&2
                    exit 1
                fi
                MODEL=$2
                shift
                ;;
            -h | --help)
                usage
                exit 0
                ;;
            *)
                usage >&2
                exit 1
                ;;
        esac
        shift
    done
}

parse_args "$@"

# Drop one trailing slash from the model path, if there is one.
MODEL=${MODEL%/}

# The language pair is read from fixed character offsets of the model path:
# two characters at offset 7 for the source, two at offset 9 for the target.
SRC=${MODEL:7:2}
TGT=${MODEL:9:2}

# DATA_DIR is "en" followed by whichever side of the pair is not "en"
# (when SRC is not "en", SRC itself is used).
case $SRC in
    en) DATA_DIR=en${TGT} ;;
    *)  DATA_DIR=en${SRC} ;;
esac

# Refuse any model whose path does not contain "bpew".
# Diagnostics go to file descriptor 2 via >&2: redirecting to /dev/stderr
# would reopen the target and truncate it when stderr is a regular file
# (e.g. `script 2>log`), wiping the trace written so far.
if [[ $MODEL != *bpew* ]]; then
    echo "This script only works with wordpiece-like BPE models." >&2
    exit 1
fi

# The model path must be an existing directory.
if [[ ! -d $MODEL ]]; then
    echo "Model directory \"$MODEL\" does not exist." >&2
    exit 1
fi

# CHECKPOINT AVERAGING
# (Re)build model.avg.npz when it is missing or older than model.npz, from the
# five most recently modified model.iter*.npz checkpoints.
if [[ ! -f "$MODEL/model.avg.npz" || "$MODEL/model.npz" -nt "$MODEL/model.avg.npz" ]]; then
    # `ls -t` is used on purpose: the checkpoints must be ordered by mtime.
    mapfile -t CHECKPOINTS < <(ls -t "$MODEL"/model.iter*.npz | head -n 5)

    # Without this check a missing checkpoint set would only surface as an
    # argument error inside average.py.
    if (( ${#CHECKPOINTS[@]} == 0 )); then
        echo "No model.iter*.npz checkpoints found in \"$MODEL\"." >&2
        exit 1
    fi

    python3 average.py -m "${CHECKPOINTS[@]}" -o "$MODEL/model.avg.npz"
fi

# Derive INPUT_TYPE from the model path: take everything from offset 12,
# delete every "x", split on "_", and keep the last piece starting with "bpew".
for TOK in $(echo ${MODEL:12:1000} | tr -d 'x' | tr '_' ' '); do
    case $TOK in
        bpew*) INPUT_TYPE=$TOK ;;
    esac
done
echo ${INPUT_TYPE}

# Reference side of the test set (named after the target language) and the
# segmented source side (source language plus the INPUT_TYPE suffix).
TEST_TARGET="data/${DATA_DIR}/test/${TGT}"
TEST_FILE="data/${DATA_DIR}/test/${SRC}.${INPUT_TYPE}"

# TEST SET TRANSLATION
# Translate only when there is no cached output or the averaged model is newer.
if [[ ! -f "$MODEL/test.txt" || "$MODEL/model.avg.npz" -nt "$MODEL/test.txt" ]]; then
    "$MARIAN_HOME/build/marian-decoder" -c "$MODEL/model.npz.decoder.yml" -m "$MODEL/model.avg.npz" --beam-size 12 --normalize 0.4 --mini-batch 8 < "$TEST_FILE" > "$MODEL/test.output"
    # Undo the segmentation: delete all spaces, turn every "▁" into a space,
    # then strip the leading space of each line.
    sed -e 's/ //g;s/▁/ /g;s/^ //' "$MODEL/test.output" > "$MODEL/test.txt"

    # Scores belonging to an older translation are stale now.
    rm -f "$MODEL/test_bleu" "$MODEL/test_chrf" "$MODEL/test_meteor"
fi

# TEST SET SCORING
# Guarded separately from the translation: a run that stopped after test.txt
# was written would otherwise never get its (missing or empty) score files
# regenerated, and the `cat` below would fail on every later run.
if [[ ! -s "$MODEL/test_bleu" || ! -s "$MODEL/test_chrf" || ! -s "$MODEL/test_meteor" ]]; then
    sacrebleu "$TEST_TARGET" --score-only --width 2 --metrics bleu < "$MODEL/test.txt" > "$MODEL/test_bleu"
    sacrebleu "$TEST_TARGET" --score-only --width 4 --metrics chrf < "$MODEL/test.txt" > "$MODEL/test_chrf"
    # Keep only the last whitespace-separated field of METEOR's last output line.
    java -jar meteor/meteor-1.5.jar "$TEST_TARGET" "$MODEL/test.txt" -l "$TGT" -norm | tail -n 1 | sed -e 's/.* //' > "$MODEL/test_meteor"
fi

echo -n 'Test BLEU score: '
cat "$MODEL/test_bleu"

# NOISY TEST SET TRANSLATION
# Translate the noisy test file only when there is no cached output or the
# averaged model is newer.
if [[ ! -f "$MODEL/test.noisy.txt" || "$MODEL/model.avg.npz" -nt "$MODEL/test.noisy.txt" ]]; then
    # The noisy source file is named with INPUT_TYPE minus its first four characters.
    NOISY_TEST_FILE="data/${DATA_DIR}/test/${SRC}.wtok.noisy.${INPUT_TYPE:4:1000}"
    "$MARIAN_HOME/build/marian-decoder" -c "$MODEL/model.npz.decoder.yml" -m "$MODEL/model.avg.npz" --beam-size 12 --normalize 0.4 --mini-batch 8 < "$NOISY_TEST_FILE" > "$MODEL/test.noisy.output"
    sed -e 's/ //g;s/▁/ /g;s/^ //' "$MODEL/test.noisy.output" > "$MODEL/test.noisy.txt"

    # Results computed from an older translation are stale now.
    rm -f "$MODEL/noisy_correlation"
fi

# NOISY TEST SET SCORING
# Guarded separately from the translation so that a run that stopped during
# scoring is completed on the next invocation instead of being skipped forever.
if [[ ! -s "$MODEL/noisy_correlation" ]]; then
    # Cut the output into chunks of test-set size: test.noisy.00, test.noisy.01, ...
    # The command substitution is left unquoted so any padding from wc is dropped.
    split -l $(wc -l < "$TEST_FILE") -d "$MODEL/test.noisy.txt" "$MODEL/test.noisy."

    # The first line of each score list is the score on the clean test set.
    cat "$MODEL/test_bleu" > "$MODEL/noisy_bleu"
    cat "$MODEL/test_chrf" > "$MODEL/noisy_chrf"
    cat "$MODEL/test_meteor" > "$MODEL/noisy_meteor"

    # Then one line per chunk, for chunks 00 through 09.
    for FILE in "$MODEL"/test.noisy.{00..09}; do
        sacrebleu "$TEST_TARGET" --score-only --width 2 --metrics bleu < "$FILE" >> "$MODEL/noisy_bleu"
        sacrebleu "$TEST_TARGET" --score-only --width 4 --metrics chrf < "$FILE" >> "$MODEL/noisy_chrf"
        java -jar meteor/meteor-1.5.jar "$TEST_TARGET" "$FILE" -l "$TGT" -norm | tail -n 1 | sed -e 's/.* //' >> "$MODEL/noisy_meteor"
    done

    python3 noisy_slope.py "$MODEL"/noisy_{bleu,chrf,meteor} | tee "$MODEL/noisy_correlation"
fi

# MORPHEVAL
# Runs only when the language pair matches en(de|cs|fr), and only when the
# tokenised output is missing or older than the averaged model.
if [[ $SRC$TGT =~ en(de|cs|fr) ]]; then
    if [[ ! -f "$MODEL/morpheval.tok" || "$MODEL/model.avg.npz" -nt "$MODEL/morpheval.tok" ]]; then
        # The segmented MorphEval sentences use a "wbpe" suffix where the
        # test data uses "bpew". INPUT_TYPE and TEST_FILE are overwritten here.
        INPUT_TYPE=${INPUT_TYPE/bpew/wbpe}
        TEST_FILE="morpheval/segmented/${SRC}${TGT}/sents.${SRC}${TGT}.${INPUT_TYPE}"
        "$MARIAN_HOME/build/marian-decoder" -c "$MODEL/model.npz.decoder.yml" -m "$MODEL/model.avg.npz" --beam-size 12 --normalize 0.4 --mini-batch 12 < "$TEST_FILE" > "$MODEL/morpheval.output"
        # Undo the segmentation, then tokenise with sacremoses.
        sed -e 's/ //g;s/▁/ /g;s/^ //' "$MODEL/morpheval.output" | sacremoses tokenize -l "$TGT" -x > "$MODEL/morpheval.tok"

        case $TGT in
            de)
                # Analyse the unique tokens with SMOR. Only smor itself runs
                # inside its directory (in a subshell), so the input and output
                # paths are resolved from the current directory and the working
                # directory of this script never changes.
                tr ' ' '\n' < "$MODEL/morpheval.tok" | sort -u | (cd morpheval/SMOR && ./smor) > "$MODEL/morpheval.smored"
                python3 morpheval/morpheval_v2/evaluate_de.py -i "$MODEL/morpheval.tok" -n morpheval/morpheval.limsi.v2.en.info -d "$MODEL/morpheval.smored" | tee "$MODEL/morpheval.analysis"
                ;;
            cs)
                # One token per line with an empty line after each sentence,
                # fed to MorphoDiTa in vertical format.
                sed 's/$/\n/' "$MODEL/morpheval.tok" | tr ' ' '\n' | morpheval/morphodita-1.3.0-bin/bin-linux64/run_morpho_analyze --input=vertical --output=vertical morpheval/czech-morfflex-pdt-131112/czech-morfflex-131112.dict 1 > "$MODEL/morpheval.morphodita"
                python3 morpheval/morpheval_v2/evaluate_cs.py -i "$MODEL/morpheval.morphodita" -n morpheval/morpheval.limsi.v2.en.info | tee "$MODEL/morpheval.analysis"
                ;;
            fr)
                # Evaluate this model's tokenised output against the same info
                # file as the other languages and store the analysis alongside
                # the other results (the previous command pointed at the
                # placeholder names "output.tokenized" and a bare info file name).
                python3 morpheval/morpheval_v2/evaluate_fr.py -i "$MODEL/morpheval.tok" -n morpheval/morpheval.limsi.v2.en.info -d morpheval/lefff.pkl | tee "$MODEL/morpheval.analysis"
                ;;
        esac
    fi
fi

# Print the test BLEU score once more at the very end of the run.
printf 'Test BLEU score: '
cat ${MODEL}/test_bleu
