#!/bin/bash
# bash sh/run/en-fr/iwslt17/transfo_base.sh --t=finetune --sdir=standard/power_k0 --cuda=1 --pretrained=checkpoints/en-fr/wmt14/transfo_base/checkpoint.avg10.pt
# bash sh/run/en-fr/iwslt17/transfo_base.sh --t=finetune --sdir=split/k0 --cuda=0 --pretrained=checkpoints/en-fr/wmt14/transfo_base/checkpoint.avg10.pt --datadir=data/data-bin/iwslt17/split
# bash sh/run/en-fr/iwslt17/transfo_base.sh --t=test --sdir=standard/k0 --cuda=0 --datadir=data/data-bin/wmt14/test_suites/large_pronoun/k3 --testlog=test_pronoun
# bash sh/run/en-fr/iwslt17/transfo_base.sh --t=finetune --cuda=1  --sdir=fromsplit/k0 --pretrained=checkpoints/iwslt17/split/k0/checkpoint_best.pt

# Parse `--name=value` command-line arguments into global shell variables.
#
# Every argument that starts with `--` assigns a variable named after the
# text between `--` and the first `=` (e.g. `--sdir=standard/k0` sets $sdir).
# The value is everything after the FIRST `=`, so values that themselves
# contain `=` are kept intact. An argument without `=` (e.g. `--flag`) sets
# the variable to the full argument text. Arguments that do not start with
# `--` are ignored.
#
# Arguments: the script's positional parameters ("$@").
# Globals:   writes one variable per accepted argument.
parse_cli_args() {
    local _cli_arg _cli_name _cli_value
    for _cli_arg in "$@"; do
        # Only `--name[=value]` arguments define variables.
        [[ $_cli_arg == --* ]] || continue
        _cli_name=${_cli_arg%%=*}
        _cli_name=${_cli_name#--}
        _cli_value=${_cli_arg#*=}
        # printf -v assigns to the (global) variable named by $_cli_name;
        # plain `declare` would create a function-local variable here.
        printf -v "$_cli_name" '%s' "$_cli_value"
    done
}

parse_cli_args "$@"

# Fixed experiment settings: language pair, task, model architecture and the
# locations of this script, the test suites and the detokenizer.
src=en
tgt=fr
lang="${src}-${tgt}"
task=translation
architecture=transformer_vaswani_wmt_en_fr
script="sh/run/${lang}/iwslt17/transfo_base.sh"
test_suites="data/${lang}/data-bin/wmt14/test_suites"
bawden="data/${lang}/bawden"
detokenizer=tools/mosesdecoder/scripts/tokenizer/detokenizer.perl
num_workers=8
n_best_checkpoints=5

# Options that may have been set from the command line; each falls back to
# its default when unset or empty. --mt and --uf are short names for the
# max-tokens and update-frequency settings.
datadir="${datadir:-data/${lang}/data-bin/iwslt17/standard}"
lenpen="${lenpen:-0.6}"
seed="${seed:-0}"
pretrained="${pretrained:-None}"
testlog="${testlog:-test}"
maxtok="${mt:-8000}"
updatefreq="${uf:-2}"
# The '{}' default is assigned separately: a literal '}' inside
# ${var:-...} would end the expansion early.
[[ -n "$mover" ]] || mover='{}'

# A save directory that is not already rooted at checkpoints/ is taken to be
# relative to this experiment's checkpoint folder.
case "$sdir" in
    checkpoints/*) ;;
    *) sdir="checkpoints/${lang}/iwslt17/${sdir}" ;;
esac
checkpoint_path="${sdir}/checkpoint_best.pt"
# checkpoint_path=$sdir/checkpoint.avg_last$n_best_checkpoints.pt

# Export the GPU selection only when --cuda was given.
[[ -z "$cuda" ]] || export CUDA_VISIBLE_DEVICES="$cuda"

# Print one column group of a fairseq-generate log, ordered by sentence id.
#
# Arguments:
#   $1 - path to the generation log
#   $2 - line tag to select (S, T or H): lines starting with the tag are kept
#        and the leading "<tag>-" is stripped
#   $3 - field list passed to `cut -f` (e.g. "2-", "3-", "2")
# Outputs: the selected fields, sorted numerically on the first field.
_gen_log_column() {
    grep "^$2" "$1" | sed "s/^$2-//g" | sort -nk 1 | cut "-f$3"
}

# Run the pipeline task selected with --t.
#
# Arguments:
#   $1 - task name: finetune | boom | boom-full | test | results | score |
#        score-split | score-ref | average | test-suites
# Globals read:    sdir, datadir, testlog, lenpen, cuda, seed, pretrained,
#                  maxtok, updatefreq, src, tgt, task, architecture, script,
#                  checkpoint_path, num_workers, n_best_checkpoints,
#                  detokenizer, test_suites, bawden
# Globals written: datadir, d, s, orig (scratch values used by some tasks)
# Returns: the status of the last command the task ran; 1 (with a message on
#          stderr) when the task name is missing or unknown.
run_task() {
    # Common prefix of the per-run files <prefix>.log, <prefix>.out.*, <prefix>.score
    local prefix="$sdir/logs/$testlog"
    local base

    case "$1" in
    ###########################################################################
    finetune)
        # Fine-tune from $pretrained; training output is appended to train.log.
        mkdir -p "$sdir/logs"
        fairseq-train "$datadir" \
            --save-dir "$sdir" \
            --seed "$seed" \
            --source-lang "$src" \
            --target-lang "$tgt" \
            --num-workers "$num_workers" \
            --finetune-from-model "$pretrained" \
            --task "$task" \
            --arch "$architecture" \
            --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \
            --lr-scheduler inverse_sqrt --warmup-updates 4000 --min-lr 1e-09 \
            --lr 0.0005 --warmup-init-lr 1e-07 \
            --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \
            --max-tokens "$maxtok" \
            --update-freq "$updatefreq" \
            --patience 5 \
            --keep-best-checkpoints "$n_best_checkpoints" \
            --no-epoch-checkpoints \
            --log-format json \
            | tee -a "$sdir/logs/train.log"
        ;;
    ###########################################################################
    boom)
        # Run the `test` task on the large pronoun test set (normal and
        # shuffled), then print the resulting score files.
        for s in "" ".shuffled"; do
            datadir=$test_suites/large_pronoun_testset/k3$s
            d=large_pronoun_testset$s
            # --lenpen is forwarded so an override applies to the child run too.
            bash "$script" --t=test "--sdir=$sdir" "--testlog=$d" "--cuda=$cuda" "--datadir=$datadir" "--lenpen=$lenpen"
        done
        for s in "" ".shuffled"; do
            d=large_pronoun_testset$s
            echo "Results for $d"
            cat "$sdir/logs/$d.score"
            echo "-----------------------------------"
            echo ""
        done
        ;;
    ###########################################################################
    boom-full)
        # NOTE(review): this branch was originally a second `boom` branch and
        # therefore unreachable; it is exposed under a separate name so that
        # the behavior of `boom` is unchanged. Confirm the intended name.
        # BLEU on PRO (normal and shuffled)
        for s in "" ".shuffled"; do
            datadir=$test_suites/large_pronoun_testset/k3$s
            d=large_pronoun_testset$s
            bash "$script" --t=test "--sdir=$sdir" "--testlog=$d" "--cuda=$cuda" "--datadir=$datadir" "--lenpen=$lenpen"
        done
        # BLEU on test set
        bash "$script" --t=test "--sdir=$sdir" "--cuda=$cuda" "--lenpen=$lenpen"
        # BLEU on shuffled test set
        datadir=data/$lang/data-bin/iwslt17/test_shuffled
        bash "$script" --t=test "--sdir=$sdir" --testlog=test.shuffled "--cuda=$cuda" "--datadir=$datadir" "--lenpen=$lenpen"
        ;;
    ###########################################################################
    test)
        # Generate translations for $datadir with the best checkpoint, then
        # score the detokenized output against the detokenized reference.
        # The log directory may not exist if training was not run through
        # this script, so create it before tee writes into it.
        mkdir -p "$sdir/logs"
        fairseq-generate "$datadir" \
            --task "$task" \
            --source-lang "$src" \
            --target-lang "$tgt" \
            --path "$checkpoint_path" \
            --batch-size 64 \
            --remove-bpe \
            --beam 4 \
            --lenpen "$lenpen" \
            --temperature 1.0 \
            --num-workers "$num_workers" \
            | tee "${prefix}.log"
        # Split the log into source / reference / hypothesis files (H lines
        # carry an extra score field, hence fields 3-), detokenized with
        # sacremoses.
        _gen_log_column "${prefix}.log" S 2- | sacremoses detokenize > "${prefix}.out.src"
        _gen_log_column "${prefix}.log" T 2- | sacremoses detokenize > "${prefix}.out.ref"
        _gen_log_column "${prefix}.log" H 3- | sacremoses detokenize > "${prefix}.out.sys"
        tools/mosesdecoder/scripts/generic/multi-bleu-detok.perl "${prefix}.out.ref" < "${prefix}.out.sys" | tee "${prefix}.score"
        ;;
    ###########################################################################
    results)
        # Print the stored score files of the test set and the large pronoun
        # test set, each in its normal and shuffled variant.
        for base in test large_pronoun_testset; do
            for s in "" ".shuffled"; do
                d=$base$s
                echo "RESULTS FOR $d"
                echo ""
                cat "$sdir/logs/$d.score"
                echo "-----------------------------------"
                echo ""
            done
        done
        ;;
    ###########################################################################
    score)
        # Re-score an existing generation log. Same steps as the scoring part
        # of `test`, but detokenizing with the Moses perl script ($detokenizer)
        # instead of `sacremoses detokenize`.
        _gen_log_column "${prefix}.log" S 2- | "$detokenizer" -l "$tgt" > "${prefix}.out.src"
        _gen_log_column "${prefix}.log" T 2- | "$detokenizer" -l "$tgt" > "${prefix}.out.ref"
        _gen_log_column "${prefix}.log" H 3- | "$detokenizer" -l "$tgt" > "${prefix}.out.sys"
        tools/mosesdecoder/scripts/generic/multi-bleu-detok.perl "${prefix}.out.ref" < "${prefix}.out.sys" | tee "${prefix}.score"
        ;;
    ###########################################################################
    score-split)
        # Re-score an existing generation log after joining every two
        # consecutive lines with a space (`paste - -`), without detokenizing.
        _gen_log_column "${prefix}.log" S 2- | paste -d " " - - > "${prefix}.out.src"
        _gen_log_column "${prefix}.log" T 2- | paste -d " " - - > "${prefix}.out.ref"
        _gen_log_column "${prefix}.log" H 3- | paste -d " " - - > "${prefix}.out.sys"
        fairseq-score \
            --sys "${prefix}.out.sys" \
            --ref "${prefix}.out.ref" \
            | tee "${prefix}.score"
        ;;
    ###########################################################################
    score-ref)
        # Run fairseq-generate with --score-reference on $datadir and keep
        # the output as <testlog>.log.
        mkdir -p "$sdir/logs"
        fairseq-generate "$datadir" \
            --task "$task" \
            --source-lang "$src" \
            --target-lang "$tgt" \
            --path "$checkpoint_path" \
            --score-reference \
            --batch-size 64 \
            --remove-bpe \
            --num-workers "$num_workers" \
            | tee "${prefix}.log"
        ;;
    ###########################################################################
    average)
        # Average all checkpoint.best_* files found in $sdir (glob left
        # unquoted on purpose so it expands to the individual files).
        python scripts/average_checkpoints.py \
            --inputs "$sdir"/checkpoint.best_* \
            --output "$sdir/checkpoint.${n_best_checkpoints}.best.average.pt"
        ;;
    ###########################################################################
    test-suites)
        # evaluate on Bawden's test suites
        for d in lexical_choice; do
            datadir=$test_suites/$d
            # score reference
            bash "$script" --t=score-ref "--src=$src" "--tgt=$tgt" "--sdir=$sdir" "--datadir=$datadir" "--testlog=$d" "--cuda=$cuda"
            # evaluate
            echo "extract scores..."
            _gen_log_column "$sdir/logs/$d.log" H 2 > "$sdir/logs/$d.full_score"
            # keep every 2nd score line
            awk 'NR % 2 == 0' "$sdir/logs/$d.full_score" > "$sdir/logs/$d.score"
            echo "evaluate model performance on test-suite by comparing scores..."
            orig=$bawden/discourse-mt-test-sets/
            python3 "$orig/scripts/evaluate.py" "$orig/test-sets/$d.json" "$d" "$sdir/logs/$d.score" --maximise > "$sdir/logs/$d.result"
        done
        # evaluate on large pronouns test suite (the loop currently covers
        # only the original variant, s="")
        for s in ""; do
            datadir=$test_suites/large_pronoun/k3$s
            d=large_pronoun$s
            # score reference
            bash "$script" --t=score-ref "--src=$src" "--tgt=$tgt" "--sdir=$sdir" "--datadir=$datadir" "--testlog=$d" "--cuda=$cuda"
            # evaluate
            echo "extract scores..."
            _gen_log_column "$sdir/logs/$d.log" H 2 > "$sdir/logs/$d.full_score"
            # keep every 4th score line
            awk 'NR % 4 == 0' "$sdir/logs/$d.full_score" > "$sdir/logs/$d.score"
            echo "evaluate model performance on test-suite by comparing scores..."
            orig=$bawden/Large-contrastive-pronoun-testset-EN-FR/OpenSubs
            python3 "$orig/scripts/evaluate.py" --reference "$orig/testset-en-fr.json" --scores "$sdir/logs/$d.score" --maximize > "$sdir/logs/$d.result"
            # test with BLEU on test suite
            # bash $script --t=test --src=$src --tgt=$tgt --sdir=$sdir --datadir=$datadir --testlog=test_pronoun$s --cuda=$cuda
        done
        echo "-----------------------------------"
        echo ""
        # print results
        for d in lexical_choice; do
            echo "Results for $d"
            cat "$sdir/logs/$d.result"
            echo "-----------------------------------"
            echo ""
        done
        for s in ""; do
            d=large_pronoun$s
            echo "Results for $d"
            grep total "$sdir/logs/$d.result"
            # NOTE(review): test_pronoun$s.score is not produced by this task
            # (the BLEU run above is commented out); it must come from a
            # separate `--t=test --testlog=test_pronoun` run.
            tail "$sdir/logs/test_pronoun$s.score"
            echo "-----------------------------------"
            echo ""
        done
        ;;
    ###########################################################################
    *)
        # Missing or unknown task: report on stderr and fail.
        echo "Argument t is not valid." >&2
        return 1
        ;;
    esac
}

# Last command of the script: its status becomes the script's exit status.
run_task "$t"

