#!/bin/bash
# example usage:
# bash sh/score/large_pronoun_mcnemar.sh checkpoints/en-fr/iwslt17/standard/k1 checkpoints/en-fr/iwslt17/standard/fromnei/k1


# Read script arguments and assign them to variables
#######################################
# Turn every `--name=value` argument into a global shell variable `name`
# holding `value`.
#
# Only arguments that START with "--" are treated as options; everything else
# (e.g. positional checkpoint directories, even ones containing "--" or "=")
# is left alone. The value is everything after the FIRST "=", so values that
# themselves contain "=" are kept intact.
#
# Arguments:
#   The script's command-line arguments ("$@").
# Outputs:
#   A warning on stderr for an option whose name is not a valid shell
#   identifier (such options are skipped).
# Returns:
#   Always 0.
#######################################
parse_named_args() {
    local arg name value
    for arg in "$@"; do
        # Positional arguments are not options; skip them.
        [[ $arg == --* ]] || continue

        name=${arg#--}
        name=${name%%=*}

        if [[ $arg == *=* ]]; then
            value=${arg#*=}
        else
            # A bare `--flag` has always ended up with its own text as the
            # value (cut prints the whole line when no delimiter is present);
            # keep that so existing non-empty checks continue to work.
            value=$arg
        fi

        # Refuse names that are not plain identifiers: this avoids both
        # assignment errors and subscript evaluation on crafted names.
        if [[ ! $name =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
            printf 'warning: ignoring option with invalid name: %s\n' "$arg" >&2
            continue
        fi

        # printf -v assigns to the global variable (a `declare` inside a
        # function would create a local one instead).
        printf -v "$name" '%s' "$value"
    done
    return 0
}

parse_named_args "$@"

# Note: we modified the evaluate script for the purpose of significance testing
#######################################
# Score two checkpoints with the contrastive-pronoun evaluate script, then run
# a McNemar test between the two resulting results files.
#
# Globals (each keeps a value that is already set, e.g. by a `--name=value`
# argument parsed earlier in this script; otherwise the default below is used):
#   evaluate  path to the (modified) evaluate.py
#   ref       reference JSON passed as --reference
#   mcnemar   path to mcnemar.py
#   alpha     value passed to mcnemar.py as --alpha
# Arguments:
#   The first two arguments that do not start with "--" are the checkpoint
#   directories. For each, <dir>/logs/large_pronoun.score is passed as
#   --scores and <dir>/logs/large_pronoun.results as --results-file.
# Outputs:
#   Output of the two Python scripts plus a separator line after each
#   evaluation; diagnostics go to stderr.
# Returns:
#   2 on missing checkpoint arguments, the evaluator's status if an
#   evaluation fails, otherwise the exit status of mcnemar.py.
#######################################
main() {
    # NOTE(review): the evaluate script lives under the EN-FR test set while
    # the default reference is the EN-DE ContraPro file - confirm this pairing
    # is intended for the language pair being scored.
    : "${evaluate:=data/en-fr/bawden/Large-contrastive-pronoun-testset-EN-FR/OpenSubs/scripts/evaluate.py}"
    : "${ref:=data/en-de/test_suites/ContraPro/contrapro.json}"
    # ref=data/en-fr/bawden/Large-contrastive-pronoun-testset-EN-FR/OpenSubs/testset-en-fr.json
    : "${mcnemar:=scripts/mcnemar.py}"
    : "${alpha:=0.001}"

    # Checkpoint directories are the positional arguments; `--name=value`
    # options may appear anywhere on the command line.
    local -a checkpoints=()
    local arg
    for arg in "$@"; do
        [[ $arg == --* ]] || checkpoints+=("$arg")
    done
    if (( ${#checkpoints[@]} < 2 )); then
        printf 'usage: %s CHECKPOINT_DIR_1 CHECKPOINT_DIR_2 [--name=value ...]\n' \
            "${0##*/}" >&2
        return 2
    fi
    local first=${checkpoints[0]}
    local second=${checkpoints[1]}

    local checkpoint status
    for checkpoint in "$first" "$second"; do
        # Stop on failure: continuing would run the significance test on a
        # missing or stale results file.
        python3 "$evaluate" \
            --reference "$ref" \
            --scores "$checkpoint/logs/large_pronoun.score" \
            --maximize \
            --results-file "$checkpoint/logs/large_pronoun.results" || {
            status=$?
            printf 'error: evaluation failed for %s (exit %s)\n' \
                "$checkpoint" "$status" >&2
            return "$status"
        }
        printf '%s\n' "----------------------------------------"
    done

    python3 "$mcnemar" \
        --r1="$first/logs/large_pronoun.results" \
        --r2="$second/logs/large_pronoun.results" \
        --alpha="$alpha"
}

main "$@"
