#!/usr/bin/env bash

# Usage: bash calrouge_xgiga_generate_zero.sh <DATAVER> <TRAIN_LG> <TEST_LG> <NAME> [extra args...]
#
# Examples:
# bash calrouge_xgiga_generate_zero.sh mspm4_xgiga en zh mbartV2_adapterV1_rouge_en dev --prefix-tokens zh_CN --gen-subset valid

# bash calrouge_xgiga_generate_zero.sh mspm4_xgiga en fr mbartV2_adapterV1_rouge_en --prefix-tokens fr_XX --gen-subset valid --fp16

# Positional arguments:
#   $1 DATAVER  - dataset version; a path component under .../data-bin/
#   $2 TRAIN_LG - training language; used in log and checkpoint path names
#   $3 TEST_LG  - evaluation language; selects the dataset, the reference
#                 file and the Chinese-specific ROUGE branch ("zh")
#   $4 NAME     - experiment name; used in log, checkpoint and local dir names
#   $5...       - extra arguments, kept only as text in `argslist`
DATAVER="$1"
TRAIN_LG="$2"
TEST_LG="$3"
NAME="$4"

#######################################
# Joins its arguments into one string, wrapping every argument in a leading
# and a trailing space (" a  b "). The hypothesis file-name suffix is derived
# from this exact format, so the spacing must not change.
# Arguments: the words to join (may be none)
# Outputs:   the joined string on stdout, without a trailing newline
#######################################
join_extra_args() {
  local joined=""
  if (( $# > 0 )); then
    # printf reuses the format once per argument.
    printf -v joined ' %s ' "$@"
  fi
  printf '%s' "${joined}"
}

argslist="$(join_extra_args "${@:5}")"

# Log the extra arguments. printf with a quoted expansion avoids the glob
# expansion and echo option parsing (-n / -e) an unquoted `echo` is prone to.
printf '%s\n' "${*:5}" >&2

# Run from the directory containing this script so the relative paths used
# later (./xnlg/..., RELEASE-1.5.5) resolve. A bare `return` is an error when
# the script is executed rather than sourced and would let execution continue
# in the wrong directory, so fall back to `exit` in that case.
cd "$(dirname -- "$0")" || {
  echo "Cannot change to script directory: $(dirname -- "$0")" >&2
  return 1 2>/dev/null || exit 1
}

# echo "Install fairseq" >&2

# pip3 install -r requirements.txt -i http://pypi.byted.org/simple/ --trusted-host=pypi.byted.org

# sudo apt-get update
# sudo apt-get install libxml-perl libxml-dom-perl

# Point pyrouge at the ROUGE-1.5.5 release located next to the current
# directory. Assignment and export are separate so a failing `pwd` is not
# masked by `export`.
PYROUGE_HOME_DIR="$(pwd)/RELEASE-1.5.5"
export PYROUGE_HOME_DIR
# NOTE(review): presumably the scratch directory pyrouge writes temp files to.
export PYROUGE_TEMP_PATH=/opt/tiger

# Quoted so a working directory containing whitespace does not get split.
pyrouge_set_rouge_path "${PYROUGE_HOME_DIR}"
chmod +x "${PYROUGE_HOME_DIR}/ROUGE-1.5.5.pl"

# Root of the user's HDFS area; every remote path below hangs off it.
wxz_prefix="hdfs://haruna/home/byte_arnold_hl_mlnlc/user/wuxianze.0"

# Binarized dataset for the evaluation language.
dataset_path="${wxz_prefix}/Datasets/multilingual/data-bin/${DATAVER}/${TEST_LG}"

# Model locations (pretrained weights and checkpoints of this experiment).
pretrained_path="${wxz_prefix}/Workspace/Multilingual/pretrained"
old_checkpoint_path="${wxz_prefix}/Workspace/Multilingual/${TRAIN_LG}/checkpoints/${NAME}"
new_checkpoint_path="${wxz_prefix}/Workspace/Multilingual/xgiga/${TRAIN_LG}/checkpoints/${NAME}"

# Remote log directory that is downloaded before scoring.
tensorboard_logdir="${wxz_prefix}/Workspace/Multilingual/xgiga/${TEST_LG}/logs/${NAME}_${TRAIN_LG}"

# Local working tree for this experiment / evaluation-language pair, kept
# under the user's home directory:
#   <local_root>/resource/dataset  - downloaded reference files
#   <local_root>/output            - downloaded logs and hypotheses
#   <local_root>/model             - model directory
local_root=~/xgiga_${NAME}_${TEST_LG}
resource_root="${local_root}/resource"
output_path="${local_root}/output"
model_path="${local_root}/model"
local_dataset_path="${resource_root}/dataset"

# One quoted mkdir call: -p creates missing parents, and quoting keeps paths
# containing whitespace intact.
mkdir -p -- "${resource_root}" "${output_path}" "${model_path}" "${local_dataset_path}"

# Fresh local mirror of the remote log directory. The :? guard aborts if
# output_path is empty, so the recursive delete below can never target
# "/tensorboard_logdir".
local_tensorboard_path="${output_path:?output_path must be set}/tensorboard_logdir"
# -f: the directory does not exist on the first run; that is not an error.
rm -rf -- "${local_tensorboard_path}"
mkdir -p -- "${local_tensorboard_path}"
echo "$tensorboard_logdir"
echo "$local_tensorboard_path"
# The glob is quoted so it reaches hdfs untouched and is expanded against the
# remote file system rather than by the local shell.
if ! hdfs dfs -get "${tensorboard_logdir}/*" "${local_tensorboard_path}/"; then
  echo "Failed to download ${tensorboard_logdir} to ${local_tensorboard_path}" >&2
  exit 1
fi

echo "Finish download files" >&2

# File-name suffix derived from the extra arguments: every "-" is dropped and
# each run of spaces becomes a single "_".
suffix=$(sed -e 's/-//g' -e 's/  */_/g' <<<"$argslist")

# Score against the dev references when the extra arguments mention "valid"
# anywhere; otherwise use the test references.
case "$argslist" in
  *valid*) split="dev" ;;
  *) split="test" ;;
esac

# Fetch the ground-truth file for the selected split and language.
ref_remote="${wxz_prefix}/Datasets/multilingual/xgiga/raw/${split}.y.${TEST_LG}"
echo "Load ground truth file from ${ref_remote}"
# Deliberately non-fatal: the local dataset directory is not wiped between
# runs, so -get fails when a copy from an earlier run is already present, and
# that copy is then used.
hadoop fs -get "${ref_remote}" "${local_dataset_path}/"

# Hypotheses come from the downloaded log directory; the file name always
# starts with "test", whatever the split.
hypo="${local_tensorboard_path}/test${suffix}.hypo"
ref="${local_dataset_path}/${split}.y.${TEST_LG}"

if [[ ! -f "${ref}" ]]; then
  echo "Warning: reference file ${ref} is missing" >&2
fi

# Score the hypotheses against the references. [[ ]] with a quoted operand
# stays well-formed even when TEST_LG is empty.
if [[ "${TEST_LG}" == "zh" ]]; then
  # split the reference and hypothesis into chars; the results are written
  # next to the inputs with a ".char" extension and scored instead
  python3 -u ./xnlg/zh_split_words.py <"${hypo}" >"${hypo}.char"
  python3 -u ./xnlg/zh_split_words.py <"${ref}" >"${ref}.char"
  hypo="${hypo}.char"
  ref="${ref}.char"
  python3 ./xnlg/calc_rouge.py --ref "${ref}" --hyp "${hypo}" --zh True --use_rouge True
else
  python3 ./xnlg/calc_rouge.py --ref "${ref}" --hyp "${hypo}" --use_rouge True
fi
