#!/usr/bin/env bash

# Positional arguments:
#   $1  DATAVER - dataset version; selects the data-bin sub-directory on HDFS
#   $2  LOADLG  - path component of the HDFS checkpoint directory to load from
#   $3  NAME    - run name; used in the HDFS log/checkpoint paths and in the
#                 local working directory name
#   $4+ extra options passed on to fairseq/generate.py
DATAVER="$1"
LOADLG="$2"
NAME="$3"

# Collect every argument from the 4th onwards into a single string. Each
# argument is wrapped as " <arg> ", so neighbouring arguments end up separated
# by two spaces; code that consumes argslist later sees exactly this spacing.
argslist=""
for extra_arg in "${@:4}"; do
  argslist="${argslist} ${extra_arg} "
done
# Log the collected options. printf with a quoted operand prints the string
# as-is: no pathname expansion of characters such as '*', and no chance of a
# word being taken as an option by echo.
printf '%s\n' "${argslist}" >&2

# Work from the directory that contains this script so the relative paths used
# below (fairseq, requirements.txt, ...) resolve.
# `return` is only valid inside a function or a sourced file; in an executed
# script it merely prints an error and execution would continue in the wrong
# directory. Try `return` first (covers the sourced case), then fall back to
# `exit` so a failed cd always stops the run.
cd "$(dirname "$0")" || { return 1 2>/dev/null || exit 1; }

echo "Install fairseq" >&2

# Editable install of the local fairseq checkout, followed by the remaining
# Python requirements; both use the pypi.byted.org package index.
pip3 install -e fairseq -i http://pypi.byted.org/simple/ --trusted-host=pypi.byted.org
pip3 install -r requirements.txt -i http://pypi.byted.org/simple/ --trusted-host=pypi.byted.org

# sudo apt-get update
# sudo apt-get install libxml-perl libxml-dom-perl

# Configure pyrouge to use the ROUGE-1.5.5 release located in the current
# working directory. The assignment is kept separate from `export` so that a
# failing command substitution is not masked by export's own exit status.
PYROUGE_HOME_DIR="$(pwd)/RELEASE-1.5.5"
export PYROUGE_HOME_DIR
# NOTE(review): presumably the scratch directory used by the ROUGE tooling;
# confirm against utils/calRouge.py.
export PYROUGE_TEMP_PATH=/opt/tiger

# Register the ROUGE location with pyrouge and make the Perl scorer executable.
# Paths are quoted so a working directory containing spaces is handled.
pyrouge_set_rouge_path "${PYROUGE_HOME_DIR}"
chmod +x "${PYROUGE_HOME_DIR}/ROUGE-1.5.5.pl"

# HDFS root under which the datasets, logs and checkpoints are stored.
prefix=hdfs://haruna/home/byte_arnold_lq_mlnlc/user/wangdanqing.122

# Remote locations derived from the command-line arguments:
#   dataset_path       - binarized cnndm data for dataset version DATAVER
#   tensorboard_logdir - destination for this run's (NAME) output files
#   checkpoint_path    - checkpoints of run NAME under the LOADLG directory
dataset_path="${prefix}/Datasets/multilingual/data-bin/${DATAVER}/cnndm"
tensorboard_logdir="${prefix}/Workshop/MultiLingual/cnndm/logs/${NAME}"
checkpoint_path="${prefix}/Workshop/MultiLingual/${LOADLG}/checkpoints/${NAME}"

# Make sure both remote directories exist. The arguments are quoted so that a
# run name containing whitespace or glob characters stays a single path.
hdfs dfs -mkdir -p "${tensorboard_logdir}"
hdfs dfs -mkdir -p "${checkpoint_path}"

# Local working tree for this run, rooted in the home directory:
#   <local_root>/resource/dataset               - dataset copied from HDFS
#   <local_root>/output/checkpoint_path         - checkpoint copied from HDFS
#   <local_root>/output/tensorboard_logdir      - files produced by this run
#   <local_root>/model                          - created but not used below
local_root=~/cnndm_${NAME}
resource_root="${local_root}/resource"
output_path="${local_root}/output"
model_path="${local_root}/model"
local_dataset_path="${resource_root}/dataset"
local_checkpoint_path="${output_path}/checkpoint_path"
local_tensorboard_path="${output_path}/tensorboard_logdir"

# Create the whole tree up front. Every path is quoted so that a run name
# containing whitespace does not split into several directories.
mkdir -p "${resource_root}" "${output_path}" "${model_path}"
mkdir -p "${local_dataset_path}"
mkdir -p "${local_checkpoint_path}"
#hadoop fs -copyToLocal ${tensorboard_logdir} ${local_tensorboard_path}
mkdir -p "${local_tensorboard_path}"

# Fetch the dataset files. The trailing '*' is deliberately left unquoted as
# before: it normally matches nothing on the local filesystem, so the shell
# passes the pattern through unchanged to hadoop.
hadoop fs -copyToLocal "${dataset_path}"/* "${local_dataset_path}"
echo "Download resource from ${dataset_path} to ${local_dataset_path}" >&2

echo "Finish download files" >&2

# Comma-separated language codes handed to generate.py via --langs.
langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN

echo "Generating..."

# Fetch the best checkpoint of this run from HDFS.
hadoop fs -copyToLocal "${checkpoint_path}/checkpoint_best.pt" "${local_checkpoint_path}"
echo "Load checkpoints from ${checkpoint_path}/checkpoint_best.pt to ${local_checkpoint_path}" >&2

# Build a file-name suffix from the extra options: every '-' is removed and
# each run of spaces becomes '_', then "cnndm" is appended.
# Example: argslist " --beam  5 " yields "_beam_5_cnndm"; an empty argslist
# yields just "cnndm".
suffix=$(echo "$argslist" | sed -e "s/-//g"  -e "s/  */_/g")cnndm

# Run generation on the test subset and capture stdout in output<suffix>.
# argslist is expanded WITHOUT quotes on purpose: it holds several
# space-separated options that must be split into individual words.
# shellcheck disable=SC2086
python fairseq/generate.py "${local_dataset_path}" \
  --path "${local_checkpoint_path}/checkpoint_best.pt" \
  --task summarization_from_pretrained_mbart \
  --gen-subset test \
  --source-lang doc --target-lang sum \
  --langs "${langs}" \
  --remove-bpe 'sentencepiece' \
  --min-len 100 \
  --max-len-b 300 \
  --lenpen 2 \
  --no-repeat-ngram-size 3 \
  --truncate-source \
  --user-dir examples/summarization \
  ${argslist} \
  > "${local_tensorboard_path}/output${suffix}"

# Keep only the lines starting with 'H' (presumably fairseq hypothesis lines
# of the form H-<id><TAB><score><TAB><text> - confirm), order them with a
# version sort so numeric ids sort naturally, and keep the third tab-separated
# field onwards. grep reads the file directly instead of going through cat.
grep -P "^H" "${local_tensorboard_path}/output${suffix}" \
  | sort -V \
  | cut -f 3- \
  > "${local_tensorboard_path}/test${suffix}.hypo"


# Download the reference summaries and score the generated hypotheses.
# The log line now reports the path that is actually fetched; it previously
# named a different directory built from an undefined variable.
reference_src="${prefix}/Datasets/cnndm/bert_format/token/test.sum"
echo "Load ground truth file from ${reference_src}"
hadoop fs -get "${reference_src}" "${local_dataset_path}"

# -c is given the hypothesis file and -r the downloaded reference file.
# NOTE(review): -l en / -d "<q>" look like a language code and a sentence
# delimiter; confirm against utils/calRouge.py.
python utils/calRouge.py \
  -c "${local_tensorboard_path}/test${suffix}.hypo" \
  -r "${local_dataset_path}/test.sum" \
  -l en -d "<q>"


# Upload everything produced locally to the run's HDFS log directory,
# overwriting files that already exist there (-f). The directory parts are
# quoted; the '*' stays outside the quotes so the shell still expands it to
# the local files.
echo "Put ${local_tensorboard_path} to ${tensorboard_logdir}" >&2
hadoop fs -put -f "${local_tensorboard_path}"/* "${tensorboard_logdir}/"
# NOTE(review): the reason for this 10-minute pause is not visible here;
# presumably it keeps the job alive after the upload - confirm before removing.
sleep 600
