#!/usr/bin/env bash

# Positional parameters:
#   $1 MODE    - compared against "train" / "generate" further down
#   $2 LG      - language component of the HDFS dataset/log/checkpoint paths
#   $3 DATAVER - dataset version component of the HDFS dataset path
#   $4 NAME    - run name component of the HDFS log/checkpoint paths
#   $5 LOAD    - path component used to locate the pretrained checkpoint
#   $6...      - extra options, collected into the flat string `argslist`
MODE=$1
LG=$2
DATAVER=$3
NAME=$4
LOAD=$5

# Flatten every argument from the sixth onwards into one space-separated
# string; each argument ends up surrounded by single spaces.
argslist=""
for extra_arg in "${@:6}"; do
  argslist+=" ${extra_arg} "
done
# Unquoted on purpose: the collected options are logged as a compact line.
echo $argslist >&2

# Work from the directory that contains this script; the relative paths used
# below (fairseq, requirements.txt, RELEASE-1.5.5, examples/...) depend on it.
# `return` only works when the file is sourced; when the script is executed it
# fails, so fall through to `exit` instead of silently carrying on in the
# wrong directory.
cd "$(dirname -- "$0")" || { return 1 2>/dev/null || exit 1; }

echo "Install fairseq" >&2

# Both installs use the same package index (pypi.byted.org), so the index
# options are spelled out once and reused.
pip_index_opts=(-i http://pypi.byted.org/simple/ --trusted-host=pypi.byted.org)
# Editable install of the fairseq checkout in the current directory.
pip3 install -e fairseq "${pip_index_opts[@]}"
# Remaining Python requirements of the project.
pip3 install -r requirements.txt "${pip_index_opts[@]}"

# Disabled system-package step (presumably the Perl XML modules needed by
# ROUGE-1.5.5.pl; confirm before re-enabling).
# sudo apt-get update
# sudo apt-get install libxml-perl libxml-dom-perl

# pyrouge configuration: the ROUGE release is expected in RELEASE-1.5.5 under
# the current working directory. Assignment and export are split so that a
# failing $(pwd) is not masked by `export`.
PYROUGE_HOME_DIR="$(pwd)/RELEASE-1.5.5"
export PYROUGE_HOME_DIR
export PYROUGE_TEMP_PATH=/opt/tiger

# Quoted so that a working directory containing whitespace stays one argument.
pyrouge_set_rouge_path "$PYROUGE_HOME_DIR"
chmod +x "$PYROUGE_HOME_DIR/ROUGE-1.5.5.pl"

# HDFS root under which the remote paths below are built.
prefix=hdfs://haruna/home/byte_arnold_lq_mlnlc/user/wangdanqing.122

# Remote locations derived from the positional parameters:
#   dataset_path       - binarized dataset for ${DATAVER}/${LG}
#   tensorboard_logdir - tensorboard logs of run ${NAME}
#   checkpoint_path    - checkpoints of run ${NAME}
#   pretrained_path    - "trans3" checkpoints stored under ${LOAD}
dataset_path="${prefix}/Datasets/multilingual/data-bin/${DATAVER}/${LG}"
tensorboard_logdir="${prefix}/Workshop/MultiLingual/${LG}/logs/${NAME}"
checkpoint_path="${prefix}/Workshop/MultiLingual/${LG}/checkpoints/${NAME}"
pretrained_path="${prefix}/Workshop/MultiLingual/${LOAD}/checkpoints/trans3"

# Create the remote log and checkpoint directories if they do not exist yet.
# Quoted so that user-supplied path components are never word-split or
# glob-expanded by the local shell.
hdfs dfs -mkdir -p "$tensorboard_logdir"
hdfs dfs -mkdir -p "$checkpoint_path"

# Local working tree under the invoking user's home directory:
#   resource/ - downloaded inputs
#   output/   - tensorboard logs and checkpoints written during the run
#   model/    - pretrained model files
local_root=~/mbart
resource_root="${local_root}/resource"
output_path="${local_root}/output"
model_path="${local_root}/model"
mkdir -p "${resource_root}" "${output_path}" "${model_path}"

local_dataset_path="${resource_root}/dataset"
mkdir -p "${local_dataset_path}"
# The remote glob is quoted so the local shell never tries to expand it; the
# pattern is handed to `hadoop fs` unchanged, which resolves it against HDFS.
hadoop fs -copyToLocal "${dataset_path}/*" "${local_dataset_path}"
echo "Download resource from ${dataset_path} to ${local_dataset_path}" >&2

# Local directory that receives tensorboard logs during the run. Existing
# remote logs are not pulled down (the download below is disabled).
local_tensorboard_path="${output_path}/tensorboard_logdir"
#hadoop fs -copyToLocal ${tensorboard_logdir} ${local_tensorboard_path}
mkdir -p "${local_tensorboard_path}"

# Local checkpoint directory, seeded with the run's last remote checkpoint.
# The copy is best-effort: its exit status is not checked and the message
# below is printed either way.
local_checkpoint_path="${output_path}/checkpoint_path"
mkdir -p "${local_checkpoint_path}"
hadoop fs -copyToLocal "${checkpoint_path}/checkpoint_last.pt" "${local_checkpoint_path}"
echo "Load checkpoints from ${checkpoint_path}/checkpoint_last.pt to ${local_checkpoint_path}" >&2

#######################################
# Make sure checkpoint_best.pt is present in a local directory, downloading
# it from a remote directory when it is missing.
# The check is on the checkpoint file, not on the directory: the directory is
# created before the copy, so an interrupted or failed download leaves it
# behind empty, and a directory test would then skip the download forever.
# Arguments:
#   $1 - local directory that should contain checkpoint_best.pt
#   $2 - remote directory holding checkpoint_best.pt
# Outputs:
#   progress messages on stderr
# Returns:
#   0 if the file is already present, otherwise the status of the copy
#######################################
fetch_pretrained_model() {
  local local_dir=$1
  local remote_dir=$2

  if [[ -f "${local_dir}/checkpoint_best.pt" ]]; then
    echo "Pretrained model in ${local_dir}" >&2
    return 0
  fi

  echo "Load pretrained model from ${remote_dir}/checkpoint_best.pt to ${local_dir}" >&2
  mkdir -p "${local_dir}"
  hadoop fs -copyToLocal "${remote_dir}/checkpoint_best.pt" "${local_dir}"
}

local_pretrained_path="${model_path}/trans3"
fetch_pretrained_model "${local_pretrained_path}" "${pretrained_path}"

echo "Finish download files" >&2

# Comma-separated language tags handed to fairseq through --langs.
langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN

#######################################
# Watch a local directory and move every freshly written *.pt file to a
# remote directory. Runs until inotifywait terminates.
# A file is removed locally only after its upload succeeded, so a failed
# upload no longer destroys the only copy of a checkpoint.
# Arguments:
#   $1 - local directory to watch for close_write events
#   $2 - remote directory that receives the files
# Outputs:
#   progress messages on stderr
#######################################
upload_new_checkpoints() {
  local watch_dir=$1
  local remote_dir=$2
  local path action file

  # inotifywait -m prints one "<dir> <events> <file>" line per event.
  inotifywait -m "${watch_dir}" -e close_write |
    while read -r path action file; do
      # Only files with a .pt extension are treated as checkpoints.
      if [[ "$file" =~ \.pt$ ]]; then
        echo "Checkpoint detected: $file" >&2
        # echo -e "checkpoint detected: $file" | hadoop fs -appendToFile - ${hdfs_log_file}
        if hadoop fs -put -f "${watch_dir}/${file}" "${remote_dir}/"; then
          echo "checkpoint uploaded: $file to ${remote_dir}/$file" >&2
          rm -- "${watch_dir}/${file}"
        else
          echo "checkpoint upload failed, keeping local copy: ${watch_dir}/${file}" >&2
        fi
      fi
    done
}

if [ "$MODE" == "train" ]; then
  echo "Training..."

  # Ship checkpoints to HDFS in the background while training writes them.
  upload_new_checkpoints "${local_checkpoint_path}" "${checkpoint_path}" &

  # $argslist stays unquoted on purpose: it is a flat string of extra options
  # that must be split back into separate words.
  # shellcheck disable=SC2086
  python fairseq/train.py "${local_dataset_path}" --ddp-backend=no_c10d \
    --save-dir "${local_checkpoint_path}" \
    --tensorboard-logdir "${local_tensorboard_path}" \
    --restore-file "${local_pretrained_path}/checkpoint_best.pt" \
    --task summarization_mbart_rank \
    --arch mbart_large \
    --langs "$langs" \
    --encoder-normalize-before --decoder-normalize-before \
    --reset-optimizer --reset-dataloader --reset-meters --reset-lr-scheduler \
    --only-encoder-for-cls \
    --num-classes 4 \
    --init-token 0 --separator-token 2 \
    --max-option-length 128 \
    --max-positions 1024 \
    --truncate-sequence \
    --shorten-method "truncate" \
    --dropout 0.1 --attention-dropout 0.1 --weight-decay 0.01 \
    --criterion sentence_ranking \
    --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-06 \
    --clip-norm 0.0 \
    --lr-scheduler fixed --lr 1e-05 \
    --fp16 --fp16-init-scale 4 --threshold-loss-scale 1 --fp16-scale-window 128 \
    --max-sentences 1 \
    --required-batch-size-multiple 1 \
    --update-freq 8 \
    --max-epoch 5 \
    --log-interval 10 \
    --log-format simple \
    --keep-best-checkpoints 3 \
    --save-interval-updates 2000 \
    --patience 5 \
    --user-dir examples/summarization \
    $argslist

elif [ "$MODE" == "generate" ]; then
  # Placeholder: this branch only prints a message, no command is run.
  echo "Ranking..."

fi

echo "Put ${local_tensorboard_path} to ${tensorboard_logdir}" >&2
# Expand the local glob into an array first: when the log directory is empty
# the pattern stays unexpanded, and passing that literal string to hadoop
# would only produce a confusing "no such file" error.
tensorboard_files=("${local_tensorboard_path}"/*)
if [[ -e "${tensorboard_files[0]}" ]]; then
  hadoop fs -put -f "${tensorboard_files[@]}" "${tensorboard_logdir}/"
else
  echo "No tensorboard logs found in ${local_tensorboard_path}; nothing to upload" >&2
fi
# NOTE(review): presumably a grace period that lets the background checkpoint
# watcher finish pending uploads before the job ends; confirm.
sleep 600
