#!/usr/bin/env bash


# Positional arguments:
#   $1   NAME  run name; becomes part of the local work directory and of the
#              remote log/checkpoint paths
#   $2   LG    language code; becomes part of the same paths
#   $3+        extra command-line flags for fairseq/train.py
# NOTE(review): neither NAME nor LG is validated; when they are empty the local
# work directory degenerates to "~/_".
NAME="$1"
LG="$2"

# Join every argument after the first two into one space-separated string
# ("$*" joins on the first character of IFS, a space by default).
# Any unquoted expansion of argslist splits it on whitespace again, so an extra
# argument that itself contains whitespace does not survive as a single word.
argslist="${*:3}"

# Log the forwarded flags on stderr. printf with a quoted operand is used
# because an unquoted echo would glob-expand the flags and would treat a leading
# "-n" or "-e" as an option to echo itself.
printf '%s\n' "${argslist}" >&2

# NAME="debug_denoiseRankingSumm"
# LG="zh"

# HDFS roots of the two user areas referenced by this script.
prefix=hdfs://haruna/home/byte_arnold_lq_mlnlc/user/wangdanqing.122
wxz_prefix=hdfs://haruna/home/byte_arnold_hl_mlnlc/user/wuxianze.0

# Remote (HDFS) locations.
# DATAVER is not assigned in the visible part of this script, so it has to come
# from the caller's environment; when unset it expands to an empty path
# component.
# NOTE(review): dataset_path, tensorboard_logdir and checkpoint_path are not
# referenced again in the visible part of this script -- confirm whether a
# step that syncs data/logs/checkpoints with HDFS is missing or lives elsewhere.
# dataset_path=${wxz_prefix}/Datasets/Multilingual/data-bin/
dataset_path=${wxz_prefix}/Datasets/multilingual/data-bin/${DATAVER}/${LG}
tensorboard_logdir=${wxz_prefix}/Workspace/Multilingual/${LG}/logs/${NAME}
checkpoint_path=${wxz_prefix}/Workspace/Multilingual/${LG}/checkpoints/${NAME}
pretrained_path=${prefix}/Workshop/MultiLingual/pretrained

# Local working tree, one per (NAME, LG) pair, under the user's home directory.
local_root=~/${NAME}_${LG}
resource_root=${local_root}/resource
output_path=${local_root}/output
model_path=${local_root}/model

# Create the three work directories in a single call. The paths are quoted so a
# NAME or LG containing whitespace or glob characters still names exactly one
# directory, and a failure stops the script instead of surfacing later as a
# confusing download or training error.
if ! mkdir -p -- "${resource_root}" "${output_path}" "${model_path}"; then
  echo "Failed to create working directories under ${local_root}" >&2
  exit 1
fi

# Comma-separated language-tag list handed to the trainer via --langs.
langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN

# Local paths consumed by the download step and the training command.
local_dataset_path=${resource_root}/dataset
local_tensorboard_path=${output_path}/tensorboard_logdir
local_checkpoint_path=${output_path}/checkpoint_path
local_pretrained_path=${model_path}/mbart.cc25
# Fetch and unpack the pretrained model archive unless its target directory is
# already present locally. Each step is checked: continuing after a failed
# download or extraction would start training without the checkpoint that
# --restore-file points at.
if [[ ! -d "${local_pretrained_path}" ]]; then
  echo "Load pretrained model from ${pretrained_path}/mbart.CC25.tar.gz to ${local_pretrained_path}" >&2

  # Remove an archive left behind by an interrupted earlier run; it may be
  # truncated, and copyToLocal does not overwrite an existing local file.
  rm -f -- "${model_path}/mbart.CC25.tar.gz"

  if ! hadoop fs -copyToLocal "${pretrained_path}/mbart.CC25.tar.gz" "${model_path}"; then
    echo "Failed to download ${pretrained_path}/mbart.CC25.tar.gz" >&2
    exit 1
  fi

  if ! tar -xvzf "${model_path}/mbart.CC25.tar.gz" -C "${model_path}"; then
    echo "Failed to extract ${model_path}/mbart.CC25.tar.gz" >&2
    # The directory did not exist before this branch ran, so whatever is there
    # now is a partial extraction; remove it so the next run retries instead of
    # treating it as a complete model. (Assumes the archive unpacks into
    # ${local_pretrained_path} -- TODO confirm.)
    rm -rf -- "${local_pretrained_path:?}"
    exit 1
  fi
else
  echo "Pretrained model in ${local_pretrained_path}" >&2
fi


# Launch training from the locally unpacked pretrained checkpoint.
#
# The fixed options live in an array so that every path is passed as exactly
# one word and the groups can carry comments.
#
# --max-tokens: the command used to pass this flag twice (4096 first, 1024
# later). With argparse-style parsing the later occurrence wins, so 1024 was
# the value in effect; only that one is kept.
train_args=(
  "${local_dataset_path}" --ddp-backend=no_c10d

  # Where to write checkpoints/logs and which checkpoint to start from.
  --save-dir "${local_checkpoint_path}"
  --tensorboard-logdir "${local_tensorboard_path}"
  --restore-file "${local_pretrained_path}/model.pt"

  # Model and data.
  --arch rank_summ_gen_large
  --source-lang doc --target-lang sum
  --langs "${langs}"
  --dataset-impl mmap
  --truncate-source
  --encoder-normalize-before --decoder-normalize-before
  --layernorm-embedding

  # Objective; the --reset-* flags are applied on top of --restore-file.
  --criterion cross_entropy_and_ranking_document --label-smoothing 0.2
  --reset-optimizer --reset-dataloader --reset-meters --reset-lr-scheduler
  --required-batch-size-multiple 1

  # Optimisation.
  --dropout 0.1 --attention-dropout 0.1
  --weight-decay 0.01 --optimizer adam --adam-betas "(0.9, 0.999)" --adam-eps 1e-08
  --lr 3e-5 --min-lr -1
  --lr-scheduler polynomial_decay
  --clip-norm 0.1
  --update-freq 4
  --skip-invalid-size-inputs-valid-test
  --find-unused-parameters
  --num-workers 100
  --fp16
  --total-num-update 200000 --warmup-updates 2500

  # Logging, checkpoint retention and early stopping.
  --log-interval 1
  --log-format simple
  --keep-best-checkpoints 3
  --no-epoch-checkpoints
  --patience 3

  # Task-specific options.
  --user-dir examples/summarization
  --ranking-head-name sentence_contrastive_head
  --negative-sample-number 3
  --ranking-loss-weight 1
  --max-tokens 1024
  --task denoise_ranking_document_summarization
  --ranking-loss-reduction sum
)

# Caller-supplied flags (script arguments 3 and up) go last so they can
# override the defaults above. "${@:3}" forwards each one as a single word, so
# values containing spaces or glob characters reach train.py unchanged.
python3 fairseq/train.py "${train_args[@]}" "${@:3}"