#!/usr/bin/env bash
# Launches fairseq_cli/my_train.py on data-bin/wmt19_ch_en in joint-at mode,
# with GPUs 0-3 made visible to the process.
#
# Usage:
#   ./stage1_train_chen.sh <suffix> <enc_lr_scaling> <dec_lr_scaling> <lambda_at>
# Example:
#   ./stage1_train_chen.sh joint_at-natpossinu 1.0 1.0 0.7
#
# Arguments:
#   $1  suffix; becomes the checkpoint sub-directory name under
#       checkpoints_v7/wmt19_ch_en/ (optional, defaults to empty)
#   $2  value forwarded to --enc_lr_scaling (required)
#   $3  value forwarded to --dec_lr_scaling (required)
#   $4  value forwarded to --lambda_at      (required)
set -euo pipefail

root=./
#export PYTHONPATH=$root:$PYTHONPATH

usage="usage: ${0##*/} <suffix> <enc_lr_scaling> <dec_lr_scaling> <lambda_at>"

# Positional parameters cannot be assigned with `$1=...`; use a default
# expansion so a missing suffix simply becomes the empty string.
SUFFIX=${1:-}
model_signature=${SUFFIX}

# Fail fast with a usage message instead of handing value-less flags to the
# training script.
enc_lr_scaling=${2:?$usage}
dec_lr_scaling=${3:?$usage}
lambda_at=${4:?$usage}

export CUDA_VISIBLE_DEVICES=0,1,2,3

python "${root}/fairseq_cli/my_train.py" \
    "${root}/data-bin/wmt19_ch_en" \
    --task my_translation \
    --mode joint-at \
    --default_task_mode joint-at \
    --default_model_mode at \
    --source-lang ch --target-lang en \
    --arch my_at_nat_transformer \
    --at_nat_share_encoder --at_nat_share_emb \
    --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \
    --save-dir "${root}/checkpoints_v7/wmt19_ch_en/${model_signature}" \
    --ddp-backend=no_c10d \
    --max-update 300000 --save-interval-updates 5000 \
    --keep-interval-updates 40 --seed 1 \
    --lr 7e-4 \
    --enc_lr_scaling "${enc_lr_scaling}" \
    --dec_lr_scaling "${dec_lr_scaling}" \
    --lr-scheduler inverse_sqrt --warmup-updates 4000 \
    --weight-decay 0.0 \
    --criterion my_criterion --label-smoothing 0.1 \
    --encoder-normalize-before --at-decoder-normalize-before --nat-decoder-normalize-before \
    --max-tokens 8192 \
    --eval-bleu \
    --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}' \
    --eval-bleu-detok moses \
    --eval-bleu-remove-bpe \
    --eval-bleu-print-samples \
    --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \
    --encoder-layers 6 \
    --at-decoder-layers 6 \
    --nat-decoder-layers 6 \
    --apply-bert-init \
    --noise random_mask \
    --lambda_at "${lambda_at}"
