#!/usr/bin/env bash
#
# Launches a fairseq `multilingual_translation` training run.
#
# This first section only defines configuration: filesystem locations,
# model/optimisation hyper-parameters, and the run-specific output directory
# derived from them. The commands further down consume these variables.

# Fail fast: abort on any failing command, on use of an unset variable, and
# on a failure in any stage of a pipeline.
set -euo pipefail

# --- Filesystem layout -------------------------------------------------------
workspace=/workspace
save_dir="$workspace/models/opus_predict"              # prefix; suffixed below
codes_dir="$workspace/codes/git-repo/fairseq-master"   # checkout holding train.py
data_dir="$workspace/experiments/experiments-opus-100/opus_enzh2en"

# --- Training hyper-parameters -----------------------------------------------
seed=1111
max_tokens=8192
dropout=0.1
attention_heads=8     # used for both encoder and decoder
embed_dim=512         # used for both encoder and decoder
ffn_embed_dim=2048    # used for both encoder and decoder
encoder_layers=6
decoder_layers=6

# --- Language pair and optimisation ------------------------------------------
source_lang='en'
target_lang='zh'
lr=0.001
update_freq=10

# Encode the key settings in the output directory name so that runs with
# different settings do not overwrite each other.
# NOTE(review): this yields "..._enzh2zh_..." whereas data_dir ends in
# "opus_enzh2en" -- confirm that the mismatch is intended.
save_dir="${save_dir}_${source_lang}${target_lang}2${target_lang}_${encoder_layers}_${decoder_layers}_${lr}_${update_freq}_noise_0.6_del_replace_insert"

# Create the output directory up front. The training log below is redirected
# into it, so stop with a clear message if it cannot be created.
mkdir -p -- "$save_dir" || {
  printf 'error: cannot create save directory: %s\n' "$save_dir" >&2
  exit 1
}

# Expose device indices 0-7 to the training process.
export CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7'

# Arguments for train.py, kept in an array so every value is passed as exactly
# one word regardless of its content, and so the groups can be commented.
train_args=(
  # Task, architecture and language pair (derived from the variables that
  # also name the output directory, so the two cannot drift apart).
  --task multilingual_translation
  --arch multilingual_transformer
  --lang-pairs "${source_lang}-${target_lang}"
  --save-dir "$save_dir"

  # Optimizer and learning-rate schedule.
  --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0
  --lr-scheduler inverse_sqrt --lr "$lr" --min-lr 1e-09 --max-update 300000
  --warmup-updates 8000 --warmup-init-lr '1e-07'

  # Loss.
  --criterion label_smoothed_cross_entropy --label-smoothing 0.1

  # Reproducibility and batching.
  --seed "$seed"
  --max-tokens "$max_tokens" --update-freq "$update_freq"

  # Regularisation.
  --dropout "$dropout" --relu-dropout 0.1 --attention-dropout 0.1

  # Model dimensions (encoder and decoder share the same settings).
  --decoder-attention-heads "$attention_heads" --encoder-attention-heads "$attention_heads"
  --decoder-embed-dim "$embed_dim" --encoder-embed-dim "$embed_dim"
  --decoder-ffn-embed-dim "$ffn_embed_dim" --encoder-ffn-embed-dim "$ffn_embed_dim"
  --encoder-layers "$encoder_layers" --decoder-layers "$decoder_layers"
  --shared-encoder-decoder-emb --shared-decoder-emb-output

  # Logging and checkpointing.
  --log-format simple --log-interval 500
  --keep-interval-updates 10 --save-interval-updates 3000

  # Runtime options.
  --ddp-backend=no_c10d
  --skip-invalid-size-inputs-valid-test
  --fp16
)

# Run training; stdout and stderr both go to train.log inside the save
# directory. As the last command, its exit status is the script's status.
python "$codes_dir/train.py" "$data_dir/data-bin" "${train_args[@]}" \
  >"$save_dir/train.log" 2>&1
