#!/usr/bin/env bash
# Launch train.py (arch: adapt_transformer_wmt_en_de) on the binarized data in
# $data_bin_dir, exposing GPUs 0-3 to match --distributed-world-size 4.
# $model_dir is passed as --save-dir, and the trainer's stdout is appended to
# $model_dir/training.log while still being shown on the terminal.
#
# Usage: run from the directory that contains train.py; all paths are relative.
# NOTE(review): training reads the "valid" split and validates on "test"
# (--train-subset / --valid-subset below) -- presumably intentional for this
# adaptation run; confirm before reusing the script for regular training.

# Abort on the first failing command and on use of an unset variable.
set -eu
# Without pipefail the pipeline's status is tee's, so a crashed training run
# would still exit 0. Enable it only where the shell supports it, so that
# invoking the script via a plain /bin/sh keeps working.
if (set -o pipefail) 2>/dev/null; then
    set -o pipefail
fi

export CUDA_VISIBLE_DEVICES=0,1,2,3

data_bin_dir=examples/translation/reddit-tok-rm/data-bin
model_dir=./adapt_reddit_model_1_de10_rm_1000

# tee opens the log file as soon as the pipeline starts, i.e. before train.py
# has had any chance to create the save directory, so create it up front.
mkdir -p -- "$model_dir"

python train.py "$data_bin_dir" \
    --arch adapt_transformer_wmt_en_de --share-all-embeddings \
    --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 --lr-scheduler inverse_sqrt \
    --warmup-init-lr 1e-07 --warmup-updates 4000 --lr 0.0007 --min-lr 1e-09 \
    --weight-decay 0.0 --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \
    --max-tokens 8192 --update-freq 2 --log-format tqdm --max-epoch 1000 \
    --log-interval 20 --keep-interval-updates 10 --save-interval 20 --seed 1111 \
    --use-greed-gumbel-noise --gumbel-noise 0.8 \
    --distributed-port 31121 --distributed-world-size 4 --ddp-backend=no_c10d \
    --train-subset valid \
    --valid-subset test \
    --train_adapt_type 1 \
    --decay-k 10 \
    --eval-bleu \
    --reset-optimizer \
    --source-lang qu --target-lang re --save-dir "$model_dir" \
    | tee -a "$model_dir/training.log"
