#!/usr/bin/env bash
#
# Launches train.py on two GPUs with the oracle_transformer_wmt_en_de
# architecture and sentence-level oracle options, appending stdout to
# "$model_dir/training.log".
#
# Run from the directory that contains train.py; the data path below is
# relative to the current working directory.

# -e: stop on the first failing command; -u: unset variables are errors;
# pipefail: a failing train.py is not masked by tee's exit status.
set -euo pipefail

# Expose two devices; this matches --distributed-world-size 2 below.
export CUDA_VISIBLE_DEVICES=0,1

data_bin_dir=examples/translation/reddit-tok-shh/data-bin
model_dir=./base_model_reddit_sentence_oracles_decay_20_t4

# tee opens its output file as soon as the pipeline starts, so the directory
# must already exist; otherwise the log is lost on a fresh run.
mkdir -p -- "$model_dir"

# Arguments are kept in an array so each one stays a single word and the
# groups can be commented without fragile backslash continuations.
train_args=(
  "$data_bin_dir"

  # Model.
  --arch oracle_transformer_wmt_en_de --share-all-embeddings

  # Optimizer and learning-rate schedule.
  --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0
  --lr-scheduler inverse_sqrt
  --warmup-init-lr 1e-07 --warmup-updates 4000 --lr 0.0007 --min-lr 1e-09
  --weight-decay 0.0

  # Loss.
  --criterion oracle_label_smoothed_cross_entropy --label-smoothing 0.1

  # Batching, logging, checkpointing.
  --max-tokens 8192 --update-freq 1
  --no-progress-bar --log-format tqdm --max-epoch 64
  # NOTE(review): --keep-interval-updates is given without
  # --save-interval-updates; confirm it has the intended effect.
  --log-interval 20 --keep-interval-updates 10 --save-interval 1
  --seed 1111 --use-epoch-numbers-decay
  --skip-invalid-size-inputs-valid-test
  --eval-bleu
  --train-subset train
  --valid-subset valid
  --reset-optimizer

  # Oracle sampling options.
  --use-sentence-level-oracles --decay-k 20
  --use-greed-gumbel-noise --gumbel-noise 0.8

  # Distributed setup: two workers, one per visible device.
  --distributed-port 31111 --distributed-world-size 2 --ddp-backend=no_c10d

  # Language pair and output location.
  --source-lang qu --target-lang re
  --save-dir "$model_dir"
)

# Only stdout is piped to the log (stderr still goes to the terminal);
# -a appends so earlier runs' logs are preserved.
python train.py "${train_args[@]}" | tee -a "$model_dir/training.log"
