# Requirements and Installation

* PyTorch version >= 1.5.0
* Python version >= 3.6
* **To install fairseq** and develop locally:

Download the code and then run the following commands:

``` bash
cd fairseq
pip install --editable ./
git clone https://github.com/rsennrich/subword-nmt.git
```

### Training and Testing

IWSLT'14 German to English

``` bash
1. Download Dataset:
cd examples/translation/
bash prepare-iwslt14.sh
cd ../..

2. Preprocess/binarize the data:
TEXT=examples/translation/iwslt14.tokenized.de-en
fairseq-preprocess --source-lang de --target-lang en \
    --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test \
    --destdir data-bin/iwslt14.tokenized.de-en \
    --workers 20

3. Train
CUDA_VISIBLE_DEVICES=0 python train.py data-bin/iwslt14.tokenized.de-en \
	--max-epoch 80  \
	--arch transformer_iwslt_de_en  --share-decoder-input-output-embed \
	--optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0  \
	--lr 5e-4 --lr-scheduler inverse_sqrt --warmup-updates 4000  \
	--dropout 0.3  --attention-dropout 0.1 --weight-decay 0.0001 \
	--criterion label_smoothed_cross_entropy --label-smoothing 0.1  \
	--max-tokens 4096     --eval-bleu     --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}'    \
	--eval-bleu-detok moses     --eval-bleu-remove-bpe   \
	--n_centroid 3  --n_hashes 1   \
	--truncate-source     --max-source-positions 96   \
	--best-checkpoint-metric bleu --maximize-best-checkpoint-metric    \
	--save-dir checkpoints/iwslt_de_en_reformer

4. Test
CUDA_VISIBLE_DEVICES=0  fairseq-generate data-bin/iwslt14.tokenized.de-en \
    --path checkpoints/iwslt_de_en_reformer/checkpoint_best.pt \
    --batch-size 128 --beam 5 --remove-bpe --quiet --truncate-source \
    --max-source-positions 96
```

WMT'14 English to German 

``` bash
1. Download Dataset:
cd examples/translation/
bash prepare-wmt14en2de.sh --icml17
cd ../..

2. Preprocess/binarize the data:
TEXT=examples/translation/wmt14_en_de
fairseq-preprocess \
    --source-lang en --target-lang de \
    --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test \
    --destdir data-bin/wmt14_en_de --thresholdtgt 0 --thresholdsrc 0 \
    --workers 20

3. Train
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7  python -m torch.distributed.launch --nproc_per_node 8 train.py data-bin/wmt14_en_de    \
    --max-epoch 200 \
    --arch transformer_wmt_en_de --share-decoder-input-output-embed \
    --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \
    --lr-scheduler inverse_sqrt --warmup-init-lr 1e-07 --warmup-updates 4000  \
    --lr 0.0007 --seed 42 \
    --criterion label_smoothed_cross_entropy --label-smoothing 0.1 --weight-decay 0.0 \
    --max-tokens  3000 --save-dir checkpoints/wmt14_en_de  \
    --no-progress-bar --log-format json --log-interval 50  \
    --n_centroid 3  --n_hashes 1 \
    --eval-bleu --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}'  \
    --eval-bleu-detok moses --eval-bleu-remove-bpe --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \
    --truncate-source  --max-source-positions 96 \
    --save-interval-updates 1000 --ddp-backend=legacy_ddp --keep-interval-updates 20
	
4. Average Checkpoints
CUDA_VISIBLE_DEVICES=0 python scripts/average_checkpoints.py \
    --inputs checkpoints/wmt14_en_de/ \
    --num-epoch-checkpoints  5 --output checkpoints/wmt14_en_de/averaged_model.pt

5. Test Tokenized BLEU
CUDA_VISIBLE_DEVICES=0 fairseq-generate data-bin/wmt14_en_de \
    --path checkpoints/wmt14_en_de/averaged_model.pt \
    --batch-size 64 --beam 4 --remove-bpe --quiet --truncate-source \
    --max-source-positions 96

6. Test Detokenized BLEU with SacreBLEU
sacrebleu -t wmt14 -l en-de --echo src | sacremoses -l en -q tokenize -a | python subword-nmt/subword_nmt/apply_bpe.py -c data/wmt14_en_de/code | python truncate.py > wmt14.en.bpe
cat wmt14.en.bpe | fairseq-interactive data/wmt14_en_de --path checkpoints/wmt14_en_de/averaged_model.pt -s en -t de --beam 5 --remove-bpe --truncate-source --max-source-positions 96 > wmt14.de.gen
cat wmt14.de.gen | grep ^H- | cut -f 3- | sacremoses -l de -q detokenize > wmt14.de.det 
cat wmt14.de.det | sacrebleu -t wmt14 -l en-de
```