# Docker

```shell
# Start an interactive, auto-removed (--rm) container using the NVIDIA runtime
# and the host IPC namespace, then drop into bash in /home/hangbo.
# Mounts: host /home/t-habao/docker_home -> /home/hangbo, host /mnt -> /data.
# --privileged is a container-wide flag, so it only needs to be given once.
sudo docker run \
   -it --rm --runtime=nvidia --ipc=host \
   --privileged \
   -v /home/t-habao/docker_home:/home/hangbo \
   -v /mnt:/data \
   -w /home/hangbo \
   hangbo/pytorch:1.2.0-cuda10-apex-blob bash
```

# Run

```shell

# Directory holding the MNLI fine-tuning run; used by every step below.
export OUTPUT_DIR=/data/mnli_output_2e-6x5
# Directory evaluated in the last step. Presumably this is where
# checkpoint_fusing.py writes its result for these settings -- TODO confirm.
FUSED_DIR="${OUTPUT_DIR}/n5-l1000-0.000000"

# Step 1: run checkpoint fusing over the run directory.
python nlu_finetune/checkpoint_fusing.py \
  --model_dir "$OUTPUT_DIR" \
  --num_ckpts 5 --step_length 1000 --num_max_steps 61360

# Step 2: copy the model config from the step-61360 checkpoint into FUSED_DIR.
cp "${OUTPUT_DIR}/steps-61360/config.json" "${FUSED_DIR}/config.json"

# Step 3: evaluate (--do_eval) the checkpoint in FUSED_DIR on MNLI using GPU 1.
# NOTE(review): the Docker command in this file mounts host /mnt at /data, but
# the paths below still start with /mnt/unilm -- confirm they resolve in the
# environment where this is run.
export CKPT_PATH=/mnt/unilm/hangbo/exp_for_unilm/model_to_test/v3_large_bsz4k_turing_cased_fix_attention_scale/1m.bin
CUDA_VISIBLE_DEVICES=1 python run_classifier.py \
    --model_type unilm --model_name_or_path "$CKPT_PATH" --task_name mnli \
    --data_dir /mnt/unilm/hangbo/exp_for_unilm/datasets/glue_data/mnli \
    --cached_train_file /mnt/unilm/hangbo/exp_for_unilm/datasets/glue_data/mnli.train.unilm3-base-cased.cache \
    --cached_dev_file /mnt/unilm/hangbo/exp_for_unilm/datasets/glue_data/mnli.dev.unilm3-base-cased.cache \
    --config_name ./unilm/configs/unilm3-large-cased.json --tokenizer_name unilm3-base-cased \
    --do_eval --logging_steps 100 --output_dir "$OUTPUT_DIR" --per_gpu_eval_batch_size 256 \
    --max_seq_length 128 --per_gpu_train_batch_size 32 --learning_rate 2e-6 --max_grad_norm 1.0 \
    --num_training_epochs 5 --weight_decay 0.1 --warmup_ratio 0.1 \
    --fp16 --fp16_opt_level O2 --seed 4321 --overwrite_output_dir --do_not_save \
    --fp16_init_loss_scale 128.0 --adam_epsilon 1e-6 --adam_betas 0.9,0.999 --layer_decay 1 \
    --num_save_ckpts 100 --save_checkpoint_steps 200 --checkpoints_to_eval "$FUSED_DIR"

```
