#!/bin/bash

# Launches pretrain_bert.py with a 12-layer / 768-hidden / 12-head model,
# batch size 256 and sequence length 128, on a single GPU. Checkpoints and
# TensorBoard logs are written to (and checkpoints are loaded from)
# CHECKPOINT_PATH.

# Abort on command failure, on use of an unset variable, and on a failure in
# any pipeline stage.
set -euo pipefail

# Dataset argument(s) for --data-path, kept as an array so that every entry is
# passed as its own word without relying on unquoted word-splitting.
# Alternatives that were used previously:
#   DATA_PATH=(/datadrive/data/wiki_bert_text_sentence)
#   DATA_PATH=(0.5 /datadrive/data/wiki_bert_text_sentence
#              0.5 /datadrive/data/bookcorpus_text_text_sentence)
DATA_PATH=("/datadrive/data/bc+wiki_text_sentence")

# Directory shared by --save, --load and --tensorboard-dir.
CHECKPOINT_PATH="/datadrive/checkpoints/bert_bsz256_len128_save_tensors"

# train with length 128 to speed up
# NOTE(review): --lr-decay-iters (990000) is much larger than --train-iters /
# --stop-iter (10000) — confirm this mismatch is intentional.
train_args=(
  --stop-iter 10000
  --num-layers 12
  --hidden-size 768
  --num-attention-heads 12
  --batch-size 256
  --seq-length 128
  --num-workers 12
  --max-position-embeddings 128
  --train-iters 10000
  --save "$CHECKPOINT_PATH"
  --load "$CHECKPOINT_PATH"
  --tensorboard-dir "$CHECKPOINT_PATH"
  --tokenizer-type Auto
  --tokenizer-model-type bert-base-uncased
  --data-impl mmap
  --split 949,50,1
  --lr 0.0001
  --lr-decay-style linear
  --min-lr 1.0e-5
  --lr-decay-iters 990000
  --weight-decay 1e-2
  --clip-grad 1.0
  --warmup .01
  --log-interval 100
  --save-interval 10000
  --eval-interval 10000
  --eval-iters 10
  --data-path "${DATA_PATH[@]}"
  --num-gpus 1
  # Optional flag, currently disabled:
  # --save-initial-checkpoint
)

python pretrain_bert.py "${train_args[@]}"