#!/bin/bash

# Launches run_pre_training.py in the background on 8 local processes via
# torch.distributed.launch. Combined stdout/stderr goes to
# ${log_dir}/${name}.log.
#
# Edit the placeholder paths below before running.

set -euo pipefail

model_path="BERT-base path"
output_dir="save path"
name="debug"
train_path="train-data path"
dev_path="dev path"
log_dir="./logs"

# Must be exported: a plain assignment is only a shell variable and is not
# inherited by the python child process.
# Keep the number of listed devices in sync with --nproc_per_node below.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# The output redirection below fails (and training never starts) if the log
# directory is missing, so create it up front.
mkdir -p -- "$log_dir"

# Path variables are quoted so values containing whitespace are passed as a
# single argument each.
nohup python -m torch.distributed.launch --nproc_per_node 8 run_pre_training.py \
  --output_dir "$output_dir" \
  --model_name_or_path "$model_path" \
  --do_train \
  --save_steps 3000 \
  --per_device_train_batch_size 64 \
  --per_device_eval_batch_size 64 \
  --gradient_accumulation_steps 8 \
  --eval_accumulation_steps 4 \
  --fp16 \
  --warmup_ratio 0.1 \
  --learning_rate 1e-4 \
  --num_train_epochs 10 \
  --overwrite_output_dir \
  --dataloader_num_workers 32 \
  --n_head_layers 1 \
  --skip_from 2 \
  --max_seq_length 512 \
  --train_dir "$train_path" \
  --validation_file "$dev_path" \
  --do_eval \
  --evaluation_strategy steps \
  --eval_steps 3000 \
  --weight_decay 0.01 \
  --late_mlm \
  > "${log_dir}/${name}.log" 2>&1 &
