#!/bin/bash

# Input/output locations. The values below are placeholders; replace them
# with real paths before running.
model_path="model path"
output_dir="result path"
train_path="train-data path"
dev_path="dev path"

# Export the device list so the python child process inherits it. A bare
# assignment on its own line only creates a shell-local variable that the
# launched command never sees.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Launch run_pre_training.py through torch.distributed.launch with 8 processes,
# matching the 8 device ids exported above. Path variables are double-quoted so
# that values containing spaces are passed as single arguments.
python -m torch.distributed.launch --nproc_per_node 8 run_pre_training.py \
  --output_dir "$output_dir" \
  --model_name_or_path "$model_path" \
  --save_steps 50 \
  --per_device_train_batch_size 32 \
  --per_device_eval_batch_size 32 \
  --gradient_accumulation_steps 8 \
  --eval_accumulation_steps 8 \
  --fp16 \
  --warmup_ratio 0.1 \
  --learning_rate 2e-5 \
  --num_train_epochs 8 \
  --overwrite_output_dir \
  --dataloader_num_workers 32 \
  --n_head_layers 2 \
  --skip_from 2 \
  --max_seq_length 512 \
  --train_dir "$train_path" \
  --validation_file "$dev_path" \
  --do_eval \
  --evaluation_strategy steps \
  --eval_steps 10000 \
  --weight_decay 0.01 \
  --late_mlm
