#!/bin/bash
# Launches step1_supervised_finetuning/main.py through the `deepspeed` launcher
# on GPUs 0 and 1, with LoRA-related flags enabled.
#
# Usage:
#   <this script> [MODEL_DIR]
#
# MODEL_DIR may also be supplied through the MODEL_DIR environment variable;
# the positional argument wins when both are given.
#
# The script must be started from the workspace root: the training data
# (data/FT), the output directory (output/model) and the training entry point
# (applications/DeepSpeed-Chat/...) are all resolved relative to $(pwd).

# `conda activate` is a shell function that only exists once conda's shell
# hook has been loaded. Interactive shells usually get it from the user's
# rc file; a script started with `bash script.sh` does not, so load it here.
if ! declare -F conda >/dev/null 2>&1; then
  conda_hook=$(conda shell.bash hook) || {
    printf 'error: cannot load the conda shell hook (is conda on PATH?)\n' >&2
    exit 1
  }
  eval "$conda_hook"
  unset conda_hook
fi
conda activate deepspeed || {
  printf 'error: failed to activate conda environment "deepspeed"\n' >&2
  exit 1
}

# Trace from here on so the log shows the exact launch command without the
# noise of conda's activation internals.
set -x

BASEDIR=$(pwd)
WS=$BASEDIR

# Base model location: first CLI argument, else $MODEL_DIR. There is no
# sensible default, so refuse to run without one.
model_dir=${1:-${MODEL_DIR:-}}
if [[ -z "$model_dir" ]]; then
  printf 'usage: %s MODEL_DIR   (or set the MODEL_DIR environment variable)\n' "$0" >&2
  exit 2
fi

data_dir=$WS/data/FT
out_dir=$WS/output/model
mkdir -p -- "$out_dir" || {
  printf 'error: cannot create output directory %s\n' "$out_dir" >&2
  exit 1
}

export CUDA_VISIBLE_DEVICES=0,1

# Arguments for main.py, kept in an array so that paths containing spaces
# survive as single words.
train_args=(
  --data_path "$data_dir"
  --data_split 1,0,0
  --model_name_or_path "$model_dir"
  --per_device_train_batch_size 4
  --only_optimize_lora
  --per_device_eval_batch_size 1
  --learning_rate 1e-4
  --weight_decay 0.1
  --num_train_epochs 4
  --gradient_accumulation_steps 2
  --num_warmup_steps 0
  --seed 1234
  --zero_stage 3
  --lora_dim 8
  --lora_module_name model.layers.
  --lr_scheduler_type cosine
  --deepspeed
  --max_seq_len 1024
  --output_dir "$out_dir"
  --save_epoch_ckpt
  --save_ckpt_step 500
  --train_on_targets
  --lora_dropout 0.1
  --bf16
)

deepspeed --master_port=12345 \
  "$WS/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/main.py" \
  "${train_args[@]}"