#!/usr/bin/env bash
# Launch configuration for multi-node training.
#
# This file needs bash: it uses an indexed array (ip_list) and a C-style
# `for ((...))` loop, neither of which exists in plain POSIX sh.
# Unless noted otherwise, each variable below is forwarded as a command-line
# flag to ${train_file}.py by the launch loop at the end of this file.

# Basename of the Python entry point; the launcher runs "${train_file}.py".
train_file=train

######################### Image Setting #########################
my_testing_image=V6      # --my_testing_image
input_size=224           # --input-size
image_depth=12           # --Image_depth
Image_embed_dims=640     # --Image_embed_dims
Image_patch_size=32      # --Image_patch_size
Image_hidden_rate=5      # --Image_hidden_rate
Image_num_heads=8        # --Image_num_heads
drop_path_rate=0.3       # --drop_path_rate

######################### Text Setting #########################
my_testing=V6_bidirectional  # --my_testing
ctx_len=77                   # --ctx_len
vocab_size=49408             # --vocab_size
head_size_a=64               # --head_size_a
head_size_divisor=8          # --head_size_divisor
n_embd=640                   # --n_embd
n_layer=6                    # --n_layer
text_initialization=True     # --text_initialization

######################### Others #########################
# One address per node. The array index is used as the node rank and the
# first entry is used as the master address.
# e.g. one node: ("1.1.1.1"); multiple nodes: ("1.1.1.1" "2.2.2.2")
ip_list=(YOUR_ADDRESS)
lr=0.001                      # --lr
dropout=0.0                   # --dropout
# NOTE(review): model_name is not referenced by the launch command in this
# file; kept in case something else reads it.
model_name=clip_rwkv
opt=adamw                     # --optimizer
weight_decay=0.2              # --weight-decay
train_num_samples=15061515    # --train-num-samples
epochs=32                     # --epochs
batch_size=512                # --batch-size
precision=bf16                # --precision
open_checkpoint=False         # --open_checkpoint
traindata=TRAINING_DATA_PATH  # --train-data
output=OUTPUT_PATH            # --output

# Fan out one torchrun launch per node over SSH, then wait for all of them.
#
# For each entry of ip_list (its array index is the node rank) this logs in as
# root, changes to the same directory this script was started from, and runs
# torchrun with 8 processes per node, using the first entry of ip_list as the
# master address and port 23760.
#
# Globals read: ip_list, train_file and the training settings forwarded as
#               flags below.
# Exit status:  0 when every SSH session succeeded, 1 otherwise.

# The whole remote command is a single string that the remote shell parses
# again, so values substituted locally must be escaped for that second parse.
# The working directory and PATH come from the environment and may contain
# spaces or parentheses, hence %q.
printf -v remote_dir '%q' "$PWD"
printf -v remote_path '%q' "$PATH"

num_nodes=${#ip_list[@]}
launch_pids=()

for ((node_rank = 0; node_rank < num_nodes; node_rank++)); do
  # NOTE: the training settings below are interpolated as-is (unchanged from
  # before); the remote shell word-splits and expands them, so they must not
  # contain whitespace or shell metacharacters unless that is intended.
  # `&&` instead of `;` so torchrun is not started if the remote cd fails.
  ssh "root@${ip_list[node_rank]}" "cd ${remote_dir} && PATH=${remote_path} \
  CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  NCCL_ALGO=Ring \
  NCCL_SOCKET_IFNAME=eth0 \
  NCCL_SOCKET_NTHREADS=8 \
  NCCL_NSOCKS_PERTHREAD=2 \
  torchrun --nproc_per_node 8 \
  --nnodes=${num_nodes} \
  --node_rank=$node_rank \
  --master_addr=${ip_list[0]} \
  --master_port=23760  ${train_file}.py \
    --open_checkpoint $open_checkpoint \
    --drop_path_rate $drop_path_rate \
    --my_testing_image $my_testing_image \
    --Image_num_heads $Image_num_heads \
    --input-size $input_size \
    --dropout $dropout \
    --precision $precision \
    --Image_depth $image_depth \
    --Image_embed_dims $Image_embed_dims \
    --Image_patch_size $Image_patch_size \
    --Image_hidden_rate $Image_hidden_rate \
    --ctx_len $ctx_len \
    --vocab_size $vocab_size \
    --my_testing $my_testing \
    --head_size_a $head_size_a \
    --head_size_divisor $head_size_divisor \
    --n_embd $n_embd \
    --n_layer $n_layer \
    --text_initialization $text_initialization \
    --batch-size $batch_size \
    --epochs $epochs \
    --lr $lr \
    --optimizer $opt \
    --output $output \
    --train-data $traindata \
    --train-num-samples $train_num_samples \
    --weight-decay $weight_decay" &
  launch_pids+=("$!")
done

# Barrier: without this the script would exit immediately with status 0 and a
# failed node would go unnoticed.
launch_failed=0
for rank in "${!launch_pids[@]}"; do
  if ! wait "${launch_pids[rank]}"; then
    printf 'node %s (%s) exited with an error\n' "$rank" "${ip_list[rank]}" >&2
    launch_failed=1
  fi
done

# Last command of the script: its status becomes the script's exit status
# (kept as a test rather than `exit` so sourcing the file stays harmless).
[[ "$launch_failed" -eq 0 ]]
