
# ---------------------------------------------------------------------------
# Settings shared by every run of task_distill.py in the loop below.
# ---------------------------------------------------------------------------

# Forwarded as --student_model; also embedded in the log file name.
student_model='skipbert-tiny'

# Forwarded as --model_type_student.
# Alternative value kept from the original script: SkipBertEarlyExit
model_type_student='SkipBert'

# Layer-count settings, each forwarded to the flag of the same name.
num_layers_student=10
num_full_hidden_layers_student=4
num_masked_layers_teacher=4
num_masked_last_layers_teacher=0

# ---------------------------------------------------------------------------
# Per-task settings. These are parallel arrays: element k of every array
# belongs to the k-th task, so all of them must keep the same length and order.
#
#   eval_steps       -> --eval_step
#   epochs_no_cls    -> --epochs_no_cls (also added to epochs, see below)
#   epochs_no_evals  -> --epochs_no_eval
#   batch_sizes      -> --train_batch_size
#   alphas / betas   -> --alpha / --beta
#   lrs              -> --learning_rate, used as "<value>e-5"
#   epochs           -> --num_train_epochs receives epochs + epochs_no_cls
# ---------------------------------------------------------------------------
TASK_NAMES=(      CoLA  SST-2  MRPC  STS-B  QQP  MNLI  QNLI  RTE  WNLI )
task_names=(      cola  sst-2  mrpc  sts-b  qqp  mnli  qnli  rte  wnli )
eval_steps=(        50    100    50     50  500   500   200   50    50 )
epochs_no_cls=(     10      1     1     10    0     0     0    1     1 )
epochs_no_evals=(    0      0     0      0   20    20    30    0     0 )
batch_sizes=(       32     32    32     16   32    32    32   16    16 )
alphas=(             1      1     0      0    1     1     1    0     0 )
lrs=(                2      2     2      2    2     2     2    2     2 )
epochs=(            20     10    20     20   30    30    40   20    10 )
betas=(            0.1    0.2   0.1    0.2  0.1   0.1   0.1  0.1   0.2 )

# Run task_distill.py once per entry of TASK_NAMES, taking each per-task
# setting from the same index of the parallel arrays defined earlier in this
# script. The indices are derived from TASK_NAMES itself, so adding or removing
# a task cannot leave the loop out of step with the settings table.
# Runs are sequential; a failing run does not stop the remaining tasks.
for i in "${!TASK_NAMES[@]}"
do

    TASK_NAME=${TASK_NAMES[i]}
    # Lower-case task name; not referenced by the command below.
    task_name=${task_names[i]}

    teacher_model="../teachers/${TASK_NAME}"

    OUTPUT_DIR="./model-tiny/${TASK_NAME}/"
    # Only consumed by the commented-out redirection after the python call.
    LOG_OUTPUT_PATH="${OUTPUT_DIR}log_${student_model}.txt"

    mkdir -p "${OUTPUT_DIR}"

    # --learning_rate is the per-task value with an "e-5" suffix appended.
    # --num_train_epochs is epochs[i] plus epochs_no_cls[i].
    python task_distill.py \
        --fp16 \
        --train_batch_size "${batch_sizes[i]}" \
        --eval_batch_size 128 \
        --data_dir "../data/${TASK_NAME}/" \
        --teacher_model "${teacher_model}" \
        --student_model "${student_model}" \
        --model_type_student "${model_type_student}" \
        --task_name "${TASK_NAME}" \
        --output_dir "${OUTPUT_DIR}" \
        --num_layers_student "${num_layers_student}" \
        --num_full_hidden_layers_student "${num_full_hidden_layers_student}" \
        --num_masked_layers_teacher "${num_masked_layers_teacher}" \
        --num_masked_last_layers_teacher "${num_masked_last_layers_teacher}" \
        --epochs_no_cls "${epochs_no_cls[i]}" \
        --epochs_no_eval "${epochs_no_evals[i]}" \
        --use_embedding false \
        --use_att true \
        --use_rep true \
        --use_logits true \
        --learning_rate "${lrs[i]}e-5" \
        --num_train_epochs "$(( epochs[i] + epochs_no_cls[i] ))" \
        --alpha "${alphas[i]}" \
        --beta "${betas[i]}" \
        --eval_step "${eval_steps[i]}" \
        --freeze_lower_layers true \
        --do_fit true \
        --share_param false
#     &> "${LOG_OUTPUT_PATH}"

done