#!/bin/bash

# Environment setup: activate the conda environment used for training and
# print the interpreter version so the log records which Python is in use.
#
# `conda activate` relies on the `conda` shell function, which is only defined
# once conda's shell hook has been loaded. A non-interactive script does not
# load it on its own, so load it here when the function is missing and a conda
# executable is available on PATH.
if ! declare -F conda >/dev/null && command -v conda >/dev/null; then
  eval "$(conda shell.bash hook)"
fi

conda activate # activate your conda env.
python --version

# Base path to store the outputs.
BASE_PATH="../"

# Config file handed to the training script through --config_path.
YAML_FILE="yaml_files/t5large_reddit_ppo_on_supervised.yaml"

# Name of the project and experiment for wandb.
PROJECT_NAME="t5_base"
# Reward functions handed to --reward_fn_to_use (presumably a ';'-separated
# list -- confirm against train_text_generation.py).
REWARD_FN="all_purity;adjust_by_complexity;long_sentence;frequency_based"
# The experiment name is the base name with the reward list appended directly.
EXPERIMENT_NAME="t5base_curriculum_learning${REWARD_FN}"
printf '%s\n' "$EXPERIMENT_NAME"

# Where the supervised model is stored.
MODEL_NAME="eval_gold_t5_only_gold_train_t5_base/model"

# Dataset locations; the test data is read from the same directory.
data_path="../../Dataset_Release/"
test_data_path="${data_path}"
epoch_to_evaluate=80

# File that receives a copy of the training output.
# NOTE(review): EXPERIMENT_NAME already ends with REWARD_FN, so the reward list
# appears twice in this file name. Left unchanged so existing result files keep
# their names -- confirm whether the duplication is intended.
path_to_save_output="rl_results_${EXPERIMENT_NAME}${REWARD_FN}.txt"

# Wandb API key.
# WANDB_KEY=$(<wandb_key)
WANDB_KEY=""

# Wandb entity name.
WANDB_ENTITY=""

# Create the directory to store the results. The path is quoted so that it
# stays a single argument even if one of its parts is edited to contain spaces
# or glob characters.
mkdir -p -- "$BASE_PATH/$PROJECT_NAME/$EXPERIMENT_NAME"



# Launch training and mirror its output into $path_to_save_output.
#
# The arguments are collected in an array so that each value reaches
# train_text_generation.py as exactly one argument, whatever characters it
# contains.
echo "Running"

train_args=(
  --base_path_to_store_results "$BASE_PATH"
  --config_path "$YAML_FILE"
  --project_name "$PROJECT_NAME"
  --experiment_name "$EXPERIMENT_NAME"
  --model_name "$MODEL_NAME"
)

# Only pass --entity_name when an entity is configured. An empty, unquoted
# value would disappear from the command line and leave --entity_name without
# an argument.
# NOTE(review): assumes the training script treats --entity_name as optional
# -- confirm against its argument parser.
if [[ -n "$WANDB_ENTITY" ]]; then
  train_args+=(--entity_name "$WANDB_ENTITY")
fi

train_args+=(
  --log_to_wandb --evaluate_bias
  --epoch_to_evaluate "$epoch_to_evaluate"
  --data_path "$data_path"
  --test_data_path "$test_data_path"
  --reward_fn_to_use "$REWARD_FN"
  --eval_every 2
  --batch_size 20
  --run_curriculum_learning
  --openai_url "http://128.10.12.201:47526/v1"
  --should_save
)

# Without pipefail the pipeline's status is tee's, so a crashed training run
# would still make this script exit 0.
set -o pipefail
WANDB_API_KEY="$WANDB_KEY" python -u train_text_generation.py "${train_args[@]}" \
  | tee "$path_to_save_output"