#!/usr/bin/env bash
#
# Runs generate_dense_embeddings_from_tokenized.py for one model/setting pair.
#
# Usage: <this script> MODEL_NAME SETTING BATCH_SIZE CUDA_DEVICES
#   MODEL_NAME    run name under ../od_splinter/runs, or one of the special
#                 names handled below (bert, orqa-unsup, condenser,
#                 cocondenser, dpr-single, dpr-multi).
#   SETTING       "zero-shot", or the name of a sub-directory of the run
#                 directory that contains a "best_cp" checkpoint.
#   BATCH_SIZE    forwarded verbatim as --batch_size.
#   CUDA_DEVICES  exported verbatim as CUDA_VISIBLE_DEVICES (an omitted
#                 argument therefore exports an empty value).
#
# The exit status is that of the Python process.

MODEL_NAME=$1
SETTING=$2
BATCH_SIZE=$3
export CUDA_VISIBLE_DEVICES="$4"

#######################################
# Print the run directory for a model/setting pair.
# Arguments: $1 - model name, $2 - setting
# Outputs:   the directory path on stdout
#######################################
resolve_model_dir() {
  local name=$1 setting=$2
  if [[ "$setting" == "zero-shot" ]]; then
    printf '%s\n' "../od_splinter/runs/$name"
  else
    printf '%s\n' "../od_splinter/runs/$name/$setting"
  fi
}

#######################################
# Print the checkpoint path to pass as --model_file.
# Arguments: $1 - model name, $2 - setting, $3 - run directory
# Outputs:   the checkpoint path on stdout; an empty line for the models
#            that are used without a checkpoint file in zero-shot mode
#######################################
resolve_model_file() {
  local name=$1 setting=$2 model_dir=$3
  if [[ "$setting" != "zero-shot" ]]; then
    printf '%s\n' "$model_dir/best_cp"
    return 0
  fi
  case "$name" in
    bert | orqa-unsup | condenser | cocondenser)
      printf '\n'
      ;;
    dpr-single)
      printf '%s\n' "../DPR/data/retriever/checkpoint/single_hf_bert_base.cp"
      ;;
    dpr-multi)
      printf '%s\n' "../DPR/data/retriever/checkpoint/multi_hf_bert_base.cp"
      ;;
    *)
      printf '%s\n' "$model_dir/checkpoints/dpr_biencoder.100000"
      ;;
  esac
}

#######################################
# Print the value to pass as --pretrained_model_cfg.
# Arguments: $1 - model name
# Outputs:   the config name/path on stdout ("bert-base-uncased" unless
#            the model name has a dedicated entry)
#######################################
resolve_pretrained_cfg() {
  local name=$1
  case "$name" in
    orqa-unsup) printf '%s\n' "../dpr_models/orqa_torch" ;;
    condenser) printf '%s\n' "Luyu/condenser" ;;
    cocondenser) printf '%s\n' "Luyu/co-condenser-wiki" ;;
    *) printf '%s\n' "bert-base-uncased" ;;
  esac
}

MODEL_DIR=$(resolve_model_dir "$MODEL_NAME" "$SETTING")
MODEL_FILE=$(resolve_model_file "$MODEL_NAME" "$SETTING" "$MODEL_DIR")
echo "Model dir: $MODEL_DIR"
echo "Model file: $MODEL_FILE"

PRETRAINED_MODEL_CFG=$(resolve_pretrained_cfg "$MODEL_NAME")

OUTPUT_DIR="$MODEL_DIR/embeddings"

# Build the argument list as an array so that values containing whitespace
# or glob characters reach the Python script as single, unmodified arguments.
embed_args=(
  --encoder_model_type hf_bert
  --pretrained_model_cfg "$PRETRAINED_MODEL_CFG"
  --model_file
)
# An empty MODEL_FILE yields a bare "--model_file" flag with no value, which
# is what the previous unquoted expansion produced.
# NOTE(review): confirm the Python script accepts --model_file without a value.
if [[ -n "$MODEL_FILE" ]]; then
  embed_args+=("$MODEL_FILE")
fi
embed_args+=(
  # The wildcard is passed literally; the shell must not expand it.
  --input_file '../dpr_embeddings/tokenized-bert-base-uncased-240/tokenized_passages_*.pkl'
  --output_dir "$OUTPUT_DIR"
  --fp16
  --do_lower_case
  --batch_size "$BATCH_SIZE"
)

python generate_dense_embeddings_from_tokenized.py "${embed_args[@]}"