#!/usr/bin/env bash
#
# Runs dense_retriever.py for one model/setting and, in "hybrid" mode,
# follows up with hybrid_retriever.py to combine the dense results with the
# results stored under ../od_splinter/runs/BM25/eval-1000/.
#
# Usage: <script> MODEL_NAME SETTING QA_FILE MODE [CUDA_VISIBLE_DEVICES]
#   MODEL_NAME  run name under ../od_splinter/runs
#   SETTING     "zero-shot", or any other value, which is then used as a
#               sub-directory of the run directory
#   QA_FILE     "nq", "trivia", "all", or a custom question-file pattern
#   MODE        "dense" or "hybrid"
#   5th arg     exported verbatim as CUDA_VISIBLE_DEVICES; when omitted an
#               EMPTY value is exported
#               NOTE(review): an empty value presumably hides every GPU from
#               CUDA -- confirm this is intended.
#
# The script uses bash-only syntax ([[ ... ]]), hence the explicit shebang.

# Only a hint: execution deliberately continues so the behaviour of the
# existing MODE validation further down is unchanged.
if (( $# < 4 )); then
  echo "Usage: ${0##*/} MODEL_NAME SETTING QA_FILE MODE [CUDA_VISIBLE_DEVICES]" >&2
fi

MODEL_NAME=$1
SETTING=$2
QA_FILE=$3
MODE=$4
export CUDA_VISIBLE_DEVICES=$5

# Echo the parsed arguments so they show up in the run log.
echo "Model name: $MODEL_NAME"
echo "Setting: $SETTING"
echo "Dataset: $QA_FILE"
echo "Mode: $MODE"

# Derive the run directory, the checkpoint file and the W&B run name from
# MODEL_NAME and SETTING.
case "$SETTING" in
  zero-shot)
    MODEL_DIR="../od_splinter/runs/$MODEL_NAME"
    WANDB_NAME="eval-$MODEL_NAME"
    case "$MODEL_NAME" in
      bert | orqa-unsup | condenser | cocondenser)
        # These models are evaluated without a checkpoint file.
        MODEL_FILE=""
        ;;
      dpr-single)
        MODEL_FILE="../DPR/data/retriever/checkpoint/single_hf_bert_base.cp"
        ;;
      dpr-multi)
        MODEL_FILE="../DPR/data/retriever/checkpoint/multi_hf_bert_base.cp"
        ;;
      *)
        # Any other model name: use the checkpoint inside its run directory.
        MODEL_FILE="$MODEL_DIR/checkpoints/dpr_biencoder.100000"
        ;;
    esac
    ;;
  *)
    # Every non-zero-shot setting lives in its own sub-directory of the run
    # and is evaluated from that directory's best_cp file.
    MODEL_DIR="../od_splinter/runs/$MODEL_NAME/$SETTING"
    MODEL_FILE="$MODEL_DIR/best_cp"
    WANDB_NAME="eval-$MODEL_NAME-$SETTING"
    ;;
esac
echo "Model dir: $MODEL_DIR"
echo "Model file: $MODEL_FILE"
echo "W&B name: $WANDB_NAME"

# Choose the value later passed as --pretrained_model_cfg. Three model names
# have a dedicated configuration; everything else uses bert-base-uncased.
case "$MODEL_NAME" in
  orqa-unsup)
    PRETRAINED_MODEL_CFG="../dpr_models/orqa_torch"
    ;;
  condenser)
    PRETRAINED_MODEL_CFG="Luyu/condenser"
    ;;
  cocondenser)
    PRETRAINED_MODEL_CFG="Luyu/co-condenser-wiki"
    ;;
  *)
    PRETRAINED_MODEL_CFG="bert-base-uncased"
    ;;
esac

# Translate the dataset shorthand into the comma-separated list of question
# file patterns handed to --qa_file. Any value other than the three
# shorthands is forwarded unchanged as a custom pattern.
case "$QA_FILE" in
  nq)
    QA_PATTERN="../DPR/data/retriever/qas/nq-dev.csv,../DPR/data/retriever/qas/nq-test*.csv"
    ;;
  trivia)
    QA_PATTERN="../DPR/data/retriever/qas/trivia-dev.csv,../DPR/data/retriever/qas/trivia-test*.csv"
    ;;
  all)
    QA_PATTERN="../DPR/data/retriever/qas/nq-test*.csv,../DPR/data/retriever/qas/trivia-test*.csv,../DPR/data/retriever/qas/nq-dev.csv,../DPR/data/retriever/qas/trivia-dev.csv,../DPR/data/retriever/qas/webquestions-test*.csv,../DPR/data/retriever/qas/curatedtrec-test.csv,../DPR/data/retriever/qas/squad1-test.csv,../entity-questions/dataset/test/P*.test.json"
    ;;
  *)
    QA_PATTERN=$QA_FILE
    ;;
esac
echo "QA file pattern: $QA_PATTERN"

# Per-mode settings for the dense retrieval run:
#   N_DOCS           value passed as --n-docs
#   OUTPUT_DIR       value passed as --output_dir (in hybrid mode it is also
#                    the --first_results input of hybrid_retriever.py)
#   ADDITIONAL_ARGS  whitespace-separated extra flags appended to the
#                    dense_retriever.py command line
case "$MODE" in
  dense)
    N_DOCS=200
    OUTPUT_DIR="$MODEL_DIR/eval"
    ADDITIONAL_ARGS="--wandb_run_type eval-$SETTING"
    ;;
  hybrid)
    N_DOCS=1000
    OUTPUT_DIR="$MODEL_DIR/eval-1000"
    ADDITIONAL_ARGS="--output_no_text --no_wandb"
    ;;
  *)
    # Report on stderr and fail with a non-zero status. A bare `exit` here
    # would return the status of the preceding echo (0) and make an invalid
    # invocation look successful to callers.
    echo "Invalid MODE argument... Options: {dense,hybrid}" >&2
    exit 1
    ;;
esac
echo "Num docs: $N_DOCS"
echo "Output dir: $OUTPUT_DIR"

# Dense retrieval over the Wikipedia passage split using the pre-computed
# passage embeddings found in the model's run directory.
#
# All expansions are quoted so that glob characters inside QA_PATTERN (e.g.
# "nq-test*.csv") and any whitespace in paths reach the Python script
# verbatim instead of being expanded or split by the shell.
# ADDITIONAL_ARGS is the one exception: it holds several whitespace-separated
# flags and therefore must be word-split.
#
# `|| exit` stops the script with python's own exit status when retrieval
# fails, so the failure is neither hidden from callers nor followed by
# further steps working on missing results.
# shellcheck disable=SC2086
python dense_retriever.py \
  --encoder_model_type hf_bert \
  --pretrained_model_cfg "$PRETRAINED_MODEL_CFG" \
  --model_file "$MODEL_FILE" \
  --qa_file "$QA_PATTERN" \
  --ctx_file ../DPR/data/wikipedia_split/psgs_w100.tsv \
  --encoded_ctx_file "$MODEL_DIR/embeddings/wikipedia_passages*.pkl" \
  --output_dir "$OUTPUT_DIR" \
  --n-docs "$N_DOCS" \
  --batch_size 64 \
  --wandb_project od_splinter \
  --wandb_name "$WANDB_NAME" \
  $ADDITIONAL_ARGS || exit

# Dense-only runs are finished at this point; every other mode carries on
# with the commands that follow.
[[ $MODE != "dense" ]] || exit 0

# Hybrid step: combine the dense results written to OUTPUT_DIR with the
# results stored under ../od_splinter/runs/BM25/eval-1000/ and write the
# outcome to "$MODEL_DIR/eval-hybrid".
# OUTPUT_DIR is quoted so a run directory containing whitespace or glob
# characters is passed as a single, literal argument.
python hybrid_retriever.py \
  --first_results "$OUTPUT_DIR" \
  --second_results "../od_splinter/runs/BM25/eval-1000/" \
  --ctx_file ../DPR/data/wikipedia_split/psgs_w100.tsv \
  --output_dir "$MODEL_DIR/eval-hybrid" \
  --lambda_min 1.0 \
  --k_to_optimize 100 \
  --n-docs 200 \
  --num_threads 20 \
  --wandb_project od_splinter \
  --wandb_name "$WANDB_NAME-hybrid" \
  --wandb_run_type "eval-$SETTING"