#!/usr/bin/env bash
#
# Dataset bootstrap: creates ./datasets and then fetches every dataset whose
# directory is not already present there. The script relies on bash features
# (arrays, the `function` keyword), hence the explicit bash shebang.
#
# Must be started from the project root (the directory containing .git),
# because WORK_DIR is taken from the current working directory.

# Refuse to run from anywhere else; the diagnostic goes to stderr so it is not
# mixed into regular output.
test -d ".git" || { echo "This script must be executed from the project root directory" >&2; exit 1; }

# -x: trace each command as it runs; -e: abort on the first unhandled failure.
set -xe
test -d "datasets" || mkdir -v "datasets"

# Absolute locations read by the loader functions.
WORK_DIR=$(pwd)
DATASET_DIR="$WORK_DIR/datasets"

# conll03
#######################################
# Downloads the conll03 archive (unless one is already present), unpacks it,
# renames the unpacked conll03_ner directory to conll03, runs the project's
# load_conll03_doclvl.py script and finally deletes the *.txt files from the
# conll03 directory.
# Globals:
#   DATASET_DIR (read) - directory that receives the dataset
#   WORK_DIR    (read) - project root containing scripts/datasets/
# Arguments:
#   None
# Returns:
#   wget's exit status if the download fails; otherwise the status of the
#   last command executed.
#######################################
load_conll03() {
  local archive="$DATASET_DIR/conll03.zip"
  local status

  # An archive left by an earlier run is reused. A fresh download is written
  # to a temporary name and renamed only on success, so an interrupted
  # transfer can never be mistaken for a complete archive on the next run.
  if ! test -f "$archive"; then
    wget https://polybox.ethz.ch/index.php/s/bFf8vJBonIT7sr8/download -O "$archive.part" || {
      status=$?
      rm -f -- "$archive.part"
      return "$status"
    }
    mv -- "$archive.part" "$archive"
  fi

  unzip -q "$archive" -d "$DATASET_DIR"
  rm -v "$archive"
  mv "$DATASET_DIR/conll03_ner" "$DATASET_DIR/conll03"

  # NOTE(review): the script presumably works on $DATASET_DIR/conll03 and is
  # the consumer of the *.txt files removed below - confirm against the script.
  python3 "$WORK_DIR/scripts/datasets/load_conll03_doclvl.py"

  # Only the directory part is quoted: the path must not be word-split when it
  # contains spaces, while the *.txt glob still has to expand.
  rm -fv -- "$DATASET_DIR"/conll03/*.txt
}

# conll04
#######################################
# Creates $DATASET_DIR/conll04 and recursively downloads the remote conll04
# directory into it.
# Globals:
#   DATASET_DIR (read) - directory that receives the dataset
# Arguments:
#   None
#######################################
load_conll04() {
  local dest="$DATASET_DIR/conll04"
  local remote="http://lavis.cs.hs-rm.de/storage/spert/public/datasets/conll04/"

  mkdir -v "$dest"

  # -r with --no-parent: recurse, but never above the given URL.
  # -nH with --cut-dirs=100: drop the host name and the remote directory
  # components, so files are stored directly under the -P prefix.
  # --reject "index.html*": skip the server's directory listing pages.
  wget -r -nH --cut-dirs=100 \
    --reject "index.html*" \
    --no-parent \
    "$remote" \
    -P "$dest"
}

# ade
#######################################
# Creates $DATASET_DIR/ade, recursively downloads the remote ade directory
# into it and then runs the project's load_ade.py script.
# Globals:
#   DATASET_DIR (read) - directory that receives the dataset
#   WORK_DIR    (read) - project root containing scripts/datasets/
# Arguments:
#   None
#######################################
load_ade() {
  local dest="$DATASET_DIR/ade"
  local remote="http://lavis.cs.hs-rm.de/storage/spert/public/datasets/ade/"
  local loader="$WORK_DIR/scripts/datasets/load_ade.py"

  mkdir -v "$dest"

  # Recursive fetch restricted to the URL's subtree; host name and remote
  # directory components are stripped so files land directly in $dest, and
  # directory listing pages are skipped.
  wget -r -nH --cut-dirs=100 \
    --reject "index.html*" \
    --no-parent \
    "$remote" \
    -P "$dest"

  # NOTE(review): presumably post-processes the downloaded files - confirm
  # against the script itself.
  python3 "$loader"
}

# genia
#######################################
# Creates $DATASET_DIR/genia and runs the project's load_genia.py script.
# Nothing is downloaded by this function itself.
# Globals:
#   DATASET_DIR (read) - directory that receives the dataset
#   WORK_DIR    (read) - project root containing scripts/datasets/
# Arguments:
#   None
#######################################
load_genia() {
  local dest="$DATASET_DIR/genia"
  local loader="$WORK_DIR/scripts/datasets/load_genia.py"

  mkdir -v "$dest"

  # NOTE(review): the script presumably fetches and fills the directory
  # created above - confirm against the script itself.
  python3 "$loader"
}

# nyt
#######################################
# Creates $DATASET_DIR/nyt, downloads dev.json, test.json and train.json of
# the NYT-multi data from the JointER GitHub repository into it, and then
# runs the project's load_nyt.py script.
# Globals:
#   DATASET_DIR (read) - directory that receives the dataset
#   WORK_DIR    (read) - project root containing scripts/datasets/
# Arguments:
#   None
#######################################
load_nyt() {
  # Function-local on purpose: without `local` these would leak into the
  # global scope of the script (the loop variable used to be called `type`,
  # the same name as the shell builtin).
  local -a splits=( "dev" "test" "train" )
  local split
  local dest="$DATASET_DIR/nyt"

  mkdir -v "$dest"
  for split in "${splits[@]}"; do
    wget "https://raw.githubusercontent.com/yubowen-ph/JointER/master/dataset/NYT-multi/data/$split.json" -P "$dest"
  done

  # NOTE(review): presumably converts the downloaded JSON files - confirm
  # against the script itself.
  python3 "$WORK_DIR/scripts/datasets/load_nyt.py"
}

# scierc
#######################################
# Creates $DATASET_DIR/scierc and recursively downloads the remote scierc
# directory into it.
# Globals:
#   DATASET_DIR (read) - directory that receives the dataset
# Arguments:
#   None
#######################################
load_scierc() {
  local dest="$DATASET_DIR/scierc"
  local remote="http://lavis.cs.hs-rm.de/storage/spert/public/datasets/scierc/"

  mkdir -v "$dest"

  # Recursive fetch that stays below the URL (--no-parent), stores files
  # directly under $dest (-nH, --cut-dirs=100, -P) and skips the server's
  # directory listing pages (--reject).
  wget -r -nH --cut-dirs=100 \
    --reject "index.html*" \
    --no-parent \
    "$remote" \
    -P "$dest"
}

# Run each loader only when its dataset directory does not exist yet; an
# existing directory is taken to mean the dataset was already loaded.
# The paths are relative to the current directory.
if [[ ! -d "datasets/conll03" ]]; then
  load_conll03
fi
if [[ ! -d "datasets/conll04" ]]; then
  load_conll04
fi
if [[ ! -d "datasets/ade" ]]; then
  load_ade
fi
if [[ ! -d "datasets/genia" ]]; then
  load_genia
fi
if [[ ! -d "datasets/nyt" ]]; then
  load_nyt
fi
if [[ ! -d "datasets/scierc" ]]; then
  load_scierc
fi
