#!/bin/bash

# Downloads the fastText aligned Wikipedia word vectors for one language and
# converts them with preprocess.py.
#
# Options:
#   -l  ISO-639-1 (https://www.loc.gov/standards/iso639-2/php/code_list.php)
#       two letter code for language such as "en", "fa" or "de" (required)
#   -o  Path to store fasttext embeddings (required)
#   -p  Path to the directory containing preprocess.py (required whenever the
#       processed file does not exist yet)
#
# Every expansion below is quoted, so no global `set -f` / IFS tweaking is
# needed to protect values containing spaces or glob characters.

# Print a short usage line to stderr.
usage() {
	printf 'Usage: %s -l LANG_CODE -o OUTPUT_PATH -p PYTHON_SCRIPT_PATH\n' "${0##*/}" >&2
}

# Report a command-line error, show usage and exit with status 2.
usage_error() {
	printf 'Error: %s\n' "$*" >&2
	usage
	exit 2
}

# Report a runtime failure and exit with status 1.
die() {
	printf 'Error: %s\n' "$*" >&2
	exit 1
}

# The language code is kept in LANG_CODE rather than LANG: LANG is the POSIX
# locale variable, so overwriting it would change the locale of this shell and
# of the wget/python child processes.
LANG_CODE=""
# A value inherited from the environment still acts as the default for these
# two, exactly as it did before explicit initialisation was added.
OUTPUT_PATH=${OUTPUT_PATH:-}
PYTHON_SCRIPT_PATH=${PYTHON_SCRIPT_PATH:-}

# Parse options. Values are stored as plain strings (not word-split arrays) so
# that paths containing spaces are preserved in full.
while getopts ":l:o:p:" OPT; do
	case "$OPT" in
		l ) LANG_CODE=$OPTARG ;;

		o ) OUTPUT_PATH=$OPTARG ;;

		p ) PYTHON_SCRIPT_PATH=$OPTARG ;;

		: ) usage_error "option -$OPTARG requires an argument" ;;

		\? ) usage_error "unknown option -$OPTARG" ;;
	esac
done

[[ -n "$LANG_CODE" ]] || usage_error "missing required option -l (language code)"
[[ -n "$OUTPUT_PATH" ]] || usage_error "missing required option -o (output path)"

# Download vector files
BASE_URL_ALIGNED="https://dl.fbaipublicfiles.com/fasttext/vectors-aligned"
VEC_FILE_NAME_ALIGNED="wiki.$LANG_CODE.align.vec"
PROCESSED_VEC_FILE_NAME_ALIGNED="fasttext_wiki-$LANG_CODE-multilingual-aligned_dim-300.kvec"

BASE_URL_UNALIGNED="https://dl.fbaipublicfiles.com/fasttext/vectors-wiki"
VEC_FILE_NAME_UNALIGNED="wiki.$LANG_CODE.vec"
# NOTE(review): "-align_" in the *unaligned* output name looks like a
# copy-paste slip; it is only used by the disabled code below — confirm before
# re-enabling.
PROCESSED_VEC_FILE_NAME_UNALIGNED="fasttext_wiki-$LANG_CODE-align_dim-300.kvec"

# Download aligned file
if [[ ! -f "$OUTPUT_PATH/$VEC_FILE_NAME_ALIGNED" ]]; then
	echo "Downloading aligned vector file from $BASE_URL_ALIGNED/$VEC_FILE_NAME_ALIGNED..."
	mkdir -p -- "$OUTPUT_PATH" || die "cannot create output directory $OUTPUT_PATH"
	# Download to a temporary name and rename on success, so that a failed or
	# interrupted transfer is never mistaken for a complete file on a later run.
	PARTIAL_DOWNLOAD="$OUTPUT_PATH/$VEC_FILE_NAME_ALIGNED.part"
	if ! wget -O "$PARTIAL_DOWNLOAD" "$BASE_URL_ALIGNED/$VEC_FILE_NAME_ALIGNED"; then
		rm -f -- "$PARTIAL_DOWNLOAD"
		die "download of $BASE_URL_ALIGNED/$VEC_FILE_NAME_ALIGNED failed"
	fi
	mv -- "$PARTIAL_DOWNLOAD" "$OUTPUT_PATH/$VEC_FILE_NAME_ALIGNED" \
		|| die "cannot move downloaded file into $OUTPUT_PATH"
	echo "Download completed!"
else
	echo "Found $OUTPUT_PATH/$VEC_FILE_NAME_ALIGNED on disk, skipping download!"
fi

# Download unaligned file (currently disabled)
#if [[ ! -f "$OUTPUT_PATH/$VEC_FILE_NAME_UNALIGNED" ]]; then
#	echo "Downloading unaligned vector file from $BASE_URL_UNALIGNED/$VEC_FILE_NAME_UNALIGNED..."
#	wget "$BASE_URL_UNALIGNED/$VEC_FILE_NAME_UNALIGNED" -P "$OUTPUT_PATH"
#	echo "Download completed!"
#else
#	echo "Found $OUTPUT_PATH/$VEC_FILE_NAME_UNALIGNED on disk, skipping download!"
#fi

# Process aligned file
if [[ ! -f "$OUTPUT_PATH/$PROCESSED_VEC_FILE_NAME_ALIGNED" ]]; then
	# -p is only needed when there is actually something to process.
	[[ -n "$PYTHON_SCRIPT_PATH" ]] || usage_error "missing required option -p (python script path)"
	echo "Processing the aligned file $VEC_FILE_NAME_ALIGNED..."
	python "$PYTHON_SCRIPT_PATH/preprocess.py" \
		-a process_fasttext_embeddings \
		-ip "$OUTPUT_PATH" -i "$VEC_FILE_NAME_ALIGNED" \
		-op "$OUTPUT_PATH" -o "$PROCESSED_VEC_FILE_NAME_ALIGNED" \
		-ed 300 --skip-first-line \
		|| die "processing of $VEC_FILE_NAME_ALIGNED failed"
else
	echo "Found $OUTPUT_PATH/$PROCESSED_VEC_FILE_NAME_ALIGNED on disk, skipping processing!"
fi

# Process unaligned file (currently disabled)
#if [[ ! -f "$OUTPUT_PATH/$PROCESSED_VEC_FILE_NAME_UNALIGNED" ]]; then
#	echo "Processing the unaligned file $VEC_FILE_NAME_UNALIGNED..."
#	python "$PYTHON_SCRIPT_PATH/preprocess.py" -a process_fasttext_embeddings -ip "$OUTPUT_PATH" -i "$VEC_FILE_NAME_UNALIGNED" -op "$OUTPUT_PATH" -o "$PROCESSED_VEC_FILE_NAME_UNALIGNED" -ed 300 --skip-first-line
#else
#	echo "Found $OUTPUT_PATH/$PROCESSED_VEC_FILE_NAME_UNALIGNED on disk, skipping processing!"
#fi