#!/bin/bash
#
# Builds a label list from the CoNLL files under ~/corpus and then runs the
# transformers legacy token-classification example (run_ner.py) with the
# --do_train, --do_eval and --do_predict flags.

# Load conda's shell integration so that `conda activate` is usable from this
# non-interactive script, then switch to the environment used for the run.
# NOTE(review): the environment name is spelled "hugginface" — confirm this
# matches the actual conda environment before "correcting" it.
source ~/Miniconda3/etc/profile.d/conda.sh
conda activate hugginface

# Fetch the transformers sources into ~/transformers, the location the
# run_ner.py invocation reads them from. An explicit destination makes the
# script independent of the directory it is started in, and skipping the clone
# when the directory already exists lets the script be re-run.
if [[ ! -d ~/transformers ]]; then
  git clone https://github.com/huggingface/transformers.git ~/transformers
fi
# Build labels.txt: the sorted, de-duplicated set of non-empty values found in
# the second tab-separated column of the training and dev CoNLL files.
#
# The two inputs must be passed as separate arguments; fused together as
# "training.conll~/corpus/dev.conll" they name one non-existent file and the
# resulting label list is empty.
for conll_file in ~/corpus/training.conll ~/corpus/dev.conll; do
  if [[ ! -r "$conll_file" ]]; then
    # Stop early: continuing would silently produce an incomplete label list.
    printf 'cannot read corpus file: %s\n' "$conll_file" >&2
    exit 1
  fi
done

cut -d $'\t' -f 2 ~/corpus/training.conll ~/corpus/dev.conll \
  | grep -v '^$' \
  | sort -u > labels.txt
# Run configuration. Each value is forwarded to run_ner.py through the
# command-line flag named in the trailing comment.
OUTPUT_DIR=~/out                          # --output_dir
BERT_MODEL=bert-base-multilingual-cased   # --model_name_or_path
MAX_LENGTH=256                            # --max_seq_length
BATCH_SIZE=32                             # --per_device_train_batch_size
NUM_EPOCHS=3                              # --num_train_epochs
SAVE_STEPS=750                            # --save_steps
SEED=1                                    # --seed

# Export everything in one go so child processes inherit the settings.
export OUTPUT_DIR BERT_MODEL MAX_LENGTH BATCH_SIZE NUM_EPOCHS SAVE_STEPS SEED

# Launch the legacy token-classification example from the transformers
# checkout. The arguments are collected in an array so that every expansion
# can be quoted: values containing whitespace or glob characters (e.g. an
# OUTPUT_DIR under a home directory with a space in it) are then passed
# through as single, unmodified arguments.
#
# NOTE(review): the label list is built from files under ~/corpus, yet
# --data_dir points at the current directory — confirm the input files the
# script expects are actually present there.
ner_args=(
  --data_dir ./
  --labels ./labels.txt
  --model_name_or_path "$BERT_MODEL"
  --output_dir "$OUTPUT_DIR"
  --max_seq_length "$MAX_LENGTH"
  --num_train_epochs "$NUM_EPOCHS"
  --per_device_train_batch_size "$BATCH_SIZE"
  --save_steps "$SAVE_STEPS"
  --seed "$SEED"
  --do_train
  --do_eval
  --do_predict
)

python ~/transformers/examples/legacy/token-classification/run_ner.py "${ner_args[@]}"