# Run preprocessing/process_ace05e.py over the train/dev/test JSON files in
# $DYGIEFORMAT_PATH, once per backbone, writing <split>.w1.oneie.json files
# into a per-backbone directory under ./processed_data.
#
# DYGIEFORMAT_PATH and OUTPUT_PATH are exported. When this section finishes,
# OUTPUT_PATH is left pointing at the last directory (ace05e_bert).
export DYGIEFORMAT_PATH="./processed_data/ace05e_dygieppformat"

# Each entry is "<output directory suffix>:<value passed to -b>".
for spec in 'bart:facebook/bart-large' 't5:t5-base' 'bert:bert-large-cased'; do
    # ${spec%%:*} keeps the text before the first ':' (the directory suffix).
    export OUTPUT_PATH="./processed_data/ace05e_${spec%%:*}"

    # -p creates missing parent directories and stays quiet when the directory
    # already exists, so the script can be re-run without mkdir errors.
    mkdir -p "$OUTPUT_PATH"

    for split in train dev test; do
        # ${spec#*:} keeps the text after the first ':' (the -b value).
        # NOTE(review): "-w 1" presumably corresponds to the "w1" in the output
        # file name -- confirm against process_ace05e.py.
        python preprocessing/process_ace05e.py \
            -i "$DYGIEFORMAT_PATH/$split.json" \
            -o "$OUTPUT_PATH/$split.w1.oneie.json" \
            -b "${spec#*:}" \
            -w 1
    done
done

# Split each tokenizer-specific training file according to the document lists
# under $SPLIT_PATH. For every doc_list_<ID> file, preprocessing/split_dataset.py
# receives the full train.w1.oneie.json (-i) and the list file (-s), and writes
# train.<ID>.w1.oneie.json into the same directory as the input (-o).
#
# BASE_PATH and SPLIT_PATH are exported with their original values.
export BASE_PATH="./processed_data/"
export SPLIT_PATH="./resource/low_resource_split/ace05e"

for TOKENIZER_NAME in 'bart' 't5' 'bert'; do
    # BASE_PATH already ends in "/", so this path contains "//". That is kept
    # on purpose: the arguments handed to the Python script stay unchanged.
    dataset_dir="$BASE_PATH/ace05e_$TOKENIZER_NAME"

    # Suffixes of the doc_list_* files; the same suffix names the output file.
    for list_id in 001 002 003 005 010 020 030 050 075; do
        python preprocessing/split_dataset.py \
            -i "$dataset_dir/train.w1.oneie.json" \
            -s "$SPLIT_PATH/doc_list_$list_id" \
            -o "$dataset_dir/train.$list_id.w1.oneie.json"
    done
done

# Split the DyGIE++-format training file according to the document lists under
# $SPLIT_PATH. For every doc_list_<ID> file, preprocessing/split_dataset_dygie.py
# receives ace05e_dygieppformat/train.json (-i) and the list file (-s), and
# writes train.<ID>.json into the same directory as the input (-o).
#
# BASE_PATH and SPLIT_PATH are exported with their original values.
export BASE_PATH="./processed_data/"
export SPLIT_PATH="./resource/low_resource_split/ace05e"

# Suffixes of the doc_list_* files; the same suffix names the output file.
for list_id in 001 002 003 005 010 020 030 050 075; do
    # BASE_PATH already ends in "/", so these paths contain "//". That is kept
    # on purpose: the arguments handed to the Python script stay unchanged.
    python preprocessing/split_dataset_dygie.py \
        -i "$BASE_PATH/ace05e_dygieppformat/train.json" \
        -s "$SPLIT_PATH/doc_list_$list_id" \
        -o "$BASE_PATH/ace05e_dygieppformat/train.$list_id.json"
done
