# [NAACL 2022]

A Dog Is Passing Over The Jet? A Text-Generation Dataset for Korean Commonsense Reasoning and Evaluation

## 0. Overview

This code archive provides the Korean CommonGen implementation and a sample dataset.

```python
/NAACL2022_Korean_CommonGen	
	/dataset # Dataset for Korean CommonGen train/dev/test
		- korean_commongen_official_train.txt/json 
		- korean_commongen_official_dev.txt/json
		- korean_commongen_official_test.txt/json 
	
		/ablation_1 # Dataset for Concept Ablation Study
			- korean_commongen_free_morpheme_train.txt/json 
			- korean_commongen_free_morpheme_test.txt/json 
			- korean_commongen_only_noun_verb_train.txt/json 
			- korean_commongen_only_noun_verb_test.txt/json 
		
		/ablation_2 # Dataset for Data Source Ablation Study
			- korean_commongen_image_only_train.txt/json 
			- korean_commongen_dialogue_summary_only_train.txt/json
	
		/high_level_commonsense_reasoning # Dataset for High-level commonsense reasoning
			- high_level_korean_commongen_train_seed_42.txt/json
			- high_level_korean_commongen_train_seed_52.txt/json
			- high_level_korean_commongen_train_seed_62.txt/json
			- high_level_korean_commongen_train_seed_72.txt/json
			- high_level_korean_commongen_train_seed_82.txt/json

		/reformulated_commongen # Dataset for reformulated commongen
			- korean_commongen_reformulated_test.txt/json 

	/baseline_results # Model-generated results in quantitative evaluation and ablation study
		/quantitative_eval # Main experimental results on Korean CommonGen
			- KoGPT2_quantitative.txt
			- KoBART_quantitative.txt
			- mBART_quantitative.txt
			- mBART_50_quantitative.txt
			- mT5_small_quantitative.txt
			- mT5_base_quantitative.txt
			- mT5_large_quantitative.txt

		/ablation_free_morph # Ablation results on Korean CommonGen (concept configuration)
			- KoGPT2_free_morph.txt
			- KoBART_free_morph.txt
			- mBART_50_free_morph.txt
			- mT5_large_free_morph.txt

		/ablation_noun_and_verb # Ablation results on Korean CommonGen (concept configuration)
			- KoGPT2_noun_and_verb.txt
			- KoBART_noun_and_verb.txt
			- mBART_50_noun_and_verb.txt
			- mT5_large_noun_and_verb.txt
		
		/ablation_only_image_captions # Ablation results on Korean CommonGen (data source)
			- KoGPT2_only_image_captions.txt
			- KoBART_only_image_captions.txt
			- mBART_50_only_image_captions.txt
			- mT5_large_only_image_captions.txt

		/ablation_only_dialogue_summary # Ablation results on Korean CommonGen (data source)
			- KoGPT2_only_dialogue_summary.txt
			- KoBART_only_dialogue_summary.txt
			- mBART_50_only_dialogue_summary.txt
			- mT5_large_only_dialogue_summary.txt

		/high_level_commonsense_reasoning # High-level commonsense reasoning results on Korean CommonGen
			- KoGPT2_high_level_commonsense_reasoning_seed42.txt
			- KoBART_high_level_commonsense_reasoning_seed42.txt
			- mBART_{25,50}_high_level_commonsense_reasoning_seed42.txt
			- mT5_{small,base,large}_high_level_commonsense_reasoning_seed42.txt

			- KoGPT2_high_level_commonsense_reasoning_seed52.txt
			- KoBART_high_level_commonsense_reasoning_seed52.txt
			- mBART_{25,50}_high_level_commonsense_reasoning_seed52.txt
			- mT5_{small,base,large}_high_level_commonsense_reasoning_seed52.txt
			
			- KoGPT2_high_level_commonsense_reasoning_seed62.txt
			- KoBART_high_level_commonsense_reasoning_seed62.txt
			- mBART_{25,50}_high_level_commonsense_reasoning_seed62.txt
			- mT5_{small,base,large}_high_level_commonsense_reasoning_seed62.txt

			- KoGPT2_high_level_commonsense_reasoning_seed72.txt
			- KoBART_high_level_commonsense_reasoning_seed72.txt
			- mBART_{25,50}_high_level_commonsense_reasoning_seed72.txt
			- mT5_{small,base,large}_high_level_commonsense_reasoning_seed72.txt

			- KoGPT2_high_level_commonsense_reasoning_seed82.txt
			- KoBART_high_level_commonsense_reasoning_seed82.txt
			- mBART_{25,50}_high_level_commonsense_reasoning_seed82.txt
			- mT5_{small,base,large}_high_level_commonsense_reasoning_seed82.txt

		/reformulated_commongen # Reformulated commongen test results on Korean CommonGen
			- KoGPT2_reformulated_commongen.txt
			- KoBART_reformulated_commongen.txt
			- mBART_50_reformulated_commongen.txt
			- mT5_large_reformulated_commongen.txt

	/human_evaluations # Evaluation results of humans with respect to the four criteria (.txt's index is same as model-generated sentence index)
		/kogpt2 # Korean commongen/reformulated commongen + (commonsense, factuality, fluency, grammar correction)
			- gpt2s_commonsense_korean_commongen.txt
			- gpt2s_commonsense_reform.txt
			- gpt2s_factuality_korean_commongen.txt
			- gpt2s_factuality_reform.txt
			- gpt2s_fluency_korean_commongen.txt
			- gpt2s_fluency_reform.txt
			- gpt2s_grammar_korean_commongen.txt
			- gpt2s_grammar_reform.txt

		/kobart # Korean commongen/reformulated commongen + (commonsense, factuality, fluency, grammar correction)
			- kobart_commonsense_korean_commongen.txt
			- kobart_commonsense_reform.txt
			- kobart_factuality_korean_commongen.txt
			- kobart_factuality_reform.txt
			- kobart_fluency_korean_commongen.txt
			- kobart_fluency_reform.txt
			- kobart_grammar_korean_commongen.txt
			- kobart_grammar_reform.txt

		/mbart-50 # Korean commongen/reformulated commongen + (commonsense, factuality, fluency, grammar correction)
			- mbart_commonsense_korean_commongen.txt
			- mbart_commonsense_reform.txt
			- mbart_factuality_korean_commongen.txt
			- mbart_factuality_reform.txt
			- mbart_fluency_korean_commongen.txt
			- mbart_fluency_reform.txt
			- mbart_grammar_korean_commongen.txt
			- mbart_grammar_reform.txt
	
		/mt5-large # Korean commongen/reformulated commongen + (commonsense, factuality, fluency, grammar correction)
			- mt5_commonsense_korean_commongen.txt
			- mt5_commonsense_reform.txt
			- mt5_factuality_korean_commongen.txt
			- mt5_factuality_reform.txt
			- mt5_fluency_korean_commongen.txt
			- mt5_fluency_reform.txt
			- mt5_grammar_korean_commongen.txt
			- mt5_grammar_reform.txt
			
	# Language modeling for fine-tuning 
	- run_language_modeling_KoGPT2.py
	- run_language_modeling_KoBART.py
	- run_language_modeling_mBART_mT5.py
	
	# Decoding systems for fine-tuned models 
	- decoding_GPT2_re_rank_commongen.py
	- decoding_BART_re_rank_commongen.py
	- decoding_mBART_re_rank_commongen.py
	- decoding_mT5_re_rank_commongen.py

	# Evaluation metrics
	- korean_commongen_evaluation_multi_ref.py
		/semantic_eval # For BERTscore
		/eval_metrics # For Rouge for Korean

	# Requirements for implementations
	- requirements.txt

```

## 1. Installation

- To run the evaluation metrics, you need to install KoNLPy (1.1) and Ko-mecab (1.2).

**1.1** KoNLPy 0.5.2

[https://konlpy.org/ko/latest/index.html](https://konlpy.org/ko/latest/index.html)

(1) Ubuntu 16.04 ~ 20.04

```bash
$ sudo apt-get install g++ openjdk-8-jdk # Install Java 1.8+
$ sudo apt-get install python-dev; pip install konlpy     # Python 2.x
$ sudo apt-get install python3-dev; pip3 install konlpy   # Python 3.x
```

(2) MAC

```bash
$ pip install konlpy     # Python 2.x
$ pip3 install konlpy    # Python 3.x
```

**1.2** Ko-mecab

(1) Ubuntu 16.04 ~ 20.04

```bash
$ sudo apt-get install curl
$ bash <(curl -s https://raw.githubusercontent.com/konlpy/konlpy/master/scripts/mecab.sh)
```

(2) MAC

```bash
$ bash <(curl -s https://raw.githubusercontent.com/konlpy/konlpy/master/scripts/mecab.sh)
```

## 2. Quantitative Experiment

- $YOUR_SAVE_PATH$: Set the path where checkpoints (training) or generated outputs (decoding) will be saved.
- $YOUR_CHECKPOINT_PATH$: Set the path to load the checkpoint.

```bash
# Before you start training and decoding...
conda create -n korean_commongen python=3.7
conda activate korean_commongen
pip install -r requirements.txt
```

😳 Warning! 

If the version of the cuda driver is 11.1 or higher, the following error may occur.

```bash
RuntimeError: CUDA error: no kernel image is available for execution on the device
```

In that case, installing PyTorch through conda is one possible solution.

```bash
conda install pytorch=1.9.0 torchvision torchaudio cudatoolkit=11.1 -c pytorch -c nvidia
```

To load a model of a different parameter size, just change `--model_name_or_path`.

*(Fixed seed 42, checkpoint-9600)*

```python
# KoGPT2
python run_language_modeling_KoGPT2.py --model_name_or_path skt/kogpt2-base-v2 --do_train --train_data_file dataset/korean_commongen_official_train.txt --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=1 --gradient_accumulation_steps 4 --save_steps 1600 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --num_train_epochs 5 --block_size 128 --line_by_line
# KoBART
python run_language_modeling_KoBART.py --model_name_or_path hyunwoongko/kobart --do_train --train_file dataset/korean_commongen_official_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model KoBART
# mBARTs (mBART, mBART-50)
python run_language_modeling_mBART_mT5.py --model_name_or_path facebook/mbart-large-50 --do_train --train_file dataset/korean_commongen_official_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mBART
# mT5s (mT5-small, mT5-base, mT5-large)
python run_language_modeling_mBART_mT5.py --model_name_or_path google/mt5-large --do_train --train_file dataset/korean_commongen_official_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mT5 --source_prefix "summarize: "
```

## 3. Ablation Study - Free morph / Noun & Verb / Content

1) Free morpheme set

```python
# KoGPT2
python run_language_modeling_KoGPT2.py --model_name_or_path skt/kogpt2-base-v2 --do_train --train_data_file dataset/ablation_1/korean_commongen_free_morpheme_train.txt --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=1 --gradient_accumulation_steps 4 --save_steps 1600 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --num_train_epochs 5 --block_size 128 --line_by_line
# KoBART
python run_language_modeling_KoBART.py --model_name_or_path hyunwoongko/kobart --do_train --train_file dataset/ablation_1/korean_commongen_free_morpheme_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model KoBART
# mBARTs
python run_language_modeling_mBART_mT5.py --model_name_or_path facebook/mbart-large-50 --do_train --train_file dataset/ablation_1/korean_commongen_free_morpheme_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mBART
# mT5s
python run_language_modeling_mBART_mT5.py --model_name_or_path google/mt5-large --do_train --train_file dataset/ablation_1/korean_commongen_free_morpheme_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mT5 --source_prefix "summarize: "
```

2) Only noun and verb set

```python
# KoGPT2
python run_language_modeling_KoGPT2.py --model_name_or_path skt/kogpt2-base-v2 --do_train --train_data_file dataset/ablation_1/korean_commongen_only_noun_verb_train.txt --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=1 --gradient_accumulation_steps 4 --save_steps 1600 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --num_train_epochs 5 --block_size 128 --line_by_line
# KoBART
python run_language_modeling_KoBART.py --model_name_or_path hyunwoongko/kobart --do_train --train_file dataset/ablation_1/korean_commongen_only_noun_verb_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model KoBART
# mBARTs 
python run_language_modeling_mBART_mT5.py --model_name_or_path facebook/mbart-large-50 --do_train --train_file dataset/ablation_1/korean_commongen_only_noun_verb_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mBART
# mT5s
python run_language_modeling_mBART_mT5.py --model_name_or_path google/mt5-large --do_train --train_file dataset/ablation_1/korean_commongen_only_noun_verb_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mT5 --source_prefix "summarize: "
```

## 4. Ablation Study - Image captions / Dialogue summaries

1) Only Image caption 

```python
# KoGPT2
python run_language_modeling_KoGPT2.py --model_name_or_path skt/kogpt2-base-v2 --do_train --train_data_file dataset/ablation_2/korean_commongen_image_only_train.txt --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=1 --gradient_accumulation_steps 4 --save_steps 1600 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --num_train_epochs 5 --block_size 128 --line_by_line
# KoBART
python run_language_modeling_KoBART.py --model_name_or_path hyunwoongko/kobart --do_train --train_file dataset/ablation_2/korean_commongen_image_only_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model KoBART
# mBARTs 
python run_language_modeling_mBART_mT5.py --model_name_or_path facebook/mbart-large-50 --do_train --train_file dataset/ablation_2/korean_commongen_image_only_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mBART
# mT5s
python run_language_modeling_mBART_mT5.py --model_name_or_path google/mt5-large --do_train --train_file dataset/ablation_2/korean_commongen_image_only_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mT5 --source_prefix "summarize: "
```

2) Only Dialogue summary

```python
# KoGPT2
python run_language_modeling_KoGPT2.py --model_name_or_path skt/kogpt2-base-v2 --do_train --train_data_file dataset/ablation_2/korean_commongen_dialogue_summary_only_train.txt --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=1 --gradient_accumulation_steps 4 --save_steps 1600 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --num_train_epochs 5 --block_size 128 --line_by_line
# KoBART
python run_language_modeling_KoBART.py --model_name_or_path hyunwoongko/kobart --do_train --train_file dataset/ablation_2/korean_commongen_dialogue_summary_only_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model KoBART
# mBARTs 
python run_language_modeling_mBART_mT5.py --model_name_or_path facebook/mbart-large-50 --do_train --train_file dataset/ablation_2/korean_commongen_dialogue_summary_only_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mBART
# mT5s
python run_language_modeling_mBART_mT5.py --model_name_or_path google/mt5-large --do_train --train_file dataset/ablation_2/korean_commongen_dialogue_summary_only_train.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mT5 --source_prefix "summarize: "
```

## 5. High-level Commonsense Reasoning

- ${42,52,62,72,82}$: Choose one of the seeds given.

```python
# KoGPT2
python run_language_modeling_KoGPT2.py --model_name_or_path skt/kogpt2-base-v2 --do_train --train_data_file dataset/high_level_commonsense_reasoning/high_level_korean_commongen_train_seed_${42,52,62,72,82}$.txt --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=1 --gradient_accumulation_steps 4 --save_steps 1600 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --num_train_epochs 5 --block_size 128 --line_by_line
# KoBART
python run_language_modeling_KoBART.py --model_name_or_path hyunwoongko/kobart --do_train --train_file dataset/high_level_commonsense_reasoning/high_level_korean_commongen_train_seed_${42,52,62,72,82}$.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model KoBART
# mBARTs 
python run_language_modeling_mBART_mT5.py --model_name_or_path facebook/mbart-large-50 --do_train --train_file dataset/high_level_commonsense_reasoning/high_level_korean_commongen_train_seed_${42,52,62,72,82}$.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mBART
# mT5s
python run_language_modeling_mBART_mT5.py --model_name_or_path google/mt5-large --do_train --train_file dataset/high_level_commonsense_reasoning/high_level_korean_commongen_train_seed_${42,52,62,72,82}$.json --output_dir $YOUR_SAVE_PATH$ --per_device_train_batch_size=4 --gradient_accumulation_steps 4 --save_steps 1600 --num_train_epochs 5 --learning_rate 5e-5 --warmup_steps 400 --logging_steps 400 --max_source_length 64 --max_target_length 256 --setproc_model mT5 --source_prefix "summarize: "
```

## 6. Decoding

- ${1,2}$: Choose *1* or *2*.
- ${free_morpheme_test,only_noun_verb_test}$: Choose *free_morph* or *only noun and verb*.

```python
## 2. Quantitative Experiment / 5. High-level 
# KoGPT2 
python decoding_GPT2_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/korean_commongen_official_test.txt --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --ngram 3 --model_name kogpt2 --num_sentences 5
# KoBART 
python decoding_BART_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/korean_commongen_official_test.json --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --min_len 10 --ngram 3 
# mBARTs
python decoding_mBART_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/korean_commongen_official_test.json --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --min_len 10 --ngram 3
# mT5s
python decoding_mT5_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/korean_commongen_official_test.json --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --min_len 10 --ngram 3

## 3 & 4. Ablation Study
# KoGPT2 
python decoding_GPT2_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/ablation_${1,2}$/korean_commongen_${free_morpheme_test,only_noun_verb_test}$.txt --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --ngram 3 --model_name kogpt2 --num_sentences 5
# KoBART 
python decoding_BART_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/ablation_${1,2}$/korean_commongen_${free_morpheme_test,only_noun_verb_test}$.json --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --min_len 10 --ngram 3 
# mBARTs
python decoding_mBART_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/ablation_${1,2}$/korean_commongen_${free_morpheme_test,only_noun_verb_test}$.json --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --min_len 10 --ngram 3
# mT5s
python decoding_mT5_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/ablation_${1,2}$/korean_commongen_${free_morpheme_test,only_noun_verb_test}$.json --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --min_len 10 --ngram 3

## Reformulated
# KoGPT2 
python decoding_GPT2_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/reformulated_commongen/korean_commongen_reformulated_test.txt --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --ngram 3 --model_name kogpt2 --num_sentences 5
# KoBART 
python decoding_BART_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/reformulated_commongen/korean_commongen_reformulated_test.json --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --min_len 10 --ngram 3 
# mBARTs
python decoding_mBART_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/reformulated_commongen/korean_commongen_reformulated_test.json --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --min_len 10 --ngram 3
# mT5s
python decoding_mT5_re_rank_commongen.py --beam_size 10 --output_dir $YOUR_SAVE_PATH$ --test_file dataset/reformulated_commongen/korean_commongen_reformulated_test.json --checkpoint_path $YOUR_CHECKPOINT_PATH$ --max_len 30 --min_len 10 --ngram 3
```

## 7. Evaluation

- $TASK_NAME$: Enter the name of the sub-folder under `baseline_results/` that holds the results to evaluate.
- $MODEL_GENERATE.TXT$: Enter a file name of the results generated by the model.
- $MODEL_NAME$: Enter the model name.

```bash
conda create -n evaluation python=3.7
conda activate evaluation
pip install -r requirements_eval.txt
```

```python
# 2. Quantitative Experiment / 5. High-level
python korean_commongen_evaluation_multi_ref.py --reference_file dataset/korean_commongen_official_test.txt --prediction_file baseline_results/$TASK_NAME$/$MODEL_GENERATE.TXT$ --model $MODEL_NAME$

# 3 & 4. Ablation Study
python korean_commongen_evaluation_multi_ref.py --reference_file dataset/ablation_${1,2}$/korean_commongen_${free_morpheme_test,only_noun_verb_test}$.txt --prediction_file baseline_results/$TASK_NAME$/$MODEL_GENERATE.TXT$ --model $MODEL_NAME$

# Reformulated
python korean_commongen_evaluation_multi_ref.py --reference_file dataset/reformulated_commongen/korean_commongen_reformulated_test.txt --prediction_file baseline_results/$TASK_NAME$/$MODEL_GENERATE.TXT$ --model $MODEL_NAME$
```