@inproceedings{yang-etal-2023-bridging,
title = "Bridging the Gap between Pre-Training and Fine-Tuning for Commonsense Generation",
author = "Yang, Haoran and
Wang, Yan and
Li, Piji and
Bi, Wei and
Lam, Wai and
Xu, Chen",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.findings-eacl.28/",
doi = "10.18653/v1/2023.findings-eacl.28",
pages = "376--383",
abstract = "Commonsense generation aims to generate a plausible sentence containing all given unordered concept words. Previous methods focusing on this task usually directly concatenate these words as the input of a pre-trained language model (PLM). However, in PLMs' pre-training process, the inputs are often corrupted sentences with correct word order. This input distribution discrepancy between pre-training and fine-tuning makes the model difficult to fully utilize the knowledge of PLMs. In this paper, we propose a two-stage framework to alleviate this issue. Firstly, in pre-training stage, we design a new format of input to endow PLMs the ability to deal with masked sentences with incorrect word order. Secondly, during fine-tuning, we insert the special token [MASK] between two consecutive concept words to make the input distribution more similar to the input distribution in pre-training. We conduct extensive experiments and provide thorough analysis to demonstrate the effectiveness of our proposed method."
}