@inproceedings{chen-yang-2021-simple,
title = "Simple Conversational Data Augmentation for Semi-supervised Abstractive Dialogue Summarization",
author = "Chen, Jiaao and
Yang, Diyi",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2021.emnlp-main.530/",
doi = "10.18653/v1/2021.emnlp-main.530",
pages = "6605--6616",
abstract = "Abstractive conversation summarization has received growing attention while most current state-of-the-art summarization models heavily rely on human-annotated summaries. To reduce the dependence on labeled summaries, in this work, we present a simple yet effective set of Conversational Data Augmentation (CODA) methods for semi-supervised abstractive conversation summarization, such as random swapping/deletion to perturb the discourse relations inside conversations, dialogue-acts-guided insertion to interrupt the development of conversations, and conditional-generation-based substitution to substitute utterances with their paraphrases generated based on the conversation context. To further utilize unlabeled conversations, we combine CODA with two-stage noisy self-training where we first pre-train the summarization model on unlabeled conversations with pseudo summaries and then fine-tune it on labeled conversations. Experiments conducted on the recent conversation summarization datasets demonstrate the effectiveness of our methods over several state-of-the-art data augmentation baselines."
}
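The simplest CODA perturbations named in the abstract, random swapping and deletion of utterances, are easy to illustrate. Below is a minimal Python sketch of those two operations on a toy SAMSum-style dialogue; the function names, probabilities, and example dialogue are illustrative assumptions for this note, not the authors' implementation (see the paper and its code release for the actual recipe).

```python
import random
from typing import List

def random_swap(utterances: List[str], n_swaps: int = 1, seed: int = None) -> List[str]:
    """Swap randomly chosen pairs of utterances to perturb discourse order."""
    rng = random.Random(seed)
    augmented = list(utterances)
    for _ in range(n_swaps):
        i, j = rng.sample(range(len(augmented)), 2)  # two distinct positions
        augmented[i], augmented[j] = augmented[j], augmented[i]
    return augmented

def random_delete(utterances: List[str], p: float = 0.1, seed: int = None) -> List[str]:
    """Drop each utterance independently with probability p, keeping at least one."""
    rng = random.Random(seed)
    kept = [u for u in utterances if rng.random() > p]
    return kept if kept else [rng.choice(utterances)]

# Toy dialogue (illustrative, in the style of SAMSum conversations)
dialogue = [
    "Amanda: I baked cookies. Do you want some?",
    "Jerry: Sure!",
    "Amanda: I'll bring them tomorrow :-)",
]
print(random_swap(dialogue, n_swaps=1, seed=0))
print(random_delete(dialogue, p=0.3, seed=0))
```

Swapping or deleting whole utterances (rather than individual tokens) is what distinguishes these conversational perturbations: they disturb the discourse relations between turns while leaving each utterance's surface form intact.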