@inproceedings{chen-etal-2022-dialogsum,
title = "{D}ialog{S}um Challenge: Results of the Dialogue Summarization Shared Task",
author = "Chen, Yulong and
Deng, Naihao and
Liu, Yang and
Zhang, Yue",
editor = "Shaikh, Samira and
Ferreira, Thiago and
Stent, Amanda",
booktitle = "Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges",
month = jul,
year = "2022",
address = "Waterville, Maine, USA and virtual meeting",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2022.inlg-genchal.14/",
pages = "94--103",
    abstract = "We report the results of the DialogSum Challenge, the shared task on summarizing real-life scenario dialogues at INLG 2022. Four teams participate in this shared task and three submit their system reports, exploring different methods to improve the performance of dialogue summarization. Although there is great improvement over the baseline models in terms of automatic evaluation metrics, such as ROUGE scores, human evaluation from multiple aspects reveals a salient gap between model-generated outputs and human-annotated summaries. These findings demonstrate the difficulty of dialogue summarization and suggest that more fine-grained evaluation metrics are needed."
}
[DialogSum Challenge: Results of the Dialogue Summarization Shared Task](https://preview.aclanthology.org/add-emnlp-2024-awards/2022.inlg-genchal.14/) (Chen et al., INLG 2022)