@inproceedings{han-etal-2024-alignsum,
    title     = {{AlignSum}: Data Pyramid Hierarchical Fine-tuning for Aligning with Human Summarization Preference},
    author    = {Han, Yang and
                 Wang, Yiming and
                 Wang, Rui and
                 Chen, Lu and
                 Yu, Kai},
    editor    = {Al-Onaizan, Yaser and
                 Bansal, Mohit and
                 Chen, Yun-Nung},
    booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2024},
    month     = nov,
    year      = {2024},
    address   = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.findings-emnlp.498/},
    doi       = {10.18653/v1/2024.findings-emnlp.498},
    pages     = {8506--8522},
    abstract  = {Text summarization tasks commonly employ Pre-trained Language Models (PLMs) to fit diverse standard datasets. While these PLMs excel in automatic evaluations, they frequently underperform in human evaluations, indicating a deviation between their generated summaries and human summarization preferences. This discrepancy is likely due to the low quality of fine-tuning datasets and the limited availability of high-quality human-annotated data that reflect true human preference. To address this challenge, we introduce a novel human summarization preference alignment framework AlignSum. This framework consists of three parts: Firstly, we construct a Data Pyramid with extractive, abstractive, and human-annotated summary data. Secondly, we conduct the Gaussian Resampling to remove summaries with extreme lengths. Finally, we implement the two-stage hierarchical fine-tuning with Data Pyramid after Gaussian Resampling. We apply AlignSum to PLMs on the human-annotated CNN/DailyMail and BBC XSum datasets. Experiments show that with AlignSum, PLMs like BART-Large surpass 175B GPT-3 in both automatic and human evaluations. This demonstrates that AlignSum significantly enhances the alignment of language models with human summarization preferences.},
}
Markdown (Informal)
[AlignSum: Data Pyramid Hierarchical Fine-tuning for Aligning with Human Summarization Preference](https://aclanthology.org/2024.findings-emnlp.498/) (Han et al., Findings 2024)
ACL