@inproceedings{sahu-etal-2025-guide,
title = "A Guide To Effectively Leveraging {LLM}s for Low-Resource Text Summarization: Data Augmentation and Semi-supervised Approaches",
author = "Sahu, Gaurav and
Vechtomova, Olga and
Laradji, Issam H.",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/Ingest-2025-COMPUTEL/2025.findings-naacl.86/",
pages = "1584--1603",
ISBN = "979-8-89176-195-7",
abstract = "Existing approaches for low-resource text summarization primarily employ large language models (LLMs) like GPT-3 or GPT-4 at inference time to generate summaries directly; however, such approaches often suffer from inconsistent LLM outputs and are difficult to adapt to domain-specific data in low-resource scenarios. In this work, we propose two novel methods to effectively utilize LLMs for low-resource text summarization: 1) MixSumm, an LLM-based data augmentation regime that synthesizes high-quality documents (short and long) for few-shot text summarization, and 2) PPSL, a prompt-based pseudolabeling strategy for sample-efficient semi-supervised text summarization. Specifically, MixSumm leverages the open-source LLaMA-3-70b-Instruct model to generate new documents by mixing topical information derived from a small seed set, and PPSL leverages the LLaMA-3-70b-Instruct model to generate high-quality pseudo-labels in a semi-supervised learning setup. We evaluate our methods on the TweetSumm, WikiHow, and ArXiv/PubMed datasets and use L-Eval, a LLaMA-3-based evaluation metric, and ROUGE scores to measure the quality of generated summaries. Our experiments on extractive and abstractive summarization show that MixSumm and PPSL achieve competitive ROUGE scores as a fully supervised method with 5{\%} of the labeled data. We release our codebase here: https://github.com/ServiceNow/text-summarization-with-llms/"
}
Markdown (Informal)
[A Guide To Effectively Leveraging LLMs for Low-Resource Text Summarization: Data Augmentation and Semi-supervised Approaches](https://preview.aclanthology.org/Ingest-2025-COMPUTEL/2025.findings-naacl.86/) (Sahu et al., Findings 2025)
ACL