@inproceedings{wang-etal-2025-transplant,
  title     = {Transplant Then Regenerate: A New Paradigm for Text Data Augmentation},
  author    = {Wang, Guangzhan and
               Zhang, Hongyu and
               Shen, Beijun and
               Gu, Xiaodong},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.702/},
  pages     = {13917--13931},
  isbn      = {979-8-89176-332-6},
  abstract  = {Data augmentation is a critical technique in deep learning. Traditional methods like Back-translation typically focus on lexical-level rephrasing, which primarily produces variations with the same semantics. While large language models (LLMs) have enhanced text augmentation by their ``knowledge emergence'' capability, controlling the style and structure of these outputs remains challenging and requires meticulous prompt engineering. In this paper, we propose LMTransplant, a novel text augmentation paradigm leveraging LLMs. The core idea of LMTransplant is transplant-then-regenerate: incorporating seed text into a context expanded by LLM, and asking the LLM to regenerate a variant based on the expanded context. This strategy allows the model to create more diverse and creative content-level variants by fully leveraging the knowledge embedded in LLMs, while preserving the core attributes of the original text. We evaluate LMTransplant across various text-related tasks, demonstrating its superior performance over existing text augmentation methods. Moreover, LMTransplant demonstrates exceptional scalability as the size of augmented data grows.},
}

@comment{Informal markdown citation (moved out of the entry; was scraped-page residue fused to the closing brace):
[Transplant Then Regenerate: A New Paradigm for Text Data Augmentation](https://aclanthology.org/2025.emnlp-main.702/) (Wang et al., EMNLP 2025)
ACL}