@inproceedings{mirbeygi-beigy-2025-prompt,
    title = "Prompt Guided Diffusion for Controllable Text Generation",
    author = "Mirbeygi, Mohaddeseh and
      Beigy, Hamid",
    editor = "Bak, JinYeong and
      Goot, Rob van der and
      Jang, Hyeju and
      Buaphet, Weerayut and
      Ramponi, Alan and
      Xu, Wei and
      Ritter, Alan",
    booktitle = "Proceedings of the Tenth Workshop on Noisy and User-generated Text",
    month = may,
    year = "2025",
    address = "Albuquerque, New Mexico, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.wnut-1.9/",
    pages = "78--84",
    isbn = "979-8-89176-232-9",
    abstract = "Controlled text generation, originally a task to generate coherent, contextually relevant text with specified attributes such as sentiment, topic, or style, has seen a lot of development with methods that use PPLM, FUDGE, and diffusion-based models. However, most state-of-the-art methods balance control precision with fluency. Classifier-guided approaches, like PPLM, are well-known for unstable updates of gradients, yielding incoherent outputs, while autoregressive models, like FUDGE, depend on rigid templates that limit creativity. While recent diffusion models show promises in iterative refinement and diversity, they often lack mechanisms to explicitly incorporate task-specific knowledge and hence require various complicated auxiliary classifiers for training and inference. We now propose a prompt-guided diffusion framework that integrates structured prompts seamlessly into the process of diffusion for precise and flexible control of generated texts. Each prompt combines a target condition (e.g., sentiment label), an in-class example (e.g., a positive movie review), and a placeholder for the generated sentence. Explicit, human-readable guidance is thereby given, spanning high-level intent to low-level text generation. Our approach encodes prompts using large pre-trained language models, e.g., BART, fusing these in a cross-attention manner with the diffusion dynamics, achieves new state-of-the-art results for all benchmarks, including IMDB for sentiment, AG News for topic, and E2E for structured data-to-text generation."
}