@inproceedings{fu-barez-2025-question,
    title = "Same Question, Different Words: A Latent Adversarial Framework for Prompt Robustness",
    author = "Fu, Tingchen and
      Barez, Fazl",
    editor = "Christodoulopoulos, Christos and
      Chakraborty, Tanmoy and
      Rose, Carolyn and
      Peng, Violet",
    booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.emnlp-main.1595/",
    doi = "10.18653/v1/2025.emnlp-main.1595",
    pages = "31293--31307",
    isbn = "979-8-89176-332-6",
    abstract = "Insensitivity to semantically-preserving variations of prompts (paraphrases) is crucial for reliable behavior and real-world deployment of large language models. However, language models exhibit significant performance degradation with semantically equivalent but differently phrased prompts, and existing solutions either depend on trial-and-error prompt engineering or require computationally expensive inference-time algorithms. In this study, built on the key insight that worst-case prompts exhibit a drift in embedding space, we present Latent Adversarial Paraphrasing (LAP), a dual-loop adversarial framework that optimizes a trainable perturbation as ``latent continuous paraphrase'' and language model performance on these perturbations iteratively. Extensive experiments are conducted to demonstrate the effectiveness of LAP across multiple backbones on the RobustAlpaca benchmark with a 0.5{\%}-4{\%} absolution improvement on worst-case win-rate."
}
Markdown (Informal)
[Same Question, Different Words: A Latent Adversarial Framework for Prompt Robustness](https://aclanthology.org/2025.emnlp-main.1595/) (Fu & Barez, EMNLP 2025)
ACL