@inproceedings{zhu-etal-2025-convsearch,
title = "{C}onv{S}earch-R1: Enhancing Query Reformulation for Conversational Search with Reasoning via Reinforcement Learning",
author = "Zhu, Changtai and
Wang, Siyin and
Feng, Ruijun and
Song, Kai and
Qiu, Xipeng",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.1349/",
pages = "26558--26575",
ISBN = "979-8-89176-332-6",
abstract = "Conversational search systems require effective handling of context-dependent queries that often contain ambiguity, omission, and coreference. Conversational Query Reformulation (CQR) addresses this challenge by transforming these queries into self-contained forms suitable for off-the-shelf retrievers. However, existing CQR approaches suffer from two critical constraints: high dependency on costly external supervision from human annotations or large language models, and insufficient alignment between the rewriting model and downstream retrievers. We present ConvSearch-R1, the first self-driven framework that completely eliminates dependency on external rewrite supervision by leveraging reinforcement learning to optimize reformulation directly through retrieval signals. Our novel two-stage approach combines Self-Driven Policy Warm-Up to address the cold-start problem through retrieval-guided self-distillation, followed by Retrieval-Guided Reinforcement Learning with a specially designed rank-incentive reward shaping mechanism that addresses the sparsity issue in conventional retrieval metrics. Extensive experiments on TopiOCQA and QReCC datasets demonstrate that ConvSearch-R1 significantly outperforms previous state-of-the-art methods, achieving over 10{\%} improvement on the challenging TopiOCQA dataset while using smaller 3B parameter models without any external supervision."
}