@inproceedings{du-etal-2025-agentic,
title = "Agentic-R1: Distilled Dual-Strategy Reasoning",
author = "Du, Weihua and
Aggarwal, Pranjal and
Welleck, Sean and
Yang, Yiming",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/name-variant-enfa-fane/2025.emnlp-main.604/",
doi = "10.18653/v1/2025.emnlp-main.604",
pages = "12040--12054",
ISBN = "979-8-89176-332-6",
abstract = "Current long chain-of-thought (long-CoT) models excel at mathematical reasoning but rely on slow and error-prone natural language traces. Tool-augmented agents address arithmetic via code execution, but often falter on complex logical tasks. We introduce a fine-tuning framework, **DualDistill**, that distills complementary reasoning strategies from multiple teachers into a unified student model. Using this approach, we train **Agentic-R1**, which dynamically selects the optimal strategy for each query, invoking tools for arithmetic and algorithmic problems and using text-based reasoning for abstract ones. Our method improves accuracy on computation-intensive tasks and reduces inference latency on standard benchmarks, demonstrating the promise of multi-strategy distillation for robust and efficient reasoning."
}

Markdown (Informal)
[Agentic-R1: Distilled Dual-Strategy Reasoning](https://aclanthology.org/2025.emnlp-main.604/) (Du et al., EMNLP 2025)
ACL

Weihua Du, Pranjal Aggarwal, Sean Welleck, and Yiming Yang. 2025. Agentic-R1: Distilled Dual-Strategy Reasoning. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 12040–12054, Suzhou, China. Association for Computational Linguistics.