% ReasonEmbed (ACL 2026, Volume 1: Long Papers), exported from the ACL Anthology.
% NOTE(review): the url field points at an Anthology preview/ingest build
% (preview.aclanthology.org/ingest-acl); confirm the canonical
% aclanthology.org address once the volume is published and update it.
@inproceedings{chen-etal-2026-reasonembed,
  title     = {{ReasonEmbed}: Enhanced Text Embeddings for Reasoning-Intensive Document Retrieval},
  author    = {Chen, Jianlyu and
               Lan, Junwei and
               Li, Chaofan and
               Lian, Defu and
               Liu, Zheng},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.54/},
  pages     = {1203--1221},
  isbn      = {979-8-89176-390-6},
  abstract  = {In this paper, we introduce ReasonEmbed, a novel text embedding model developed for reasoning-intensive document retrieval. Our work includes three key technical contributions. First, we propose ReMixer, a new data synthesis method that overcomes the triviality problem prevalent in previous synthetic datasets, enabling large-scale production of 82K high-quality training samples. Second, we design Redapter, a self-adaptive learning algorithm that dynamically adjusts training each sample{'}s weight based on its reasoning intensity. This allows the model to effectively capture the complex semantic relationships between queries and documents. Third, we implement ReasonEmbed across multiple backbones of varying sizes, all of which achieve superior performance on reasoning-intensive retrieval tasks. Notably, our ReasonEmbed-Qwen3-8B model offers a record-high nDCG@10 score of 38.1 on the BRIGHT benchmark, which significantly outperforms existing text embedding models. We will fully open-source our created resource in ReasonEmbed to push forward the research advancement in this field.},
}

Markdown (Informal)
[ReasonEmbed: Enhanced Text Embeddings for Reasoning-Intensive Document Retrieval](https://preview.aclanthology.org/ingest-acl/2026.acl-long.54/) (Chen et al., ACL 2026)
ACL