% MaRF paper by Wang, Zheng and Kani, in Findings of ACL 2026.
% The url field uses the canonical ACL Anthology permalink instead of the
% temporary preview/ingest staging address.
% NOTE(review): confirm the canonical page resolves once the volume is live.
% NOTE(review): address looks like the conference venue rather than the
% publisher's city; kept as exported -- confirm against the target style.
@inproceedings{wang-etal-2026-marf,
  author    = {Wang, Suyuan and
               Zheng, Hongbo and
               Kani, Nickvash},
  title     = {{MaRF}: Leveraging Representation-Level Fusion of Formula Semantics for Mathematical Information Retrieval},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  year      = {2026},
  month     = jul,
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {25570--25585},
  isbn      = {979-8-89176-395-1},
  url       = {https://aclanthology.org/2026.findings-acl.1277/},
  abstract  = {Mathematical information retrieval (MIR) depends on jointly modeling natural-language context and mathematical expressions. While BERT-based dense retrievers are effective, they often dilute mathematical semantics because textual content dominates most training data and mathematical formulas differ fundamentally from natural language in structure and composition. Consequently, these models rely heavily on surrounding text, which reduces robustness in math-intensive scenarios with limited textual description. We propose MaRF, a dual-encoder representation-level fusion framework for MIR that explicitly integrates formula semantics into context-aware dense retrieval. By combining contextual and formula-specific representations, MaRF captures complementary information from both textual and symbolic views. Experiments on the ARQMath-3 benchmark demonstrate that MaRF substantially improves retrieval performance and robustness, outperforming strong baselines across MIR tasks. The source code and datasets are available at https://github.com/MLPgroup/MaRF.},
}
Markdown (Informal)
[MaRF: Leveraging Representation-Level Fusion of Formula Semantics for Mathematical Information Retrieval](https://aclanthology.org/2026.findings-acl.1277/) (Wang et al., Findings 2026)
ACL