@comment{
  Findings of ACL 2026 paper by Wang and Zhang on First-Step Logical Reasoning (FSLR).
  NOTE(review): the url field points at a preview/ingest host; confirm the
  canonical address before this entry is published.
}
@inproceedings{wang-zhang-2026-implicit,
  title     = {From Implicit to Explicit: Token-Efficient Logical Supervision for Mathematical Reasoning in {LLM}s},
  author    = {Wang, Shaojie and
               Zhang, Liang},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1420/},
  pages     = {28486--28500},
  isbn      = {979-8-89176-395-1},
  abstract  = {Recent studies reveal that large language models (LLMs) exhibit limited logical reasoning abilities in mathematical problem-solving, instead often relying on pattern-matching and memorization. We systematically analyze this limitation, focusing on logical relationship understanding, which is a core capability underlying genuine logical reasoning, and reveal that errors related to this capability account for over 90{\%} of incorrect predictions, with Chain-of-Thought Supervised Fine-Tuning (CoT-SFT) failing to substantially reduce these errors. To address this bottleneck, we propose First-Step Logical Reasoning (FSLR), a lightweight training framework targeting logical relationship understanding. Our key insight is that the first planning step---identifying which variables to use and which operation to apply---encourages the model to derive logical relationships directly from the problem statement. By training models on this isolated step, FSLR provides explicit supervision for logical relationship understanding, unlike CoT-SFT which implicitly embeds such relationships within complete solution trajectories. Extensive experiments across multiple models and datasets demonstrate that FSLR consistently outperforms CoT-SFT under both in-distribution and out-of-distribution settings, with average improvements of 3.2{\%} and 4.6{\%}, respectively. Moreover, FSLR achieves 4--6{\texttimes} faster training and reduces training token consumption by over 80{\%}.},
}
Markdown (Informal)
[From Implicit to Explicit: Token-Efficient Logical Supervision for Mathematical Reasoning in LLMs](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1420/) (Wang & Zhang, Findings 2026)
ACL