@inproceedings{niwa-etal-2025-rectifying,
title = "Rectifying Belief Space via Unlearning to Harness {LLM}s' Reasoning",
author = "Niwa, Ayana and
Kaneko, Masahiro and
Inui, Kentaro",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.1285/",
pages = "25060--25075",
isbn = "979-8-89176-256-5",
abstract = "Large Language Models (LLMs) exhibit sophisticated reasoning yet still generate incorrect answers. We attribute these errors to \emph{Spurious Beliefs}, defined as propositions the model internally considers as true despite being factually false. To reduce reasoning errors, we propose a belief space rectification framework. Our method first identifies the beliefs invoked during inference via an explanation{-}based approach with Forward{-}Backward Beam Search (FBBS). We subsequently apply unlearning via gradient ascent to suppress spurious beliefs and enhance true ones, thereby effectively rectifying the model{'}s belief space. Experiments on three QA datasets and three LLMs show that our method significantly reduces erroneous reasoning and improves generalization."
}
Markdown (Informal)
[Rectifying Belief Space via Unlearning to Harness LLMs’ Reasoning](https://aclanthology.org/2025.findings-acl.1285/) (Niwa et al., Findings 2025)
ACL