% Findings of ACL 2026 paper; ACL Anthology ID 2026.findings-acl.336.
% The url field uses the canonical Anthology address rather than the
% preview/ingestion-branch address the record was exported with.
% NOTE(review): the abstract is kept exactly as exported. The sentence starting
% "Our analysis shows" looks like it is missing a verb -- confirm against the
% published paper before correcting it.
@inproceedings{li-etal-2026-faithful,
  title     = {Faithful-First Reasoning, Planning, and Acting for Multimodal {LLMs}},
  author    = {Li, Junxian and
               Xu, Xinyue and
               Ma, Sai and
               Zhang, Di and
               Li, Sichao},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.336/},
  pages     = {6777--6793},
  isbn      = {979-8-89176-395-1},
  abstract  = {Multimodal Large Language Models (MLLMs) frequently suffer from unfaithfulness, generating reasoning chains that drift from visual evidence or contradict final predictions. We propose Faithful-First Reasoning, Planning, and Acting (RPA) framework in which FaithEvi provides step-wise and chain-level supervision by evaluating the faithfulness of intermediate reasoning, and FaithAct uses these signals to plan and execute faithfulness-aware actions during inference. Experiments across multiple multimodal reasoning benchmarks show that faithful-first RPA improves perceptual faithfulness by up to 24{\%} over prompt-based and tool-augmented reasoning frameworks, without degrading task accuracy. Our analysis shows that treating faithfulness as a guiding principle perceptually faithful reasoning trajectories and mitigates hallucination behavior. This work thereby establishes a unified framework for both evaluating and enforcing faithfulness in multimodal reasoning. Code is at https://github.com/lijunxian111/Faithful-First-RPA.},
}

Markdown (Informal)
[Faithful-First Reasoning, Planning, and Acting for Multimodal LLMs](https://aclanthology.org/2026.findings-acl.336/) (Li et al., Findings 2026)
ACL
- Junxian Li, Xinyue Xu, Sai Ma, Di Zhang, and Sichao Li. 2026. Faithful-First Reasoning, Planning, and Acting for Multimodal LLMs. In Findings of the Association for Computational Linguistics: ACL 2026, pages 6777–6793, San Diego, California, United States. Association for Computational Linguistics.