@inproceedings{chen-etal-2025-hierarchical,
title = "Hierarchical Attention Generates Better Proofs",
author = "Chen, Jianlong and
Li, Chao and
Yuan, Yang and
Yao, Andrew C",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingestion-acl-25/2025.acl-long.856/",
pages = "17506--17520",
ISBN = "979-8-89176-251-0",
abstract = "Large language models (LLMs) have shown promise in formal theorem proving, but their token-level processing often fails to capture the inherent hierarchical nature of mathematical proofs. We introduce \textbf{Hierarchical Attention}, a regularization method that aligns LLMs' attention mechanisms with mathematical reasoning structures. Our approach establishes a five-level hierarchy from foundational elements to high-level concepts, ensuring structured information flow in proof generation. Experiments demonstrate that our method improves proof success rates by 2.05{\%} on miniF2F and 1.69{\%} on ProofNet while reducing proof complexity by 23.81{\%} and 16.50{\%} respectively. The code and models will be available."
}
Markdown (Informal):
[Hierarchical Attention Generates Better Proofs](https://preview.aclanthology.org/ingestion-acl-25/2025.acl-long.856/) (Chen et al., ACL 2025)

ACL:
Jianlong Chen, Chao Li, Yang Yuan, and Andrew C Yao. 2025. Hierarchical Attention Generates Better Proofs. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 17506–17520, Vienna, Austria. Association for Computational Linguistics.
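To make the abstract's idea concrete, here is a minimal, illustrative sketch of what an attention regularizer over a five-level token hierarchy could look like. It is a guess based only on the abstract, not the paper's actual formulation: the function names (`hierarchy_attention_penalty`), the per-token `levels` labels, the chosen direction of "structured information flow", and the loss weight are all assumptions introduced for illustration.

```python
import torch

def hierarchy_attention_penalty(attn, levels):
    """Illustrative regularizer (assumption, not the paper's method).

    attn:   (batch, heads, seq, seq) post-softmax attention weights
    levels: (batch, seq) integer hierarchy level per token, e.g. 1
            (foundational element) up to 5 (high-level concept)

    Penalizes attention mass a query token places on key tokens at a
    strictly higher hierarchy level, nudging information to flow from
    foundational elements toward high-level concepts. The direction of
    this constraint is a guess based on the abstract.
    """
    q_lvl = levels.unsqueeze(-1)            # (batch, seq, 1): level of each query
    k_lvl = levels.unsqueeze(-2)            # (batch, 1, seq): level of each key
    violating = (k_lvl > q_lvl).float()     # 1 where the key sits "above" the query
    violating = violating.unsqueeze(1)      # broadcast over attention heads
    return (attn * violating).sum(dim=-1).mean()

# Hypothetical usage: add the penalty to the language-modeling loss
# with a small weight (the 0.1 below is an arbitrary placeholder).
# loss = lm_loss + 0.1 * hierarchy_attention_penalty(attn_weights, token_levels)
```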