% Conference paper in the EACL 2026 Student Research Workshop proceedings (Volume 4).
% Brace-delimited values; bare month macro; field values kept verbatim from the Anthology export.
% NOTE(review): the url field targets a preview.aclanthology.org ingest path -- confirm the canonical link before relying on it.
@inproceedings{gupta-2026-sentences,
  title     = {From Sentences to Proof Trees: Leveraging Language Models for Structured Reasoning},
  author    = {Gupta, Aayushee},
  editor    = {Baez Santamaria, Selene and
               Somayajula, Sai Ashish and
               Yamaguchi, Atsuki},
  booktitle = {Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 4: Student Research Workshop)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-srw.71/},
  pages     = {967--981},
  isbn      = {979-8-89176-383-8},
  abstract  = {The ability of AI systems to not only answer complex natural language questions, but also transparently justify their reasoning, is crucial for building trust and enabling effective human-AI collaboration. In domains requiring multi-hop reasoning, answers must often be constructed by combining multiple relevant sentences from a knowledge base to build an inferential path from the question toward the answer. We tackle this challenge by exploring a neuro-symbolic approach to reasoning through the generation of entailment trees {--} structured, step-by-step proof trees {--} using Large Language Models (LLMs). These trees provide interpretable justifications for the inference process. Using the EntailmentBank{~}(CITATION) data set, we evaluated a diverse set of prompting strategies across multiple models, along with a proposal of an inference-guided prompting approach that performs well. We also fine-tuned LLMs trained specifically for proof generation by applying several data augmentation, curriculum learning, and reinforcement-guided optimization strategies. Our results show that the fine-tuned model outperforms all prompting strategies, achieving superior performance across multiple structural and semantic metrics. We also provide a detailed evaluation of which training strategies are helpful towards proof generation. Our findings highlight the importance of proof tree generation as a benchmark for evaluating structured reasoning in LLMs.},
}
Markdown (Informal)
[From Sentences to Proof Trees: Leveraging Language Models for Structured Reasoning](https://preview.aclanthology.org/ingest-eacl/2026.eacl-srw.71/) (Gupta, EACL 2026)
ACL