% Survey paper in the proceedings of "The Big Picture v2: Crafting a Research Narrative" (2026).
% NOTE(review): the url field points at a preview/ingest host; confirm the final public link before relying on it.
@inproceedings{tzachristas-tzachristas-2026-natural,
  title     = {From Natural Language to Certified Geometry Proofs: A Survey of {LLM}-Augmented Verification and Neuro-Symbolic Theorem Proving},
  author    = {Tzachristas, Ioannis and Tzachristas, Georgios},
  editor    = {Elazar, Yanai and Ettinger, Allyson and Kassner, Nora and Ruder, Sebastian},
  booktitle = {Proceedings of The Big Picture v2: Crafting a Research Narrative},
  month     = jul,
  year      = {2026},
  address   = {San Diego, CA, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.bigpicture-main.1/},
  pages     = {1--9},
  isbn      = {979-8-89176-416-3},
  abstract  = {Large Language Models (LLMs) can produce convincing geometric arguments, yet their outputs are not reliable enough to be treated as proofs without independent verification. In parallel, symbolic geometry tools (e.g. automated theorem provers in dynamic geometry systems) offer strong rigor guarantees, but require formalized inputs and can struggle with problem formalization, auxiliary construction, and proof presentation. This survey synthesizes work at the intersection of these lines: hybrid LLM{--}symbolic systems for geometry that (i) translate natural language and diagrams into formal constraints, (ii) search for solution plans and proof steps using learned or heuristic methods, and (iii) verify the resulting steps using symbolic provers or proof assistants. We propose a taxonomy organized around (a) the role of the LLM in the pipeline (parser, strategist, prover, critic), (b) the target proof artifact (answer-only, informal proof, semi-formal step trace, or kernel-checked formal proof), and (c) the verification backend (numeric testing, algebraic provers, synthetic provers, and proof-assistant kernels). We review representative systems in NLP and AI (e.g. GeoS, Inter-GPS, FormalGeo, AlphaGeometry, AutoGPS, and recent heuristic-only deductive solvers), and connect them to broader neurosymbolic paradigms for faithful reasoning (e.g. SatLM, LINC, and autoformalization). Finally, we outline evaluation protocols emphasizing step-level soundness and robustness, and we discuss open problems in multimodal formalization, handling of non-degeneracy conditions, human-readable certified proofs, and reproducibility.},
}