@inproceedings{wei-etal-2025-equibench, title = "{E}qui{B}ench: Benchmarking Large Language Models' Reasoning about Program Semantics via Equivalence Checking", author = "Wei, Anjiang and Cao, Jiannan and Li, Ran and Chen, Hongyu and Zhang, Yuhui and Wang, Ziheng and Liu, Yuan and Teixeira, Thiago S. F. X. and Yang, Diyi and Wang, Ke and Aiken, Alex", editor = "Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn and Peng, Violet", booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing", month = nov, year = "2025", address = "Suzhou, China", publisher = "Association for Computational Linguistics", url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.1718/", pages = "33856--33869", ISBN = "979-8-89176-332-6" }