% EquiBench paper, main-conference proceedings of EMNLP 2025 (ACL Anthology record).
% - url uses the canonical Anthology host; the earlier value pointed at the
%   temporary preview/ingest staging site, which is not a stable address.
%   NOTE(review): confirm the canonical page resolves once the volume is live.
% - The coined name in the title is brace-protected as a whole word so that
%   sentence-casing styles keep its capitalisation.
% - address holds the conference location, as ACL Anthology exports do.
@inproceedings{wei-etal-2025-equibench,
    title     = {{EquiBench}: Benchmarking Large Language Models' Reasoning about Program Semantics via Equivalence Checking},
    author    = {Wei, Anjiang and
                 Cao, Jiannan and
                 Li, Ran and
                 Chen, Hongyu and
                 Zhang, Yuhui and
                 Wang, Ziheng and
                 Liu, Yuan and
                 Teixeira, Thiago S. F. X. and
                 Yang, Diyi and
                 Wang, Ke and
                 Aiken, Alex},
    editor    = {Christodoulopoulos, Christos and
                 Chakraborty, Tanmoy and
                 Rose, Carolyn and
                 Peng, Violet},
    booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2025},
    address   = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.emnlp-main.1718/},
    pages     = {33856--33869},
    isbn      = {979-8-89176-332-6},
}