% AJ-Bench paper, Findings of ACL 2026. URL uses the canonical ACL Anthology host rather than the temporary preview/ingest host.
@inproceedings{shi-etal-2026-aj,
  author    = {Shi, Wentao and Wang, Yu and Zhao, Yuyang and Chen, Yuxin and Feng, Fuli and Hao, Xueyuan and Su, Xi and Gu, Qi and Su, Hui and Cai, Xunliang and He, Xiangnan},
  title     = {{AJ-Bench}: Benchmarking Agent-as-a-Judge for Environment-Aware Evaluation},
  editor    = {Liakata, Maria and Moreira, Viviane P. and Zhang, Jiajun and Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {25371--25413},
  isbn      = {979-8-89176-395-1},
  url       = {https://aclanthology.org/2026.findings-acl.1269/},
}