% Workshop paper by Ben Jenkins in the EvalEval workshop proceedings (pp. 201--210).
% NOTE(review): the url host is preview.aclanthology.org with an "ingest-acl-workshops"
%   path -- presumably a staging build; confirm whether a canonical Anthology URL
%   (or a DOI) should replace it once the volume is published.
% NOTE(review): address holds "San Diego, CA" -- looks like the event venue rather
%   than the publisher's city; confirm this matches what the target style expects.
@inproceedings{jenkins-2026-beyond,
  author    = {Jenkins, Ben},
  title     = {Beyond Static Benchmarks: A Validity, Reliability, and Sociotechnical Framework for Evaluating {LLM}s in Deployment Contexts},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  pages     = {201--210},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.30/},
}