% Workshop paper in the proceedings of the Fifth GEM Workshop (ACL Anthology
% export). The url uses the permanent aclanthology.org address built from the
% Anthology ID 2026.gem-main.19 instead of the temporary
% preview.aclanthology.org ingest-branch host it was exported with.
% NOTE(review): confirm the canonical page resolves once ingestion is merged.
@inproceedings{yamauchi-etal-2026-empirical,
  title     = {An Empirical Study of {LLM}-as-a-Judge: How Design Choices Impact Evaluation Reliability},
  author    = {Yamauchi, Yusuke and
               Yano, Taro and
               Oyamada, Masafumi},
  editor    = {Mille, Simon and
               Gehrmann, Sebastian and
               Schmidtov{\'a}, Patr{\'i}cia and
               Du{\v{s}}ek, Ond{\v{r}}ej and
               Fadaee, Marzieh and
               Lo, Kyle and
               Santus, Enrico and
               Stanovsky, Gabriel},
  booktitle = {Proceedings of the Fifth Workshop on Generation, Evaluation and Metrics ({GEM})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.gem-main.19/},
  pages     = {167--176},
  isbn      = {979-8-89176-423-1},
}