% Workshop paper in the proceedings of the Fifth GEM Workshop
% (Generation, Evaluation and Metrics), published by the ACL.
% NOTE(review): the url below points at a preview.aclanthology.org ingest
% path -- confirm whether a canonical address should replace it.
@inproceedings{yamauchi-etal-2026-empirical,
    author    = {Yamauchi, Yusuke  and
      Yano, Taro  and
      Oyamada, Masafumi},
    title     = {An Empirical Study of {LLM}-as-a-Judge: How Design Choices Impact Evaluation Reliability},
    editor    = {Mille, Simon  and
      Gehrmann, Sebastian  and
      Schmidtov{\'a}, Patr{\'i}cia  and
      Du{\v{s}}ek, Ond{\v{r}}ej  and
      Fadaee, Marzieh  and
      Lo, Kyle  and
      Santus, Enrico  and
      Stanovsky, Gabriel},
    booktitle = {Proceedings of the Fifth Workshop on Generation, Evaluation and Metrics ({GEM})},
    publisher = {Association for Computational Linguistics},
    address   = {San Diego, California, USA},
    month     = jul,
    year      = {2026},
    pages     = {167--176},
    isbn      = {979-8-89176-423-1},
    url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.gem-main.19/},
}