% Volume record 2025.gem-1.0 for the GEM 2025 workshop proceedings.
% url is the canonical aclanthology.org address, not a preview-branch build.
@proceedings{gem-ws-2025-1,
    title     = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.0/},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.1 (pp. 1--7). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{yu-2025-towards,
    title     = {Towards Comprehensive Evaluation of Open-Source Language Models: A Multi-Dimensional, User-Driven Approach},
    author    = {Yu, Qingchen},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.1/},
    pages     = {1--7},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.2 (pp. 8--17). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{conde-etal-2025-psycholinguistic,
    title     = {Psycholinguistic Word Features: a New Approach for the Evaluation of {LLMs} Alignment with Humans},
    author    = {Conde, Javier and Saiz, Miguel Gonz{\'a}lez and Grandury, Mar{\'i}a and
                 Reviriego, Pedro and Mart{\'i}nez, Gonzalo and Brysbaert, Marc},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.2/},
    pages     = {8--17},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.3 (pp. 18--29). url is the canonical aclanthology.org address, not a preview-branch build.
% The citation key is kept as-is so existing citations continue to resolve.
@inproceedings{wenyawu-deng-2025-spatial,
    title     = {Spatial Representation of Large Language Models in {2D} Scene},
    author    = {Wu, Wenya and Deng, Weihong},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.3/},
    pages     = {18--29},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.4 (pp. 30--45). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{arif-etal-2025-fellowship,
    title     = {The Fellowship of the {LLMs}: Multi-Model Workflows for Synthetic Preference Optimization Dataset Generation},
    author    = {Arif, Samee and Farid, Sualeha and Azeemi, Abdul Hameed and
                 Athar, Awais and Raza, Agha Ali},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.4/},
    pages     = {30--45},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.5 (pp. 46--59). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{dada-etal-2025-biomedical,
    title     = {Does Biomedical Training Lead to Better Medical Performance?},
    author    = {Dada, Amin and Kora{\c{s}}, Osman Alperen and Bauer, Marie and
                 Corbeil, Jean-Philippe and Contreras, Amanda Butler and
                 Seibold, Constantin Marc and Smith, Kaleb E. and
                 Friedrich, Julian and Kleesiek, Jens},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.5/},
    pages     = {46--59},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.6 (pp. 60--81). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{belz-thomson-2025-heds,
    title     = {{HEDS} 3.0: The Human Evaluation Data Sheet Version 3.0},
    author    = {Belz, Anya and Thomson, Craig},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.6/},
    pages     = {60--81},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.8 (pp. 82--98). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{zhang-etal-2025-argent,
    title     = {{ARGENT}: Automatic Reference-free Evaluation for Open-Ended Text Generation without Source Inputs},
    author    = {Zhang, Xinyue and Zecevic, Agathe and Zeki, Sebastian and Roberts, Angus},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.8/},
    pages     = {82--98},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.9 (pp. 99--120). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{wachter-etal-2025-llms,
    title     = {Are {LLMs} (Really) Ideological? An {IRT}-based Analysis and Alignment Tool for Perceived Socio-Economic Bias in {LLMs}},
    author    = {Wachter, Jasmin and Radloff, Michael and Smolej, Maja and
                 Kinder-Kurlanda, Katharina},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.9/},
    pages     = {99--120},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.10 (pp. 121--128). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{sandan-etal-2025-knockout,
    title     = {Knockout {LLM} Assessment: Using Large Language Models for Evaluations through Iterative Pairwise Comparisons},
    author    = {Sandan, Isik Baran and Dinh, Tu Anh and Niehues, Jan},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.10/},
    pages     = {121--128},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.11 (pp. 129--150). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{hsu-etal-2025-free,
    title     = {Free-text Rationale Generation under Readability Level Control},
    author    = {Hsu, Yi-Sheng and Feldhus, Nils and Hakimov, Sherzod},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.11/},
    pages     = {129--150},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.12 (pp. 151--160). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{bhattacharya-gupta-2025-selective,
    title     = {Selective Shot Learning for Code Explanation},
    author    = {Bhattacharya, Paheli and Gupta, Rishabh},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.12/},
    pages     = {151--160},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.13 (pp. 161--177). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{gogoulou-etal-2025-llms,
    title     = {Can {LLMs} Detect Intrinsic Hallucinations in Paraphrasing and Machine Translation?},
    author    = {Gogoulou, Evangelia and Zahra, Shorouq and Guillou, Liane and
                 D{\"u}rlich, Luise and Nivre, Joakim},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.13/},
    pages     = {161--177},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.14 (pp. 178--199). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{wang-etal-2025-evaluating,
    title     = {Evaluating {LLMs} with Multiple Problems at once},
    author    = {Wang, Zhengxiang and Kodner, Jordan and Rambow, Owen},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.14/},
    pages     = {178--199},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.15 (pp. 200--211). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{toles-etal-2025-learning,
    title     = {Learning and Evaluating Factual Clarification Question Generation Without Examples},
    author    = {Toles, Matthew and Huang, Yukun and Yu, Zhou},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.15/},
    pages     = {200--211},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.16 (pp. 212--230). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{benyoash-etal-2025-secque,
    title     = {{SECQUE}: A Benchmark for Evaluating Real-World Financial Analysis Capabilities},
    author    = {BenYoash, Noga and Brief, Menachem and Ovadia, Oded and
                 Shenderovitz, Gil and Mishaeli, Moshik and Lemberg, Rachel and
                 Sheetrit, Eitam},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.16/},
    pages     = {212--230},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.18 (pp. 231--238). url is the canonical aclanthology.org address, not a preview-branch build.
% NOTE(review): first author is entered with a capitalised "Van" (parsed as part of the surname) -- confirm against the paper.
@inproceedings{van-miltenburg-etal-2025-measure,
    title     = {Measure only what is measurable: towards conversation requirements for evaluating task-oriented dialogue systems},
    author    = {Van Miltenburg, Emiel and Braggaar, Anouck and Croes, Emmelyn and
                 Kunneman, Florian and Liebrecht, Christine and Martijn, Gabriella},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.18/},
    pages     = {231--238},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.21 (pp. 239--248). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{luitel-etal-2025-perplexity,
    title     = {Can Perplexity Predict Finetuning Performance? An Investigation of Tokenization Effects on Sequential Language Models for {Nepali}},
    author    = {Luitel, Nishant and Bekoju, Nirajan and Sah, Anand Kumar and
                 Shakya, Subarna},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.21/},
    pages     = {239--248},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.22 (pp. 249--261). url is the canonical aclanthology.org address, not a preview-branch build.
% The stray space before the question mark in the title has been removed.
@inproceedings{berrayana-etal-2025-bias,
    title     = {Are Bias Evaluation Methods Biased?},
    author    = {Berrayana, Lina and Rooney, Sean and Garc{\'e}s-Erice, Luis and
                 Giurgiu, Ioana},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.22/},
    pages     = {249--261},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.23 (pp. 262--275). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{takeshita-etal-2025-irsum,
    title     = {{IRSum}: One Model to Rule Summarization and Retrieval},
    author    = {Takeshita, Sotaro and Ponzetto, Simone Paolo and Eckert, Kai},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.23/},
    pages     = {262--275},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.24 (pp. 276--290). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{lee-etal-2025-modeling,
    title     = {Modeling the One-to-Many Property in Open-Domain Dialogue with {LLMs}},
    author    = {Lee, Jing Yang and Lee, Kong Aik and Gan, Woon-Seng},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.24/},
    pages     = {276--290},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.25 (pp. 291--301). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{joo-cho-2025-cleanse,
    title     = {Cleanse: Uncertainty Estimation Approach Using Clustering-based Semantic Consistency in {LLMs}},
    author    = {Joo, Minsuh and Cho, Hyunsoo},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.25/},
    pages     = {291--301},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.26 (pp. 302--319). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{goliakova-etal-2025-metric,
    title     = {Metric assessment protocol in the context of answer fluctuation on {MCQ} tasks},
    author    = {Goliakova, Ekaterina and Renard, Xavier and Lesot, Marie-Jeanne and
                 Laugel, Thibault and Marsala, Christophe and Detyniecki, Marcin},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.26/},
    pages     = {302--319},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.28 (pp. 320--336). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{braun-forell-2025-towards,
    title     = {(Towards) Scalable Reliable Automated Evaluation with Large Language Models},
    author    = {Braun, Bertil and Forell, Martin},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.28/},
    pages     = {320--336},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.29 (pp. 337--353). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{franck-etal-2025-clustering,
    title     = {Clustering Zero-Shot Uncertainty Estimations to Assess {LLM} Response Accuracy for Yes/No {Q\&A}},
    author    = {Franck, Christopher T. and Vennos, Amy and Mueller, W. Graham and
                 Dakota, Daniel},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.29/},
    pages     = {337--353},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.30 (pp. 354--365). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{huidrom-belz-2025-using,
    title     = {Using {LLM} Judgements for Sanity Checking Results and Reproducibility of Human Evaluations in {NLP}},
    author    = {Huidrom, Rudali and Belz, Anya},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.30/},
    pages     = {354--365},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.31 (pp. 366--384). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{joshi-etal-2025-coke,
    title     = {{CoKe}: Customizable Fine-Grained Story Evaluation via Chain-of-Keyword Rationalization},
    author    = {Joshi, Brihi and Venkatapathy, Sriram and Bansal, Mohit and
                 Peng, Nanyun and Chang, Haw-Shiuan},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.31/},
    pages     = {366--384},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.32 (pp. 385--403). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{ligeti-nagy-etal-2025-hugme,
    title     = {{HuGME}: A benchmark system for evaluating {Hungarian} generative {LLMs}},
    author    = {Ligeti-Nagy, No{\'e}mi and Madarasz, Gabor and Foldesi, Flora and
                 Lengyel, Mariann and Osvath, Matyas and Sarossy, Bence and
                 Varga, Kristof and Yang, Gy{\H{o}}z{\H{o}} Zijian and
                 H{\'e}ja, Enik{\H{o}} and V{\'a}radi, Tam{\'a}s and
                 Pr{\'o}sz{\'e}ky, G{\'a}bor},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.32/},
    pages     = {385--403},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.33 (pp. 404--430). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{thakur-etal-2025-judging,
    title     = {Judging the Judges: Evaluating Alignment and Vulnerabilities in {LLMs}-as-Judges},
    author    = {Thakur, Aman Singh and Choudhary, Kartik and
                 Ramayapally, Venkat Srinik and Vaidyanathan, Sankaran and
                 Hupkes, Dieuwke},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.33/},
    pages     = {404--430},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.36 (pp. 431--438). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{shah-etal-2025-analyzing,
    title     = {Analyzing the Sensitivity of Vision Language Models in Visual Question Answering},
    author    = {Shah, Monika and Balaji, Sudarshan and Sarkhel, Somdeb and
                 Dey, Sanorita and Venugopal, Deepak},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.36/},
    pages     = {431--438},
    isbn      = {979-8-89176-261-9},
}
% Paper 2025.gem-1.38 (pp. 439--457). url is the canonical aclanthology.org address, not a preview-branch build.
@inproceedings{percin-etal-2025-investigating,
    title     = {Investigating the Robustness of Retrieval-Augmented Generation at the Query Level},
    author    = {Per{\c{c}}in, Sezen and Su, Xin and Syed, Qutub Sha and
                 Howard, Phillip and Kuvshinov, Aleksei and Schwinn, Leo and
                 Scholl, Kay-Ulrich},
    editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and
                 Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and
                 Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.gem-1.38/},
    pages     = {439--457},
    isbn      = {979-8-89176-261-9},
}
% GEM 2025 paper 2025.gem-1.40 (pp. 458-470): ELAB, an LLM alignment benchmark for the Persian language.
@inproceedings{pourbahman-etal-2025-elab,
  title     = {{ELAB}: Extensive {LLM} Alignment Benchmark in {P}ersian Language},
  author    = {Pourbahman, Zahra and
               Rajabi, Fatemeh and
               Sadeghi, Mohammadhossein and
               Ghahroodi, Omid and
               Bakhshaei, Somayeh and
               Amini, Arash and
               Kazemi, Reza and
               Baghshah, Mahdieh Soleymani},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.40/},
  pages     = {458--470},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.41 (pp. 471-487): quality of benchmark datasets for low-resource languages, Turkish case study.
@inproceedings{umutlu-etal-2025-evaluating,
  title     = {Evaluating the Quality of Benchmark Datasets for Low-Resource Languages: A Case Study on {T}urkish},
  author    = {Umutlu, Elif Ecem and
               Cengiz, Ayse Aysu and
               Sever, Ahmet Kaan and
               Erdem, Seyma and
               Aytan, Burak and
               Tufan, Busra and
               Topraksoy, Abdullah and
               Dar{\i}c{\i}, Esra and
               Toraman, Cagri},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.41/},
  pages     = {471--487},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.42 (pp. 488-503): Big Escape Benchmark, escape-room challenges for evaluating reasoning.
@inproceedings{tang-sun-2025-big,
  title     = {Big Escape Benchmark: Evaluating Human-Like Reasoning in Language Models via Real-World Escape Room Challenges},
  author    = {Tang, Zinan and
               Sun, QiYao},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.42/},
  pages     = {488--503},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.43 (pp. 504-510): event-based evaluation of abstractive news summarization.
@inproceedings{you-etal-2025-event,
  title     = {Event-based evaluation of abstractive news summarization},
  author    = {You, Huiling and
               Touileb, Samia and
               {\O}vrelid, Lilja and
               Velldal, Erik},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.43/},
  pages     = {504--510},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.46 (pp. 511-521): fine-tuning on the format for multiple-choice evaluation of intermediate LLM checkpoints.
@inproceedings{bunn-etal-2025-fine,
  title     = {Fine-Tune on the Format: First Improving Multiple-Choice Evaluation for Intermediate {LLM} Checkpoints},
  author    = {Bunn, Alec and
               Wiegreffe, Sarah and
               Bogin, Ben},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.46/},
  pages     = {511--521},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.47 (pp. 522-531): PapersPlease, a benchmark for motivational values of LLMs based on ERG theory.
@inproceedings{myung-etal-2025-papersplease,
  title     = {{P}apers{P}lease: A Benchmark for Evaluating Motivational Values of Large Language Models Based on {ERG} Theory},
  author    = {Myung, Junho and
               Park, Yeon Su and
               Kim, Sunwoo and
               Yoo, Shin and
               Oh, Alice},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.47/},
  pages     = {522--531},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.48 (pp. 532-548): shallow preference signals, LLM alignment with truncated data.
@inproceedings{qi-etal-2025-shallow,
  title     = {Shallow Preference Signals: Large Language Model Aligns {E}ven Better with Truncated Data?},
  author    = {Qi, Xuan and
               Qiu, Jiahao and
               Juan, Xinzhe and
               Wu, Yue and
               Wang, Mengdi},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.48/},
  pages     = {532--548},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.49 (pp. 549-560): LLM confidence estimates using extractive rationales for classification.
@inproceedings{cruz-etal-2025-improving,
  title     = {Improving Large Language Model Confidence Estimates using Extractive Rationales for Classification},
  author    = {dela Cruz, Jane Arleth and
               Hendrickx, Iris and
               Larson, Martha},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.49/},
  pages     = {549--560},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.50 (pp. 561-567): ReproHum 0729-04, human evaluation reproduction report for MemSum.
@inproceedings{junker-2025-reprohum,
  title     = {{R}epro{H}um {\#}0729-04: Human Evaluation Reproduction Report for ``{M}em{S}um: Extractive Summarization of Long Documents Using Multi-Step Episodic {M}arkov Decision Processes''},
  author    = {Junker, Simeon},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.50/},
  pages     = {561--567},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.51 (pp. 568-575): ReproHum 0744-02, reproduction of a meaning-preservation human evaluation.
@inproceedings{steen-markert-2025-reprohum,
  title     = {{R}epro{H}um {\#}0744-02: A Reproduction of the Human Evaluation of Meaning Preservation in ``Factorising Meaning and Form for Intent-Preserving Paraphrasing''},
  author    = {Steen, Julius and
               Markert, Katja},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.51/},
  pages     = {568--575},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.52 (pp. 576-582): ReproHum 0031-01, reproducing a human evaluation of readability.
@inproceedings{braun-2025-reprohum,
  title     = {{R}epro{H}um {\#}0031-01: Reproducing the Human Evaluation of Readability from ``It is {AI}{'}s Turn to Ask Humans a Question''},
  author    = {Braun, Daniel},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.52/},
  pages     = {576--582},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.53 (pp. 583-589): ReproHum 0033-05, human evaluation of factuality.
% The t-comma in the second author's surname is written as a LaTeX escape instead of raw UTF-8,
% matching the escaped accents used by the neighbouring entries, so 8-bit BibTeX can sort and
% typeset it. The last author's middle initial carries its period, like the other initials here.
@inproceedings{florescu-etal-2025-reprohum,
  title     = {{R}epro{H}um {\#}0033-05: Human Evaluation of Factuality from A Multidisciplinary Perspective},
  author    = {Florescu, Andra-Maria and
               Miclu{\textcommabelow{t}}a-C{\^a}mpeanu, Marius and
               Tabusca, Stefana Arina and
               Dinu, Liviu P.},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.53/},
  pages     = {583--589},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.54 (pp. 590-600): ReproHum 0744-02, reproducibility of semantic preservation human evaluations.
@inproceedings{arvan-parde-2025-reprohum,
  title     = {{R}epro{H}um: {\#}0744-02: Investigating the Reproducibility of Semantic Preservation Human Evaluations},
  author    = {Arvan, Mohammad and
               Parde, Natalie},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.54/},
  pages     = {590--600},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.55 (pp. 601-608): ReproHum 0669-08, reproducing a sentiment transfer evaluation.
@inproceedings{onderkova-etal-2025-reprohum,
  title     = {{R}epro{H}um {\#}0669-08: Reproducing Sentiment Transfer Evaluation},
  author    = {Onderkov{\'a}, Krist{\'y}na and
               Lango, Mateusz and
               Schmidtov{\'a}, Patr{\'i}cia and
               Dusek, Ondrej},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.55/},
  pages     = {601--608},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.56 (pp. 609-614): ReproHum 0067-01, reproduction of a cross-lingual summarization evaluation.
% The first author is entered as a single-word name.
@inproceedings{supryadi-etal-2025-reprohum,
  title     = {{R}epro{H}um {\#}0067-01: A Reproduction of the Evaluation of Cross-Lingual Summarization},
  author    = {Supryadi and
               Liu, Chuang and
               Xiong, Deyi},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.56/},
  pages     = {609--614},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.57 (pp. 615-621): ReproHum 0729-04, partial reproduction for the MemSum and NeuSum systems.
@inproceedings{mille-lorandi-2025-reprohum,
  title     = {{R}epro{H}um {\#}0729-04: Partial reproduction of the human evaluation of the {M}em{S}um and {N}eu{S}um summarisation systems},
  author    = {Mille, Simon and
               Lorandi, Michela},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.57/},
  pages     = {615--621},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.58 (pp. 622-630): monolingual versus bilingual language models on Chinese linguistic benchmarks.
@inproceedings{zhou-matusevych-2025-curse,
  title     = {Curse of bilinguality: Evaluating monolingual and bilingual language models on {C}hinese linguistic benchmarks},
  author    = {Zhou, Yuwen and
               Matusevych, Yevgen},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.58/},
  pages     = {622--630},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.59 (pp. 631-654): multicriteria evaluation framework for open-ended text generation.
@inproceedings{garces-arias-etal-2025-towards,
  title     = {Towards Better Open-Ended Text Generation: A Multicriteria Evaluation Framework},
  author    = {Garces Arias, Esteban and
               Blocher, Hannah and
               Rodemann, Julian and
               Li, Meimingwei and
               Heumann, Christian and
               A{\ss}enmacher, Matthias},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.59/},
  pages     = {631--654},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.60 (pp. 655-669): closed versus open LLMs for automated essay scoring.
@inproceedings{oketch-etal-2025-bridging,
  title     = {Bridging the {LLM} Accessibility Divide? Performance, Fairness, and Cost of Closed versus Open {LLM}s for Automated Essay Scoring},
  author    = {Oketch, Kezia and
               Lalor, John P. and
               Yang, Yi and
               Abbasi, Ahmed},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.60/},
  pages     = {655--669},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.61 (pp. 670-704): adapting LLMs for in-context learning in low-resource languages.
@inproceedings{toukmaji-flanigan-2025-prompt,
  title     = {Prompt, Translate, Fine-Tune, Re-Initialize, or Instruction-Tune? Adapting {LLM}s for In-Context Learning in Low-Resource Languages},
  author    = {Toukmaji, Christopher and
               Flanigan, Jeffrey},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.61/},
  pages     = {670--704},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.62 (pp. 705-727): knowledge distillation vs. self-training for reducing hallucination in QA agents.
% NOTE(review): six authors are listed but the key has no -etal- infix, unlike the other
% multi-author keys nearby. The key is kept as is so existing citations keep resolving.
@inproceedings{lewis-2025-winning,
  title     = {Winning Big with Small Models: Knowledge Distillation vs. Self-Training for Reducing Hallucination in {QA} Agents},
  author    = {Lewis, Ashley and
               White, Michael and
               Liu, Jing and
               Koike-Akino, Toshiaki and
               Parsons, Kieran and
               Wang, Ye},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.62/},
  pages     = {705--727},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.63 (pp. 728-740): ad-hoc concept forming in the game Codenames as an LLM evaluation.
% The capital C of the game name is brace-protected, in the same single-letter form used for
% other proper nouns in this file, so sentence-casing bibliography styles do not lowercase it.
@inproceedings{hakimov-etal-2025-ad,
  title     = {Ad-hoc Concept Forming in the Game {C}odenames as a Means for Evaluating Large Language Models},
  author    = {Hakimov, Sherzod and
               Pfennigschmidt, Lara and
               Schlangen, David},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.63/},
  pages     = {728--740},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.64 (pp. 741-758): intermediate reasoning of code-assisted LLMs for mathematics.
@inproceedings{khalili-etal-2025-evaluating,
  title     = {Evaluating Intermediate Reasoning of Code-Assisted Large Language Models for Mathematics},
  author    = {Al-Khalili, Zena and
               Howell, Nick and
               Klakow, Dietrich},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.64/},
  pages     = {741--758},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.65 (pp. 759-773): LLM judges on mathematical reasoning tasks.
@inproceedings{stephan-etal-2025-calculation,
  title     = {From Calculation to Adjudication: Examining {LLM} Judges on Mathematical Reasoning Tasks},
  author    = {Stephan, Andreas and
               Zhu, Dawei and
               A{\ss}enmacher, Matthias and
               Shen, Xiaoyu and
               Roth, Benjamin},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.65/},
  pages     = {759--773},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.66 (pp. 774-788): PersonaTwin, multi-tier prompt conditioning for personalized digital twins.
@inproceedings{chen-etal-2025-personatwin,
  title     = {{P}ersona{T}win: A Multi-Tier Prompt Conditioning Framework for Generating and Evaluating Personalized Digital Twins},
  author    = {Chen, Sihan and
               Lalor, John P. and
               Yang, Yi and
               Abbasi, Ahmed},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.66/},
  pages     = {774--788},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.67 (pp. 789-807): coreference as an indicator of context scope in multimodal narrative.
@inproceedings{ilinykh-etal-2025-coreference,
  title     = {Coreference as an indicator of context scope in multimodal narrative},
  author    = {Ilinykh, Nikolai and
               Lappin, Shalom and
               Sayeed, Asad B. and
               Lo{\'a}iciga, Sharid},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.67/},
  pages     = {789--807},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.68 (pp. 808-823): PATCH, psychometrics-assisted benchmarking of LLMs against human populations.
@inproceedings{fang-etal-2025-patch,
  title     = {{PATCH}! {P}sychometrics-{A}ssis{T}ed {B}en{CH}marking of Large Language Models against Human Populations: A Case Study of Proficiency in 8th Grade Mathematics},
  author    = {Fang, Qixiang and
               Oberski, Daniel and
               Nguyen, Dong},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.68/},
  pages     = {808--823},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.69 (pp. 824-846): MCQFormatBench, robustness tests for multiple-choice questions.
@inproceedings{takizawa-etal-2025-mcqformatbench,
  title     = {{MCQF}ormat{B}ench: Robustness Tests for Multiple-Choice Questions},
  author    = {Takizawa, Hiroo and
               Sugawara, Saku and
               Aizawa, Akiko},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.69/},
  pages     = {824--846},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.70 (pp. 847-861): effect of simplified language on LLM performance across languages.
@inproceedings{anschutz-etal-2025-dis,
  title     = {(Dis)improved?! How Simplified Language Affects Large Language Model Performance across Languages},
  author    = {Ansch{\"u}tz, Miriam and
               Damaratskaya, Anastasiya and
               Lee, Chaeeun Joy and
               Schmalz, Arthur and
               Mosca, Edoardo and
               Groh, Georg},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.70/},
  pages     = {847--861},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.71 (pp. 862-879): fine-grained constraint generation-verification for instruction following.
@inproceedings{liang-etal-2025-fine,
  title     = {Fine-Grained Constraint Generation-Verification for Improved Instruction-Following},
  author    = {Liang, Zhixiang and
               Hou, Zhenyu and
               Wang, Xiao},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.71/},
  pages     = {862--879},
  isbn      = {979-8-89176-261-9}
}
% GEM 2025 paper 2025.gem-1.72 (pp. 880-926): Finance Language Model Evaluation (FLaME).
@inproceedings{matlin-etal-2025-finance,
  title     = {Finance Language Model Evaluation ({FL}a{ME})},
  author    = {Matlin, Glenn and
               Okamoto, Mika and
               Pardawala, Huzaifa and
               Yang, Yang and
               Chava, Sudheer},
  editor    = {Arviv, Ofir and
               Clinciu, Miruna and
               Dhole, Kaustubh and
               Dror, Rotem and
               Gehrmann, Sebastian and
               Habba, Eliya and
               Itzhak, Itay and
               Mille, Simon and
               Perlitz, Yotam and
               Santus, Enrico and
               Sedoc, Jo{\~a}o and
               Shmueli Scheuer, Michal and
               Stanovsky, Gabriel and
               Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.72/},
  pages     = {880--926},
  isbn      = {979-8-89176-261-9}
}
% Anthology ID 2025.gem-1.73 -- pp. 927--946 of the GEM workshop proceedings.
% "Del Corro" is a compound surname: it is written before the comma so BibTeX
% sorts and abbreviates it as one last name instead of treating "Del" as a
% given name. The coined name sPhinX is braced as a whole word to protect its
% mixed capitalization from sentence-casing styles.
@inproceedings{ahuja-etal-2025-sphinx,
    author    = {Ahuja, Sanchit and
                 Tanmay, Kumar and
                 Chauhan, Hardik Hansrajbhai and
                 Patra, Barun and
                 Aggarwal, Kriti and
                 Del Corro, Luciano and
                 Mitra, Arindam and
                 Dhamecha, Tejas Indulal and
                 Awadallah, Ahmed Hassan and
                 Choudhury, Monojit and
                 Chaudhary, Vishrav and
                 Sitaram, Sunayana},
    title     = {{sPhinX}: Sample Efficient Multilingual Instruction Fine-Tuning Through N-shot Guided Prompting},
    editor    = {Arviv, Ofir and
                 Clinciu, Miruna and
                 Dhole, Kaustubh and
                 Dror, Rotem and
                 Gehrmann, Sebastian and
                 Habba, Eliya and
                 Itzhak, Itay and
                 Mille, Simon and
                 Perlitz, Yotam and
                 Santus, Enrico and
                 Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and
                 Stanovsky, Gabriel and
                 Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
    year      = {2025},
    month     = jul,
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    pages     = {927--946},
    url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.73/},
    isbn      = {979-8-89176-261-9}
}
% Anthology ID 2025.gem-1.74 -- pp. 947--957 of the GEM workshop proceedings.
% Every field is brace-delimited; the author list needs braces anyway because
% the {\"o} accent contains a double-quote character.
@inproceedings{de-baer-etal-2025-single,
    author    = {De Baer, Joachim and
                 Do{\u{g}}ru{\"o}z, A. Seza and
                 Demeester, Thomas and
                 Develder, Chris},
    title     = {Single- vs. Dual-Prompt Dialogue Generation with {LLM}s for Job Interviews in Human Resources},
    editor    = {Arviv, Ofir and
                 Clinciu, Miruna and
                 Dhole, Kaustubh and
                 Dror, Rotem and
                 Gehrmann, Sebastian and
                 Habba, Eliya and
                 Itzhak, Itay and
                 Mille, Simon and
                 Perlitz, Yotam and
                 Santus, Enrico and
                 Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and
                 Stanovsky, Gabriel and
                 Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
    year      = {2025},
    month     = jul,
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    pages     = {947--957},
    url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.74/},
    isbn      = {979-8-89176-261-9}
}
% Anthology ID 2025.gem-1.75 -- pp. 958--972 of the GEM workshop proceedings.
@inproceedings{dobiczek-etal-2025-natural,
    author    = {Dobiczek, Karol and
                 Altmeyer, Patrick and
                 Liem, Cynthia C. S.},
    title     = {Natural Language Counterfactual Explanations in Financial Text Classification: A Comparison of Generators and Evaluation Metrics},
    editor    = {Arviv, Ofir and
                 Clinciu, Miruna and
                 Dhole, Kaustubh and
                 Dror, Rotem and
                 Gehrmann, Sebastian and
                 Habba, Eliya and
                 Itzhak, Itay and
                 Mille, Simon and
                 Perlitz, Yotam and
                 Santus, Enrico and
                 Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and
                 Stanovsky, Gabriel and
                 Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
    year      = {2025},
    month     = jul,
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    pages     = {958--972},
    url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.75/},
    isbn      = {979-8-89176-261-9}
}
% Anthology ID 2025.gem-1.76 -- page 973 of the GEM workshop proceedings.
% This contribution occupies a single page, so `pages` holds one number rather
% than a range with identical endpoints; standard styles then print "page 973".
@inproceedings{boudin-aizawa-2025-analysis,
    author    = {Boudin, Florian and
                 Aizawa, Akiko},
    title     = {An Analysis of Datasets, Metrics and Models in Keyphrase Generation},
    editor    = {Arviv, Ofir and
                 Clinciu, Miruna and
                 Dhole, Kaustubh and
                 Dror, Rotem and
                 Gehrmann, Sebastian and
                 Habba, Eliya and
                 Itzhak, Itay and
                 Mille, Simon and
                 Perlitz, Yotam and
                 Santus, Enrico and
                 Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and
                 Stanovsky, Gabriel and
                 Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
    year      = {2025},
    month     = jul,
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    pages     = {973},
    url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.76/},
    isbn      = {979-8-89176-261-9}
}
% Anthology ID 2025.gem-1.77 -- pp. 974--1001 of the GEM workshop proceedings.
@inproceedings{chernyshev-etal-2025-u,
    author    = {Chernyshev, Konstantin and
                 Polshkov, Vitaliy and
                 Stepanov, Vlad and
                 Myasnikov, Alex and
                 Artemova, Ekaterina and
                 Miasnikov, Alexei and
                 Tilga, Sergei},
    title     = {{U}-{MATH}: A University-Level Benchmark for Evaluating Mathematical Skills in Large Language Models},
    editor    = {Arviv, Ofir and
                 Clinciu, Miruna and
                 Dhole, Kaustubh and
                 Dror, Rotem and
                 Gehrmann, Sebastian and
                 Habba, Eliya and
                 Itzhak, Itay and
                 Mille, Simon and
                 Perlitz, Yotam and
                 Santus, Enrico and
                 Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and
                 Stanovsky, Gabriel and
                 Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
    year      = {2025},
    month     = jul,
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    pages     = {974--1001},
    url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.77/},
    isbn      = {979-8-89176-261-9}
}
% Anthology ID 2025.gem-1.78 -- pp. 1002--1016 of the GEM workshop proceedings.
% The task name ReproNLP is braced as one whole word so that sentence-casing
% styles keep its capitalization without splitting the word into fragments.
@inproceedings{belz-etal-2025-2025,
    author    = {Belz, Anya and
                 Thomson, Craig and
                 Gonz{\'a}lez Corbelle, Javier and
                 Ruelle, Malo},
    title     = {The 2025 {ReproNLP} Shared Task on Reproducibility of Evaluations in {NLP}: Overview and Results},
    editor    = {Arviv, Ofir and
                 Clinciu, Miruna and
                 Dhole, Kaustubh and
                 Dror, Rotem and
                 Gehrmann, Sebastian and
                 Habba, Eliya and
                 Itzhak, Itay and
                 Mille, Simon and
                 Perlitz, Yotam and
                 Santus, Enrico and
                 Sedoc, Jo{\~a}o and
                 Shmueli Scheuer, Michal and
                 Stanovsky, Gabriel and
                 Tafjord, Oyvind},
    booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
    year      = {2025},
    month     = jul,
    address   = {Vienna, Austria and virtual meeting},
    publisher = {Association for Computational Linguistics},
    pages     = {1002--1016},
    url       = {https://preview.aclanthology.org/volume-fixes/2025.gem-1.78/},
    isbn      = {979-8-89176-261-9}
}
