% Volume-level record for the proceedings of the 5th Workshop on Evaluation
% and Comparison of NLP Systems (December 2025).
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host, so the link stays valid after the branch is gone.
@proceedings{eval4nlp-ws-2025-1,
    title = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.0/},
    doi = {10.18653/v1/2025.eval4nlp-1.0},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 1--8 of the 5th Eval4NLP workshop proceedings.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{apsel-etal-2025-beyond,
    title = {Beyond Tokens and Into Minds: Future Directions for Human-Centered Evaluation in Machine Translation Post-Editing},
    author = {Apsel, Molly and
      Kothari, Sunil and
      Mehta, Manish and
      Sundarababu, Vasudevan},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.1/},
    doi = {10.18653/v1/2025.eval4nlp-1.1},
    pages = {1--8},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 9--20 of the 5th Eval4NLP workshop proceedings.
% {UML} and {VLM} are braced so sentence-casing styles keep the acronyms.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{ranjani-prabhudesai-2025-measuring,
    title = {Measuring Visual Understanding in Telecom domain: Performance Metrics for Image-to-{UML} conversion using {VLM}s},
    author = {Ranjani, H. G. and
      Prabhudesai, Rutuja},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.2/},
    doi = {10.18653/v1/2025.eval4nlp-1.2},
    pages = {9--20},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 21--39 of the 5th Eval4NLP workshop proceedings.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{hidayat-etal-2025-simulating,
    title = {Simulating Training Data Leakage in Multiple-Choice Benchmarks for {LLM} Evaluation},
    author = {Hidayat, Naila Shafirni and
      Al Kautsar, Muhammad Dehan and
      Wicaksono, Alfan Farizki and
      Koto, Fajri},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.3/},
    doi = {10.18653/v1/2025.eval4nlp-1.3},
    pages = {21--39},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 40--54 of the 5th Eval4NLP workshop proceedings.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{patil-etal-2025-reliable,
    title = {Reliable Inline Code Documentation with {LLM}s: Fine-Grained Evaluation of Comment Quality and Coverage},
    author = {Patil, Rohan and
      Tirodkar, Gaurav and
      Gatfane, Shubham},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.4/},
    doi = {10.18653/v1/2025.eval4nlp-1.4},
    pages = {40--54},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 55--65 of the 5th Eval4NLP workshop proceedings.
% The sharp s in the third author's surname is written as the LaTeX escape {\ss}.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{urchs-etal-2025-fair,
    title = {Fair Play in the Newsroom: Actor-Based Filtering Gender Discrimination in Text Corpora},
    author = {Urchs, Stefanie and
      Thurner, Veronika and
      A{\ss}enmacher, Matthias and
      Heumann, Christian and
      Thiemichen, Stephanie},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.5/},
    doi = {10.18653/v1/2025.eval4nlp-1.5},
    pages = {55--65},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 66--84 of the 5th Eval4NLP workshop proceedings.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{chen-etal-2025-drafts,
    title = {Between the Drafts: An Evaluation Framework for Identifying Quality Improvement and Stylistic Differences in Scientific Texts},
    author = {Chen, Danqing and
      Weber, Ingo and
      Dietrich, Felix},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.6/},
    doi = {10.18653/v1/2025.eval4nlp-1.6},
    pages = {66--84},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 85--90 of the 5th Eval4NLP workshop proceedings.
% The title's quotation uses TeX-style `` and '' quote marks.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{wagh-srivastava-2025-dentist,
    title = {``The dentist is an involved parent, the bartender is not'': Revealing Implicit Biases in {QA} with Implicit {BBQ}},
    author = {Wagh, Aarushi and
      Srivastava, Saniya},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.7/},
    doi = {10.18653/v1/2025.eval4nlp-1.7},
    pages = {85--90},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 91--108 of the 5th Eval4NLP workshop proceedings.
% The camel-case name is braced as one whole word so sentence-casing styles
% keep its capitals without breaking the word into per-letter brace groups.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{elaraby-maheswari-2025-synclaimeval,
    title = {{SynClaimEval}: A Framework for Evaluating the Utility of Synthetic Data in Long-Context Claim Verification},
    author = {Elaraby, Mohamed and
      Maheswari, Jyoti Prakash},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.8/},
    doi = {10.18653/v1/2025.eval4nlp-1.8},
    pages = {91--108},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 109--118 of the 5th Eval4NLP workshop proceedings.
% Accented letters in author names are written as braced LaTeX escapes
% (for example {\v{c}} and {\'a}).
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{marecek-etal-2025-evaluation,
    title = {Evaluation of Generated Poetry},
    author = {Mare{\v{c}}ek, David and
      Hodkov{\'a}, Kate{\v{r}}ina Motal{\'i}k and
      Musil, Tom{\'a}{\v{s}} and
      Rosa, Rudolf},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.9/},
    doi = {10.18653/v1/2025.eval4nlp-1.9},
    pages = {109--118},
    isbn = {979-8-89176-305-0}
}
% Single-author workshop paper, pages 119--125 of the 5th Eval4NLP workshop
% proceedings.
% The camel-case name is braced as one whole word so sentence-casing styles
% keep its capitals without breaking the word into per-letter brace groups.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{du-2025-titletrap,
    title = {{TitleTrap}: Probing Presentation Bias in {LLM}-Based Scientific Reviewing},
    author = {Du, Shurui},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.10/},
    doi = {10.18653/v1/2025.eval4nlp-1.10},
    pages = {119--125},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 126--134 of the 5th Eval4NLP workshop proceedings.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{dey-etal-2025-beyond,
    title = {Beyond the Rubric: Cultural Misalignment in {LLM} Benchmarks for Sexual and Reproductive Health},
    author = {Dey, Sumon Kanti and
      S, Manvi and
      Mehta, Zeel and
      Shah, Meet and
      Agrawal, Unnati and
      Jalota, Suhani and
      Ismail, Azra},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.11/},
    doi = {10.18653/v1/2025.eval4nlp-1.11},
    pages = {126--134},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 135--148 of the 5th Eval4NLP workshop proceedings.
% The first author's surname contains a dotless i, written as {\i}.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{atil-etal-2025-non,
    title = {Non-Determinism of ``Deterministic'' {LLM} System Settings in Hosted Environments},
    author = {At{\i}l, Berk and
      Aykent, Sarp and
      Chittams, Alexa and
      Fu, Lisheng and
      Passonneau, Rebecca J. and
      Radcliffe, Evan and
      Rajagopal, Guru Rajan and
      Sloan, Adam and
      Tudrej, Tomasz and
      Ture, Ferhan and
      Wu, Zhe and
      Xu, Lixinyu and
      Baldwin, Breck},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.12/},
    doi = {10.18653/v1/2025.eval4nlp-1.12},
    pages = {135--148},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 149--166 of the 5th Eval4NLP workshop proceedings.
% Title protection: the mixed-case name and the proper adjective are braced as
% whole words, and the inline math is wrapped in braces so case-changing
% styles leave the math-mode control sequence alone.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
@inproceedings{ghosh-etal-2025-infinite,
    title = {{InFiNITE} ({$\infty$}): {Indian} Financial Narrative Inference Tasks {\&} Evaluations},
    author = {Ghosh, Sohom and
      Maji, Arnab and
      Naskar, Sudip Kumar},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.13/},
    doi = {10.18653/v1/2025.eval4nlp-1.13},
    pages = {149--166},
    isbn = {979-8-89176-305-0}
}
% Workshop paper, pages 167--178 of the 5th Eval4NLP workshop proceedings.
% The url uses the canonical ACL Anthology address built from the DOI suffix,
% not a preview-branch host.
% NOTE(review): the first author is recorded with family name Kranti and given
% name Chalamalasetti; the order may be inverted. Confirm against the paper
% before changing it (the citation key is deliberately left as is).
@inproceedings{kranti-etal-2025-test,
    title = {Test Set Quality in Multilingual {LLM} Evaluation},
    author = {Kranti, Chalamalasetti and
      Bernier-Colborne, Gabriel and
      Gauthier, Yvan and
      Vajjala, Sowmya},
    editor = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems},
    month = dec,
    year = {2025},
    address = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.eval4nlp-1.14/},
    doi = {10.18653/v1/2025.eval4nlp-1.14},
    pages = {167--178},
    isbn = {979-8-89176-305-0}
}