% Volume-level record for the workshop proceedings.
% NOTE(review): the url below targets a preview.aclanthology.org ingest path;
% confirm the canonical URL before this record is published.
@proceedings{evaleval-2026-1,
  title     = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.0/},
}
@inproceedings{lee-etal-2026-rigorous,
  author    = {Lee, Isabelle and
               Liu, Emmy and
               Jiao, Cathy and
               Joshi, Brihi and
               Yogatama, Dani and
               Barez, Fazl and
               Saxon, Michael},
  title     = {Rigorous Interpretation Is a Form of Evaluation},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {1--11},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.1/},
}
@inproceedings{ding-tan-2026-evaluating,
  author    = {Ding, Shi and
               Tan, Sijian},
  title     = {Evaluating Multi-turn Human-{AI} Interaction},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {12--18},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.2/},
}
@inproceedings{yoo-etal-2026-guidelines,
  author    = {Yoo, Nari and
               Khor, Ashley and
               Mukhija, Namrata and
               Adebiyi, Aminat and
               Zilka, Miri},
  title     = {Guidelines for Whom? Rethinking {AI} Ethics in Resource-Constrained Migration Services},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {19--25},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.3/},
}
@inproceedings{kirtac-2026-evaluating,
  author    = {Kirtac, Kemal},
  title     = {Evaluating Large Language Model News Sentiment in Finance under Liquidity and Market Frictions},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {26--35},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.4/},
}
% In the title, the capitals of Wordle and Fibble are brace-protected as proper
% names and the superscript math sits in its own brace group, so bibliography
% styles that apply sentence casing leave all three intact.
@inproceedings{liu-2026-wordle,
  author    = {Liu, Chang},
  title     = {From {W}ordle to {F}ibble{$^5$}: Evaluating {LLM} Reasoning Under Escalating Deception},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {36--45},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.5/},
}
@inproceedings{mahajan-etal-2026-mind,
  author    = {Mahajan, Pranav and
               Kendiukhov, Ihor and
               Hussain, Syed and
               Nottingham, Lydia},
  title     = {Mind the Gap: How Elicitation Protocols Shape the Stated-Revealed Preference Gap in Language Models},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {46--55},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.9/},
}
@inproceedings{erez-etal-2026-scanners,
  author    = {Erez, Lidor and
               Hofman, Omer and
               Nizri, Tamir and
               Vainshtein, Roman},
  title     = {When Scanners Lie: Evaluator Instability in {LLM} Red-Teaming},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {56--69},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.11/},
}
@inproceedings{huang-etal-2026-reasoning,
  author    = {Huang, Hui and
               Wu, Xuanxin and
               Yang, Muyun and
               Arase, Yuki},
  title     = {Reasoning Model Is Superior {LLM}-Judge, Yet Suffers from Biases},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {70--81},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.13/},
}
@inproceedings{hayati-etal-2026-rubrics,
  author    = {Hayati, Shirley Anugrah and
               Wang, Ruizi and
               Kang, Dongyeop},
  title     = {From Rubrics to Recipe: Principle-Centric Benchmark for Evaluating Large Language Models},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {82--99},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.15/},
}
@inproceedings{cabrera-etal-2026-long,
  author    = {Cabrera, Luc{\'i}a and
               D{'}Arcy, Jocelyn and
               Saxton-Knight, Isaac},
  title     = {Too long; didn{'}t solve},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {100--110},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.20/},
}
@inproceedings{trott-parkinson-coombs-2026-graduating,
  author    = {Trott, Sean and
               Parkinson-Coombs, Ois{\'i}n},
  title     = {Graduating the Benchmark Scale: Lessons from Thermometry},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {111--115},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.21/},
}
@inproceedings{nishal-bandy-2026-caged,
  author    = {Nishal, Sachita and
               Bandy, Jack},
  title     = {Caged Birds and Cute Bookworms: Feminine Tropes and Implicit Gender Bias in Large Language Models},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {116--127},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.22/},
}
@inproceedings{san-joaquin-etal-2026-scorecard,
  author    = {San Joaquin, Ayrton and
               Gipi{\v{s}}kis, Rokas and
               Chin, Ze Shen},
  title     = {Scorecard of {AI} Benchmark Quality},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {128--160},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.25/},
}
@inproceedings{nejadgholi-etal-2026-defining,
  author    = {Nejadgholi, Isar and
               Kianpour, Masoud and
               Vishnubhotla, Krishnapriya and
               Molamohammadi, Maryam},
  title     = {Defining Cultural Capabilities for {AI} Evaluation: A Taxonomy Grounded in Intercultural Communication Theory},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {161--173},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.26/},
}
@inproceedings{sokol-etal-2026-benchnavigator,
  author    = {Sokol, Anna and
               Vejsbjerg, Inge and
               Daly, Elizabeth M. and
               Piorkowski, David and
               Hind, Michael and
               Moniz, Nuno and
               Chawla, Nitesh V.},
  title     = {{B}ench{N}avigator: A Discovery Interface for Comparing {LLM} Benchmarks},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {174--200},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.29/},
}
@inproceedings{jenkins-2026-beyond,
  author    = {Jenkins, Ben},
  title     = {Beyond Static Benchmarks: A Validity, Reliability, and Sociotechnical Framework for Evaluating {LLM}s in Deployment Contexts},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {201--210},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.30/},
}
@inproceedings{lundin-etal-2026-guidelines,
  author    = {Lundin, Jessica M. and
               Nakakana, Usman Nasir and
               Chabot-Couture, Guillaume},
  title     = {From Guidelines to Guarantees: A Graph-Based Evaluation Harness for Domain-Specific Evaluation of {LLM}s},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {211--220},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.34/},
}
@inproceedings{ponnuraj-2026-document,
  author    = {Ponnuraj, Punitha},
  title     = {Document Overlap Is Not Evidence Continuity: Measuring Retrieval Jitter in Citation-Based {RAG} Evaluation},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {221--226},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.35/},
}
@inproceedings{chooi-etal-2026-measuring,
  author    = {Chooi, Je Qin and
               Lee, Jaeho and
               Li, Jasmine Xinze},
  title     = {Measuring {AI}-Induced Disempowerment: A Framework and Proposed Metrics},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {227--236},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.36/},
}
@inproceedings{kierans-etal-2026-position,
  author    = {Kierans, Aidan and
               Dutt, Ritam and
               Rittichier, Kaley and
               Dori-Hacohen, Shiri and
               Ghosh, Avijit},
  title     = {Position: Evaluations of {AI} Moral Reasoning Still Miss Half of the Picture},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {237--244},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.38/},
}
@inproceedings{gipiskis-kurasova-2026-evaluation,
  author    = {Gipi{\v{s}}kis, Rokas and
               Kurasova, Olga},
  title     = {Evaluation Cards for {XAI} Metrics},
  editor    = {Akhtar, Mubashara and
               Batzner, Jan and
               Choshen, Leshem and
               Ghosh, Avijit and
               Gohar, Usman and
               Mickel, Jennifer and
               Pant, Ichhya and
               Talat, Zeerak and
               Lin, Michelle},
  booktitle = {Proceedings of the Workshop on Evaluating Evaluations ({E}val{E}val)},
  pages     = {245--251},
  year      = {2026},
  month     = jul,
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-429-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.evaleval-1.39/},
}
