% Findings of EACL 2026 paper. The url field uses the permanent ACL Anthology
% address (not a preview/staging build); {Why} is braced so sentence-casing
% styles keep the capital that follows the question mark in the title.
@inproceedings{mousavi-etal-2026-garbage,
  title     = {Garbage In, Reasoning Out? {Why} Benchmark Scores are Unreliable and What to Do About It},
  author    = {Mousavi, Seyed Mahed and Cecchinato, Edoardo and Horn{\'i}kov{\'a}, Lucia and Riccardi, Giuseppe},
  editor    = {Demberg, Vera and Inui, Kentaro and Marquez, Llu{\'i}s},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EACL} 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-eacl.89/},
  pages     = {1747--1759},
  isbn      = {979-8-89176-386-9},
}