% SciCoQA paper, ACL 2026 (Volume 1: Long Papers).
% The author surname uses the BibTeX special-character form for the a-umlaut.
% The citation key is kept as exported so existing citations keep resolving.
@inproceedings{baumg-artner-gurevych-2026-scicoqa,
  title = "{SciCoQA}: Quality Assurance for Scientific Paper{--}Code Alignment",
  author = "Baumg{\"a}rtner, Tim and
    Gurevych, Iryna",
  editor = "Liakata, Maria and
    Moreira, Viviane P. and
    Zhang, Jiajun and
    Jurgens, David",
  booktitle = "Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)",
  month = jul,
  year = "2026",
  address = "San Diego, California, United States",
  publisher = "Association for Computational Linguistics",
  url = "https://preview.aclanthology.org/ingest-acl/2026.acl-long.1795/",
  pages = "38740--38770",
  ISBN = "979-8-89176-390-6",
  abstract = "Discrepancies between scientific papers and their code undermine reproducibility, a concern that grows as automated research agents scale scientific output beyond human review capacity. Whether LLMs can reliably detect such discrepancies has not been systematically measured. To this end, we present SciCoQA, a dataset of 635 paper-code discrepancies (92 real, 543 synthetic) for this cross-modal verification task. Across 22 evaluated models, even the best-performing LLMs, Gemini 3.1 Pro and GPT-5 Mini, detect only 46.7{\%} of real-world discrepancies, revealing a critical gap in automated scientific quality assurance. We construct SciCoQA from GitHub issues and reproducibility papers, and propose a synthetic generation pipeline to scale beyond AI to Physics, Quantitative Biology, and other computational sciences. We further introduce a taxonomy of discrepancy types and categories to characterize the occurring mismatches. Our analysis shows that models particularly struggle with omitted paper details, long-context inputs, and papers outside their pre-training corpus."
}
Markdown (Informal)
[SciCoQA: Quality Assurance for Scientific Paper–Code Alignment](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1795/) (Baumgärtner & Gurevych, ACL 2026)
ACL
- Tim Baumgärtner and Iryna Gurevych. 2026. SciCoQA: Quality Assurance for Scientific Paper–Code Alignment. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 38740–38770, San Diego, California, United States. Association for Computational Linguistics.