% Kim & Yoon (2025), paper in the 24th Workshop on Biomedical Language
% Processing (ACL Anthology ID 2025.bionlp-1.24).
% The address field holds the location as given in the Anthology record;
% url is the canonical Anthology landing page rather than a preview/staging link.
@inproceedings{kim-yoon-2025-questioning,
  author    = {Kim, Siun and Yoon, Hyung-Jin},
  title     = {Questioning Our Questions: How Well Do Medical {QA} Benchmarks Evaluate Clinical Capabilities of Language Models?},
  editor    = {Demner-Fushman, Dina and Ananiadou, Sophia and Miwa, Makoto and Tsujii, Junichi},
  booktitle = {Proceedings of the 24th Workshop on Biomedical Language Processing},
  month     = aug,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {274--296},
  isbn      = {979-8-89176-275-6},
  url       = {https://aclanthology.org/2025.bionlp-1.24/},
}