@inproceedings{qiu-etal-2025-training,
title = "Training Medical {QA} Models Based on Mixed Rewards from Multiple-Choice and Open-Ended Questions",
author = "Qiu, Yue and
Ting, Yujan and
Dong, Pei and
Chen, Terrence and
Huang, Weijing",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.findings-emnlp.463/",
doi = "10.18653/v1/2025.findings-emnlp.463",
pages = "8721--8729",
ISBN = "979-8-89176-335-7",
abstract = "Reinforcement learning (RL) for large language models (LLMs) typically requires clear reward signals, which are often unavailable for open-ended (OE) questions where answer evaluation is ambiguous without scalable expert labeling. We investigate whether LLMs benefit from training on mixed data with varying reward clarity. Our approach combines Multiple-choice questions (MCQs), which offer clear binary rewards, with OE questions, for which we use simpler, potentially noisy rewards such as Jaccard similarity or LLM-based evaluators. We hypothesize that MCQs can stabilize training when mixed with OE questions. Our experiments show this mixed-data approach consistently improves medical question-answering performance across model scales."
}
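The abstract describes a mixed-reward scheme: a clear binary reward for MCQs and a simpler, potentially noisy reward (e.g. Jaccard similarity) for open-ended questions. The Python sketch below illustrates one possible form of such a mixed reward; it is not taken from the paper, and the function names, the `sample` dictionary keys, and the whitespace token-level Jaccard computation are illustrative assumptions.

```python
# Minimal sketch (assumptions, not the paper's implementation) of a mixed
# reward: binary for multiple-choice questions, Jaccard similarity for
# open-ended questions.

def mcq_reward(predicted_choice: str, gold_choice: str) -> float:
    """Clear binary reward for a multiple-choice question."""
    return 1.0 if predicted_choice.strip().lower() == gold_choice.strip().lower() else 0.0

def jaccard_reward(predicted_answer: str, reference_answer: str) -> float:
    """Potentially noisy reward for an open-ended question: token-level Jaccard similarity."""
    pred_tokens = set(predicted_answer.lower().split())
    ref_tokens = set(reference_answer.lower().split())
    if not pred_tokens and not ref_tokens:
        return 1.0
    return len(pred_tokens & ref_tokens) / len(pred_tokens | ref_tokens)

def mixed_reward(sample: dict, model_output: str) -> float:
    """Dispatch on question type so MCQ and OE items can share one training loop."""
    if sample["type"] == "mcq":  # assumed key names: "type", "gold_choice", "reference_answer"
        return mcq_reward(model_output, sample["gold_choice"])
    return jaccard_reward(model_output, sample["reference_answer"])
```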