@comment{
  Findings of EMNLP 2025 paper introducing the CodeContests+ dataset.
  The url field uses the canonical aclanthology.org record rather than a
  temporary preview-build link; its identifier matches the DOI suffix.
  The dataset name is brace-protected as a whole word so sentence-casing
  styles keep its capitalisation.
}
@inproceedings{wang-etal-2025-codecontests,
  author    = {Wang, Zihan and
               Liu, Siyao and
               Sun, Yang and
               Ding, Ming and
               Li, Hongyan},
  title     = {{CodeContests+}: High-Quality Test Case Generation for Competitive Programming},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {5576--5600},
  isbn      = {979-8-89176-335-7},
  doi       = {10.18653/v1/2025.findings-emnlp.299},
  url       = {https://aclanthology.org/2025.findings-emnlp.299/},
  abstract  = {Competitive programming, due to its high reasoning difficulty and precise correctness feedback, has become a key task for both training and evaluating the reasoning capabilities of large language models (LLMs). However, while a large amount of public problem data, such as problem statements and solutions, is available, the test cases of these problems are often difficult to obtain. Therefore, test case generation is a necessary task for building large-scale datasets, and the quality of the test cases directly determines the accuracy of the evaluation. In this paper, we introduce an LLM-based agent system that creates high-quality test cases for competitive programming problems. We apply this system to the CodeContests dataset and propose a new version with improved test cases, named CodeContests+. We evaluated the quality of test cases in CodeContests+. First, we used 1.72 million submissions with pass/fail labels to examine the accuracy of these test cases in evaluation. The results indicated that CodeContests+ achieves significantly higher accuracy than CodeContests, particularly with a notably higher True Positive Rate (TPR). Subsequently, our experiments in LLM Reinforcement Learning (RL) further confirmed that improvements in test case quality yield considerable advantages for RL.},
}

Markdown (Informal)
[CodeContests+: High-Quality Test Case Generation for Competitive Programming](https://aclanthology.org/2025.findings-emnlp.299/) (Wang et al., Findings 2025)
ACL