% Conference paper record for "CodeRM-NT" (Findings of ACL 2026).
% Case-sensitive words in title/booktitle are protected with whole-word braces
% so sentence-casing styles keep them intact.
% NOTE(review): the url field points at a preview/ingest host
% (preview.aclanthology.org/ingest-acl-workshops); confirm the canonical
% Anthology URL (and a DOI, if one is assigned) before relying on this entry.
% NOTE(review): address holds the location string as exported; confirm whether
% the target style expects the publisher's city here instead.
@inproceedings{xia-etal-2026-coderm,
  author    = {Xia, Xiao and
               Zhang, Dan and
               Sun, Tianrui},
  title     = {{CodeRM-NT}: Reward Model for Code {RL} without Unit Tests},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {43316--43333},
  isbn      = {979-8-89176-395-1},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.findings-acl.2150/},
  abstract  = {Providing accurate reward signals for code generated by large language models (LLMs) is a significant challenge in applying reinforcement learning (RL) to code generation. Existing methods rely on unit tests to evaluate code correctness and provide rewards, which are hindered by the difficulty of acquiring and verifying reliable unit tests at scale. In this work, we propose CodeRM-NT, a code reward model with no reliance on unit tests. Our method leverages Monte Carlo Tree Search guided by LLMs to generate code snippets and judges execution traces to annotate code with reward signals. We use the rewards to train CodeRM-NT that is capable of providing rewards for code during RL. CodeRM-NT also facilitates curriculum learning by scoring and sorting training samples based on their difficulty. Experimental results demonstrate that training with CodeRM-NT consistently outperforms synthetic unit test-based rewards, yielding superior performance on multiple code generation benchmarks. Additionally, curriculum learning based on CodeRM-NT further enhances model performance. Our code and dataset are available at: https://github.com/THUDM/CodeRM-NT.},
}
Markdown (Informal)
[CodeRM-NT: Reward Model for Code RL without Unit Tests](https://preview.aclanthology.org/ingest-acl-workshops/2026.findings-acl.2150/) (Xia et al., Findings 2026)
ACL
- Xiao Xia, Dan Zhang, and Tianrui Sun. 2026. CodeRM-NT: Reward Model for Code RL without Unit Tests. In Findings of the Association for Computational Linguistics: ACL 2026, pages 43316–43333, San Diego, California, United States. Association for Computational Linguistics.