@comment{NOTE(review): the url field of the entry below points at a preview.aclanthology.org/ingest-acl/ address; confirm whether a canonical URL should replace it once the volume is published.}
@inproceedings{chen-etal-2026-cot,
    title = "{CoT-Edit}: Reinforcement Learning of Chain-of-Thought Reasoning for Code Edit Suggestion",
    author = "Chen, Wuya and
      Yang, Yihao and
      Lin, Yue",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1407/",
    pages = "28219--28234",
    isbn = "979-8-89176-395-1",
    abstract = "Code edit suggestion, which encompasses modifying, refactoring, and maintaining existing code, represents the most frequent software development activity and has become a focal point for AI-powered tools. Traditional methods translate explicit natural language instructions into code edits, while pattern-based approaches learn from users' historical editing patterns to provide style-consistent and more accurate suggestions. However, these pattern-based methods still face two critical challenges: (1) difficulty handling edits that demand deep contextual reasoning, and (2) lack of interpretability in editing decisions. To tackle this, we propose CoT-Edit, a reinforcement learning framework that guides LLMs to discover chain-of-thought (CoT) reasoning paths for code editing without requiring human-annotated CoT data. Specifically, we design multi-step reasoning framework that enable: (1) analysis-guided code editing, and (2) seamless switching between CoT and non-CoT inference modes. Building on this, we introduce Edit-Aware Reward Modeling (EARM), a fine-grained diff-based reward approach for effective learning. Furthermore, we discover a LoRA merging strategy that enhances model generalization. Evaluations on an industrial dataset show that our approach achieves 60.2{\%} edit accuracy, outperforming all strong baselines. Online A/B tests further confirm its effectiveness in production. Code is available at https://github.com/202230483077yyh/CoT-Edit."
}
Markdown (Informal)
[CoT-Edit: Reinforcement Learning of Chain-of-Thought Reasoning for Code Edit Suggestion](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1407/) (Chen et al., Findings 2026)
ACL