@inproceedings{li-wan-2026-edit,
  title     = {Edit-Aware Reward Modeling for {Chinese} Grammatical Error Correction},
  author    = {Li, Yilin and
               Wan, Xiaojun},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1900/},
  pages     = {40945--40957},
  isbn      = {979-8-89176-390-6},
  abstract  = {While large language models have achieved remarkable success in various natural language processing tasks, their potential in grammatical error correction remains underexplored. Recent work has applied reinforcement learning with rule-based rewards to CGEC, but these approaches rely on coarse-grained binary signals (exact match or not) that fail to capture fine-grained quality distinctions among correction candidates. In this paper, we propose Edit-Aware Reward Model (EARM), a novel reward modeling framework that explicitly incorporates edit-awareness into preference learning for CGEC. EARM introduces a dual-granularity training objective that jointly optimizes sentence-level and token-level weighted Bradley-Terry ranking losses, where edit tokens receive higher importance weights. When integrated with GRPO, our approach achieves 61.29/63.08 on FCGEC/NaCGEC (single output), and 65.04/64.59 with best-of-16 reranking, surpassing previous best by 5.41 and 1.80 points. Extensive experiments demonstrate that learned edit-aware rewards significantly outperform rule-based alternatives for CGEC preference optimization.},
}
Markdown (Informal)
[Edit-Aware Reward Modeling for Chinese Grammatical Error Correction](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1900/) (Li & Wan, ACL 2026)
ACL