@comment{
  Findings of ACL 2026 paper on writing-centric benchmarking and reward
  modeling (W2Bench / WRL), in ACL Anthology export layout.
  NOTE(review): the url field targets a preview.aclanthology.org "ingest-acl"
  path, presumably a staging location; confirm and switch to the canonical
  Anthology URL (and add a doi) once the volume is published.
}
@inproceedings{ren-etal-2026-coarse,
  title     = {From Coarse to Fine: Benchmarking and Reward Modeling for Writing-Centric Generation Tasks},
  author    = {Ren, Qingyu and
               Pan, Tianjun and
               Chen, Xingzhou and
               Wang, Xuhong},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.134/},
  pages     = {2796--2810},
  isbn      = {979-8-89176-395-1},
  abstract  = {Large language models have achieved remarkable progress in text generation but still struggle with generative writing tasks. In terms of evaluation, existing evaluation benchmarks include few requirement types and writing reward models are not evaluated. In terms of training, existing studies often enhance writing ability through reinforcement learning with verifiable rewards (RLVR). However, existing reward model training remains coarse-grained. To address these issues, we introduce W{\texttwosuperior}Bench, a comprehensive evaluation benchmark, and WRL, a fine-grained training framework. W{\texttwosuperior}Bench covers five task categories and seven requirement types, enabling systematic evaluation of both writing and writing reward models by measuring the correlation between reward rankings and golden rankings. WRL constructs positive and negative samples by dropping instruction requirements to construct positive and negative examples, allowing more precise reward model training. Experiments show that our models achieve substantial improvements on various writing benchmarks and exhibit strong generalization. We will release our code and data to support future research.},
}
Markdown (Informal)
[From Coarse to Fine: Benchmarking and Reward Modeling for Writing-Centric Generation Tasks](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.134/) (Ren et al., Findings 2026)
ACL