% Conference paper record (CoNLL 2026 proceedings) in ACL Anthology export form.
% NOTE(review): the url field points at a preview.aclanthology.org ingest path;
% confirm whether a canonical aclanthology.org URL (or a DOI) should replace it.
@inproceedings{liu-etal-2026-capturing,
  title     = {Capturing Classic Authorial Style in Long-Form Story Generation with {GRPO} Fine-Tuning},
  author    = {Liu, Jinlong and
               Lee, Mark G. and
               Bahja, Mohammed and
               Kovatchev, Venelin},
  editor    = {Bonial, Claire and
               Berzak, Yevgeni},
  booktitle = {Proceedings of the 30th Conference on Computational Natural Language Learning},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.31/},
  pages     = {526--543},
  isbn      = {979-8-89176-410-1},
  abstract  = {Evaluating and optimizing authorial style in long-form story generation is challenging because style judgments often rely on subjective human voting, and there is no stable automatic evaluation method. We propose a two-stage pipeline. First, we train a style-similarity judge by fine-tuning a sentence-transformer with authorship-verification supervision, and calibrate its similarity outputs into a bounded [0,1] reward. Second, we use this judge as the primary reward in Group Relative Policy Optimization (GRPO) to fine-tune an 8B story generator for style-conditioned writing, avoiding the accept/reject supervision required by Direct Preference Optimization (DPO). Across four target authors (Mark Twain, Jane Austen, Charles Dickens, Thomas Hardy), the GRPO-trained 8B model achieves higher style scores than open-weight baselines, with an average style score of 0.893 across authors. These results suggest that AV-calibrated reward modeling provides a practical mechanism for controllable long-form style transfer under moderate model size and training budget.},
}
Markdown (Informal)
[Capturing Classic Authorial Style in Long-Form Story Generation with GRPO Fine-Tuning](https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.31/) (Liu et al., CoNLL 2026)
ACL