@inproceedings{ichihara-jinnai-2025-auto,
title = "Auto-Weighted Group Relative Preference Optimization for Multi-Objective Text Generation Tasks",
author = "Ichihara, Yuki and
Jinnai, Yuu",
editor = "Potdar, Saloni and
Rojas-Barahona, Lina and
Montella, Sebastien",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2025",
    address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-industry.80/",
pages = "1134--1147",
ISBN = "979-8-89176-333-3",
    abstract = "Group Relative Policy Optimization (GRPO) is a promising approach to complex, real-world tasks, such as those involving multiple rewards or strict constraints. However, when training GRPO with multiple rewards, the weights of each reward must be decided in advance. Failing to balance the objectives adequately can lead to overfitting or insufficient learning of each reward function. To address this problem, we propose Auto-Weighted Group Relative Policy Optimization (AW-GRPO), which adjusts reward weights during training according to the progress of the learning of each objective so far. We evaluate AW-GRPO on advertising text generation, a real-world problem where the generated text must satisfy multiple objectives, such as quality and diversity, while adhering to the constraints of the media (e.g., maximum number of characters). Our results show that AW-GRPO successfully balances multiple objectives, improving the overall scores while reducing the constraint violation rate. We additionally evaluate AW-GRPO using publicly available benchmark problems for reproducibility, in which we observe the same qualitative result that the proposed method outperforms GRPO."
}
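The abstract describes the weighting mechanism only at a high level: reward weights are adjusted during training according to how much each objective has already improved. As a rough illustration of that idea (not the authors' published algorithm), the following Python sketch upweights objectives whose reward has progressed the least so far and scalarizes the per-objective rewards for a GRPO-style group advantage. All names, the inverse-progress update rule, and the numbers are illustrative assumptions.

```python
# Hypothetical sketch of auto-weighted multi-reward aggregation.
# The inverse-progress rule below is an assumption for illustration;
# the paper's actual update rule may differ.
import numpy as np

def auto_weights(initial_scores, current_scores, eps=1e-8):
    """Upweight objectives that have improved the least since training began."""
    progress = np.maximum(current_scores - initial_scores, 0.0)
    # Less progress -> larger weight; eps avoids division by zero.
    # A practical version would likely smooth or temperature-scale this.
    raw = 1.0 / (progress + eps)
    return raw / raw.sum()

def combined_reward(per_objective_rewards, weights):
    """Scalarize a vector of per-objective rewards for the group advantage."""
    return float(np.dot(weights, per_objective_rewards))

# Example: three objectives (e.g., quality, diversity, constraint satisfaction).
init = np.array([0.40, 0.40, 0.40])
curr = np.array([0.70, 0.45, 0.42])   # quality has improved the most
w = auto_weights(init, curr)
print(w)  # weight shifts toward the lagging diversity/constraint objectives
print(combined_reward(np.array([0.8, 0.5, 0.3]), w))
```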