@comment{
  Conference-paper record for the DynamicFocalPO paper; the booktitle places
  it in the Findings of ACL 2026 volume.
  Whole words are brace-protected in title and booktitle so that styles which
  apply sentence casing keep their capitalisation.
  NOTE(review): the url field points at a preview/ingest host rather than the
  main Anthology host; confirm the canonical URL before relying on it.
}
@inproceedings{zhou-etal-2026-dynamicfocalpo,
  title     = {{DynamicFocalPO}: Adaptive Focusing Strategy for Preference Optimization},
  author    = {Zhou, Shu and
               Chen, Junan and
               Ling, Rui and
               Wang, Xin and
               Fan, Tao and
               Wang, Hao},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1009/},
  pages     = {20206--20221},
  isbn      = {979-8-89176-395-1},
  abstract  = {Recent preference optimization algorithms such as Direct Preference Optimization (DPO) have become prevalent for aligning large language models (LLMs) with human preferences. FocalPO improves upon DPO by introducing a modulating factor that down-weighs misranked preference pairs. However, using a fixed modulating factor throughout training is suboptimal, as the model{'}s learning capacity evolves during training. We introduce DynamicFocalPO, which employs a dynamic focusing strategy that adapts over the course of training. Inspired by curriculum learning, our method initially focuses on correctly ranked samples to establish a solid foundation, then gradually incorporates harder samples as training progresses. Experiments demonstrate that DynamicFocalPO surpasses both DPO and FocalPO on benchmarks including Alpaca Eval 2.0 and Arena-Hard using Mistral-Base-7B and Llama-3-Instruct-8B. We further provide theoretical analysis showing that the dynamic schedule enables adaptive entropy regularization and selective gradient suppression.},
}

Markdown (Informal)
[DynamicFocalPO: Adaptive Focusing Strategy for Preference Optimization](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1009/) (Zhou et al., Findings 2026)
ACL