% Conference-paper record for "Compatibility-Aware Dynamic Fine-Tuning for
% Large Language Models" (Zhou, Sheng, Wang, and Shen, 2026), published in
% the Long Papers volume of the 64th ACL annual meeting proceedings.
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% path -- confirm whether a permanent URL or a DOI should replace it.
@inproceedings{zhou-etal-2026-compatibility,
  title     = {Compatibility-Aware Dynamic Fine-Tuning for Large Language Models},
  author    = {Zhou, Yucheng and
               Sheng, Junwei and
               Wang, Qianning and
               Shen, Jianbing},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1383/},
  pages     = {29997--30008},
  isbn      = {979-8-89176-390-6},
  abstract  = {Supervised Fine-Tuning (SFT) is the predominant paradigm for aligning large language models (LLMs), yet it suffers from optimization instability and limited generalization. Recent work attributes this issue to pathological gradient scaling and proposes Dynamic Fine-Tuning (DFT) to correct it at the token level. However, DFT assumes all demonstrations are equally suitable learning targets, an assumption violated by the strong heterogeneity of large-scale instruction data, where demonstration-policy mismatch induces high-variance updates at the sample level. We introduce Compatibility-Aware Dynamic Fine-Tuning (CADFT), a principled extension of DFT that controls sample-level optimization variance. CADFT derives a dynamic, policy-dependent compatibility signal from model likelihoods to modulate supervised updates, suppressing high-variance gradients from incompatible demonstrations. We further propose a delayed, low-frequency compatibility-guided rewriting strategy to transform persistently incompatible demonstrations into learnable targets. We show that CADFT can be interpreted as a variance-controlled estimator that generalizes token-level stabilization in DFT to the sample level. Extensive experiments demonstrate improved stability, generalization, and cold-start reinforcement learning initialization, while remaining fully supervised and free of reward modeling.},
}

Markdown (Informal)
[Compatibility-Aware Dynamic Fine-Tuning for Large Language Models](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1383/) (Zhou et al., ACL 2026)
ACL
- Yucheng Zhou, Junwei Sheng, Qianning Wang, and Jianbing Shen. 2026. Compatibility-Aware Dynamic Fine-Tuning for Large Language Models. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 29997–30008, San Diego, California, United States. Association for Computational Linguistics.