% Gao et al., CCL 2025. Abstract normalized: wrapper quote marks, PDF
% line-break hyphens, and missing spaces after punctuation were removed.
@inproceedings{gao-etal-2025-self,
  title     = {{Self-Preference}: An Automated Method for Preference-Aligned Data Constructed from Business Metrics},
  author    = {Gao, Feng and
               Zhang, Xuan and
               Ni, Boyi and
               Wang, Chunping and
               Chen, Lei},
  editor    = {Sun, Maosong and
               Duan, Peiyong and
               Liu, Zhiyuan and
               Xu, Ruifeng and
               Sun, Weiwei},
  booktitle = {Proceedings of the 24th {China} National Conference on Computational Linguistics ({CCL} 2025)},
  month     = aug,
  year      = {2025},
  address   = {Jinan, China},
  publisher = {Chinese Information Processing Society of China},
  url       = {https://preview.aclanthology.org/ingest-ccl/2025.ccl-1.66/},
  pages     = {864--879},
  abstract  = {Large language models (LLMs) have become integral components of various AI solutions, with the reinforcement learning from human feedback (RLHF) stage playing a critical role in aligning model outputs with human preferences. However, generating the human preference data required for RLHF is often costly and time-consuming due to its reliance on human evaluation. This study addresses this challenge within the dialogue scenarios of the fintech industry. We leverage rich, non-confidential, multi-turn dialogue data, such as call center dialogue records, which include associated business metrics (e.g., problem-solving rates, turnover ratios) to construct preference-aligned data. We introduce Self-Preference, an automated method for creating preference-aligned data guided by these objective business metrics. The approach involves clustering dialogue histories based on their semantic representations and calculating a well-designed conditional probability ratio that correlates sequences with business metrics to generate preference data. In contrast to traditional preference alignment data generation methods that depend on subjective human evaluations, Self-Preference significantly reduces labeling costs and mitigates model-induced biases. Experimental results indicate that models trained with Self-Preference generated data demonstrate a strong positive correlation with target business metrics, highlighting the method's effectiveness in facilitating efficient, goal-oriented alignment of LLMs.},
}
Markdown (Informal)
[Self-Preference: An Automated Method for Preference-Aligned Data Constructed from Business Metrics](https://preview.aclanthology.org/ingest-ccl/2025.ccl-1.66/) (Gao et al., CCL 2025)
ACL