@comment{
  Conference paper in the Findings of ACL 2026 volume (pages 31668--31683).
  NOTE(review): the url field points at the Anthology preview/ingest host;
  confirm and replace it with the canonical Anthology URL (or a DOI) once the
  volume is published.
  NOTE(review): address holds the conference location, following the ACL
  Anthology export convention, not the publisher's city.
}
@inproceedings{guo-etal-2026-trident,
  title     = {{Trident}: Self-Supervised Preference Alignment via Triplet Regularization},
  author    = {Guo, Yingnan and
               Chen, Kejia and
               Zhang, Xiaofeng and
               Wu, Zifei and
               Zhang, Yu},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1585/},
  pages     = {31668--31683},
  isbn      = {979-8-89176-395-1},
  abstract  = {Aligning Large Vision-Language Models (LVLMs) to mitigate hallucinations typically relies on high-quality preference data. However, in self-supervised settings, standard binary preference optimization (e.g., DPO) suffers from noisy supervision and semantic ambiguity, as automatically generated chosen responses are not guaranteed to be superior to rejected ones. In this work, we propose Trident, a fully self-supervised framework that ensures robust alignment via a structured triplet paradigm. Trident autonomously constructs reliable preference triplets{---}comprising semantically enriched (chosen), degraded (rejected), and neutral (anchor) responses{---}through automated visual perturbations and self-summarization. We further introduce Trident Preference Regularization (TPR), a novel objective that utilizes an adaptive margin to enforce semantic separation between the triplet components while preventing deviation from the pretrained distribution. Despite requiring no human annotations or external reward models, Trident consistently outperforms state-of-the-art RLHF and RLAIF baselines. For instance, on LLaVA-1.5-7B, it reduces the hallucination rate on AMBER to 11.3{\%} and achieves 95.70{\%} precision on POPE using only 4k self-generated triplets and a single epoch. This validates structured triplet supervision as a scalable paradigm for robust self-supervised alignment.},
}
Markdown (Informal)
[Trident: Self-Supervised Preference Alignment via Triplet Regularization](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1585/) (Guo et al., Findings 2026)
ACL