% NOTE(review): the url field below points at the ACL Anthology preview/ingest host; confirm the canonical URL before citing.
@inproceedings{li-etal-2026-anchoring,
  title     = {Anchoring the Affective Manifold: Learning Canonical and Disentangled Representations via Generative Cross-Modal Alignment},
  author    = {Li, Weibin and
               Cheng, Jintao and
               Tang, Xiaoyu and
               Vong, Chi Man},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1929/},
  pages     = {41605--41614},
  isbn      = {979-8-89176-390-6},
  abstract  = {Dominant multimodal emotion recognition paradigms often neglect the intrinsic geometric structure of affect, resulting in representations heavily entangled with non-affective factors. To address this, we propose a Canonical Disentangled Multimodal Generative Framework aimed at recovering the canonical affective manifold from raw data. We explicitly decompose the latent space into a canonical Shared Affective Subspace ($z_{vad}$) and a Private Modality Subspace ($z_{priv}$). We facilitate this factorization through Supervised Manifold Anchoring and Cross-Modal Manifold Alignment. Experiments demonstrate that our model effectively disentangles affect from private attributes (e.g., identity), achieving superior robustness in zero-shot cross-domain transfer compared to fully supervised baselines, while enabling controllable emotion generation.},
}
Markdown (Informal)
[Anchoring the Affective Manifold: Learning Canonical and Disentangled Representations via Generative Cross-Modal Alignment](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1929/) (Li et al., ACL 2026)
ACL