% Conference paper from the IJCNLP-AACL 2025 proceedings (Mumbai, India).
% NOTE(review): the url below is on the preview.aclanthology.org "ingest" path,
% presumably a staging link -- confirm the canonical Anthology URL (and DOI, if
% one exists) before relying on this entry.
@inproceedings{zhuang-etal-2025-adaptive,
  title     = {Adaptive Collaborative Labeling with {MLLM}s for Low-Resource Multimodal Emotion Recognition},
  author    = {Zhuang, Wenwen and
               Xiang, Lu and
               Tang, Shubei and
               Zhang, Yaping and
               Zhou, Yu},
  editor    = {Inui, Kentaro and
               Sakti, Sakriani and
               Wang, Haofen and
               Wong, Derek F. and
               Bhattacharyya, Pushpak and
               Banerjee, Biplab and
               Ekbal, Asif and
               Chakraborty, Tanmoy and
               Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-long.152/},
  pages     = {2837--2853},
  isbn      = {979-8-89176-298-5},
  abstract  = {Multimodal emotion recognition (MER) plays a crucial role in human-centric AI applications, yet existing models struggle in low-resource scenarios due to their heavy reliance on large amounts of high-quality labeled data. To address this challenge, we propose Adaptive Collaborative Labeling for Low-Resource MER (ACL-MER), a novel framework that leverages off-the-shelf multimodal large language models (MLLMs) to effectively exploit abundant unlabeled data. Specifically, ACL-MER incorporates a diverse teacher model zoo, wherein each MLLM specializes in a specific modality and is prompted to generate chain-of-thought predictions accompanied by scalar confidence scores. Rather than directly adopting these pseudo-labels, ACL-MER introduces an adaptive refinement strategy that selectively distills knowledge based on teacher confidence, iteratively guiding the lightweight student model toward robust learning under limited supervision. Extensive experiments on two benchmarks demonstrate that ACL-MER consistently outperforms strong baselines, especially in extremely low-resource settings.},
}
Markdown (Informal)
[Adaptive Collaborative Labeling with MLLMs for Low-Resource Multimodal Emotion Recognition](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-long.152/) (Zhuang et al., IJCNLP-AACL 2025)
ACL
- Wenwen Zhuang, Lu Xiang, Shubei Tang, Yaping Zhang, and Yu Zhou. 2025. Adaptive Collaborative Labeling with MLLMs for Low-Resource Multimodal Emotion Recognition. In Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 2837–2853, Mumbai, India. The Asian Federation of Natural Language Processing and The Association for Computational Linguistics.