@comment{
  LREC 2026 conference paper; the url slug places it in the main track.
  Typed as inproceedings because the venue is a conference: its name is
  stored in booktitle rather than journal, and the non-numeric volume
  value "main" found in the export is not carried over.
  NOTE(review): url points at a preview/ingest host; replace it with the
  permanent Anthology URL once that is confirmed to resolve.
}
@inproceedings{tsoumplekas-etal-2026-tic,
  author    = {Tsoumplekas, Georgios and
               Spyridis, Yannis and
               Argyriou, Vasileios},
  title     = {{TiC-MuFormer}: Time-Aware Caption-Integrated Multimodal Transformers for User-Level Mental Health Modeling},
  editor    = {Piperidis, Stelios and
               Bel, N{\'u}ria and
               van den Heuvel, Henk and
               Ide, Nancy and
               Krek, Simon and
               Toral, Antonio},
  booktitle = {International Conference on Language Resources and Evaluation},
  month     = may,
  year      = {2026},
  address   = {Palma de Mallorca, Spain},
  publisher = {ELRA Language Resource Association},
  pages     = {9669--9677},
  url       = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.759/},
  abstract  = {User-level affective modeling from social media requires integrating heterogeneous signals that unfold over time. While prior work has focused predominantly on textual analysis, visually expressed affect and temporal posting patterns also carry important psychological cues. However, these modalities are difficult to combine in practice due to sparse emotional evidence, asynchronous posting behavior, and frequent semantic misalignment between images and accompanying text. This paper introduces TiC-MuFormer, a time-enriched caption-integrated multimodal transformer that addresses these challenges by verbalizing visual content through image captioning before fusion and injecting temporal structure prior to cross-modal attention, enabling user trajectories to be modeled in a time-aware semantic space. We instantiate the method on a mental health detection task and demonstrate that it achieves state-of-the-art results across all user-level metrics, outperforming both unimodal and multimodal baselines. Ablation studies further show that temporal coverage, batch size and encoder choice jointly influence downstream accuracy, underscoring the importance of aligned temporal and semantic representations. Overall, this work highlights caption-guided temporal multimodality as a principled modeling strategy for general affective or psychiatric risk inference in social platforms.},
}

Markdown (Informal)
[TiC-MuFormer: Time-Aware Caption-Integrated Multimodal Transformers for User-Level Mental Health Modeling](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.759/) (Tsoumplekas et al., LREC 2026)
ACL