@inproceedings{huang-etal-2025-cross-moe,
title = "Cross-{M}o{E}: An Efficient Temporal Prediction Framework Integrating Textual Modality",
author = "Huang, Ruizheng and
Zhang, Zhicheng and
Wang, Yong",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.1520/",
pages = "29915--29926",
ISBN = "979-8-89176-332-6",
abstract = "It has been demonstrated that incorporating external information as textual modality can effectively improve time series forecasting accuracy. However, current multi-modal models ignore the dynamic and different relations between time series patterns and textual features, which leads to poor performance in temporal-textual feature fusion. In this paper, we propose a lightweight and model-agnostic temporal-textual fusion framework named Cross-MoE. It replaces Cross Attention with Cross-Ranker to reduce computational complexity, and enhances modality-aware correlation memorization with Mixture-of-Experts (MoE) networks to tolerate the distributional shifts in time series. The experimental results demonstrate a 8.78{\%} average reduction in Mean Squared Error (MSE) compared to the SOTA multi-modal time series framework. Notably, our method requires only 75{\%} of computational overhead and 12.5{\%} of activated parameters compared with Cross Attention mechanism. Our codes are available at \url{https://github.com/Kilosigh/Cross-MoE.git}"
}