% TS-CLIP (EMNLP 2025 main-conference paper). The url field uses the canonical
% ACL Anthology address for this record; the doi field is the persistent identifier.
@inproceedings{chen-etal-2025-ts,
  title     = {{TS}-{CLIP}: Time Series Understanding by {CLIP}},
  author    = {Chen, Ziwen and
               Zhang, Xiaoyuan and
               Zhu, Ming},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.231/},
  doi       = {10.18653/v1/2025.emnlp-main.231},
  pages     = {4646--4664},
  isbn      = {979-8-89176-332-6},
  abstract  = {Contrastive Language{--}Image Pre-training (CLIP) has recently demonstrated remarkable success in aligning vision and language. Aligning time series with text leverages the rich semantic cues of language to enhance interpretability and generalization, addressing a largely underexplored area of research. Although applying the CLIP training paradigm to time-series and language pairs is promising, it may result in label collapse due to the sparse semantic annotations and the absence of visual cues in time-series data. To address this, we introduce Time Series CLIP (TS-CLIP), a novel approach that tackles label collapse using a synonym bank mechanism. Synonym bank exploits word analogy phenomena to generate potential synonym embeddings as alignment targets. Specifically, the synonym bank facilitates aligning time series with a word distribution instead of a precise textual description. We conducted extensive zero-shot and few-shot experiments on 128 sub-datasets from the UCR archive. The results show that TS-CLIP achieves state-of-the-art (SOTA) performance in zero-shot settings on 51 datasets. Comprehensive ablation studies and visualization analyzes reveal that TS-CLIP effectively aligns time series with natural language. To the best of our knowledge, this is the first foundational model to achieve general time series and natural language alignment. TS-CLIP introduces a new paradigm for the semantic understanding of time series and opens the possibility of integrating the time series modality into multimodal large models.},
}
Markdown (Informal)
[TS-CLIP: Time Series Understanding by CLIP](https://aclanthology.org/2025.emnlp-main.231/) (Chen et al., EMNLP 2025)
ACL
- Ziwen Chen, Xiaoyuan Zhang, and Ming Zhu. 2025. TS-CLIP: Time Series Understanding by CLIP. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 4646–4664, Suzhou, China. Association for Computational Linguistics.