@inproceedings{liu-yu-2025-mt2st,
title = "{MT}2{ST}: Adaptive Multi-Task to Single-Task Learning",
author = "Liu, Dong and
Yu, Yanxuan",
editor = "Kriz, Reno and
Murray, Kenton",
booktitle = "Proceedings of the 1st Workshop on Multimodal Augmented Generation via Multimodal Retrieval (MAGMaR 2025)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/landing_page/2025.magmar-1.8/",
pages = "79--89",
ISBN = "979-8-89176-280-0",
abstract = "We propose \textbf{MT2ST}, a general and efficient framework for accelerating multi-task training by progressively transitioning to single-task optimization. Unlike conventional multi-task learning (MTL) or single-task fine-tuning (STL), MT2ST dynamically adjusts the training focus via two complementary strategies: \textit{Diminish}, which gradually down-weights auxiliary losses, and \textit{Switch}, which explicitly switches to the primary task at a scheduled point. We demonstrate the effectiveness of MT2ST across three key paradigms: representation learning, transformers, and diffusion models, covering both unimodal (text/image) and multimodal (vision-language) tasks. Extensive experiments show that MT2ST significantly improves training efficiency{---}achieving up to 56{\%} FLOPs compression{---}while maintaining or surpassing task performance. These results suggest MT2ST as a general-purpose solution for scalable and adaptive multi-task training. Although this work is general-purpose, it is especially suitable for multimodal settings such as VQA or vision-language retrieval, where auxiliary pretraining (e.g., masked language modeling or contrastive learning) often diverges from final objectives. We include a VQA case study and outline its efficiency for multimodal retrieval."
}
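The abstract's two scheduling strategies lend themselves to a compact illustration. Below is a minimal Python sketch of how *Diminish* (gradual down-weighting of auxiliary losses) and *Switch* (a hard transition to the primary task at a scheduled point) could be realized as auxiliary-loss weight schedules. The function names, the linear decay used for Diminish, and the `switch_point` parameter are all assumptions for illustration, not the paper's actual implementation.

```python
def mt2st_aux_weight(step: int, total_steps: int,
                     strategy: str = "diminish",
                     switch_point: float = 0.5) -> float:
    """Weight applied to auxiliary-task losses at a given training step.

    'diminish': gradually down-weights auxiliary losses toward zero
                (linear decay here is an assumption; the paper may use
                a different schedule).
    'switch':   keeps full multi-task weight until the scheduled point,
                then drops to pure single-task training.
    """
    progress = step / total_steps
    if strategy == "diminish":
        return max(0.0, 1.0 - progress)
    elif strategy == "switch":
        return 1.0 if progress < switch_point else 0.0
    raise ValueError(f"unknown strategy: {strategy}")


def mt2st_total_loss(primary_loss: float, aux_losses: list[float],
                     step: int, total_steps: int,
                     strategy: str = "diminish") -> float:
    """Combine primary and auxiliary losses under the MT2ST-style schedule."""
    w = mt2st_aux_weight(step, total_steps, strategy)
    return primary_loss + w * sum(aux_losses)
```

Under either schedule, training begins as conventional multi-task learning and ends as single-task optimization; the two strategies differ only in whether that transition is gradual or abrupt.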