% DIDS paper, EMNLP 2025 proceedings (ACL Anthology ID 2025.emnlp-main.215).
% The url field uses the canonical Anthology address instead of the temporary
% ingestion-preview host. NOTE(review): address carries the conference location,
% as in the Anthology export -- confirm this is what the target style expects.
@inproceedings{shi-etal-2025-dids,
  author    = {Shi, Weijie and Zhang, Jipeng and Wu, Yaguang and
               Fang, Jingzhi and Zhang, Shibo and Zhao, Yao and
               Chen, Hao and Zhang, Ruiyuan and Cui, Yue and
               Zhu, Jia and Han, Sirui and Xu, Jiajie and
               Zhou, Xiaofang},
  title     = {{DIDS}: Domain Impact-aware Data Sampling for Large Language Model Training},
  editor    = {Christodoulopoulos, Christos and Chakraborty, Tanmoy and
               Rose, Carolyn and Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {4330--4350},
  isbn      = {979-8-89176-332-6},
  url       = {https://aclanthology.org/2025.emnlp-main.215/},
}