% EMNLP 2025 proceedings paper, ACL Anthology ID 2025.emnlp-main.1121.
% The url field uses the canonical aclanthology.org host; the temporary
% ingestion-preview host is not a stable link target.
% NOTE(review): author "Danxin, Cui" may have family/given names swapped;
% confirm against the paper before changing.
@inproceedings{ying-etal-2025-data,
  author    = {Ying, Yizhou and Zhang, Geng and Danxin, Cui and Du, Chengyu and Yue, Guanglei and Jiang, Sihang and Liang, Jiaqing and Fu, Yifei and Hu, Hailin and Xiao, Yanghua},
  title     = {Data-Efficient Selection via Grammatical Complexity in Continual Pre-training of Domain-Specific {LLM}s},
  editor    = {Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn and Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {22066--22080},
  isbn      = {979-8-89176-332-6},
  url       = {https://aclanthology.org/2025.emnlp-main.1121/},
}