% EMNLP 2025 main-conference paper (ACL Anthology ID 2025.emnlp-main.1121).
% The url field uses the canonical aclanthology.org address, not a temporary
% preview/ingest mirror, so the link stays valid after the staging branch is removed.
% NOTE(review): author "Danxin, Cui" may have family/given names swapped -- confirm against the paper.
@inproceedings{ying-etal-2025-data,
  title     = {Data-Efficient Selection via Grammatical Complexity in Continual Pre-training of Domain-Specific {LLM}s},
  author    = {Ying, Yizhou and
               Zhang, Geng and
               Danxin, Cui and
               Du, Chengyu and
               Yue, Guanglei and
               Jiang, Sihang and
               Liang, Jiaqing and
               Fu, Yifei and
               Hu, Hailin and
               Xiao, Yanghua},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.1121/},
  doi       = {10.18653/v1/2025.emnlp-main.1121},
  pages     = {22066--22080},
  isbn      = {979-8-89176-332-6},
}