@inproceedings{fysikoudi-etal-2025-active,
title = "Active Curriculum Language Modeling over a Hybrid Pre-training Method",
author = "Fysikoudi, Eleni and
Lo{\'a}iciga, Sharid and
Sayeed, Asad B.",
editor = "Charpentier, Lucas and
Choshen, Leshem and
Cotterell, Ryan and
Gul, Mustafa Omer and
Hu, Michael Y. and
Liu, Jing and
Jumelet, Jaap and
Linzen, Tal and
Mueller, Aaron and
Ross, Candace and
Shah, Raj Sanjay and
Warstadt, Alex and
Wilcox, Ethan Gotlieb and
Williams, Adina",
booktitle = "Proceedings of the First BabyLM Workshop",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.babylm-main.34/",
doi = "10.18653/v1/2025.babylm-main.34",
pages = "488--495",
ISBN = "TODO",
abstract = "We apply the Active Curriculum Language Modeling (ACLM) method to the constrained pretraining setting of the 2025 BabyLM Challenge, where models are limited by both data and compute budgets. Using GPT-BERT (Charpentier and Samuel, 2024) as the base architecture, we investigate the impact of surprisal-based example selection for constructing a training curriculum. In addition, we conduct a targeted hyperparameter search over tokenizer size and batch size. Our approach yields stable pretrained models that surpass the official baseline on multiple evaluation tasks, demonstrating ACLM{'}s potential for improving performance and generalization in low-resource pretraining scenarios."
}