% TACL journal article. The url field uses the canonical ACL Anthology
% address rather than a preview-branch deployment, which is not permanent.
@article{chang-etal-2024-characterizing,
  author    = {Chang, Tyler A. and Tu, Zhuowen and Bergen, Benjamin K.},
  title     = {Characterizing Learning Curves During Language Model Pre-Training: Learning, Forgetting, and Stability},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {12},
  pages     = {1346--1362},
  year      = {2024},
  address   = {Cambridge, MA},
  publisher = {MIT Press},
  doi       = {10.1162/tacl_a_00708},
  url       = {https://aclanthology.org/2024.tacl-1.74/},
}