% HPLT expanded multilingual dataset paper, ACL 2025 main conference (Volume 1: Long Papers).
% The url field uses the canonical ACL Anthology address for ID 2025.acl-long.854;
% the previous value pointed at a temporary "preview" ingestion build.
@inproceedings{burchell-etal-2025-expanded,
  title     = {An Expanded Massive Multilingual Dataset for High-Performance Language Technologies ({HPLT})},
  author    = {Burchell, Laurie
               and De Gibert Bonet, Ona
               and Arefyev, Nikolay
               and Aulamo, Mikko
               and Ba{\~n}{\'o}n, Marta
               and Chen, Pinzhen
               and Fedorova, Mariia
               and Guillou, Liane
               and Haddow, Barry
               and Haji{\v{c}}, Jan
               and Helcl, Jind{\v{r}}ich
               and Henriksson, Erik
               and Klimaszewski, Mateusz
               and Komulainen, Ville
               and Kutuzov, Andrey
               and Kyt{\"o}niemi, Joona
               and Laippala, Veronika
               and M{\ae}hlum, Petter
               and Malik, Bhavitvya
               and Mehryary, Farrokh
               and Mikhailov, Vladislav
               and Moghe, Nikita
               and Myntti, Amanda
               and O{'}Brien, Dayy{\'a}n
               and Oepen, Stephan
               and Pal, Proyag
               and Piha, Jousia
               and Pyysalo, Sampo
               and Ram{\'i}rez-S{\'a}nchez, Gema
               and Samuel, David
               and Stepachev, Pavel
               and Tiedemann, J{\"o}rg
               and Vari{\v{s}}, Du{\v{s}}an
               and Vojt{\v{e}}chov{\'a}, Tereza
               and Zaragoza-Bernabeu, Jaume},
  editor    = {Che, Wanxiang
               and Nabende, Joyce
               and Shutova, Ekaterina
               and Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.854/},
  pages     = {17452--17485},
  isbn      = {979-8-89176-251-0},
}