% LREC-COLING 2024 proceedings paper (ACL Anthology ID 2024.lrec-main.31).
% The url field uses the permanent aclanthology.org address; the export this
% entry came from carried a temporary preview-build host instead.
@inproceedings{palomar-giner-etal-2024-curated,
  title     = {A {CURATE}d {CAT}alog: Rethinking the Extraction of Pretraining Corpora for Mid-Resourced Languages},
  author    = {Palomar-Giner, Jorge and
               Saiz, Jose Javier and
               Espu{\~n}a, Ferran and
               Mina, Mario and
               Da Dalt, Severino and
               Llop, Joan and
               Ostendorff, Malte and
               Ortiz Suarez, Pedro and
               Rehm, Georg and
               Gonzalez-Agirre, Aitor and
               Villegas, Marta},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.31/},
  pages     = {335--349},
}