% ACL 2025 short paper (Volume 2: Short Papers), pp. 53--58.
% The url field is the canonical ACL Anthology record, not a preview-site build.
@inproceedings{kim-etal-2025-rethinking-kenlm,
  author    = {Kim, Yungi and Ha, Hyunsoo and Lee, Sukyung and Kim, Jihoo and Yang, Seonghoon and Park, Chanjun},
  title     = {Rethinking {KenLM}: Good and Bad Model Ensembles for Efficient Text Quality Filtering in Large Web Corpora},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {53--58},
  isbn      = {979-8-89176-252-7},
  url       = {https://aclanthology.org/2025.acl-short.4/},
}