% CommonLID paper, ACL 2026 main proceedings (Volume 1: Long Papers).
% Accented author names use BibTeX "special character" form ({\'e}, {\'\i}, ...)
% so they typeset, sort and abbreviate correctly under classic BibTeX.
% NOTE(review): the url below is on a preview.aclanthology.org ingest path --
% presumably a staging link; confirm the canonical URL before relying on it.
@inproceedings{suarez-etal-2026-commonlid,
  title     = {{CommonLID}: Re-evaluating State-of-the-Art Language Identification Performance on Web Data},
  author    = {Suarez, Pedro Ortiz and
               Burchell, Laurie and
               Arnett, Catherine and
               Mosquera, Rafael and
               Monsalve, Sara Hincapi{\'e} and
               Vaughan, Thom and
               Stewart, Damian and
               Ostendorff, Malte and
               Abdulmumin, Idris and
               Marivate, Vukosi and
               Muhammad, Shamsuddeen Hassan and
               Tonja, Atnafu Lambebo and
               Al-Khalifa, Hend and
               Hammouda, Nadia Ghezaiel and
               Otiende, Verrah Akinyi and
               Wong, Tack Hwa and
               Saydaliev, Jakhongir and
               Nobakhtian, Melika and
               Habibi, Muhammad Ravi Shulthan and
               Kranti, Chalamalasetti and
               Muchemi, Carol and
               Nguyen, Khang and
               Adam, Faisal Muhammad and
               Salim, Luis Frentzen and
               Alqifari, Reem and
               Amol, Cynthia Jayne and
               Imperial, Joseph Marvin and
               Kesen, Ilker and
               Mustafid, Ahmad and
               Stepachev, Pavel and
               Choshen, Leshem and
               Anugraha, David and
               Nayel, Hamada and
               Yimam, Seid Muhie and
               Putra, Vallerie Alexandra and
               Nguyen, My Chiffon and
               Wasi, Azmine Toushik and
               Vadithya, Gouthami and
               Van Der Goot, Rob and
               C'horr, Lanwenn ar and
               Dua, Karan and
               Yates, Andrew and
               Bangera, Mithil and
               Bangera, Yeshil and
               Patel, Hitesh Laxmichand and
               Okabe, Shu and
               Ilasariya, Fenal Ashokbhai and
               Gaynullin, Dmitry and
               Winata, Genta Indra and
               Li, Yiyuan and
               Mart{\'\i}nez, Juan Pablo and
               Agarwal, Amit and
               Hanif, Ikhlasul Akmal and
               Ahmad, Raia Abu and
               Adenuga, Esther and
               Tjiaranata, Filbert Aurelian and
               Buaphet, Weerayut and
               Anugraha, Michael and
               Vajjala, Sowmya and
               Rice, Benjamin L and
               Amirudin, Azril Hafizi and
               Alabi, Jesujoba Oluwadara and
               Panda, Srikant and
               Toughrai, Yassine and
               Kyomuhendo, Bruhan and
               Ruffinelli, Daniel and
               Akshata and
               Goul{\~a}o, Manuel and
               Zhou, Ej and
               Ramirez, Ingrid Gabriela Franco and
               Aggazzotti, Cristina and
               Dobler, Konstantin and
               Kevin, Jun and
               Pag{\`e}s, Quentin and
               Andrews, Nicholas and
               Ibrahim, Nuhu and
               Ruckdeschel, Mattes and
               Keleg, Amr and
               Zhang, Mike and
               Muziri, Casper Rufaro and
               Samuel, Saron and
               Takeshita, Sotaro and
               Kerdthaisong, Kun and
               Foppiano, Luca and
               Dent, Rasul and
               Green, Tommaso and
               Wali, Ahmad Mustapha and
               Makaaka, Kamohelo and
               Feliren, Vicky and
               Idris, Inshirah and
               Celikkanat, Hande and
               Abubakar, Abdulhamid and
               Maillard, Jean and
               Sagot, Beno{\^\i}t and
               Cl{\'e}rice, Thibault and
               Murray, Kenton and
               Luger, Sarah K. K.},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1527/},
  pages     = {33063--33080},
  isbn      = {979-8-89176-390-6},
}