% Conference paper from the Proceedings of the Thirteenth Language Resources
% and Evaluation Conference (2022). The url field uses the canonical
% aclanthology.org address rather than a temporary preview-branch host.
@inproceedings{abadji-etal-2022-towards,
  author    = {Abadji, Julien and Ortiz Suarez, Pedro and Romary, Laurent and Sagot, Beno{\^i}t},
  title     = {Towards a Cleaner Document-Oriented Multilingual Crawled Corpus},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Odijk, Jan and Piperidis, Stelios},
  booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {4344--4355},
  url       = {https://aclanthology.org/2022.lrec-1.463/},
}