% Workshop paper (NLP4DH 2021). The url field is the canonical ACL Anthology
% landing page rather than a preview/staging build. {Don't} is brace-protected
% because the opening quote mark, not the D, is the first character of the
% title, so sentence-casing styles would otherwise lowercase it.
% NOTE(review): address appears to hold the workshop venue rather than a
% publisher city; kept as recorded -- confirm against the target style.
@inproceedings{clerice-2021-dont,
  author    = {Cl{\'e}rice, Thibault},
  title     = {``{Don't} worry, it's just noise'': quantifying the impact of files treated as single textual units when they are really collections},
  editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and Alnajjar, Khalid and Partanen, Niko and Rueter, Jack},
  booktitle = {Proceedings of the Workshop on Natural Language Processing for Digital Humanities},
  month     = dec,
  year      = {2021},
  address   = {NIT Silchar, India},
  publisher = {NLP Association of India (NLPAI)},
  pages     = {95--105},
  url       = {https://aclanthology.org/2021.nlp4dh-1.11/},
}