% Workshop paper (pp. 95--105) in the 2021 NLP for Digital Humanities proceedings.
% The title uses ASCII apostrophes inside the curly double quotes: a backtick
% would be typeset by LaTeX as an opening single quote rather than an apostrophe.
@inproceedings{clerice-2021-dont,
  title     = {{\textquotedblleft}Don't worry, it's just noise{\textquotedblright}: quantifying the impact of files treated as single textual units when they are really collections},
  author    = {Cl{\'e}rice, Thibault},
  editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and Alnajjar, Khalid and Partanen, Niko and Rueter, Jack},
  booktitle = {Proceedings of the Workshop on Natural Language Processing for Digital Humanities},
  month     = dec,
  year      = {2021},
  address   = {NIT Silchar, India},
  publisher = {NLP Association of India (NLPAI)},
  url       = {https://preview.aclanthology.org/jlcl-multiple-ingestion/2021.nlp4dh-1.11/},
  pages     = {95--105},
}