% Conference paper from the NLP4DH 2024 proceedings (ACL Anthology ID 2024.nlp4dh-1.10).
% The url field uses the permanent aclanthology.org address; the doi field is the
% preferred persistent identifier.
@inproceedings{pawlowski-walkowiak-2024-nlp,
  title     = {{NLP} for Digital Humanities: Processing Chronological Text Corpora},
  author    = {Paw{\l}owski, Adam and
               Walkowiak, Tomasz},
  editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and
               {\"O}hman, Emily and
               Miyagawa, So and
               Alnajjar, Khalid and
               Bizzoni, Yuri},
  booktitle = {Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities},
  month     = nov,
  year      = {2024},
  address   = {Miami, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.nlp4dh-1.10/},
  doi       = {10.18653/v1/2024.nlp4dh-1.10},
  pages     = {105--112},
  abstract  = {The paper focuses on the integration of Natural Language Processing (NLP) techniques to analyze extensive chronological text corpora. This research underscores the synergy between humanistic inquiry and computational methods, especially in the processing and analysis of sequential textual data known as lexical series. A reference workflow for chronological corpus analysis is introduced, outlining the methodologies applicable to the ChronoPress corpus, a data set that encompasses 22 years of Polish press from 1945 to 1966. The study showcases the potential of this approach in uncovering cultural and historical patterns through the analysis of lexical series. The findings highlight both the challenges and opportunities present in leveraging lexical series analysis within Digital Humanities, emphasizing the necessity for advanced data filtering and anomaly detection algorithms to effectively manage the vast and intricate datasets characteristic of this field.},
}
Markdown (Informal)
[NLP for Digital Humanities: Processing Chronological Text Corpora](https://aclanthology.org/2024.nlp4dh-1.10/) (Pawłowski & Walkowiak, NLP4DH 2024)
ACL