% Workshop paper record; the lines after the entry look like leftovers of a citation-export page.
% NOTE(review): the url field targets a preview/ingest host -- confirm the canonical address before citing.
% NOTE(review): address appears to hold the workshop venue rather than a publisher city -- verify against the target style.
@inproceedings{dei-2026-detecting-reported,
  author    = {Dei, Agustin},
  title     = {Detecting reported speech as a token classification task: an application to {Classical} {Latin}?},
  editor    = {Alves, Diego and
               Bizzoni, Yuri and
               Degaetano-Ortlieb, Stefania and
               Kazantseva, Anna and
               Pagel, Janis and
               Szpakowicz, Stan},
  booktitle = {Proceedings of the 10th Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  pages     = {251--256},
  isbn      = {979-8-89176-373-9},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.latechclfl-1.24/},
  abstract  = {This paper presents the first application of an automatic token-classification approach for detecting reported speech spans in Classical Latin using transformer-based neural architectures. Focusing on Seneca the Elder{'}s Declamatory Anthology, the study addresses the text{'}s highly polyphonic nature, resulting from the use of reported speech. Instead of relying exclusively on sentence-level syntactic information, the proposed approach treats reported speech detection as a token-level sequence labeling problem. This enables the identification of reported speech spans extending across multiple sentences. We fine-tune three Latin neural language models {---}LatinBERT, LaBERTa, and PhilBERTa{---} for binary token-level classification and conduct experiments both with and without punctuation. The results show that RoBERTa-based models effectively identify reported speech, with LaBERTa achieving the best performance (F1 scores above 0.90).},
}
Markdown (Informal)
[Detecting reported speech as a token classification task: an application to Classical Latin?](https://preview.aclanthology.org/ingest-eacl/2026.latechclfl-1.24/) (Dei, LaTeCH-CLfL 2026)
ACL