% NOTE(review): the url below points at a preview/ingest host; confirm the
% permanent address once the proceedings are published.
@inproceedings{reichbauer-etal-2026-evaluating,
  title     = {Evaluating {Latin} and {Ancient} {Greek} Sentence Alignment through Parallel Sentence Mining},
  author    = {Reichbauer, Sebastian and
               Okabe, Shu and
               Fraser, Alexander},
  editor    = {Hamilton, Sil and
               {\"O}hman, Emily and
               Hicke, Rebecca M. M. and
               Bizzoni, Yuri and
               Bax, Axel and
               Matthews, Jacob A. and
               H{\"a}m{\"a}l{\"a}inen, Mika},
  booktitle = {Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities},
  month     = jul,
  year      = {2026},
  address   = {San Diego, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.nlp4dh-1.11/},
  pages     = {106--120},
  isbn      = {979-8-89176-427-9},
  abstract  = {Cross-lingual detection of intertextuality and translation in Latin and Ancient Greek through computational approaches is of great interest for classical studies. While several systems exist for parallel sentence detection, based on general multilingual or specific models for Latin{--}Ancient Greek, they have not been compared against each other. Therefore, we present a synthetic benchmark to evaluate the performance of language models regarding cross-lingual Ancient Greek and Latin parallel sentence mining. We first compare six language models to encode sentences and then further improve the cross-lingual alignment through post-processing, fine-tuning, and knowledge distillation. We find that the whitening transformation in combination with knowledge distillation provides excellent results. Specifically, SPhilBERTa, a trilingual language model for Ancient Greek and Latin, benefits the most from the improvements and achieves a substantial mining score of 97.6 on our benchmark.},
}
Markdown (Informal)
[Evaluating Latin and Ancient Greek Sentence Alignment through Parallel Sentence Mining](https://preview.aclanthology.org/ingest-acl-workshops/2026.nlp4dh-1.11/) (Reichbauer et al., NLP4DH 2026)
ACL