% Conference paper from LREC 2026 (main volume, per the url field below).
% Typed as inproceedings with the venue name in booktitle; the non-numeric
% Anthology volume id is kept in an ignored annotation field, not in volume.
@inproceedings{ing-etal-2026-phrase,
  title         = {Phrase-Level Segmentation on Medieval Corpora for Aligning Multilingual Texts},
  author        = {Ing, Lucence and
                   Gille Levenson, Matthias and
                   Macedo, Carolina},
  editor        = {Piperidis, Stelios and
                   Bel, N{\'u}ria and
                   van den Heuvel, Henk and
                   Ide, Nancy and
                   Krek, Simon and
                   Toral, Antonio},
  booktitle     = {International Conference on Language Resources and Evaluation},
  month         = may,
  year          = {2026},
  address       = {Palma de Mallorca, Spain},
  publisher     = {ELRA Language Resource Association},
  url           = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.72/},
  pages         = {936--946},
  internal-note = {ACL Anthology volume id: main},
  abstract      = {This paper presents an approach to multilingual alignment for medieval languages, focusing on the prior step of ``phrase'' segmentation. It outlines the challenges posed by historical data and describes different strategies for segmenting texts in multiple languages. It releases a gold-standard segmentation corpus based on various literary and historical works from the late Middle Ages in Europe. This corpus consists of texts in seven medieval languages (French, Castilian, Catalan, Portuguese, Latin, Italian, English). Several architectures are tested with both in-domain and out-of-domain evaluation sets.},
}
Markdown (Informal)
[Phrase-Level Segmentation on Medieval Corpora for Aligning Multilingual Texts](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.72/) (Ing et al., LREC 2026)
ACL