% Conference paper: PortOldBERT (Osorio and Lopes Cardoso), EACL 2026, Volume 1: Long Papers.
% NOTE(review): the url field below uses a preview.aclanthology.org staging path ("ingest-eacl");
% presumably it should be replaced by the canonical Anthology URL once one exists -- confirm.
@inproceedings{osorio-lopes-cardoso-2026-portoldbert,
  title     = {{PortOldBERT}: {Portuguese} Historical Language Models},
  author    = {Osorio, Tomas Freitas and
               Lopes Cardoso, Henrique},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.123/},
  pages     = {2691--2705},
  isbn      = {979-8-89176-380-7},
  abstract  = {Historical language models play a crucial role in the study of languages, and can benefit tasks such as named-entity recognition (NER), part-of-speech (PoS) tagging, and post-OCR correction, among others. Despite their relevance, most efforts have been concentrated on English. To the best of our knowledge, no such model exists for historical Portuguese. In this work, we introduce PortOldBERT, the first historical Portuguese encoder language model. We demonstrate its usefulness by comparing PortOldBERT{'}s performance with Albertina, the encoder on which it is based, across multiple tasks{---}pseudo-perplexity, NER, PoS tagging, word error rate (WER) prediction, and OCR error detection{---}and for different historical periods. PortOldBERT consistently outperforms Albertina in historical data, demonstrating its ability to effectively integrate historical linguistic contexts while retaining the ability to process contemporary text.}
}

Markdown (Informal)
[PortOldBERT: Portuguese Historical Language Models](https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.123/) (Osorio & Lopes Cardoso, EACL 2026)
ACL
- Tomas Freitas Osorio and Henrique Lopes Cardoso. 2026. PortOldBERT: Portuguese Historical Language Models. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 2691–2705, Rabat, Morocco. Association for Computational Linguistics.