@comment{
  Conference paper from the NLP4DH 2026 proceedings (ACL Anthology export).
  NOTE(review): the url field below points at a preview.aclanthology.org
  ingest path; presumably this should become the canonical Anthology URL
  (and a doi be added) once the volume is published -- TODO confirm.
  The address field is kept exactly as exported.
}
@inproceedings{toutou-etal-2026-data,
  author    = {Toutou, Ammar and
               Harb, Abdelrahman and
               Basta, Christine},
  title     = {Data Contamination in Neural Hieroglyphic Translation: A Reproducibility Study},
  editor    = {Hamilton, Sil and
               {\"O}hman, Emily and
               Hicke, Rebecca M. M. and
               Bizzoni, Yuri and
               Bax, Axel and
               Matthews, Jacob A. and
               H{\"a}m{\"a}l{\"a}inen, Mika},
  booktitle = {Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities},
  month     = jul,
  year      = {2026},
  address   = {San Diego, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {50--57},
  isbn      = {979-8-89176-427-9},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.nlp4dh-1.6/},
  abstract  = {Ancient and endangered languages pose a unique challenge for NLP: their datasets are inherently scarce, difficult to expand, and built from formulaic corpora{---}making data-quality issues especially consequential yet rarely audited. Motivated by the need to understand what current NMT can realistically achieve for such languages, we investigate hieroglyphic-to-German translation, where a recent study reported 61.5 BLEU using fine-tuned M2M-100. Our reproduction yields only 37.0 BLEU with the released model. Investigating this gap, we find 32{\%} of test targets appear identically in training (16/50; 50{\%} under 8-gram overlap at 70{\%} threshold). This contamination inflates scores dramatically: contaminated samples achieve up to 83.8 BLEU / 0.924 COMET-22 versus 30.9{--}39.2 BLEU / 0.622{--}0.676 COMET-22 on clean samples across five model configurations spanning two architectures. Document-level decontamination reduces contaminated BLEU by only 4.6 points because 8/16 targets persist via other source documents{---}target-level deduplication is required. We release a decontaminated 34-sample test set and establish corrected baselines (30.9{--}39.2 BLEU), providing a realistic assessment of NMT capability for this endangered writing system.},
}
Markdown (Informal)
[Data Contamination in Neural Hieroglyphic Translation: A Reproducibility Study](https://preview.aclanthology.org/ingest-acl-workshops/2026.nlp4dh-1.6/) (Toutou et al., NLP4DH 2026)
ACL