@inproceedings{elboher-pinter-2026-hebrew,
title = "{H}ebrew Diacritics Restoration using Visual Representation",
author = "Elboher, Yair and
Pinter, Yuval",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Plum, Alistair and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the Second Workshop on Language Models for Low-Resource Languages ({L}o{R}es{LM} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/manual-author-scripts/2026.loreslm-1.44/",
pages = "504--514",
ISBN = "979-8-89176-377-7",
abstract = "Diacritics restoration in Hebrew is a fundamental task for ensuring accurate word pronunciation and disambiguating textual meaning. Despite the language{'}s high degree of ambiguity when unvocalized, recent machine learning approaches have significantly advanced performance on this task. In this work, we present DiVRit, a novel system for Hebrew diacritization that frames the task as a zero-shot classification problem. Our approach operates at the word level, selecting the most appropriate diacritization pattern for each undiacritized word from a dynamically generated candidate set, conditioned on the surrounding textual context. A key innovation of DiVRit is its use of a Hebrew Visual Language Model to process diacritized candidates as images, allowing diacritic information to be embedded directly within their vector representations while the surrounding context remains tokenization-based. Through a comprehensive evaluation across various configurations, we demonstrate that the system effectively performs diacritization without relying on complex, explicit linguistic analysis. Notably, in an ``oracle'' setting where the correct diacritized form is guaranteed to be among the provided candidates, DiVRit achieves a high level of accuracy. Furthermore, strategic architectural enhancements and optimized training methodologies yield significant improvements in the system{'}s overall generalization capabilities. These findings highlight the promising potential of visual representations for accurate and automated Hebrew diacritization."
}Markdown (Informal)
[Hebrew Diacritics Restoration using Visual Representation](https://preview.aclanthology.org/manual-author-scripts/2026.loreslm-1.44/) (Elboher & Pinter, LoResLM 2026)
ACL