% Conference paper: Alqahtani, Ghoneim and Diab, AMTA 2016 (MT Researchers' Track), pp. 191--204.
% Notes for maintainers:
%   - title: proper nouns are brace-protected as whole words so that styles which
%     sentence-case titles keep "Arabic-English" capitalised.
%   - month: built from the predefined oct/nov macros joined with #, so the month
%     names are rendered by the bibliography style; "--" typesets the range dash.
%   - address: presumably the conference venue rather than the publisher's city,
%     as is usual for ACL Anthology exports -- confirm before reusing elsewhere.
%   - url: canonical ACL Anthology address for record 2016.amta-researchers.15.
@inproceedings{alqahtani-etal-2016-investigating,
  title     = {Investigating the Impact of Various Partial Diacritization Schemes on {Arabic}-{English} Statistical Machine Translation},
  author    = {Alqahtani, Sawsan and
               Ghoneim, Mahmoud and
               Diab, Mona},
  editor    = {Green, Spence and
               Schwartz, Lane},
  booktitle = {Conferences of the Association for Machine Translation in the Americas: {MT} Researchers' Track},
  month     = oct # " 28 -- " # nov # " 1",
  year      = {2016},
  address   = {Austin, TX, USA},
  publisher = {The Association for Machine Translation in the Americas},
  url       = {https://aclanthology.org/2016.amta-researchers.15/},
  pages     = {191--204},
  abstract  = {Most diacritics in Arabic represent short vowels. In Arabic orthography, such diacritics are considered optional. The absence of these diacritics naturally leads to significant word ambiguity to top the inherent ambiguity present in fully diacritized words. Word ambiguity is a significant impediment for machine translation. Despite the ambiguity presented by lack of diacritization, context helps ameliorate the situation. Identifying the appropriate amount of diacritic restoration to reduce word sense ambiguity in the context of machine translation is the object of this paper. Diacritic marks help reduce the number of possible lexical word choices assigned to a source word which leads to better quality translated sentences. We investigate a variety of (linguistically motivated) partial diacritization schemes that preserve some of the semantics that in essence complement the implicit contextual information present in the sentences. We also study the effect of training data size and report results on three standard test sets that represent a combination of different genres. The results show statistically significant improvements for some schemes compared to two baselines: text with no diacritics (the typical writing system adopted for Arabic) and text that is fully diacritized.},
}
Markdown (Informal)
[Investigating the Impact of Various Partial Diacritization Schemes on Arabic-English Statistical Machine Translation](https://aclanthology.org/2016.amta-researchers.15/) (Alqahtani et al., AMTA 2016)
ACL