% Halpern (2008), conference paper in the LREC'08 proceedings; ACL Anthology ID L08-1319.
% The url field uses the canonical Anthology address rather than a preview-branch address.
@inproceedings{halpern-2008-exploiting,
  title     = {Exploiting Lexical Resources for Disambiguating {CJK} and {Arabic} Orthographic Variants},
  author    = {Halpern, Jack},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Maegaard, Bente and
               Mariani, Joseph and
               Odijk, Jan and
               Piperidis, Stelios and
               Tapias, Daniel},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  month     = may,
  year      = {2008},
  address   = {Marrakech, Morocco},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/L08-1319/},
  abstract  = {The orthographical complexities of Chinese, Japanese, Korean (CJK) and Arabic pose a special challenge to developers of NLP applications. These difficulties are exacerbated by the lack of a standardized orthography in these languages, especially the highly irregular Japanese orthography and the ambiguities of the Arabic script. This paper focuses on CJK and Arabic orthographic variation and provides a brief analysis of the linguistic issues. The basic premise is that statistical methods by themselves are inadequate, and that linguistic knowledge supported by large-scale lexical databases should play a central role in achieving high accuracy in disambiguating and normalizing orthographic variants.},
}
Markdown (Informal)
[Exploiting Lexical Resources for Disambiguating CJK and Arabic Orthographic Variants](https://aclanthology.org/L08-1319/) (Halpern, LREC 2008)
ACL