% Arikan, Gungor and Uskudarli, RANLP 2019 (ACL Anthology ID R19-1009).
@inproceedings{arikan-etal-2019-detecting,
    title = "Detecting Clitics Related Orthographic Errors in {Turkish}",
    author = "Arikan, Ugurcan and
      Gungor, Onur and
      Uskudarli, Suzan",
    editor = "Mitkov, Ruslan and
      Angelova, Galia",
    booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)",
    month = sep,
    year = "2019",
    address = "Varna, Bulgaria",
    publisher = "INCOMA Ltd.",
    url = "https://aclanthology.org/R19-1009/",
    doi = "10.26615/978-954-452-056-4_009",
    pages = "71--76",
    abstract = "For the spell correction task, vocabulary based methods have been replaced with methods that take morphological and grammar rules into account. However, such tools are fairly immature, and, worse, non-existent for many low resource languages. Checking only if a word is well-formed with respect to the morphological rules of a language may produce false negatives due to the ambiguity resulting from the presence of numerous homophonic words. In this work, we propose an approach to detect and correct the {\textquotedblleft}de/da{\textquotedblright} clitic errors in Turkish text. Our model is a neural sequence tagger trained with a synthetically constructed dataset consisting of positive and negative samples. The model's performance with this dataset is presented according to different word embedding configurations. The model achieved an F1 score of 86.67{\%} on a synthetically constructed dataset. We also compared the model's performance on a manually curated dataset of challenging samples that proved superior to other spelling correctors with 71{\%} accuracy compared to the second-best (Google Docs) with an accuracy of 34{\%}."
}
Markdown (Informal)
[Detecting Clitics Related Orthographic Errors in Turkish](https://aclanthology.org/R19-1009/) (Arikan et al., RANLP 2019)
ACL