@comment{
  Workshop paper from W-NUT 2018 (co-located with EMNLP 2018), ACL Anthology
  ID W18-6107. The url field holds the canonical Anthology address rather
  than a preview/staging build; the doi field is the persistent identifier.
  Case-sensitive words in title/booktitle are brace-protected as whole words
  so that sentence-casing styles leave them intact.
}
@inproceedings{mandal-nanmaran-2018-normalization,
  author    = {Mandal, Soumil and
               Nanmaran, Karthick},
  title     = {Normalization of Transliterated Words in Code-Mixed Data Using {Seq2Seq} Model \& {Levenshtein} Distance},
  editor    = {Xu, Wei and
               Ritter, Alan and
               Baldwin, Tim and
               Rahimi, Afshin},
  booktitle = {Proceedings of the 2018 {EMNLP} Workshop {W-NUT}: The 4th Workshop on Noisy User-generated Text},
  month     = nov,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  pages     = {49--53},
  doi       = {10.18653/v1/W18-6107},
  url       = {https://aclanthology.org/W18-6107/},
  abstract  = {Building tools for code-mixed data is rapidly gaining popularity in the NLP research community as such data is exponentially rising on social media. Working with code-mixed data contains several challenges, especially due to grammatical inconsistencies and spelling variations in addition to all the previous known challenges for social media scenarios. In this article, we present a novel architecture focusing on normalizing phonetic typing variations, which is commonly seen in code-mixed data. One of the main features of our architecture is that in addition to normalizing, it can also be utilized for back-transliteration and word identification in some cases. Our model achieved an accuracy of 90.27\% on the test data.},
}
Markdown (Informal)
[Normalization of Transliterated Words in Code-Mixed Data Using Seq2Seq Model & Levenshtein Distance](https://aclanthology.org/W18-6107/) (Mandal & Nanmaran, WNUT 2018)
ACL