% NAACL-HLT 2019 paper, ACL Anthology ID N19-1201.
% The url field uses the canonical Anthology address (not a temporary preview build).
@inproceedings{mager-etal-2019-subword,
  title     = {Subword-Level Language Identification for Intra-Word Code-Switching},
  author    = {Mager, Manuel and
               {\c{C}}etino{\u{g}}lu, {\"O}zlem and
               Kann, Katharina},
  editor    = {Burstein, Jill and
               Doran, Christy and
               Solorio, Thamar},
  booktitle = {Proceedings of the 2019 Conference of the North {American} Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/N19-1201/},
  doi       = {10.18653/v1/N19-1201},
  pages     = {2005--2011},
  abstract  = {Language identification for code-switching (CS), the phenomenon of alternating between two or more languages in conversations, has traditionally been approached under the assumption of a single language per token. However, if at least one language is morphologically rich, a large number of words can be composed of morphemes from more than one language (intra-word CS). In this paper, we extend the language identification task to the subword-level, such that it includes splitting mixed words while tagging each part with a language ID. We further propose a model for this task, which is based on a segmental recurrent neural network. In experiments on a new Spanish{--}Wixarika dataset and on an adapted German{--}Turkish dataset, our proposed model performs slightly better than or roughly on par with our best baseline, respectively. Considering only mixed words, however, it strongly outperforms all baselines.},
}
Markdown (Informal)
[Subword-Level Language Identification for Intra-Word Code-Switching](https://aclanthology.org/N19-1201/) (Mager et al., NAACL 2019)
ACL
- Manuel Mager, Özlem Çetinoğlu, and Katharina Kann. 2019. Subword-Level Language Identification for Intra-Word Code-Switching. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pages 2005–2011, Minneapolis, Minnesota. Association for Computational Linguistics.