% Sterner (2024): workshop paper in the VarDial 2024 proceedings
% (ACL Anthology ID 2024.vardial-1.14). The url field uses the canonical
% Anthology permalink rather than a temporary preview-branch address.
@inproceedings{sterner-2024-multilingual,
  title     = {Multilingual Identification of {English} Code-Switching},
  author    = {Sterner, Igor},
  editor    = {Scherrer, Yves and
               Jauhiainen, Tommi and
               Ljube{\v{s}}i{\'c}, Nikola and
               Zampieri, Marcos and
               Nakov, Preslav and
               Tiedemann, J{\"o}rg},
  booktitle = {Proceedings of the Eleventh Workshop on {NLP} for Similar Languages, Varieties, and Dialects ({VarDial} 2024)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.vardial-1.14/},
  doi       = {10.18653/v1/2024.vardial-1.14},
  pages     = {163--173},
  abstract  = {Code-switching research depends on fine-grained language identification. In this work, we study existing corpora used to train token-level language identification systems. We aggregate these corpora with a consistent labelling scheme and train a system to identify English code-switching in multilingual text. We show that the system identifies code-switching in unseen language pairs with absolute measure 2.3--4.6\% better than language-pair-specific SoTA. We also analyse the correlation between typological similarity of the languages and difficulty in recognizing code-switching.},
}
Markdown (Informal)
[Multilingual Identification of English Code-Switching](https://aclanthology.org/2024.vardial-1.14/) (Sterner, VarDial 2024)
ACL