@inproceedings{santy-etal-2021-bertologicomix,
title = "{BERT}ologi{C}o{M}ix: How does Code-Mixing interact with Multilingual {BERT}?",
author = "Santy, Sebastin and
Srinivasan, Anirudh and
Choudhury, Monojit",
editor = "Ben-David, Eyal and
Cohen, Shay and
McDonald, Ryan and
Plank, Barbara and
Reichart, Roi and
Rotman, Guy and
Ziser, Yftah",
booktitle = "Proceedings of the Second Workshop on Domain Adaptation for NLP",
month = apr,
year = "2021",
address = "Kyiv, Ukraine",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2021.adaptnlp-1.12/",
pages = "111--121",
abstract = "Models such as mBERT and XLMR have shown success in solving Code-Mixed NLP tasks even though they were not exposed to such text during pretraining. Code-Mixed NLP models have relied on using synthetically generated data along with naturally occurring data to improve their performance. Finetuning mBERT on such data improves its code-mixed performance, but the benefits of using the different types of Code-Mixed data aren't clear. In this paper, we study the impact of finetuning with different types of code-mixed data and outline the changes that occur to the model during such finetuning. Our findings suggest that using naturally occurring code-mixed data brings the best performance improvement after finetuning, and that finetuning with any type of code-mixed text improves the responsivity of its attention heads to code-mixed text inputs."
}
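The abstract describes continued finetuning of mBERT on code-mixed text. As a rough illustration only, not the authors' exact pipeline, the sketch below shows how such masked-language-model finetuning could be set up with the Hugging Face transformers library; the example sentences, hyperparameters, and output directory name are placeholder assumptions.

# Minimal sketch (illustrative, not the paper's setup): continued MLM finetuning
# of mBERT on a few code-mixed sentences. Sentences and hyperparameters are placeholders.
import torch
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForMaskedLM,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

class CodeMixedDataset(Dataset):
    """Wraps a list of code-mixed sentences as tokenized MLM examples."""
    def __init__(self, sentences, tokenizer, max_length=64):
        self.encodings = tokenizer(
            sentences, truncation=True, max_length=max_length, padding="max_length"
        )
    def __len__(self):
        return len(self.encodings["input_ids"])
    def __getitem__(self, idx):
        return {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}

# Hypothetical Hindi-English code-mixed examples.
sentences = [
    "mujhe ye movie bahut pasand aayi, totally worth it",
    "kal office jaana hai but I really need a break",
]

tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
model = AutoModelForMaskedLM.from_pretrained("bert-base-multilingual-cased")

dataset = CodeMixedDataset(sentences, tokenizer)
# Randomly masks 15% of tokens and builds MLM labels for each batch.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

args = TrainingArguments(
    output_dir="mbert-codemixed",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    logging_steps=1,
)

Trainer(model=model, args=args, train_dataset=dataset, data_collator=collator).train()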