@inproceedings{limisiewicz-etal-2023-data,
title = "You Can Have Your Data and Balance It Too: Towards Balanced and Efficient Multilingual Models",
author = "Limisiewicz, Tomasz and
Malkin, Dan and
Stanovsky, Gabriel",
editor = "Beinborn, Lisa and
Goswami, Koustava and
Murado{\u{g}}lu, Saliha and
Sorokin, Alexey and
Kumar, Ritesh and
Shcherbakov, Andreas and
Ponti, Edoardo M. and
Cotterell, Ryan and
Vylomova, Ekaterina",
booktitle = "Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.sigtyp-1.1/",
doi = "10.18653/v1/2023.sigtyp-1.1",
pages = "1--11",
abstract = "Multilingual models have been widely used for cross-lingual transfer to low-resource languages. However, performance on these languages is hindered by their under-representation in the pretraining data. To alleviate this problem, we propose a novel multilingual training technique based on teacher-student knowledge distillation. In this setting, we utilize monolingual teacher models optimized for their language. We use those teachers along with balanced (sub-sampled) data to distill the teachers' knowledge into a single multilingual student. Our method outperforms standard training methods in low-resource languages and retains performance on high-resource languages while using the same amount of data. If applied widely, our approach can increase the representation of low-resource languages in NLP systems."
}
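
As a rough illustration of the setup described in the abstract (frozen per-language teachers, language-balanced sub-sampling, and distillation into one multilingual student), here is a minimal sketch. Everything in it is a hypothetical placeholder: the `TinyLM` toy model, the language names, the data shapes, and the hyperparameters are assumptions for illustration, not the authors' implementation.

```python
# Minimal sketch: distill frozen monolingual teachers into one multilingual
# student over language-balanced (sub-sampled) batches. All names and sizes
# below are illustrative placeholders, not the paper's actual code.
import torch
import torch.nn as nn
import torch.nn.functional as F

VOCAB, DIM, TEMP = 1000, 64, 2.0  # hypothetical vocab size, hidden size, softmax temperature

class TinyLM(nn.Module):
    """Toy token scorer standing in for a real teacher or student model."""
    def __init__(self):
        super().__init__()
        self.emb = nn.Embedding(VOCAB, DIM)
        self.out = nn.Linear(DIM, VOCAB)

    def forward(self, token_ids):           # (batch, seq) -> (batch, seq, vocab)
        return self.out(self.emb(token_ids))

# One frozen monolingual teacher per language; a single multilingual student.
languages = ["lang_a", "lang_b"]             # placeholder language identifiers
teachers = {lang: TinyLM().eval() for lang in languages}
student = TinyLM()
optimizer = torch.optim.Adam(student.parameters(), lr=1e-3)

def balanced_batches(data_by_lang, per_lang=4):
    """Sub-sample the same number of examples from every language (the balancing step)."""
    for lang, examples in data_by_lang.items():
        idx = torch.randperm(len(examples))[:per_lang]
        yield lang, examples[idx]

# Fake monolingual corpora: random token ids, larger for the "high-resource" language.
data_by_lang = {
    "lang_a": torch.randint(0, VOCAB, (1000, 16)),  # high-resource
    "lang_b": torch.randint(0, VOCAB, (50, 16)),    # low-resource
}

for lang, batch in balanced_batches(data_by_lang):
    with torch.no_grad():
        teacher_logits = teachers[lang](batch)       # soft targets from that language's teacher
    student_logits = student(batch)
    # KL divergence between temperature-softened teacher and student distributions.
    loss = F.kl_div(
        F.log_softmax(student_logits / TEMP, dim=-1),
        F.softmax(teacher_logits / TEMP, dim=-1),
        reduction="batchmean",
    ) * TEMP ** 2
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(f"{lang}: distillation loss {loss.item():.3f}")
```

Because each language contributes the same number of examples per pass, the low-resource language gets equal weight in the student's updates even though its raw corpus is far smaller.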