@inproceedings{cruz-2025-extracting,
title = "Extracting General-use Transformers for Low-resource Languages via Knowledge Distillation",
author = "Cruz, Jan Christian Blaise",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the First Workshop on Language Models for Low-Resource Languages",
month = jan,
year = "2025",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/Ingest-2025-COMPUTEL/2025.loreslm-1.17/",
pages = "219--224",
abstract = "In this paper, we propose the use of simple knowledge distillation to produce smaller and more efficient single-language transformers from Massively Multilingual Transformers (MMTs) to alleviate tradeoffs associated with the use of such in low-resource settings. Using Tagalog as a case study, we show that these smaller single-language models perform on-par with strong baselines in a variety of benchmark tasks in a much more efficient manner. Furthermore, we investigate additional steps during the distillation process that improves the soft-supervision of the target language, and provide a number of analyses and ablations to show the efficacy of the proposed method."
}
Markdown (Informal)
[Extracting General-use Transformers for Low-resource Languages via Knowledge Distillation](https://preview.aclanthology.org/Ingest-2025-COMPUTEL/2025.loreslm-1.17/) (Cruz, LoResLM 2025)
ACL
Jan Christian Blaise Cruz. 2025. [Extracting General-use Transformers for Low-resource Languages via Knowledge Distillation](https://preview.aclanthology.org/Ingest-2025-COMPUTEL/2025.loreslm-1.17/). In *Proceedings of the First Workshop on Language Models for Low-Resource Languages*, pages 219–224, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.
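
The abstract describes distilling a smaller single-language student from a massively multilingual teacher using soft supervision. As a rough illustration of the generic soft-target distillation objective (not the paper's exact setup), the following is a minimal sketch; the function name `distillation_loss` and the temperature value are illustrative assumptions.

```python
import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, temperature=2.0):
    """Soft-target KD loss: KL divergence between the temperature-softened
    teacher and student output distributions."""
    log_p_student = F.log_softmax(student_logits / temperature, dim=-1)
    p_teacher = F.softmax(teacher_logits / temperature, dim=-1)
    # Scale by T^2 so gradient magnitudes stay comparable across temperatures.
    return F.kl_div(log_p_student, p_teacher, reduction="batchmean") * temperature ** 2

# Toy usage: a batch of 4 token positions over a 32k-entry vocabulary.
student_logits = torch.randn(4, 32000, requires_grad=True)
teacher_logits = torch.randn(4, 32000)  # e.g., frozen MMT teacher outputs
loss = distillation_loss(student_logits, teacher_logits)
loss.backward()
print(loss.item())
```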