@inproceedings{kohli-etal-2025-choose,
title = "Choose Your Words Wisely: Domain-adaptive Masking Makes Language Models Learn Faster",
author = "Kohli, Vanshpreet S. and
Monis, Aaron and
Mamidi, Radhika",
editor = "Adlakha, Vaibhav and
Chronopoulou, Alexandra and
Li, Xiang Lorraine and
Majumder, Bodhisattwa Prasad and
Shi, Freda and
Vernikos, Giorgos",
booktitle = "Proceedings of the 10th Workshop on Representation Learning for NLP (RepL4NLP-2025)",
month = may,
year = "2025",
address = "Albuquerque, NM",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/moar-dois/2025.repl4nlp-1.6/",
doi = "10.18653/v1/2025.repl4nlp-1.6",
pages = "87--91",
ISBN = "979-8-89176-245-9",
abstract = "Foundational Language Models perform significantly better on downstream tasks in specialised domains (such as law, computer science, and medical science) upon being further pre-trained on extensive domain-specific corpora, but this continual pre-training incurs heavy computational costs. Indeed, some of the most performant specialised language models such as BioBERT incur even higher computing costs during domain-specific training than the pre-training cost of the foundational models they are initialised from. In this paper, we argue that much of the extended pre-training is redundant, with models seemingly wasting valuable resources re-learning lexical and semantic patterns already well-represented in their foundational models such as BERT, T5 and GPT. Focusing on Masked Language Models, we introduce a novel domain-specific masking strategy that is designed to facilitate continual learning while minimizing the training cost. Using this approach, we train and present a BERT-based model trained on a biomedical corpus that matches or surpasses traditionally trained biomedical language models in performance across several downstream classification tasks while incurring up to 11 times lower training costs."
}
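
The entry above only names the technique, so the sketch below is a hypothetical illustration rather than the authors' method: it biases the MLM masking distribution toward domain-salient tokens, scoring each token by a smoothed log-ratio of its domain-corpus vs. general-corpus frequency. The function names (`domain_salience`, `adaptive_mask`) and the frequency-ratio scoring are assumptions, not taken from the paper.

```python
import math
import random
from collections import Counter

MASK_TOKEN = "[MASK]"

def domain_salience(domain_corpus, general_corpus):
    """Score each token by how over-represented it is in the domain corpus
    relative to the general corpus (add-one-smoothed log frequency ratio).
    Positive scores mark domain-specific vocabulary such as 'myocardial'."""
    d = Counter(tok for sent in domain_corpus for tok in sent)
    g = Counter(tok for sent in general_corpus for tok in sent)
    vocab = set(d) | set(g)
    d_total, g_total = sum(d.values()), sum(g.values())
    return {
        tok: math.log(((d[tok] + 1) / (d_total + len(vocab)))
                      / ((g[tok] + 1) / (g_total + len(vocab))))
        for tok in vocab
    }

def adaptive_mask(tokens, salience, mask_rate=0.15, temperature=1.0):
    """Mask roughly `mask_rate` of the tokens, sampling positions with
    probability proportional to exp(salience / temperature), so domain
    terms are masked (and hence re-learned) more often than generic ones."""
    if not tokens:
        return []
    k = max(1, round(mask_rate * len(tokens)))
    weights = [math.exp(salience.get(tok, 0.0) / temperature) for tok in tokens]
    masked = set()
    while len(masked) < k:
        masked.add(random.choices(range(len(tokens)), weights=weights, k=1)[0])
    return [MASK_TOKEN if i in masked else tok for i, tok in enumerate(tokens)]

if __name__ == "__main__":
    domain = [["the", "myocardial", "infarction", "was", "acute"]]
    general = [["the", "meeting", "was", "long", "and", "acute"]]
    sal = domain_salience(domain, general)
    # 'myocardial' and 'infarction' get high salience while 'the'/'was'
    # score near zero, so masks concentrate on the biomedical terms.
    print(adaptive_mask(domain[0], sal, mask_rate=0.4))
```

Under standard uniform 15% BERT masking, most masked positions fall on high-frequency general-domain words the foundational model already predicts well; concentrating masks on domain-salient tokens is one way to spend the continual pre-training budget on what is actually new, which is the efficiency argument the abstract makes.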