@inproceedings{yano-miwa-2025-effect,
title = "Effect of Multilingual and Domain-adapted Continual Pre-training on Few-shot Promptability",
author = "Yano, Ken and
Miwa, Makoto",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Miwa, Makoto and
Tsujii, Junichi",
booktitle = "ACL 2025",
month = aug,
year = "2025",
address = "Viena, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/acl25-workshop-ingestion/2025.bionlp-1.2/",
pages = "18--26",
ISBN = "979-8-89176-275-6",
abstract = "Continual Pre-training (CPT) can help pre-trained large language models (LLMs) effectively adapt to new or under-trained domains or low-resource languages without re-training from scratch.Nevertheless, during CPT, the model{'}s few-shot transfer ability is known to be affected for emergent tasks.We verified this by comparing the performance between the few-shot and fine-tuning settings on the same tasks.We used Llama3-ELAINE-medLLM, which was continually pre-trained on Llama3-8B, targeted for the biomedical domain, and adapted for multilingual languages (English, Japanese, and Chinese).We compared the performance of Llama3-ELAINE-medLLM and Llama3-8B in three emergent tasks: two related domain tasks, entity recognition (NER) and machine translation (MT), and one out-of-domain task, summarization (SUM). Our experimental results show that degradation in few-shot transfer ability does not necessarily indicate the model{'}s underlying potential on the same task after fine-tuning."
}
Markdown (Informal)
[Effect of Multilingual and Domain-adapted Continual Pre-training on Few-shot Promptability](https://preview.aclanthology.org/acl25-workshop-ingestion/2025.bionlp-1.2/) (Yano & Miwa, BioNLP 2025)