@inproceedings{du-nguyen-2023-measuring,
title = "Measuring the Instability of Fine-Tuning",
author = "Du, Yupei and
Nguyen, Dong",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2023.acl-long.342/",
doi = "10.18653/v1/2023.acl-long.342",
pages = "6209--6230",
abstract = "Fine-tuning pre-trained language models on downstream tasks with varying random seeds has been shown to be unstable, especially on small datasets. Many previous studies have investigated this instability and proposed methods to mitigate it. However, most of these studies only used the standard deviation of performance scores (SD) as their measure, which is a narrow characterization of instability. In this paper, we analyze SD and six other measures quantifying instability of different granularity levels. Moreover, we propose a systematic evaluation framework of these measures' validity. Finally, we analyze the consistency and difference between different measures by reassessing existing instability mitigation methods. We hope our results will inform better measurements of the fine-tuning instability."
}