@inproceedings{liu-wang-2021-empirical,
title = "An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models",
author = "Liu, Xueqing and
Wang, Chi",
editor = "Zong, Chengqing and
Xia, Fei and
Li, Wenjie and
Navigli, Roberto",
booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.acl-long.178/",
doi = "10.18653/v1/2021.acl-long.178",
pages = "2286--2300",
    abstract = "The performance of fine-tuning pre-trained language models largely depends on the hyperparameter configuration. In this paper, we investigate the performance of modern hyperparameter optimization (HPO) methods on fine-tuning pre-trained language models. First, we study and report three HPO algorithms' performances on fine-tuning two state-of-the-art language models on the GLUE dataset. We find that using the same time budget, HPO often fails to outperform grid search for two reasons: insufficient time budget and overfitting. We propose two general strategies and an experimental procedure to systematically troubleshoot HPO's failure cases. By applying the procedure, we observe that HPO can succeed with more appropriate settings in the search space and time budget; however, in certain cases overfitting remains. Finally, we make suggestions for future work. Our implementation can be found at \url{https://github.com/microsoft/FLAML/tree/main/flaml/nlp/}"
}
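
The abstract compares modern HPO methods against grid search under the same search space and wall-clock budget. As a rough illustration only (not code from the paper; the authors' actual implementation is in the FLAML repository linked above), the sketch below contrasts a budgeted grid sweep with a simple randomized search over a hypothetical fine-tuning search space. `fine_tune_and_eval` and the grid values are placeholders standing in for fine-tuning a pre-trained model on a GLUE task and returning its dev-set score.

```python
import random
import time

# Hypothetical search space for fine-tuning hyperparameters (illustrative only).
SEARCH_SPACE = {
    "learning_rate": [1e-5, 2e-5, 3e-5, 5e-5],
    "batch_size": [16, 32],
    "num_epochs": [2, 3, 4],
}


def fine_tune_and_eval(config):
    """Placeholder: fine-tune the model with `config` and return a dev-set score."""
    raise NotImplementedError


def grid_search(budget_s):
    """Exhaustive sweep over the grid, stopping when the time budget runs out."""
    start, best = time.time(), (None, float("-inf"))
    for lr in SEARCH_SPACE["learning_rate"]:
        for bs in SEARCH_SPACE["batch_size"]:
            for ep in SEARCH_SPACE["num_epochs"]:
                if time.time() - start > budget_s:
                    return best
                cfg = {"learning_rate": lr, "batch_size": bs, "num_epochs": ep}
                score = fine_tune_and_eval(cfg)
                if score > best[1]:
                    best = (cfg, score)
    return best


def random_search(budget_s):
    """Simple randomized HPO baseline drawing configs from the same space."""
    start, best = time.time(), (None, float("-inf"))
    while time.time() - start <= budget_s:
        cfg = {k: random.choice(v) for k, v in SEARCH_SPACE.items()}
        score = fine_tune_and_eval(cfg)
        if score > best[1]:
            best = (cfg, score)
    return best
```

Both routines are given the same `budget_s`, mirroring the paper's same-time-budget comparison; the paper's own HPO methods and troubleshooting procedure are more involved than this sketch.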