@inproceedings{zhang-etal-2025-enhancing-language,
  title     = {Enhancing Language Model Hypernetworks with Restart: A Study on Optimization},
  author    = {Zhang, Yihan and
               Fu, Jie and
               Ji, Rongrong and
               Chen, Jie},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.naacl-long.397/},
  pages     = {7826--7838},
  isbn      = {979-8-89176-189-6},
  abstract  = {Hypernetworks are a class of meta-networks that generate weights for main neural networks. Their unique parameter spaces necessitate exploring suitable optimization strategies to enhance performance, especially for language models. However, a comprehensive investigation into optimization strategies for hypernetworks remains absent. To address this gap, we analyze the loss landscape of hypernetworks and propose that restart optimization strategies can improve their performance for language models. We find that hypernetworks have inherently more complicated loss landscapes compared to conventional networks due to their distinct parameter spaces. Consequently, a restart strategy that periodically resets the learning rate can facilitate better convergence for hypernetworks. Through experiments on instruction tuning and multi-task training, we demonstrate that the restart strategy consistently enhances the performance of hypernetworks for language models, often more effectively than for conventional deep neural networks. Our findings highlight the importance of tailored optimization techniques to unlock the full potential of hypernetworks in natural language processing tasks.},
}
Markdown (Informal)
[Enhancing Language Model Hypernetworks with Restart: A Study on Optimization](https://aclanthology.org/2025.naacl-long.397/) (Zhang et al., NAACL 2025)
ACL
- Yihan Zhang, Jie Fu, Rongrong Ji, and Jie Chen. 2025. Enhancing Language Model Hypernetworks with Restart: A Study on Optimization. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 7826–7838, Albuquerque, New Mexico. Association for Computational Linguistics.