% Dai et al., Findings of ACL 2023 (in-context learning viewed as implicit gradient descent).
% The url field uses the canonical ACL Anthology address rather than a temporary preview-branch link.
@inproceedings{dai-etal-2023-gpt,
  author    = {Dai, Damai and Sun, Yutao and Dong, Li and Hao, Yaru and Ma, Shuming and Sui, Zhifang and Wei, Furu},
  title     = {Why Can {GPT} Learn In-Context? Language Models Secretly Perform Gradient Descent as Meta-Optimizers},
  editor    = {Rogers, Anna and Boyd-Graber, Jordan and Okazaki, Naoaki},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2023},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {4005--4019},
  doi       = {10.18653/v1/2023.findings-acl.247},
  url       = {https://aclanthology.org/2023.findings-acl.247/},
}