@inproceedings{gu-etal-2023-pre,
title = "Pre-Training to Learn in Context",
author = "Gu, Yuxian and
Dong, Li and
Wei, Furu and
Huang, Minlie",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2023.acl-long.267/",
doi = "10.18653/v1/2023.acl-long.267",
pages = "4849--4870",
abstract = "In-context learning, where pre-trained language models learn to perform tasks from task examples and instructions in their contexts, has attracted much attention in the NLP community. However, the ability of in-context learning is not fully exploited because language models are not explicitly trained to learn in context. To this end, we propose PICL (Pre-training for In-Context Learning), a framework to enhance the language models' in-context learning ability by pre-training the model on a large collection of ``intrinsic tasks'' in the general plain-text corpus using the simple language modeling objective. PICL encourages the model to infer and perform tasks by conditioning on the contexts while maintaining task generalization of pre-trained models. We evaluate the in-context learning performance of the model trained with PICL on seven widely-used text classification datasets and the Super-NaturalInstrctions benchmark, which contains 100+ NLP tasks formulated to text generation. Our experiments show that PICL is more effective and task-generalizable than a range of baselines, outperforming larger language models with nearly 4x parameters. The code is publicly available at \url{https://github.com/thu-coai/PICL}."
}
Markdown (Informal)
[Pre-Training to Learn in Context](https://aclanthology.org/2023.acl-long.267/) (Gu et al., ACL 2023)
ACL
Yuxian Gu, Li Dong, Furu Wei, and Minlie Huang. 2023. Pre-Training to Learn in Context. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 4849–4870, Toronto, Canada. Association for Computational Linguistics.