@inproceedings{zhong-etal-2021-adapting-language,
title = "Adapting Language Models for Zero-shot Learning by Meta-tuning on Dataset and Prompt Collections",
author = "Zhong, Ruiqi and
Lee, Kristy and
Zhang, Zheng and
Klein, Dan",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2021.findings-emnlp.244/",
doi = "10.18653/v1/2021.findings-emnlp.244",
pages = "2856--2878",
abstract = "Large pre-trained language models (LMs) such as GPT-3 have acquired a surprising ability to perform zero-shot learning. For example, to classify sentiment without any training examples, we can {\textquotedblleft}prompt{\textquotedblright} the LM with the review and the label description {\textquotedblleft}Does the user like this movie?{\textquotedblright}, and ask whether the next word is {\textquotedblleft}yes{\textquotedblright} or {\textquotedblleft}no{\textquotedblright}. However, the next word prediction training objective is still misaligned with the target zero-shot learning objective. To address this weakness, we propose meta-tuning, which directly optimizes the zero-shot learning objective by fine-tuning pre-trained language models on a collection of datasets. We focus on classification tasks, and construct the meta-dataset by aggregating 43 existing datasets and annotating 441 label descriptions in a question-answering (QA) format. When evaluated on unseen tasks, meta-tuned models outperform a same-sized QA model and the previous SOTA zero-shot learning system based on natural language inference. Additionally, increasing parameter count from 220M to 770M improves AUC-ROC scores by 6.3{\%}, and we forecast that even larger models would perform better. Therefore, measuring zero-shot learning performance on language models out-of-the-box might underestimate their true potential, and community-wide efforts on aggregating datasets and unifying their formats can help build models that answer prompts better."
}