@inproceedings{micheli-fleuret-2021-language,
title = "Language Models are Few-Shot Butlers",
author = "Micheli, Vincent and
Fleuret, Francois",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2021.emnlp-main.734/",
doi = "10.18653/v1/2021.emnlp-main.734",
pages = "9312--9318",
abstract = "Pretrained language models demonstrate strong performance in most NLP tasks when fine-tuned on small task-specific datasets. Hence, these autoregressive models constitute ideal agents to operate in text-based environments where language understanding and generative capabilities are essential. Nonetheless, collecting expert demonstrations in such environments is a time-consuming endeavour. We introduce a two-stage procedure to learn from a small set of demonstrations and further improve by interacting with an environment. We show that language models fine-tuned with only 1.2{\%} of the expert demonstrations and a simple reinforcement learning algorithm achieve a 51{\%} absolute improvement in success rate over existing methods in the ALFWorld environment."
}
Markdown (Informal)
[Language Models are Few-Shot Butlers](https://aclanthology.org/2021.emnlp-main.734/) (Micheli & Fleuret, EMNLP 2021)

ACL
Vincent Micheli and Francois Fleuret. 2021. Language Models are Few-Shot Butlers. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pages 9312–9318, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.