% Weber, Bruni and Hupkes, CoNLL 2023 (ACL Anthology ID 2023.conll-1.20).
% The url field uses the canonical aclanthology.org address instead of a
% branch-specific preview.aclanthology.org path.
% NOTE(review): address appears to hold the conference location rather than
% the publisher's city (looks like the ACL Anthology export convention) --
% confirm before relocating it.
@inproceedings{weber-etal-2023-mind,
  author    = {Weber, Lucas and
               Bruni, Elia and
               Hupkes, Dieuwke},
  title     = {Mind the instructions: a holistic evaluation of consistency and interactions in prompt-based learning},
  editor    = {Jiang, Jing and
               Reitter, David and
               Deng, Shumin},
  booktitle = {Proceedings of the 27th Conference on Computational Natural Language Learning ({CoNLL})},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  pages     = {294--313},
  doi       = {10.18653/v1/2023.conll-1.20},
  url       = {https://aclanthology.org/2023.conll-1.20/},
  abstract  = {Finding the best way of adapting pre-trained language models to a task is a big challenge in current NLP. Just like the previous generation of \textit{task-tuned} models (TT), models that are adapted to tasks via in-context-learning (ICL) or instruction tuning (IT) are robust in some setups, but not in others. Here, we present a detailed analysis of which design choices cause instabilities and inconsistencies in LLM predictions. First, we show how spurious correlations between input distributions and labels {--} a known issue in TT models {--} form only a minor problem for prompted models. Then we engage in a systematic, holistic evaluation of different factors that have been found to influence predictions in a prompting setup. We test all possible combinations of a range of factors on both vanilla and instruction-tuned LLMs of different scale, and statistically analyse the results to show which factors are the most influential, the most interactive or the most stable. From our results, we deduce which factors can be used without precautions, should be avoided or handled with care in most settings.},
}
Markdown (Informal)
[Mind the instructions: a holistic evaluation of consistency and interactions in prompt-based learning](https://aclanthology.org/2023.conll-1.20/) (Weber et al., CoNLL 2023)
ACL