% Yao & Koller (2024), Findings of EMNLP 2024 -- ACL Anthology ID 2024.findings-emnlp.686.
@inproceedings{yao-koller-2024-predicting,
  title     = {Predicting generalization performance with correctness discriminators},
  author    = {Yao, Yuekun and
               Koller, Alexander},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-emnlp.686/},
  doi       = {10.18653/v1/2024.findings-emnlp.686},
  pages     = {11725--11739},
  abstract  = {The ability to predict an NLP model's accuracy on unseen, potentially out-of-distribution data is a prerequisite for trustworthiness. We present a novel model that establishes upper and lower bounds on the accuracy, without requiring gold labels for the unseen data. We achieve this by training a \emph{discriminator} which predicts whether the output of a given sequence-to-sequence model is correct or not. We show across a variety of tagging, parsing, and semantic parsing tasks that the gold accuracy is reliably between the predicted upper and lower bounds, and that these bounds are remarkably close together.},
}
Markdown (Informal)
[Predicting generalization performance with correctness discriminators](https://aclanthology.org/2024.findings-emnlp.686/) (Yao & Koller, Findings 2024)
ACL