% Murty, Paradise, and Sharma (2023): short paper in Findings of EMNLP 2023.
% The url field holds the permanent ACL Anthology landing page for Anthology ID
% 2023.findings-emnlp.485 (same ID as the DOI suffix), not a preview-site link.
@inproceedings{murty-etal-2023-pseudointelligence,
  title     = {Pseudointelligence: A Unifying Lens on Language Model Evaluation},
  author    = {Murty, Shikhar and
               Paradise, Orr and
               Sharma, Pratyusha},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.findings-emnlp.485/},
  doi       = {10.18653/v1/2023.findings-emnlp.485},
  pages     = {7284--7290},
  abstract  = {With large language models surpassing human performance on an increasing number of benchmarks, we must take a principled approach for targeted evaluation of model capabilities. Inspired by pseudorandomness, we propose pseudointelligence, which captures the maxim that ``(perceived) intelligence lies in the eye of the beholder.'' That is, that claims of intelligence are meaningful only when their evaluator is taken into account. Concretely, we propose a complexity-theoretic framework of model evaluation cast as a dynamic interaction between a model and a learned evaluator. We demonstrate that this framework can be used to reason about two case studies in language model evaluation, as well as analyze existing evaluation methods.},
}
Markdown (Informal)
[Pseudointelligence: A Unifying Lens on Language Model Evaluation](https://aclanthology.org/2023.findings-emnlp.485/) (Murty et al., Findings 2023)
ACL