% ACL Anthology record 2025.coling-main.471 (COLING 2025).
% Braced words in the title keep their capitals under sentence-casing styles.
@inproceedings{mcdonald-etal-2025-afford,
    title     = "Can We Afford The Perfect Prompt? {Balancing} Cost and Accuracy with the {Economical} {Prompting} {Index}",
    author    = "McDonald, Tyler and
                 Colosimo, Anthony and
                 Li, Yifeng and
                 Emami, Ali",
    editor    = "Rambow, Owen and
                 Wanner, Leo and
                 Apidianaki, Marianna and
                 Al-Khalifa, Hend and
                 Di Eugenio, Barbara and
                 Schockaert, Steven",
    booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
    month     = jan,
    year      = "2025",
    address   = "Abu Dhabi, UAE",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2025.coling-main.471/",
    pages     = "7075--7086",
    abstract  = "As prompt engineering research rapidly evolves, evaluations beyond accuracy are crucial for developing cost-effective techniques. We present the Economical Prompting Index (EPI), a novel metric that combines accuracy scores with token consumption, adjusted by a user-specified cost concern level to reflect different resource constraints. Our study examines 6 advanced prompting techniques, including Chain-of-Thought, Self-Consistency, and Tree of Thoughts, across 10 widely-used language models and 4 diverse datasets. We demonstrate that approaches such as Self-Consistency often provide statistically insignificant gains while becoming cost-prohibitive. For example, on high-performing models like Claude 3.5 Sonnet, the EPI of simpler techniques like Chain-of-Thought (0.72) surpasses more complex methods like Self-Consistency (0.64) at slight cost concern levels. Our findings suggest a reevaluation of complex prompting strategies in resource-constrained scenarios, potentially reshaping future research priorities and improving cost-effectiveness for end-users."
}
Markdown (Informal)
[Can We Afford The Perfect Prompt? Balancing Cost and Accuracy with the Economical Prompting Index](https://aclanthology.org/2025.coling-main.471/) (McDonald et al., COLING 2025)
ACL