% Findings of EMNLP 2025 paper (ACL Anthology ID 2025.findings-emnlp.352).
% The url field uses the canonical Anthology landing page, not a preview build.
@inproceedings{duchnowski-etal-2025-knapsack,
  title     = {A Knapsack by Any Other Name: Presentation impacts {LLM} performance on {NP}-hard problems},
  author    = {Duchnowski, Alex and
               Pavlick, Ellie and
               Koller, Alexander},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.352/},
  doi       = {10.18653/v1/2025.findings-emnlp.352},
  pages     = {6628--6651},
  isbn      = {979-8-89176-335-7},
  abstract  = {To investigate the effect of problem presentation on LLMs' ability to solve optimization problems, we introduce the dataset of Everyday Hard Optimization Problems (EHOP), a collection of NP-hard problems expressed in natural language. EHOP includes problem formulations that could be found in computer science textbooks (e.g., graph coloring), versions that are dressed up as problems that could arise in real life (e.g., party planning), and variants with inverted rules. We find that state-of-the-art LLMs, across multiple prompting strategies, systematically solve textbook problems more accurately than their real-life and inverted counterparts. While reasoning models are more capable, they nonetheless show high variance across problem presentations, suggesting they lack a truly robust reasoning mechanism. We argue that this constitutes evidence that LLMs are still heavily dependent on what was seen in training and struggle to generalize to novel problems.},
}
Markdown (Informal)
[A Knapsack by Any Other Name: Presentation impacts LLM performance on NP-hard problems](https://aclanthology.org/2025.findings-emnlp.352/) (Duchnowski et al., Findings 2025)
ACL