@inproceedings{gupta-etal-2025-llms-bayesian,
  title     = {{LLMs} for {Bayesian} Optimization in Scientific Domains: Are We There Yet?},
  author    = {Gupta, Rushil and
               Hartford, Jason and
               Liu, Bang},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.838/},
  doi       = {10.18653/v1/2025.findings-emnlp.838},
  pages     = {15482--15510},
  isbn      = {979-8-89176-335-7},
  abstract  = {Large language models (LLMs) have recently been proposed as general-purpose agents for experimental design, with claims that they can perform in-context experimental design. We evaluate this hypothesis using open-source instruction-tuned LLMs applied to genetic perturbation and molecular property discovery tasks. We find that LLM-based agents show no sensitivity to experimental feedback: replacing true outcomes with randomly permuted labels has no impact on performance. Across benchmarks, classical methods such as linear bandits and Gaussian process optimization consistently outperform LLM agents. We further propose a simple hybrid method, LLM-guided Nearest Neighbour (LLMNN) sampling, that combines LLM prior knowledge with nearest-neighbor sampling to guide the design of experiments. LLMNN achieves competitive or superior performance across domains without requiring significant in-context adaptation. These results suggest that current open-source LLMs do not perform in-context experimental design in practice and highlight the need for hybrid frameworks that decouple prior-based reasoning from batch acquisition with updated posteriors.},
}
Markdown (Informal)
[LLMs for Bayesian Optimization in Scientific Domains: Are We There Yet?](https://aclanthology.org/2025.findings-emnlp.838/) (Gupta et al., Findings 2025)
ACL