@inproceedings{barrett-etal-2025-llms,
title = "Can {LLM}s Find a Needle in a Haystack? A Look at Anomaly Detection Language Modeling",
author = "Barrett, Leslie and
Bajaj, Vikram Sunil and
Kingan, Robert John",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.findings-emnlp.341/",
doi = "10.18653/v1/2025.findings-emnlp.341",
pages = "6428--6435",
ISBN = "979-8-89176-335-7",
abstract = "Anomaly detection (AD), also known as Outlier Detection, is a longstanding problem in machine learning, which has recently been applied to text data. In these datasets, a textual anomaly is a part of the text that does not fit the overall topic of the text. Some recent approaches to textual AD have used transformer models, achieving positive results but with trade-offs in pre-training time and inflexibility with respect to new domains. Others have used linear models which are fast and more flexible but not always competitive on certain datasets. We introduce a new approach based on Large Pre-trained Language Models in three modalities. Our findings indicate that LLMs beat baselines when AD is presented as an imbalanced classification problem regardless of the concentration of anomalous samples. However, their performance is markedly worse on unsupervised AD, suggesting that the concept of ``anomaly'' may somehow elude the LLM reasoning process."
}