% COLING 2025 main-conference paper (ACL Anthology ID 2025.coling-main.183).
% The url field uses the canonical Anthology address, not a preview/staging build.
% NOTE(review): the first author's surname may carry a diacritic
% (Grali{\'n}ski) -- confirm against the published Anthology record.
@inproceedings{gralinski-etal-2025-oddballness,
    title     = "Oddballness: universal anomaly detection with language models",
    author    = "Gralinski, Filip and
                 Staruch, Ryszard and
                 Jurkiewicz, Krzysztof",
    editor    = "Rambow, Owen and
                 Wanner, Leo and
                 Apidianaki, Marianna and
                 Al-Khalifa, Hend and
                 Di Eugenio, Barbara and
                 Schockaert, Steven",
    booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
    month     = jan,
    year      = "2025",
    address   = "Abu Dhabi, UAE",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2025.coling-main.183/",
    pages     = "2683--2689",
    abstract  = "We present a new method to detect anomalies in texts (in general: in sequences of any data), using language models, in a totally unsupervised manner. The method considers probabilities (likelihoods) generated by a language model, but instead of focusing on low-likelihood tokens, it considers a new metric defined in this paper: oddballness. Oddballness measures how {\textquotedblleft}strange{\textquotedblright} a given token is according to the language model. We demonstrate in grammatical error detection tasks (a specific case of text anomaly detection) that oddballness is better than just considering low-likelihood events, if a totally unsupervised setup is assumed."
}
Markdown (Informal)
[Oddballness: universal anomaly detection with language models](https://aclanthology.org/2025.coling-main.183/) (Gralinski et al., COLING 2025)
ACL