% Conference-paper record: "Surprisal Dynamics for the Detection of Multi-Word
% Expressions in English" (Alves, Bagdasarov, Teich; 2025).
% NOTE(review): the url field points at an ACL Anthology preview
% ("ingest-ijcnlp-aacl") build; confirm the permanent URL before relying on it.
% NOTE(review): the URL id says "findings-ijcnlp" while booktitle names the
% main proceedings volume; confirm which volume title is correct.
@inproceedings{alves-etal-2025-surprisal,
  title     = {Surprisal Dynamics for the Detection of Multi-Word Expressions in {English}},
  author    = {Alves, Diego and
               Bagdasarov, Sergei and
               Teich, Elke},
  editor    = {Inui, Kentaro and
               Sakti, Sakriani and
               Wang, Haofen and
               Wong, Derek F. and
               Bhattacharyya, Pushpak and
               Banerjee, Biplab and
               Ekbal, Asif and
               Chakraborty, Tanmoy and
               Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.findings-ijcnlp.72/},
  pages     = {1185--1194},
  isbn      = {979-8-89176-303-6},
  abstract  = {This work examines the potential of surprisal slope as a feature for identifying multi-word expressions (MWEs) in English, leveraging token-level surprisal estimates from the GPT-2 language model. Evaluations on the DiMSUM and SemEval-2022 datasets reveal that surprisal slope provides moderate yet meaningful discriminative power with a trade-off between specificity and coverage: while high recall indicates that surprisal slope captures many true MWEs, the slightly lower precision reflects false positives, particularly for non-MWEs that follow formulaic patterns (e.g., adjective-noun or verb-pronoun structures). The method performs particularly well for conventionalized expressions, such as idiomatic bigrams in the SemEval-2022 corpus. Both idiomatic and literal usages of these bigrams exhibit negative slopes, with idiomatic instances generally showing a more pronounced decrease. Overall, surprisal slope offers a cognitively motivated and interpretable signal that complements existing MWE identification methods, particularly for conventionalized expressions.},
}
Markdown (Informal)
[Surprisal Dynamics for the Detection of Multi-Word Expressions in English](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.findings-ijcnlp.72/) (Alves et al., Findings 2025)
ACL
- Diego Alves, Sergei Bagdasarov, and Elke Teich. 2025. Surprisal Dynamics for the Detection of Multi-Word Expressions in English. In Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 1185–1194, Mumbai, India. The Asian Federation of Natural Language Processing and The Association for Computational Linguistics.