% Conference paper from the EACL 2026 proceedings (Volume 1: Long Papers),
% ACL Anthology ID 2026.eacl-long.99.
% NOTE(review): `address` holds "Rabat, Morocco" as in the exported record;
%   presumably the conference venue rather than the publisher's city -- confirm
%   before relocating it.
% NOTE(review): the title spells out "epsilon" while the abstract writes
%   $\varepsilon$ -- confirm the intended form against the published paper.
@inproceedings{wold-etal-2026-measuring,
  author    = {Wold, Sondre and
               Simon, {\'E}tienne and
               Velldal, Erik and
               {\O}vrelid, Lilja},
  title     = {Measuring Idiomaticity in Text Embedding Models with epsilon-compositionality},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.eacl-long.99/},
  pages     = {2239--2252},
  isbn      = {979-8-89176-380-7},
  abstract  = {The principle of compositionality, which concerns the construction of meaning from constituent parts, is a longstanding topic in various disciplines, most commonly associated with formal semantics. In NLP, recent studies have focused on the compositional properties of text embedding models, particularly regarding their sensitivity to idiomatic expression, as idioms have traditionally been seen as non-compositional. In this paper, we argue that it is unclear how previous work relates to formal definitions of the principle. To address this limitation, we take a theoretically motivated approach based on definitions in formal semantics. We present $\varepsilon$-compositionality, a continuous relaxation of compositionality derived from these definitions. We measure $\varepsilon$-compositionality on a dataset containing both idiomatic and non-idiomatic sentences, providing a theoretically motivated assessment of sensitivity to idiomaticity. Our findings indicate that most text embedding models differentiate between idiomatic and non-idiomatic phrases, although to varying degrees.},
}
Markdown (Informal)
[Measuring Idiomaticity in Text Embedding Models with epsilon-compositionality](https://aclanthology.org/2026.eacl-long.99/) (Wold et al., EACL 2026)
ACL