% Conference-paper record; every field value is brace-delimited and the month uses the bare macro.
% NOTE(review): the url targets a preview.aclanthology.org ingest path -- confirm the
% permanent address before citing.
% NOTE(review): the url identifier contains "acl-srw" while booktitle names only the annual
% meeting proceedings -- confirm the volume name against the published record.
@inproceedings{karakas-simsek-2026-supervision,
  author    = {Karaka{\c{s}}, Sercan and
               {\c{S}}im{\c{s}}ek, Yusuf},
  title     = {Supervision versus Demonstration-Based In-Context Learning for Multiword Expression Classification},
  editor    = {T.Y.S.S., Santosh and
               Rodriguez, Juan Diego and
               de Gibert, Ona},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)},
  publisher = {Association for Computational Linguistics},
  address   = {San Diego, California, United States},
  month     = jul,
  year      = {2026},
  pages     = {800--815},
  isbn      = {979-8-89176-393-7},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-srw.71/},
  abstract  = {Turkish idiomatic light verb constructions (LVCs) are challenging for multiword expression processing because they often share the same surface form as fully literal verb{--}object combinations while functioning as a single, partially idiomatic predicate. We frame Turkish LVC detection as a binary classification task (literal meaning vs. idiomatic meaning) and evaluate on a manually created controlled set (N=147) with matched negatives: out-of-domain random sentences and in-domain literal controls (NLVC), alongside LVC positives. We compare a supervised Turkish encoder baseline (BERTurk with a classifier head) to three instruction-tuned LLMs from different families under zero-shot, one-shot, and few-shot prompting, and analyze how demonstrations shift error profiles. In zero-shot, LLMs perform well on negatives but show very low LVC recall. One-shot prompting sharply improves LVC detection but can induce strong, model-specific biases (over- vs. under-predicting LVC). A richer few-shot prompt improves calibration and yields robust overall performance for GPT-OSS-20B and Qwen 2.5-14B. Overall, the results highlight substantial prompt sensitivity in Turkish metalinguistic classification: the supervised baseline remains competitive, while prompted LLMs can match or exceed it on LVCs with carefully constructed demonstrations. We release code, prompts, and evaluation materials to support reproducibility.},
}
Markdown (Informal)
[Supervision versus Demonstration-Based In-Context Learning for Multiword Expression Classification](https://preview.aclanthology.org/ingest-acl/2026.acl-srw.71/) (Karakaş & Şimşek, ACL 2026)
ACL