@inproceedings{bagdasarov-etal-2026-cheese,
    title     = {Cheese it up: {CamemBERT} Outperforms Large Language Models for Identification of {French} Multi-word Expressions},
    author    = {Bagdasarov, Sergei and
                 Alves, Diego and
                 Teich, Elke},
    editor    = {Ojha, Atul Kr. and
                 Mititelu, Verginica Barbu and
                 Constant, Mathieu and
                 Stoyanova, Ivelina and
                 Do{\u{g}}ru{\"o}z, A. Seza and
                 Rademaker, Alexandre},
    booktitle = {Proceedings of the 22nd Workshop on Multiword Expressions ({MWE} 2026)},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-eacl/2026.mwe-1.6/},
    pages     = {54--60},
    isbn      = {979-8-89176-363-0},
    abstract  = {In recent years, language models, both encoder-only and generative, have been applied to a variety of downstream NLP tasks, including sequence labeling tasks like automatic multi-word expression identification (MWEI). Multiple studies show that, in general, fine-tuned encoder-only models like BERT tend to outperform pretrained generative LLMs on downstream tasks (Arzideh et al., 2025; Ochoa et al., 2025; Bucher and Martini, 2024; Sebok et al., 2025). However, such comparisons are sparse for MWEI, in particular for French, in part due to the lack of comprehensive gold-standard datasets. In this study, we address this research gap by comparing CamemBERT with gpt-oss and Qwen3 for MWEI, using the French subcorpus of the newly released PARSEME dataset. CamemBERT outperforms both LLMs by large margins in precision, recall, and F1. We complement this numerical evaluation with a qualitative analysis of prediction errors.},
}
Markdown (Informal)
[Cheese it up: CamemBERT Outperforms Large Language Models for Identification of French Multi-word Expressions](https://preview.aclanthology.org/ingest-eacl/2026.mwe-1.6/) (Bagdasarov et al., MWE 2026)
ACL