% Conference paper (LREC 2026), taken from an ACL Anthology preview export.
% Typed as inproceedings: the record carries proceedings editors, a venue
% address and a proceedings publisher, and the value originally stored in
% "journal" is a conference name, so it now lives in "booktitle".
% The exported volume = "main" was dropped: it matches the "lrec-main" segment
% of the URL rather than a numbered volume, and styles would print "volume main".
% NOTE(review): booktitle reuses the venue string from the export; confirm the
% official proceedings title before publication.
% NOTE(review): url points at a preview/ingest host; confirm the permanent
% address once the volume is published.
@inproceedings{samo-merlo-2026-datasets,
  title     = {Datasets for Verb Alternations across Languages: {BLM} Templates and Data Augmentation Strategies},
  author    = {Samo, Giuseppe and
               Merlo, Paola},
  editor    = {Piperidis, Stelios and
               Bel, N{\'u}ria and
               van den Heuvel, Henk and
               Ide, Nancy and
               Krek, Simon and
               Toral, Antonio},
  booktitle = {International Conference on Language Resources and Evaluation},
  month     = may,
  year      = {2026},
  address   = {Palma de Mallorca, Spain},
  publisher = {ELRA Language Resource Association},
  url       = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.920/},
  pages     = {11747--11760},
  abstract  = {Large language models (LLMs) have shown remarkable performance across various sentence-based linguistic phenomena, yet their ability to capture cross-sentence paradigmatic patterns, such as verb alternations, remains underexplored. In this work, we present curated paradigm-based datasets for four languages, designed to probe systematic cross-sentence knowledge of verb alternations (change-of-state and object-drop constructions in English, German and Italian, and Hebrew binyanim). The datasets comprise thousands of the Blackbird Language Matrices (BLMs) problems. The BLM task {--} an RPM/ARC-like task devised specifically for language {--} is a controlled linguistic puzzle where models must select the sentence that completes a pattern according to syntactic and semantic rules. We introduce three types of templates varying in complexity and apply linguistically-informed data augmentation strategies across synthetic and natural data. We provide simple baseline performance results across English, Italian, German, and Hebrew, that demonstrate the diagnostic usefulness of the datasets.},
}
Markdown (Informal)
[Datasets for Verb Alternations across Languages: BLM Templates and Data Augmentation Strategies](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.920/) (Samo & Merlo, LREC 2026)
ACL