@inproceedings{de-vergnette-amblard-2026-semantic,
  title         = {Semantic Parsing for Evaluating Large Language Models: Separating Linguistic Abilities with {YARN}},
  author        = {de Vergnette, R{\'e}mi and
                   Amblard, Maxime},
  editor        = {Piperidis, Stelios and
                   Bel, N{\'u}ria and
                   van den Heuvel, Henk and
                   Ide, Nancy and
                   Krek, Simon and
                   Toral, Antonio},
  booktitle     = {Proceedings of the International Conference on Language Resources and Evaluation ({LREC})},
  month         = may,
  year          = {2026},
  address       = {Palma de Mallorca, Spain},
  publisher     = {ELRA Language Resource Association},
  url           = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.765/},
  pages         = {9745--9755},
  internal-note = {Anthology track: main (was mis-entered as volume = "main"); entry type corrected from @article to @inproceedings, conference name moved from journal to booktitle},
  abstract      = {We evaluate large language models (LLMs) through semantic parsing into Yarn, a structured meaning representation that distinguishes predicate{--}argument structure from higher-level linguistic features such as tense, aspect, and modality. For evaluation, we employ SmatchY, a fine-grained metric designed to assess different layers of meaning independently. Our experiments test multiple LLMs under varied conditions, including inference modes, linearization formats (JSON and logic-inspired CFG), and the presence or absence of auxiliary supervision via partial semantic parses. Results show that model performance is highly sensitive to both representational design and supervision, with no single configuration consistently outperforming the others. While some models gain from additional semantic information in prompts, others are negatively affected. A layer-wise analysis indicates that surface-level features such as temporality and negation are captured more reliably than deeper semantic phenomena like quantification. Consistent with prior work, our findings highlight the limited capacity of current LLMs to generate fully formal meaning representations.},
}

@comment{Scraped citation blurb from the Anthology page, kept for reference:
Markdown (Informal)
[Semantic Parsing for Evaluating Large Language Models: Separating Linguistic Abilities with YARN](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.765/) (de Vergnette & Amblard, LREC 2026)
ACL}