@inproceedings{mackintosh-etal-2025-evaluating,
  title     = {Evaluating {CxG} Generalisation in {LLMs} via Construction-Based {NLI} Fine Tuning},
  author    = {Mackintosh, Tom and
               Tayyar Madabushi, Harish and
               Bonial, Claire},
  editor    = {Bonial, Claire and
               Torgbi, Melissa and
               Weissweiler, Leonie and
               Blodgett, Austin and
               Beuls, Katrien and
               Van Eecke, Paul and
               Tayyar Madabushi, Harish},
  booktitle = {Proceedings of the Second International Workshop on Construction Grammars and {NLP}},
  month     = sep,
  year      = {2025},
  address   = {D{\"u}sseldorf, Germany},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.cxgsnlp-1.19/},
  pages     = {180--189},
  isbn      = {979-8-89176-318-0},
  abstract  = {We probe large language models' ability to learn deep form-meaning mappings as defined by construction grammars. We introduce the ConTest-NLI benchmark of 80k sentences covering eight English constructions from highly lexicalized to highly schematic. Our pipeline generates diverse synthetic NLI triples via templating and the application of a model-in-the-loop filter. This provides aspects of human validation to ensure challenge and label reliability. Zero-shot tests on leading LLMs reveal a 24{\%} drop in accuracy between naturalistic (88{\%}) and adversarial data (64{\%}), with schematic patterns proving hardest. Fine-tuning on a subset of ConTest-NLI yields up to 9{\%} improvement, yet our results highlight persistent abstraction gaps in current LLMs and offer a scalable framework for evaluating construction informed learning.},
}
Markdown (Informal)
[Evaluating CxG Generalisation in LLMs via Construction-Based NLI Fine Tuning](https://aclanthology.org/2025.cxgsnlp-1.19/) (Mackintosh et al., CxGsNLP 2025)
ACL