% Conference paper (CoNLL 2026) by Bunzeck and Zarriess; entry as exported from the ACL Anthology.
% NOTE(review): the url field points at a preview.aclanthology.org "ingest" path --
% presumably a staging link; confirm and switch to the canonical aclanthology.org URL once available.
@inproceedings{bunzeck-zarriess-2026-child,
  title     = {Child-directed speech facilitates production, not comprehension, in {BabyLMs}},
  author    = {Bunzeck, Bastian and
               Zarrie{\ss}, Sina},
  editor    = {Bonial, Claire and
               Berzak, Yevgeni},
  booktitle = {Proceedings of the 30th Conference on Computational Natural Language Learning},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.14/},
  pages     = {227--249},
  isbn      = {979-8-89176-410-1},
  abstract  = {Recent studies suggest that child-directed speech is not conducive to language learning in BabyLMs. However, current evaluations focus predominantly on comprehension and not production, which is central to usage-based theories of language acquisition which argue how CDS facilitates early language use through constructional ``frames'' (frequent lexical patterns with open slots). We introduce a novel generation-based evaluation inspired by such theories in form of a frame-completion task, and compare Llama models trained with CDS, the BabyLM corpus, and web-crawl data (FineWeb-edu) on comprehension benchmarks and our novel framework. Our results reveal a clear dissociation between models' comprehension and production capabilities: while FineWeb-trained models excel at minimal pairs, CDS-trained models produce grammatical completions substantially earlier in training and concentrate probability mass on appropriate slot-fillers. These findings show that comprehension benchmarks underestimate what CDS affords to BabyLMs.},
}

Markdown (Informal)
[Child-directed speech facilitates production, not comprehension, in BabyLMs](https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.14/) (Bunzeck & Zarrieß, CoNLL 2026)
ACL