% Workshop paper record for the CDL workshop proceedings (ACL Anthology id 2026.cdl-1.3).
% The source export carried the venue's street address in the address field:
%   Grand Hyatt Manchester San Diego, 1 Market Pl, San Diego, CA 92101
% It is reduced to a city-level location below so styles print a sensible place name.
% NOTE(review): the url targets a preview/ingest host; confirm and substitute the
% permanent Anthology URL once the volume is published.
% {Evidence} is braced so sentence-casing styles keep the capital after the "?".
@inproceedings{cacioli-2026-structural,
  title     = {Do Structural Priors Help Neural Language Models Learn Grammar? {Evidence} from Child-Scale Data},
  author    = {Cacioli, Jon-Paul},
  editor    = {Ma, Martin Ziqiao and
               Liu, Emmy and
               Liu, Jing and
               Chang, Tyler A. and
               Fourtassi, Abdellah and
               Warstadt, Alex and
               Hahn, Michael and
               Sun, Weiwei and
               Shi, Freda},
  booktitle = {Proceedings of the 1st Workshop on Computational Developmental Linguistics ({CDL})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.cdl-1.3/},
  pages     = {15--26},
  isbn      = {979-8-89176-428-6},
  abstract  = {We show that structural grammatical priors produce targeted, linguistically specific effects on grammatical learning: improving filler-gap dependencies {---} which require long-distance hierarchical tracking {---} by 9{--}13 percentage points beyond structural regularisation alone ($d = 2.41${--}2.82), while damaging locally cued phenomena regardless of whether the grammar is real or random. This phenomenon-specificity, revealed by a random grammar control, suggests the right question is not whether structural priors help, but for which constructions and why. We test this by augmenting BabyBERTa (7.4M parameters) with a differentiable PCFG auxiliary loss derived from Minimalist Grammar, trained on AO-CHILDES (893K sentences of child-directed speech). In a pre-registered study of 190 experimental runs spanning 7 constraint strengths, 3 data scales, 5 random seeds, and 3 independent lexicon permutations, our confirmatory hypotheses about overall accuracy and sample efficiency are falsified. However, a random grammar control ($n = 15$ runs per condition; three independent lexicon permutations) reveals that linguistically accurate category assignments specifically drive filler-gap gains: real grammar outperforms both a structurally equivalent random grammar and the no-grammar baseline, while both conditions equally damage subject-verb agreement. These results show that structural priors function as targeted interventions rather than global boosters: they help specifically the constructions, specifically long-distance dependencies, whose computational demands align with what phrase-structure representations encode. We release code and pre-registered materials.},
}
Markdown (Informal)
[Do Structural Priors Help Neural Language Models Learn Grammar? Evidence from Child-Scale Data](https://preview.aclanthology.org/ingest-acl-workshops/2026.cdl-1.3/) (Cacioli, CDL 2026)
ACL