% ACL 2026 long paper by Yang, Getz and Wilcox (Volume 1: Long Papers).
% NOTE(review): url targets a preview.aclanthology.org ingest build -- confirm the permanent address before relying on it.
@inproceedings{yang-etal-2026-function,
  title     = {Function Words as Statistical Cues for Language Learning},
  author    = {Yang, Xiulin and
               Getz, Heidi R. and
               Wilcox, Ethan Gotlieb},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.728/},
  pages     = {16042--16058},
  isbn      = {979-8-89176-390-6},
  abstract  = {What statistical properties might support learning abstract grammatical knowledge from linear input? We address this question by examining the statistical distribution of function words. Function words have been argued to aid acquisition through three distributional properties: high frequency, reliable syntactic association, and phrase-boundary alignment. We conduct a cross-linguistic corpus analysis of 186 languages, which confirms that all three properties are universal. Using counterfactual language modeling and ablation experiments on English, we show that preserving these properties facilitates acquisition in neural learners, with a Goldilocks effect: function words must be frequent enough to be reliable, yet diverse enough to remain informative to structural dependency. Probing analyses further reveal that different learning conditions produce systematically different reliance on function words.},
}
Markdown (Informal)
[Function Words as Statistical Cues for Language Learning](https://preview.aclanthology.org/ingest-acl/2026.acl-long.728/) (Yang et al., ACL 2026)
ACL
- Xiulin Yang, Heidi R. Getz, and Ethan Gotlieb Wilcox. 2026. Function Words as Statistical Cues for Language Learning. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 16042–16058, San Diego, California, United States. Association for Computational Linguistics.