@inproceedings{yang-etal-2025-transformer,
title = "Transformer-based Speech Model Learns Well as Infants and Encodes Abstractions through Exemplars in the Poverty of the Stimulus Environment",
author = "Yang, Yi and
Wang, Yiming and
Yuan, Jiahong",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2025.coling-main.528/",
pages = "7881--7890",
abstract = "Infants are capable of learning language, predominantly through speech and associations, in impoverished environments{---}a phenomenon known as the Poverty of the Stimulus (POS). Is this ability uniquely human, as an innate linguistic predisposition, or can it be empirically learned through potential linguistic structures from sparse and noisy exemplars? As an early exploratory work, we systematically designed a series of tasks, scenarios, and metrics to simulate the POS. We found that the emerging speech model wav2vec2.0 with pretrained weights from an English corpus can learn well in noisy and sparse Mandarin environments. We then tested various hypotheses and observed three pieces of evidence for abstraction: label correction, categorical patterns, and clustering effects. We concluded that models can encode hierarchical linguistic abstractions through exemplars in POS environments. We hope this work offers new insights into language acquisition from a speech perspective and inspires further research."
}