% Conference paper, pages 529--541 of the SCiL 2026 proceedings.
% NOTE(review): the url field targets a preview.aclanthology.org "ingest" path --
%   presumably a staging link; confirm the canonical URL before relying on it.
% NOTE(review): address looks like the conference venue rather than the
%   publisher's city; confirm this matches the convention of the target style.
@inproceedings{lu-etal-2026-modeling,
  author    = {Lu, Yiming and
               Liao, Xinyu Leslie and
               Tabas, Alejandro and
               Xie, Xin},
  title     = {Modeling generalization in perceptual learning of speech},
  editor    = {Voigt, Rob and
               Warstadt, Alex and
               Feldman, Naomi and
               Linzen, Tal},
  booktitle = {Proceedings of the Society for Computation in Linguistics 2026},
  publisher = {Association for Computational Linguistics},
  address   = {San Diego, CA},
  month     = jul,
  year      = {2026},
  pages     = {529--541},
  isbn      = {979-8-89176-412-5},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.scil-main.49/},
  abstract  = {A hallmark of learning is generalization to novel instances. In speech, exposure to atypical pronunciation drives perceptual adjustment that can generalize to unheard tokens. Prior work has attributed constraints on generalization primarily to acoustic similarity between exposure and test contexts. We propose that generalization can also be understood as an inference problem: listeners must determine whether, and how strongly, a learned phonetic mapping should apply in a new context. We test this proposal using data from a recent experiment in which listeners were exposed to shifted vowel pronunciations and then tested on minimal pairs varying in lexical frequency. Learning effects appeared strongest when the exposure direction aligned with a high-frequency alternative in mixed-frequency pairs, and were absent for low-frequency pairs. The observed pattern could reflect token-level acoustic similarity, reliance on prior expectations, or frequency-dependent constraints in applying the learned mapping. We formalized these alternatives within a Bayesian belief-updating framework: a talker-specific model assuming full transfer, a mixture-of-expectations model that interpolates between the updated representation and the listener{'}s prior, and a hierarchical Bayesian model that deploys the updated representation with uncertainty. The talker-specific model captured most generalization patterns through its sensitivity to token-level acoustic properties, but overpredicted learning for low-frequency pairs. The hierarchical model best recovered the theoretically central exposure-control contrast pattern, suggesting that lexical frequency may constrain how learned representations are applied. Our results provide a computationally explicit framework for studying how contextual factors shape generalization in speech perception.},
}
Markdown (Informal)
[Modeling generalization in perceptual learning of speech](https://preview.aclanthology.org/ingest-acl-workshops/2026.scil-main.49/) (Lu et al., SCiL 2026)
ACL