% Conference paper: Jian and Manning, EACL 2026 (Volume 1: Long Papers), pages 752--765.
% The url field uses the permanent ACL Anthology address for this paper ID instead of
% the temporary preview/ingest staging path, which is expected to disappear after ingestion.
% NOTE(review): confirm the canonical URL resolves once the volume is published.
% Acronyms and proper nouns are brace-protected as whole words so that
% sentence-casing styles do not lowercase them.
@inproceedings{jian-manning-2026-humans,
  title     = {Humans and transformer {LMs}: Abstraction drives language learning},
  author    = {Jian, Jasper and
               Manning, Christopher D.},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.eacl-long.32/},
  pages     = {752--765},
  isbn      = {979-8-89176-380-7},
  abstract  = {Categorization is a core component of human linguistic competence. We investigate how a transformer-based language model (LM) learns linguistic categories by comparing its behaviour over the course of training to behaviours which characterize abstract feature{--}based and concrete exemplar{--}based accounts of human language acquisition. We investigate how lexical semantic and syntactic categories emerge using novel divergence-based metrics that track learning trajectories using next-token distributions. In experiments with GPT-2 small, we find that (i) when a construction is learned, abstract class-level behaviour is evident at earlier steps than lexical item{--}specific behaviour, and (ii) that different linguistic behaviours emerge abruptly in sequence at different points in training, revealing that abstraction plays a key role in how LMs learn. This result informs the models of human language acquisition that LMs may serve as an existence proof for.},
}
Markdown (Informal)
[Humans and transformer LMs: Abstraction drives language learning](https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.32/) (Jian & Manning, EACL 2026)
ACL
- Jasper Jian and Christopher D. Manning. 2026. Humans and transformer LMs: Abstraction drives language learning. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 752–765, Rabat, Morocco. Association for Computational Linguistics.