% Conference paper in the CoNLL 2026 proceedings (pages 145--164).
% NOTE(review): the url field points at a "preview ... ingest" host, which looks
% like a staging address -- confirm the canonical URL before relying on it.
% The model name in the abstract is written as plain text (GPT-2) rather than
% as a math-mode construct, so it is not typeset in math italics.
@inproceedings{domenichelli-etal-2026-linguistic,
  title     = {Linguistic Profiling of {Transformer} Embedding Geometry},
  author    = {Domenichelli, Lucia and
               Brunato, Dominique and
               Dell{'}Orletta, Felice},
  editor    = {Bonial, Claire and
               Berzak, Yevgeni},
  booktitle = {Proceedings of the 30th Conference on Computational Natural Language Learning},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.10/},
  pages     = {145--164},
  isbn      = {979-8-89176-410-1},
  abstract  = {Transformer language models embed tokens in high-dimensional spaces, but whether geometry reflects linguistic structure remains unclear. We analyse token representations in BERT and GPT-2, selected as canonical encoder-only and decoder-only Transformer architectures, through a linguistically grounded geometric lens. We partition tokens from the UD English-EWT treebank by surface and syntactic features (position, length, POS, head distance and arity) and examine how their representational geometry evolves across layers. We employ complementary diagnostic metrics, including isotropy, linear and nonlinear intrinsic dimensionality, to capture distinct aspects of embedding structure. Our findings reveal that BERT maintains more isotropic and higher-dimensional subspaces, whereas GPT-2 exhibits stronger anisotropy driven by a compact cluster of sentence-initial tokens. Across models, open-class words, longer tokens, and high-arity predicates occupy more isotropic, higher-dimensional manifolds than short function words and pre-head modifiers, indicating that semantic richness and syntactic centrality play a key role in structuring embedding space. Our analysis provides a reusable framework for profiling how linguistic abstractions organize the geometry of Transformer embeddings.},
}
Markdown (Informal)
[Linguistic Profiling of Transformer Embedding Geometry](https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.10/) (Domenichelli et al., CoNLL 2026)
ACL
- Lucia Domenichelli, Dominique Brunato, and Felice Dell’Orletta. 2026. Linguistic Profiling of Transformer Embedding Geometry. In Proceedings of the 30th Conference on Computational Natural Language Learning, pages 145–164, San Diego, California, USA. Association for Computational Linguistics.