% Journal article: de Varda and Marelli (2023), Computational Linguistics 49(2), pp. 261--299.
@article{varda-marelli-2023-data,
  author    = {de Varda, Andrea Gregor and Marelli, Marco},
  title     = {Data-driven Cross-lingual Syntax: An Agreement Study with Massively Multilingual Models},
  journal   = {Computational Linguistics},
  volume    = {49},
  number    = {2},
  pages     = {261--299},
  month     = jun,
  year      = {2023},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/coli_a_00472},
  url       = {https://aclanthology.org/2023.cl-2.1},
  abstract  = {Massively multilingual models such as mBERT and XLM-R are increasingly valued in Natural Language Processing research and applications, due to their ability to tackle the uneven distribution of resources available for different languages. The models{'} ability to process multiple languages relying on a shared set of parameters raises the question of whether the grammatical knowledge they extracted during pre-training can be considered as a data-driven cross-lingual grammar. The present work studies the inner workings of mBERT and XLM-R in order to test the cross-lingual consistency of the individual neural units that respond to a precise syntactic phenomenon, that is, number agreement, in five languages (English, German, French, Hebrew, Russian). We found that there is a significant overlap in the latent dimensions that encode agreement across the languages we considered. This overlap is larger (a) for long- vis-{\`a}-vis short-distance agreement and (b) when considering XLM-R as compared to mBERT, and peaks in the intermediate layers of the network. We further show that a small set of syntax-sensitive neurons can capture agreement violations across languages; however, their contribution is not decisive in agreement processing.},
}
Markdown (Informal)
[Data-driven Cross-lingual Syntax: An Agreement Study with Massively Multilingual Models](https://aclanthology.org/2023.cl-2.1) (de Varda & Marelli, CL 2023)
ACL