@comment{CLiC-it 2024 conference paper. The url field holds the permanent ACL Anthology address for Anthology ID 2024.clicit-1.71.}
@inproceedings{nastase-etal-2024-exploring-syntactic,
  author    = {Nastase, Vivi and
               Samo, Giuseppe and
               Jiang, Chunyang and
               Merlo, Paola},
  title     = {Exploring Syntactic Information in Sentence Embeddings through Multilingual Subject-verb Agreement},
  editor    = {Dell'Orletta, Felice and
               Lenci, Alessandro and
               Montemagni, Simonetta and
               Sprugnoli, Rachele},
  booktitle = {Proceedings of the 10th Italian Conference on Computational Linguistics ({CLiC-it} 2024)},
  month     = dec,
  year      = {2024},
  address   = {Pisa, Italy},
  publisher = {CEUR Workshop Proceedings},
  pages     = {631--643},
  isbn      = {979-12-210-7060-6},
  url       = {https://aclanthology.org/2024.clicit-1.71/},
  abstract  = {In this paper, our goal is to investigate to what degree multilingual pretrained language models capture cross-linguistically valid abstract linguistic representations. We take the approach of developing curated synthetic data on a large scale, with specific properties, and using them to study sentence representations built using pretrained language models. We use a new multiple-choice task and datasets, Blackbird Language Matrices (BLMs), to focus on a specific grammatical structural phenomenon {--} subject-verb agreement across a variety of sentence structures {--} in several languages. Finding a solution to this task requires a system detecting complex linguistic patterns and paradigms in text representations. Using a two-level architecture that solves the problem in two steps {--} detect syntactic objects and their properties in individual sentences, and find patterns across an input sequence of sentences {--} we show that despite having been trained on multilingual texts in a consistent manner, multilingual pretrained language models have language-specific differences, and syntactic structure is not shared, even across closely related languages.},
}
Markdown (Informal)
[Exploring Syntactic Information in Sentence Embeddings through Multilingual Subject-verb Agreement](https://aclanthology.org/2024.clicit-1.71/) (Nastase et al., CLiC-it 2024)
ACL