@comment{SIGTYP 2025 workshop paper; ACL Anthology ID 2025.sigtyp-1.16 (as given in the url field).}
@inproceedings{beekhuizen-2025-token,
  title     = {Token-level semantic typology without a massively parallel corpus},
  author    = {Beekhuizen, Barend},
  editor    = {Hahn, Michael and
               Rani, Priya and
               Kumar, Ritesh and
               Shcherbakov, Andreas and
               Sorokin, Alexey and
               Serikov, Oleg and
               Cotterell, Ryan and
               Vylomova, Ekaterina},
  booktitle = {Proceedings of the 7th Workshop on Research in Computational Linguistic Typology and Multilingual {NLP}},
  month     = aug,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/landing_page/2025.sigtyp-1.16/},
  pages     = {165--176},
  isbn      = {979-8-89176-281-7},
  abstract  = {This paper presents a computational method for token-level lexical semantic comparative research in an original text setting, as opposed to the more common massively parallel setting. Given a set of (non-massively parallel) bitexts, the method consists of leveraging pre-trained contextual vectors in a reference language to induce, for a token in one target language, the lexical items that all other target languages would have used, thus simulating a massively parallel set-up. The method is evaluated on its extraction and induction quality, and the use of the method for lexical semantic typological research is demonstrated.},
}
Markdown (Informal)
[Token-level semantic typology without a massively parallel corpus](https://preview.aclanthology.org/landing_page/2025.sigtyp-1.16/) (Beekhuizen, SIGTYP 2025)
ACL