% Izbicki (2022), workshop paper in the LoResMT 2022 proceedings, pp. 107--117.
% The url field uses the canonical aclanthology.org address for Anthology ID
% 2022.loresmt-1.14; preview.aclanthology.org/<branch>/ links are staging builds
% and should not be used as citation targets.
@inproceedings{izbicki-2022-aligning,
  title     = {Aligning Word Vectors on Low-Resource Languages with {Wiktionary}},
  author    = {Izbicki, Mike},
  editor    = {Ojha, Atul Kr. and
               Liu, Chao-Hong and
               Vylomova, Ekaterina and
               Abbott, Jade and
               Washington, Jonathan and
               Oco, Nathaniel and
               Pirinen, Tommi A and
               Malykh, Valentin and
               Logacheva, Varvara and
               Zhao, Xiaobing},
  booktitle = {Proceedings of the Fifth Workshop on Technologies for Machine Translation of Low-Resource Languages ({LoResMT} 2022)},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.loresmt-1.14/},
  pages     = {107--117},
  abstract  = {Aligned word embeddings have become a popular technique for low-resource natural language processing. Most existing evaluation datasets are generated automatically from machine translations systems, so they have many errors and exist only for high-resource languages. We introduce the Wiktionary bilingual lexicon collection, which provides high-quality human annotated translations for words in 298 languages to English. We use these lexicons to train and evaluate the largest published collection of aligned word embeddings on 157 different languages. All of our code and data is publicly available at \url{https://github.com/mikeizbicki/wiktionary_bli}.},
}
Markdown (Informal)
[Aligning Word Vectors on Low-Resource Languages with Wiktionary](https://aclanthology.org/2022.loresmt-1.14/) (Izbicki, LoResMT 2022)
ACL