% Conference paper from the ACL 2019 proceedings (ACL Anthology ID P19-1341).
% The url field holds the canonical Anthology landing page, not a preview/staging
% branch; the doi field is the persistent identifier and should be preferred.
@inproceedings{zhao-schutze-2019-multilingual,
  author    = {Zhao, Mengjie and
               Sch{\"u}tze, Hinrich},
  title     = {A Multilingual {BPE} Embedding Space for Universal Sentiment Lexicon Induction},
  editor    = {Korhonen, Anna and
               Traum, David and
               M{\`a}rquez, Llu{\'\i}s},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  month     = jul,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/P19-1341/},
  doi       = {10.18653/v1/P19-1341},
  pages     = {3506--3517},
  abstract  = {We present a new method for sentiment lexicon induction that is designed to be applicable to the entire range of typological diversity of the world's languages. We evaluate our method on Parallel Bible Corpus+ (PBC+), a parallel corpus of 1593 languages. The key idea is to use Byte Pair Encodings (BPEs) as basic units for multilingual embeddings. Through zero-shot transfer from English sentiment, we learn a seed lexicon for each language in the domain of PBC+. Through domain adaptation, we then generalize the domain-specific lexicon to a general one. We show -- across typologically diverse languages in PBC+ -- good quality of seed and general-domain sentiment lexicons by intrinsic and extrinsic and by automatic and human evaluation. We make freely available our code, seed sentiment lexicons for all 1593 languages and induced general-domain sentiment lexicons for 200 languages.},
}
Markdown (Informal)
[A Multilingual BPE Embedding Space for Universal Sentiment Lexicon Induction](https://aclanthology.org/P19-1341/) (Zhao & Schütze, ACL 2019)
ACL