% CALCS 2020 workshop paper by Singh and Lefever (ACL Anthology ID 2020.calcs-1.6).
% The url field holds the canonical ACL Anthology address rather than a
% temporary preview-branch host, so the link stays valid over time.
@inproceedings{singh-lefever-2020-sentiment,
  author    = {Singh, Pranaydeep and
               Lefever, Els},
  title     = {Sentiment Analysis for {Hinglish} Code-mixed Tweets by means of Cross-lingual Word Embeddings},
  editor    = {Solorio, Thamar and
               Choudhury, Monojit and
               Bali, Kalika and
               Sitaram, Sunayana and
               Das, Amitava and
               Diab, Mona},
  booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Code Switching},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {45--51},
  url       = {https://aclanthology.org/2020.calcs-1.6/},
  language  = {eng},
  isbn      = {979-10-95546-66-5},
  abstract  = {This paper investigates the use of unsupervised cross-lingual embeddings for solving the problem of code-mixed social media text understanding. We specifically investigate the use of these embeddings for a sentiment analysis task for Hinglish Tweets, viz. English combined with (transliterated) Hindi. In a first step, baseline models, initialized with monolingual embeddings obtained from large collections of tweets in English and code-mixed Hinglish, were trained. In a second step, two systems using cross-lingual embeddings were researched, being (1) a supervised classifier and (2) a transfer learning approach trained on English sentiment data and evaluated on code-mixed data. We demonstrate that incorporating cross-lingual embeddings improves the results (F1-score of 0.635 versus a monolingual baseline of 0.616), without any parallel data required to train the cross-lingual embeddings. In addition, the results show that the cross-lingual embeddings not only improve the results in a fully supervised setting, but they can also be used as a base for distant supervision, by training a sentiment model in one of the source languages and evaluating on the other language projected in the same space. The transfer learning experiments result in an F1-score of 0.556, which is almost on par with the supervised settings and speak to the robustness of the cross-lingual embeddings approach.},
}
Markdown (Informal)
[Sentiment Analysis for Hinglish Code-mixed Tweets by means of Cross-lingual Word Embeddings](https://aclanthology.org/2020.calcs-1.6/) (Singh & Lefever, CALCS 2020)
ACL