% IndiSentiment140 paper, NAACL 2024 (Volume 1: Long Papers).
@inproceedings{kumar-etal-2024-indisentiment140,
  title     = {{IndiSentiment140}: Sentiment Analysis Dataset for {Indian} Languages with Emphasis on Low-Resource Languages using Machine Translation},
  author    = {Kumar, Saurabh and
               Sanasam, Ranbir and
               Nandi, Sukumar},
  editor    = {Duh, Kevin and
               Gomez, Helena and
               Bethard, Steven},
  booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.naacl-long.425/},
  doi       = {10.18653/v1/2024.naacl-long.425},
  pages     = {7689--7698},
  abstract  = {Sentiment analysis, a fundamental aspect of Natural Language Processing (NLP), involves the classification of emotions, opinions, and attitudes in text data. In the context of India, with its vast linguistic diversity and low-resource languages, the challenge is to support sentiment analysis in numerous Indian languages. This study explores the use of machine translation to bridge this gap. The investigation examines the feasibility of machine translation for creating sentiment analysis datasets in 22 Indian languages. Google Translate, with its extensive language support, is employed for this purpose in translating the Sentiment140 dataset. The study aims to provide insights into the practicality of using machine translation in the context of India's linguistic diversity for sentiment analysis datasets. Our findings indicate that a dataset generated using Google Translate has the potential to serve as a foundational framework for tackling the low-resource challenges commonly encountered in sentiment analysis for Indian languages.},
}
Markdown (Informal)
[IndiSentiment140: Sentiment Analysis Dataset for Indian Languages with Emphasis on Low-Resource Languages using Machine Translation](https://aclanthology.org/2024.naacl-long.425/) (Kumar et al., NAACL 2024)
ACL