% LREC-COLING 2024 paper introducing a Magahi-Hindi-English code-mixed
% sentiment dataset. Case-sensitive words in the title are brace-protected as
% whole words so sentence-casing styles keep them intact; the url is the
% permanent ACL Anthology address for ID 2024.lrec-main.950.
@inproceedings{rani-etal-2024-macms,
  title     = {{MaCmS}: {Magahi} Code-mixed Dataset for Sentiment Analysis},
  author    = {Rani, Priya and
               Fransen, Theodorus and
               McCrae, John P. and
               Negi, Gaurav},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.950/},
  pages     = {10880--10890},
  abstract  = {The present paper introduces new sentiment data, MaCMS, for Magahi-Hindi-English (MHE) code-mixed language, where Magahi is a less-resourced minority language. This dataset is the first Magahi-Hindi-English code-mixed dataset for sentiment analysis tasks. Further, we also provide a linguistics analysis of the dataset to understand the structure of code-mixing and a statistical study to understand the language preferences of speakers with different polarities. With these analyses, we also train baseline models to evaluate the dataset's quality.},
}
Markdown (Informal)
[MaCmS: Magahi Code-mixed Dataset for Sentiment Analysis](https://aclanthology.org/2024.lrec-main.950/) (Rani et al., LREC-COLING 2024)
ACL
- Priya Rani, Theodorus Fransen, John P. McCrae, and Gaurav Negi. 2024. MaCmS: Magahi Code-mixed Dataset for Sentiment Analysis. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 10880–10890, Torino, Italia. ELRA and ICCL.