% Workshop paper from the DravidianLangTech 2025 proceedings: compares a
% TF-IDF + Logistic Regression model against a FastText + BiLSTM-attention
% model for sentiment analysis on code-mixed Tamil and Tulu text.
% Notes for maintainers:
%   - Case-sensitive words in the title are brace-protected as whole words.
%   - The address field holds the conference venue, as exported by the source.
%   - The url is the canonical ACL Anthology permalink for this paper id.
@inproceedings{r-etal-2025-justatalentedteam,
    title     = "{JustATalentedTeam@DravidianLangTech} 2025: A Study of {ML} and {DL} approaches for Sentiment Analysis in Code-Mixed {Tamil} and {Tulu} Texts",
    author    = "R, Ponsubash Raj and
                 B, Paruvatha Priya and
                 B, Bharathi",
    editor    = "Chakravarthi, Bharathi Raja and
                 Priyadharshini, Ruba and
                 Madasamy, Anand Kumar and
                 Thavareesan, Sajeetha and
                 Sherly, Elizabeth and
                 Rajiakodi, Saranya and
                 Palani, Balasubramanian and
                 Subramanian, Malliga and
                 Cn, Subalalitha and
                 Chinnappa, Dhivya",
    booktitle = "Proceedings of the Fifth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages",
    month     = may,
    year      = "2025",
    address   = "Acoma, The Albuquerque Convention Center, Albuquerque, New Mexico",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2025.dravidianlangtech-1.47/",
    pages     = "273--277",
    isbn      = "979-8-89176-228-2",
    abstract  = "The growing prevalence of code-mixed text on social media presents unique challenges for sentiment analysis, particularly in low-resource languages like Tamil and Tulu. This paper explores sentiment classification in Tamil-English and Tulu-English code-mixed datasets using both machine learning (ML) and deep learning (DL) approaches. The ML model utilizes TF-IDF feature extraction combined with a Logistic Regression classifier, while the DL model employs FastText embeddings and a BiLSTM network enhanced with an attention mechanism. Experimental results reveal that the ML model outperforms the DL model in terms of macro F1-score for both languages. Specifically, for Tamil, the ML model achieves a macro F1-score of 0.46, surpassing the DL model{'}s score of 0.43. For Tulu, the ML model significantly outperforms the DL model, achieving 0.60 compared to 0.48. This performance disparity is more pronounced in Tulu due to its smaller dataset size of 13,308 samples compared to Tamil{'}s 31,122 samples, highlighting the data efficiency of ML models in low-resource settings. The study provides insights into the strengths and limitations of each approach, demonstrating that traditional ML techniques remain competitive for code-mixed sentiment analysis when data is limited. These findings contribute to ongoing research in multilingual NLP and offer practical implications for applications such as social media monitoring, customer feedback analysis, and conversational AI in Dravidian languages."
}