% ByteBuilders system paper in the DravidianLangTech 2026 workshop proceedings
% (Tamil political multiclass sentiment analysis; see the abstract field).
%
% Maintenance notes:
%   - url uses the canonical ACL Anthology form built from the Anthology ID
%     2026.dravidianlangtech-1.20. The original export linked to a
%     preview.aclanthology.org staging build (branch ingest-acl-workshops),
%     which is not a permanent address.
%     NOTE(review): confirm the canonical page is live before release.
%   - Case-sensitive words in title/booktitle are protected with whole-word
%     braces so sentence-casing styles keep their capitalisation.
%   - address is kept exactly as exported; it reads as the event venue, not a
%     publisher city.
%   - abstract is the authors' text, reproduced verbatim.
@inproceedings{v-etal-2026-bytebuilders,
  title     = {{ByteBuilders}@{DravidianLangTech} 2026: Transformer-Based Weighted Ensemble for Political Multiclass Sentiment Analysis of {Tamil} {X} ({Twitter}) Comments},
  author    = {V, Mitharshana T and
               S, Shanthi and
               V, Lavana and
               R, Kaviya Varma},
  editor    = {Chakravarthi, Bharathi Raja and
               Priyadharshini, Ruba and
               Madasamy, Anand Kumar and
               Thavareesan, Sajeetha and
               Rajiakodi, Saranya and
               Navaneethakrishnan, Subalalitha and
               Chinnappa, Dhivya and
               Palani, Balasubramanian and
               Subramanian, Malliga and
               Shanmugavadivel, Kogilavani and
               Rajalakshmi, Ratnavel},
  booktitle = {Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for {Dravidian} Languages},
  month     = jul,
  year      = {2026},
  address   = {Underline (Virtual)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.dravidianlangtech-1.20/},
  pages     = {163--168},
  isbn      = {979-8-89176-401-9},
  abstract  = {Our proposal for the Dravidian LangTech 2026 Tamil Political Sentiment Analysis job is outlined in this document. Seven categories{---}substantiated, sarcastic, opinionated, positive, negative, neutral, and none of the above{---}should be used to classify Tamil political remarks according to their attitudes. Classifying the sentiments of Tamil political utterances is quite difficult. Furthermore, the emotions associated with various identities are not distributed uniformly. We built an ensemble of two transformer-based techniques, XLM-RoBERTa and IndicBERT, and used 10-fold cross-validation to improve the model{'}s dependability and prevent overfitting in order to address some of these issues while finishing this research. In order to help the model concentrate more on the challenging examples, used oversampling to address class imbalance and Focal Loss to train the model. In order to improve the representation of sentences, finally averaged the token embeddings.},
}