@inproceedings{murali-sivanaiah-2025-ssn,
    title = "{SSN}{\_}{MMHS}@{D}ravidian{L}ang{T}ech 2025: A Dual Transformer Approach for Multimodal Hate Speech Detection in {D}ravidian Languages",
    author = "Murali, Jahnavi and
      Sivanaiah, Rajalakshmi",
    editor = "Chakravarthi, Bharathi Raja and
      Priyadharshini, Ruba and
      Madasamy, Anand Kumar and
      Thavareesan, Sajeetha and
      Sherly, Elizabeth and
      Rajiakodi, Saranya and
      Palani, Balasubramanian and
      Subramanian, Malliga and
      Cn, Subalalitha and
      Chinnappa, Dhivya",
    booktitle = "Proceedings of the Fifth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages",
    month = may,
    year = "2025",
    address = "Albuquerque, New Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.dravidianlangtech-1.36/",
    pages = "210--214",
    isbn = "979-8-89176-228-2",
    abstract = "The proliferation of the Internet and social media platforms has resulted in an alarming increase in online hate speech, negatively affecting individuals and communities worldwide. While most research focuses on text-based detection in English, there is an increasing demand for multilingual and multimodal approaches to address hate speech more effectively. This paper presents a methodology for multiclass hate speech classification in low-resource Indian languages namely, Malayalam, Telugu, and Tamil, as part of the shared task at DravidianLangTech 2025. Our proposed approach employs a dual transformer-based framework that integrates audio and text modalities, facilitating cross-modal learning to enhance detection capabilities. Our model achieved macro-F1 scores of 0.348, 0.1631, and 0.1271 in the Malayalam, Telugu, and Tamil subtasks respectively. Although the framework{'}s performance is modest, it provides valuable insights into the complexities of multimodal hate speech detection in low-resource settings and highlights areas for future improvement, including data augmentation, and alternate fusion and feature extraction techniques."
}