@comment{Shared-task workshop paper from the KEC'S CODE CRAFTERS team (DravidianLangTech 2026, abusive Tamil text detection). NOTE(review): the url field points at a preview/ingest host - confirm the canonical link before publishing.}
@inproceedings{nivetha-etal-2026-kecs,
  title     = {{KEC'S} {CODE} {CRAFTERS}@{DravidianLangTech} 2026: Abusive {Tamil} Text Detection Targeting Women on Social Media},
  author    = {Nivetha and
               S, Nethrasri and
               Subramanian, Malliga},
  editor    = {Chakravarthi, Bharathi Raja and
               Priyadharshini, Ruba and
               Madasamy, Anand Kumar and
               Thavareesan, Sajeetha and
               Rajiakodi, Saranya and
               Navaneethakrishnan, Subalalitha and
               Chinnappa, Dhivya and
               Palani, Balasubramanian and
               Subramanian, Malliga and
               Shanmugavadivel, Kogilavani and
               Rajalakshmi, Ratnavel},
  booktitle = {Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for {Dravidian} Languages},
  month     = jul,
  year      = {2026},
  address   = {Underline (Virtual)},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.dravidianlangtech-1.43/},
  pages     = {284--288},
  isbn      = {979-8-89176-401-9},
  abstract  = {As social media platforms continue to grow in size, unfortunately, they have also become a hub for digital toxicity, where women in linguistically diverse regions are particularly vulnerable to online harassment. Hence, the requirement for an automated moderation tool that can effectively handle regional languages is critical. Our paper is a step in this direction as we propose a classification model for the ``Abusive Tamil Text Detection Targeting Women on Social Media'' shared task for DravidianLangTech-2026. Our model is trained on a dataset of 25,948 comments for training and 915 for testing. Our primary objective was to classify content as either ``Abusive'' or ``Non-Abusive'' for YouTube videos. The Tamil language is particularly difficult to work with owing to its highly agglutinative structure and the tendency for code-mixing between Tamil and English or even using a mix of both in a single sentence. To overcome these difficulties in preprocessing, we designed a specific pipeline for denoising these informal scripts. We then implemented four traditional machine learning models: SVM, Logistic Regression, Random Forest, and Multinomial Naive Bayes using TF-IDF for feature extraction. Our model was optimized for hyperparameters and decision thresholds to achieve an accuracy and F1 score of 0.86 using Logistic Regression.},
}
Markdown (Informal)
[KEC’S CODE CRAFTERS@DravidianLangTech 2026: Abusive Tamil Text Detection Targeting Women on Social Media](https://preview.aclanthology.org/ingest-acl-workshops/2026.dravidianlangtech-1.43/) (Nivetha et al., DravidianLangTech 2026)
ACL