% Workshop paper (DravidianLangTech 2025) by Hanif and Rahman: fine-tuning
% transformer models to detect abusive text targeting women in Tamil and Malayalam.
% Case-sensitive tokens in the title are protected as whole-word brace groups.
% NOTE(review): address gives the conference city; the publisher's own location
% is not recorded in this entry.
@inproceedings{hanif-rahman-2025-cuet,
  title     = {{CUET\_Agile@DravidianLangTech} 2025: Fine-tuning Transformers for Detecting Abusive Text Targeting Women from {Tamil} and {Malayalam} Texts},
  author    = {Hanif, Tareque Md and
               Rahman, Md Rashadur},
  editor    = {Chakravarthi, Bharathi Raja and
               Priyadharshini, Ruba and
               Madasamy, Anand Kumar and
               Thavareesan, Sajeetha and
               Sherly, Elizabeth and
               Rajiakodi, Saranya and
               Palani, Balasubramanian and
               Subramanian, Malliga and
               Cn, Subalalitha and
               Chinnappa, Dhivya},
  booktitle = {Proceedings of the Fifth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.dravidianlangtech-1.55/},
  pages     = {315--319},
  isbn      = {979-8-89176-228-2},
  abstract  = {As social media has grown, so has online abuse, with women often facing harmful online behavior. This discourages their free participation and expression online. This paper outlines the approach adopted by our team for detecting abusive comments in Tamil and Malayalam. The task focuses on classifying whether a given comment contains abusive language towards women. We experimented with transformer based models by fine-tuning Tamil-BERT for Tamil and Malayalam-BERT for Malayalam. Additionally, we fine-tuned IndicBERT v2 on both Tamil and Malayalam datasets. To evaluate the effect of pre-processing, we also conducted experiments using non-preprocessed text. Results demonstrate that IndicBERT v2 outperformed the language-specific BERT models in both languages. Pre-processing the data showed mixed results, with a slight improvement in the Tamil dataset but no significant benefit for the Malayalam dataset. Our approach secured first place in Tamil with a macro F1-score of 0.7883 and second place in Malayalam with a macro F1-score of 0.7234. The implementation details of the task will be found in the GitHub repository.},
}
Markdown (Informal)
[CUET_Agile&#64;DravidianLangTech 2025: Fine-tuning Transformers for Detecting Abusive Text Targeting Women from Tamil and Malayalam Texts](https://aclanthology.org/2025.dravidianlangtech-1.55/) (Hanif & Rahman, DravidianLangTech 2025)
ACL