% LT-EDI 2024 workshop paper; ACL Anthology ID 2024.ltedi-1.16.
% The url field is the canonical Anthology permalink, not a preview/staging branch.
@inproceedings{manukonda-kodali-2024-bytellm,
    title     = {{byteLLM@LT-EDI-2024}: Homophobia/Transphobia Detection in Social Media Comments - Custom Subword Tokenization with {Subword2Vec} and {BiLSTM}},
    author    = {Manukonda, Durga Prasad and
                 Kodali, Rohith Gowtham},
    editor    = {Chakravarthi, Bharathi Raja and
                 B, Bharathi and
                 Buitelaar, Paul and
                 Durairaj, Thenmozhi and
                 Kov{\'a}cs, Gy{\"o}rgy and
                 Garc{\'i}a Cumbreras, Miguel {\'A}ngel},
    booktitle = {Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion},
    month     = mar,
    year      = {2024},
    address   = {St. Julian's, Malta},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.ltedi-1.16/},
    pages     = {157--163},
    abstract  = {This research focuses on Homophobia and Transphobia Detection in Dravidian languages, specifically Telugu, Kannada, Tamil, and Malayalam. Leveraging the Homophobia/Transphobia Detection dataset, we propose an innovative approach employing a custom-designed tokenizer with a Bidirectional Long Short-Term Memory (BiLSTM) architecture. Our distinctive contribution lies in a tokenizer that reduces model sizes to below 7MB, improving efficiency and addressing real-time deployment challenges. The BiLSTM implementation demonstrates significant enhancements in hate speech detection accuracy, effectively capturing linguistic nuances. Low-size models efficiently alleviate inference challenges, ensuring swift real-time detection and practical deployment. This work pioneers a framework for hate speech detection, providing insights into model size, inference speed, and real-time deployment challenges in combatting online hate speech within Dravidian languages.},
}
Markdown (Informal)
[byteLLM@LT-EDI-2024: Homophobia/Transphobia Detection in Social Media Comments - Custom Subword Tokenization with Subword2Vec and BiLSTM](https://aclanthology.org/2024.ltedi-1.16/) (Manukonda & Kodali, LTEDI 2024)
ACL