% Fahim (2023): BLP-2023 Task 1 workshop paper, pp. 201--207.
% Notes for maintainers:
%   - `url` is the canonical ACL Anthology address built from the paper ID in
%     the DOI (not a preview/ingestion-branch link); the DOI is the stable
%     identifier.
%   - Braced words in `title` ({BLP}, {UNK}, {Bangla}) keep their case under
%     styles that sentence-case titles.
@inproceedings{fahim-2023-aambela,
    title = "Aambela at {BLP}-2023 Task 1: Focus on {UNK} tokens: Analyzing Violence Inciting {Bangla} Text with Adding Dataset Specific New Word Tokens",
    author = "Fahim, Md",
    editor = "Alam, Firoj and
      Kar, Sudipta and
      Chowdhury, Shammur Absar and
      Sadeque, Farig and
      Amin, Ruhul",
    booktitle = "Proceedings of the First Workshop on Bangla Language Processing (BLP-2023)",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.banglalp-1.24/",
    doi = "10.18653/v1/2023.banglalp-1.24",
    pages = "201--207",
    abstract = "The BLP-2023 Task 1 aims to develop a Natural Language Inference system tailored for detecting and analyzing threats from Bangla YouTube comments. Bangla language models like BanglaBERT have demonstrated remarkable performance in various Bangla natural language processing tasks across different domains. We utilized BanglaBERT for the violence detection task, employing three different classification heads. As BanglaBERT's vocabulary lacks certain crucial words, our model incorporates some of them as new special tokens, based on their frequency in the dataset, and their embeddings are learned during training. The model achieved the 2nd position on the leaderboard, boasting an impressive macro-F1 Score of 76.04{\%} on the official test set. With the addition of new tokens, we achieved a 76.90{\%} macro-F1 score, surpassing the top score (76.044{\%}) on the test set."
}
Markdown (Informal)
[Aambela at BLP-2023 Task 1: Focus on UNK tokens: Analyzing Violence Inciting Bangla Text with Adding Dataset Specific New Word Tokens](https://preview.aclanthology.org/Ingest-2025-COMPUTEL/2023.banglalp-1.24/) (Fahim, BanglaLP 2023)
ACL