% Alizada and Dubossarsky (2026): workshop paper, ACL Anthology ID 2026.sigturk-1.10.
% The url field uses the canonical Anthology address rather than a preview-branch link.
% NOTE(review): booktitle reads "Second Workshop Natural Language Processing" -- an "on"
% may be missing after "Workshop"; confirm against the official proceedings title.
@inproceedings{alizada-dubossarsky-2026-benchmarking,
  author    = {Alizada, Tural and
               Dubossarsky, Haim},
  title     = {Benchmarking Hate Speech Detection in {Azerbaijani} with {Turkish} Cross-Lingual Transfer and Transformer Models},
  editor    = {Oflazer, Kemal and
               K{\"o}ksal, Abdullatif and
               Varol, Onur},
  booktitle = {Proceedings of the Second Workshop Natural Language Processing for {Turkic} Languages ({SIGTURK} 2026)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  pages     = {103--112},
  isbn      = {979-8-89176-370-8},
  url       = {https://aclanthology.org/2026.sigturk-1.10/},
  abstract  = {In this paper, we investigated the task of hate-speech classification in the closely related Turkic language pair, Turkish-Azerbaijani. Transformer models can achieve strong hate-speech classification in Turkish, but their performance does not reliably transfer to closely related low-resource languages without careful evaluation. We study Turkish{--}Azerbaijani hate speech detection and introduce the first manually annotated Azerbaijani benchmark, comprising 1,112 YouTube comments from major news channels with severe class imbalance. We compare XLM-RoBERTa and a compact BERT-Tiny model against a TF{--}IDF + logistic regression baseline under monolingual training, zero-shot Turkish{\textrightarrow}Azerbaijani transfer, low-resource balanced subsampling, bilingual mixed fine-tuning, and translation-based augmentation using machine-translated Turkish data. XLM-R attains high macro-F1 in Turkish and achieves moderate zero-shot transfer to Azerbaijani, but native Azerbaijani training is fragile for the hate class. Mixed bilingual training improves robustness for both languages, whereas TF{--}IDF generalizes poorly to Azerbaijani.},
}
Markdown (Informal)
[Benchmarking Hate Speech Detection in Azerbaijani with Turkish Cross-Lingual Transfer and Transformer Models](https://preview.aclanthology.org/manual-author-scripts/2026.sigturk-1.10/) (Alizada & Dubossarsky, SIGTURK 2026)
ACL