% RacismoBR dataset paper, PROPOR 2026 proceedings (Vol. 1), pages 770--779.
% Case-sensitive words in title/booktitle are protected with whole-word braces.
% NOTE(review): the url field points at a preview.aclanthology.org ingest path;
% confirm the permanent URL before relying on this entry.
@inproceedings{vaz-etal-2026-racismobr,
  title     = {{RacismoBR}: A Manually Annotated Dataset for Racist Discourse Detection in {Brazilian} {Portuguese}},
  author    = {Vaz, Jo{\~a}o V{\'i}tor and
               Benevenuto, Fabr{\'i}cio and
               Gon{\c{c}}alves, Marcos Andr{\'e}},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-dnd/2026.propor-1.76/},
  pages     = {770--779},
  isbn      = {979-8-89176-387-6},
  abstract  = {Racist discourse on social media appears both through explicit attacks and subtle, context-dependent forms, remaining a challenge for Natural Language Processing. We introduce RacismoBR, a culturally grounded dataset for detecting racist discourse in Brazilian Portuguese, manually annotated exclusively by Black researchers to ensure sociolinguistic validity and epistemic representativeness. We conduct a controlled evaluation of binary racism classification in our dataset considering several classification modeling paradigms: classical machine learning, supervised Transformer-based (Small) Language Models, and Large Language models under in-context, few-shot learning. Results show that GPT-4.1 and BERTimbau yield the highest Macro-F1 scores; however, Wilcoxon signed-rank tests reveal no statistically significant differences across models, mostly due to high variability. Across paradigms, classifiers consistently display higher precision for non-racist content and higher recall for racist content. A qualitative analysis highlights persistent difficulties with implicit, euphemized, and context-dependent racism. These findings indicate that culturally grounded annotation plays a more decisive role than architectural sophistication alone in advancing racism detection.},
}
Markdown (Informal)
[RacismoBR: A Manually Annotated Dataset for Racist Discourse Detection in Brazilian Portuguese](https://preview.aclanthology.org/ingest-dnd/2026.propor-1.76/) (Vaz et al., PROPOR 2026)
ACL