% System-description paper for the LT-EDI 2023 shared task on
% homophobia/transphobia detection in social media comments.
% The url field holds the permanent ACL Anthology address for paper
% 2023.ltedi-1.15 rather than a temporary preview-branch build.
@inproceedings{wong-etal-2023-cantnlp,
  title     = {{cantnlp@LT-EDI-2023}: Homophobia/Transphobia Detection in Social Media Comments using Spatio-Temporally Retrained Language Models},
  author    = {Wong, Sidney and
               Durward, Matthew and
               Adams, Benjamin and
               Dunn, Jonathan},
  editor    = {Chakravarthi, Bharathi R. and
               Bharathi, B. and
               Griffith, Joephine and
               Bali, Kalika and
               Buitelaar, Paul},
  booktitle = {Proceedings of the Third Workshop on Language Technology for Equality, Diversity and Inclusion},
  month     = sep,
  year      = {2023},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2023.ltedi-1.15/},
  pages     = {103--108},
  abstract  = {This paper describes our multiclass classification system developed as part of the LT-EDI@RANLP-2023 shared task. We used a BERT-based language model to detect homophobic and transphobic content in social media comments across five language conditions: English, Spanish, Hindi, Malayalam, and Tamil. We retrained a transformer-based cross-language pretrained language model, XLM-RoBERTa, with spatially and temporally relevant social media language data. We found the inclusion of this spatio-temporal data improved the classification performance for all language and task conditions when compared with the baseline. We also retrained a subset of models with simulated script-mixed social media language data with varied performance. The results from the current study suggests that transformer-based language classification systems are sensitive to register-specific and language-specific retraining.},
}
Markdown (Informal)
[cantnlp@LT-EDI-2023: Homophobia/Transphobia Detection in Social Media Comments using Spatio-Temporally Retrained Language Models](https://aclanthology.org/2023.ltedi-1.15/) (Wong et al., LTEDI 2023)
ACL