% NLPAICS 2024 conference paper (ACL Anthology ID 2024.nlpaics-1.17).
% The publisher name is double-braced so that the " and " inside it stays part
% of one literal value rather than being read as a list separator by biblatex/Biber.
@inproceedings{soustas-edwards-2024-elsagate,
  title     = {The {Elsagate} Corpus: Characterising Commentary on Alarming Video Content},
  author    = {Soustas, Panagiotis and
               Edwards, Matthew},
  editor    = {Mitkov, Ruslan and
               Ezzini, Saad and
               Ranasinghe, Tharindu and
               Ezeani, Ignatius and
               Khallaf, Nouran and
               Acarturk, Cengiz and
               Bradbury, Matthew and
               El-Haj, Mo and
               Rayson, Paul},
  booktitle = {Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security},
  month     = jul,
  year      = {2024},
  address   = {Lancaster, UK},
  publisher = {{International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security}},
  url       = {https://aclanthology.org/2024.nlpaics-1.17/},
  pages     = {147--152},
  abstract  = {Identifying disturbing online content being targeted at children is an important content moderation problem. However, previous approaches to this problem have focused on features of the content itself, and neglected potentially helpful insights from the reactions expressed by its online audience. To help remedy this, we present the Elsagate Corpus, a collection of over 22 million comments on more than 18,000 videos that have been associated with disturbing content. We describe the how we collected this corpus and present some insights from our initial explorations, including the surprisingly positive reactions from audiences to this content, some unusual non-linguistic commenting behavior of uncertain purpose and references to some concerning themes.},
}
Markdown (Informal)
[The Elsagate Corpus: Characterising Commentary on Alarming Video Content](https://aclanthology.org/2024.nlpaics-1.17/) (Soustas & Edwards, NLPAICS 2024)
ACL
- Panagiotis Soustas and Matthew Edwards. 2024. The Elsagate Corpus: Characterising Commentary on Alarming Video Content. In Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security, pages 147–152, Lancaster, UK. International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security.