@inproceedings{alcantara-etal-2020-offensive,
title = "Offensive Video Detection: Dataset and Baseline Results",
author = "Alc{\^a}ntara, Cleber and
Moreira, Viviane and
Feijo, Diego",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.531",
pages = "4309--4319",
abstract = "Web-users produce and publish high volumes of data of various types, such as text, images, and videos. The platforms try to restrain their users from publishing offensive content to keep a friendly and respectful environment and rely on moderators to filter the posts. However, this method is insufficient due to the high volume of publications. The identification of offensive material can be performed automatically using machine learning, which needs annotated datasets. Among the published datasets in this matter, the Portuguese language is underrepresented, and videos are little explored. We investigated the problem of offensive video detection by assembling and publishing a dataset of videos in Portuguese containing mostly textual features. We ran experiments using popular machine learning classifiers used in this domain and reported our findings, alongside multiple evaluation metrics. We found that using word embedding with Deep Learning classifiers achieved the best results on average. CNN architectures, Naive Bayes, and Random Forest ranked top among different experiments. Transfer Learning models outperformed Classic algorithms when processing video transcriptions, but scored lower using other feature sets. These findings can be used as a baseline for future works on this subject.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alcantara-etal-2020-offensive">
<titleInfo>
<title>Offensive Video Detection: Dataset and Baseline Results</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cleber</namePart>
<namePart type="family">Alcântara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Feijo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Web-users produce and publish high volumes of data of various types, such as text, images, and videos. The platforms try to restrain their users from publishing offensive content to keep a friendly and respectful environment and rely on moderators to filter the posts. However, this method is insufficient due to the high volume of publications. The identification of offensive material can be performed automatically using machine learning, which needs annotated datasets. Among the published datasets in this matter, the Portuguese language is underrepresented, and videos are little explored. We investigated the problem of offensive video detection by assembling and publishing a dataset of videos in Portuguese containing mostly textual features. We ran experiments using popular machine learning classifiers used in this domain and reported our findings, alongside multiple evaluation metrics. We found that using word embedding with Deep Learning classifiers achieved the best results on average. CNN architectures, Naive Bayes, and Random Forest ranked top among different experiments. Transfer Learning models outperformed Classic algorithms when processing video transcriptions, but scored lower using other feature sets. These findings can be used as a baseline for future works on this subject.</abstract>
<identifier type="citekey">alcantara-etal-2020-offensive</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.531</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>4309</start>
<end>4319</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Offensive Video Detection: Dataset and Baseline Results
%A Alcântara, Cleber
%A Moreira, Viviane
%A Feijo, Diego
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F alcantara-etal-2020-offensive
%X Web-users produce and publish high volumes of data of various types, such as text, images, and videos. The platforms try to restrain their users from publishing offensive content to keep a friendly and respectful environment and rely on moderators to filter the posts. However, this method is insufficient due to the high volume of publications. The identification of offensive material can be performed automatically using machine learning, which needs annotated datasets. Among the published datasets in this matter, the Portuguese language is underrepresented, and videos are little explored. We investigated the problem of offensive video detection by assembling and publishing a dataset of videos in Portuguese containing mostly textual features. We ran experiments using popular machine learning classifiers used in this domain and reported our findings, alongside multiple evaluation metrics. We found that using word embedding with Deep Learning classifiers achieved the best results on average. CNN architectures, Naive Bayes, and Random Forest ranked top among different experiments. Transfer Learning models outperformed Classic algorithms when processing video transcriptions, but scored lower using other feature sets. These findings can be used as a baseline for future works on this subject.
%U https://aclanthology.org/2020.lrec-1.531
%P 4309-4319
Markdown (Informal)
[Offensive Video Detection: Dataset and Baseline Results](https://aclanthology.org/2020.lrec-1.531) (Alcântara et al., LREC 2020)
ACL