@inproceedings{akomeah-etal-2021-ur,
title = "{UR}@{NLP}{\_}{A}{\_}{T}eam @ {G}erm{E}val 2021: Ensemble-based Classification of Toxic, Engaging and Fact-Claiming Comments",
author = "Akomeah, Kwabena Odame and
Kruschwitz, Udo and
Ludwig, Bernd",
editor = "Risch, Julian and
Stoll, Anke and
Wilms, Lena and
Wiegand, Michael",
booktitle = "Proceedings of the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments",
month = sep,
year = "2021",
address = "Duesseldorf, Germany",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2021.germeval-1.14/",
pages = "95--99",
abstract = "In this paper, we report on our approach to addressing the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments for the German language. We submitted three runs for each subtask based on ensembles of three models each using contextual embeddings from pre-trained language models using SVM and neural-network-based classifiers. We include language-specific as well as language-agnostic language models {--} both with and without fine-tuning. We observe that for the runs we submitted that the SVM models overfitted the training data and this affected the aggregation method (simple majority voting) of the ensembles. The model records a lower performance on the test set than on the training set. Exploring the issue of overfitting we uncovered that due to a bug in the pipeline the runs we submitted had not been trained on the full set but only on a small training set. Therefore in this paper we also include the results we get when trained on the full training set which demonstrate the power of ensembles."
}
Markdown (Informal)
[UR@NLP_A_Team @ GermEval 2021: Ensemble-based Classification of Toxic, Engaging and Fact-Claiming Comments](https://aclanthology.org/2021.germeval-1.14/) (Akomeah et al., GermEval 2021)