% Gaman (2023), VarDial 2023 workshop paper; ACL Anthology ID 2023.vardial-1.23.
% url is the canonical Anthology landing page; doi is the bare identifier (no resolver prefix).
@inproceedings{gaman-2023-using,
    title = "Using Ensemble Learning in Language Variety Identification",
    author = "Gaman, Mihaela",
    editor = {Scherrer, Yves and
      Jauhiainen, Tommi and
      Ljube{\v{s}}i{\'c}, Nikola and
      Nakov, Preslav and
      Tiedemann, J{\"o}rg and
      Zampieri, Marcos},
    booktitle = "Tenth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial 2023)",
    month = may,
    year = "2023",
    address = "Dubrovnik, Croatia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.vardial-1.23/",
    doi = "10.18653/v1/2023.vardial-1.23",
    pages = "230--240",
    abstract = "The present work describes the solutions proposed by the UnibucNLP team to address the closed format of the DSL-TL task featured in the tenth VarDial Evaluation Campaign. The DSL-TL organizers provided approximately 11 thousand sentences written in three different languages and manually tagged with one of 9 classes. Out of these, 3 tags are considered common label and the remaining 6 tags are variety-specific. The DSL-TL task features 2 subtasks: Track 1 - a three-way and Track 2 - a two-way classification per language. In Track 2 only the variety-specific labels are used for scoring, whereas in Track 1 the common label is considered as well. Our team participated in both tracks, with three ensemble-based submissions for each. The meta-learner used for Track 1 is XGBoost and for Track 2, Logistic Regression. With each submission, we are gradually increasing the complexity of the ensemble, starting with two shallow, string-kernel based methods. To the first ensemble, we add a convolutional neural network for our second submission. The third ensemble submitted adds a fine-tuned BERT model to the second one. In Track 1, ensemble three is our highest ranked, with an F1-score of 53.18{\%}; 5.36{\%} less than the leader. Surprisingly, in Track 2 the ensemble of shallow methods surpasses the other two, more complex ensembles, achieving an F1-score of 69.35{\%}."
}
Markdown (Informal)
[Using Ensemble Learning in Language Variety Identification](https://aclanthology.org/2023.vardial-1.23/) (Gaman, VarDial 2023)
ACL