@inproceedings{prelevikj-zitnik-2021-multilingual,
    title = "Multilingual Named Entity Recognition and Matching Using {BERT} and Dedupe for {Slavic} Languages",
    author = "Prelevikj, Marko and
      {\v{Z}}itnik, Slavko",
    editor = "Babych, Bogdan and
      Kanishcheva, Olga and
      Nakov, Preslav and
      Piskorski, Jakub and
      Pivovarova, Lidia and
      Starko, Vasyl and
      Steinberger, Josef and
      Yangarber, Roman and
      Marci{\'n}czuk, Micha{\l} and
      Pollak, Senja and
      P{\v{r}}ib{\'a}{\v{n}}, Pavel and
      Robnik-{\v{S}}ikonja, Marko",
    booktitle = "Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing",
    month = apr,
    year = "2021",
    address = "Kyiv, Ukraine",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.bsnlp-1.9/",
    pages = "80--85",
    abstract = "This paper describes the University of Ljubljana (UL FRI) Group's submissions to the shared task at the Balto-Slavic Natural Language Processing (BSNLP) 2021 Workshop. We experiment with multiple BERT-based models, pre-trained in multi-lingual, Croatian-Slovene-English and Slovene-only data. We perform training iteratively and on the concatenated data of previously available NER datasets. For the normalization task we use Stanza lemmatizer, while for entity matching we implemented a baseline using the Dedupe library. The performance of evaluations suggests that multi-source settings outperform less-resourced approaches. The best NER models achieve 0.91 F-score on Slovene training data splits while the best official submission achieved F-scores of 0.84 and 0.78 for relaxed partial matching and strict settings, respectively. In multi-lingual NER setting we achieve F-scores of 0.82 and 0.74."
}
Markdown (Informal)
[Multilingual Named Entity Recognition and Matching Using BERT and Dedupe for Slavic Languages](https://aclanthology.org/2021.bsnlp-1.9/) (Prelevikj & Žitnik, BSNLP 2021)
ACL