% ACL Anthology record 2022.clib-1.17 (CLIB 2022 proceedings paper).
@inproceedings{petrovski-2022-parallel,
  author    = {Petrovski, Aleksandar},
  title     = {A Parallel {English} - {Serbian} - {Bulgarian} - {Macedonian} Lexicon of Named Entities},
  booktitle = {Proceedings of the Fifth International Conference on Computational Linguistics in Bulgaria (CLIB 2022)},
  month     = sep,
  year      = {2022},
  address   = {Sofia, Bulgaria},
  publisher = {Department of Computational Linguistics, IBL -- BAS},
  pages     = {146--151},
  url       = {https://aclanthology.org/2022.clib-1.17/},
  abstract  = {This paper describes the creation of a parallel multilingual lexicon of named entities from English to three South Slavic languages: Serbian, Bulgarian and Macedonian, with Wikipedia as a source. The basics of the proposed methodology are well known. This methodology provides a cheap opportunity to build multilingual lexicons, without having expertise in target languages. Wikipedia's database dump can be freely downloaded in SQL and XML formats. The method presented here has been used to build a Python application that extracts the English {--} Serbian {--} Bulgarian {--} Macedonian parallel titles from Wikipedia and classifies them using the English Wikipedia category system. The extracted named entity sets have been classified into five classes: PERSON, ORGANIZATION, LOCATION, PRODUCT, and MISC (miscellaneous). It has been achieved using Wikipedia metadata. The quality of classification has been checked manually on 1,000 randomly chosen named entities. The following are the results obtained: 97{\%} for precision and 90{\%} for recall.},
}
Markdown (Informal)
[A Parallel English - Serbian - Bulgarian - Macedonian Lexicon of Named Entities](https://aclanthology.org/2022.clib-1.17/) (Petrovski, CLIB 2022)
ACL