% Workshop paper from the 6th Clinical Natural Language Processing Workshop (2024).
% The url field uses the canonical ACL Anthology address (same Anthology ID as the DOI)
% rather than a staging/preview build; proper nouns in the title are brace-protected
% as whole words so sentence-casing styles keep their capitalisation.
@inproceedings{mustafa-etal-2024-leveraging,
    title = "Leveraging {Wikidata} for Biomedical Entity Linking in a Low-Resource Setting: A Case Study for {German}",
    author = "Mustafa, Faizan E and
      Dima, Corina and
      Ochoa, Juan and
      Staab, Steffen",
    editor = "Naumann, Tristan and
      Ben Abacha, Asma and
      Bethard, Steven and
      Roberts, Kirk and
      Bitterman, Danielle",
    booktitle = "Proceedings of the 6th Clinical Natural Language Processing Workshop",
    month = jun,
    year = "2024",
    address = "Mexico City, Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.clinicalnlp-1.17/",
    doi = "10.18653/v1/2024.clinicalnlp-1.17",
    pages = "202--207",
    abstract = "Biomedical Entity Linking (BEL) is a challenging task for low-resource languages, due to the lack of appropriate resources: datasets, knowledge bases (KBs), and pre-trained models. In this paper, we propose an approach to create a biomedical knowledge base for German BEL using UMLS information from Wikidata, that provides good coverage and can be easily extended to further languages. As a further contribution, we adapt several existing approaches for use in the German BEL setup, and report on their results. The chosen methods include a sparse model using character n-grams, a multilingual biomedical entity linker, and two general-purpose text retrieval models. Our results show that a language-specific KB that provides good coverage leads to most improvement in entity linking performance, irrespective of the used model. The finetuned German BEL model, newly created UMLS$_{Wikidata}$ KB as well as the code to reproduce our results are publicly available."
}
Markdown (Informal)
[Leveraging Wikidata for Biomedical Entity Linking in a Low-Resource Setting: A Case Study for German](https://aclanthology.org/2024.clinicalnlp-1.17/) (Mustafa et al., ClinicalNLP 2024)
ACL