% Conference paper in the LREC-COLING 2024 main proceedings (ACL Anthology ID 2024.lrec-main.703).
% The url field holds the canonical aclanthology.org address, not a temporary preview-branch build.
@inproceedings{plum-etal-2024-guided,
  title     = {Guided Distant Supervision for Multilingual Relation Extraction Data: Adapting to a New Language},
  author    = {Plum, Alistair and
               Ranasinghe, Tharindu and
               Purschke, Christoph},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.703/},
  pages     = {7982--7992},
  abstract  = {Relation extraction is essential for extracting and understanding biographical information in the context of digital humanities and related subjects. There is a growing interest in the community to build datasets capable of training machine learning models to extract relationships. However, annotating such datasets can be expensive and time-consuming, in addition to being limited to English. This paper applies guided distant supervision to create a large biographical relationship extraction dataset for German. Our dataset, composed of more than 80,000 instances for nine relationship types, is the largest biographical German relationship extraction dataset. We also create a manually annotated dataset with 2000 instances to evaluate the models and release it together with the dataset compiled using guided distant supervision. We train several state-of-the-art machine learning models on the automatically created dataset and release them as well. Furthermore, we experiment with multilingual and cross-lingual zero-shot experiments that could benefit many low-resource languages.},
}
Markdown (Informal)
[Guided Distant Supervision for Multilingual Relation Extraction Data: Adapting to a New Language](https://aclanthology.org/2024.lrec-main.703/) (Plum et al., LREC-COLING 2024)
ACL