@inproceedings{aljabari-etal-2025-wojoodrelations,
title = "$\mathrm{Wojood^{Relations}}$: {A}rabic Relation Extraction Corpus and Modeling",
author = "Aljabari, Alaa and
Khalilia, Mohammed and
Jarrar, Mustafa",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.1741/",
pages = "34330--34348",
ISBN = "979-8-89176-332-6",
abstract = "Relation extraction (RE) is a core task in natural language processing, crucial for semantic understanding, knowledge graph construction, and enhancing downstream applications. Existing work on Arabic RE remains limited due to the language{'}s rich morphology and syntactic complexity, and the lack of large, high-quality datasets. In this paper, we present $\mathrm{Wojood^{Relations}}$, the largest and most diverse Arabic RE corpus to date, containing over $33K$ sentences ($\sim550K$ tokens) annotated with $\sim15K$ relation triples across 40 relation types. The corpus is built on top of Wojood NER dataset with manual relation annotations carried out by expert annotators, achieving a Cohen{'}s $\kappa$ of 0.92, indicating high reliability. In addition, we propose two methods: NLI-RE, which formulates RE as a binary natural language inference problem using relation-aware templates, and GPT-Joint, a few-shot LLM framework for joint entity and RE via relation-aware retrieval. Finally, we benchmark the dataset using both supervised models and in-context learning with LLMs. Supervised models achieve 92.89{\%} F1 for RE, while LLMs obtain 72.73{\%} F1 for joint entity and RE. These results establish strong baselines, highlight key challenges, and provide a foundation for advancing Arabic RE research."
}