% Paper in the proceedings of the 12th Global Wordnet Conference (ACL Anthology ID 2023.gwc-1.34).
% The url field uses the canonical Anthology address rather than a preview (staging) branch.
@inproceedings{ghanem-etal-2023-benchmark,
  title     = {A Benchmark and Scoring Algorithm for Enriching {Arabic} Synonyms},
  author    = {Ghanem, Sana and
               Jarrar, Mustafa and
               Jarrar, Radi and
               Bounhas, Ibrahim},
  editor    = {Rigau, German and
               Bond, Francis and
               Rademaker, Alexandre},
  booktitle = {Proceedings of the 12th Global Wordnet Conference},
  month     = jan,
  year      = {2023},
  address   = {University of the Basque Country, Donostia - San Sebastian, Basque Country},
  publisher = {Global Wordnet Association},
  url       = {https://aclanthology.org/2023.gwc-1.34/},
  pages     = {274--283},
  abstract  = {This paper addresses the task of extending a given synset with additional synonyms taking into account synonymy strength as a fuzzy value. Given a mono/multilingual synset and a threshold (a fuzzy value [0{\ensuremath{-}}1]), our goal is to extract new synonyms above this threshold from existing lexicons. We present twofold contributions: an algorithm and a benchmark dataset. The dataset consists of 3K candidate synonyms for 500 synsets. Each candidate synonym is annotated with a fuzzy value by four linguists. The dataset is important for (i) understanding how much linguists (dis/)agree on synonymy, in addition to (ii) using the dataset as a baseline to evaluate our algorithm. Our proposed algorithm extracts synonyms from existing lexicons and computes a fuzzy value for each candidate. Our evaluations show that the algorithm behaves like a linguist and its fuzzy values are close to those proposed by linguists (using RMSE and MAE). The dataset and a demo page are publicly available at https://portal.sina.birzeit.edu/synonyms.},
}
Markdown (Informal)
[A Benchmark and Scoring Algorithm for Enriching Arabic Synonyms](https://aclanthology.org/2023.gwc-1.34/) (Ghanem et al., GWC 2023)
ACL