% Workshop paper from WANLP 2022 (ACL Anthology ID 2022.wanlp-1.17).
% The url field holds the canonical aclanthology.org address; the DOI below
% resolves to the same record and should be preferred when a style prints one.
% Note: address records the workshop venue, following ACL Anthology export
% convention, not the publisher's city.
@inproceedings{alrajhi-etal-2022-assessing,
  title     = {Assessing the Linguistic Knowledge in {Arabic} Pre-trained Language Models Using Minimal Pairs},
  author    = {Alrajhi, Wafa Abdullah and
               Al-Khalifa, Hend and
               AlSalman, Abdulmalik},
  editor    = {Bouamor, Houda and
               Al-Khalifa, Hend and
               Darwish, Kareem and
               Rambow, Owen and
               Bougares, Fethi and
               Abdelali, Ahmed and
               Tomeh, Nadi and
               Khalifa, Salam and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Seventh {Arabic} Natural Language Processing Workshop ({WANLP})},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates (Hybrid)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.wanlp-1.17/},
  doi       = {10.18653/v1/2022.wanlp-1.17},
  pages     = {185--193},
  abstract  = {Despite the noticeable progress that we recently witnessed in Arabic pre-trained language models (PLMs), the linguistic knowledge captured by these models remains unclear. In this paper, we conducted a study to evaluate available Arabic PLMs in terms of their linguistic knowledge. BERT-based language models (LMs) are evaluated using Minimum Pairs (MP), where each pair represents a grammatical sentence and its contradictory counterpart. MPs isolate specific linguistic knowledge to test the model{'}s sensitivity in understanding a specific linguistic phenomenon. We cover nine major Arabic phenomena: Verbal sentences, Nominal sentences, Adjective Modification, and Idafa construction. The experiments compared the results of fifteen Arabic BERT-based PLMs. Overall, among all tested models, CAMeL-CA outperformed the other PLMs by achieving the highest overall accuracy.},
}
Markdown (Informal)
[Assessing the Linguistic Knowledge in Arabic Pre-trained Language Models Using Minimal Pairs](https://aclanthology.org/2022.wanlp-1.17/) (Alrajhi et al., WANLP 2022)
ACL