@inproceedings{inoue-etal-2022-morphosyntactic,
title = "Morphosyntactic Tagging with Pre-trained Language Models for {A}rabic and its Dialects",
author = "Inoue, Go and
Khalifa, Salam and
Habash, Nizar",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2022.findings-acl.135/",
doi = "10.18653/v1/2022.findings-acl.135",
pages = "1708--1719",
abstract = "We present state-of-the-art results on morphosyntactic tagging across different varieties of Arabic using fine-tuned pre-trained transformer language models. Our models consistently outperform existing systems in Modern Standard Arabic and all the Arabic dialects we study, achieving 2.6{\%} absolute improvement over the previous state-of-the-art in Modern Standard Arabic, 2.8{\%} in Gulf, 1.6{\%} in Egyptian, and 8.3{\%} in Levantine. We explore different training setups for fine-tuning pre-trained transformer language models, including training data size, the use of external linguistic resources, and the use of annotated data from other dialects in a low-resource scenario. Our results show that strategic fine-tuning using datasets from other high-resource dialects is beneficial for a low-resource dialect. Additionally, we show that high-quality morphological analyzers as external linguistic resources are beneficial especially in low-resource settings."
}
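
The paper frames morphosyntactic tagging as token classification with a fine-tuned pre-trained transformer. Below is a minimal sketch of that setup using the Hugging Face Transformers API; it is not the authors' code, and the model checkpoint, tagset, and example sentence are illustrative placeholders rather than the paper's actual configuration.

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Illustrative checkpoint (a CAMeLBERT MSA model); the paper compares
# several pre-trained Arabic models, not necessarily this exact one.
MODEL_NAME = "CAMeL-Lab/bert-base-arabic-camelbert-msa"
TAGS = ["noun", "verb", "prep", "adj"]  # placeholder tag inventory

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(
    MODEL_NAME, num_labels=len(TAGS)
)

# One pre-tokenized sentence with word-level gold tags (dummy data):
# "the boy wrote a letter" -> verb, noun, noun.
words = ["كتب", "الولد", "رسالة"]
labels = [TAGS.index("verb"), TAGS.index("noun"), TAGS.index("noun")]

enc = tokenizer(words, is_split_into_words=True, return_tensors="pt")

# Align word-level labels to subword tokens: label only the first
# subword of each word; -100 is ignored by the cross-entropy loss.
aligned, prev = [], None
for wid in enc.word_ids():
    if wid is None or wid == prev:
        aligned.append(-100)
    else:
        aligned.append(labels[wid])
    prev = wid

out = model(**enc, labels=torch.tensor([aligned]))
out.loss.backward()  # an optimizer step would complete one fine-tuning update

The same token-classification recipe applies whether the checkpoint was pre-trained on MSA or dialectal data, which is the axis the paper's fine-tuning experiments vary.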