@comment{RANLP 2023 paper (ACL Anthology ID 2023.ranlp-1.50). The url field holds the canonical Anthology address; the address field is the conference venue, following ACL Anthology export convention.}
@inproceedings{hadj-mohamed-etal-2023-alphamwe,
    title     = {{AlphaMWE}-{Arabic}: {Arabic} Edition of Multilingual Parallel Corpora with Multiword Expression Annotations},
    author    = {Hadj Mohamed, Najet and
                 Rassem, Malak and
                 Han, Lifeng and
                 Nenadic, Goran},
    editor    = {Mitkov, Ruslan and
                 Angelova, Galia},
    booktitle = {Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing},
    month     = sep,
    year      = {2023},
    address   = {Varna, Bulgaria},
    publisher = {INCOMA Ltd., Shoumen, Bulgaria},
    url       = {https://aclanthology.org/2023.ranlp-1.50/},
    pages     = {448--457},
    abstract  = {Multiword Expressions (MWEs) have been a bottleneck for Natural Language Understanding (NLU) and Natural Language Generation (NLG) tasks due to their idiomaticity, ambiguity, and non-compositionality. Bilingual parallel corpora introducing MWE annotations are very scarce which set another challenge for current Natural Language Processing (NLP) systems, especially in a multilingual setting. This work presents AlphaMWE-Arabic, an Arabic edition of the AlphaMWE parallel corpus with MWE annotations. We introduce how we created this corpus including machine translation (MT), post-editing, and annotations for both standard and dialectal varieties, i.e. Tunisian and Egyptian Arabic. We analyse the MT errors when they meet MWEs-related content, both quantitatively using the human-in-the-loop metric HOPE and qualitatively. We report the current state-of-the-art MT systems are far from reaching human parity performances. We expect our bilingual English-Arabic corpus will be an asset for multilingual research on MWEs such as translation and localisation, as well as for monolingual settings including the study of Arabic-specific lexicography and phrasal verbs on MWEs. Our corpus and experimental data are available at \url{https://github.com/aaronlifenghan/AlphaMWE}.},
}
Markdown (Informal)
[AlphaMWE-Arabic: Arabic Edition of Multilingual Parallel Corpora with Multiword Expression Annotations](https://aclanthology.org/2023.ranlp-1.50/) (Hadj Mohamed et al., RANLP 2023)
ACL