% ACL Anthology record (ID 2025.lowresnlp-1.4) for a workshop paper.
% The url field uses the permanent Anthology address, not a preview branch.
% NOTE(review): address appears to carry the workshop venue while the
% publisher's city is embedded in the publisher field -- confirm before
% normalising either field.
@inproceedings{khered-etal-2025-multi,
  title     = {A Multi-Task Learning Approach to Dialectal {Arabic} Identification and Translation to {Modern} {Standard} {Arabic}},
  author    = {Khered, Abdullah and
               Benkhedda, Youcef and
               Batista-Navarro, Riza},
  editor    = {Estevanell-Valladares, Ernesto Luis and
               Picazo-Izquierdo, Alicia and
               Ranasinghe, Tharindu and
               Mikaberidze, Besik and
               Ostermann, Simon and
               Gurgurov, Daniil and
               Mueller, Philipp and
               Borg, Claudia and
               {\v{S}}imko, Mari{\'a}n},
  booktitle = {Proceedings of the First Workshop on Advancing {NLP} for Low-Resource Languages},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.lowresnlp-1.4/},
  pages     = {21--31},
  abstract  = {Translating Dialectal Arabic (DA) into Modern Standard Arabic (MSA) is a complex task due to the linguistic diversity and informal nature of dialects, particularly in social media texts. To improve translation quality, we propose a Multi-Task Learning (MTL) framework that combines DA-MSA translation as the primary task and dialect identification as an auxiliary task. Additionally, we introduce LahjaTube, a new corpus containing DA transcripts and corresponding MSA and English translations, covering four major Arabic dialects: Egyptian (EGY), Gulf (GLF), Levantine (LEV), and Maghrebi (MGR), collected from YouTube. We evaluate AraT5 and AraBART on the Dial2MSA-Verified dataset under Single-Task Learning (STL) and MTL setups. Our results show that adopting the MTL framework and incorporating LahjaTube into the training data improve the translation performance, leading to a BLEU score improvement of 2.65 points over baseline models.},
}

Markdown (Informal)
[A Multi-Task Learning Approach to Dialectal Arabic Identification and Translation to Modern Standard Arabic](https://aclanthology.org/2025.lowresnlp-1.4/) (Khered et al., LowResNLP 2025)
ACL