% Kchaou et al., Fifth Arabic Natural Language Processing Workshop (2020).
% The url field is the canonical ACL Anthology address for paper 2020.wanlp-1.18.
% Proper nouns in the title are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
@inproceedings{kchaou-etal-2020-parallel,
    title = "Parallel resources for {Tunisian} {Arabic} Dialect Translation",
    author = "Kchaou, Sam{\'e}h and
      Boujelbane, Rahma and
      Hadrich-Belguith, Lamia",
    editor = "Zitouni, Imed and
      Abdul-Mageed, Muhammad and
      Bouamor, Houda and
      Bougares, Fethi and
      El-Haj, Mahmoud and
      Tomeh, Nadi and
      Zaghouani, Wajdi",
    booktitle = "Proceedings of the Fifth Arabic Natural Language Processing Workshop",
    month = dec,
    year = "2020",
    address = "Barcelona, Spain (Online)",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.wanlp-1.18/",
    pages = "200--206",
    abstract = "The difficulty of processing dialects is clearly observed in the high cost of building representative corpus, in particular for machine translation. Indeed, all machine translation systems require a huge amount and good management of training data, which represents a challenge in a low-resource setting such as the Tunisian Arabic dialect. In this paper, we present a data augmentation technique to create a parallel corpus for Tunisian Arabic dialect written in social media and standard Arabic in order to build a Machine Translation (MT) model. The created corpus was used to build a sentence-based translation model. This model reached a BLEU score of 15.03{\%} on a test set, while it was limited to 13.27{\%} utilizing the corpus without augmentation."
}
Markdown (Informal)
[Parallel resources for Tunisian Arabic Dialect Translation](https://aclanthology.org/2020.wanlp-1.18/) (Kchaou et al., WANLP 2020)
ACL