@inproceedings{zhang-etal-2023-enhancing-arabic,
title = "Enhancing {A}rabic Machine Translation for {E}-commerce Product Information: Data Quality Challenges and Innovative Selection Approaches",
author = "Zhang, Bryan and
Danial, Salah and
Walter, Stephan",
editor = "Sawaf, Hassan and
El-Beltagy, Samhaa and
Zaghouani, Wajdi and
Magdy, Walid and
Abdelali, Ahmed and
Tomeh, Nadi and
Abu Farha, Ibrahim and
Habash, Nizar and
Khalifa, Salam and
Keleg, Amr and
Haddad, Hatem and
Zitouni, Imed and
Mrini, Khalil and
Almatham, Rawan",
booktitle = "Proceedings of ArabicNLP 2023",
month = dec,
year = "2023",
address = "Singapore (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.arabicnlp-1.13/",
doi = "10.18653/v1/2023.arabicnlp-1.13",
pages = "150--157",
abstract = "Product information in e-commerce is usually localized using machine translation (MT) systems. Arabic language has rich morphology and dialectal variations, so Arabic MT in e-commerce training requires a larger volume of data from diverse data sources; Given the dynamic nature of e-commerce, such data needs to be acquired periodically to update the MT. Consequently, validating the quality of training data periodically within an industrial setting presents a notable challenge. Meanwhile, the performance of MT systems is significantly impacted by the quality and appropriateness of the training data. Hence, this study first examines the Arabic MT in e-commerce and investigates the data quality challenges for English-Arabic MT in e-commerce then proposes heuristics-based and topic-based data selection approaches to improve MT for product information. Both online and offline experiment results have shown our proposed approaches are effective, leading to improved shopping experiences for customers."
}
Markdown (Informal)
[Enhancing Arabic Machine Translation for E-commerce Product Information: Data Quality Challenges and Innovative Selection Approaches](https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.arabicnlp-1.13/) (Zhang et al., ArabicNLP 2023)
ACL