% Conference paper in the MT Summit XIX Users Track proceedings
% (ACL Anthology ID 2023.mtsummit-users.10).
% The url field uses the canonical aclanthology.org address, not a
% preview/staging build, so the link stays valid long-term.
@inproceedings{zhang-etal-2023-leveraging-latent,
  title     = {Leveraging Latent Topic Information to Improve Product Machine Translation},
  author    = {Zhang, Bryan and
               Walter, Stephan and
               Misra, Amita and
               Tan, Liling},
  editor    = {Yamada, Masaru and
               do Carmo, Felix},
  booktitle = {Proceedings of Machine Translation Summit XIX, Vol. 2: Users Track},
  month     = sep,
  year      = {2023},
  address   = {Macau SAR, China},
  publisher = {Asia-Pacific Association for Machine Translation},
  url       = {https://aclanthology.org/2023.mtsummit-users.10/},
  pages     = {109--118},
  abstract  = {Meeting the expectations of e-commerce customers involves offering a seamless online shopping experience in their preferred language. To achieve this, modern e-commerce platforms rely on machine translation systems to provide multilingual product information on a large scale. However, maintaining high-quality machine translation that can keep up with the ever-expanding volume of product data remains an open challenge for industrial machine translation systems. In this context, topical clustering emerges as a valuable approach, leveraging latent signals and interpretable textual patterns to potentially enhance translation quality and facilitate industry-scale translation data discovery. This paper proposes two innovative methods: topic-based data selection and topic-signal augmentation, both utilizing latent topic clusters to improve the quality of machine translation in e-commerce. Furthermore, we present a data discovery workflow that utilizes topic clusters to effectively manage the growing multilingual product catalogs, addressing the challenges posed by their expansion.},
}
Markdown (Informal)
[Leveraging Latent Topic Information to Improve Product Machine Translation](https://aclanthology.org/2023.mtsummit-users.10/) (Zhang et al., MTSummit 2023)
ACL