% EMNLP 2025 Industry Track paper introducing PatternRAG (ACL Anthology export).
@inproceedings{zhang-etal-2025-leveraging-product,
    title = "Leveraging Product Catalog Patterns for Multilingual {E}-commerce Product Attribute Prediction",
    author = "Zhang, Bryan and
      Khan, Suleiman A. and
      Walter, Stephan",
    editor = "Potdar, Saloni and
      Rojas-Barahona, Lina and
      Montella, Sebastien",
    booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
    month = nov,
    year = "2025",
    address = "Suzhou (China)",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-industry.18/",
    pages = "267--275",
    isbn = "979-8-89176-333-3",
    abstract = "E-commerce stores increasingly use Large Language Models (LLMs) to enhance catalog data quality through automated regeneration. A critical challenge is accurately predicting missing structured attribute values across multilingual product catalogs, where LLM performance varies significantly by language. While existing approaches leverage general knowledge through prompt engineering and external retrieval, more effective and accurate signals for attribute prediction can exist within the catalog ecosystem itself---similar products often share consistent patterns and structural relationships, and may have the missing attributes filled. Therefore, this paper introduces PatternRAG, a novel retrieval-augmented system that strategically leverages existing product catalog entries to guide LLM predictions for missing attributes. Our approach introduces a multi-stage retrieval framework that progressively refines the search space based on product type, uses textual similarity, glance views and brand relationships to identify the most relevant attribute-filled examples for LLM prediction guidance. Experiments on test sets across three major e-commerce stores in different languages (US, DE, FR) demonstrate substantial improvements in catalog data quality, achieving up to 34{\%} increase in recall and 0.8{\%} in precision for attribute value prediction. At catalog entry level, it also achieves up to +43.32{\%} increase in completeness and up to +2.83{\%} in correctness."
}
Markdown (Informal)
[Leveraging Product Catalog Patterns for Multilingual E-commerce Product Attribute Prediction](https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-industry.18/) (Zhang et al., EMNLP 2025)
ACL