% Conference paper from the EMNLP 2023 Industry Track proceedings (ACL Anthology ID 2023.emnlp-industry.22).
% The url field uses the canonical aclanthology.org address instead of a branch-preview host.
% The abstract is reproduced verbatim from the published record; do not copy-edit it.
@inproceedings{chiu-2023-retrieval,
  author    = {Chiu, Justin},
  title     = {Retrieval-Enhanced Dual Encoder Training for Product Matching},
  editor    = {Wang, Mingxuan and Zitouni, Imed},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  pages     = {216--222},
  doi       = {10.18653/v1/2023.emnlp-industry.22},
  url       = {https://aclanthology.org/2023.emnlp-industry.22/},
  abstract  = {Product matching is the task of matching a seller-listed item to an appropriate product. It is a critical task for an e-commerce platform, and the approach needs to be efficient to run in a large-scale setting. A dual encoder approach has been a common practice for product matching recently, due to its high performance and computation efficiency. In this paper, we propose a two-stage training for the dual encoder model. Stage 1 trained a dual encoder to identify the more informative training data. Stage 2 then train on the more informative data to get a better dual encoder model. This technique is a learned approach for building training data. We evaluate the retrieval-enhanced training on two different datasets: a publicly available Large-Scale Product Matching dataset and a real-world e-commerce dataset containing 47 million products. Experiment results show that our approach improved by 2{\%} F1 on the public dataset and 9{\%} F1 on the real-world e-commerce dataset.}
}
Markdown (Informal)
[Retrieval-Enhanced Dual Encoder Training for Product Matching](https://aclanthology.org/2023.emnlp-industry.22/) (Chiu, EMNLP 2023)
ACL