@inproceedings{agrawal-etal-2025-multilingual,
title = "Multilingual Continual Learning using Attention Distillation",
author = "Agrawal, Sanjay and
Nayak, Deep and
Sembium, Vivek Varadarajan",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven and
Darwish, Kareem and
Agarwal, Apoorv",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics: Industry Track",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2025.coling-industry.8/",
pages = "91--99",
abstract = "Query-product relevance classification is crucial for e-commerce stores like Amazon, ensuring accurate search results that match customer intent. Using a unified multilingual model across multiple languages/marketplaces tends to yield superior outcomes but also presents challenges, especially in maintaining performance across all languages when the model is updated or expanded to include a new one. To tackle this, we examine a multilingual continual learning (CL) framework focused on relevance classification tasks and address the issue of catastrophic forgetting. We propose a novel continual learning approach called attention distillation, which sequentially adds adapters for each new language and incorporates a fusion layer above language-specific adapters. This fusion layer distills attention scores from the previously trained fusion layer, focusing on the older adapters. Additionally, translating a portion of the new language data into older ones supports backward knowledge transfer. Our method reduces trainable parameters by 80{\%}, enhancing computational efficiency and enabling frequent updates, while achieving a 1-3{\%} ROC-AUC improvement over single marketplace baselines and outperforming SOTA CL methods on proprietary and external datasets."
}
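The abstract describes an architecture where language-specific adapters feed a fusion layer whose attention over the older adapters is distilled from the previously trained fusion layer. The sketch below is one plausible instantiation of that idea in plain PyTorch; all names (`LanguageAdapter`, `AttentionFusion`, `attention_distillation_loss`, the hidden sizes, and the KL-based distillation term) are illustrative assumptions, not the authors' released code.

```python
# Minimal sketch, assuming bottleneck adapters, a dot-product fusion layer,
# and a KL loss that keeps the new fusion layer's attention over the OLD
# adapters close to the frozen, previously trained fusion layer's attention.
import torch
import torch.nn as nn
import torch.nn.functional as F


class LanguageAdapter(nn.Module):
    """Bottleneck adapter trained for one language/marketplace (assumed design)."""

    def __init__(self, hidden: int, bottleneck: int = 64):
        super().__init__()
        self.down = nn.Linear(hidden, bottleneck)
        self.up = nn.Linear(bottleneck, hidden)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + self.up(F.relu(self.down(x)))  # residual adapter


class AttentionFusion(nn.Module):
    """Fusion layer attending over the outputs of the language-specific adapters."""

    def __init__(self, hidden: int):
        super().__init__()
        self.query = nn.Linear(hidden, hidden)
        self.key = nn.Linear(hidden, hidden)

    def forward(self, x: torch.Tensor, adapter_outputs):
        stacked = torch.stack(adapter_outputs, dim=1)      # [batch, n_adapters, hidden]
        q = self.query(x).unsqueeze(1)                     # [batch, 1, hidden]
        k = self.key(stacked)                              # [batch, n_adapters, hidden]
        scores = (q * k).sum(-1) / (k.size(-1) ** 0.5)     # [batch, n_adapters]
        attn = scores.softmax(dim=-1)
        fused = (attn.unsqueeze(-1) * stacked).sum(dim=1)  # [batch, hidden]
        return fused, attn


def attention_distillation_loss(new_attn, old_attn, n_old: int) -> torch.Tensor:
    """KL term matching the new fusion layer's (renormalized) attention over the
    old adapters to the teacher attention from the previous stage."""
    new_over_old = new_attn[:, :n_old]
    new_over_old = new_over_old / new_over_old.sum(dim=-1, keepdim=True).clamp_min(1e-8)
    return F.kl_div(new_over_old.log(), old_attn, reduction="batchmean")


if __name__ == "__main__":
    hidden, batch = 768, 4
    x = torch.randn(batch, hidden)                                 # e.g. pooled encoder output
    old_adapters = [LanguageAdapter(hidden) for _ in range(2)]     # previously learned languages
    new_adapter = LanguageAdapter(hidden)                          # newly added language

    old_fusion = AttentionFusion(hidden)   # frozen teacher from the previous stage
    new_fusion = AttentionFusion(hidden)   # trainable fusion layer for the new stage

    old_outs = [a(x) for a in old_adapters]
    with torch.no_grad():
        _, old_attn = old_fusion(x, old_outs)                      # teacher attention

    _, new_attn = new_fusion(x, old_outs + [new_adapter(x)])
    loss = attention_distillation_loss(new_attn, old_attn, n_old=len(old_adapters))
    print(loss.item())
```

In this reading, only the new adapter and the new fusion layer are trainable at each stage (consistent with the abstract's claim of an 80% reduction in trainable parameters), while the distillation term preserves how the fusion layer weights the older adapters.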