@inproceedings{ponce-etal-2025-vicomtech,
title = "Vicomtech@{WMT} 2025: Evolutionary Model Compression for Machine Translation",
author = "Ponce, David and
Gete, Harritxu and
Etchegoyhen, Thierry",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Tenth Conference on Machine Translation",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.wmt-1.77/",
doi = "10.18653/v1/2025.wmt-1.77",
pages = "1011--1021",
ISBN = "979-8-89176-341-8",
abstract = "We describe Vicomtech{'}s participation in the WMT 2025 Shared Task on Model Compression. We addressed all three language pairs of the constrained task, namely Czech to German, English to Arabic and Japanese to Chinese, using the Aya Expanse 8B model as our base model. Our approach centers on GeLaCo, an evolutionary method for LLM compression via layer collapse operations, which efficiently explores the compression solution space through population-based search and a module-wise similarity fitness function that captures attention, feed-forward, and hidden state representations. We systematically evaluated compression at three different ratios (0.25, 0.50, and 0.75) and applied targeted post-training techniques to recover performance through fine-tuning and knowledge distillation over translation instructions. Additionally, we explored quantization techniques to achieve further model size reduction. Our experimental results demonstrate that the combination of evolutionary layer compression, targeted post-training, and quantization can achieve substantial model size reduction while maintaining competitive translation quality across all language pairs."
}