@inproceedings{escolano-etal-2024-residual,
title = "Residual Dropout: A Simple Approach to Improve Transformer`s Data Efficiency",
author = "Escolano, Carlos and
De Luca Fornaciari, Francesca and
Melero, Maite",
editor = "Melero, Maite and
Sakti, Sakriani and
Soria, Claudia",
booktitle = "Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.sigul-1.35/",
pages = "294--299",
abstract = "Transformer models often demand a vast amount of training data to achieve the desired level of performance. However, this data requirement poses a major challenge for low-resource languages seeking access to high-quality systems, particularly in tasks like Machine Translation. To address this issue, we propose adding Dropout to Transformer`s Residual Connections. Our experimental results demonstrate that this modification effectively mitigates overfitting during training, resulting in substantial performance gains of over 4 BLEU points on a dataset consisting of merely 10 thousand examples."
}
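
The technique named in the abstract lends itself to a short sketch. Below is a minimal, hypothetical PyTorch illustration (not the authors' released code) of applying dropout to the residual connection of a Transformer sublayer, in addition to the standard dropout on the sublayer output; the module name, dropout rates, and pre-norm placement are assumptions for illustration.

import torch
import torch.nn as nn

class ResidualDropoutSublayer(nn.Module):
    """Wraps a Transformer sublayer (e.g. self-attention or FFN) and
    applies dropout to the residual (skip) path itself. Rates and
    pre-norm layout are illustrative assumptions, not the paper's spec."""

    def __init__(self, d_model: int, sublayer: nn.Module,
                 p_out: float = 0.1, p_residual: float = 0.1):
        super().__init__()
        self.sublayer = sublayer
        self.norm = nn.LayerNorm(d_model)
        self.out_dropout = nn.Dropout(p_out)       # standard output dropout
        self.res_dropout = nn.Dropout(p_residual)  # dropout on the skip path

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Dropping units on the residual branch prevents the model from
        # leaning solely on the identity path, which the paper reports
        # mitigates overfitting on small (~10k-example) datasets.
        return self.res_dropout(x) + self.out_dropout(self.sublayer(self.norm(x)))

At inference time both Dropout modules become identity functions (via model.eval()), so the sketch reduces to an ordinary residual connection.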