% Kambhatla, Born and Sarkar, ACL 2022 (Volume 1: Long Papers), pp. 201--218:
% CipherDAug, a ROT-k ciphertext based data-augmentation method for NMT.
% The url field is the canonical ACL Anthology landing page for this paper.
@inproceedings{kambhatla-etal-2022-cipherdaug,
  title     = {{CipherDAug}: Ciphertext based Data Augmentation for Neural Machine Translation},
  author    = {Kambhatla, Nishant and
               Born, Logan and
               Sarkar, Anoop},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.acl-long.17/},
  doi       = {10.18653/v1/2022.acl-long.17},
  pages     = {201--218},
  abstract  = {We propose a novel data-augmentation technique for neural machine translation based on ROT-$k$ ciphertexts. ROT-$k$ is a simple letter substitution cipher that replaces a letter in the plaintext with the $k$th letter after it in the alphabet. We first generate multiple ROT-$k$ ciphertexts using different values of $k$ for the plaintext which is the source side of the parallel data. We then leverage this enciphered training data along with the original parallel data via multi-source training to improve neural machine translation. Our method, CipherDAug, uses a co-regularization-inspired training procedure, requires no external data sources other than the original training data, and uses a standard Transformer to outperform strong data augmentation techniques on several datasets by a significant margin. This technique combines easily with existing approaches to data augmentation, and yields particularly strong results in low-resource settings.},
}
Markdown (Informal)
[CipherDAug: Ciphertext based Data Augmentation for Neural Machine Translation](https://aclanthology.org/2022.acl-long.17/) (Kambhatla et al., ACL 2022)
ACL