@inproceedings{blin-kucharavy-2021-transformer,
title = "Can the Transformer Be Used as a Drop-in Replacement for {RNN}s in Text-Generating {GAN}s?",
author = "Blin, Kevin and
Kucharavy, Andrei",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2021.ranlp-1.21/",
pages = "173--181",
abstract = "In this paper we address the problem of fine-tuned text generation with a limited computational budget. For that, we use a well-performing text generative adversarial network (GAN) architecture - Diversity-Promoting GAN (DPGAN), and attempted a drop-in replacement of the LSTM layer with a self-attention-based Transformer layer in order to leverage their efficiency. The resulting Self-Attention DPGAN (SADPGAN) was evaluated for performance, quality and diversity of generated text and stability. Computational experiments suggested that a transformer architecture is unable to drop-in replace the LSTM layer, under-performing during the pre-training phase and undergoing a complete mode collapse during the GAN tuning phase. Our results suggest that the transformer architecture need to be adapted before it can be used as a replacement for RNNs in text-generating GANs."
}
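
To make the abstract's notion of a "drop-in replacement" concrete, the following is a minimal PyTorch sketch of a generator whose sequence-modelling core can be swapped between an LSTM and a single self-attention (Transformer encoder) layer behind an identical interface. It is an illustration of the general idea only, not the authors' SADPGAN/DPGAN implementation; all module names, layer sizes, and the causal-mask setup are assumptions.

```python
# Sketch only: a token-level generator whose core can be either an LSTM or a
# Transformer encoder layer, mirroring the kind of substitution the paper
# investigates. Not the authors' code; sizes and names are illustrative.
import torch
import torch.nn as nn


class SwappableGenerator(nn.Module):
    def __init__(self, vocab_size, d_model=256, core="lstm"):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, d_model)
        if core == "lstm":
            self.core = nn.LSTM(d_model, d_model, batch_first=True)
        elif core == "transformer":
            # Single self-attention layer standing in for the recurrent core.
            self.core = nn.TransformerEncoderLayer(
                d_model=d_model, nhead=4, batch_first=True
            )
        else:
            raise ValueError(f"unknown core: {core}")
        self.core_type = core
        self.proj = nn.Linear(d_model, vocab_size)

    def forward(self, tokens):
        x = self.embed(tokens)  # (batch, seq, d_model)
        if self.core_type == "lstm":
            h, _ = self.core(x)
        else:
            # Causal mask so each position attends only to earlier tokens,
            # mimicking the left-to-right processing of an LSTM.
            mask = nn.Transformer.generate_square_subsequent_mask(tokens.size(1))
            h = self.core(x, src_mask=mask)
        return self.proj(h)  # per-token logits over the vocabulary


# Both cores expose the same call signature, which is what a true
# "drop-in replacement" would require.
tokens = torch.randint(0, 1000, (2, 16))
for core in ("lstm", "transformer"):
    logits = SwappableGenerator(vocab_size=1000, core=core)(tokens)
    print(core, logits.shape)  # torch.Size([2, 16, 1000])
```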