@inproceedings{zeyer-etal-2018-returnn,
title = "{RETURNN} as a Generic Flexible Neural Toolkit with Application to Translation and Speech Recognition",
author = "Zeyer, Albert and
Alkhouli, Tamer and
Ney, Hermann",
editor = "Liu, Fei and
Solorio, Thamar",
booktitle = "Proceedings of {ACL} 2018, System Demonstrations",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/P18-4022/",
doi = "10.18653/v1/P18-4022",
pages = "128--133",
    abstract = "We demonstrate the fast training and decoding speed of RETURNN for attention models in translation, owing to fast CUDA LSTM kernels and a fast pure TensorFlow beam search decoder. We show that a layer-wise pretraining scheme for recurrent attention models gives over 1{\%} absolute BLEU improvement and allows training deeper recurrent encoder networks. Promising preliminary results on maximum expected BLEU training are presented. We are able to train state-of-the-art models for translation and end-to-end models for speech recognition, and show results on WMT 2017 and Switchboard. The flexibility of RETURNN allows a fast research feedback loop for experimenting with alternative architectures, and its generality allows it to be used in a wide range of applications."
}