% ACL Anthology paper D19-5608. The url field uses the canonical Anthology
% address rather than a temporary preview-branch build, which is not a stable link.
@inproceedings{aji-heafield-2019-making,
  title     = {Making Asynchronous Stochastic Gradient Descent Work for Transformers},
  author    = {Aji, Alham Fikri and
               Heafield, Kenneth},
  editor    = {Birch, Alexandra and
               Finch, Andrew and
               Hayashi, Hiroaki and
               Konstas, Ioannis and
               Luong, Thang and
               Neubig, Graham and
               Oda, Yusuke and
               Sudoh, Katsuhito},
  booktitle = {Proceedings of the 3rd Workshop on Neural Generation and Translation},
  month     = nov,
  year      = {2019},
  address   = {Hong Kong},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D19-5608/},
  doi       = {10.18653/v1/D19-5608},
  pages     = {80--89},
}