% Liu et al., "Understanding the Difficulty of Training Transformers",
% in the EMNLP 2020 proceedings.
% The url field uses the canonical ACL Anthology address (not a preview/staging
% build) so the link stays stable; the doi field identifies the same record.
% Braces in title/booktitle protect words that must keep their capitalisation
% under styles that apply sentence casing.
@inproceedings{liu-etal-2020-understanding,
  title     = {Understanding the Difficulty of Training {Transformers}},
  author    = {Liu, Liyuan and Liu, Xiaodong and Gao, Jianfeng and Chen, Weizhu and Han, Jiawei},
  editor    = {Webber, Bonnie and Cohn, Trevor and He, Yulan and Liu, Yang},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.emnlp-main.463/},
  doi       = {10.18653/v1/2020.emnlp-main.463},
  pages     = {5747--5763},
}