% BlackboxNLP 2020 workshop paper (ACL Anthology ID 2020.blackboxnlp-1.21).
% The url field uses the canonical aclanthology.org address, not a preview/staging build.
@inproceedings{mccoy-etal-2020-berts,
  title     = "{BERT}s of a feather do not generalize together: Large variability in generalization across models with similar test set performance",
  author    = "McCoy, R. Thomas and Min, Junghyun and Linzen, Tal",
  editor    = "Alishahi, Afra and Belinkov, Yonatan and Chrupa{\l}a, Grzegorz and Hupkes, Dieuwke and Pinter, Yuval and Sajjad, Hassan",
  booktitle = "Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP",
  month     = nov,
  year      = "2020",
  address   = "Online",
  publisher = "Association for Computational Linguistics",
  url       = "https://aclanthology.org/2020.blackboxnlp-1.21/",
  doi       = "10.18653/v1/2020.blackboxnlp-1.21",
  pages     = "217--227",
}