% Journal article in Transactions of the Association for Computational
% Linguistics (TACL), volume 9 (2021).
% The url field uses the canonical ACL Anthology address for Anthology ID
% 2021.tacl-1.58 instead of a preview-branch build, which is not a stable target.
% The editor field is retained from the original export; standard @article
% styles in classic BibTeX ignore it.
@article{bugliarello-etal-2021-multimodal,
  author    = {Bugliarello, Emanuele and Cotterell, Ryan and Okazaki, Naoaki and Elliott, Desmond},
  title     = {Multimodal Pretraining Unmasked: A Meta-Analysis and a Unified Framework of Vision-and-Language {BERTs}},
  editor    = {Roark, Brian and Nenkova, Ani},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {9},
  pages     = {978--994},
  year      = {2021},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl_a_00408},
  url       = {https://aclanthology.org/2021.tacl-1.58/},
}