% Xu et al., "VLM" -- paper in Findings of ACL-IJCNLP 2021.
% The url field uses the canonical ACL Anthology address for this paper ID
% (same ID as in the DOI), not a temporary preview-branch build.
@inproceedings{xu-etal-2021-vlm,
  author    = {Xu, Hu and Ghosh, Gargi and Huang, Po-Yao and Arora, Prahal and Aminzadeh, Masoumeh and Feichtenhofer, Christoph and Metze, Florian and Zettlemoyer, Luke},
  title     = {{VLM}: Task-agnostic Video-Language Model Pre-training for Video Understanding},
  editor    = {Zong, Chengqing and Xia, Fei and Li, Wenjie and Navigli, Roberto},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {4227--4239},
  doi       = {10.18653/v1/2021.findings-acl.370},
  url       = {https://aclanthology.org/2021.findings-acl.370/},
}