% Huang et al. (2020), "Multimodal Pretraining for Dense Video Captioning" (conference paper).
% The url field uses the canonical ACL Anthology address, consistent with the paper ID in the doi.
@inproceedings{huang-etal-2020-multimodal,
    title     = "Multimodal Pretraining for Dense Video Captioning",
    author    = "Huang, Gabriel and
                 Pang, Bo and
                 Zhu, Zhenhai and
                 Rivera, Clara and
                 Soricut, Radu",
    editor    = "Wong, Kam-Fai and
                 Knight, Kevin and
                 Wu, Hua",
    booktitle = "Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing",
    month     = dec,
    year      = "2020",
    address   = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2020.aacl-main.48/",
    doi       = "10.18653/v1/2020.aacl-main.48",
    pages     = "470--490",
}