@inproceedings{huang-etal-2020-multimodal,
    title = "Multimodal Pretraining for Dense Video Captioning",
    author = "Huang, Gabriel  and
      Pang, Bo  and
      Zhu, Zhenhai  and
      Rivera, Clara  and
      Soricut, Radu",
    editor = "Wong, Kam-Fai  and
      Knight, Kevin  and
      Wu, Hua",
    booktitle = "Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing",
    month = dec,
    year = "2020",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.aacl-main.48/",
    doi = "10.18653/v1/2020.aacl-main.48",
    pages = "470--490"
}