% Xiong et al., "UNICORN", in the EMNLP 2024 proceedings (pp. 12983--12997).
% The url field uses the canonical ACL Anthology address rather than a
% preview/staging build, which is not a stable link.
@inproceedings{xiong-etal-2024-unicorn,
    title     = {{UNICORN}: A Unified Causal Video-Oriented Language-Modeling Framework for Temporal Video-Language Tasks},
    author    = {Xiong, Yuanhao  and
      Nie, Yixin  and
      Liu, Haotian  and
      Wang, Boxin  and
      Chen, Jun  and
      Jin, Rong  and
      Hsieh, Cho-Jui  and
      Torresani, Lorenzo  and
      Lei, Jie},
    editor    = {Al-Onaizan, Yaser  and
      Bansal, Mohit  and
      Chen, Yun-Nung},
    booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2024},
    address   = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.emnlp-main.722/},
    doi       = {10.18653/v1/2024.emnlp-main.722},
    pages     = {12983--12997},
}