% LiteVL (Chen et al.), EMNLP 2022 main-conference paper; ACL Anthology ID 2022.emnlp-main.545.
% The url field uses the canonical aclanthology.org address for that Anthology ID.
@inproceedings{chen-etal-2022-litevl,
    title = "{LiteVL}: Efficient Video-Language Learning with Enhanced Spatial-Temporal Modeling",
    author = "Chen, Dongsheng  and
      Tao, Chaofan  and
      Hou, Lu  and
      Shang, Lifeng  and
      Jiang, Xin  and
      Liu, Qun",
    editor = "Goldberg, Yoav  and
      Kozareva, Zornitsa  and
      Zhang, Yue",
    booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
    month = dec,
    year = "2022",
    address = "Abu Dhabi, United Arab Emirates",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.emnlp-main.545/",
    doi = "10.18653/v1/2022.emnlp-main.545",
    pages = "7985--7997"
}