@comment{SustaiNLP 2023 workshop paper. The title carries inline math, held in a brace group so bibliography styles leave its case and spacing alone.}
@inproceedings{lyu-etal-2023-video,
    title = "Is a Video worth {$n \times n$} Images? A Highly Efficient Approach to Transformer-based Video Question Answering",
    author = "Lyu, Chenyang  and
      Ji, Tianbo  and
      Graham, Yvette  and
      Foster, Jennifer",
    editor = "Sadat Moosavi, Nafise  and
      Gurevych, Iryna  and
      Hou, Yufang  and
      Kim, Gyuwan  and
      Kim, Young Jin  and
      Schuster, Tal  and
      Agrawal, Ameeta",
    booktitle = "Proceedings of the Fourth Workshop on Simple and Efficient Natural Language Processing ({SustaiNLP})",
    month = jul,
    year = "2023",
    address = "Toronto, Canada (Hybrid)",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.sustainlp-1.12/",
    doi = "10.18653/v1/2023.sustainlp-1.12",
    pages = "183--189",
}
Markdown (Informal)
[Is a Video worth n × n Images? A Highly Efficient Approach to Transformer-based Video Question Answering](https://preview.aclanthology.org/ingest-emnlp/2023.sustainlp-1.12/) (Lyu et al., SustaiNLP 2023)
ACL