% TRIPS paper, EMNLP 2022 proceedings. The url field uses the canonical
% ACL Anthology address (Anthology ID 2022.emnlp-main.273, matching the DOI suffix)
% rather than a temporary preview-branch link.
@inproceedings{jiang-etal-2022-trips,
    title     = {{TRIPS}: Efficient Vision-and-Language Pre-training with Text-Relevant Image Patch Selection},
    author    = {Jiang, Chaoya and
                 Xu, Haiyang and
                 Li, Chenliang and
                 Yan, Ming and
                 Ye, Wei and
                 Zhang, Shikun and
                 Bi, Bin and
                 Huang, Songfang},
    editor    = {Goldberg, Yoav and
                 Kozareva, Zornitsa and
                 Zhang, Yue},
    booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
    month     = dec,
    year      = {2022},
    address   = {Abu Dhabi, United Arab Emirates},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.emnlp-main.273/},
    doi       = {10.18653/v1/2022.emnlp-main.273},
    pages     = {4084--4096},
}