% Pan et al., ACL 2022 (Volume 1: Long Papers), pages 4491--4503.
% The url field uses the canonical aclanthology.org host instead of a
% preview-branch deployment so the link stays stable; the doi field is the
% persistent identifier for this paper.
@inproceedings{pan-etal-2022-leveraging,
  author    = {Pan, Xichen and Chen, Peiyu and Gong, Yichen and Zhou, Helong and Wang, Xinbing and Lin, Zhouhan},
  title     = {Leveraging Unimodal Self-Supervised Learning for Multimodal Audio-Visual Speech Recognition},
  editor    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  pages     = {4491--4503},
  doi       = {10.18653/v1/2022.acl-long.308},
  url       = {https://aclanthology.org/2022.acl-long.308/},
}