% Liu et al., paper in the LREC-COLING 2024 proceedings (pp. 14664--14675).
% The url field uses the stable aclanthology.org address for Anthology ID
% 2024.lrec-main.1277 rather than a temporary preview-branch deployment.
@inproceedings{liu-etal-2024-semantics,
  author    = {Liu, Haowei and Shi, Yaya and Xu, Haiyang and Yuan, Chunfeng and Ye, Qinghao and Li, Chenliang and Yan, Ming and Zhang, Ji and Huang, Fei and Li, Bing and Hu, Weiming},
  title     = {Semantics-enhanced Cross-modal Masked Image Modeling for Vision-Language Pre-training},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  pages     = {14664--14675},
  url       = {https://aclanthology.org/2024.lrec-main.1277/},
}