% ACL Anthology record 2021.emnlp-main.738 (paper in the EMNLP 2021 proceedings).
% The url field holds the permanent Anthology address, not a temporary
% branch-preview host, so the link stays valid after preview builds are removed.
@inproceedings{blaier-etal-2021-caption,
    title     = {Caption Enriched Samples for Improving Hateful Memes Detection},
    author    = {Blaier, Efrat and
                 Malkiel, Itzik and
                 Wolf, Lior},
    editor    = {Moens, Marie-Francine and
                 Huang, Xuanjing and
                 Specia, Lucia and
                 Yih, Scott Wen-tau},
    booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2021},
    address   = {Online and Punta Cana, Dominican Republic},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2021.emnlp-main.738/},
    doi       = {10.18653/v1/2021.emnlp-main.738},
    pages     = {9350--9358},
    abstract  = {The recently introduced hateful meme challenge demonstrates the difficulty of determining whether a meme is hateful or not. Specifically, both unimodal language models and multimodal vision-language models cannot reach the human level of performance. Motivated by the need to model the contrast between the image content and the overlayed text, we suggest applying an off-the-shelf image captioning tool in order to capture the first. We demonstrate that the incorporation of such automatic captions during fine-tuning improves the results for various unimodal and multimodal models. Moreover, in the unimodal case, continuing the pre-training of language models on augmented and original caption pairs, is highly beneficial to the classification accuracy.},
}
Markdown (Informal)
[Caption Enriched Samples for Improving Hateful Memes Detection](https://aclanthology.org/2021.emnlp-main.738/) (Blaier et al., EMNLP 2021)
ACL