@inproceedings{baluja-2025-text,
  title         = {Text Is Not All You Need: Multimodal Prompting Helps {LLM}s Understand Humor},
  author        = {Baluja, Ashwin},
  editor        = {Hempelmann, Christian F. and
                   Rayz, Julia and
                   Dong, Tiansi and
                   Miller, Tristan},
  booktitle     = {Proceedings of the 1st Workshop on Computational Humor ({CHum})},
  month         = jan,
  year          = {2025},
  address       = {Online},
  publisher     = {Association for Computational Linguistics},
  url           = {https://preview.aclanthology.org/jlcl-multiple-ingestion/2025.chum-1.2/},
  pages         = {9--17},
  abstract      = {While Large Language Models (LLMs) have demonstrated impressive natural language understanding capabilities across various text-based tasks, understanding humor has remained a persistent challenge. Humor is frequently multimodal, relying not only on the meaning of the words, but also their pronunciations, and even the speaker's intonations. In this study, we explore a simple multimodal prompting approach to humor understanding and explanation. We present an LLM with both the text and the spoken form of a joke, generated using an off-the-shelf text-to-speech (TTS) system. Using multimodal cues improves the explanations of humor compared to textual prompts across all tested datasets.},
  internal-note = {NOTE(review): url points at a preview/ingestion mirror; presumably the canonical link is https://aclanthology.org/2025.chum-1.2/ -- confirm before publishing},
}
Markdown (Informal)
[Text Is Not All You Need: Multimodal Prompting Helps LLMs Understand Humor](https://preview.aclanthology.org/jlcl-multiple-ingestion/2025.chum-1.2/) (Baluja, CHum 2025)
ACL