% System-description paper for SemEval-2026 Task 1 (Humor Generation), as
% exported from the ACL Anthology.
% NOTE(review): the url field points at a preview/ingest build of the
% Anthology; confirm and switch to the canonical URL once it is published.
@inproceedings{baruah-2026-abaruah-semeval,
  title     = {{ABARUAH} at {SemEval}-2026 Task 1: Leveraging High-Resolution {VLMs} and Reasoning {LLMs} for Multimodal Humor Generation},
  author    = {Baruah, Arup},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.436/},
  pages     = {3536--3543},
  isbn      = {979-8-89176-414-9},
  abstract  = {This paper describes the systems developed for ``SemEval 2026 Task 1: Humor Generation''. This shared task covered both unimodal text constraints and multimodal GIF-based humor generation. The proposed approach used a two-stage pipeline consisting of a Multimodal Grounding stage to extract semantic descriptions from GIFs and a Humor Synthesis stage to generate the final humorous output. The Qwen2-VL and Qwen3-8B models were used for these respective stages. The system achieved competitive Elo-like ratings of 1009, 973, and 914 for Subtasks A, B1, and B2, respectively, demonstrating its ability to address diverse humorous constraints. The system was ranked 4th in overall standings for Subtasks A and B1.},
}
Markdown (Informal)
[ABARUAH at SemEval-2026 Task 1: Leveraging High-Resolution VLMs and Reasoning LLMs for Multimodal Humor Generation](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.436/) (Baruah, SemEval 2026)
ACL