% NOTE(review): the url below points at a preview/staging Anthology host
% (preview.aclanthology.org/ingest-eacl/); once the paper is formally
% published, confirm and switch to the canonical aclanthology.org URL.
@inproceedings{chen-etal-2026-task,
  title     = {Task-Level Instructions Induction for Audio Question Answering from Few Examples},
  author    = {Chen, Po-Chun and
               Huang, Hen-Hsen and
               Chen, Hsin-Hsi},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 2: Short Papers)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-short.18/},
  pages     = {244--264},
  isbn      = {979-8-89176-381-4},
  abstract  = {Large audio-language models (LALMs) benefit from Chain-of-Thought (CoT) prompting for audio question answering (AQA), but acquiring audio CoT examples is particularly challenging as it requires sequential listening and careful integration of acoustic and linguistic information. Surprisingly, our experiments reveal that standard few-shot prompting yields inconsistent results compared to zero-shot CoT, with several models showing degraded accuracy. Moreover, few-shot prompting incurs substantially higher inference costs by processing multiple audio demonstrations per inference. We propose Audio-Induct, which induces reusable textual task instructions from few audio examples once per task, requiring no additional demonstrations at inference. Evaluated on 9 LALMs across two benchmarks, Audio-Induct outperforms state-of-the-art prompting methods while maintaining low inference costs. Inducted Task Instructions transfer effectively across models, enabling scalable deployment.},
}
Markdown (Informal)
[Task-Level Instructions Induction for Audio Question Answering from Few Examples](https://preview.aclanthology.org/ingest-eacl/2026.eacl-short.18/) (Chen et al., EACL 2026)
ACL