@inproceedings{denisov-vu-2024-teaching,
title = "Teaching a Multilingual Large Language Model to Understand Multilingual Speech via Multi-Instructional Training",
author = "Denisov, Pavel and
Vu, Thang",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.findings-naacl.52/",
doi = "10.18653/v1/2024.findings-naacl.52",
pages = "814--834",
    abstract = "Recent advancements in language modeling have led to the emergence of Large Language Models (LLMs) capable of various natural language processing tasks. Despite their success in text-based tasks, applying LLMs to the speech domain remains limited and challenging. This paper presents BLOOMZMMS, a novel model that integrates a multilingual LLM with a multilingual speech encoder, aiming to harness the capabilities of LLMs for speech recognition and beyond. Utilizing a multi-instructional training approach, we demonstrate the transferability of linguistic knowledge from the text to the speech modality. Our experiments, conducted on 1900 hours of transcribed data from 139 languages, establish that a multilingual speech representation can be effectively learned and aligned with a multilingual LLM. While this learned representation initially shows limitations in task generalization, we address this issue by generating synthetic targets in a multi-instructional style. Our zero-shot evaluation results confirm the robustness of our approach across multiple tasks, including speech translation and multilingual spoken language understanding, thereby opening new avenues for applying LLMs in the speech domain."
}