% Conference paper in the IWSLT 2025 proceedings (ACL Anthology).
% The url field uses the permanent Anthology address for this paper ID rather
% than a temporary preview/ingestion build, which is not a stable citation target.
@inproceedings{djanibekov-aldarmaki-2025-sparqle,
  author    = {Djanibekov, Amirbek and Aldarmaki, Hanan},
  title     = {{SparQLe}: Speech Queries to Text Translation Through {LLMs}},
  editor    = {Salesky, Elizabeth and Federico, Marcello and Anastasopoulos, Antonis},
  booktitle = {Proceedings of the 22nd International Conference on Spoken Language Translation ({IWSLT} 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.iwslt-1.6/},
  pages     = {76--83},
  isbn      = {979-8-89176-272-5},
  abstract  = {With the growing influence of Large Language Models (LLMs), there is increasing interest in integrating speech representations with them to enable more seamless multi-modal processing and speech understanding. This study introduces a novel approach that combines self-supervised speech representations with instruction-tuned LLMs for speech-to-text translation. The proposed approach leverages a modality adapter to align extracted speech features with instruction-tuned LLMs using English speech data. Our experiments demonstrate that this method effectively preserves the semantic content of the input speech and serves as an effective bridge between self-supervised speech models and instruction-tuned LLMs, offering a promising approach for various speech understanding applications.},
}
Markdown (Informal)
[SparQLe: Speech Queries to Text Translation Through LLMs](https://aclanthology.org/2025.iwslt-1.6/) (Djanibekov & Aldarmaki, IWSLT 2025)
ACL
- Amirbek Djanibekov and Hanan Aldarmaki. 2025. SparQLe: Speech Queries to Text Translation Through LLMs. In Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025), pages 76–83, Vienna, Austria (in-person and online). Association for Computational Linguistics.