% Yang, Li and Chu (2025): Findings of ACL 2025 paper on emotion-aware
% speech-to-text translation with LLM-based generative error correction.
% The url field uses the canonical ACL Anthology address for anthology ID
% 2025.findings-acl.1047 so the link stays stable.
% NOTE(review): no doi is recorded for this entry -- add one after confirming
% it on the Anthology page.
@inproceedings{yang-etal-2025-generative,
  title     = {Generative Error Correction for Emotion-aware Speech-to-text Translation},
  author    = {Yang, Zhengdong and
               Li, Sheng and
               Chu, Chenhui},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.1047/},
  pages     = {20413--20421},
  isbn      = {979-8-89176-256-5},
  abstract  = {This paper explores emotion-aware speech-to-text translation (ST) using generative error correction (GER) by large language models (LLMs). Despite recent advancements in ST, the impact of the emotional content has been overlooked. First, we enhance the translation of emotional speech by adopting the GER paradigm: Finetuned an LLM to generate the translation based on the decoded N-best hypotheses. Moreover, we combine the emotion and sentiment labels into the LLM finetuning process to enable the model to consider the emotion content. In addition, we project the ST model{'}s latent representation into the LLM embedding space to further improve emotion recognition and translation. Experiments on an English-Chinese dataset show the effectiveness of the combination of GER, emotion and sentiment labels, and the projector for emotion-aware ST. Our code is available at https://github.com/N-Orien/EmoST.},
}
% Markdown (Informal)
% [Generative Error Correction for Emotion-aware Speech-to-text Translation](https://aclanthology.org/2025.findings-acl.1047/) (Yang et al., Findings 2025)
% ACL