% Conference paper from the IWSDS 2025 proceedings (ACL Anthology ID 2025.iwsds-1.28).
% The url field uses the canonical Anthology address rather than a temporary
% preview-branch build, and the model name in the title is brace-protected so
% sentence-casing bibliography styles keep its capitalisation.
@inproceedings{obi-funakoshi-2025-integrating,
  title     = {Integrating Respiration into {Voice Activity Projection} for Enhancing Turn-taking Performance},
  author    = {Obi, Takao and
               Funakoshi, Kotaro},
  editor    = {Torres, Maria Ines and
               Matsuda, Yuki and
               Callejas, Zoraida and
               del Pozo, Arantza and
               D'Haro, Luis Fernando},
  booktitle = {Proceedings of the 15th International Workshop on Spoken Dialogue Systems Technology},
  month     = may,
  year      = {2025},
  address   = {Bilbao, Spain},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.iwsds-1.28/},
  pages     = {272--276},
  isbn      = {979-8-89176-248-0},
  abstract  = {Voice Activity Projection (VAP) models predict upcoming voice activities on a continuous timescale, enabling more nuanced turn-taking behaviors in spoken dialogue systems. Although previous studies have shown robust performance with audio-based VAP, the potential of incorporating additional physiological information, such as respiration, remains relatively unexplored. In this paper, we investigate whether respiratory information can enhance VAP performance in turn-taking. To this end, we collected Japanese dialogue data with synchronized audio and respiratory waveforms, and then we integrated the respiratory information into the VAP model. Our results showed that the VAP model combining audio and respiratory information had better performance than the audio-only model. This finding underscores the potential for improving the turn-taking performance of VAP by incorporating respiration.},
}
Markdown (Informal)
[Integrating Respiration into Voice Activity Projection for Enhancing Turn-taking Performance](https://aclanthology.org/2025.iwsds-1.28/) (Obi & Funakoshi, IWSDS 2025)
ACL