@inproceedings{xu-etal-2024-llm,
title = "{LLM} Knows Body Language, Too: Translating Speech Voices into Human Gestures",
author = "Xu, Chenghao and
Lyu, Guangtao and
Yan, Jiexi and
Yang, Muli and
Deng, Cheng",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.acl-long.273/",
doi = "10.18653/v1/2024.acl-long.273",
pages = "5004--5013",
abstract = "In response to the escalating demand for digital human representations, progress has been made in the generation of realistic human gestures from given speeches. Despite the remarkable achievements of recent research, the generation process frequently includes unintended, meaningless, or non-realistic gestures. To address this challenge, we propose a gesture translation paradigm, GesTran, which leverages large language models (LLMs) to deepen the understanding of the connection between speech and gesture and sequentially generates human gestures by interpreting gestures as a unique form of body language. The primary stage of the proposed framework employs a transformer-based auto-encoder network to encode human gestures into discrete symbols. Following this, the subsequent stage utilizes a pre-trained LLM to decipher the relationship between speech and gesture, translating the speech into gesture by interpreting the gesture as unique language tokens within the LLM. Our method has demonstrated state-of-the-art performance improvement through extensive and impartial experiments conducted on public TED and TED-Expressive datasets."
}
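
For orientation, below is a minimal illustrative sketch of the two-stage idea the abstract describes: a transformer auto-encoder that quantizes pose sequences into discrete "gesture tokens", followed by a language model that treats those tokens as extra vocabulary when translating speech into gesture. This is not the authors' implementation; the module names, dimensions, nearest-neighbour quantization, and the tiny stand-in language model (in place of a pre-trained LLM) are all assumptions, and training details such as the straight-through estimator and codebook losses are omitted.

# Illustrative sketch only (assumed names and sizes; not the GesTran code).
import torch
import torch.nn as nn


class GestureTokenizer(nn.Module):
    """Stage 1 (assumed): transformer auto-encoder that maps pose frames to
    discrete codebook ids ("gesture tokens") and reconstructs poses from them."""

    def __init__(self, pose_dim=36, d_model=128, codebook_size=512, n_layers=2):
        super().__init__()
        self.embed = nn.Linear(pose_dim, d_model)
        enc_layer = nn.TransformerEncoderLayer(d_model, nhead=4, batch_first=True)
        self.encoder = nn.TransformerEncoder(enc_layer, n_layers)
        self.codebook = nn.Embedding(codebook_size, d_model)
        dec_layer = nn.TransformerEncoderLayer(d_model, nhead=4, batch_first=True)
        self.decoder = nn.TransformerEncoder(dec_layer, n_layers)
        self.unembed = nn.Linear(d_model, pose_dim)

    def quantize(self, h):
        # Nearest codebook entry per frame; training tricks (straight-through
        # estimator, codebook/commitment losses) are omitted in this sketch.
        dist = ((h.unsqueeze(-2) - self.codebook.weight) ** 2).sum(-1)  # (B, T, K)
        ids = dist.argmin(dim=-1)                                       # (B, T)
        return ids, self.codebook(ids)

    def forward(self, poses):
        h = self.encoder(self.embed(poses))
        ids, h_q = self.quantize(h)
        return self.unembed(self.decoder(h_q)), ids


class SpeechToGestureLM(nn.Module):
    """Stage 2 (assumed stand-in for a pre-trained LLM): a small causal
    transformer over a joint vocabulary of speech tokens + gesture tokens."""

    def __init__(self, speech_vocab=1000, codebook_size=512, d_model=128, n_layers=2):
        super().__init__()
        self.speech_vocab = speech_vocab
        self.tok = nn.Embedding(speech_vocab + codebook_size, d_model)
        layer = nn.TransformerEncoderLayer(d_model, nhead=4, batch_first=True)
        self.backbone = nn.TransformerEncoder(layer, n_layers)
        self.head = nn.Linear(d_model, speech_vocab + codebook_size)

    def forward(self, token_ids):
        # Causal mask so each position only attends to earlier tokens.
        mask = nn.Transformer.generate_square_subsequent_mask(token_ids.size(1))
        h = self.backbone(self.tok(token_ids), mask=mask.to(token_ids.device))
        return self.head(h)  # next-token logits over the joint vocabulary


if __name__ == "__main__":
    poses = torch.randn(2, 16, 36)                # dummy (batch, frames, pose_dim)
    tokenizer = GestureTokenizer()
    recon, gesture_ids = tokenizer(poses)
    speech_ids = torch.randint(0, 1000, (2, 20))  # placeholder transcript token ids
    lm = SpeechToGestureLM()
    seq = torch.cat([speech_ids, gesture_ids + lm.speech_vocab], dim=1)
    logits = lm(seq)
    print(recon.shape, gesture_ids.shape, logits.shape)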