% emg2speech (Gowda, Comstock, Miller): ACL 2026 long paper, Anthology ID 2026.acl-long.750.
% - The coined lowercase name in the title is braced so styles cannot recase it.
% - url uses the stable Anthology address for the record ID that also appears in the DOI,
%   not a temporary preview-build link.
% - Emphasis in the abstract is written as LaTeX (math / \emph), not Markdown asterisks.
@inproceedings{gowda-etal-2026-emg2speech,
  title     = {{emg2speech}: synthesizing speech from electromyography using self-supervised speech models},
  author    = {Gowda, Harshavardhana T. and
               Comstock, Daniel C. and
               Miller, Lee M.},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {16490--16507},
  doi       = {10.18653/v1/2026.acl-long.750},
  url       = {https://aclanthology.org/2026.acl-long.750/},
  isbn      = {979-8-89176-390-6},
  abstract  = {We present a neuromuscular speech interface that translates electromyographic (EMG) signals recorded from orofacial muscles during speech articulation directly into audio. We find that self-supervised speech (S3) representations are strongly linearly related to the electrical power of muscle activity: a simple linear mapping predicts EMG power from S3 representations with a correlation of $r = 0.85$. In addition, EMG power vectors associated with distinct articulatory gestures form structured, separable clusters. Together, these observations suggest that S3 models implicitly encode articulatory mechanisms, as reflected in EMG activity. Leveraging this structure, we map EMG signals into the S3 representation space and synthesize speech, enabling end-to-end EMG-to-speech generation without explicit articulatory modeling or vocoder training. We demonstrate this system with a participant with amyotrophic lateral sclerosis (ALS), converting orofacial EMG recorded while she \emph{silently} articulated speech into audio.},
}

Markdown (Informal)
[emg2speech: synthesizing speech from electromyography using self-supervised speech models](https://aclanthology.org/2026.acl-long.750/) (Gowda et al., ACL 2026)
ACL