% Kim, Dalmia and Metze, ACL 2019 (pages 1131--1141).
% The url field holds the canonical ACL Anthology address for paper P19-1107,
% not a preview-branch mirror; the doi field is the stable identifier.
@inproceedings{kim-etal-2019-gated,
    title     = {Gated Embeddings in End-to-End Speech Recognition for Conversational-Context Fusion},
    author    = {Kim, Suyoun and
                 Dalmia, Siddharth and
                 Metze, Florian},
    editor    = {Korhonen, Anna and
                 Traum, David and
                 M{\`a}rquez, Llu{\'i}s},
    booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
    month     = jul,
    year      = {2019},
    address   = {Florence, Italy},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/P19-1107/},
    doi       = {10.18653/v1/P19-1107},
    pages     = {1131--1141},
    abstract  = {We present a novel conversational-context aware end-to-end speech recognizer based on a gated neural network that incorporates conversational-context/word/speech embeddings. Unlike conventional speech recognition models, our model learns longer conversational-context information that spans across sentences and is consequently better at recognizing long conversations. Specifically, we propose to use text-based external word and/or sentence embeddings (i.e., fastText, BERT) within an end-to-end framework, yielding significant improvement in word error rate with better conversational-context representation. We evaluated the models on the Switchboard conversational speech corpus and show that our model outperforms standard end-to-end speech recognition models.},
}
Markdown (Informal)
[Gated Embeddings in End-to-End Speech Recognition for Conversational-Context Fusion](https://aclanthology.org/P19-1107/) (Kim et al., ACL 2019)
ACL