@inproceedings{xiong-etal-2018-session,
    title     = {Session-level Language Modeling for Conversational Speech},
    author    = {Xiong, Wayne and
                 Wu, Lingfeng and
                 Zhang, Jun and
                 Stolcke, Andreas},
    editor    = {Riloff, Ellen and
                 Chiang, David and
                 Hockenmaier, Julia and
                 Tsujii, Jun{'}ichi},
    booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
    month     = oct # "--" # nov,
    year      = {2018},
    address   = {Brussels, Belgium},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/D18-1296/},
    doi       = {10.18653/v1/D18-1296},
    pages     = {2764--2768},
    abstract  = {We propose to generalize language models for conversational speech recognition to allow them to operate across utterance boundaries and speaker changes, thereby capturing conversation-level phenomena such as adjacency pairs, lexical entrainment, and topical coherence. The model consists of a long-short-term memory (LSTM) recurrent network that reads the entire word-level history of a conversation, as well as information about turn taking and speaker overlap, in order to predict each next word. The model is applied in a rescoring framework, where the word history prior to the current utterance is approximated with preliminary recognition results. In experiments in the conversational telephone speech domain (Switchboard) we find that such a model gives substantial perplexity reductions over a standard LSTM-LM with utterance scope, as well as improvements in word error rate.},
}
Markdown (Informal)
[Session-level Language Modeling for Conversational Speech](https://aclanthology.org/D18-1296/) (Xiong et al., EMNLP 2018)
ACL
- Wayne Xiong, Lingfeng Wu, Jun Zhang, and Andreas Stolcke. 2018. Session-level Language Modeling for Conversational Speech. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pages 2764–2768, Brussels, Belgium. Association for Computational Linguistics.