% Conference paper in the COLING 2016 technical-papers proceedings
% (ACL Anthology ID C16-1075, taken from the landing-page URL).
% NOTE(review): `address` appears to hold the conference location, as in
% ACL Anthology exports, rather than the publisher's city -- confirm before
% switching to a style that prints it as a publisher location.
@inproceedings{moore-etal-2016-automated,
  title     = {Automated speech-unit delimitation in spoken learner {English}},
  author    = {Moore, Russell and
               Caines, Andrew and
               Graham, Calbert and
               Buttery, Paula},
  editor    = {Matsumoto, Yuji and
               Prasad, Rashmi},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {https://aclanthology.org/C16-1075/},
  pages     = {782--793},
  abstract  = {In order to apply computational linguistic analyses and pass information to downstream applications, transcriptions of speech obtained via automatic speech recognition (ASR) need to be divided into smaller meaningful units, in a task we refer to as {\textquoteleft}speech-unit (SU) delimitation{\textquoteright}. We closely recreate the automatic delimitation system described by Lee and Glass (2012), {\textquoteleft}Sentence detection using multiple annotations{\textquoteright}, Proceedings of INTERSPEECH, which combines a prosodic model, language model and speech-unit length model in log-linear fashion. Since state-of-the-art natural language processing (NLP) tools have been developed to deal with written text and its characteristic sentence-like units, SU delimitation helps bridge the gap between ASR and NLP, by normalising spoken data into a more canonical format. Previous work has focused on native speaker recordings; we test the system of Lee and Glass (2012) on non-native speaker (or {\textquoteleft}learner{\textquoteright}) data, achieving performance above the state-of-the-art. We also consider alternative evaluation metrics which move away from the idea of a single {\textquoteleft}truth{\textquoteright} in SU delimitation, and frame this work in the context of downstream NLP applications.},
}
Markdown (Informal)
[Automated speech-unit delimitation in spoken learner English](https://aclanthology.org/C16-1075/) (Moore et al., COLING 2016)
ACL
- Russell Moore, Andrew Caines, Calbert Graham, and Paula Buttery. 2016. Automated speech-unit delimitation in spoken learner English. In Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers, pages 782–793, Osaka, Japan. The COLING 2016 Organizing Committee.