% LREC 2006 paper by Ircing, Hoidekr and Psutka: class-based (morphological-tag) language model
% used to rescore word n-gram lattices for Czech spontaneous-speech ASR. ACL Anthology ID: L06-1358.
@inproceedings{ircing-etal-2006-exploiting,
    title = "Exploiting Linguistic Knowledge in Language Modeling of {Czech} Spontaneous Speech",
    author = "Ircing, Pavel and
      Hoidekr, Jan and
      Psutka, Josef",
    editor = "Calzolari, Nicoletta and
      Choukri, Khalid and
      Gangemi, Aldo and
      Maegaard, Bente and
      Mariani, Joseph and
      Odijk, Jan and
      Tapias, Daniel",
    booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}'06)",
    month = may,
    year = "2006",
    address = "Genoa, Italy",
    publisher = "European Language Resources Association ({ELRA})",
    url = "https://aclanthology.org/L06-1358/",
    abstract = "In our paper, we present a method for incorporating available linguistic information into a statistical language model that is used in ASR system for transcribing spontaneous speech. We employ the class-based language model paradigm and use the morphological tags as the basis for word-to-class mapping. Since the number of different tags is at least by one order of magnitude lower than the number of words even in the tasks with moderately-sized vocabularies, the tag-based model can be rather robustly estimated using even the relatively small text corpora. Unfortunately, this robustness goes hand in hand with restricted predictive ability of the class-based model. Hence we apply the two-pass recognition strategy, where the first pass is performed with the standard word-based n-gram and the resulting lattices are rescored in the second pass using the aforementioned class-based model. Using this decoding scenario, we have managed to moderately improve the word error rate in the performed ASR experiments.",
}
Markdown (Informal)
[Exploiting Linguistic Knowledge in Language Modeling of Czech Spontaneous Speech](https://aclanthology.org/L06-1358/) (Ircing et al., LREC 2006)
ACL