% LREC 2020 conference paper (ACL Anthology ID 2020.lrec-1.300).
% The url field uses the canonical Anthology address, not a preview-build link.
@inproceedings{kipyatkova-karpov-2020-class,
  title     = {Class-based {LSTM} {Russian} Language Model with Linguistic Information},
  author    = {Kipyatkova, Irina and
               Karpov, Alexey},
  editor    = {Calzolari, Nicoletta and
               B{\'e}chet, Fr{\'e}d{\'e}ric and
               Blache, Philippe and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.lrec-1.300/},
  pages     = {2470--2474},
  language  = {eng},
  isbn      = {979-10-95546-34-4},
  abstract  = {In the paper, we present class-based LSTM Russian language models (LMs) with classes generated with the use of both word frequency and linguistic information data, obtained with the help of the ``VisualSynan'' software from the AOT project. We have created LSTM LMs with various numbers of classes and compared them with word-based LM and class-based LM with word2vec class generation in terms of perplexity, training time, and WER. In addition, we performed a linear interpolation of LSTM language models with the baseline 3-gram language model. The LSTM language models were used for very large vocabulary continuous Russian speech recognition at an N-best list rescoring stage. We achieved significant progress in training time reduction with only slight degradation in recognition accuracy comparing to the word-based LM. In addition, our LM with classes generated using linguistic information outperformed LM with classes generated using word2vec. We achieved WER of 14.94 {\%} at our own speech corpus of continuous Russian speech that is 15 {\%} relative reduction with respect to the baseline 3-gram model.},
}
Markdown (Informal)
[Class-based LSTM Russian Language Model with Linguistic Information](https://aclanthology.org/2020.lrec-1.300/) (Kipyatkova & Karpov, LREC 2020)
ACL