% LREC 2014 conference paper, Morchid et al. (ACL Anthology ID L14-1621).
@inproceedings{morchid-etal-2014-lda,
    title = "A {LDA}-Based Topic Classification Approach From Highly Imperfect Automatic Transcriptions",
    author = "Morchid, Mohamed and
      Dufour, Richard and
      Linar{\`e}s, Georges",
    editor = "Calzolari, Nicoletta and
      Choukri, Khalid and
      Declerck, Thierry and
      Loftsson, Hrafn and
      Maegaard, Bente and
      Mariani, Joseph and
      Moreno, Asuncion and
      Odijk, Jan and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
    month = may,
    year = "2014",
    address = "Reykjavik, Iceland",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L14-1621/",
    pages = "1309--1314",
    abstract = "Although the current transcription systems could achieve high recognition performance, they still have a lot of difficulties to transcribe speech in very noisy environments. The transcription quality has a direct impact on classification tasks using text features. In this paper, we propose to identify themes of telephone conversation services with the classical Term Frequency-Inverse Document Frequency using Gini purity criteria (TF-IDF-Gini) method and with a Latent Dirichlet Allocation (LDA) approach. These approaches are coupled with a Support Vector Machine (SVM) classification to resolve theme identification problem. Results show the effectiveness of the proposed LDA-based method compared to the classical TF-IDF-Gini approach in the context of highly imperfect automatic transcriptions. Finally, we discuss the impact of discriminative and non-discriminative words extracted by both methods in terms of transcription accuracy."
}
Markdown (Informal)
[A LDA-Based Topic Classification Approach From Highly Imperfect Automatic Transcriptions](https://aclanthology.org/L14-1621/) (Morchid et al., LREC 2014)
ACL