@inproceedings{r-etal-2024-cen,
title = "{CEN}{\_}{A}mrita@{LT}-{EDI} 2024: A Transformer based Speech Recognition System for Vulnerable Individuals in {T}amil",
author = "R, Jairam and
G, Jyothish and
B, Premjith and
M, Viswa",
editor = {Chakravarthi, Bharathi Raja and
B, Bharathi and
Buitelaar, Paul and
Durairaj, Thenmozhi and
Kov{\'a}cs, Gy{\"o}rgy and
Garc{\'i}a Cumbreras, Miguel {\'A}ngel},
booktitle = "Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion",
month = mar,
year = "2024",
address = "St. Julian's, Malta",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.ltedi-1.21/",
pages = "190--195",
abstract = "Speech recognition is known to be a specialized application of speech processing. Automatic speech recognition (ASR) systems are designed to perform the speech-to-text task. Although ASR systems have been the subject of extensive research, they still encounter certain challenges when speech variations arise. The speaker`s age, gender, vulnerability, and other factors are the main causes of the variations in speech. In this work, we propose a fine-tuned speech recognition model for recognising the spoken words of vulnerable individuals in Tamil. This research utilizes a dataset sourced from the LT-EDI@EACL2024 shared task. We trained and tested pre-trained ASR models, including XLS-R and Whisper. The findings highlight that the fine-tuned Whisper ASR model surpasses the XLSR, achieving a word error rate (WER) of 24.452, signifying its superior performance in recognizing speech from diverse individuals."
}
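
The abstract reports model quality as word error rate (WER). As a quick point of reference, the snippet below is a minimal sketch of how WER is commonly computed, assuming the Python `jiwer` library; the Tamil sentences are toy placeholders, not examples from the shared-task data, and the paper's exact evaluation tooling is not specified here.

```python
# Minimal WER computation sketch using the jiwer library (assumption:
# this is not necessarily the evaluation code used in the paper).
import jiwer

# Toy reference (gold) and hypothesis (ASR output) transcripts in Tamil.
reference = "நான் பள்ளிக்கு செல்கிறேன்"
hypothesis = "நான் பள்ளி செல்கிறேன்"

# WER = (substitutions + deletions + insertions) / number of reference words
print(f"WER: {jiwer.wer(reference, hypothesis):.3f}")  # 0.333 for this toy pair
```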