% Workshop paper in the proceedings of the Sixth Workshop on Speech, Vision,
% and Language Technologies for Dravidian Languages (DravidianLangTech 2026).
% Case-sensitive words in title/booktitle are protected with whole-word braces.
% NOTE(review): url points at a preview.aclanthology.org ingest path -- confirm
%   the permanent URL before relying on it.
% NOTE(review): address holds "Underline (Virtual)", which looks like an event
%   venue rather than a publisher city -- confirm against the target style.
@inproceedings{j-etal-2026-azrael,
  title     = {Azrael@{DravidianLangTech} 2026: Dialect-Sensitive Automatic Speech Recognition and Classification for {Tamil}},
  author    = {J, Janish Andrin
               and Sahil, Mohammed
               and S, Saranya},
  editor    = {Chakravarthi, Bharathi Raja
               and Priyadharshini, Ruba
               and Madasamy, Anand Kumar
               and Thavareesan, Sajeetha
               and Rajiakodi, Saranya
               and Navaneethakrishnan, Subalalitha
               and Chinnappa, Dhivya
               and Palani, Balasubramanian
               and Subramanian, Malliga
               and Shanmugavadivel, Kogilavani
               and Rajalakshmi, Ratnavel},
  booktitle = {Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for {Dravidian} Languages},
  month     = jul,
  year      = {2026},
  address   = {Underline (Virtual)},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.dravidianlangtech-1.18/},
  pages     = {153--157},
  isbn      = {979-8-89176-401-9},
  abstract  = {Tamil is a pre-historic language of millions of individuals who live in India, Sri Lanka, and other parts of the world. Consider the variations in accents, vocabulary and even speech rhythm even among the central region, the northern region, the southern region and the western region of Tamil Nadu. Such idiosyncrasies make it difficult to use features such as voice assistants or translation applications to keep up. A feasible system has been developed in this project to manage that challenge. It picks up raw audio files in Tamil, identifies which of the four predominant dialects the speech belongs to and translates that speech into text. Good quality datasets on Tamil dialects are rather rare, due to the lack of resources and interest in languages. There were pre-trained models, namely, XLSR to spot the dialects and Wav2Vec 2.0 to convert speech into text. All in all, this configuration had an accuracy rate of 46 percentage. It was very good at distinguishing between northern and southern, but was somewhat confused between central and west-central-western. In the case of the transcription component, a cursory inspection reveals that it is a reliable process, able to nail down clear speech despite those accent twists. With that said, it is possible to improve it with such details as a more detailed fine-tuning or equalizing the classes of data.},
}
Markdown (Informal)
[Azrael@DravidianLangTech 2026: Dialect-Sensitive Automatic Speech Recognition and Classification for Tamil](https://preview.aclanthology.org/ingest-acl-workshops/2026.dravidianlangtech-1.18/) (J et al., DravidianLangTech 2026)
ACL