@comment{
  Shared-task system description paper (Tamil dialect speech recognition
  and classification) in the DravidianLangTech 2026 workshop proceedings.
  Case-sensitive words in title/booktitle are protected as whole words so
  sentence-casing styles keep them intact.
  NOTE(review): url points at a preview/ingest host; presumably a staging
  link -- confirm the canonical URL before release.
  NOTE(review): address holds the event venue as exported; confirm that the
  target style expects this rather than the publisher location.
}
@inproceedings{karunanidhi-arumugam-2026-chmod-777-dravidianlangtech,
  title     = {{CHMOD\_777@DravidianLangTech} 2026: {Tamil}-Adapted {Whisper} and {MMS} for Dialect Speech Recognition and Classification},
  author    = {Karunanidhi, Arunaggiri Pandian and
               Arumugam, Prabalakshmi},
  editor    = {Chakravarthi, Bharathi Raja and
               Priyadharshini, Ruba and
               Madasamy, Anand Kumar and
               Thavareesan, Sajeetha and
               Rajiakodi, Saranya and
               Navaneethakrishnan, Subalalitha and
               Chinnappa, Dhivya and
               Palani, Balasubramanian and
               Subramanian, Malliga and
               Shanmugavadivel, Kogilavani and
               Rajalakshmi, Ratnavel},
  booktitle = {Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for {Dravidian} Languages},
  month     = jul,
  year      = {2026},
  address   = {Underline (Virtual)},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.dravidianlangtech-1.24/},
  pages     = {186--190},
  isbn      = {979-8-89176-401-9},
  abstract  = {This paper describes Team CHMOD{\_}777{'}s system for the DravidianLangTech@ACL 2026 shared task on Tamil dialect speech recognition and classification. The task comprises two subtasks: classifying Tamil speech into four regional dialects (Northern, Southern, Western, Central) and transcribing dialectal Tamil speech to text. For dialect classification, we fine-tune MMS-1b-all with Focal Loss and weighted sampling, achieving 83.04 Macro F1 on the development set (5th out of 11 teams on the test set). For speech recognition, we fine-tune a Tamil-specific Whisper model (763M parameters), achieving 53.72 WER on the development set and 49.75 on the official test set, ranking 1st out of 13 teams. Our key finding is that domain-specific pre-training significantly outperforms larger general-purpose models: Tamil Whisper (763M) beats Whisper-large-v3 (1.5B) by 8 WER points despite having half the parameters.},
}
Markdown (Informal)
@comment{
[CHMOD_777@DravidianLangTech 2026: Tamil-Adapted Whisper and MMS for Dialect Speech Recognition and Classification](https://preview.aclanthology.org/ingest-acl-workshops/2026.dravidianlangtech-1.24/) (Karunanidhi & Arumugam, DravidianLangTech 2026)
ACL
}