% Shared-task system paper: Tamil dialect classification and speech recognition (DravidianLangTech 2026).
% Title casing: the team name, the workshop acronym and the language name are braced as whole
% words so that sentence-casing bibliography styles do not lowercase them.
% NOTE(review): the url field points at an ACL Anthology preview (ingest-branch) deployment,
% which may not persist after ingestion; presumably the canonical link will be
% https://aclanthology.org/2026.dravidianlangtech-1.36/ -- confirm and replace once published.
@inproceedings{a-etal-2026-dravid,
    title     = "{Dravid-Tech-Builders}@{DravidianLangTech} 2026: A Comparative Study of Classical and Deep Learning Approaches for {Tamil} Dialect Classification and Speech Recognition",
    author    = "A, Naveen and
                 P, Karthiyayini and
                 S, Kalaivani K",
    editor    = "Chakravarthi, Bharathi Raja and
                 Priyadharshini, Ruba and
                 Madasamy, Anand Kumar and
                 Thavareesan, Sajeetha and
                 Rajiakodi, Saranya and
                 Navaneethakrishnan, Subalalitha and
                 Chinnappa, Dhivya and
                 Palani, Balasubramanian and
                 Subramanian, Malliga and
                 Shanmugavadivel, Kogilavani and
                 Rajalakshmi, Ratnavel",
    booktitle = "Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for {Dravidian} Languages",
    month     = jul,
    year      = "2026",
    address   = "Underline (Virtual)",
    publisher = "Association for Computational Linguistics",
    url       = "https://preview.aclanthology.org/ingest-acl-workshops/2026.dravidianlangtech-1.36/",
    pages     = "248--252",
    isbn      = "979-8-89176-401-9",
    abstract  = "The rapid expansion of digital connectivity across India has dramatically increased participation in speech-enabled services and multilingual communication platforms. Tamil, with its rich dialectal diversity across geographical regions, presents unique challenges for automatic speech recognition and dialect identification systems. We participated in the DravidianLangTech 2026 shared task to classify Tamil speech into four regional dialects (Central, Northern, Southern, Western) and perform automatic speech recognition. We trained four machine learning models (SVM, Random Forest, CNN, CNN+BiLSTM) alongside two transfer learning models (Wav2Vec2-Base, Wav2Vec2-XLSR-53) for ASR. Among classification models, SVM with MFCC features achieved the best performance with 94.17{\%} macro F1-score and validation accuracy of 94.35{\%}. For ASR, Wav2Vec2-XLSR-53 obtained 15.3{\%} WER, demonstrating effective cross-lingual knowledge transfer. Our analysis reveals that traditional machine learning approaches with engineered features outperform deep learning methods in low-resource scenarios with limited training data. Code is available at: https://github.com/Naveen-Arul/dravid-tech"
}