% Conference paper (LREC 2026): typed as inproceedings, with the venue name
% carried in booktitle rather than journal. "main" is the Anthology volume
% identifier seen in the URL slug, not a bibliographic volume number, so it is
% kept in a field that bibliography styles do not print.
@inproceedings{coats-2026-fine,
    title = "A Fine-tuned {ASR} Model for Historical {American} Dialect Recordings",
    author = "Coats, Steven",
    editor = "Piperidis, Stelios and
      Bel, N{\'u}ria and
      van den Heuvel, Henk and
      Ide, Nancy and
      Krek, Simon and
      Toral, Antonio",
    booktitle = "International Conference on Language Resources and Evaluation",
    anthology-volume = "main",
    month = may,
    year = "2026",
    address = "Palma de Mallorca, Spain",
    publisher = "ELRA Language Resource Association",
    url = "https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.107/",
    pages = "1372--1381",
    abstract = "This paper introduces DASS2019{\_}NLP, a newly cleaned and curated version of the Digital Archive of Southern Speech, a major historical resource for the study of Southern American English, together with six Whisper ASR models fine-tuned on the data. The 344 hours of conversational speech were recorded by fieldworkers between 1969 and 1983 across the Southern United States. Each Whisper model was fine-tuned on DASS2019{\_}NLP, then evaluated on held-out DASS2019{\_}NLP data, a subset of the Corpus of Regional African American Language (CORAAL), and a subset of Common Voice. The fine-tuned models show consistent learning trajectories and achieve an average 37{\%} reduction in WER on in-domain data relative to baseline models. Notably, they also improve transcription accuracy on CORAAL, suggesting enhanced robustness to African American English. As expected under read vs. conversational style mismatch, accuracy on CV generally favors the OpenAI baselines. Both the DASS2019{\_}NLP dataset and the best-performing fine-tuned model (whisper-large-v3-DASS-ct2) have been publicly released. These resources provide new tools for quantitative research in historical sociolinguistics, facilitating large-scale analyses of phonological, lexical, and grammatical change in Southern and African American English."
}
Markdown (Informal)
[A Fine-tuned ASR Model for Historical American Dialect Recordings](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.107/) (Coats, LREC 2026)
ACL