@comment{ACL Anthology record for the AbjadNLP 2026 workshop paper introducing the NileTTS dataset. The abstract has been de-hyphenated (PDF line-break artifacts removed).}
@inproceedings{khamis-ahmed-2026-llm,
  title     = {{LLM}-to-Speech: A Synthetic Data Pipeline for Training Dialectal Text-to-Speech Models},
  author    = {Khamis, Ahmed and
               Ahmed, Hesham Ali},
  booktitle = {Proceedings of the 2nd Workshop on {NLP} for Languages Using {Arabic} Script},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.abjadnlp-1.6/},
  pages     = {47--54},
  abstract  = {Despite the advances in neural text to speech (TTS), many Arabic dialectal varieties remain marginally addressed, with most resources concentrated on Modern Spoken Arabic (MSA) and Gulf dialects, leaving Egyptian Arabic{---}the most widely understood Arabic dialect{---}severely under-resourced. We address this gap by introducing NileTTS: 38 hours of transcribed speech from two speakers across diverse domains including medical, sales, and general conversations. We construct this dataset using a novel synthetic pipeline: large language models (LLM) generate Egyptian Arabic content, which is then converted to natural speech using audio synthesis tools, followed by automatic transcription and speaker diarization with manual quality verification. We fine-tune XTTS v2, a state-of-the-art multilingual TTS model, on our dataset and evaluate against the baseline model trained on other Arabic dialects. Our contributions include: (1) the first publicly available Egyptian Arabic TTS dataset, (2) a reproducible synthetic data generation pipeline for dialectal TTS, and (3) an open-source fine-tuned model. All resources are released to advance Egyptian Arabic speech synthesis research.},
}
Markdown (Informal)
[LLM-to-Speech: A Synthetic Data Pipeline for Training Dialectal Text-to-Speech Models](https://preview.aclanthology.org/manual-author-scripts/2026.abjadnlp-1.6/) (Khamis & Ahmed, AbjadNLP 2026)
ACL