% Workshop paper: forced phone alignment of code-switched Urum--Russian speech
% (Field Matters workshop 2025, published by ACL).
% NOTE(review): the url below points at a preview/staging host
% (preview.aclanthology.org/corrections-2025-08); confirm the canonical
% Anthology URL and DOI before publishing this bibliography.
@inproceedings{ahn-etal-2025-automatic,
  title     = {Automatic Phone Alignment of Code-switched {Urum}--{Russian} Field Data},
  author    = {Ahn, Emily and
               Chodroff, Eleanor and
               Levow, Gina-Anne},
  editor    = {Le Ferrand, {\'E}ric and
               Klyachko, Elena and
               Postnikova, Anna and
               Shavrina, Tatiana and
               Serikov, Oleg and
               Voloshina, Ekaterina and
               Vylomova, Ekaterina},
  booktitle = {Proceedings of the Fourth Workshop on {NLP} Applications to Field Linguistics},
  month     = aug,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/corrections-2025-08/2025.fieldmatters-1.1/},
  pages     = {1--14},
  isbn      = {979-8-89176-282-4},
  abstract  = {Code-switching, using multiple languages in a single utterance, is a common means of communication. In the language documentation process, speakers may code-switch between the target language and a language of broader communication; however, how to handle this mixed speech data is not always clearly addressed for speech research and specifically for a corpus phonetics pipeline. This paper investigates best practices for conducting phone-level forced alignment of code-switched field data using the Urum speech dataset from DoReCo. This dataset comprises 117 minutes of narrative utterances, of which 42{\%} contain code-switched Urum{--}Russian speech. We demonstrate that the inclusion of Russian speech and Russian pretrained acoustic models can aid the alignment of Urum phones. Beyond using boundary alignment precision and accuracy metrics, we also discovered that the method of acoustic modeling impacted a downstream corpus phonetics investigation of code-switched Urum{--}Russian.},
}
Markdown (Informal)
[Automatic Phone Alignment of Code-switched Urum–Russian Field Data](https://preview.aclanthology.org/corrections-2025-08/2025.fieldmatters-1.1/) (Ahn et al., FieldMatters 2025)
ACL