@comment{
  Conference paper record in ACL Anthology export layout.
  NOTE(review): the url field points at a preview.aclanthology.org ingest
  link; confirm whether a canonical Anthology URL or a DOI should replace it.
  NOTE(review): address carries the location as exported; presumably the
  conference venue rather than the publisher city. Verify before reuse.
}
@inproceedings{yantosca-cheng-2026-spectral,
  title     = {Spectral Gravity Formant Estimation for Phonetic Segmentation},
  author    = {Yantosca, Michael S. and
               Cheng, Albert M. K.},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1775/},
  pages     = {35639--35652},
  isbn      = {979-8-89176-395-1},
  abstract  = {Recent automated transcription systems have focused on end-to-end orthographic approaches driven by deep neural networks and sequence-to-sequence transformers. Growing public interest in transcription at the phonemic or phonetic level has led to re-purposing these systems to segment and identify phones, the basic sounds which comprise human speech. However, they miss the mark on a fundamental component of time-series analysis, namely time. For linguistic applications which require high fidelity in the temporal domain, the loss of timing information is untenable. Our work proposes a deadline-bounded expectation maximization (EM) algorithm with a novel initialization method to estimate formants, i.e., salient speech frequencies, for enhanced phonetic segmentation. Based on the concept of spectral gravity, i.e., treating spectral energy as mass attenuated by the square of frequency distance across the spectrum, our technique outperforms the recent state of the art on key clustering metrics, generating reasonable alignments across multiple languages with no a priori training.},
}
Markdown (Informal)
[Spectral Gravity Formant Estimation for Phonetic Segmentation](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1775/) (Yantosca & Cheng, Findings 2026)
ACL