% Conference paper record for the ComputEL-9 workshop proceedings
% (Liang, Lapierre and Levow, 2026), pages 157--166.
% The acronym ASR and the language name in the title are brace-protected as
% whole words so that sentence-casing bibliography styles keep their capitals.
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% branch; presumably it should be swapped for the canonical aclanthology.org
% address once the volume is published -- confirm before release.
@inproceedings{liang-etal-2026-bottlenecks,
  title     = {Bottlenecks of In-Context Learning for Fieldwork {ASR}: A Case-study of {Pan{\~a}ra}},
  author    = {Liang, Siyu and
               Lapierre, Myriam and
               Levow, Gina-Anne},
  editor    = {Agyapong, Godfred and
               Moeller, Sarah and
               Arppe, Antti and
               Marashian, Ali and
               Rosenblum, Daisy},
  booktitle = {Proceedings of the Ninth Workshop on the Use of Computational Methods in the Study of Endangered Languages ({ComputEL}-9)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.computel-1.17/},
  pages     = {157--166},
  isbn      = {979-8-89176-422-4},
  abstract  = {In-context learning (ICL) enables ASR models to transcribe unseen languages by conditioning on a handful of audio-transcript pairs at inference time, with no fine-tuning. This is appealing for language documentation, where transcribed data is scarce and recording conditions vary across sessions. We evaluate ICL on Pan{\~a}ra (Northern J{\^e}, Brazil), a language with a complex practical orthography in which diacritics encode phonemic contrasts, across seven fieldwork recordings varying in speaker, narrative, and recording context. We find substantial within-language variation in transcription accuracy unexplained by any single recording-level factor, and show that diacritics are a systematic bottleneck with pronounced differences across diacritic types. An orthographic manipulation experiment further shows that how diacritics are represented in context transcriptions substantially affects model performance. These results highlight orthographic complexity and recording-level variation as key practical challenges for ICL-assisted fieldwork transcription.},
}

Markdown (Informal)
[Bottlenecks of In-Context Learning for Fieldwork ASR: A Case-study of Panãra](https://preview.aclanthology.org/ingest-acl-workshops/2026.computel-1.17/) (Liang et al., ComputEL 2026)
ACL