% Conference paper, LREC 2026 main track (ACL Anthology ID 2026.lrec-main.180).
% Typed as an inproceedings entry with the venue in booktitle; the Anthology
% volume label "main" is not a numeric volume, so it is kept in an ignored field.
% NOTE(review): booktitle is the venue string from the original export -- confirm
% the official proceedings title.
% NOTE(review): url points at the Anthology preview/ingest site -- confirm or
% replace with the canonical URL once the paper is published.
@inproceedings{canes-napoles-repp-2026-object,
    author           = {Canes N{\'a}poles, Amalia and
                        Repp, Sophie},
    title            = {Object Realisation in Spoken {Guadeloupan} {French}: Evaluating {NLP} Models for an Under-Resourced Variety},
    editor           = {Piperidis, Stelios and
                        Bel, N{\'u}ria and
                        van den Heuvel, Henk and
                        Ide, Nancy and
                        Krek, Simon and
                        Toral, Antonio},
    booktitle        = {International Conference on Language Resources and Evaluation},
    anthology-volume = {main},
    month            = may,
    year             = {2026},
    address          = {Palma de Mallorca, Spain},
    publisher        = {ELRA Language Resource Association},
    url              = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.180/},
    pages            = {2299--2308},
    abstract         = {This paper contributes to the evaluation of natural language parsing models applied to colloquial speech in lesser studied varieties of a language. We are reporting on the performance of speech recognition and of universal dependency (UD) parsing models in a radio corpus of colloquial French spoken in Guadaloupe (GuaFr), which is in contact with a typologically distant language, French-based Guadaloupean Creole (GuaCr). The corpus poses specific challenges due to phonetic and syntactic specifics of GuaFr, as well as the occurrence of code switching to GuaCr. We show weakening the ASR decoder{'}s language-model (LM) in various parameters avoids hallucination of null objects, which have been described as typical for spoken GuaFr, but not of non-standard object clitic positioning. For UD parsing, we investigate utterance segmentation as the primary lever to affect model performance and compare different segmentation sources (ASR punctuation, manual chunking, UD parser tokenization) and their combination. We highlight both strengths and pitfalls of the models, again focussing on the expression of syntactic objects.},
}
Markdown (Informal)
[Object Realisation in Spoken Guadeloupan French: Evaluating NLP Models for an Under-Resourced Variety](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.180/) (Canes Nápoles & Repp, LREC 2026)
ACL