@inproceedings{ngo-etal-2026-human,
  title     = {Human vs {LLM} in Conversational Repair Annotation: A New Resource and Comparative Study},
  author    = {Ngo, Anh and
               Rollet, Nicolas and
               Pelachaud, Catherine and
               Clavel, Chlo{\'e}},
  editor    = {Piperidis, Stelios and
               Bel, N{\'u}ria and
               van den Heuvel, Henk and
               Ide, Nancy and
               Krek, Simon and
               Toral, Antonio},
  booktitle = {International Conference on Language Resources and Evaluation},
  month     = may,
  year      = {2026},
  address   = {Palma de Mallorca, Spain},
  publisher = {ELRA Language Resource Association},
  url       = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.547/},
  pages     = {6880--6892},
  abstract  = {Addressing the scarcity of annotated data for Other-Initiated Repair (OIR), when recipients interrupt conversation progressivity to signal trouble, prompting speakers to provide repair, this work introduces OIR annotations for the NOXI corpus, achieving considerable reliability. We evaluate whether LLMs can reliably annotate OIR sequences using structured Chain-of-Thought prompting and conduct comparative analysis across two corpora: NOXI (natural dialogue) and CABB-S (Dutch, task-oriented), finding weak alignment between LLMs and human annotations, particularly in recognizing trouble-signaling. Analyzing human-LLM disagreement using the LLM-generated explanations revealed limitations: models rely on lexical patterns rather than conversational context, construct reasonable-sounding but misleading narratives, highlighting crucial limitations for both automated annotation of complex interactional phenomena.},
  internal-note = {Anthology volume id: main (2026.lrec-main.547); URL is a preview-site link --- replace with the canonical aclanthology.org URL or a DOI once published},
}
Markdown (Informal)
[Human vs LLM in Conversational Repair Annotation: A New Resource and Comparative Study](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.547/) (Ngo et al., LREC 2026)
ACL