% Workshop paper in the LAW XX proceedings (ACL, 2026). Field values are kept verbatim.
% NOTE(review): the url field points at an Anthology preview/ingest host; confirm the
% canonical link (and add a doi, if one is assigned) once the volume is published.
@inproceedings{shah-etal-2026-llms,
  author    = {Shah, Vishwaa and Kahanda, Indika and Arikawa, Andrea},
  title     = {When {LLM}s Disagree with Human Experts: Understanding {LLM} Annotation Failures in Nutrition Misinformation through Hierarchical Error Analysis using Seed Oil Narratives},
  editor    = {Liu, Yang Janet and Gessler, Luke},
  booktitle = {Proceedings of the 20th Linguistic Annotation Workshop ({LAW} {XX})},
  publisher = {Association for Computational Linguistics},
  address   = {San Diego, California, USA},
  month     = jul,
  year      = {2026},
  pages     = {59--74},
  isbn      = {979-8-89176-404-0},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.law-main.6/},
  abstract  = {Accurate linguistic annotation is crucial for creating high-quality datasets in specialized domains, yet manual labeling is often slow, expensive, and inconsistent. We present a reproducible workflow for evaluating the effectiveness of large language models (LLMs) as annotators of domain-specific health misinformation on social media. Using a data set of 169 Instagram posts on seed oils, expert nutritionists provided gold-standard labels (71{\%} positives), which we compared against the outputs of five open-source LLMs. We introduce a hierarchical error taxonomy that categorizes LLM misclassifications according to the direction, mechanism, and contributing factors of the error, providing interpretable insights into model failures. Our analysis reveals systematic error patterns, including misinterpretation of nuanced claims and overconfidence in predictions, highlighting conditions under which LLM annotations do not align with expert judgment. Although the data set is modest in size and exhibits class imbalance, it reflects real-world distributions of nutrition-related Instagram content and motivates the need for a careful evaluation of the robustness of the LLM annotation. This study has implications for the development of frameworks for automated LLM-based annotators in the health and nutrition domains, as well as LLM developers in general.},
}
Markdown (Informal)
[When LLMs Disagree with Human Experts: Understanding LLM Annotation Failures in Nutrition Misinformation through Hierarchical Error Analysis using Seed Oil Narratives](https://preview.aclanthology.org/ingest-acl-workshops/2026.law-main.6/) (Shah et al., LAW 2026)
ACL