@inproceedings{liu-hou-2026-aggregating,
  title     = {Aggregating Crowd of {LLM}s for Cost-Effective Data Annotation},
  author    = {Liu, Jiacheng and
               Hou, Xiaofeng},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'i}s},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.findings-eacl.125/},
  pages     = {2407--2419},
  isbn      = {979-8-89176-386-9},
  abstract  = {Recent advancements in Large Language Models (LLMs) have shown promise for automated data annotation, yet reliance on expensive commercial models like GPT-4 limits accessibility. This paper rigorously evaluates the potential of open-source smaller LLMs (sLLMs) as a cost-effective alternative. We introduce a new benchmark dataset, Multidisciplinary Open Research Data (MORD), comprising 12,277 annotated sentence segments from 1,500 scholarly articles across five research domains, to systematically assess sLLM performance. Our experiments demonstrate that sLLMs achieve annotation quality surpassing Amazon MTurk workers and approach GPT-4{'}s accuracy at significantly lower costs. We further propose to build the Crowd of LLMs, which aggregates annotations from multiple sLLMs using label aggregation algorithms. This approach not only outperforms individual sLLMs but also reveals that combining sLLM annotations with human crowd labels yields superior results compared to either method alone. Our findings highlight the viability of sLLMs for democratizing high-quality data annotation while underscoring the need for tailored aggregation methods to fully realize their potential.},
}
Markdown (Informal)
[Aggregating Crowd of LLMs for Cost-Effective Data Annotation](https://preview.aclanthology.org/ingest-eacl/2026.findings-eacl.125/) (Liu & Hou, Findings 2026)
ACL