% ACL 2026 long paper by Halterman and Keith.
% "Codebook" is brace-protected in the title: it opens the subtitle after "?",
% and sentence-casing styles would otherwise lowercase it (they only keep a
% capital at the start of the title or after a colon).
% NOTE(review): url points at a preview/ingest host; confirm the permanent
% address before relying on it.
@inproceedings{halterman-keith-2026-protest,
  title     = {What is a protest anyway? {Codebook} conceptualization is still a first-order concern in {LLM}-era classification},
  author    = {Halterman, Andrew and Keith, Katherine A.},
  editor    = {Liakata, Maria and Moreira, Viviane P. and Zhang, Jiajun and Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.92/},
  pages     = {2043--2059},
  isbn      = {979-8-89176-390-6},
  abstract  = {Generative large language models (LLMs) are now used extensively for text classification in computational social science (CSS). In this work, we focus on the steps before and after LLM prompting: \textit{conceptualization} of the categories to classify and using LLM predictions in downstream statistical inference. We argue these steps have been overlooked in much of LLM-era CSS and LLMs can tempt analysts to skip conceptualization altogether. For example, a political scientist classifying ``protest'' with LLMs may never be forced to craft a definition: unlike human annotators who would ask clarifying questions, an LLM can silently accept an underspecified concept to classify and return plausible-looking labels. Using simulations, we show that conceptualization failures induce downstream inferential bias that cannot be corrected solely by a more accurate LLM or post-hoc bias correction methods. We conclude by reminding CSS analysts that conceptualization is still a first-order concern in the LLM-era and provide concrete advice for pursuing low-cost, unbiased, low-variance downstream estimates.},
}
Markdown (Informal)
[What is a protest anyway? Codebook conceptualization is still a first-order concern in LLM-era classification](https://preview.aclanthology.org/ingest-acl/2026.acl-long.92/) (Halterman & Keith, ACL 2026)
ACL