% Findings of ACL 2026 paper on separating unitizing (span-boundary
% detection) from typing in span-bound concept annotation.
% NOTE(review): the url targets a preview/ingest host; swap in the permanent
% Anthology link once it is known -- TODO confirm.
% NOTE(review): address appears to hold the conference location rather than
% the publisher's city -- confirm against the target bibliography style.
@inproceedings{gandhi-etal-2026-decomposing,
    title     = {Decomposing Unitization and Typing for Efficient and Consistent Span-Bound Concept Annotation},
    author    = {Gandhi, Nupoor and
                 Bada, Michael and
                 Strubell, Emma},
    editor    = {Liakata, Maria and
                 Moreira, Viviane P. and
                 Zhang, Jiajun and
                 Jurgens, David},
    booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1728/},
    pages     = {34616--34631},
    isbn      = {979-8-89176-395-1},
    abstract  = {In specialized domains that require expert annotators and high inter-annotator agreement, high-quality datasets with span-bound semantic concept annotations remain expensive to develop. Substantial resources are typically spent on \textit{unitizing}, the task of identifying precise span boundaries for entity mentions. Unitizing is a significant source of inter-annotator disagreement, a poor use of expensive domain expertise, and very time-consuming. We propose a lighter annotation procedure that concentrates manual efforts on typed position annotations, marking positions in the text that overlap with mentions of each entity type, abstracting away span boundary decisions. With as few as 100--200 example sentences, we train span boundary detection models to unitize typed position annotations. Through evaluation over three datasets: CRAFT (biomedical), GENIA (molecular biology), and POLIANNA (climate/energy policy text), we demonstrate that (1) annotating typed positions in the text instead of full concept annotation is a more efficient use of time in low-resource settings, and (2) model-inferred span boundaries result in higher agreement at both the annotator training and corpus annotation phases, without sacrificing utility.},
}

Markdown (Informal)
[Decomposing Unitization and Typing for Efficient and Consistent Span-Bound Concept Annotation](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1728/) (Gandhi et al., Findings 2026)
ACL