% CAL-Log paper by Kariyakaranage and Athuraliya (2026), pages 537--553 of the
% proceedings named in booktitle.
% NOTE(review): url points at a preview.aclanthology.org "ingest-acl" build, which
% looks like a staging address -- confirm and replace with the permanent URL.
% NOTE(review): the "acl-srw" segment of the url suggests a workshop volume, while
% booktitle names the main ACL 2026 proceedings -- confirm against the published volume.
% The method name is braced as a whole ({CAL-Log}) so sentence-casing styles do
% not turn it into "CAL-log".
@inproceedings{kariyakaranage-athuraliya-2026-cal,
    title     = {{CAL-Log}: Cost-Aware Active Learning with Logarithmic Cognitive Effort Modeling and Online Adaptation to Human Annotation Behavior},
    author    = {Kariyakaranage, Vihanga Supasan and
                 Athuraliya, Banuka},
    editor    = {T.Y.S.S., Santosh and
                 Rodriguez, Juan Diego and
                 de Gibert, Ona},
    booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} ({ACL} 2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-srw.48/},
    pages     = {537--553},
    isbn      = {979-8-89176-393-7},
    abstract  = {Active learning (AL) reduces labeled data requirements in NLP, yet most methods optimize label efficiency while ignoring annotation cost. Standard uncertainty sampling assumes uniform effort, leading to suboptimal resource allocation when documents vary in length. Supasan and Athuraliya (2026) introduced CAL-Log, a cost-aware AL variant using logarithmic cost modeling C(x)={\ensuremath{\alpha}}+{\ensuremath{\beta}} log(1+L(x)), where C(x) is the predicted annotation time for document x and L(x) is its token length, grounded in information foraging theory (Pirolli and Card, 1999) and psycholinguistic studies of human skimming (Rayner, 1998). This paper presents CAL-Log in full, extending that preliminary framework with two new contributions: temperature-scaled calibrated entropy and online per-annotator cost adaptation, which together resolve the cold-start calibration bottleneck identified in the prior work. Experiments on ten text classification benchmarks demonstrate a 3.3{\texttimes} speedup over BADGE (Batch Active learning by Diverse Gradient Embeddings; Ash et al., 2020) and 3.9{\texttimes} over Entropy sampling to reach F1=0.80, with large effect sizes (Cohen{'}s d{\ensuremath{>}}0.8). A live annotation deployment with preliminary user evaluation (N=7) confirms that the online cost model produces reading-speed classifications consistent with annotator self-reports, and that a transparency interface successfully communicates the scoring rationale to non-expert users.},
}

Markdown (Informal)
[CAL-Log: Cost-Aware Active Learning with Logarithmic Cognitive Effort Modeling and Online Adaptation to Human Annotation Behavior](https://preview.aclanthology.org/ingest-acl/2026.acl-srw.48/) (Kariyakaranage & Athuraliya, ACL 2026)
ACL