@inproceedings{glenn-etal-2022-viability,
title = "The Viability of Best-worst Scaling and Categorical Data Label Annotation Tasks in Detecting Implicit Bias",
author = "Glenn, Parker and
Jacobs, Cassandra L. and
Thielk, Marvin and
Chu, Yi",
editor = "Abercrombie, Gavin and
Basile, Valerio and
Tonelli, Sara and
Rieser, Verena and
Uma, Alexandra",
booktitle = "Proceedings of the 1st Workshop on Perspectivist Approaches to NLP @LREC2022",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://preview.aclanthology.org/fix-sig-urls/2022.nlperspectives-1.5/",
pages = "32--36",
abstract = "Annotating workplace bias in text is a noisy and subjective task. In encoding the inherently continuous nature of bias, aggregated binary classifications do not suffice. Best-worst scaling (BWS) offers a framework to obtain real-valued scores through a series of comparative evaluations, but it is often impractical to deploy to traditional annotation pipelines within industry. We present analyses of a small-scale bias dataset, jointly annotated with categorical annotations and BWS annotations. We show that there is a strong correlation between observed agreement and BWS score (Spearman{'}s r=0.72). We identify several shortcomings of BWS relative to traditional categorical annotation: (1) When compared to categorical annotation, we estimate BWS takes approximately 4.5x longer to complete; (2) BWS does not scale well to large annotation tasks with sparse target phenomena; (3) The high correlation between BWS and the traditional task shows that the benefits of BWS can be recovered from a simple categorically annotated, non-aggregated dataset."
}