@inproceedings{merdjanovska-akbik-2025-token,
  title     = {Token-Level Metrics for Detecting Incorrect Gold Annotations in {Named Entity Recognition}},
  author    = {Merdjanovska, Elena and
               Akbik, Alan},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.827/},
  doi       = {10.18653/v1/2025.findings-emnlp.827},
  pages     = {15292--15304},
  isbn      = {979-8-89176-335-7},
  abstract  = {Annotated datasets for supervised learning tasks often contain incorrect gold annotations, i.e. label noise. To address this issue, many noisy label learning approaches incorporate metrics to filter out unreliable samples, for example using heuristics such as high loss or low confidence. However, when these metrics are integrated into larger pipelines, it becomes difficult to compare their effectiveness, and understand their individual contribution to reducing label noise. This paper directly compares popular sample metrics for detecting incorrect annotations in named entity recognition (NER). NER is commonly approached as token classification, so the metrics are calculated for each training token and we flag the incorrect ones by defining metrics thresholds. We compare the metrics based on (i) their accuracy in detecting the incorrect labels and (ii) the test scores when retraining a model using the cleaned dataset. We show that training dynamics metrics work the best overall. The best metrics effectively reduce the label noise across different noise types. The errors that the model has not yet memorized are more feasible to detect, and relabeling these tokens is a more effective strategy than excluding them from training.},
}
Markdown (Informal)
[Token-Level Metrics for Detecting Incorrect Gold Annotations in Named Entity Recognition](https://aclanthology.org/2025.findings-emnlp.827/) (Merdjanovska & Akbik, Findings of EMNLP 2025)
ACL