@comment{Desmond et al. (2020), "Label Noise in Context": ACL 2020 System Demonstrations paper proposing LNIC. The url field uses the canonical ACL Anthology permalink for Anthology ID 2020.acl-demos.21.}
@inproceedings{desmond-etal-2020-label,
    title     = "Label Noise in Context",
    author    = "Desmond, Michael and
                 Finegan-Dollak, Catherine and
                 Boston, Jeff and
                 Arnold, Matt",
    editor    = "Celikyilmaz, Asli and
                 Wen, Tsung-Hsien",
    booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
    month     = jul,
    year      = "2020",
    address   = "Online",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2020.acl-demos.21/",
    doi       = "10.18653/v1/2020.acl-demos.21",
    pages     = "157--186",
    abstract  = "Label noise{---}incorrectly or ambiguously labeled training examples{---}can negatively impact model performance. Although noise detection techniques have been around for decades, practitioners rarely apply them, as manual noise remediation is a tedious process. Examples incorrectly flagged as noise waste reviewers' time, and correcting label noise without guidance can be difficult. We propose LNIC, a noise-detection method that uses an example's neighborhood within the training set to (a) reduce false positives and (b) provide an explanation as to why the example was flagged as noise. We demonstrate on several short-text classification datasets that LNIC outperforms the state of the art on measures of precision and F0.5-score. We also show how LNIC's training set context helps a reviewer to understand and correct label noise in a dataset. The LNIC tool lowers the barriers to label noise remediation, increasing its utility for NLP practitioners.",
}
Markdown (Informal)
[Label Noise in Context](https://aclanthology.org/2020.acl-demos.21/) (Desmond et al., ACL 2020)
ACL
- Michael Desmond, Catherine Finegan-Dollak, Jeff Boston, and Matt Arnold. 2020. Label Noise in Context. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations, pages 157–186, Online. Association for Computational Linguistics.