% Mayhew, Chaturvedi, Tsai and Roth (CoNLL 2019, pages 645--655):
% NER trained on partially annotated data.
% The url field uses the permanent ACL Anthology address for paper K19-1060
% (same identifier as the DOI suffix) instead of a staging preview build.
@inproceedings{mayhew-etal-2019-named,
  title     = {Named Entity Recognition with Partially Annotated Training Data},
  author    = {Mayhew, Stephen and
               Chaturvedi, Snigdha and
               Tsai, Chen-Tse and
               Roth, Dan},
  editor    = {Bansal, Mohit and
               Villavicencio, Aline},
  booktitle = {Proceedings of the 23rd Conference on Computational Natural Language Learning ({CoNLL})},
  month     = nov,
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/K19-1060/},
  doi       = {10.18653/v1/K19-1060},
  pages     = {645--655},
  abstract  = {Supervised machine learning assumes the availability of fully-labeled data, but in many cases, such as low-resource languages, the only data available is partially annotated. We study the problem of Named Entity Recognition (NER) with partially annotated training data in which a fraction of the named entities are labeled, and all other tokens, entities or otherwise, are labeled as non-entity by default. In order to train on this noisy dataset, we need to distinguish between the true and false negatives. To this end, we introduce a constraint-driven iterative algorithm that learns to detect false negatives in the noisy set and downweigh them, resulting in a weighted training set. With this set, we train a weighted NER model. We evaluate our algorithm with weighted variants of neural and non-neural NER models on data in 8 languages from several language and script families, showing strong ability to learn from partial data. Finally, to show real-world efficacy, we evaluate on a Bengali NER corpus annotated by non-speakers, outperforming the prior state-of-the-art by over 5 points F1.},
}
Markdown (Informal)
[Named Entity Recognition with Partially Annotated Training Data](https://aclanthology.org/K19-1060/) (Mayhew et al., CoNLL 2019)
ACL