@comment{CoNLL 2019 paper, ACL Anthology ID K19-1048. The url field holds the permanent Anthology address.}
@inproceedings{huang-etal-2019-learning,
  title     = {Learning a Unified Named Entity Tagger from Multiple Partially Annotated Corpora for Efficient Adaptation},
  author    = {Huang, Xiao and
               Dong, Li and
               Boschee, Elizabeth and
               Peng, Nanyun},
  editor    = {Bansal, Mohit and
               Villavicencio, Aline},
  booktitle = {Proceedings of the 23rd Conference on Computational Natural Language Learning ({CoNLL})},
  month     = nov,
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/K19-1048/},
  doi       = {10.18653/v1/K19-1048},
  pages     = {515--527},
  abstract  = {Named entity recognition (NER) identifies typed entity mentions in raw text. While the task is well-established, there is no universally used tagset: often, datasets are annotated for use in downstream applications and accordingly only cover a small set of entity types relevant to a particular task. For instance, in the biomedical domain, one corpus might annotate genes, another chemicals, and another diseases{---}despite the texts in each corpus containing references to all three types of entities. In this paper, we propose a deep structured model to integrate these {\textquotedblleft}partially annotated{\textquotedblright} datasets to jointly identify all entity types appearing in the training corpora. By leveraging multiple datasets, the model can learn robust input representations; by building a joint structured model, it avoids potential conflicts caused by combining several models' predictions at test time. Experiments show that the proposed model significantly outperforms strong multi-task learning baselines when training on multiple, partially annotated datasets and testing on datasets that contain tags from more than one of the training corpora.},
}
Markdown (Informal)
[Learning a Unified Named Entity Tagger from Multiple Partially Annotated Corpora for Efficient Adaptation](https://aclanthology.org/K19-1048/) (Huang et al., CoNLL 2019)
ACL