@comment{WiNER corpus paper (Ghaddar and Langlais, IJCNLP 2017, Volume 1: Long Papers). The url field uses the canonical ACL Anthology address rather than a staging-preview link.}
@inproceedings{ghaddar-langlais-2017-winer,
  title     = {{WiNER}: A {Wikipedia} Annotated Corpus for Named Entity Recognition},
  author    = {Ghaddar, Abbas and
               Langlais, Phillippe},
  editor    = {Kondrak, Greg and
               Watanabe, Taro},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {https://aclanthology.org/I17-1042/},
  pages     = {413--422},
  abstract  = {We revisit the idea of mining Wikipedia in order to generate named-entity annotations. We propose a new methodology that we applied to English Wikipedia to build WiNER, a large, high quality, annotated corpus. We evaluate its usefulness on 6 NER tasks, comparing 4 popular state-of-the art approaches. We show that LSTM-CRF is the approach that benefits the most from our corpus. We report impressive gains with this model when using a small portion of WiNER on top of the CONLL training material. Last, we propose a simple but efficient method for exploiting the full range of WiNER, leading to further improvements.},
}
Markdown (Informal)
[WiNER: A Wikipedia Annotated Corpus for Named Entity Recognition](https://aclanthology.org/I17-1042/) (Ghaddar & Langlais, IJCNLP 2017)
ACL