% COLING 2018 paper, ACL Anthology ID C18-1161.
% The url field uses the canonical Anthology address rather than a temporary preview build.
@inproceedings{ghaddar-langlais-2018-robust,
  title     = {Robust Lexical Features for Improved Neural Network Named-Entity Recognition},
  author    = {Ghaddar, Abbas and
               Langlais, Phillippe},
  editor    = {Bender, Emily M. and
               Derczynski, Leon and
               Isabelle, Pierre},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/C18-1161/},
  pages     = {1896--1907},
  abstract  = {Neural network approaches to Named-Entity Recognition reduce the need for carefully hand-crafted features. While some features do remain in state-of-the-art systems, lexical features have been mostly discarded, with the exception of gazetteers. In this work, we show that this is unfair: lexical features are actually quite useful. We propose to embed words and entity types into a low-dimensional vector space we train from annotated data produced by distant supervision thanks to Wikipedia. From this, we compute {---} offline {---} a feature vector representing each word. When used with a vanilla recurrent neural network model, this representation yields substantial improvements. We establish a new state-of-the-art F1 score of 87.95 on ONTONOTES 5.0, while matching state-of-the-art performance with a F1 score of 91.73 on the over-studied CONLL-2003 dataset.},
}
Markdown (Informal)
[Robust Lexical Features for Improved Neural Network Named-Entity Recognition](https://aclanthology.org/C18-1161/) (Ghaddar & Langlais, COLING 2018)
ACL