@inproceedings{le-titov-2019-boosting,
title = "Boosting Entity Linking Performance by Leveraging Unlabeled Documents",
author = "Le, Phong and
Titov, Ivan",
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/P19-1187/",
doi = "10.18653/v1/P19-1187",
pages = "1935--1945",
abstract = "Modern entity linking systems rely on large collections of documents specifically annotated for the task (e.g., AIDA CoNLL). In contrast, we propose an approach which exploits only naturally occurring information: unlabeled documents and Wikipedia. Our approach consists of two stages. First, we construct a high recall list of candidate entities for each mention in an unlabeled document. Second, we use the candidate lists as weak supervision to constrain our document-level entity linking model. The model treats entities as latent variables and, when estimated on a collection of unlabelled texts, learns to choose entities relying both on local context of each mention and on coherence with other entities in the document. The resulting approach rivals fully-supervised state-of-the-art systems on standard test sets. It also approaches their performance in the very challenging setting: when tested on a test set sampled from the data used to estimate the supervised systems. By comparing to Wikipedia-only training of our model, we demonstrate that modeling unlabeled documents is beneficial."
}
Markdown (Informal)
[Boosting Entity Linking Performance by Leveraging Unlabeled Documents](https://preview.aclanthology.org/fix-sig-urls/P19-1187/) (Le & Titov, ACL 2019)
ACL