% Jacovi, Niu, Goldberg and Sugiyama (EACL 2021, main volume): document set expansion via neural positive-unlabeled learning.
% The url field holds the canonical ACL Anthology address, not a temporary preview-branch build link.
@inproceedings{jacovi-etal-2021-scalable,
  author    = {Jacovi, Alon and Niu, Gang and Goldberg, Yoav and Sugiyama, Masashi},
  title     = {Scalable Evaluation and Improvement of Document Set Expansion via Neural Positive-Unlabeled Learning},
  editor    = {Merlo, Paola and Tiedemann, J{\"o}rg and Tsarfaty, Reut},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {581--592},
  doi       = {10.18653/v1/2021.eacl-main.47},
  url       = {https://aclanthology.org/2021.eacl-main.47/},
  abstract  = {We consider the situation in which a user has collected a small set of documents on a cohesive topic, and they want to retrieve additional documents on this topic from a large collection. Information Retrieval (IR) solutions treat the document set as a query, and look for similar documents in the collection. We propose to extend the IR approach by treating the problem as an instance of positive-unlabeled (PU) learning{---}i.e., learning binary classifiers from only positive (the query documents) and unlabeled (the results of the IR engine) data. Utilizing PU learning for text with big neural networks is a largely unexplored field. We discuss various challenges in applying PU learning to the setting, showing that the standard implementations of state-of-the-art PU solutions fail. We propose solutions for each of the challenges and empirically validate them with ablation tests. We demonstrate the effectiveness of the new method using a series of experiments of retrieving PubMed abstracts adhering to fine-grained topics, showing improvements over the common IR solution and other baselines.},
}
Markdown (Informal)
[Scalable Evaluation and Improvement of Document Set Expansion via Neural Positive-Unlabeled Learning](https://aclanthology.org/2021.eacl-main.47/) (Jacovi et al., EACL 2021)
ACL