@comment{Workshop paper in the SustaiNLP 2022 proceedings (ACL Anthology ID 2022.sustainlp-1.7). The url field holds the canonical Anthology landing page rather than a temporary preview-branch build; the abstract is kept verbatim as published.}
@inproceedings{thorne-2022-data,
  title     = {Data-Efficient Auto-Regressive Document Retrieval for Fact Verification},
  author    = {Thorne, James},
  editor    = {Fan, Angela and
               Gurevych, Iryna and
               Hou, Yufang and
               Kozareva, Zornitsa and
               Luccioni, Sasha and
               Sadat Moosavi, Nafise and
               Ravi, Sujith and
               Kim, Gyuwan and
               Schwartz, Roy and
               R{\"u}ckl{\'e}, Andreas},
  booktitle = {Proceedings of the Third Workshop on Simple and Efficient Natural Language Processing ({SustaiNLP})},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates (Hybrid)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.sustainlp-1.7/},
  doi       = {10.18653/v1/2022.sustainlp-1.7},
  pages     = {44--51},
  abstract  = {Document retrieval is a core component of many knowledge-intensive natural language processing task formulations such as fact verification. Sources of textual knowledge such as Wikipedia articles condition the generation of answers from the models. Recent advances in retrieval use sequence-to-sequence models to incrementally predict the title of the appropriate Wikipedia page given an input instance. However, this method requires supervision in the form of human annotation to label which Wikipedia pages contain appropriate context. This paper introduces a distant-supervision method that does not require any annotation train auto-regressive retrievers that attain competitive R-Precision and Recall in a zero-shot setting. Furthermore we show that with task-specific supervised fine-tuning, auto-regressive retrieval performance for two Wikipedia-based fact verification tasks can approach or even exceed full supervision using less than $1/4$ of the annotated data. We release all code and models},
}
Markdown (Informal)
[Data-Efficient Auto-Regressive Document Retrieval for Fact Verification](https://aclanthology.org/2022.sustainlp-1.7/) (Thorne, SustaiNLP 2022)
ACL