@inproceedings{zhang-etal-2020-little,
title = "A Little Bit Is Worse Than None: Ranking with Limited Training Data",
author = "Zhang, Xinyu and
Yates, Andrew and
Lin, Jimmy",
editor = "Moosavi, Nafise Sadat and
Fan, Angela and
Shwartz, Vered and
Glava{\v{s}}, Goran and
Joty, Shafiq and
Wang, Alex and
Wolf, Thomas",
booktitle = "Proceedings of SustaiNLP: Workshop on Simple and Efficient Natural Language Processing",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.sustainlp-1.14/",
doi = "10.18653/v1/2020.sustainlp-1.14",
pages = "107--112",
abstract = "Researchers have proposed simple yet effective techniques for the retrieval problem based on using BERT as a relevance classifier to rerank initial candidates from keyword search. In this work, we tackle the challenge of fine-tuning these models for specific domains in a data and computationally efficient manner. Typically, researchers fine-tune models using corpus-specific labeled data from sources such as TREC. We first answer the question: How much data of this type do we need? Recognizing that the most computationally efficient training is no training, we explore zero-shot ranking using BERT models that have already been fine-tuned with the large MS MARCO passage retrieval dataset. We arrive at the surprising and novel finding that {\textquotedblleft}some{\textquotedblright} labeled in-domain data can be worse than none at all."
}
Markdown (Informal)
[A Little Bit Is Worse Than None: Ranking with Limited Training Data](https://aclanthology.org/2020.sustainlp-1.14/) (Zhang et al., sustainlp 2020)
ACL
Xinyu Zhang, Andrew Yates, and Jimmy Lin. 2020. A Little Bit Is Worse Than None: Ranking with Limited Training Data. In Proceedings of SustaiNLP: Workshop on Simple and Efficient Natural Language Processing, pages 107–112, Online. Association for Computational Linguistics.