@inproceedings{rao-etal-2025-nsf,
title = "{NSF}-{S}ci{F}y: Mining the {NSF} Awards Database for Scientific Claims",
author = "Rao, Delip and
You, Weiqiu and
Wong, Eric and
Callison-Burch, Chris",
editor = "Dong, Yue and
Xiao, Wen and
Zhang, Haopeng and
Zhang, Rui and
Ernst, Ori and
Wang, Lu and
Liu, Fei",
booktitle = "Proceedings of The 5th New Frontiers in Summarization Workshop",
month = nov,
year = "2025",
address = "Hybrid",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.newsum-main.13/",
pages = "183--198",
ISBN = "979-8-89176-337-1",
abstract = "We introduce NSF-SciFy, a comprehensive dataset of scientific claims and investigation proposals extracted from National Science Foundation award abstracts. While previous scientific claim verification datasets have been limited in size and scope, NSF-SciFy represents a significant advance with an estimated 2.8 million claims from 400,000 abstracts spanning all science and mathematics disciplines. We present two focused subsets: NSF-SciFy-MatSci with 114,000 claims from materials science awards, and NSF-SciFy-20K with 135,000 claims across five NSF directorates. Using zero-shot prompting, we develop a scalable approach for joint extraction of scientific claims and investigation proposals. We demonstrate the dataset{'}s utility through three downstream tasks: non-technical abstract generation, claim extraction, and investigation proposal extraction. Fine-tuning language models on our dataset yields substantial improvements, with relative gains often exceeding 100{\%}, particularly for claim and proposal extraction tasks. Our error analysis reveals that extracted claims exhibit high precision but lower recall, suggesting opportunities for further methodological refinement. NSF-SciFy enables new research directions in large-scale claim verification, scientific discovery tracking, and meta-scientific analysis."
}