% GraDaSE paper in the EMNLP 2025 proceedings (ACL Anthology ID 2025.emnlp-main.353).
% The url field uses the canonical Anthology permalink rather than a preview-branch link.
@inproceedings{he-etal-2025-gradase,
  title     = {{GraDaSE}: Graph-Based Dataset Search with Examples},
  author    = {He, Jing and
               Lv, Mingyang and
               Shi, Qing and
               Cheng, Gong},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.353/},
  doi       = {10.18653/v1/2025.emnlp-main.353},
  pages     = {6932--6943},
  isbn      = {979-8-89176-332-6},
  abstract  = {Dataset search is a specialized information retrieval task. In the emerging scenario of Dataset Search with Examples (DSE), the user submits a query and a few target datasets that are known to be relevant as examples. The retrieved datasets are expected to be relevant to the query and also similar to the target datasets. Distinguished from existing text-based retrievers, we propose a graph-based approach GraDaSE. Besides the textual metadata of the datasets, we identify their provenance-based and topic-based relationships to construct a graph, and jointly encode their structural and textual information for ranking candidate datasets. GraDaSE outperforms a variety of strong baselines on two test collections, including DataFinder-E that we construct.},
}

Markdown (Informal)
[GraDaSE: Graph-Based Dataset Search with Examples](https://aclanthology.org/2025.emnlp-main.353/) (He et al., EMNLP 2025)
ACL
- Jing He, Mingyang Lv, Qing Shi, and Gong Cheng. 2025. GraDaSE: Graph-Based Dataset Search with Examples. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 6932–6943, Suzhou, China. Association for Computational Linguistics.