% Workshop paper (WASP 2025): task definition and dataset for generating
% research-data metadata from URL citation contexts in scholarly papers.
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% build; confirm the canonical Anthology URL (and a DOI, if one was
% assigned) before relying on this entry.
@inproceedings{watanabe-etal-2025-metadata,
  author    = {Watanabe, Yu and Ito, Koichiro and Matsubara, Shigeki},
  title     = {Metadata Generation for Research Data from {URL} Citation Contexts in Scholarly Papers: Task Definition and Dataset Construction},
  editor    = {Accomazzi, Alberto and Ghosal, Tirthankar and Grezes, Felix and Lockhart, Kelly},
  booktitle = {Proceedings of the Third Workshop for Artificial Intelligence for Scientific Publications},
  publisher = {Association for Computational Linguistics},
  address   = {Mumbai, India and virtual},
  month     = dec,
  year      = {2025},
  pages     = {72--79},
  isbn      = {979-8-89176-310-4},
  url       = {https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.wasp-main.8/},
  abstract  = {This paper proposes a new research task aimed at automatically generating metadata for research data, such as datasets and code, to accelerate open science. From the perspective of `Findable' in the FAIR data principles, research data is required to be assigned a global unique identifier and described with rich metadata. The proposed task is defined as extracting information about research data (specifically, name, generic mention, and in-text citation) from texts surrounding URLs that serve as identifiers for research data references in scholarly papers. To support this task, we constructed a dataset containing approximately 600 manually annotated citation contexts with URLs of research data from conference papers. To evaluate the task, we conducted a preliminary experiment using the constructed dataset, employing the In-Context Learning method with LLMs as a baseline. The results showed that the performance of LLMs matched that of humans in some cases, demonstrating the feasibility of the task.},
}
Markdown (Informal)
[Metadata Generation for Research Data from URL Citation Contexts in Scholarly Papers: Task Definition and Dataset Construction](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.wasp-main.8/) (Watanabe et al., WASP 2025)
ACL