@inproceedings{bolucu-etal-2025-bridging,
title = "Bridging the Gap: Instruction-Tuned {LLM}s for Scientific Named Entity Recognition",
author = {B{\"o}l{\"u}c{\"u}, Necva and
Rybinski, Maciej and
Wan, Stephen},
editor = "Accomazzi, Alberto and
Ghosal, Tirthankar and
Grezes, Felix and
Lockhart, Kelly",
booktitle = "Proceedings of the Third Workshop for Artificial Intelligence for Scientific Publications",
month = dec,
year = "2025",
address = "Mumbai, India and virtual",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.wasp-main.7/",
pages = "56--71",
ISBN = "979-8-89176-310-4",
abstract = "Information extraction (IE) from scientific literature plays an important role in many information-seeking pipelines. Large Language Models (LLMs) have demonstrated strong zero-shot and few-shot performance on IE tasks. However, there are challenges in practical deployment, especially in scenarios that involve sensitive information, such as industrial research or limited budgets. A key question is whether there is a need for a fine-tuned model for optimal domain adaptation (i.e., whether in-domain labelled training data is needed, or zero-shot to few-shot effectiveness is enough). In this paper, we explore this question in the context of IE on scientific literature. We further consider methodological questions, such as alternatives to cloud-based proprietary LLMs (e.g., GPT and Claude) when these are unsuitable due to data privacy, data sensitivity, or cost reasons. This paper outlines empirical results to recommend which locally hosted open-source LLM approach to adopt and illustrates the trade-offs in domain adaptation."
}

Markdown (Informal)
[Bridging the Gap: Instruction-Tuned LLMs for Scientific Named Entity Recognition](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.wasp-main.7/) (Bölücü et al., WASP 2025)