@comment{Cleaned ACL Anthology auto-export: braced field delimiters, protected
  proper nouns in title against style downcasing, removed scraped footnote
  marker at end of abstract. DOI/URL kept exactly as exported.}
@article{zhang-etal-2025-active-knowledge,
    title     = {Active Knowledge Structuring for {Large Language Models} in {Materials Science} Text Mining},
    author    = {Zhang, Xin and
                 Yuan, Jingling and
                 Zhang, Peiliang and
                 Liu, Jia and
                 Li, Lin},
    journal   = {Transactions of the Association for Computational Linguistics},
    volume    = {13},
    year      = {2025},
    address   = {Cambridge, MA},
    publisher = {MIT Press},
    url       = {https://preview.aclanthology.org/ingest-eacl/2025.tacl-1.55/},
    doi       = {10.1162/tacl.a.36},
    pages     = {1186--1203},
    abstract  = {Large Language Models (LLMs) offer a promising alternative to traditional Materials Science Text Mining (MSTM) by reducing the need for extensive data labeling and fine-tuning. However, existing zero-/few-shot methods still face limitations in aligning with personalized needs in scientific discovery. To address this, we propose ClassMATe, an active knowledge structuring approach for MSTM. Specifically, we first propose a class definition stylization method to structure knowledge, enabling explicit clustering of latent material knowledge in LLMs for enhanced inference. To align with the scientists' needs, we propose an active needs refining strategy that iteratively clarifies needs by learning from uncertainty-aware hard samples of LLMs, further refining the knowledge structuring. Extensive experiments on seven tasks and eight datasets show that ClassMATe, as a plug-and-play method, achieves performance comparable to supervised learning without requiring fine-tuning or extra knowledge base, highlighting the potential to bridge the gap between LLMs' latent knowledge and real-world scientific applications.},
}
Markdown (Informal)
[Active Knowledge Structuring for Large Language Models in Materials Science Text Mining](https://preview.aclanthology.org/ingest-eacl/2025.tacl-1.55/) (Zhang et al., TACL 2025)
ACL