@comment{
  ACL Anthology record for the BIOPSY paper, EMNLP 2025 Industry Track
  (Anthology ID 2025.emnlp-industry.159).

  url:     canonical aclanthology.org address built from the Anthology ID,
           replacing the ingestion-preview link
           (preview.aclanthology.org/ingest-emnlp), which is a staging host.
  title:   the acronym is brace-protected so sentence-casing styles keep it
           upper-case.
  address: kept exactly as exported; it appears to hold the conference
           location rather than the publisher's city. TODO confirm before
           normalising.
}
@inproceedings{chetwani-mahmmdla-2025-biopsy,
  title     = {{BIOPSY} - Biomarkers In Oncology: Pipeline for Structured Yielding},
  author    = {Chetwani, Sanya A. and
               Mahmmdla, Jaseem},
  editor    = {Potdar, Saloni and
               Rojas-Barahona, Lina and
               Montella, Sebastien},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = nov,
  year      = {2025},
  address   = {Suzhou (China)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-industry.159/},
  pages     = {2313--2321},
  isbn      = {979-8-89176-333-3},
  abstract  = {In clinical science, biomarkers are crucial indicators for early cancer detection, prognosis, and guiding personalized treatment decisions. Although critical, extracting biomarkers and their levels from clinical texts remains a complex and underexplored problem in natural language processing research. In this paper, we present BIOPSY, an end-to-end pipeline that integrates a domain-adapted biomarker entity recognition model, a relation extraction model to link biomarkers to their respective mutations, a biomarker-type classifier, and finally, a tailored algorithm to capture biomarker expression levels. Evaluated on 5,000 real-world clinical texts, our system achieved an overall F1 score of 0.86 for oncology and 0.87 for neuroscience domains. This reveals the ability of the pipeline to adapt across various clinical sources, including trial records, research papers, and medical notes, offering the first comprehensive solution for end-to-end, context-aware biomarker extraction and interpretation in clinical research.},
}
Markdown (Informal)
[BIOPSY - Biomarkers In Oncology: Pipeline for Structured Yielding](https://aclanthology.org/2025.emnlp-industry.159/) (Chetwani & Mahmmdla, EMNLP 2025)
ACL