@comment{System-description paper by team Aurum for the MedExACT shared task, published in the BioNLP 2026 shared-task proceedings.
NOTE(review): the url field points at a preview.aclanthology.org ingest build; confirm the canonical Anthology URL before relying on it.}
@inproceedings{kumari-etal-2026-team,
  title     = {Team {Aurum} at {MedExACT} 2026@{ACL}: Data Augmentation and {Clinical Longformer} Fine-Tuning for Medical Decision Extraction},
  author    = {Kumari, Jyoti and
               Ulli, Vinay and
               Mondal, Anindita},
  editor    = {Gupta, Deepak and
               Demner-Fushman, Dina},
  booktitle = {Proceedings of the {BioNLP} 2026 (Shared Tasks)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-2.29/},
  pages     = {224--228},
  isbn      = {979-8-89176-435-4},
  abstract  = {This paper describes the system submitted by team Aurum to the Medical Decision Extraction, Analysis, and Classification Task (MedExACT) at BioNLP 2026. The task requires the extraction and classification of contiguous text spans representing medical decisions from lengthy ICU discharge summaries. To address the dual challenges of long document lengths and severe class imbalance within a limited training set of 350 notes, we propose a two-pronged strategy. First, we employ a tripartite data augmentation pipeline utilizing rule-based entity replacement, LLM-based contextual paraphrasing, and synthetic note generation to expand the training data to over 2,300 notes. Second, we fine-tune a domain-specific Clinical Longformer model equipped with a sliding-window inference mechanism and Focal Loss to handle sequences up to 2,048 tokens while focusing on rare decision categories. Paired with a targeted post-processing module, our system achieved a Final Score of 0.5251, demonstrating high token-level detection (Token F1: 0.6311) and strong stability across patient demographics.},
}
Markdown (Informal)
[Team Aurum at MedExACT 2026@ACL: Data Augmentation and Clinical Longformer Fine-Tuning for Medical Decision Extraction](https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-2.29/) (Kumari et al., BioNLP 2026)
ACL