@inproceedings{elshehaby-etal-2026-sparse,
    title = "Sparse Category Routing and Fairness-Aware Optimization for Medical Decision Extraction",
    author = "Elshehaby, Ahmed and
      Abdalla, Mohamed and
      Mohamed, Youssef",
    editor = "Gupta, Deepak and
      Demner-Fushman, Dina",
    booktitle = "Proceedings of the {B}io{NLP} 2026 (Shared Tasks)",
    month = jul,
    year = "2026",
    address = "San Diego, California, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-2.27/",
    pages = "201--212",
    ISBN = "979-8-89176-435-4",
    abstract = "Extracting structured medical decisions from ICU discharge summaries is hard because of long documents, severe category imbalance across nine DICTUM decision types, and a fairness-aware evaluation that penalizes inconsistent performance across demographic subgroups. We present our system for the MedEx-ACT 2026 shared task (Elgaar et al., 2026), which fine-tunes BiomedBERT with a composite loss combining label-smoothed cross-entropy, a soft token-F1 auxiliary term, and R-Drop regularization. At inference time we apply a deterministic ensemble: half-offset sliding-window augmentation across four window configurations, dual-branch logit aggregation from the same checkpoint, per-category length calibration on the Anchor Branch, and sparse routing of categories 4 and 7 to a context-weighted specialist branch motivated by their unusual span-length distributions. Adding R-Drop improved validation Overall{\_}F1 by 1.24 points over the CE + soft-F1 baseline, with a larger 1.70-point gain on Worst-Group F1. Our best submission achieves Span F1 of 0.4900, Token F1 of 0.6796, and an official Overall{\_}F1 of 0.5724, with the African American subgroup as the Worst-Group bottleneck at Base{\_}Score 0.5601."
}
Markdown (Informal)
[Sparse Category Routing and Fairness-Aware Optimization for Medical Decision Extraction](https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-2.27/) (Elshehaby et al., BioNLP 2026)
ACL