% System-description paper in the SMM4H-HeaRD 2026 workshop proceedings.
% Case-sensitive names in title and booktitle are protected as whole-word brace
% groups. The hash sign keeps its own group so that the protected name does not
% begin with a control sequence (BibTeX re-cases the contents of such groups).
% NOTE(review): url is a preview.aclanthology.org ingest link -- confirm the
% permanent URL before relying on it.
% NOTE(review): address is kept as exported; presumably it is the event
% location rather than the publisher's city -- confirm against the target style.
@inproceedings{dey-etal-2026-cuet-diagnlp,
  title     = {{CUET\_DiagNLP} at {\#}{SMM4H-HeaRD} 2026: Per-Axis {TNM} Staging from Pathology Reports and Opioid Impact Span Detection from Social Media},
  author    = {Dey, Shuva and
               Barua, Priyangshu and
               Habib, Mohammad Ashfak},
  editor    = {Lopez-Garcia, Guillermo and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.30/},
  pages     = {182--186},
  isbn      = {979-8-89176-432-3},
  abstract  = {In this paper, we describe systems for two {\#}SMM4H-HeaRD 2026 shared tasks. Task 6 asks for per-axis TNM cancer staging from free-text TCGA pathology reports under severe label imbalance and long-document constraints. We fine-tune GatorTron-base separately on each axis using Focal loss with class weights and a pooled [CLS]{--}mean representation, reaching macro F1 of 0.700 (T), 0.774 (N), and 0.640 (M) on test set 2 against a baseline of 0.454, 0.591, and 0.554 respectively. Task 7 asks for span-level detection of opioid-related ClinicalImpacts and SocialImpacts in first-person Reddit posts. We combine DeBERTa-large and PubMedBERT (two seeds each) in a uniform-weight ensemble with boundary-aware loss, entity-replacement augmentation, and a first-person post filter, achieving strict F1 of 0.51 and relaxed F1 of 0.60, above both the task mean (0.46 / 0.55) and median (0.48 / 0.58).},
}
Markdown (Informal)
[CUET_DiagNLP at #SMM4H-HeaRD 2026: Per-Axis TNM Staging from Pathology Reports and Opioid Impact Span Detection from Social Media](https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.30/) (Dey et al., SMM4H 2026)
ACL