% Workshop paper record: Team TIET system description for the SMM4H-HeaRD 2026 shared tasks.
% Acronyms in title/booktitle are brace-protected as whole words so sentence-casing styles keep them.
% NOTE(review): url points at the Anthology preview (ingest) host; confirm the canonical URL before release.
@inproceedings{kaur-etal-2026-team,
  title     = {Team {TIET} at {\#}{SMM4H}-{HeaRD} 2026: Fine-tuned Biomedical Transformers with Language-Balanced Sampling for Patient Metadata and Multilingual {ADE} Detection},
  author    = {Kaur, Divrose and
               Bedi, Jatin and
               Singh, Jasmeet},
  editor    = {Lopez-Garcia, Guillermo and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H}-{HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.44/},
  pages     = {268--271},
  isbn      = {979-8-89176-432-3},
  abstract  = {We present Team TIET{'}s systems for two shared tasks at {\#}SMM4H-HeaRD 2026: Task 5 (detection of patient metadata in SARS-CoV-2 sequencing papers) and Task 1 (multilingual adverse drug event detection across six languages plus an unseen Farsi subset). For Task 5 we explore iterative LLM prompting followed by fine-tuning BiomedBERT-base with weighted cross-entropy loss and probability threshold optimization, achieving F1 = 0.760 on the official test set (above the competition mean of 0.729). For Task 1 we fine-tune XLM-RoBERTa-base with a combined language- and class-balanced sampling strategy and per-language threshold tuning, achieving macro F1 = 0.497 overall (0.608 excluding the unseen Farsi subset). We report empirical findings on BERT+LLM ensemble failure with bimodal probability distributions, the superiority of base over large model variants under limited data, and the importance of language-balanced gradient contribution in multilingual classification.},
}
Markdown (Informal)
[Team TIET at #SMM4H-HeaRD 2026: Fine-tuned Biomedical Transformers with Language-Balanced Sampling for Patient Metadata and Multilingual ADE Detection](https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.44/) (Kaur et al., SMM4H 2026)
ACL