% SMM4H-HeaRD 2026 shared-task system paper (Gladiators, Task 1).
% NOTE(review): the url field uses the preview.aclanthology.org/ingest-acl-workshops path,
% presumably a staging link -- confirm the canonical Anthology URL (or DOI) before citing.
@inproceedings{singh-2026-gladiators,
    title     = {Gladiators at {\#}{SMM4H}{--}{HeaRD} 2026: Multi-Seed {XLM-RoBERTa} Ensemble with Focal Loss and Per-Language Threshold Optimization for Multilingual Adverse Drug Event Detection},
    author    = {Singh, Ankit Kumar},
    editor    = {Lopez-Garcia, Guillermo and
                 Gonzalez-Hernandez, Graciela},
    booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H}-{HeaRD} 2026) Workshop and Shared Tasks},
    month     = jul,
    year      = {2026},
    address   = {San Diego, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.2/},
    pages     = {7--11},
    isbn      = {979-8-89176-432-3},
    abstract  = {This paper describes the Gladiators system for Task 1 of the SMM4H 2026 shared task on binary classification of adverse drug event (ADE) mentions in multilingual social media posts. Our system fine-tunes three XLM-RoBERTa large models with different random seeds using focal loss ({\ensuremath{\alpha}}=0.75, {\ensuremath{\gamma}}=2.0) and 3{\texttimes} positive oversampling, then averages their predicted probabilities and applies per-language threshold optimization. On the development set, our ensemble achieves a pooled binary F1 of 0.7505. On the official test set{---}which introduced surprise Farsi comprising 35.5{\%} of samples{---}our system achieves F1 = 0.6039, above the competition mean (0.5465) and median (0.5798). We evaluated eleven approaches and document key negative results. Post evaluation, a six-model cross-regime ensemble improved dev F1 to 0.7585.},
}
Markdown (Informal)
[Gladiators at #SMM4H–HeaRD 2026: Multi-Seed XLM-RoBERTa Ensemble with Focal Loss and Per-Language Threshold Optimization for Multilingual Adverse Drug Event Detection](https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.2/) (Singh, SMM4H 2026)
ACL