% Shared-task system paper in the SMM4H-HeaRD 2026 workshop proceedings (pages 173--176).
% NOTE(review): the url field points at a preview.aclanthology.org ingest branch;
% confirm the canonical Anthology URL once the volume is published.
@inproceedings{zelina-novacek-2026-discovery,
    title = "Discovery@{FI} at {\#}{SMM}4{H}{--}{H}ea{RD} 2026: Ensemble Character Classifier for Multilingual Clinical {NER}",
    author = "Zelina, Petr and
      Novacek, Vit",
    editor = "Lopez-Garcia, Guillermo and
      Gonzalez-Hernandez, Graciela",
    booktitle = "Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM}4{H}-{H}ea{RD} 2026) Workshop and Shared Tasks",
    month = jul,
    year = "2026",
    address = "San Diego, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.28/",
    pages = "173--176",
    ISBN = "979-8-89176-432-3",
    abstract = "We present a system for multilingual clinical named entity recognition (NER) submitted to the MultiClinNER subtask of MultiClinAI 2026, covering all seven languages and three entity classes (disease, symptom, procedure). Our approach trains one binary token classifier ensemble per entity class using cross-lingual fine-tuning of XLM-RoBERTa-large, with all languages handled jointly. We apply character-level ensembling over six models (two encoder variants $\times$ three cross-validation folds). This ensembling method provides more granular probability estimates than single-model classifiers, allowing for more flexible precision-recall trade-off tuning. The system achieves character-level F1 scores of 0.70{--}0.88 on the official test set."
}
Markdown (Informal)
[Discovery@FI at #SMM4H–HeaRD 2026: Ensemble Character Classifier for Multilingual Clinical NER](https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.28/) (Zelina & Novacek, SMM4H 2026)
ACL