% ACL Anthology record 2026.smm4h-1.46: paper in the SMM4H-HeaRD 2026 workshop proceedings.
% NOTE(review): author is stored as family name "Anastasia", given name "Stefanescu" -- confirm the order against the paper.
% NOTE(review): url targets the preview.aclanthology.org ingest host -- confirm the canonical address before citing.
@inproceedings{anastasia-2026-gmail,
  title     = {No{\_}gmail at {\#}{SMM4H-HeaRD} 2026: Detecting Patient Metadata in {COVID}-19 Scientific Literature: A Comparative Study of Encoder-Only and Autoregressive Language Models},
  author    = {Anastasia, Stefanescu},
  editor    = {Lopez-Garcia, Guillermo and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.46/},
  pages     = {280--285},
  isbn      = {979-8-89176-432-3},
  abstract  = {Identifying sentences in COVID-19 literature that report patient metadata is an important step in genomic epidemiology, currently requiring costly manual curation. We compare fine-tuned encoder-only models (BERT, BioLinkBERT) and autoregressive LLMs (Llama, Gemma, GPT-OSS) under prompting and fine-tuning regimes, using Focal Loss and undersampling to address severe class imbalance. Encoder-only models substantially outperform autoregressive models: BioLinkBERT-base with Focal Loss achieves macro F1 of 0.76, versus 0.54 for the best fine-tuned autoregressive model.},
}

Markdown (Informal)
[No_gmail at #SMM4H-HeaRD 2026: Detecting Patient Metadata in COVID-19 Scientific Literature: A Comparative Study of Encoder-Only and Autoregressive Language Models](https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.46/) (Anastasia, SMM4H 2026)
ACL