% Shared-task system paper (team SMMTech) from the SMM4H-HeaRD 2026 workshop proceedings.
% NOTE(review): the url field points at the ACL Anthology preview/ingest host;
% confirm and switch to the canonical Anthology URL once the volume is published.
@inproceedings{cristea-2026-smmtech,
  title     = {{SMMTech} at {\#}{SMM4H-HeaRD} 2026: Detection of Insomnia in Clinical Notes},
  author    = {Cristea, Emilia-Ioana},
  editor    = {Lopez-Garcia, Guillermo and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.15/},
  pages     = {88--92},
  isbn      = {979-8-89176-432-3},
  abstract  = {This paper describes the participation of team SMMTech in the SMM4H-HeaRD 2026 Shared Task 2: Detection of Insomnia in Clinical Notes. We present a comparative architectural study exploring the friction between extractive token-classification models and generative Large Language Models (LLMs) in clinical span extraction, on the MIMIC-III Clinical Database. During the validation phase we established baselines using encoder-only transformers such as BERT, ClinicalBERT, BigBird and Clinical BigBird. For the official test phase, we deployed a 4-bit quantized generative hybrid pipeline using Llama3-Med42-8B to evaluate its multi-hop reasoning capabilities. While the generative pipeline achieved an F1-score of 0.4783 on Subtask 1 (Classification), it struggled with exact span matching on Subtask 2. In this paper we present the mechanical limitations of zero-shot JSON extraction and the necessity of decoupling clinical reasoning from character-level span extraction.},
}
Markdown (Informal)
[SMMTech at #SMM4H-HeaRD 2026: Detection of Insomnia in Clinical Notes](https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.15/) (Cristea, SMM4H 2026)
ACL