@comment{
  Shared-task system paper in the SMM4H-HeaRD 2026 workshop proceedings.
  Case-sensitive tokens in title and booktitle are protected as whole words.
  The hash sign in the title is kept outside the protecting group on purpose:
  a top-level group that begins with a backslash is a BibTeX special character,
  and the letters inside it would still be case-folded by sentence-casing styles.
  NOTE(review): url points at a preview.aclanthology.org ingest build; presumably
  the permanent address is the same path on aclanthology.org -- confirm before release.
}
@inproceedings{awatramani-2026-vasudev,
  author    = {Awatramani, Vasudev},
  title     = {Vasudev {Awatramani} at \#{SMM4H-HeaRD} 2026: A Two-Pass {LLM} Pipeline with Deterministic Rule Derivation for Interpretable Insomnia Detection in Clinical Notes},
  editor    = {Lopez-Garcia, Guillermo and Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {160--164},
  isbn      = {979-8-89176-432-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.26/},
  abstract  = {We describe our system for Shared Task 2 of {\#}SMM4H{--}HeaRD 2026, which targets the detection of insomnia in MIMIC-III clinical notes. We frame the task as evidence extraction followed by deterministic rule application, rather than end-to-end label prediction. Our system operates in two passes: (1) a Gemini 2.5 Flash large language model (LLM), invoked through typed prompts written in BAML, extracts structured evidence (sleep difficulties, daytime impairment, hypnotic medications) with verbatim character-level citations from each note; (2) a small Python rule engine deterministically applies the task{'}s published Insomnia rules{--}Definition 1, Definition 2, and Rules B and C{--}to derive the binary patient-level label, the rule-component labels, and their evidence spans. We submitted two test-set systems: a zero-shot variant and a retrieval-augmented few-shot variant that selects nearest-neighbor training notes via FAISS over a sentence-embedding index. Our zero-shot variant achieved F1 = 0.8108 on Subtask 1 (binary classification) and a label-classification micro-F1 of 0.7126 with partial-match span F1 = 0.6621 on Subtask 2, both above the across-team mean. We additionally evaluate a GEPA-optimized prompt variant on the validation split. We discuss two findings of methodological interest: the few-shot variant improves Subtask 1 precision but does not improve F1, and does not move the multi-label or span metrics on Subtask 2 in our submission, and pushing the deterministic rule engine to consume LLM-extracted evidence (rather than asking the LLM to emit labels directly) gives strong, easily auditable behavior on a small test set.},
}
Markdown (Informal)
[Vasudev Awatramani at #SMM4H-HeaRD 2026: A Two-Pass LLM Pipeline with Deterministic Rule Derivation for Interpretable Insomnia Detection in Clinical Notes](https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.26/) (Awatramani, SMM4H 2026)
ACL