% LREC 2026 main-track paper introducing the ADHD-Lang Reddit corpus.
% Typed as a proceedings contribution: the venue is a conference and the
% record carries volume editors, which standard article formatting ignores.
% The booktitle is carried over verbatim from the exporter's venue string;
% NOTE(review): the official proceedings title may differ -- TODO confirm.
% NOTE(review): the url points at the Anthology preview/ingest host; swap in
% the permanent link once the volume is published -- TODO confirm.
% "anthology-volume" is an ignored annotation field that keeps the Anthology
% volume id without printing it as a bound volume number.
@inproceedings{wiechmann-etal-2026-adhd,
  title            = {{ADHD}-Lang: A Large-Scale Social Media Dataset for Verbal Behavior and Digital Phenotyping in Adult {ADHD}},
  author           = {Wiechmann, Daniel and
                      Kerz, Elma and
                      Kempa, Edward and
                      Qiao, Yu},
  editor           = {Piperidis, Stelios and
                      Bel, N{\'u}ria and
                      van den Heuvel, Henk and
                      Ide, Nancy and
                      Krek, Simon and
                      Toral, Antonio},
  booktitle        = {International Conference on Language Resources and Evaluation},
  anthology-volume = {main},
  month            = may,
  year             = {2026},
  address          = {Palma de Mallorca, Spain},
  publisher        = {ELRA Language Resource Association},
  url              = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.577/},
  pages            = {7279--7291},
  abstract         = {We introduce ADHD-Lang, a large-scale language resource derived from Reddit to advance computational phenotyping of adult ADHD. The corpus is constructed using a high-precision self-disclosure pattern to confirm ADHD diagnoses and a matched control cohort, comprising 12,070 ADHD users (317,073 posts; 2.83M sentences) and 12,070 controls (174,765 posts; 1.27M sentences). In releasing ADHD-Lang to the research community, we also provide the first comprehensive baseline results, systematically examining the accuracy{--}transparency trade-off across three model families: (1) interpretable shallow machine learning models trained on clinically meaningful, expert-engineered language biomarkers; (2) a deep BiLSTM network trained on the same feature representations to capture temporal dynamics across users' posts; and (3) black-box transformer-based models (BERT, RoBERTa, MentalRoBERTa) leveraging contextual embeddings{---}non-interpretable, high-dimensional representations. ADHD-Lang is released as a standardized benchmark to promote reproducible research and accelerate progress toward digital verbal-behavior phenotyping for adult ADHD.},
}
Markdown (Informal)
[ADHD-Lang: A Large-Scale Social Media Dataset for Verbal Behavior and Digital Phenotyping in Adult ADHD](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.577/) (Wiechmann et al., LREC 2026)
ACL