% Conference paper entry (aclanthology.org export). Abstract sentence spacing repaired;
% values use brace delimiters so nested LaTeX groups such as {--} and {\%} stay balanced.
@inproceedings{chiu-liu-2026-probing,
  title     = {Probing Functional Correctness in Diffusion Language Models},
  author    = {Chiu, Guan-Ming and
               Liu, Jeng-Yue},
  editor    = {T.Y.S.S., Santosh and
               Rodriguez, Juan Diego and
               de Gibert, Ona},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-srw.15/},
  pages     = {163--172},
  isbn      = {979-8-89176-393-7},
  abstract  = {Diffusion language models generate text by iteratively denoising all tokens in parallel, but when and where their hidden states encode whether the output will be functionally correct remains unknown. We present the first probing study of DLM internals, training linear classifiers on hidden states to predict functional correctness. Across two models (LLaDA-8B, Dream-7B) and four tasks, we find that DLMs uniquely accumulate correctness signal across denoising steps (AUC gains of 0.08{--}0.11 on reasoning tasks), absent in single-pass AR decoding. However, step-0 signal reflects prompt difficulty rather than diffusion-specific computation. Signal emergence is task-dependent: structural tasks show flat profiles while reasoning tasks show gradual buildup. The two models exhibit distinct layer dynamics, with LLaDA concentrating signal in upper layers while Dream redistributes toward lower layers. We further show that probe confidence can identify likely failures, enabling selective generation that avoids 36{--}98{\%} of wasted compute.},
}

Markdown (Informal)
[Probing Functional Correctness in Diffusion Language Models](https://preview.aclanthology.org/ingest-acl/2026.acl-srw.15/) (Chiu & Liu, ACL 2026)
ACL
- Guan-Ming Chiu and Jeng-Yue Liu. 2026. Probing Functional Correctness in Diffusion Language Models. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026), pages 163–172, San Diego, California, United States. Association for Computational Linguistics.