% Conference paper from the EACL 2026 proceedings (Volume 1: Long Papers), pp. 6678--6692.
% Acronyms and proper nouns are brace-protected as whole words so that
% sentence-casing styles keep their capitals without breaking kerning.
% NOTE(review): url points at a preview/ingest host -- confirm the canonical
% Anthology URL once the volume is published and switch to it.
% NOTE(review): address presumably holds the conference venue rather than the
% publisher's city -- confirm this matches the convention used in this file.
@inproceedings{asada-miwa-2026-principled,
  title     = {Principled Self-Correction in Discrete Diffusion: A {UCB}-Guided Framework for Text Generation},
  author    = {Asada, Masaki and
               Miwa, Makoto},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.314/},
  pages     = {6678--6692},
  isbn      = {979-8-89176-380-7},
  abstract  = {Inspired by their success in image synthesis, diffusion models offer a flexible, iterative alternative to rigid left-to-right text generation. However, a fundamental training-inference discrepancy hinders their performance: models are trained on corrupted ground-truth tokens, but at inference time they must denoise inputs corrupted from their own predictions. To bridge this gap, we propose a unified framework. First, Deeper Self-Prediction (DSP) is a multi-step training objective that teaches robust self-correction by forcing the model to denoise its own intermediate outputs. Second, UCB-guided Decoding is a principled inference algorithm that frames token re-masking as a multi-armed bandit problem, using the Upper Confidence Bound (UCB) to balance exploration and exploitation. Experiments on text generation tasks demonstrate consistent improvements over existing diffusion baselines. The framework achieves higher faithfulness and coherence according to both automatic metrics and LLM-as-a-Judge evaluations.},
}
Markdown (Informal)
[Principled Self-Correction in Discrete Diffusion: A UCB-Guided Framework for Text Generation](https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.314/) (Asada & Miwa, EACL 2026)
ACL