@inproceedings{nguyen-tri-etal-2025-diffusion,
title = "Diffusion Directed Acyclic Transformer for Non-Autoregressive Machine Translation",
author = "Nguyen-Tri, Quan and
Tran, Cong Dao and
Thanh-Tung, Hoang",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingestion-acl-25/2025.acl-short.64/",
pages = "814--828",
ISBN = "979-8-89176-252-7",
abstract = "Non-autoregressive transformers (NATs) predict entire sequences in parallel to reduce decoding latency, but they often encounter performance challenges due to the multi-modality problem. A recent advancement, the Directed Acyclic Transformer (DAT), addresses this issue by capturing multiple translation modalities to paths in a Directed Acyclic Graph (DAG). However, the collaboration with the latent variable introduced through the Glancing training (GLAT) is crucial for DAT to attain state-of-the-art performance. In this paper, we introduce Diffusion Directed Acyclic Transformer (Diff-DAT), which serves as an alternative to GLAT as a latent variable introduction for DAT. Diff-DAT offers two significant benefits over the previous approach. Firstly, it establishes a stronger alignment between training and inference. Secondly, it facilitates a more flexible tradeoff between quality and latency."
}
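For context on the DAG formulation mentioned in the abstract, here is a minimal sketch (not taken from the paper) of how a DAT-style model scores a target sequence by marginalizing over all increasing vertex paths with dynamic programming. The function name `dag_log_likelihood`, the array shapes, and the convention that paths run from the first to the last DAG vertex are illustrative assumptions, not the authors' implementation.

```python
import numpy as np
from scipy.special import logsumexp

def dag_log_likelihood(log_emit, log_trans, target):
    """Log-probability of `target` marginalized over DAG paths (illustrative).

    log_emit  : (L, V) array, log P(token | vertex) for L DAG vertices.
    log_trans : (L, L) array, log P(vertex j | vertex i); entries with
                j <= i are -inf, so every path visits vertices in order.
    target    : sequence of n token ids, n <= L.
    """
    n = len(target)
    # alpha[j] = log prob of emitting target[: t + 1] with target[t] at vertex j
    alpha = np.full(log_emit.shape[0], -np.inf)
    alpha[0] = log_emit[0, target[0]]  # assumption: paths start at vertex 0
    for t in range(1, n):
        # Hop i -> j (j > i enforced by the -inf entries), emit target[t] at j.
        alpha = logsumexp(alpha[:, None] + log_trans, axis=0) + log_emit[:, target[t]]
    return alpha[-1]  # assumption: paths end at the final vertex
```

In a real DAT-style decoder both distributions are produced by the network and the number of vertices L exceeds the target length; here they are plain arrays so the dynamic program is easy to verify in isolation.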
Markdown (Informal)
[Diffusion Directed Acyclic Transformer for Non-Autoregressive Machine Translation](https://preview.aclanthology.org/ingestion-acl-25/2025.acl-short.64/) (Nguyen-Tri et al., ACL 2025)