% Conference paper from the Proceedings of Machine Translation Summit XX, Volume 1 (2025).
% The second author's compound surname is braced so that BibTeX keeps
% "Van de Cruys" as one family name instead of splitting it at the lowercase "de".
% The citation key is kept unchanged so existing citations continue to resolve.
% NOTE(review): url now targets the canonical ACL Anthology record instead of the
% temporary ingestion-preview branch; confirm that it resolves.
@inproceedings{zhou-de-cruys-2025-non,
    title = "Non-autoregressive Modeling for Sign-gloss to Texts Translation",
    author = "Zhou, Fan and
      {Van de Cruys}, Tim",
    editor = "Bouillon, Pierrette and
      Gerlach, Johanna and
      Girletti, Sabrina and
      Volkart, Lise and
      Rubino, Raphael and
      Sennrich, Rico and
      Farinha, Ana C. and
      Gaido, Marco and
      Daems, Joke and
      Kenny, Dorothy and
      Moniz, Helena and
      Szoc, Sara",
    booktitle = "Proceedings of Machine Translation Summit XX: Volume 1",
    month = jun,
    year = "2025",
    address = "Geneva, Switzerland",
    publisher = "European Association for Machine Translation",
    url = "https://aclanthology.org/2025.mtsummit-1.17/",
    pages = "220--230",
    isbn = "978-2-9701897-0-1",
    abstract = "Automatic sign language translation has seen significant advancements, driven by progress in computer vision and natural language processing. While end to end sign-to-text translation systems are available, many systems still rely on a gloss-based representation{--}an intermediate symbolic representation that functions as a bridge between sign language and its written counterpart. This paper focuses on the gloss-to-text (gloss2text) task, a key step in the sign-to-text translation pipeline, which has traditionally been addressed using autoregressive (AR) modeling approaches. In this study, we propose the use of non-autoregressive (NAR) modeling techniques, including non-autoregressive Transformer (NAT) and diffusion models, tailored to the unique characteristics of gloss2text. Specifically, we introduce PointerLevT, a novel NAT-based model designed to enhance performance in this task. Our experiments demonstrate that NAR models achieve higher accuracy than pre-trained AR models with less data, while also matching the performance of fine-tuned AR models such as mBART. Furthermore, we evaluate inference speed and find that NAR models benefit from parallel generation, resulting in faster inference. However, they require more time to achieve an optimal balance between accuracy and speed, particularly in the multistep denoising process of diffusion models."
}
Markdown (Informal)
[Non-autoregressive Modeling for Sign-gloss to Texts Translation](https://preview.aclanthology.org/mtsummit-25-ingestion/2025.mtsummit-1.17/) (Zhou & Van de Cruys, MTSummit 2025)
ACL