@inproceedings{kermani-etal-2025-finetuning,
title = "Finetuning Pre-trained Language Models for Bidirectional Sign Language Gloss to Text Translation",
author = "Kermani, Arshia and
Irani, Habib and
Metsis, Vangelis",
editor = "Hasanuzzaman, Mohammed and
Quiroga, Facundo Manuel and
Modi, Ashutosh and
Kamila, Sabyasachi and
Artiaga, Keren and
Joshi, Abhinav and
Singh, Sanjeet",
booktitle = "Proceedings of the Workshop on Sign Language Processing (WSLP)",
month = dec,
year = "2025",
address = "IIT Bombay, Mumbai, India (Co-located with IJCNLP{--}AACL 2025)",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.wslp-main.11/",
pages = "73--81",
ISBN = "979-8-89176-304-3",
abstract = "Sign Language Translation (SLT) is a crucial technology for fostering communication accessibility for the Deaf and Hard-of-Hearing (DHH) community. A dominant approach in SLT involves a two-stage pipeline: first, transcribing video to sign language glosses, and then translating these glosses into natural text. This second stage, gloss-to-text translation, is a challenging, low-resource machine translation task due to data scarcity and significant syntactic divergence. While prior work has often relied on training translation models from scratch, we show that fine-tuning large, pre-trained language models (PLMs) offers a more effective and data-efficient paradigm. In this work, we conduct a comprehensive bidirectional evaluation of several PLMs (T5, Flan-T5, mBART, and Llama) on this task. We use a collection of popular SLT datasets (RWTH-PHOENIX-14T, SIGNUM, and ASLG-PC12) and evaluate performance using standard machine translation metrics. Our results show that fine-tuned PLMs consistently and significantly outperform Transformer models trained from scratch, establishing new state-of-the-art results. Crucially, our bidirectional analysis reveals a significant performance gap, with Text-to-Gloss translation posing a greater challenge than Gloss-to-Text. We conclude that leveraging the linguistic knowledge of pre-trained models is a superior strategy for gloss translation and provides a more practical foundation for building robust, real-world SLT systems."
}
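For readers wanting a concrete starting point, below is a minimal sketch (not the authors' released code) of the fine-tuning paradigm the abstract describes: adapting a pre-trained seq2seq model, here T5 via Hugging Face Transformers, to gloss-to-text translation. The model choice, dataset field names ("gloss", "text"), task prefix, and hyperparameters are illustrative assumptions, not values reported in the paper.

```python
# Hedged sketch: fine-tune a pre-trained seq2seq LM on gloss-to-text pairs.
# Field names, hyperparameters, and the toy example are assumptions for illustration.
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)
from datasets import Dataset

tokenizer = AutoTokenizer.from_pretrained("t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

# Toy gloss/text pair standing in for a corpus such as RWTH-PHOENIX-14T.
pairs = Dataset.from_dict({
    "gloss": ["MORGEN REGEN WAHRSCHEINLICH"],
    "text": ["Morgen wird es wahrscheinlich regnen."],
})

def preprocess(batch):
    # The prefix marks the translation direction; swapping source and target
    # columns gives the text-to-gloss direction the paper also evaluates.
    inputs = tokenizer(
        ["translate gloss to text: " + g for g in batch["gloss"]],
        truncation=True, max_length=128,
    )
    labels = tokenizer(text_target=batch["text"], truncation=True, max_length=128)
    inputs["labels"] = labels["input_ids"]
    return inputs

tokenized = pairs.map(preprocess, batched=True, remove_columns=["gloss", "text"])

args = Seq2SeqTrainingArguments(
    output_dir="gloss2text-t5",       # checkpoint directory (illustrative)
    per_device_train_batch_size=8,
    num_train_epochs=3,
    learning_rate=3e-4,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=tokenized,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)
trainer.train()
```

The same setup swaps in Flan-T5, mBART, or a decoder-only model by changing the checkpoint name; evaluation against references would then use standard MT metrics such as BLEU (e.g., via sacrebleu).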