@inproceedings{li-etal-2023-transformer,
title = "{T}ran{SF}ormer: Slow-Fast Transformer for Machine Translation",
author = "Li, Bei and
Jing, Yi and
Tan, Xu and
Xing, Zhen and
Xiao, Tong and
Zhu, Jingbo",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.findings-acl.430/",
doi = "10.18653/v1/2023.findings-acl.430",
pages = "6883--6896",
abstract = "Learning multiscale Transformer models has been evidenced as a viable approach to augmenting machine translation systems. Prior research has primarily focused on treating subwords as basic units in developing such systems. However, the incorporation of fine-grained character-level features into multiscale Transformer has not yet been explored. In this work, we present a \textbf{S}low-\textbf{F}ast two-stream learning model, referred to as Tran\textbf{SF}ormer, which utilizes a {\textquotedblleft}slow{\textquotedblright} branch to deal with subword sequences and a {\textquotedblleft}fast{\textquotedblright} branch to deal with longer character sequences. This model is efficient since the fast branch is very lightweight by reducing the model width, and yet provides useful fine-grained features for the slow branch. Our TranSFormer shows consistent BLEU improvements (larger than 1 BLEU point) on several machine translation benchmarks."
}
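To make the slow-fast idea in the abstract concrete, below is a minimal PyTorch sketch of one two-stream encoder layer: a standard-width "slow" branch over subword tokens and a narrow, lightweight "fast" branch over the longer character sequence. The fusion by cross-attention, the projection layer, and all dimensions here are illustrative assumptions, not the paper's exact design.

```python
# Hypothetical sketch of a Slow-Fast two-stream encoder layer in the spirit of
# TranSFormer. Fusion mechanism and dimensions are assumptions for illustration.
import torch
import torch.nn as nn


class SlowFastEncoderLayer(nn.Module):
    def __init__(self, d_slow=512, d_fast=64, n_heads_slow=8, n_heads_fast=2):
        super().__init__()
        # "Slow" branch: standard-width self-attention over subword tokens.
        self.slow_self_attn = nn.TransformerEncoderLayer(
            d_model=d_slow, nhead=n_heads_slow, batch_first=True)
        # "Fast" branch: narrow self-attention over the longer character
        # sequence; the reduced model width keeps its cost small.
        self.fast_self_attn = nn.TransformerEncoderLayer(
            d_model=d_fast, nhead=n_heads_fast, batch_first=True)
        # Project fast features up to the slow width so the slow branch can
        # consume them (assumed fusion: cross-attention from slow to fast).
        self.fast_to_slow = nn.Linear(d_fast, d_slow)
        self.cross_attn = nn.MultiheadAttention(
            embed_dim=d_slow, num_heads=n_heads_slow, batch_first=True)
        self.norm = nn.LayerNorm(d_slow)

    def forward(self, subword_x, char_x):
        # subword_x: (batch, n_subwords, d_slow); char_x: (batch, n_chars, d_fast)
        slow = self.slow_self_attn(subword_x)
        fast = self.fast_self_attn(char_x)
        # The slow branch queries fine-grained character-level features
        # produced by the fast branch.
        fast_proj = self.fast_to_slow(fast)
        fused, _ = self.cross_attn(query=slow, key=fast_proj, value=fast_proj)
        return self.norm(slow + fused), fast


# Example: 8 subwords vs. a longer 32-character view of the same sentence.
layer = SlowFastEncoderLayer()
slow_out, fast_out = layer(torch.randn(2, 8, 512), torch.randn(2, 32, 64))
print(slow_out.shape)  # torch.Size([2, 8, 512])
```

The key efficiency property the abstract describes is visible here: the character branch runs over a longer sequence but at a much smaller width (64 vs. 512 in this sketch), so it stays cheap while still supplying fine-grained features to the subword branch.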