@inproceedings{hu-etal-2020-niutrans,
title = "The {N}iu{T}rans System for {WNGT} 2020 Efficiency Task",
author = "Hu, Chi and
Li, Bei and
Li, Yinqiao and
Lin, Ye and
Li, Yanyang and
Wang, Chenglong and
Xiao, Tong and
Zhu, Jingbo",
booktitle = "Proceedings of the Fourth Workshop on Neural Generation and Translation",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.ngt-1.24",
doi = "10.18653/v1/2020.ngt-1.24",
pages = "204--210",
abstract = "This paper describes the submissions of the NiuTrans Team to the WNGT 2020 Efficiency Shared Task. We focus on the efficient implementation of deep Transformer models (Wang et al., 2019; Li et al., 2019) using NiuTensor, a flexible toolkit for NLP tasks. We explored the combination of deep encoder and shallow decoder in Transformer models via model compression and knowledge distillation. The neural machine translation decoding also benefits from FP16 inference, attention caching, dynamic batching, and batch pruning. Our systems achieve promising results in both translation quality and efficiency, e.g., our fastest system can translate more than 40,000 tokens per second with an RTX 2080 Ti while maintaining 42.9 BLEU on newstest2018.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hu-etal-2020-niutrans">
<titleInfo>
<title>The NiuTrans System for WNGT 2020 Efficiency Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chi</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bei</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinqiao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ye</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanyang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenglong</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tong</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingbo</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Neural Generation and Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the submissions of the NiuTrans Team to the WNGT 2020 Efficiency Shared Task. We focus on the efficient implementation of deep Transformer models (Wang et al., 2019; Li et al., 2019) using NiuTensor, a flexible toolkit for NLP tasks. We explored the combination of deep encoder and shallow decoder in Transformer models via model compression and knowledge distillation. The neural machine translation decoding also benefits from FP16 inference, attention caching, dynamic batching, and batch pruning. Our systems achieve promising results in both translation quality and efficiency, e.g., our fastest system can translate more than 40,000 tokens per second with an RTX 2080 Ti while maintaining 42.9 BLEU on newstest2018.</abstract>
<identifier type="citekey">hu-etal-2020-niutrans</identifier>
<identifier type="doi">10.18653/v1/2020.ngt-1.24</identifier>
<location>
<url>https://aclanthology.org/2020.ngt-1.24</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>204</start>
<end>210</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The NiuTrans System for WNGT 2020 Efficiency Task
%A Hu, Chi
%A Li, Bei
%A Li, Yinqiao
%A Lin, Ye
%A Li, Yanyang
%A Wang, Chenglong
%A Xiao, Tong
%A Zhu, Jingbo
%S Proceedings of the Fourth Workshop on Neural Generation and Translation
%D 2020
%8 jul
%I Association for Computational Linguistics
%C Online
%F hu-etal-2020-niutrans
%X This paper describes the submissions of the NiuTrans Team to the WNGT 2020 Efficiency Shared Task. We focus on the efficient implementation of deep Transformer models (Wang et al., 2019; Li et al., 2019) using NiuTensor, a flexible toolkit for NLP tasks. We explored the combination of deep encoder and shallow decoder in Transformer models via model compression and knowledge distillation. The neural machine translation decoding also benefits from FP16 inference, attention caching, dynamic batching, and batch pruning. Our systems achieve promising results in both translation quality and efficiency, e.g., our fastest system can translate more than 40,000 tokens per second with an RTX 2080 Ti while maintaining 42.9 BLEU on newstest2018.
%R 10.18653/v1/2020.ngt-1.24
%U https://aclanthology.org/2020.ngt-1.24
%U https://doi.org/10.18653/v1/2020.ngt-1.24
%P 204-210
Markdown (Informal)
[The NiuTrans System for WNGT 2020 Efficiency Task](https://aclanthology.org/2020.ngt-1.24) (Hu et al., NGT 2020)
ACL
Chi Hu, Bei Li, Yinqiao Li, Ye Lin, Yanyang Li, Chenglong Wang, Tong Xiao, and Jingbo Zhu. 2020. The NiuTrans System for WNGT 2020 Efficiency Task. In Proceedings of the Fourth Workshop on Neural Generation and Translation, pages 204–210, Online. Association for Computational Linguistics.
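
The abstract credits much of the reported speed to decoding-side optimizations: FP16 inference, attention caching, dynamic batching, and batch pruning. The sketch below is a minimal, hypothetical illustration of how dynamic batching and batch pruning interact in a greedy decoding loop; it is not the paper's NiuTensor implementation, and the `model.encode` / `model.decode_step` interface, `model.bos_id`, and `EOS_ID` are placeholder assumptions written in PyTorch-style Python.

```python
# Minimal sketch (not the NiuTensor implementation) of two decoding
# optimizations named in the abstract: dynamic batching and batch pruning
# during greedy NMT decoding. model.encode, model.decode_step, model.bos_id,
# and EOS_ID are hypothetical placeholders for the toolkit's real interface.
import torch

EOS_ID = 2       # assumed end-of-sentence token id
MAX_LEN = 256    # hard cap on target length

def translate_batch(model, src_batch):
    """Greedy-decode a batch, shrinking it as sentences finish."""
    # Dynamic batching happens upstream: sentences are bucketed by similar
    # source length so each src_batch carries little padding.
    enc_out = model.encode(src_batch)                       # [B, S, H]
    active = torch.arange(src_batch.size(0))                # original indices
    tokens = torch.full((src_batch.size(0), 1), model.bos_id, dtype=torch.long)
    finished = {}

    for _ in range(MAX_LEN):
        # With attention caching, decode_step would reuse cached keys/values;
        # here the full prefix is passed for simplicity.
        logits = model.decode_step(enc_out, tokens)          # [b, V]
        next_tok = logits.argmax(dim=-1, keepdim=True)       # [b, 1]
        tokens = torch.cat([tokens, next_tok], dim=1)

        done = next_tok.squeeze(1).eq(EOS_ID)
        for i in done.nonzero(as_tuple=True)[0].tolist():
            finished[int(active[i])] = tokens[i]

        # Batch pruning: drop finished rows so later steps run on a smaller
        # batch instead of recomputing already-finished sentences.
        keep = ~done
        if not keep.any():
            break
        active, tokens, enc_out = active[keep], tokens[keep], enc_out[keep]

    # Sentences that hit MAX_LEN keep their partial hypotheses.
    for j in range(active.size(0)):
        finished.setdefault(int(active[j]), tokens[j])
    return [finished[i] for i in range(src_batch.size(0))]
```

In this sketch, FP16 inference would amount to running the same loop with a half-precision model (for example, calling `model.half()` before decoding), and attention caching would let `decode_step` consume only the newest token rather than the full prefix at every step.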