@inproceedings{germann-etal-2020-speed,
title = "Speed-optimized, Compact Student Models that Distill Knowledge from a Larger Teacher Model: the {UEDIN}-{CUNI} Submission to the {WMT} 2020 News Translation Task",
author = "Germann, Ulrich and
Grundkiewicz, Roman and
Popel, Martin and
Dobreva, Radina and
Bogoychev, Nikolay and
Heafield, Kenneth",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wmt-1.17",
pages = "191--196",
abstract = "We describe the joint submission of the University of Edinburgh and Charles University, Prague, to the Czech/English track in the WMT 2020 Shared Task on News Translation. Our fast and compact student models distill knowledge from a larger, slower teacher. They are designed to offer a good trade-off between translation quality and inference efficiency. On the WMT 2020 Czech ↔ English test sets, they achieve translation speeds of over 700 whitespace-delimited source words per second on a single CPU thread, thus making neural translation feasible on consumer hardware without a GPU.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="germann-etal-2020-speed">
<titleInfo>
<title>Speed-optimized, Compact Student Models that Distill Knowledge from a Larger Teacher Model: the UEDIN-CUNI Submission to the WMT 2020 News Translation Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ulrich</namePart>
<namePart type="family">Germann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Grundkiewicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Popel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Radina</namePart>
<namePart type="family">Dobreva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolay</namePart>
<namePart type="family">Bogoychev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Heafield</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Conference on Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We describe the joint submission of the University of Edinburgh and Charles University, Prague, to the Czech/English track in the WMT 2020 Shared Task on News Translation. Our fast and compact student models distill knowledge from a larger, slower teacher. They are designed to offer a good trade-off between translation quality and inference efficiency. On the WMT 2020 Czech ↔ English test sets, they achieve translation speeds of over 700 whitespace-delimited source words per second on a single CPU thread, thus making neural translation feasible on consumer hardware without a GPU.</abstract>
<identifier type="citekey">germann-etal-2020-speed</identifier>
<location>
<url>https://aclanthology.org/2020.wmt-1.17</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>191</start>
<end>196</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Speed-optimized, Compact Student Models that Distill Knowledge from a Larger Teacher Model: the UEDIN-CUNI Submission to the WMT 2020 News Translation Task
%A Germann, Ulrich
%A Grundkiewicz, Roman
%A Popel, Martin
%A Dobreva, Radina
%A Bogoychev, Nikolay
%A Heafield, Kenneth
%S Proceedings of the Fifth Conference on Machine Translation
%D 2020
%8 nov
%I Association for Computational Linguistics
%C Online
%F germann-etal-2020-speed
%X We describe the joint submission of the University of Edinburgh and Charles University, Prague, to the Czech/English track in the WMT 2020 Shared Task on News Translation. Our fast and compact student models distill knowledge from a larger, slower teacher. They are designed to offer a good trade-off between translation quality and inference efficiency. On the WMT 2020 Czech ↔ English test sets, they achieve translation speeds of over 700 whitespace-delimited source words per second on a single CPU thread, thus making neural translation feasible on consumer hardware without a GPU.
%U https://aclanthology.org/2020.wmt-1.17
%P 191-196
Markdown (Informal)
[Speed-optimized, Compact Student Models that Distill Knowledge from a Larger Teacher Model: the UEDIN-CUNI Submission to the WMT 2020 News Translation Task](https://aclanthology.org/2020.wmt-1.17) (Germann et al., WMT 2020)
ACL
Ulrich Germann, Roman Grundkiewicz, Martin Popel, Radina Dobreva, Nikolay Bogoychev, and Kenneth Heafield. 2020. Speed-optimized, Compact Student Models that Distill Knowledge from a Larger Teacher Model: the UEDIN-CUNI Submission to the WMT 2020 News Translation Task. In Proceedings of the Fifth Conference on Machine Translation, pages 191–196, Online. Association for Computational Linguistics.