@inproceedings{gordon-duh-2020-distill,
title = "Distill, Adapt, Distill: Training Small, In-Domain Models for Neural Machine Translation",
author = "Gordon, Mitchell and
Duh, Kevin",
booktitle = "Proceedings of the Fourth Workshop on Neural Generation and Translation",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.ngt-1.12",
doi = "10.18653/v1/2020.ngt-1.12",
pages = "110--118",
abstract = "We explore best practices for training small, memory efficient machine translation models with sequence-level knowledge distillation in the domain adaptation setting. While both domain adaptation and knowledge distillation are widely-used, their interaction remains little understood. Our large-scale empirical results in machine translation (on three language pairs with three domains each) suggest distilling twice for best performance: once using general-domain data and again using in-domain data with an adapted teacher.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gordon-duh-2020-distill">
<titleInfo>
<title>Distill, Adapt, Distill: Training Small, In-Domain Models for Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mitchell</namePart>
<namePart type="family">Gordon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Neural Generation and Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We explore best practices for training small, memory efficient machine translation models with sequence-level knowledge distillation in the domain adaptation setting. While both domain adaptation and knowledge distillation are widely-used, their interaction remains little understood. Our large-scale empirical results in machine translation (on three language pairs with three domains each) suggest distilling twice for best performance: once using general-domain data and again using in-domain data with an adapted teacher.</abstract>
<identifier type="citekey">gordon-duh-2020-distill</identifier>
<identifier type="doi">10.18653/v1/2020.ngt-1.12</identifier>
<location>
<url>https://aclanthology.org/2020.ngt-1.12</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>110</start>
<end>118</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Distill, Adapt, Distill: Training Small, In-Domain Models for Neural Machine Translation
%A Gordon, Mitchell
%A Duh, Kevin
%S Proceedings of the Fourth Workshop on Neural Generation and Translation
%D 2020
%8 jul
%I Association for Computational Linguistics
%C Online
%F gordon-duh-2020-distill
%X We explore best practices for training small, memory efficient machine translation models with sequence-level knowledge distillation in the domain adaptation setting. While both domain adaptation and knowledge distillation are widely-used, their interaction remains little understood. Our large-scale empirical results in machine translation (on three language pairs with three domains each) suggest distilling twice for best performance: once using general-domain data and again using in-domain data with an adapted teacher.
%R 10.18653/v1/2020.ngt-1.12
%U https://aclanthology.org/2020.ngt-1.12
%U https://doi.org/10.18653/v1/2020.ngt-1.12
%P 110-118
Markdown (Informal)
[Distill, Adapt, Distill: Training Small, In-Domain Models for Neural Machine Translation](https://aclanthology.org/2020.ngt-1.12) (Gordon & Duh, NGT 2020)
ACL