@inproceedings{kreutzer-sokolov-2018-learning,
title = "Learning to Segment Inputs for {NMT} Favors Character-Level Processing",
author = "Kreutzer, Julia and
Sokolov, Artem",
booktitle = "Proceedings of the 15th International Conference on Spoken Language Translation",
month = oct # " 29-30",
year = "2018",
address = "Brussels",
publisher = "International Conference on Spoken Language Translation",
url = "https://aclanthology.org/2018.iwslt-1.25",
pages = "166--172",
abstract = "Most modern neural machine translation (NMT) systems rely on presegmented inputs. Segmentation granularity importantly determines the input and output sequence lengths, hence the modeling depth, and source and target vocabularies, which in turn determine model size, computational costs of softmax normalization, and handling of out-of-vocabulary words. However, the current practice is to use static, heuristic-based segmentations that are fixed before NMT training. This begs the question whether the chosen segmentation is optimal for the translation task. To overcome suboptimal segmentation choices, we present an algorithm for dynamic segmentation, that is trainable end-to-end and driven by the NMT objective. In an evaluation on four translation tasks we found that, given the freedom to navigate between different segmentation levels, the model prefers to operate on (almost) character level, providing support for purely character-level NMT models from a novel angle.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kreutzer-sokolov-2018-learning">
<titleInfo>
<title>Learning to Segment Inputs for NMT Favors Character-Level Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Kreutzer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Sokolov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Spoken Language Translation</title>
</titleInfo>
<originInfo>
<publisher>International Conference on Spoken Language Translation</publisher>
<place>
<placeTerm type="text">Brussels</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Most modern neural machine translation (NMT) systems rely on presegmented inputs. Segmentation granularity importantly determines the input and output sequence lengths, hence the modeling depth, and source and target vocabularies, which in turn determine model size, computational costs of softmax normalization, and handling of out-of-vocabulary words. However, the current practice is to use static, heuristic-based segmentations that are fixed before NMT training. This begs the question whether the chosen segmentation is optimal for the translation task. To overcome suboptimal segmentation choices, we present an algorithm for dynamic segmentation, that is trainable end-to-end and driven by the NMT objective. In an evaluation on four translation tasks we found that, given the freedom to navigate between different segmentation levels, the model prefers to operate on (almost) character level, providing support for purely character-level NMT models from a novel angle.</abstract>
<identifier type="citekey">kreutzer-sokolov-2018-learning</identifier>
<location>
<url>https://aclanthology.org/2018.iwslt-1.25</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>166</start>
<end>172</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning to Segment Inputs for NMT Favors Character-Level Processing
%A Kreutzer, Julia
%A Sokolov, Artem
%S Proceedings of the 15th International Conference on Spoken Language Translation
%D 2018
%8 October 29-30
%I International Conference on Spoken Language Translation
%C Brussels
%F kreutzer-sokolov-2018-learning
%X Most modern neural machine translation (NMT) systems rely on presegmented inputs. Segmentation granularity importantly determines the input and output sequence lengths, hence the modeling depth, and source and target vocabularies, which in turn determine model size, computational costs of softmax normalization, and handling of out-of-vocabulary words. However, the current practice is to use static, heuristic-based segmentations that are fixed before NMT training. This begs the question whether the chosen segmentation is optimal for the translation task. To overcome suboptimal segmentation choices, we present an algorithm for dynamic segmentation, that is trainable end-to-end and driven by the NMT objective. In an evaluation on four translation tasks we found that, given the freedom to navigate between different segmentation levels, the model prefers to operate on (almost) character level, providing support for purely character-level NMT models from a novel angle.
%U https://aclanthology.org/2018.iwslt-1.25
%P 166-172
Markdown (Informal)
[Learning to Segment Inputs for NMT Favors Character-Level Processing](https://aclanthology.org/2018.iwslt-1.25) (Kreutzer & Sokolov, IWSLT 2018)
ACL