@inproceedings{nikolov-etal-2018-character,
    title     = {Character-level {Chinese}-{English} Translation through {ASCII} Encoding},
    author    = {Nikolov, Nikola I. and
                 Hu, Yuhuang and
                 Tan, Mi Xue and
                 Hahnloser, Richard H. R.},
    booktitle = {Proceedings of the Third Conference on Machine Translation: Research Papers},
    month     = oct,
    year      = {2018},
    address   = {Brussels, Belgium},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W18-6302},
    doi       = {10.18653/v1/W18-6302},
    pages     = {10--16},
    abstract  = {Character-level Neural Machine Translation (NMT) models have recently achieved impressive results on many language pairs. They mainly do well for Indo-European language pairs, where the languages share the same writing system. However, for translating between Chinese and English, the gap between the two different writing systems poses a major challenge because of a lack of systematic correspondence between the individual linguistic units. In this paper, we enable character-level NMT for Chinese, by breaking down Chinese characters into linguistic units similar to that of Indo-European languages. We use the Wubi encoding scheme, which preserves the original shape and semantic information of the characters, while also being reversible. We show promising results from training Wubi-based models on the character- and subword-level with recurrent as well as convolutional models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nikolov-etal-2018-character">
<titleInfo>
<title>Character-level Chinese-English Translation through ASCII Encoding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="given">I.</namePart>
<namePart type="family">Nikolov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuhuang</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mi</namePart>
<namePart type="given">Xue</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="given">H. R.</namePart>
<namePart type="family">Hahnloser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="w3cdtf">2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Conference on Machine Translation: Research Papers</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Character-level Neural Machine Translation (NMT) models have recently achieved impressive results on many language pairs. They mainly do well for Indo-European language pairs, where the languages share the same writing system. However, for translating between Chinese and English, the gap between the two different writing systems poses a major challenge because of a lack of systematic correspondence between the individual linguistic units. In this paper, we enable character-level NMT for Chinese, by breaking down Chinese characters into linguistic units similar to that of Indo-European languages. We use the Wubi encoding scheme, which preserves the original shape and semantic information of the characters, while also being reversible. We show promising results from training Wubi-based models on the character- and subword-level with recurrent as well as convolutional models.</abstract>
<identifier type="citekey">nikolov-etal-2018-character</identifier>
<identifier type="doi">10.18653/v1/W18-6302</identifier>
<location>
<url>https://aclanthology.org/W18-6302</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>10</start>
<end>16</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Character-level Chinese-English Translation through ASCII Encoding
%A Nikolov, Nikola I.
%A Hu, Yuhuang
%A Tan, Mi Xue
%A Hahnloser, Richard H. R.
%S Proceedings of the Third Conference on Machine Translation: Research Papers
%D 2018
%8 oct
%I Association for Computational Linguistics
%C Brussels, Belgium
%F nikolov-etal-2018-character
%X Character-level Neural Machine Translation (NMT) models have recently achieved impressive results on many language pairs. They mainly do well for Indo-European language pairs, where the languages share the same writing system. However, for translating between Chinese and English, the gap between the two different writing systems poses a major challenge because of a lack of systematic correspondence between the individual linguistic units. In this paper, we enable character-level NMT for Chinese, by breaking down Chinese characters into linguistic units similar to that of Indo-European languages. We use the Wubi encoding scheme, which preserves the original shape and semantic information of the characters, while also being reversible. We show promising results from training Wubi-based models on the character- and subword-level with recurrent as well as convolutional models.
%R 10.18653/v1/W18-6302
%U https://aclanthology.org/W18-6302
%U https://doi.org/10.18653/v1/W18-6302
%P 10-16
Markdown (Informal)
[Character-level Chinese-English Translation through ASCII Encoding](https://aclanthology.org/W18-6302) (Nikolov et al., 2018)
ACL