% Hagiwara (2020): Octanove Labs' submission paper for the IWSLT 2020 open domain
% translation challenge (Japanese-Chinese NMT), pages 166--171 of the proceedings.
% The url field holds the canonical ACL Anthology address (same identifier as the
% DOI suffix) instead of a temporary preview-branch mirror.
@inproceedings{hagiwara-2020-octanove,
  title     = {{Octanove Labs}' {Japanese}-{Chinese} Open Domain Translation System},
  author    = {Hagiwara, Masato},
  editor    = {Federico, Marcello and
               Waibel, Alex and
               Knight, Kevin and
               Nakamura, Satoshi and
               Ney, Hermann and
               Niehues, Jan and
               St{\"u}ker, Sebastian and
               Wu, Dekai and
               Mariani, Joseph and
               Yvon, Francois},
  booktitle = {Proceedings of the 17th International Conference on Spoken Language Translation},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.iwslt-1.20/},
  doi       = {10.18653/v1/2020.iwslt-1.20},
  pages     = {166--171},
  abstract  = {This paper describes Octanove Labs' submission to the IWSLT 2020 open domain translation challenge. In order to build a high-quality Japanese-Chinese neural machine translation (NMT) system, we use a combination of 1) parallel corpus filtering and 2) back-translation. We have shown that, by using heuristic rules and learned classifiers, the size of the parallel data can be reduced by 70{\%} to 90{\%} without much impact on the final MT performance. We have also shown that including the artificially generated parallel data through back-translation further boosts the metric by 17{\%} to 27{\%}, while self-training contributes little. Aside from a small number of parallel sentences annotated for filtering, no external resources have been used to build our system.},
}
Markdown (Informal)
[Octanove Labs’ Japanese-Chinese Open Domain Translation System](https://aclanthology.org/2020.iwslt-1.20/) (Hagiwara, IWSLT 2020)
ACL