% Overview paper of the WMT18 Metrics Shared Task (Ma, Bojar, Graham; 2018).
% The address field is written in "City, Country" order.
@inproceedings{ma-etal-2018-results,
  author    = {Ma, Qingsong and
               Bojar, Ond{\v{r}}ej and
               Graham, Yvette},
  title     = {Results of the {WMT18} Metrics Shared Task: Both characters and embeddings achieve good performance},
  booktitle = {Proceedings of the Third Conference on Machine Translation: Shared Task Papers},
  month     = oct,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  pages     = {671--688},
  doi       = {10.18653/v1/W18-6450},
  url       = {https://aclanthology.org/W18-6450},
  abstract  = {This paper presents the results of the WMT18 Metrics Shared Task. We asked participants of this task to score the outputs of the MT systems involved in the WMT18 News Translation Task with automatic metrics. We collected scores of 10 metrics and 8 research groups. In addition to that, we computed scores of 8 standard metrics (BLEU, SentBLEU, chrF, NIST, WER, PER, TER and CDER) as baselines. The collected scores were evaluated in terms of system-level correlation (how well each metric{'}s scores correlate with WMT18 official manual ranking of systems) and in terms of segment-level correlation (how often a metric agrees with humans in judging the quality of a particular sentence relative to alternate outputs). This year, we employ a single kind of manual evaluation: direct assessment (DA).},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the WMT18 Metrics Shared Task results paper (citekey ma-etal-2018-results). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ma-etal-2018-results">
    <!-- Title of the paper itself; the proceedings title is under relatedItem type="host". -->
    <titleInfo>
      <title>Results of the WMT18 Metrics Shared Task: Both characters and embeddings achieve good performance</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Qingsong</namePart>
      <namePart type="family">Ma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ondřej</namePart>
      <namePart type="family">Bojar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yvette</namePart>
      <namePart type="family">Graham</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <!-- NOTE(review): "2018-oct" is free text, not an ISO 8601 / W3CDTF date; kept as-is because no encoding attribute is declared. -->
      <dateIssued>2018-oct</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Third Conference on Machine Translation: Shared Task Papers</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <!-- Written in "City, Country" order. -->
          <placeTerm type="text">Brussels, Belgium</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents the results of the WMT18 Metrics Shared Task. We asked participants of this task to score the outputs of the MT systems involved in the WMT18 News Translation Task with automatic metrics. We collected scores of 10 metrics and 8 research groups. In addition to that, we computed scores of 8 standard metrics (BLEU, SentBLEU, chrF, NIST, WER, PER, TER and CDER) as baselines. The collected scores were evaluated in terms of system-level correlation (how well each metric’s scores correlate with WMT18 official manual ranking of systems) and in terms of segment-level correlation (how often a metric agrees with humans in judging the quality of a particular sentence relative to alternate outputs). This year, we employ a single kind of manual evaluation: direct assessment (DA).</abstract>
    <!-- Identifiers and landing page. -->
    <identifier type="citekey">ma-etal-2018-results</identifier>
    <identifier type="doi">10.18653/v1/W18-6450</identifier>
    <location>
      <url>https://aclanthology.org/W18-6450</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2018-oct</date>
      <extent unit="page">
        <start>671</start>
        <end>688</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Results of the WMT18 Metrics Shared Task: Both characters and embeddings achieve good performance
%A Ma, Qingsong
%A Bojar, Ondřej
%A Graham, Yvette
%S Proceedings of the Third Conference on Machine Translation: Shared Task Papers
%D 2018
%8 oct
%I Association for Computational Linguistics
%C Brussels, Belgium
%F ma-etal-2018-results
%X This paper presents the results of the WMT18 Metrics Shared Task. We asked participants of this task to score the outputs of the MT systems involved in the WMT18 News Translation Task with automatic metrics. We collected scores of 10 metrics and 8 research groups. In addition to that, we computed scores of 8 standard metrics (BLEU, SentBLEU, chrF, NIST, WER, PER, TER and CDER) as baselines. The collected scores were evaluated in terms of system-level correlation (how well each metric’s scores correlate with WMT18 official manual ranking of systems) and in terms of segment-level correlation (how often a metric agrees with humans in judging the quality of a particular sentence relative to alternate outputs). This year, we employ a single kind of manual evaluation: direct assessment (DA).
%R 10.18653/v1/W18-6450
%U https://aclanthology.org/W18-6450
%U https://doi.org/10.18653/v1/W18-6450
%P 671-688
Markdown (Informal)
[Results of the WMT18 Metrics Shared Task: Both characters and embeddings achieve good performance](https://aclanthology.org/W18-6450) (Ma et al., 2018)
ACL