BibTeX
@inproceedings{voss-etal-2008-exploitation,
title = "Exploitation of an {A}rabic Language Resource for Machine Translation Evaluation: using {B}uckwalter-based Lookup Tool to Augment {CMU} Alignment Algorithm",
author = "Voss, Clare and
Laoudi, Jamal and
Micher, Jeffrey",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/887_paper.pdf",
abstract = "Voss et al. (2006) analyzed newswire translations of three DARPA GALE Arabic-English MT systems at the segment level in terms of subjective judgmen+F925t scores, automated metric scores, and correlations among these different score types. At this level of granularity, the correlations are weak. In this paper, we begin to reconcile the subjective and automated scores that underlie these correlations by explicitly grounding MT output with its Reference Translation (RT) prior to subjective or automated evaluation. The first two phases of our approach annotate {MT, RT} pairs with the same types of textual comparisons that subjects intuitively apply, while the third phase (not presented here) entails scoring the pairs: (i) automated calculation of MT-RT hits using CMU aligner from METEOR, (ii) an extension phase where our Buckwalter-based Lookup Tool serves to generate six other textual comparison categories on items in the MT output that the CMU aligner does not identify, and (iii) given the fully categorized RT {\&} MT pair, a final adequacy score is assigned to the MT output, either by an automated metric based on weighted category counts and segment length, or by a trained human judge.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="voss-etal-2008-exploitation">
<titleInfo>
<title>Exploitation of an Arabic Language Resource for Machine Translation Evaluation: using Buckwalter-based Lookup Tool to Augment CMU Alignment Algorithm</title>
</titleInfo>
<name type="personal">
<namePart type="given">Clare</namePart>
<namePart type="family">Voss</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jamal</namePart>
<namePart type="family">Laoudi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeffrey</namePart>
<namePart type="family">Micher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Voss et al. (2006) analyzed newswire translations of three DARPA GALE Arabic-English MT systems at the segment level in terms of subjective judgment scores, automated metric scores, and correlations among these different score types. At this level of granularity, the correlations are weak. In this paper, we begin to reconcile the subjective and automated scores that underlie these correlations by explicitly grounding MT output with its Reference Translation (RT) prior to subjective or automated evaluation. The first two phases of our approach annotate MT, RT pairs with the same types of textual comparisons that subjects intuitively apply, while the third phase (not presented here) entails scoring the pairs: (i) automated calculation of MT-RT hits using CMU aligner from METEOR, (ii) an extension phase where our Buckwalter-based Lookup Tool serves to generate six other textual comparison categories on items in the MT output that the CMU aligner does not identify, and (iii) given the fully categorized RT &amp; MT pair, a final adequacy score is assigned to the MT output, either by an automated metric based on weighted category counts and segment length, or by a trained human judge.</abstract>
<identifier type="citekey">voss-etal-2008-exploitation</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/887_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Exploitation of an Arabic Language Resource for Machine Translation Evaluation: using Buckwalter-based Lookup Tool to Augment CMU Alignment Algorithm
%A Voss, Clare
%A Laoudi, Jamal
%A Micher, Jeffrey
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 may
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F voss-etal-2008-exploitation
%X Voss et al. (2006) analyzed newswire translations of three DARPA GALE Arabic-English MT systems at the segment level in terms of subjective judgment scores, automated metric scores, and correlations among these different score types. At this level of granularity, the correlations are weak. In this paper, we begin to reconcile the subjective and automated scores that underlie these correlations by explicitly grounding MT output with its Reference Translation (RT) prior to subjective or automated evaluation. The first two phases of our approach annotate MT, RT pairs with the same types of textual comparisons that subjects intuitively apply, while the third phase (not presented here) entails scoring the pairs: (i) automated calculation of MT-RT hits using CMU aligner from METEOR, (ii) an extension phase where our Buckwalter-based Lookup Tool serves to generate six other textual comparison categories on items in the MT output that the CMU aligner does not identify, and (iii) given the fully categorized RT & MT pair, a final adequacy score is assigned to the MT output, either by an automated metric based on weighted category counts and segment length, or by a trained human judge.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/887_paper.pdf
Markdown (Informal)
[Exploitation of an Arabic Language Resource for Machine Translation Evaluation: using Buckwalter-based Lookup Tool to Augment CMU Alignment Algorithm](http://www.lrec-conf.org/proceedings/lrec2008/pdf/887_paper.pdf) (Voss et al., LREC 2008)
ACL
Clare Voss, Jamal Laoudi, and Jeffrey Micher. 2008. [Exploitation of an Arabic Language Resource for Machine Translation Evaluation: using Buckwalter-based Lookup Tool to Augment CMU Alignment Algorithm](http://www.lrec-conf.org/proceedings/lrec2008/pdf/887_paper.pdf). In *Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)*, Marrakech, Morocco. European Language Resources Association (ELRA).