@comment{IWSLT 2014 paper. The month field carries the conference days: the
predefined dec macro is concatenated with the day range, which uses an en
dash (--) and a tie (~) so that month and days are not split across lines.}
@inproceedings{cho-etal-2014-machine,
    title = "Machine translation of multi-party meetings: segmentation and disfluency removal strategies",
    author = "Cho, Eunah and
      Niehues, Jan and
      Waibel, Alex",
    booktitle = "Proceedings of the 11th International Workshop on Spoken Language Translation: Papers",
    month = dec # "~4--5",
    year = "2014",
    address = "Lake Tahoe, California",
    url = "https://aclanthology.org/2014.iwslt-papers.4",
    pages = "176--183",
    abstract = "Translating meetings presents a challenge since multi-speaker speech shows a variety of disfluencies. In this paper we investigate the importance of transforming speech into well-written input prior to translating multi-party meetings. We first analyze the characteristics of this data and establish oracle scores. Sentence segmentation and punctuation are performed using a language model, turn information, or a monolingual translation system. Disfluencies are removed by a CRF model trained on in-domain and out-of-domain data. For comparison, we build a combined CRF model for punctuation insertion and disfluency removal. By applying these models, multi-party meetings are transformed into fluent input for machine translation. We evaluate the models with regard to translation performance and are able to achieve an improvement of 2.1 to 4.9 BLEU points depending on the availability of turn information.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the conference paper cho-etal-2014-machine. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cho-etal-2014-machine">
<titleInfo>
<title>Machine translation of multi-party meetings: segmentation and disfluency removal strategies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eunah</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<!-- December 4-5, 2014, expressed as a W3CDTF start/end pair. -->
<dateIssued encoding="w3cdtf" point="start">2014-12-04</dateIssued>
<dateIssued encoding="w3cdtf" point="end">2014-12-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 11th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">Lake Tahoe, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Translating meetings presents a challenge since multi-speaker speech shows a variety of disfluencies. In this paper we investigate the importance of transforming speech into well-written input prior to translating multi-party meetings. We first analyze the characteristics of this data and establish oracle scores. Sentence segmentation and punctuation are performed using a language model, turn information, or a monolingual translation system. Disfluencies are removed by a CRF model trained on in-domain and out-of-domain data. For comparison, we build a combined CRF model for punctuation insertion and disfluency removal. By applying these models, multi-party meetings are transformed into fluent input for machine translation. We evaluate the models with regard to translation performance and are able to achieve an improvement of 2.1 to 4.9 BLEU points depending on the availability of turn information.</abstract>
<identifier type="citekey">cho-etal-2014-machine</identifier>
<location>
<url>https://aclanthology.org/2014.iwslt-papers.4</url>
</location>
<part>
<!-- Same date range as originInfo/dateIssued. -->
<date encoding="w3cdtf" point="start">2014-12-04</date>
<date encoding="w3cdtf" point="end">2014-12-05</date>
<extent unit="page">
<start>176</start>
<end>183</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Machine translation of multi-party meetings: segmentation and disfluency removal strategies
%A Cho, Eunah
%A Niehues, Jan
%A Waibel, Alex
%S Proceedings of the 11th International Workshop on Spoken Language Translation: Papers
%D 2014
%8 December 4-5
%C Lake Tahoe, California
%F cho-etal-2014-machine
%X Translating meetings presents a challenge since multi-speaker speech shows a variety of disfluencies. In this paper we investigate the importance of transforming speech into well-written input prior to translating multi-party meetings. We first analyze the characteristics of this data and establish oracle scores. Sentence segmentation and punctuation are performed using a language model, turn information, or a monolingual translation system. Disfluencies are removed by a CRF model trained on in-domain and out-of-domain data. For comparison, we build a combined CRF model for punctuation insertion and disfluency removal. By applying these models, multi-party meetings are transformed into fluent input for machine translation. We evaluate the models with regard to translation performance and are able to achieve an improvement of 2.1 to 4.9 BLEU points depending on the availability of turn information.
%U https://aclanthology.org/2014.iwslt-papers.4
%P 176-183
Markdown (Informal)
[Machine translation of multi-party meetings: segmentation and disfluency removal strategies](https://aclanthology.org/2014.iwslt-papers.4) (Cho et al., IWSLT 2014)
ACL