@inproceedings{barrera-etal-2016-enhancing,
title = "Enhancing Cross-border {EU} {E}-commerce through Machine Translation: Needed Language Resources, Challenges and Opportunities",
author = "Barrera, Meritxell Fern{\'a}ndez and
Popescu, Vladimir and
Toral, Antonio and
Gaspari, Federico and
Choukri, Khalid",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1721",
pages = "4550--4556",
abstract = "This paper discusses the role that statistical machine translation (SMT) can play in the development of cross-border EU e-commerce,by highlighting extant obstacles and identifying relevant technologies to overcome them. In this sense, it firstly proposes a typology of e-commerce static and dynamic textual genres and it identifies those that may be more successfully targeted by SMT. The specific challenges concerning the automatic translation of user-generated content are discussed in detail. Secondly, the paper highlights the risk of data sparsity inherent to e-commerce and it explores the state-of-the-art strategies to achieve domain adequacy via adaptation. Thirdly, it proposes a robust workflow for the development of SMT systems adapted to the e-commerce domain by relying on inexpensive methods. Given the scarcity of user-generated language corpora for most language pairs, the paper proposes to obtain monolingual target-language data to train language models and aligned parallel corpora to tune and evaluate MT systems by means of crowdsourcing.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barrera-etal-2016-enhancing">
<titleInfo>
<title>Enhancing Cross-border EU E-commerce through Machine Translation: Needed Language Resources, Challenges and Opportunities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Meritxell</namePart>
<namePart type="given">Fernández</namePart>
<namePart type="family">Barrera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladimir</namePart>
<namePart type="family">Popescu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Toral</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Federico</namePart>
<namePart type="family">Gaspari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper discusses the role that statistical machine translation (SMT) can play in the development of cross-border EU e-commerce,by highlighting extant obstacles and identifying relevant technologies to overcome them. In this sense, it firstly proposes a typology of e-commerce static and dynamic textual genres and it identifies those that may be more successfully targeted by SMT. The specific challenges concerning the automatic translation of user-generated content are discussed in detail. Secondly, the paper highlights the risk of data sparsity inherent to e-commerce and it explores the state-of-the-art strategies to achieve domain adequacy via adaptation. Thirdly, it proposes a robust workflow for the development of SMT systems adapted to the e-commerce domain by relying on inexpensive methods. Given the scarcity of user-generated language corpora for most language pairs, the paper proposes to obtain monolingual target-language data to train language models and aligned parallel corpora to tune and evaluate MT systems by means of crowdsourcing.</abstract>
<identifier type="citekey">barrera-etal-2016-enhancing</identifier>
<location>
<url>https://aclanthology.org/L16-1721</url>
</location>
<part>
<date>2016-may</date>
<extent unit="page">
<start>4550</start>
<end>4556</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing Cross-border EU E-commerce through Machine Translation: Needed Language Resources, Challenges and Opportunities
%A Barrera, Meritxell Fernández
%A Popescu, Vladimir
%A Toral, Antonio
%A Gaspari, Federico
%A Choukri, Khalid
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 may
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F barrera-etal-2016-enhancing
%X This paper discusses the role that statistical machine translation (SMT) can play in the development of cross-border EU e-commerce,by highlighting extant obstacles and identifying relevant technologies to overcome them. In this sense, it firstly proposes a typology of e-commerce static and dynamic textual genres and it identifies those that may be more successfully targeted by SMT. The specific challenges concerning the automatic translation of user-generated content are discussed in detail. Secondly, the paper highlights the risk of data sparsity inherent to e-commerce and it explores the state-of-the-art strategies to achieve domain adequacy via adaptation. Thirdly, it proposes a robust workflow for the development of SMT systems adapted to the e-commerce domain by relying on inexpensive methods. Given the scarcity of user-generated language corpora for most language pairs, the paper proposes to obtain monolingual target-language data to train language models and aligned parallel corpora to tune and evaluate MT systems by means of crowdsourcing.
%U https://aclanthology.org/L16-1721
%P 4550-4556
Markdown (Informal)
[Enhancing Cross-border EU E-commerce through Machine Translation: Needed Language Resources, Challenges and Opportunities](https://aclanthology.org/L16-1721) (Barrera et al., LREC 2016)
ACL