@inproceedings{stajner-etal-2016-use,
title = "Use of Domain-Specific Language Resources in Machine Translation",
author = "{\v{S}}tajner, Sanja and
Querido, Andreia and
Rendeiro, Nuno and
Rodrigues, Jo{\~a}o Ant{\'o}nio and
Branco, Ant{\'o}nio",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1094",
pages = "592--598",
abstract = "In this paper, we address the problem of Machine Translation (MT) for a specialised domain in a language pair for which only a very small domain-specific parallel corpus is available. We conduct a series of experiments using a purely phrase-based SMT (PBSMT) system and a hybrid MT system (TectoMT), testing three different strategies to overcome the problem of the small amount of in-domain training data. Our results show that adding a small size in-domain bilingual terminology to the small in-domain training corpus leads to the best improvements of a hybrid MT system, while the PBSMT system achieves the best results by adding a combination of in-domain bilingual terminology and a larger out-of-domain corpus. We focus on qualitative human evaluation of the output of two best systems (one for each approach) and perform a systematic in-depth error analysis which revealed advantages of the hybrid MT system over the pure PBSMT system for this specific task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stajner-etal-2016-use">
<titleInfo>
<title>Use of Domain-Specific Language Resources in Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sanja</namePart>
<namePart type="family">Štajner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreia</namePart>
<namePart type="family">Querido</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuno</namePart>
<namePart type="family">Rendeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="given">António</namePart>
<namePart type="family">Rodrigues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">António</namePart>
<namePart type="family">Branco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we address the problem of Machine Translation (MT) for a specialised domain in a language pair for which only a very small domain-specific parallel corpus is available. We conduct a series of experiments using a purely phrase-based SMT (PBSMT) system and a hybrid MT system (TectoMT), testing three different strategies to overcome the problem of the small amount of in-domain training data. Our results show that adding a small size in-domain bilingual terminology to the small in-domain training corpus leads to the best improvements of a hybrid MT system, while the PBSMT system achieves the best results by adding a combination of in-domain bilingual terminology and a larger out-of-domain corpus. We focus on qualitative human evaluation of the output of two best systems (one for each approach) and perform a systematic in-depth error analysis which revealed advantages of the hybrid MT system over the pure PBSMT system for this specific task.</abstract>
<identifier type="citekey">stajner-etal-2016-use</identifier>
<location>
<url>https://aclanthology.org/L16-1094</url>
</location>
<part>
<date>2016-may</date>
<extent unit="page">
<start>592</start>
<end>598</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Use of Domain-Specific Language Resources in Machine Translation
%A Štajner, Sanja
%A Querido, Andreia
%A Rendeiro, Nuno
%A Rodrigues, João António
%A Branco, António
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 may
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F stajner-etal-2016-use
%X In this paper, we address the problem of Machine Translation (MT) for a specialised domain in a language pair for which only a very small domain-specific parallel corpus is available. We conduct a series of experiments using a purely phrase-based SMT (PBSMT) system and a hybrid MT system (TectoMT), testing three different strategies to overcome the problem of the small amount of in-domain training data. Our results show that adding a small size in-domain bilingual terminology to the small in-domain training corpus leads to the best improvements of a hybrid MT system, while the PBSMT system achieves the best results by adding a combination of in-domain bilingual terminology and a larger out-of-domain corpus. We focus on qualitative human evaluation of the output of two best systems (one for each approach) and perform a systematic in-depth error analysis which revealed advantages of the hybrid MT system over the pure PBSMT system for this specific task.
%U https://aclanthology.org/L16-1094
%P 592-598
Markdown (Informal)
[Use of Domain-Specific Language Resources in Machine Translation](https://aclanthology.org/L16-1094) (Štajner et al., LREC 2016)
ACL
- Sanja Štajner, Andreia Querido, Nuno Rendeiro, João António Rodrigues, and António Branco. 2016. Use of Domain-Specific Language Resources in Machine Translation. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16), pages 592–598, Portorož, Slovenia. European Language Resources Association (ELRA).