% LREC 2012 conference paper. Fields are grouped as: authorship, venue, locators, abstract.
@inproceedings{khademian-etal-2012-holistic,
  author    = {Khademian, Mahdi and
               Taghipour, Kaveh and
               Mansour, Saab and
               Khadivi, Shahram},
  title     = {A Holistic Approach to Bilingual Sentence Fragment Extraction from Comparable Corpora},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
  year      = {2012},
  month     = may,
  address   = {Istanbul, Turkey},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {4073--4079},
  url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/892_Paper.pdf},
  abstract  = {Achieving accurate translation, especially in multiple domain documents with statistical machine translation systems, requires more and more bilingual texts and this need becomes more critical when training such systems for language pairs with scarce training data. In the recent years, there have been some researches on new sources of parallel texts that are documents which are not necessarily parallel but are comparable. Since these methods search for possible translation equivalences in a greedy manner, they are unable to consider all possible parallel texts in comparable documents. This paper investigates a different approach for this need by considering relationships between all words of two comparable documents, which works fairly well even in the worst case of comparability. We represent each document pair in a matrix and then transform it to a new space to find parallel fragments. Evaluations show that the system is successful in extraction of useful fragment pairs.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID is the same string as the citekey identifier below. -->
  <mods ID="khademian-etal-2012-holistic">
    <titleInfo>
      <title>A Holistic Approach to Bilingual Sentence Fragment Extraction from Comparable Corpora</title>
    </titleInfo>

    <!-- Authors, one personal name element each, in listed order. -->
    <name type="personal">
      <namePart type="given">Mahdi</namePart>
      <namePart type="family">Khademian</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kaveh</namePart>
      <namePart type="family">Taghipour</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saab</namePart>
      <namePart type="family">Mansour</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shahram</namePart>
      <namePart type="family">Khadivi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- NOTE(review): the date is free text (no encoding attribute is set); a W3CDTF
         value would read 2012-05. Confirm what consumers expect before normalising. -->
    <originInfo>
      <dateIssued>2012-may</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
      </titleInfo>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Istanbul, Turkey</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Achieving accurate translation, especially in multiple domain documents with statistical machine translation systems, requires more and more bilingual texts and this need becomes more critical when training such systems for language pairs with scarce training data. In the recent years, there have been some researches on new sources of parallel texts that are documents which are not necessarily parallel but are comparable. Since these methods search for possible translation equivalences in a greedy manner, they are unable to consider all possible parallel texts in comparable documents. This paper investigates a different approach for this need by considering relationships between all words of two comparable documents, which works fairly well even in the worst case of comparability. We represent each document pair in a matrix and then transform it to a new space to find parallel fragments. Evaluations show that the system is successful in extraction of useful fragment pairs.</abstract>
    <identifier type="citekey">khademian-etal-2012-holistic</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/892_Paper.pdf</url>
    </location>

    <!-- Date and page extent of this paper. -->
    <part>
      <date>2012-may</date>
      <extent unit="page">
        <start>4073</start>
        <end>4079</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Holistic Approach to Bilingual Sentence Fragment Extraction from Comparable Corpora
%A Khademian, Mahdi
%A Taghipour, Kaveh
%A Mansour, Saab
%A Khadivi, Shahram
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 may
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F khademian-etal-2012-holistic
%X Achieving accurate translation, especially in multiple domain documents with statistical machine translation systems, requires more and more bilingual texts and this need becomes more critical when training such systems for language pairs with scarce training data. In the recent years, there have been some researches on new sources of parallel texts that are documents which are not necessarily parallel but are comparable. Since these methods search for possible translation equivalences in a greedy manner, they are unable to consider all possible parallel texts in comparable documents. This paper investigates a different approach for this need by considering relationships between all words of two comparable documents, which works fairly well even in the worst case of comparability. We represent each document pair in a matrix and then transform it to a new space to find parallel fragments. Evaluations show that the system is successful in extraction of useful fragment pairs.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/892_Paper.pdf
%P 4073-4079
Markdown (Informal)
[A Holistic Approach to Bilingual Sentence Fragment Extraction from Comparable Corpora](http://www.lrec-conf.org/proceedings/lrec2012/pdf/892_Paper.pdf) (Khademian et al., LREC 2012)
ACL