% Conference paper on the InterCorp multilingual parallel corpus (LREC 2012).
@inproceedings{rosen-vavrin-2012-building,
  author    = {Rosen, Alexandr and Vav{\v{r}}{\'\i}n, Martin},
  title     = {Building a multilingual parallel corpus for human users},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
  year      = {2012},
  month     = may,
  address   = {Istanbul, Turkey},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {2447--2452},
  url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/200_Paper.pdf},
  abstract  = {We present the architecture and the current state of InterCorp, a multilingual parallel corpus centered around Czech, intended primarily for human users and consisting of written texts with a focus on fiction. Following an outline of its recent development and a comparison with some other multilingual parallel corpora we give an overview of the data collection procedure that covers text selection criteria, data format, conversion, alignment, lemmatization and tagging. Finally, we show a sample query using the web-based search interface and discuss challenges and prospects of the project.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one record, citekey rosen-vavrin-2012-building. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="rosen-vavrin-2012-building">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Building a multilingual parallel corpus for human users</title>
    </titleInfo>
    <!-- Authors, listed in the same order as the other export formats. -->
    <name type="personal">
      <namePart type="given">Alexandr</namePart>
      <namePart type="family">Rosen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Martin</namePart>
      <namePart type="family">Vavřín</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2012-may</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume containing the paper. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
      </titleInfo>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Istanbul, Turkey</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present the architecture and the current state of InterCorp, a multilingual parallel corpus centered around Czech, intended primarily for human users and consisting of written texts with a focus on fiction. Following an outline of its recent development and a comparison with some other multilingual parallel corpora we give an overview of the data collection procedure that covers text selection criteria, data format, conversion, alignment, lemmatization and tagging. Finally, we show a sample query using the web-based search interface and discuss challenges and prospects of the project.</abstract>
    <identifier type="citekey">rosen-vavrin-2012-building</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/200_Paper.pdf</url>
    </location>
    <!-- Issue date and page range recorded for this part. -->
    <part>
      <date>2012-may</date>
      <extent unit="page">
        <start>2447</start>
        <end>2452</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Building a multilingual parallel corpus for human users
%A Rosen, Alexandr
%A Vavřín, Martin
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 may
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F rosen-vavrin-2012-building
%X We present the architecture and the current state of InterCorp, a multilingual parallel corpus centered around Czech, intended primarily for human users and consisting of written texts with a focus on fiction. Following an outline of its recent development and a comparison with some other multilingual parallel corpora we give an overview of the data collection procedure that covers text selection criteria, data format, conversion, alignment, lemmatization and tagging. Finally, we show a sample query using the web-based search interface and discuss challenges and prospects of the project.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/200_Paper.pdf
%P 2447-2452
Markdown (Informal)
[Building a multilingual parallel corpus for human users](http://www.lrec-conf.org/proceedings/lrec2012/pdf/200_Paper.pdf) (Rosen & Vavřín, LREC 2012)
ACL
- Alexandr Rosen and Martin Vavřín. 2012. Building a multilingual parallel corpus for human users. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 2447–2452, Istanbul, Turkey. European Language Resources Association (ELRA).