@inproceedings{erjavec-2006-english,
title = "The {E}nglish-{S}lovene {ACQUIS} corpus",
author = "Erjavec, Toma{\v{z}}",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/140_pdf.pdf",
abstract = "The paper presents the SVEZ-IJS corpus, a large parallel annotated English-Slovene corpus containing translated legal texts of the European Union, the ACQUIS Communautaire. The corpus contains approx. 2 x 5 million words and was compiled from the translation memory obtained from the Translation Unit of the Slovene Government Office for European Affairs. The corpus is encoded in XML, accordingto the Text Encoding Initiative Guidelines TEI P4, where each translation memory unit contains useful metadata and the two aligned segments (sentences). Both the Slovene and English text islinguistically annotated at the word-level, by context disambiguatedlemmas and morphosyntactic descriptions, which follow the MULTEXTguidelines. The complete corpus is freely available for research, either via an on-line concordancer, or for downloading from the corpushome page at http://nl.ijs.si/svez/.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="erjavec-2006-english">
<titleInfo>
<title>The English-Slovene ACQUIS corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tomaž</namePart>
<namePart type="family">Erjavec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The paper presents the SVEZ-IJS corpus, a large parallel annotated English-Slovene corpus containing translated legal texts of the European Union, the ACQUIS Communautaire. The corpus contains approx. 2 x 5 million words and was compiled from the translation memory obtained from the Translation Unit of the Slovene Government Office for European Affairs. The corpus is encoded in XML, accordingto the Text Encoding Initiative Guidelines TEI P4, where each translation memory unit contains useful metadata and the two aligned segments (sentences). Both the Slovene and English text islinguistically annotated at the word-level, by context disambiguatedlemmas and morphosyntactic descriptions, which follow the MULTEXTguidelines. The complete corpus is freely available for research, either via an on-line concordancer, or for downloading from the corpushome page at http://nl.ijs.si/svez/.</abstract>
<identifier type="citekey">erjavec-2006-english</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/140_pdf.pdf</url>
</location>
<part>
<date>2006-may</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The English-Slovene ACQUIS corpus
%A Erjavec, Tomaž
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 may
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F erjavec-2006-english
%X The paper presents the SVEZ-IJS corpus, a large parallel annotated English-Slovene corpus containing translated legal texts of the European Union, the ACQUIS Communautaire. The corpus contains approx. 2 x 5 million words and was compiled from the translation memory obtained from the Translation Unit of the Slovene Government Office for European Affairs. The corpus is encoded in XML, accordingto the Text Encoding Initiative Guidelines TEI P4, where each translation memory unit contains useful metadata and the two aligned segments (sentences). Both the Slovene and English text islinguistically annotated at the word-level, by context disambiguatedlemmas and morphosyntactic descriptions, which follow the MULTEXTguidelines. The complete corpus is freely available for research, either via an on-line concordancer, or for downloading from the corpushome page at http://nl.ijs.si/svez/.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/140_pdf.pdf
Markdown (Informal)
[The English-Slovene ACQUIS corpus](http://www.lrec-conf.org/proceedings/lrec2006/pdf/140_pdf.pdf) (Erjavec, LREC 2006)
ACL
- Tomaž Erjavec. 2006. The English-Slovene ACQUIS corpus. In Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06), Genoa, Italy. European Language Resources Association (ELRA).