@inproceedings{proisl-etal-2020-empirist,
title = "{E}mpiri{ST} Corpus 2.0: Adding Manual Normalization, Lemmatization and Semantic Tagging to a {G}erman Web and {CMC} Corpus",
author = "Proisl, Thomas and
Dykes, Natalie and
Heinrich, Philipp and
Kabashi, Besim and
Blombach, Andreas and
Evert, Stefan",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.754",
pages = "6142--6148",
abstract = "The EmpiriST corpus (Bei{\ss}wenger et al., 2016) is a manually tokenized and part-of-speech tagged corpus of approximately 23,000 tokens of German Web and CMC (computer-mediated communication) data. We extend the corpus with manually created annotation layers for word form normalization, lemmatization and lexical semantics. All annotations have been independently performed by multiple human annotators. We report inter-annotator agreements and results of baseline systems and state-of-the-art off-the-shelf tools.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="proisl-etal-2020-empirist">
<titleInfo>
<title>EmpiriST Corpus 2.0: Adding Manual Normalization, Lemmatization and Semantic Tagging to a German Web and CMC Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Proisl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Dykes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Heinrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Besim</namePart>
<namePart type="family">Kabashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Blombach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Evert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>The EmpiriST corpus (Beißwenger et al., 2016) is a manually tokenized and part-of-speech tagged corpus of approximately 23,000 tokens of German Web and CMC (computer-mediated communication) data. We extend the corpus with manually created annotation layers for word form normalization, lemmatization and lexical semantics. All annotations have been independently performed by multiple human annotators. We report inter-annotator agreements and results of baseline systems and state-of-the-art off-the-shelf tools.</abstract>
<identifier type="citekey">proisl-etal-2020-empirist</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.754</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>6142</start>
<end>6148</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T EmpiriST Corpus 2.0: Adding Manual Normalization, Lemmatization and Semantic Tagging to a German Web and CMC Corpus
%A Proisl, Thomas
%A Dykes, Natalie
%A Heinrich, Philipp
%A Kabashi, Besim
%A Blombach, Andreas
%A Evert, Stefan
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F proisl-etal-2020-empirist
%X The EmpiriST corpus (Beißwenger et al., 2016) is a manually tokenized and part-of-speech tagged corpus of approximately 23,000 tokens of German Web and CMC (computer-mediated communication) data. We extend the corpus with manually created annotation layers for word form normalization, lemmatization and lexical semantics. All annotations have been independently performed by multiple human annotators. We report inter-annotator agreements and results of baseline systems and state-of-the-art off-the-shelf tools.
%U https://aclanthology.org/2020.lrec-1.754
%P 6142-6148
Markdown (Informal)
[EmpiriST Corpus 2.0: Adding Manual Normalization, Lemmatization and Semantic Tagging to a German Web and CMC Corpus](https://aclanthology.org/2020.lrec-1.754) (Proisl et al., LREC 2020)
ACL