% Conference paper (LREC 2012) describing the DutchSemCor sense-tagged corpus.
% Text outside an entry is ignored by BibTeX, so these two lines act as comments.
@inproceedings{vossen-etal-2012-dutchsemcor,
    title     = {{DutchSemCor}: Targeting the ideal sense-tagged corpus},
    author    = {Vossen, Piek and
                 G{\"o}r{\"o}g, Attila and
                 Izquierdo, Rub{\'e}n and
                 van den Bosch, Antal},
    booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
    month     = may,
    year      = {2012},
    address   = {Istanbul, Turkey},
    publisher = {European Language Resources Association (ELRA)},
    url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/187_Paper.pdf},
    pages     = {584--589},
    abstract  = {Word Sense Disambiguation (WSD) systems require large sense-tagged corpora along with lexical databases to reach satisfactory results. The number of English language resources for developed WSD increased in the past years while most other languages are still under-resourced. The situation is no different for Dutch. In order to overcome this data bottleneck, the DutchSemCor project will deliver a Dutch corpus that is sense-tagged with senses from the Cornetto lexical database. In this paper, we discuss the different conflicting requirements for a sense-tagged corpus and our strategies to fulfill them. We report on a first series of experiments to support our semi-automatic approach to build the corpus.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vossen-etal-2012-dutchsemcor">
<titleInfo>
<title>DutchSemCor: Targeting the ideal sense-tagged corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Piek</namePart>
<namePart type="family">Vossen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Attila</namePart>
<namePart type="family">Görög</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rubén</namePart>
<namePart type="family">Izquierdo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antal</namePart>
<namePart type="family">van den Bosch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word Sense Disambiguation (WSD) systems require large sense-tagged corpora along with lexical databases to reach satisfactory results. The number of English language resources for developed WSD increased in the past years while most other languages are still under-resourced. The situation is no different for Dutch. In order to overcome this data bottleneck, the DutchSemCor project will deliver a Dutch corpus that is sense-tagged with senses from the Cornetto lexical database. In this paper, we discuss the different conflicting requirements for a sense-tagged corpus and our strategies to fulfill them. We report on a first series of experiments to support our semi-automatic approach to build the corpus.</abstract>
<identifier type="citekey">vossen-etal-2012-dutchsemcor</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/187_Paper.pdf</url>
</location>
<part>
<date>2012-may</date>
<extent unit="page">
<start>584</start>
<end>589</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DutchSemCor: Targeting the ideal sense-tagged corpus
%A Vossen, Piek
%A Görög, Attila
%A Izquierdo, Rubén
%A van den Bosch, Antal
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 may
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F vossen-etal-2012-dutchsemcor
%X Word Sense Disambiguation (WSD) systems require large sense-tagged corpora along with lexical databases to reach satisfactory results. The number of English language resources for developed WSD increased in the past years while most other languages are still under-resourced. The situation is no different for Dutch. In order to overcome this data bottleneck, the DutchSemCor project will deliver a Dutch corpus that is sense-tagged with senses from the Cornetto lexical database. In this paper, we discuss the different conflicting requirements for a sense-tagged corpus and our strategies to fulfill them. We report on a first series of experiments to support our semi-automatic approach to build the corpus.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/187_Paper.pdf
%P 584-589
Markdown (Informal)
[DutchSemCor: Targeting the ideal sense-tagged corpus](http://www.lrec-conf.org/proceedings/lrec2012/pdf/187_Paper.pdf) (Vossen et al., LREC 2012)
ACL
- Piek Vossen, Attila Görög, Rubén Izquierdo, and Antal van den Bosch. 2012. DutchSemCor: Targeting the ideal sense-tagged corpus. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 584–589, Istanbul, Turkey. European Language Resources Association (ELRA).