@inproceedings{scarlini-etal-2020-sense,
title = "Sense-Annotated Corpora for Word Sense Disambiguation in Multiple Languages and Domains",
author = "Scarlini, Bianca and
Pasini, Tommaso and
Navigli, Roberto",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.723",
pages = "5905--5911",
abstract = "The knowledge acquisition bottleneck problem dramatically hampers the creation of sense-annotated data for Word Sense Disambiguation (WSD). Sense-annotated data are scarce for English and almost absent for other languages. This limits the range of action of deep-learning approaches, which today are at the base of any NLP task and are hungry for data. We mitigate this issue and encourage further research in multilingual WSD by releasing to the NLP community five large datasets annotated with word-senses in five different languages, namely, English, French, Italian, German and Spanish, and 5 distinct datasets in English, each for a different semantic domain. We show that supervised WSD models trained on our data attain higher performance than when trained on other automatically-created corpora. We release all our data containing more than 15 million annotated instances in 5 different languages at http://trainomatic.org/onesec.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="scarlini-etal-2020-sense">
<titleInfo>
<title>Sense-Annotated Corpora for Word Sense Disambiguation in Multiple Languages and Domains</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bianca</namePart>
<namePart type="family">Scarlini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommaso</namePart>
<namePart type="family">Pasini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Navigli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>The knowledge acquisition bottleneck problem dramatically hampers the creation of sense-annotated data for Word Sense Disambiguation (WSD). Sense-annotated data are scarce for English and almost absent for other languages. This limits the range of action of deep-learning approaches, which today are at the base of any NLP task and are hungry for data. We mitigate this issue and encourage further research in multilingual WSD by releasing to the NLP community five large datasets annotated with word-senses in five different languages, namely, English, French, Italian, German and Spanish, and 5 distinct datasets in English, each for a different semantic domain. We show that supervised WSD models trained on our data attain higher performance than when trained on other automatically-created corpora. We release all our data containing more than 15 million annotated instances in 5 different languages at http://trainomatic.org/onesec.</abstract>
<identifier type="citekey">scarlini-etal-2020-sense</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.723</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>5905</start>
<end>5911</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sense-Annotated Corpora for Word Sense Disambiguation in Multiple Languages and Domains
%A Scarlini, Bianca
%A Pasini, Tommaso
%A Navigli, Roberto
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F scarlini-etal-2020-sense
%X The knowledge acquisition bottleneck problem dramatically hampers the creation of sense-annotated data for Word Sense Disambiguation (WSD). Sense-annotated data are scarce for English and almost absent for other languages. This limits the range of action of deep-learning approaches, which today are at the base of any NLP task and are hungry for data. We mitigate this issue and encourage further research in multilingual WSD by releasing to the NLP community five large datasets annotated with word-senses in five different languages, namely, English, French, Italian, German and Spanish, and 5 distinct datasets in English, each for a different semantic domain. We show that supervised WSD models trained on our data attain higher performance than when trained on other automatically-created corpora. We release all our data containing more than 15 million annotated instances in 5 different languages at http://trainomatic.org/onesec.
%U https://aclanthology.org/2020.lrec-1.723
%P 5905-5911
Markdown (Informal)
[Sense-Annotated Corpora for Word Sense Disambiguation in Multiple Languages and Domains](https://aclanthology.org/2020.lrec-1.723) (Scarlini et al., LREC 2020)
ACL