@inproceedings{blloshmi-etal-2021-ir,
title = "{IR} like a {SIR}: {S}ense-enhanced {I}nformation {R}etrieval for {M}ultiple {L}anguages",
author = "Blloshmi, Rexhina and
Pasini, Tommaso and
Campolungo, Niccol{\`o} and
Banerjee, Somnath and
Navigli, Roberto and
Pasi, Gabriella",
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.emnlp-main.79",
doi = "10.18653/v1/2021.emnlp-main.79",
pages = "1030--1041",
abstract = "With the advent of contextualized embeddings, attention towards neural ranking approaches for Information Retrieval increased considerably. However, two aspects have remained largely neglected: i) queries usually consist of few keywords only, which increases ambiguity and makes their contextualization harder, and ii) performing neural ranking on non-English documents is still cumbersome due to shortage of labeled datasets. In this paper we present SIR (Sense-enhanced Information Retrieval) to mitigate both problems by leveraging word sense information. At the core of our approach lies a novel multilingual query expansion mechanism based on Word Sense Disambiguation that provides sense definitions as additional semantic information for the query. Importantly, we use senses as a bridge across languages, thus allowing our model to perform considerably better than its supervised and unsupervised alternatives across French, German, Italian and Spanish languages on several CLEF benchmarks, while being trained on English Robust04 data only. We release SIR at https://github.com/SapienzaNLP/sir.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="blloshmi-etal-2021-ir">
<titleInfo>
<title>IR like a SIR: Sense-enhanced Information Retrieval for Multiple Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rexhina</namePart>
<namePart type="family">Blloshmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommaso</namePart>
<namePart type="family">Pasini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niccolò</namePart>
<namePart type="family">Campolungo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Somnath</namePart>
<namePart type="family">Banerjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Navigli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriella</namePart>
<namePart type="family">Pasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With the advent of contextualized embeddings, attention towards neural ranking approaches for Information Retrieval increased considerably. However, two aspects have remained largely neglected: i) queries usually consist of few keywords only, which increases ambiguity and makes their contextualization harder, and ii) performing neural ranking on non-English documents is still cumbersome due to shortage of labeled datasets. In this paper we present SIR (Sense-enhanced Information Retrieval) to mitigate both problems by leveraging word sense information. At the core of our approach lies a novel multilingual query expansion mechanism based on Word Sense Disambiguation that provides sense definitions as additional semantic information for the query. Importantly, we use senses as a bridge across languages, thus allowing our model to perform considerably better than its supervised and unsupervised alternatives across French, German, Italian and Spanish languages on several CLEF benchmarks, while being trained on English Robust04 data only. We release SIR at https://github.com/SapienzaNLP/sir.</abstract>
<identifier type="citekey">blloshmi-etal-2021-ir</identifier>
<identifier type="doi">10.18653/v1/2021.emnlp-main.79</identifier>
<location>
<url>https://aclanthology.org/2021.emnlp-main.79</url>
</location>
<part>
<date>2021-nov</date>
<extent unit="page">
<start>1030</start>
<end>1041</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IR like a SIR: Sense-enhanced Information Retrieval for Multiple Languages
%A Blloshmi, Rexhina
%A Pasini, Tommaso
%A Campolungo, Niccolò
%A Banerjee, Somnath
%A Navigli, Roberto
%A Pasi, Gabriella
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 nov
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F blloshmi-etal-2021-ir
%X With the advent of contextualized embeddings, attention towards neural ranking approaches for Information Retrieval increased considerably. However, two aspects have remained largely neglected: i) queries usually consist of few keywords only, which increases ambiguity and makes their contextualization harder, and ii) performing neural ranking on non-English documents is still cumbersome due to shortage of labeled datasets. In this paper we present SIR (Sense-enhanced Information Retrieval) to mitigate both problems by leveraging word sense information. At the core of our approach lies a novel multilingual query expansion mechanism based on Word Sense Disambiguation that provides sense definitions as additional semantic information for the query. Importantly, we use senses as a bridge across languages, thus allowing our model to perform considerably better than its supervised and unsupervised alternatives across French, German, Italian and Spanish languages on several CLEF benchmarks, while being trained on English Robust04 data only. We release SIR at https://github.com/SapienzaNLP/sir.
%R 10.18653/v1/2021.emnlp-main.79
%U https://aclanthology.org/2021.emnlp-main.79
%U https://doi.org/10.18653/v1/2021.emnlp-main.79
%P 1030-1041
Markdown (Informal)
[IR like a SIR: Sense-enhanced Information Retrieval for Multiple Languages](https://aclanthology.org/2021.emnlp-main.79) (Blloshmi et al., EMNLP 2021)
ACL
- Rexhina Blloshmi, Tommaso Pasini, Niccolò Campolungo, Somnath Banerjee, Roberto Navigli, and Gabriella Pasi. 2021. IR like a SIR: Sense-enhanced Information Retrieval for Multiple Languages. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pages 1030–1041, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.