% Conference paper record (Arefyev et al., COLING 2020).
% Values are brace-delimited; month uses the predefined three-letter macro.
@inproceedings{arefyev-etal-2020-always,
  author    = {Arefyev, Nikolay and
               Sheludko, Boris and
               Podolskiy, Alexander and
               Panchenko, Alexander},
  title     = {Always Keep your Target in Mind: Studying Semantics and Improving Performance of Neural Lexical Substitution},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  publisher = {International Committee on Computational Linguistics},
  address   = {Barcelona, Spain (Online)},
  month     = dec,
  year      = {2020},
  pages     = {1242--1255},
  doi       = {10.18653/v1/2020.coling-main.107},
  url       = {https://aclanthology.org/2020.coling-main.107},
  abstract  = {Lexical substitution, i.e. generation of plausible words that can replace a particular target word in a given context, is an extremely powerful technology that can be used as a backbone of various NLP applications, including word sense induction and disambiguation, lexical relation extraction, data augmentation, etc. In this paper, we present a large-scale comparative study of lexical substitution methods employing both rather old and most recent language and masked language models (LMs and MLMs), such as context2vec, ELMo, BERT, RoBERTa, XLNet. We show that already competitive results achieved by SOTA LMs/MLMs can be further substantially improved if information about the target word is injected properly. Several existing and new target word injection methods are compared for each LM/MLM using both intrinsic evaluation on lexical substitution datasets and extrinsic evaluation on word sense induction (WSI) datasets. On two WSI datasets we obtain new SOTA results. Besides, we analyze the types of semantic relations between target words and their substitutes generated by different models or given by annotators.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey arefyev-etal-2020-always: a single paper
     described together with its host proceedings volume (relatedItem type="host"). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="arefyev-etal-2020-always">
    <titleInfo>
      <title>Always Keep your Target in Mind: Studying Semantics and Improving Performance of Neural Lexical Substitution</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Nikolay</namePart>
      <namePart type="family">Arefyev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Boris</namePart>
      <namePart type="family">Sheludko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alexander</namePart>
      <namePart type="family">Podolskiy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alexander</namePart>
      <namePart type="family">Panchenko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-dec</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume in which the paper appears. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 28th International Conference on Computational Linguistics</title>
      </titleInfo>
      <originInfo>
        <publisher>International Committee on Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Lexical substitution, i.e. generation of plausible words that can replace a particular target word in a given context, is an extremely powerful technology that can be used as a backbone of various NLP applications, including word sense induction and disambiguation, lexical relation extraction, data augmentation, etc. In this paper, we present a large-scale comparative study of lexical substitution methods employing both rather old and most recent language and masked language models (LMs and MLMs), such as context2vec, ELMo, BERT, RoBERTa, XLNet. We show that already competitive results achieved by SOTA LMs/MLMs can be further substantially improved if information about the target word is injected properly. Several existing and new target word injection methods are compared for each LM/MLM using both intrinsic evaluation on lexical substitution datasets and extrinsic evaluation on word sense induction (WSI) datasets. On two WSI datasets we obtain new SOTA results. Besides, we analyze the types of semantic relations between target words and their substitutes generated by different models or given by annotators.</abstract>
    <!-- Identifiers and resolvable location. -->
    <identifier type="citekey">arefyev-etal-2020-always</identifier>
    <identifier type="doi">10.18653/v1/2020.coling-main.107</identifier>
    <location>
      <url>https://aclanthology.org/2020.coling-main.107</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2020-dec</date>
      <extent unit="page">
        <start>1242</start>
        <end>1255</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Always Keep your Target in Mind: Studying Semantics and Improving Performance of Neural Lexical Substitution
%A Arefyev, Nikolay
%A Sheludko, Boris
%A Podolskiy, Alexander
%A Panchenko, Alexander
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 dec
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F arefyev-etal-2020-always
%X Lexical substitution, i.e. generation of plausible words that can replace a particular target word in a given context, is an extremely powerful technology that can be used as a backbone of various NLP applications, including word sense induction and disambiguation, lexical relation extraction, data augmentation, etc. In this paper, we present a large-scale comparative study of lexical substitution methods employing both rather old and most recent language and masked language models (LMs and MLMs), such as context2vec, ELMo, BERT, RoBERTa, XLNet. We show that already competitive results achieved by SOTA LMs/MLMs can be further substantially improved if information about the target word is injected properly. Several existing and new target word injection methods are compared for each LM/MLM using both intrinsic evaluation on lexical substitution datasets and extrinsic evaluation on word sense induction (WSI) datasets. On two WSI datasets we obtain new SOTA results. Besides, we analyze the types of semantic relations between target words and their substitutes generated by different models or given by annotators.
%R 10.18653/v1/2020.coling-main.107
%U https://aclanthology.org/2020.coling-main.107
%U https://doi.org/10.18653/v1/2020.coling-main.107
%P 1242-1255
Markdown (Informal)
[Always Keep your Target in Mind: Studying Semantics and Improving Performance of Neural Lexical Substitution](https://aclanthology.org/2020.coling-main.107) (Arefyev et al., COLING 2020)
ACL