% Vahtola, Creutz and Tiedemann (2022): BlackboxNLP workshop paper that introduces
% the SemAntoNeg test suite (paraphrase detection with negation and antonyms).
% The url field uses the canonical ACL Anthology address, not a preview build.
@inproceedings{vahtola-etal-2022-easy,
  title     = {It Is Not Easy To Detect Paraphrases: Analysing Semantic Similarity With Antonyms and Negation Using the New {SemAntoNeg} Benchmark},
  author    = {Vahtola, Teemu and
               Creutz, Mathias and
               Tiedemann, J{\"o}rg},
  editor    = {Bastings, Jasmijn and
               Belinkov, Yonatan and
               Elazar, Yanai and
               Hupkes, Dieuwke and
               Saphra, Naomi and
               Wiegreffe, Sarah},
  booktitle = {Proceedings of the Fifth {BlackboxNLP} Workshop on Analyzing and Interpreting Neural Networks for {NLP}},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates (Hybrid)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.blackboxnlp-1.20/},
  doi       = {10.18653/v1/2022.blackboxnlp-1.20},
  pages     = {249--262},
  abstract  = {We investigate to what extent a hundred publicly available, popular neural language models capture meaning systematically. Sentence embeddings obtained from pretrained or fine-tuned language models can be used to perform particular tasks, such as paraphrase detection, semantic textual similarity assessment or natural language inference. Common to all of these tasks is that paraphrastic sentences, that is, sentences that carry (nearly) the same meaning, should have (nearly) the same embeddings regardless of surface form. We demonstrate that performance varies greatly across different language models when a specific type of meaning-preserving transformation is applied: two sentences should be identified as paraphrastic if one of them contains a negated antonym in relation to the other one, such as ``I am not guilty'' versus ``I am innocent''. We introduce and release SemAntoNeg, a new test suite containing 3152 entries for probing paraphrasticity in sentences incorporating negation and antonyms. Among other things, we show that language models fine-tuned for natural language inference outperform other types of models, especially the ones fine-tuned to produce general-purpose sentence embeddings, on the test suite. Furthermore, we show that most models designed explicitly for paraphrasing are rather mediocre in our task.},
}
Markdown (Informal)
[It Is Not Easy To Detect Paraphrases: Analysing Semantic Similarity With Antonyms and Negation Using the New SemAntoNeg Benchmark](https://aclanthology.org/2022.blackboxnlp-1.20/) (Vahtola et al., BlackboxNLP 2022)
ACL