@inproceedings{zhu-bhat-2021-euphemistic-phrase,
title = "Euphemistic Phrase Detection by Masked Language Model",
author = "Zhu, Wanzheng and
Bhat, Suma",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.findings-emnlp.16/",
doi = "10.18653/v1/2021.findings-emnlp.16",
pages = "163--168",
abstract = "It is a well-known approach for fringe groups and organizations to use euphemisms{---}ordinary-sounding and innocent-looking words with a secret meaning{---}to conceal what they are discussing. For instance, drug dealers often use {\textquotedblleft}pot{\textquotedblright} for marijuana and {\textquotedblleft}avocado{\textquotedblright} for heroin. From a social media content moderation perspective, though recent advances in NLP have enabled the automatic detection of such single-word euphemisms, no existing work is capable of automatically detecting multi-word euphemisms, such as {\textquotedblleft}blue dream{\textquotedblright} (marijuana) and {\textquotedblleft}black tar{\textquotedblright} (heroin). Our paper tackles the problem of euphemistic phrase detection without human effort for the first time, as far as we are aware. We first perform phrase mining on a raw text corpus (e.g., social media posts) to extract quality phrases. Then, we utilize word embedding similarities to select a set of euphemistic phrase candidates. Finally, we rank those candidates by a masked language model{---}SpanBERT. Compared to strong baselines, we report 20-50{\%} higher detection accuracies using our algorithm for detecting euphemistic phrases."
}
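
The abstract sketches a three-step pipeline: mine quality phrases from the raw corpus, filter candidates by word-embedding similarity, and rank the survivors with a masked language model (SpanBERT). Below is a minimal, illustrative sketch of the ranking idea only, not the authors' implementation: it scores how well each candidate phrase fits in place of a known target keyword. It assumes the Hugging Face transformers library, uses bert-base-uncased as a readily available stand-in for SpanBERT, and the [TARGET] placeholder, sentences, and candidate phrases are made-up examples.

# Illustrative sketch: rank multi-word euphemism candidates by how well a
# masked LM predicts them in place of a known target keyword.
# Assumptions: bert-base-uncased as a stand-in for SpanBERT; hypothetical
# sentences and candidates; [TARGET] marks the masked target keyword.
import torch
from transformers import BertTokenizer, BertForMaskedLM

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForMaskedLM.from_pretrained("bert-base-uncased")
model.eval()

def candidate_score(masked_sentence: str, candidate: str) -> float:
    """Average log-probability of the candidate's tokens filling the masked span."""
    cand_tokens = tokenizer.tokenize(candidate)
    # Replace the placeholder with one [MASK] token per candidate subword.
    text = masked_sentence.replace(
        "[TARGET]", " ".join([tokenizer.mask_token] * len(cand_tokens))
    )
    inputs = tokenizer(text, return_tensors="pt")
    mask_positions = (inputs["input_ids"][0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0]
    with torch.no_grad():
        logits = model(**inputs).logits[0]
    log_probs = torch.log_softmax(logits, dim=-1)
    cand_ids = tokenizer.convert_tokens_to_ids(cand_tokens)
    return sum(log_probs[pos, tid].item() for pos, tid in zip(mask_positions, cand_ids)) / len(cand_ids)

# Hypothetical masked sentences (target keyword replaced by [TARGET]) and candidate phrases.
sentences = ["selling high quality [TARGET] , message me for prices"]
candidates = ["blue dream", "maple syrup", "parking ticket"]
ranked = sorted(candidates, key=lambda c: sum(candidate_score(s, c) for s in sentences), reverse=True)
print(ranked)

Higher-scoring candidates are those the masked LM finds most plausible in the drug-related contexts, which is the intuition behind ranking euphemistic phrase candidates with a span-aware model such as SpanBERT.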