@inproceedings{hayati-etal-2021-bert,
  title     = {Does {BERT} Learn as Humans Perceive? Understanding Linguistic Styles through Lexica},
  author    = {Hayati, Shirley Anugrah and
               Kang, Dongyeop and
               Ungar, Lyle},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2021},
  address   = {Online and Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.emnlp-main.510},
  doi       = {10.18653/v1/2021.emnlp-main.510},
  pages     = {6323--6331},
  abstract  = {People convey their intention and attitude through linguistic styles of the text that they write. In this study, we investigate lexicon usages across styles throughout two lenses: human perception and machine word importance, since words differ in the strength of the stylistic cues that they provide. To collect labels of human perception, we curate a new dataset, Hummingbird, on top of benchmarking style datasets. We have crowd workers highlight the representative words in the text that makes them think the text has the following styles: politeness, sentiment, offensiveness, and five emotion types. We then compare these human word labels with word importance derived from a popular fine-tuned style classifier like BERT. Our results show that the BERT often finds content words not relevant to the target style as important words used in style prediction, but humans do not perceive the same way even though for some styles (e.g., positive sentiment and joy) human- and machine-identified words share significant overlap for some styles.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hayati-etal-2021-bert">
<titleInfo>
<title>Does BERT Learn as Humans Perceive? Understanding Linguistic Styles through Lexica</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shirley</namePart>
<namePart type="given">Anugrah</namePart>
<namePart type="family">Hayati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongyeop</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lyle</namePart>
<namePart type="family">Ungar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="w3cdtf">2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>People convey their intention and attitude through linguistic styles of the text that they write. In this study, we investigate lexicon usages across styles throughout two lenses: human perception and machine word importance, since words differ in the strength of the stylistic cues that they provide. To collect labels of human perception, we curate a new dataset, Hummingbird, on top of benchmarking style datasets. We have crowd workers highlight the representative words in the text that makes them think the text has the following styles: politeness, sentiment, offensiveness, and five emotion types. We then compare these human word labels with word importance derived from a popular fine-tuned style classifier like BERT. Our results show that the BERT often finds content words not relevant to the target style as important words used in style prediction, but humans do not perceive the same way even though for some styles (e.g., positive sentiment and joy) human- and machine-identified words share significant overlap for some styles.</abstract>
<identifier type="citekey">hayati-etal-2021-bert</identifier>
<identifier type="doi">10.18653/v1/2021.emnlp-main.510</identifier>
<location>
<url>https://aclanthology.org/2021.emnlp-main.510</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>6323</start>
<end>6331</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Does BERT Learn as Humans Perceive? Understanding Linguistic Styles through Lexica
%A Hayati, Shirley Anugrah
%A Kang, Dongyeop
%A Ungar, Lyle
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 nov
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F hayati-etal-2021-bert
%X People convey their intention and attitude through linguistic styles of the text that they write. In this study, we investigate lexicon usages across styles throughout two lenses: human perception and machine word importance, since words differ in the strength of the stylistic cues that they provide. To collect labels of human perception, we curate a new dataset, Hummingbird, on top of benchmarking style datasets. We have crowd workers highlight the representative words in the text that makes them think the text has the following styles: politeness, sentiment, offensiveness, and five emotion types. We then compare these human word labels with word importance derived from a popular fine-tuned style classifier like BERT. Our results show that the BERT often finds content words not relevant to the target style as important words used in style prediction, but humans do not perceive the same way even though for some styles (e.g., positive sentiment and joy) human- and machine-identified words share significant overlap for some styles.
%R 10.18653/v1/2021.emnlp-main.510
%U https://aclanthology.org/2021.emnlp-main.510
%U https://doi.org/10.18653/v1/2021.emnlp-main.510
%P 6323-6331
Markdown (Informal)
[Does BERT Learn as Humans Perceive? Understanding Linguistic Styles through Lexica](https://aclanthology.org/2021.emnlp-main.510) (Hayati et al., EMNLP 2021)
ACL