BibTeX
@inproceedings{tian-etal-2021-bert,
    title = "How does {BERT} process disfluency?",
    author = "Tian, Ye and
      Nieradzik, Tim and
      Jalali, Sepehr and
      Shiu, Da-shan",
    booktitle = "Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue",
    month = jul,
    year = "2021",
    address = "Singapore and Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.sigdial-1.22",
    pages = "208--217",
    abstract = "Natural conversations are filled with disfluencies. This study investigates if and how BERT understands disfluency with three experiments: (1) a behavioural study using a downstream task, (2) an analysis of sentence embeddings and (3) an analysis of the attention mechanism on disfluency. The behavioural study shows that without fine-tuning on disfluent data, BERT does not suffer significant performance loss when presented disfluent compared to fluent inputs (exp1). Analysis on sentence embeddings of disfluent and fluent sentence pairs reveals that the deeper the layer, the more similar their representation (exp2). This indicates that deep layers of BERT become relatively invariant to disfluency. We pinpoint attention as a potential mechanism that could explain this phenomenon (exp3). Overall, the study suggests that BERT has knowledge of disfluency structure. We emphasise the potential of using BERT to understand natural utterances without disfluency removal.",
}
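The second experiment described in the abstract compares layer-wise sentence representations of disfluent and fluent sentence pairs. The snippet below is a minimal sketch of that kind of comparison, not the authors' exact pipeline: it assumes `bert-base-uncased`, mean pooling over tokens, and an invented fluent/disfluent pair, then reports the per-layer cosine similarity that the paper finds growing with depth.

```python
import torch
from transformers import BertModel, BertTokenizer

# Hypothetical fluent/disfluent pair -- illustrative only, not from the paper's data.
fluent = "I want a flight to Boston."
disfluent = "I want a flight to Denver uh I mean to Boston."

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased", output_hidden_states=True)
model.eval()

def layer_vectors(sentence):
    """Mean-pool the tokens of every hidden layer into one vector per layer."""
    inputs = tokenizer(sentence, return_tensors="pt")
    with torch.no_grad():
        hidden_states = model(**inputs).hidden_states  # embedding layer + 12 transformer layers
    return [h.mean(dim=1).squeeze(0) for h in hidden_states]

# Per-layer cosine similarity between the fluent and disfluent encodings.
for layer, (f_vec, d_vec) in enumerate(zip(layer_vectors(fluent), layer_vectors(disfluent))):
    sim = torch.cosine_similarity(f_vec, d_vec, dim=0).item()
    print(f"layer {layer:2d}: cosine similarity {sim:.3f}")
```

Mean pooling is just one plausible choice of sentence embedding here; under the paper's finding, this sort of per-layer probe would show the similarity between the pair rising in the deeper layers.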
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="tian-etal-2021-bert">
    <titleInfo>
      <title>How does BERT process disfluency?</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ye</namePart>
      <namePart type="family">Tian</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tim</namePart>
      <namePart type="family">Nieradzik</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sepehr</namePart>
      <namePart type="family">Jalali</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Da-shan</namePart>
      <namePart type="family">Shiu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-jul</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore and Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Natural conversations are filled with disfluencies. This study investigates if and how BERT understands disfluency with three experiments: (1) a behavioural study using a downstream task, (2) an analysis of sentence embeddings and (3) an analysis of the attention mechanism on disfluency. The behavioural study shows that without fine-tuning on disfluent data, BERT does not suffer significant performance loss when presented disfluent compared to fluent inputs (exp1). Analysis on sentence embeddings of disfluent and fluent sentence pairs reveals that the deeper the layer, the more similar their representation (exp2). This indicates that deep layers of BERT become relatively invariant to disfluency. We pinpoint attention as a potential mechanism that could explain this phenomenon (exp3). Overall, the study suggests that BERT has knowledge of disfluency structure. We emphasise the potential of using BERT to understand natural utterances without disfluency removal.</abstract>
    <identifier type="citekey">tian-etal-2021-bert</identifier>
    <location>
      <url>https://aclanthology.org/2021.sigdial-1.22</url>
    </location>
    <part>
      <date>2021-jul</date>
      <extent unit="page">
        <start>208</start>
        <end>217</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T How does BERT process disfluency?
%A Tian, Ye
%A Nieradzik, Tim
%A Jalali, Sepehr
%A Shiu, Da-shan
%S Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2021
%8 jul
%I Association for Computational Linguistics
%C Singapore and Online
%F tian-etal-2021-bert
%X Natural conversations are filled with disfluencies. This study investigates if and how BERT understands disfluency with three experiments: (1) a behavioural study using a downstream task, (2) an analysis of sentence embeddings and (3) an analysis of the attention mechanism on disfluency. The behavioural study shows that without fine-tuning on disfluent data, BERT does not suffer significant performance loss when presented disfluent compared to fluent inputs (exp1). Analysis on sentence embeddings of disfluent and fluent sentence pairs reveals that the deeper the layer, the more similar their representation (exp2). This indicates that deep layers of BERT become relatively invariant to disfluency. We pinpoint attention as a potential mechanism that could explain this phenomenon (exp3). Overall, the study suggests that BERT has knowledge of disfluency structure. We emphasise the potential of using BERT to understand natural utterances without disfluency removal.
%U https://aclanthology.org/2021.sigdial-1.22
%P 208-217
Markdown (Informal)
[How does BERT process disfluency?](https://aclanthology.org/2021.sigdial-1.22) (Tian et al., SIGDIAL 2021)
ACL
Ye Tian, Tim Nieradzik, Sepehr Jalali, and Da-shan Shiu. 2021. How does BERT process disfluency? In Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue, pages 208–217, Singapore and Online. Association for Computational Linguistics.