@inproceedings{ebrahimi-etal-2020-self,
title = "How Can Self-Attention Networks Recognize {D}yck-n Languages?",
author = "Ebrahimi, Javid and
Gelda, Dhruv and
Zhang, Wei",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.findings-emnlp.384/",
doi = "10.18653/v1/2020.findings-emnlp.384",
pages = "4301--4306",
abstract = "We focus on the recognition of Dyck-n (Dn) languages with self-attention (SA) networks, which has been deemed to be a difficult task for these networks. We compare the performance of two variants of SA, one with a starting symbol (SA+) and one without (SA-). Our results show that SA+ is able to generalize to longer sequences and deeper dependencies. For D2, we find that SA- completely breaks down on long sequences whereas the accuracy of SA+ is 58.82{\%}. We find attention maps learned by SA+ to be amenable to interpretation and compatible with a stack-based language recognizer. Surprisingly, the performance of SA networks is at par with LSTMs, which provides evidence on the ability of SA to learn hierarchies without recursion."
}
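
The abstract notes that the attention maps learned by SA+ are interpretable and compatible with a stack-based language recognizer. For reference, below is a minimal sketch of that classical stack algorithm for Dyck-n membership; the bracket alphabet, defaults, and the function name `is_dyck` are illustrative assumptions, not taken from the paper.

```python
# Minimal sketch of the classical stack-based Dyck-n recognizer referenced in
# the abstract. The bracket pairs and helper name are illustrative, not from
# the paper; the default alphabet matches D2 (two bracket types).

def is_dyck(sequence, pairs=None):
    """Return True iff `sequence` is a well-nested string over n bracket types."""
    pairs = pairs or {")": "(", "]": "["}   # Dyck-2 by default
    openers = set(pairs.values())
    stack = []
    for symbol in sequence:
        if symbol in openers:
            stack.append(symbol)            # push every opening bracket
        elif symbol in pairs:
            if not stack or stack.pop() != pairs[symbol]:
                return False                # mismatched or unmatched closer
        else:
            return False                    # symbol outside the alphabet
    return not stack                        # all openers must be closed


if __name__ == "__main__":
    print(is_dyck("([()])"))   # True: balanced Dyck-2 string
    print(is_dyck("([)]"))     # False: crossing brackets
    print(is_dyck("((("))      # False: unclosed openers
```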